from io import StringIO
from urllib.parse import urlparse

import pandas as pd

from scraper import http_queue


def _is_catbox_png(url):
    """Return True if *url* is a string naming a ``.png`` on a catbox.moe subdomain."""
    # Empty spreadsheet cells are loaded by pandas as float NaN (and purely
    # numeric cells as numbers), so anything that is not a str cannot be a URL.
    if not isinstance(url, str):
        return False
    try:
        parts = urlparse(url)
    except ValueError:
        # urlparse rejects some malformed authorities (e.g. bad IPv6 brackets).
        return False
    # Match on the parsed hostname by suffix rather than a substring of netloc:
    # this ignores any userinfo/port, is case-insensitive (hostname is
    # lower-cased), and rejects look-alikes such as
    # "files.catbox.moe.evil.example".
    host = parts.hostname or ''
    return host.endswith('.catbox.moe') and parts.path.endswith('.png')


def load_pony_sheet():
    """Download the sheet as CSV and collect its catbox.moe PNG links.

    The spreadsheet is fetched through ``http_queue.add`` using the Google
    Sheets CSV export URL and parsed without a header row. The second through
    fifth columns are labelled ``name``, ``author``, ``url`` and
    ``description``; only the ``url`` column is inspected.

    Returns:
        A set of the URL strings (exactly as they appear in the sheet) whose
        host is a subdomain of ``catbox.moe`` and whose path ends in ``.png``.
        Cells that are empty, non-text, or not parseable as URLs are skipped.

    Raises:
        ValueError: if slicing out columns 2-5 does not yield exactly four
            columns (raised by pandas when the labels are assigned).
        Errors from ``http_queue.add`` and ``pandas.read_csv`` are not caught
        here and propagate to the caller.
    """
    url = 'https://docs.google.com/spreadsheets/d/1J7BeqJVDS51cXF8Pgm2YZaFq-Z6ykSJT/export?exportFormat=csv'
    response = http_queue.add(url)
    df = pd.read_csv(StringIO(response.text), header=None)
    df = df.iloc[:, 1:5]  # Keep only the 2nd through 5th columns
    df.columns = ['name', 'author', 'url', 'description']
    return {cell for cell in df['url'] if _is_catbox_png(cell)}
