import re
import traceback
from collections import OrderedDict

from scraper import http_queue
from scraper.log import root_logger
from scraper.risuai.retarded_ass_json_fixer import fix_shitty_risuai_json
from scraper.risuai.types import RisuaiNode, RisuaiUser

# Module-level logger: a child of the scraper's root logger named 'RISUAI.LIST'.
_logger = root_logger.get_child('RISUAI.LIST')

from bs4 import BeautifulSoup


def get_page_characters(i: int, base_url: str):
    """Collect the character links found on one listing page.

    Args:
        i: Page number; its string form is appended to ``base_url``.
        base_url: Listing URL prefix that the page number is appended to.

    Returns:
        A list with the ``href`` of every matching card anchor whose target
        starts with ``/character/``, in document order.
    """
    card_class = 'border p-4 flex hover:ring-2 rounded-md transition'
    page_url = base_url + str(i)
    response = http_queue.add(page_url)
    document = BeautifulSoup(response.text, 'html.parser')

    character_links = []
    # href=True restricts the search to anchors that carry an href attribute.
    for anchor in document.find_all('a', class_=card_class, href=True):
        target = anchor['href']
        if target.startswith('/character/'):
            character_links.append(target)
    return character_links


def get_characters_list(limit: int = None) -> list:
    """Walk the "latest" listing pages and collect unique character links.

    Pages are requested one after another, starting at page 1, until a page
    yields no character links or enough unique links have been collected.

    Args:
        limit: Maximum number of character links to return. ``None`` or ``0``
            means no limit; a negative value yields an empty list.

    Returns:
        Unique character links in the order they were first seen, at most
        ``limit`` of them when a limit is given.
    """
    base_url = 'https://realm.risuai.net/?sort=latest&page='
    if limit is not None and limit < 0:
        return []

    # A dict doubles as an insertion-ordered set, so duplicates (e.g. entries
    # shifting between pages while the listing is being walked) never count
    # toward the limit.
    unique_urls = {}
    page = 0
    while True:
        page += 1
        elements = get_page_characters(page, base_url)
        if not elements:
            _logger.info(f'No more characters found. Exiting at page {page}.')
            break  # exit the loop if no more characters are found
        _logger.info(f'Page {page}: {len(elements)} characters')
        unique_urls.update(dict.fromkeys(elements))
        if limit and len(unique_urls) >= limit:
            break

    found_urls = list(unique_urls)
    # The last page fetched may overshoot the limit; trim the surplus.
    return found_urls[:limit] if limit else found_urls


def get_risuai_character(url: str):
    """Fetch a character page from realm.risuai.net and parse its card data.

    The card is read from the payload embedded in the first ``<script>`` tag
    of the page, after it has been repaired by ``fix_shitty_risuai_json``.

    Args:
        url: Path of the character page relative to the site root; leading
            slashes are stripped before it is appended.

    Returns:
        A ``RisuaiNode`` built from the card data, or ``None`` when the page
        has no script tag, the payload cannot be located or parsed, the parsed
        payload lacks the expected ``data``/``card`` entries, or the node
        cannot be constructed. Failures other than a missing script tag are
        logged.
    """
    r = http_queue.add('https://realm.risuai.net/' + url.lstrip('/'))
    r.encoding = r.apparent_encoding
    soup = BeautifulSoup(r.text, 'html.parser')
    scripts = soup.find_all('script')
    if not scripts:
        return None

    # Only the first script tag is inspected for the embedded payload.
    match = re.search(r'data: \[.*?,({.*?})],', scripts[0].text)
    if match is None:
        _logger.critical(f'Could not locate the embedded card data for {url}')
        return None

    fixed_json, json_parse_err = fix_shitty_risuai_json(match.group(1))
    if json_parse_err:
        _logger.critical(json_parse_err)
        return None

    try:
        d = fixed_json['data']['card']
    except (KeyError, TypeError):
        _logger.critical(f'Unexpected payload structure for {url}:\n{traceback.format_exc()}')
        return None

    try:
        return RisuaiNode(is_charx='/api/v1/download/charx-v3/' in r.text, **d)
    except Exception:
        # Log the offending card data alongside the traceback so the failure
        # can be diagnosed; interrupts and SystemExit are left to propagate.
        _logger.critical(traceback.format_exc())
        _logger.critical(f'Card data that failed to load: {d}')
        return None


def get_risuai_user(username: str):
    """Fetch a creator page from realm.risuai.net and parse the user profile.

    Args:
        username: Creator name; appended as-is to the creator URL and compared
            against the lower-cased name shown on the page.

    Returns:
        A ``RisuaiUser`` holding the lower-cased page username and the
        description text (empty when the profile has no ``<p>`` element), or
        ``None`` when the profile container or its ``<h1>`` heading is missing.

    Raises:
        AssertionError: If the name shown on the page does not equal
            ``username``.
    """
    r = http_queue.add('https://realm.risuai.net/creator/' + username)
    r.encoding = r.apparent_encoding
    soup = BeautifulSoup(r.text, 'html.parser')
    div_element = soup.find('div', class_='border p-4 flex rounded-md transition flex-col')
    if div_element is None:
        return None

    heading = div_element.find('h1')
    if heading is None:
        _logger.critical(f'Creator page for {username} has no name heading')
        return None
    html_username = heading.get_text(strip=True).lower()

    # Raised explicitly rather than via `assert` so the check still runs when
    # Python is started with -O.
    if html_username != username:
        raise AssertionError(
            f'Creator page shows {html_username!r} but {username!r} was requested'
        )

    paragraph = div_element.find('p')
    html_description = paragraph.get_text() if paragraph is not None else ''
    return RisuaiUser(name=html_username, description=html_description)
