import base64
import copy
import json
import os
import re
import sys
import traceback
from pathlib import Path

import requests
from bs4 import BeautifulSoup

from scraper.database.connection import CursorFromConnectionFromPool
from .elastic import ELASTIC_CLIENT
from .operations import prepare_data
from .parse import extract_api_types
from .regex import url_regex, remove_http

# Directory that contains this file, with symlinks resolved.
script_dir = os.path.dirname(os.path.realpath(__file__))
# Put the directory two levels above `script_dir` at the front of the import
# search path before the `scraper` imports below.
# NOTE(review): `scraper.database.connection` is already imported near the top
# of this file, i.e. before this path change takes effect -- confirm whether
# this tweak is still required.
sys.path.insert(0, str(Path(script_dir).parent.parent))

import scraper
from scraper.log import root_logger

# Child logger of the project's root logger, used by every function in this module.
logger = root_logger.get_child('PROXY_STATS.SCRAPE')


def is_valid_url(test_url: str) -> bool:
    """Return True when ``test_url`` matches ``url_regex`` from its first character."""
    matched = re.match(url_regex, test_url)
    return matched is not None


def scraper_validate_proxy(proxy_url: str):
    """Fetch the page for ``proxy_url`` and validate it.

    Returns:
        The 3-tuple produced by ``validate_proxy`` when a page was fetched,
        otherwise ``(None, proxy_url, error_message)`` with the error message
        reported by ``get_proxy_page``.
    """
    page, fetch_error = get_proxy_page(proxy_url)
    if page:
        return validate_proxy(proxy_url, page)
    return None, proxy_url, fetch_error


def get_proxy_page(proxy_url: str):
    """Fetch the page at ``proxy_url`` through ``scraper.http_queue``.

    ``https://`` is prepended when the URL carries no ``http://`` or
    ``https://`` scheme, and the result is checked with ``is_valid_url``
    before any request is made.

    Args:
        proxy_url: Target URL, with or without a scheme.

    Returns:
        ``(response, '')`` when the queue returns a truthy response,
        ``(None, 'Not valid URL')`` when the URL fails validation, or
        ``(None, 'HTTP error')`` when the queue returns a falsy result.
    """
    # Test for the full scheme prefixes: a bare host that merely begins with
    # "http" (e.g. "httpbin.org") still needs a scheme added.
    if not proxy_url.startswith(('http://', 'https://')):
        proxy_url = 'https://' + proxy_url

    if not is_valid_url(proxy_url):
        return None, "Not valid URL"

    response = scraper.http_queue.add(proxy_url, retries=1, delay=5)
    if response:
        return response, ''

    logger.error(f'Failed to get space "{proxy_url}"')
    return None, 'HTTP error'


def validate_proxy(proxy_url: str, response: "requests.Response | None"):
    """Validate a fetched proxy info page and extract its embedded JSON.

    The page is accepted only when it was served with status 200, contains one
    of the known heading markers, and its last ``<pre>`` element holds a JSON
    object with a non-empty ``endpoints`` entry.

    Args:
        proxy_url: URL the page was fetched for; echoed back in the result and
            stored under the ``url`` key of the parsed JSON.
        response: The HTTP response, or ``None`` when no response is available.

    Returns:
        ``(proxy_json, proxy_url, None)`` on success, otherwise
        ``(None, proxy_url, error_message)``.
    """
    # A missing response has no status code to report, so handle it separately.
    if response is None:
        return None, proxy_url, "No response"
    if not response or response.status_code != 200:
        return None, proxy_url, f"Bad response code: {response.status_code}"

    body = response.text
    required_markers = (
        '<h2 id="estimatedwaittimes">',
        '<h2 id="servergreeting">',
        '<h2>Service Info</h2>',
    )
    if not any(marker in body for marker in required_markers):
        return None, proxy_url, "Incorrect page structure (missing required elements)"

    soup = BeautifulSoup(body, 'html.parser')
    pre_elm = soup.find_all('pre')
    # Indexing an empty result would raise IndexError; report it instead.
    if not pre_elm:
        return None, proxy_url, "Incorrect page structure (<pre> count: 0)"

    # Pages with several <pre> elements are accepted; only the last one is parsed.
    pre = pre_elm[-1].text
    try:
        proxy_json = json.loads(pre)
    except (ValueError, RecursionError) as e:
        # json.JSONDecodeError is a ValueError; RecursionError covers
        # pathologically nested documents.
        return None, proxy_url, f'Failed to load JSON for {proxy_url} - {e}'

    # Valid JSON that is not an object (list, string, number) has no endpoints.
    if not isinstance(proxy_json, dict) or not proxy_json.get('endpoints'):
        return None, proxy_url, 'Incorrect page structure (missing "endpoints" in JSON)'

    proxy_json['url'] = proxy_url
    return proxy_json, proxy_url, None


def get_proxy_evulid():
    """Fetch stats from the hard-coded chub-archive proxy API and store them.

    The ``/api/stats`` payload is reshaped with ``prepare_results`` and handed
    to ``parse_space_results``.

    Returns:
        ``(None, space_name)`` when the request or its JSON decoding raises;
        ``None`` in every other case (no model choices available, or the
        results were passed on for storage).
    """
    space_name = 'proxy_chub_archive_evulid'
    space_url = 'https://proxy.chub-archive.example.com'
    api_url = f'{space_url}/api/stats'

    try:
        stats = scraper.http_queue.add(api_url, handle_err=False).json()
    except Exception as exc:
        logger.error(f'Failed to get space "{space_url}" - {exc.__class__.__name__}: {exc}')
        return None, space_name

    models = stats['models']
    default_model = models['default']
    available_models = list(models['choices'].keys())
    if not available_models:
        logger.warning('No model choices for proxy.chub-archive.example.com, skipping.')
        return

    parse_space_results(prepare_results(stats, default_model, space_url))


def prepare_results(response: dict, default_model: str, space_url: str) -> dict:
    """Reshape a raw stats payload into the layout used for the other proxies.

    Args:
        response: Decoded stats payload; must contain the ``stats``,
            ``endpoints``, ``config`` and ``models`` sections read below.
        default_model: Key into ``response['models']['choices']`` whose queue
            figures are reported under ``gpt4``.
        space_url: Base URL of the proxy; stored as ``url`` and as the
            ``openai`` endpoint.

    Returns:
        A new dict; the original payload is attached unchanged under ``json``.
    """
    stats = response['stats']
    uptime = stats['uptime']

    # Copy so the caller's endpoints mapping is left untouched.
    endpoints = {**response['endpoints']}
    endpoints['openai'] = space_url

    total_proompts = stats['proompts_total']
    recent_proompters = stats['proompters']['5_min']
    gatekeeper = response['config']['gatekeeper']

    queued = response['models']['choices'][default_model]['queued']
    estimated_wait = response['models']['choices'][default_model]['estimated_wait']

    results = {}
    results['uptime'] = uptime
    results['endpoints'] = endpoints
    results['proompts'] = total_proompts
    results['proomptersNow'] = recent_proompters
    results['config'] = {'gatekeeper': gatekeeper}
    results['build'] = 'cyberes/local-llm-server'
    results['gpt4'] = {
        'proomptersInQueue': queued,
        'estimatedQueueTime': estimated_wait,
    }
    results['url'] = space_url
    results['json'] = response
    return results


def parse_space_results(proxy_json: dict) -> bool:
    """Store one proxy result in the database and in Elasticsearch.

    Works on a deep copy, so the caller's dict is not modified. The URL is
    passed through ``remove_http`` and the record through ``prepare_data``.
    Unless the URL is ``proxy.chub-archive.example.com``, the URL and its API
    types are inserted into the ``proxies`` and ``api_types`` tables
    (conflicts ignored). The prepared data is then indexed under an id derived
    from the URL and the record's ``timestamp``.

    Returns:
        Whatever ``ELASTIC_CLIENT.insert_json`` returns.

    Raises:
        Any exception raised along the way, after its traceback is logged.
    """
    record = copy.deepcopy(proxy_json)
    try:
        host = remove_http(record["url"])
        record['url'] = host
        document = prepare_data(record)

        is_archive_proxy = record['url'] == 'proxy.chub-archive.example.com'
        if not is_archive_proxy:
            with CursorFromConnectionFromPool() as cursor:
                cursor.execute('INSERT INTO proxies (url) VALUES (%s) ON CONFLICT (url) DO NOTHING', (host,))
                for api_type in extract_api_types(record):
                    cursor.execute('INSERT INTO api_types (type) VALUES (%s) ON CONFLICT (type) DO NOTHING', (api_type,))

        # The document id is the URL-safe base64 of a small JSON identity blob.
        identity = json.dumps({'url': host, 'timestamp': record["timestamp"]})
        item_id = base64.urlsafe_b64encode(identity.encode()).decode()
        return ELASTIC_CLIENT.insert_json(document, item_id=item_id)
    except Exception:
        logger.error(traceback.format_exc())
        raise
