#!/usr/bin/env python3
import json
import os
import signal
import sys
import traceback
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path

import pymysql
from tqdm import tqdm

from scraper.log import root_logger

# Put the parent of this script's directory at the front of sys.path so the
# project-local imports that follow resolve from there.
# NOTE(review): `scraper.log` is already imported above, before this path
# tweak runs — confirm that import does not depend on it.
script_dir = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(0, str(Path(script_dir).parent))

from scraper.suicide import signal_handler, watchdog_expired
import scraper
from scraper import globals
from helpers.database import MySQLConnection
from helpers.SQL import INIT_SQL
from helpers.elastic import ELASTIC_CLIENT
from helpers.scrape import parse_space_results

# Route SIGINT (Ctrl-C) to the handler provided by scraper.suicide.
signal.signal(signal.SIGINT, signal_handler)

# Module-level clients, created at import time and used by main().
# NOTE(review): host addresses and credentials are hard-coded in source —
# presumably they should come from configuration or the environment; confirm,
# and rotate these secrets if this file has been shared.
# NOTE(review): the positional arguments look like (host, user, password,
# database) — confirm against MySQLConnection.
mysql_conn = MySQLConnection('172.0.3.109', 'proxy_stats', 'bp9xrimxpDspfGhp', 'proxy_stats')
# NOTE(review): the arguments look like (URL, index name, API key) — confirm
# against ELASTIC_CLIENT.connect.
ELASTIC_CLIENT.connect('http://172.0.3.109:9200', 'proxy_stats', 'RVZ1ZlJvd0JaLUdWZ0pKU2tXdHM6aEt1Z2xlb0xRZDZDZFg5YUNOS19hQQ==')
# Logger for this script, obtained as a child of the project's root logger.
logger = root_logger.get_child('PROXY_STATS.MYSQL_TO_ELASTIC')

# Model-family prefixes whose flat "<prefix>_<field>" keys are folded into a
# nested "<prefix>" object. Sorted longest first so that e.g. "gpt4_32k_x" is
# attributed to "gpt4_32k" rather than to "gpt4".
_MODEL_PREFIXES = tuple(sorted(
    ('turbo', 'gpt4', 'gpt4_32k', 'claude', 'palm_bison', 'azure_gpt4', 'azure_gpt4_32k', 'azure_turbo'),
    key=len,
    reverse=True,
))


def _match_model_prefix(key):
    """Return the longest model prefix that *key* starts with, or ``None``."""
    for prefix in _MODEL_PREFIXES:
        if key.startswith(prefix + '_'):
            return prefix
    return None


def _group_model_fields(flat):
    """Return a copy of *flat* with ``<prefix>_<field>`` keys nested under ``<prefix>``."""
    grouped = {}
    nested = {}
    for key, value in flat.items():
        prefix = _match_model_prefix(key)
        if prefix is None:
            grouped[key] = value
        else:
            # The sub-key is everything after "<prefix>_".
            nested.setdefault(prefix, {})[key[len(prefix) + 1:]] = value

    for prefix, fields in nested.items():
        existing = grouped.get(prefix)
        if isinstance(existing, dict):
            # A bare "<prefix>" object was already present: merge into a new
            # dict instead of mutating the caller's value.
            fields = {**existing, **fields}
        grouped[prefix] = fields
    return grouped


def process_row(data):
    """Decode one ``(json, timestamp)`` row and pass it to ``parse_space_results``.

    The JSON document is parsed, each top-level value that is itself a JSON
    string is decoded in place (values that are not valid JSON are kept
    unchanged), flat ``<model>_<field>`` keys are grouped into one nested dict
    per model prefix, and the row's timestamp is stored under ``'timestamp'``.

    Args:
        data: A ``(json_str, timestamp)`` pair. A falsy ``data``, ``json_str``
            or ``timestamp`` makes the call a no-op.

    Returns:
        None.

    Raises:
        Exception: ``'FAILED TO LOAD JSON'`` (chained to the underlying error)
            when ``json_str`` cannot be parsed. Any error raised while
            reshaping or forwarding the document is re-raised after the
            parsed document has been printed.
    """
    if not data:
        return

    json_str, timestamp = data
    if not timestamp or not json_str:
        return

    try:
        j = json.loads(json_str)
    except Exception as err:
        print(json_str)
        raise Exception('FAILED TO LOAD JSON') from err

    try:
        for k, v in j.items():
            try:
                # Values may be JSON-encoded strings; decode those in place.
                j[k] = json.loads(v)
            except (TypeError, ValueError, RecursionError):
                # Not a JSON string (already a number/object, or plain text).
                pass

        j_formatted = _group_model_fields(j)
        j_formatted['timestamp'] = timestamp
        parse_space_results(j_formatted)
    except Exception:
        print(j)
        raise


def main():
    """Reset the target stores and replay every stored row through ``process_row``.

    Destructive: drops the ``proxies`` and ``api_types`` tables, runs every
    statement in ``INIT_SQL``, deletes and recreates the ``proxy_stats``
    Elasticsearch index, then selects ``json, timestamp`` from each table
    reported by ``SHOW TABLES`` (except ``proxy_chub_archive_evulid``) and
    feeds the rows to ``process_row`` on a pool of 25 worker threads.
    """
    # IF EXISTS / ignore_unavailable keep the reset idempotent, so a first run
    # (or a re-run after a partial failure) does not abort on a missing object.
    mysql_conn.execute_query('DROP TABLE IF EXISTS proxies')
    mysql_conn.execute_query('DROP TABLE IF EXISTS api_types')
    for query in INIT_SQL:
        mysql_conn.execute_query(query)

    # NOTE(review): assumes ELASTIC_CLIENT.es is an elasticsearch-py client.
    ELASTIC_CLIENT.es.indices.delete(index='proxy_stats', ignore_unavailable=True)
    ELASTIC_CLIENT.create_index()

    tables = mysql_conn.execute_query("SHOW TABLES") or ()

    # One pool for the whole run instead of creating a new one per table.
    with ThreadPoolExecutor(max_workers=25) as executor:
        for table in tqdm(tables, position=0):
            table_name = table[0]

            if table_name == 'proxy_chub_archive_evulid':
                continue

            tqdm.write(table_name)
            # Quote the identifier so unusual table names cannot break the query.
            quoted_name = table_name.replace('`', '``')
            try:
                rows = mysql_conn.execute_query(f"SELECT json, timestamp FROM `{quoted_name}`")
            except pymysql.err.OperationalError:
                # Presumably the table lacks json/timestamp columns; skip it.
                continue

            if not rows:
                continue

            # Drain the iterator to drive the progress bar and surface any
            # exception raised in a worker; the results themselves are unused.
            for _ in tqdm(executor.map(process_row, rows), total=len(rows), position=1):
                pass


if __name__ == '__main__':
    # Install the watchdog handler for SIGALRM before any work starts.
    signal.signal(signal.SIGALRM, watchdog_expired)
    try:
        main()
    except Exception:
        # Report the failure; SystemExit/KeyboardInterrupt are left to
        # propagate instead of being swallowed by a bare except.
        traceback.print_exc()
    finally:
        # Always shut the HTTP queue down, whichever way main() ended.
        scraper.http_queue.quit()
