#!/usr/bin/env python3
import argparse
import functools
import logging
import os
import signal
import time
import traceback
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
from pathlib import Path

import yaml

import scraper
from scraper import log, RequestQueueManager
from scraper.chub.auth import chub_login
from scraper.chub.chars.process import process_char_node
from scraper.chub.lorebooks.process import process_lore_node
from scraper.chub.nodes import fetch_api_node, scrape_nodes
from scraper.chub.runner import run_chub_scrape
from scraper.chub.users.process import process_chub_user
from scraper.config.models import ConfigModel
from scraper.database.connection import Database
from scraper.globals import GLOBALS
from scraper.helpers import valid_date, is_service_running
from scraper.paths import resolve_path, create_directory
from scraper.suicide import signal_handler, watchdog_expired, watchdog_suicide
from scraper.time import calculate_elapsed_time

# Directory containing this script, with symlinks resolved; used to locate the default config file.
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
# Naive local time captured at import; main() uses it to report the total run duration.
START_TIMESTAMP = datetime.now()

# Install the project's SIGINT (Ctrl+C) handler as soon as the module is loaded.
signal.signal(signal.SIGINT, signal_handler)

# Module-wide logger: only declared here, the actual instance is assigned by startup().
logger: logging.Logger


def startup():
    """Set up logging, load the config file and initialise shared scraper state.

    Reads the module-level ``args`` namespace (it is not passed in) and assigns
    the module-level ``logger``.

    Returns:
        The ``ConfigModel`` built from the YAML config file, or ``None`` when
        ``--exit-when-service`` names a service that is currently running.
    """
    global logger
    log.root_logger.init(logging.DEBUG if args.debug else logging.INFO)
    logger = log.root_logger.logger

    if args.test:
        logger.warning('Test mode enabled!')

    if args.since:
        logger.info(f'Scraping nodes updated after {args.since.astimezone()}')

    service_name = args.exit_when_service
    if service_name:
        if is_service_running(service_name):
            logger.error(f'Service {service_name} is running, exiting...')
            return None
        logger.info(f'Service {service_name} is not running, continuing...')

    # NOTE(review): FullLoader is used here; the config file is assumed to be trusted.
    config_text = resolve_path(args.config).read_text()
    raw_config = yaml.load(config_text, Loader=yaml.FullLoader)
    cfg = ConfigModel(**raw_config)

    # Arm SIGALRM so a stuck run is interrupted once the time limit elapses.
    logger.info(f'Run time limit: {args.run_time_limit} seconds.')
    signal.alarm(args.run_time_limit)

    # Publish the settings that the rest of the scraper reads from GLOBALS.
    GLOBALS.log_http_requests = cfg.log_http_requests or args.log_requests
    GLOBALS.request_timeout = (cfg.request_connect_timeout, cfg.request_read_timeout)
    GLOBALS.hashed_data_path = cfg.hashed_data_path

    # The proxy list is set before the proxy pool and the request queue are created.
    GLOBALS.proxies = cfg.proxies
    scraper.init_proxy_pool()
    queue_manager = RequestQueueManager(num_workers_per_proxy=cfg.chub.http_workers)
    scraper.http_queue.http_queue = queue_manager

    if cfg.chub.hide_404s:
        GLOBALS.hide_404s = True

    if args.latest:
        logger.info('Starting with latest updated items')

    return cfg


def _format_progress(processed_count, total_count):
    """Return ``'<processed>/<total> (<percent>)'`` for the summary log lines."""
    # Guard the division so an empty node list cannot raise ZeroDivisionError.
    ratio = processed_count / total_count if total_count else 0.0
    return f'{processed_count}/{total_count} ({ratio:,.2%})'


def main(args):
    """Run one full scrape: collect nodes, download them, then scrape their users.

    Args:
        args: Parsed command-line namespace. Note that ``startup()`` reads the
            module-level ``args`` rather than this parameter.

    Returns:
        None. Returns early, without scraping, when ``startup()`` reports that
        the run should not proceed.
    """
    cfg = startup()
    if cfg is None:
        # startup() returns None when --exit-when-service found the service running;
        # stop cleanly instead of failing on ``cfg.database`` below.
        return

    Database.initialise(minconn=1, maxconn=100, host=cfg.database.host, database=cfg.database.database, user=cfg.database.user, password=cfg.database.password)

    hashed_data_path = create_directory(cfg.hashed_data_path)

    # =========================================================================================================
    # Get the nodes to scrape.

    char_nodes = []
    lore_nodes = []

    if args.node_name:
        # "<type>/<full path>": everything after the first slash is the node's name.
        node_name = args.node_name.partition('/')[2]
        if args.node_name.startswith('characters'):
            found = fetch_api_node('characters', node_name)
            if not found:
                logger.critical(f'Node not found: "{node_name}"')
                watchdog_suicide()
            char_nodes = [found['node']]
        elif args.node_name.startswith('lorebooks'):
            found = fetch_api_node('lorebooks', node_name)
            if not found:
                logger.critical(f'Node not found: "{node_name}"')
                watchdog_suicide()
            lore_nodes = [found['node']]
        else:
            logger.error('Prefix your full path with either "characters" or "lorebooks".')
            watchdog_suicide()
        if not char_nodes and not lore_nodes:
            logger.error('Failed to find the specified node.')
            watchdog_suicide()
    else:
        logger.info('Logging in...')
        # NOTE(review): credentials are hard-coded in source; they should be moved
        # into the config file or the environment.
        token = chub_login('fluted.entrance@largelargelargelarge.xyz', 'aephee2phaix6Ohvaip9reiyooseegho!!')
        if token is None:
            # Best effort: carry on with a placeholder token rather than aborting the run.
            logger.error('Failed to log in. Continuing unauthenticated.')
            token = "FIX YOUR SHIT, LORE"

        logger.info('Fetching nodes...')
        char_nodes = scrape_nodes('characters', token, args.latest, args.since, test_mode=args.test)
        lore_nodes = scrape_nodes('lorebooks', token, args.latest, args.since, test_mode=args.test)

    logger.info(f'chub.ai returned {len(char_nodes)} character nodes.')
    logger.info(f'chub.ai returned {len(lore_nodes)} lorebook nodes.')

    time.sleep(1)

    # =========================================================================================================
    # Download nodes.

    download_worker_count = cfg.chub.download_threads
    # Split the workers between the two node types, but never hand out zero workers
    # (``1 // 2 == 0`` when only a single download thread is configured).
    workers_per_node_type = max(1, download_worker_count // 2)

    logger.info(f'Downloading {len(char_nodes) + len(lore_nodes)} nodes using {len(GLOBALS.proxies)} proxies and {download_worker_count} downloads threads.')

    # Characters and lorebooks are scraped concurrently, one coordinating thread each.
    with ThreadPoolExecutor() as executor:
        char_scraper_future = executor.submit(run_chub_scrape, 'Character', char_nodes, process_char_node, workers_per_node_type)
        lore_scraper_future = executor.submit(run_chub_scrape, 'Lorebook', lore_nodes, process_lore_node, workers_per_node_type)
        char_processed_count, char_new_count, char_modified_count, char_avg_execution, char_found_users = char_scraper_future.result()
        lore_processed_count, lore_new_count, lore_modified_count, lore_avg_execution, lore_found_users = lore_scraper_future.result()

    # Character and lorebook scraping is now done.
    time.sleep(1)

    # Users discovered by either scrape are merged and processed in sorted order.
    users_to_scrape = sorted(char_found_users | lore_found_users)
    logger.info(f'Scraping {len(users_to_scrape)} users')
    users_start = datetime.now()
    with ThreadPoolExecutor(max_workers=download_worker_count) as executor:
        futures = {executor.submit(process_chub_user, hashed_data_path, user) for user in users_to_scrape}
        for future in as_completed(futures):
            username, elapsed = future.result()
            if not username:
                # Already handled
                continue
            logger.info(f'User completed in {int(elapsed)}s: {username}')

    user_time_str = calculate_elapsed_time(users_start)
    logger.info(f'-- Users scraping completed in {user_time_str} --'.upper())

    logger.info('-- DONE --')
    total_time_str = calculate_elapsed_time(START_TIMESTAMP)
    logger.info(f'Scraping took {total_time_str}')
    logger.info(f'CHUB STATUS -- Characters: {len(char_nodes)}, Users: {len(users_to_scrape)}, Lorebooks: {len(lore_nodes)}')

    # Report processed/total; the ratio was previously inverted (total/processed).
    if char_processed_count == 0:
        logger.info('--> Characters: none processed.')
    else:
        logger.info(f'--> Characters: {_format_progress(char_processed_count, len(char_nodes))} processed, {char_modified_count} updated, {char_new_count} new.')
    if lore_processed_count == 0:
        logger.info('--> Lorebooks: none processed.')
    else:
        logger.info(f'--> Lorebooks: {_format_progress(lore_processed_count, len(lore_nodes))} processed, {lore_modified_count} updated, {lore_new_count} new')
    logger.info(f'Average char execution time: {round(char_avg_execution, 2)} sec. Average lore execution time: {round(lore_avg_execution, 2)} sec')


# Script entry point: parse the command line, run the scrape, then always shut
# down the HTTP queue and force termination if the run failed.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Scrape data from chub.ai.')
    parser.add_argument('--config', default=Path(SCRIPT_DIR, 'config', 'chub.yml'), help='Path to the config file. Default: config/chub.yml in the config directory next to the script.')
    parser.add_argument('--debug', '-d', '-v', action='store_true', help='Debug logging.')
    parser.add_argument('--log-requests', '-r', action='store_true', help='Log all HTTP requests when debugging is enabled.')
    parser.add_argument('--latest', '-l', action='store_true', help='Start with the latest updated items on Chub.')
    parser.add_argument('--test', '-t', action='store_true', help='Test mode: only do 20 characters and 20 lorebooks.')
    parser.add_argument('--since', help='Scrape all cards since this timedelta. Examples: "5 minutes ago", "one week ago", "25 hours", "3 sec"', type=valid_date)
    parser.add_argument('--run-time-limit', type=int, default=43200, help='If the program runs longer than this, kill it. This helps prevent cases where the scraper gets stuck. Value in seconds. Default: 43200 (12 hours)')
    parser.add_argument('--exit-when-service', type=str, default=None, help='If this systemctl service is running, exit. Checks services running in --user mode.')
    parser.add_argument('--node-name', type=str, default=None, help='Only scrape this card.')
    # ``args`` is deliberately module-level: startup() reads it as a global.
    args = parser.parse_args()

    # The handler must be in place before startup() arms the alarm via signal.alarm().
    signal.signal(signal.SIGALRM, watchdog_expired)
    failed = False
    try:
        main(args)
    except BaseException:
        # Deliberately catch everything (same as the former bare ``except:``) so the
        # cleanup and forced termination below still run.
        traceback.print_exc()
        failed = True
    try:
        scraper.http_queue.quit()
    finally:
        # Even if shutting the queue down raises, a failed run must still be
        # terminated via watchdog_suicide().
        if failed:
            watchdog_suicide()
