#!/usr/bin/env python3
import argparse
import logging
import os
import signal
import traceback
from datetime import datetime
from pathlib import Path

import yaml

import scraper
from scraper import RequestQueueManager, log
from scraper.config.models import ConfigModel
from scraper.database.connection import Database
from scraper.globals import GLOBALS
from scraper.nyai_me.fetch import fetch_nyai_nodes
from scraper.nyai_me.runners import nyai_run_card_scrape, nyai_process_authors
from scraper.paths import create_directory, resolve_path
from scraper.suicide import signal_handler, watchdog_expired, watchdog_suicide
from scraper.time import calculate_elapsed_time

# Install the project's SIGINT handler as soon as the module is loaded, so an
# interrupt is routed through it from the very start of the run.
signal.signal(signal.SIGINT, signal_handler)

# Declared here, assigned in main() once the root logger has been initialised.
_logger: logging.Logger

# Moment this module was loaded.
_START_TIMESTAMP = datetime.now()

# Directory holding this script (symlinks resolved); used to locate the
# default config file.
_SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))

# This scraper does not support ccv3 cards and will ignore them


def load_cfg():
    """Read the YAML config file, validate it and apply it to shared state.

    This function takes no parameters: it reads the module-level ``args``
    namespace produced by the argument parser in the ``__main__`` block, and
    logs through the module-level ``_logger``, so both must be set before it
    is called.

    Besides building the config object it also:

    * arms ``signal.alarm`` with ``args.run_time_limit`` seconds,
    * copies request/proxy settings onto ``GLOBALS``,
    * initialises the proxy pool and installs a ``RequestQueueManager``.

    Returns:
        The ``ConfigModel`` instance built from the config file contents.
    """
    config_file = resolve_path(args.config)
    # NOTE(review): FullLoader accepts more YAML tags than SafeLoader; if the
    # config could ever come from an untrusted source, use yaml.safe_load.
    raw_settings = yaml.load(config_file.read_text(), Loader=yaml.FullLoader)
    cfg = ConfigModel(**raw_settings)

    # Start the watchdog countdown; SIGALRM fires when the limit elapses.
    time_limit = args.run_time_limit
    _logger.info(f'Run time limit: {time_limit} seconds.')
    signal.alarm(time_limit)

    # Request logging is on if either the config or the CLI flag asks for it.
    GLOBALS.log_http_requests = cfg.log_http_requests or args.log_requests
    connect_timeout = cfg.request_connect_timeout
    read_timeout = cfg.request_read_timeout
    GLOBALS.request_timeout = (connect_timeout, read_timeout)
    GLOBALS.hashed_data_path = cfg.hashed_data_path

    # The proxy list has to be on GLOBALS before the pool is initialised.
    GLOBALS.proxies = cfg.proxies
    scraper.init_proxy_pool()

    # NOTE(review): the worker count is read from the ``risuai`` config
    # section although this script scrapes nyai_me -- confirm this is intended.
    workers_per_proxy = cfg.risuai.http_workers
    scraper.http_queue.http_queue = RequestQueueManager(num_workers_per_proxy=workers_per_proxy)

    return cfg


def main(args):
    """Run one scrape pass: fetch nodes, scrape cards, then process authors.

    Args:
        args: Parsed command-line namespace. This function reads
            ``args.debug`` (selects DEBUG vs INFO logging) and ``args.test``
            (limits node fetching to a single page). ``load_cfg()`` reads
            further options from the module-level ``args`` object.

    Side effects:
        Initialises the root logger and stores it in the module-level
        ``_logger``, initialises the database connection pool, and creates
        the hashed-data directory.
    """
    global _logger
    log_level = logging.DEBUG if args.debug else logging.INFO
    log.root_logger.init(log_level)
    _logger = log.root_logger.logger

    cfg = load_cfg()
    Database.initialise(
        minconn=1,
        maxconn=100,
        host=cfg.database.host,
        database=cfg.database.database,
        user=cfg.database.user,
        password=cfg.database.password,
    )
    hashed_data_path = create_directory(cfg.hashed_data_path)
    download_threads = cfg.nyai_me.download_threads

    _logger.info('Fetching nodes...')
    if args.test:
        _logger.info('Test mode: 1 pages only')
    character_list = fetch_nyai_nodes(download_threads, page_limit=1 if args.test else None)
    _logger.info(f'Got {len(character_list)} characters.')

    scrape_start = datetime.now()
    new_items, updated_items, authors = nyai_run_card_scrape(character_list, hashed_data_path)
    _logger.info(f'-- Card scraping completed in {calculate_elapsed_time(scrape_start)} --'.upper())
    _logger.info(f'New cards: {new_items}. Refreshed cards: {updated_items}')

    _logger.info(f'Scraping {len(authors)} authors.')
    # Time the author phase on its own clock; reusing scrape_start would
    # report the card-scraping duration as part of the user-scraping time.
    author_start = datetime.now()
    new_authors, updated_authors = nyai_process_authors(authors)
    _logger.info(f'-- User scraping completed in {calculate_elapsed_time(author_start)} --'.upper())
    _logger.info(f'New authors: {new_authors}. Updated authors: {updated_authors}')


if __name__ == '__main__':
    # Command-line interface. The parsed namespace is stored in the
    # module-level name ``args`` because load_cfg() reads it from there.
    parser = argparse.ArgumentParser()
    # NOTE(review): the default config file is named chub.yml -- confirm that
    # is the intended default for this script.
    parser.add_argument(
        '--config',
        default=Path(_SCRIPT_DIR, 'config', 'chub.yml'),
        help='Path to the config file. Default: config/chub.yml in the config directory next to the script.',
    )
    parser.add_argument(
        '-d', '--debug',
        action='store_true',
        help='Enable debug logging.',
    )
    parser.add_argument(
        '-t', '--test',
        action='store_true',
    )
    parser.add_argument(
        '--log-requests',
        action='store_true',
        help='Log all HTTP requests when debugging is enabled.',
    )
    parser.add_argument(
        '--run-time-limit',
        type=int,
        default=3600,
        help='If the program runs longer than this, kill it. This helps prevent cases where the scraper gets stuck. Value in seconds. Default: 3600 (one hour)',
    )
    args = parser.parse_args()

    # The alarm itself is armed inside load_cfg(); this registers what runs
    # when it goes off.
    signal.signal(signal.SIGALRM, watchdog_expired)

    failed = False
    try:
        main(args)
    except BaseException:
        # Deliberate catch-all at the top-level boundary: print the traceback
        # and remember the failure so the HTTP queue is still shut down below.
        traceback.print_exc()
        failed = True

    scraper.http_queue.quit()
    if failed:
        watchdog_suicide()
