#!/usr/bin/env python3
import argparse
import logging
import os
import signal
import traceback
from datetime import datetime
from pathlib import Path

import yaml

import scraper
from scraper import RequestQueueManager, log
from scraper.config.models import ConfigModel
from scraper.database.connection import Database
from scraper.globals import GLOBALS
from scraper.paths import create_directory, resolve_path
from scraper.risuai.char_runner import run_risuai_char_scrape
from scraper.risuai.fetch import get_characters_list
from scraper.risuai.user_runner import run_risuai_user_scrape
from scraper.suicide import signal_handler, watchdog_expired, watchdog_suicide

# Install the project's SIGINT (Ctrl-C) handler at import time, before any
# scraping work starts.
signal.signal(signal.SIGINT, signal_handler)

# Logger used throughout this script. It is only *declared* here; main()
# binds it after the root logger has been initialised, so nothing may log
# through it before main() has run.
_logger: logging.Logger

# Directory containing this script, with symlinks resolved. Used to build the
# default value of the --config option.
_SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
# Naive local wall-clock time captured when this module is loaded.
_START_TIMESTAMP = datetime.now()


def load_cfg(cli_args=None):
    """Load the YAML config file and apply it to the scraper's shared state.

    Besides building the config object this function has several side
    effects: it schedules SIGALRM after the configured run-time limit, copies
    request/proxy settings onto ``GLOBALS``, initialises the proxy pool and
    installs the shared HTTP request queue.

    Args:
        cli_args: Parsed command-line namespace providing ``config``,
            ``run_time_limit`` and ``log_requests``. When omitted, the
            module-level ``args`` created by the ``__main__`` guard is used,
            so existing ``load_cfg()`` calls keep working.

    Returns:
        The ``ConfigModel`` built from the file's contents.
    """
    if cli_args is None:
        # Legacy path: rely on the namespace parsed in the __main__ guard.
        # This only exists when the file is executed as a script.
        cli_args = args

    # NOTE(review): FullLoader can build more than plain data types. PyYAML
    # recommends yaml.safe_load for input that is not fully trusted; confirm
    # the config never relies on FullLoader-only tags before switching.
    raw_text = resolve_path(cli_args.config).read_text()
    cfg_data = yaml.load(raw_text, Loader=yaml.FullLoader)
    cfg = ConfigModel(**cfg_data)

    # Arm the watchdog: SIGALRM is delivered once the time limit elapses.
    _logger.info(f'Run time limit: {cli_args.run_time_limit} seconds.')
    signal.alarm(cli_args.run_time_limit)

    # Either the config file or the command line can turn request logging on.
    GLOBALS.log_http_requests = cfg.log_http_requests or cli_args.log_requests
    GLOBALS.request_timeout = (cfg.request_connect_timeout, cfg.request_read_timeout)
    GLOBALS.hashed_data_path = cfg.hashed_data_path

    # Proxies must be set on GLOBALS before the pool and queue are created.
    GLOBALS.proxies = cfg.proxies
    scraper.init_proxy_pool()
    scraper.http_queue.http_queue = RequestQueueManager(
        num_workers_per_proxy=cfg.risuai.http_workers
    )

    return cfg


def main(args):
    """Run one full RisuAI scrape: characters first, then the users they reference.

    Initialises logging, loads the config, opens the database pool, downloads
    every listed character and the users found along the way, then logs a
    summary.

    Args:
        args: Parsed command-line namespace. ``debug`` selects the log level
            and ``test`` limits the character listing to a single entry.

    Raises:
        Nothing is caught here; any exception from the called steps
        propagates to the caller.
    """
    global _logger
    log_level = logging.DEBUG if args.debug else logging.INFO
    log.root_logger.init(log_level)
    _logger = log.root_logger.logger

    if args.test:
        _logger.info('Test mode')

    cfg = load_cfg()
    Database.initialise(
        minconn=1,
        maxconn=100,
        host=cfg.database.host,
        database=cfg.database.database,
        user=cfg.database.user,
        password=cfg.database.password,
    )
    create_directory(cfg.hashed_data_path)
    download_threads = cfg.risuai.download_threads

    _logger.info('Finding characters...')
    char_urls = get_characters_list(limit=1 if args.test else None)
    char_total = len(char_urls)
    _logger.info(f'Found {char_total} characters...')

    _logger.info('Downloading characters...')
    (
        char_processed_count,
        char_new_count,
        char_modified_count,
        char_avg_execution,
        char_found_users,
    ) = run_risuai_char_scrape(char_urls, download_threads)

    _logger.info('Downloading users...')
    user_processed_count, user_new_count = run_risuai_user_scrape(char_found_users, download_threads)

    # An empty listing would otherwise raise ZeroDivisionError in the summary
    # and turn a finished run into a reported failure; report 0% instead.
    processed_ratio = char_processed_count / char_total if char_total else 0.0

    _logger.info('-- DONE --')
    _logger.info(f'RISUAI STATUS -- Characters: {char_processed_count}, Users: {user_processed_count}')
    _logger.info(f'--> Characters: {char_processed_count}/{char_total} ({processed_ratio:,.2%}) processed, {char_modified_count} updated, {char_new_count} new.')
    _logger.info(f'Average char execution time: {round(char_avg_execution, 2)} sec.')


if __name__ == '__main__':
    # Command-line interface. `args` stays a module-level name because
    # load_cfg() reads it directly.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--config',
        default=Path(_SCRIPT_DIR, 'config', 'chub.yml'),
        help='Path to the config file. Default: config/chub.yml in the config directory next to the script.',
    )
    parser.add_argument(
        '-d', '--debug',
        action='store_true',
        help='Enable debug logging.',
    )
    parser.add_argument('-t', '--test', action='store_true')
    parser.add_argument(
        '--log-requests',
        action='store_true',
        help='Log all HTTP requests when debugging is enabled.',
    )
    parser.add_argument(
        '--run-time-limit',
        type=int,
        default=3600,
        help='If the program runs longer than this, kill it. This helps prevent cases where the scraper gets stuck. Value in seconds. Default: 3600 (one hour)',
    )
    args = parser.parse_args()

    # The alarm itself is armed later, inside load_cfg(); this only installs
    # the handler that fires when it expires.
    signal.signal(signal.SIGALRM, watchdog_expired)

    run_failed = False
    try:
        main(args)
    except BaseException:
        # Deliberately catch everything so the HTTP queue is always shut down
        # below before the process is terminated.
        traceback.print_exc()
        run_failed = True

    scraper.http_queue.quit()
    if run_failed:
        watchdog_suicide()
