#!/usr/bin/env python3
import argparse
import hashlib
import logging
import os
import signal
import traceback
from datetime import datetime
from pathlib import Path

import psycopg2

import scraper
from scraper import log, http_queue
from scraper.config.load import load_cfg
from scraper.database.connection import Database, CursorFromConnectionFromPool
from scraper.helpers import is_service_running
from scraper.suicide import signal_handler, watchdog_expired, watchdog_suicide

# Install the project's SIGINT handler as soon as the module is loaded.
signal.signal(signal.SIGINT, signal_handler)

# Module-wide logger; only declared here and bound in main() after logging is initialised.
_logger: logging.Logger

# Directory containing this script (symlinks resolved via realpath); used to locate the default config file.
_SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
# Naive local timestamp captured when the module is loaded.
_START_TIMESTAMP = datetime.now()


def insert_data(text: str, url: str):
    """Insert one fetched page into ``aicg_chronicles``.

    The row key (``id_hash``) is the MD5 hex digest of ``text`` followed by
    ``url``, both encoded with ``str.encode()``'s default encoding.

    Args:
        text: Page body to store.
        url: Address the body was fetched from.

    Returns:
        ``True`` when the INSERT executed without error, ``False`` when it
        raised a ``UniqueViolation`` whose message mentions "duplicate key".

    Raises:
        psycopg2.errors.UniqueViolation: If the violation message does not
            mention "duplicate key".
    """
    digest = hashlib.md5(text.encode())
    digest.update(url.encode())
    row_key = digest.hexdigest()

    insert_sql = """
                INSERT INTO aicg_chronicles (id_hash, text, url)
                VALUES (%s, %s, %s);
                """

    with CursorFromConnectionFromPool() as cursor:
        try:
            cursor.execute(insert_sql, (row_key, text, url))
        except psycopg2.errors.UniqueViolation as exc:
            # Anything other than a plain duplicate is unexpected: propagate it.
            if 'duplicate key' not in str(exc).lower():
                raise
            # Same text/url pair is already stored, nothing to do.
            return False
        return True


def main(args):
    """Fetch every /aicg/ chronicles page and store the ones that changed.

    Initialises logging and the database pool, then downloads each chronicles
    URL through ``http_queue`` and passes the body to ``insert_data``. A page
    is logged as "new" when ``insert_data`` returns ``True`` and as
    "unchanged" otherwise.

    Args:
        args: Parsed command-line namespace. ``debug`` and
            ``exit_when_service`` are read here; the whole namespace is also
            handed to ``load_cfg``.

    Returns:
        None. Returns early, before touching the database, when
        ``args.exit_when_service`` names a service that is running.
    """
    global _logger
    log.root_logger.init(logging.DEBUG if args.debug else logging.INFO)
    _logger = log.root_logger.logger

    if args.exit_when_service:
        if is_service_running(args.exit_when_service):
            _logger.error(f'Service {args.exit_when_service} is running, exiting...')
            return
        _logger.info(f'Service {args.exit_when_service} is not running, continuing...')

    cfg = load_cfg(args)
    Database.initialise(
        minconn=1,
        maxconn=100,
        host=cfg.database.host,
        database=cfg.database.database,
        user=cfg.database.user,
        password=cfg.database.password,
    )

    # (log label, raw page URL) for every page to archive; processed in order.
    sources = (
        ('/aicg/ chronicles', 'https://rentry.org/aicg_chronicles/raw'),
        ('/aicg/ NASA chronicles', 'https://rentry.org/aicg_chronicles-NASA/raw'),
        ('/aicg/ NARA chronicles', 'https://rentry.org/aicg_chronicles-NARA/raw'),
    )
    for label, url in sources:
        response = http_queue.add(url)
        is_new = insert_data(response.text, url)
        _logger.info(f'{label} -- {"new" if is_new else "unchanged"}')


if __name__ == '__main__':
    # Command-line entry point: parse arguments, arm the run-time watchdog,
    # run main(), and always shut the HTTP queue down afterwards.
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--config', default=Path(_SCRIPT_DIR, 'config', 'chub.yml'), help='Path to the config file. Default: config/chub.yml in the config directory next to the script.')
    parser.add_argument('--pages', type=int, default=205, help='How many pages to parse on desuarchive.')
    parser.add_argument('-d', '--debug', action='store_true', help='Enable debug logging.')
    parser.add_argument('--log-requests', action='store_true', help='Log all HTTP requests when debugging is enabled.')
    parser.add_argument('--run-time-limit', type=int, default=3600, help='If the program runs longer than this, kill it. This helps prevent cases where the scraper gets stuck. Value in seconds. Default: 3600 (one hour)')
    parser.add_argument('--exit-when-service', type=str, default=None, help='If this systemctl service is running, exit. Checks services running in --user mode.')
    args = parser.parse_args()

    signal.signal(signal.SIGALRM, watchdog_expired)
    # Registering the handler alone never delivers SIGALRM; the timer has to be
    # armed for --run-time-limit to take effect. Non-positive values leave the
    # watchdog disabled (signal.alarm(0) would only cancel a pending alarm).
    if args.run_time_limit > 0:
        signal.alarm(args.run_time_limit)

    bad = False
    try:
        main(args)
    except BaseException:
        # Deliberately catches everything (same reach as a bare ``except``) so
        # the HTTP queue below is shut down no matter how main() failed.
        traceback.print_exc()
        bad = True
    scraper.http_queue.quit()
    if bad:
        watchdog_suicide()
