import concurrent.futures
import traceback
from datetime import timezone, datetime
from typing import List, Dict

from scraper import http_queue
from scraper.character_tavern.types import CharTavernNode
from scraper.log import root_logger
from scraper.suicide import watchdog_suicide

_logger = root_logger.get_child('CHAR-TAVERN.SEARCH')


def fetch_page(i: int, direction, auth_header: str) -> Dict:
    """Fetch a single page of the Character Tavern search index.

    Sends a POST to the ``characters`` index search endpoint via
    ``http_queue.add`` with an empty query string, 49 hits per page and
    results sorted by ``createdAt`` in the requested direction.

    Args:
        i: Page number, sent as the ``page`` field of the request body.
        direction: Sort direction interpolated into the ``createdAt:<direction>``
            sort expression (the caller in this module passes ``'asc'`` or ``'desc'``).
        auth_header: Value sent verbatim as the ``authorization`` header.

    Returns:
        Whatever ``.json()`` yields for the object returned by
        ``http_queue.add`` (annotated as a dict).
    """
    _logger.debug(f"Fetching page {i}")

    request_headers = {
        'Accept': '*/*',
        'Accept-Language': 'en-US,en;q=0.5',
        'Referer': 'https://character-tavern.com/',
        'authorization': auth_header,
        'content-type': 'application/json',
        'x-meilisearch-client': 'Meilisearch JavaScript (v0.50.0)',
        'Origin': 'https://character-tavern.com',
    }
    search_body = {
        'q': '',
        'hitsPerPage': 49,
        'sort': [f'createdAt:{direction}'],
        'filter': ['(contentWarnings IS EMPTY OR contentWarnings NOT IN [])'],
        'page': i
    }

    queued_response = http_queue.add(
        method='POST',
        url='https://search.character-tavern.com/indexes/characters/search',
        headers=request_headers,
        json_data=search_body,
    )
    return queued_response.json()


def _as_utc_datetime(value):
    """Convert a ``lastUpdateAt`` value into a timezone-aware ``datetime``.

    Supported inputs:
      * ``datetime`` - returned as-is if aware, otherwise tagged as UTC.
      * ``int`` / ``float`` - interpreted as seconds since the Unix epoch.
      * ``str`` - first parsed as ``%Y-%m-%dT%H:%M:%SZ``; if that fails, parsed
        with ``datetime.fromisoformat`` (a trailing ``Z`` is rewritten to
        ``+00:00`` so older interpreters accept it). Naive results are tagged
        as UTC.

    Returns:
        The aware ``datetime``, or ``None`` when ``value`` is of an
        unsupported type.

    Raises:
        ValueError: if a string value matches neither accepted format.
    """
    if isinstance(value, datetime):
        return value if value.tzinfo else value.replace(tzinfo=timezone.utc)

    if isinstance(value, (int, float)):
        # NOTE(review): the value is treated as *seconds*; confirm the API
        # never returns epoch milliseconds here.
        return datetime.fromtimestamp(value, tz=timezone.utc)

    if isinstance(value, str):
        try:
            parsed = datetime.strptime(value, '%Y-%m-%dT%H:%M:%SZ')
        except ValueError:
            # Fall back for ISO strings carrying fractional seconds or an
            # explicit UTC offset, which the fixed format above rejects.
            iso_text = value[:-1] + '+00:00' if value.endswith('Z') else value
            parsed = datetime.fromisoformat(iso_text)
        return parsed if parsed.tzinfo else parsed.replace(tzinfo=timezone.utc)

    return None


def fetch_character_tavern_sorted(direction: str, auth_header: str, thread_count: int = 10, test_mode: bool = False, since_date: datetime = None) -> List[CharTavernNode]:
    """Fetch every page of the search index and return the hits as nodes.

    Page 1 is fetched synchronously to learn ``totalPages``; the remaining
    pages are fetched concurrently on a thread pool. Each hit gets an
    ``author`` key (the first ``/``-separated segment of its ``path``) and is
    turned into a ``CharTavernNode``. The nodes are returned sorted by
    ``lastUpdateAt``, most recent first.

    Args:
        direction: ``'asc'`` or ``'desc'``; forwarded to ``fetch_page``.
        auth_header: Authorization header value forwarded to ``fetch_page``.
        thread_count: Maximum number of worker threads for pages 2..N.
        test_mode: When true, at most 10 pages are fetched.
        since_date: If given, only nodes whose ``lastUpdateAt`` is strictly
            later than this moment are returned. A naive value is treated
            as UTC.

    Returns:
        The list of nodes, newest ``lastUpdateAt`` first.

    Raises:
        AssertionError: if ``direction`` is not ``'asc'`` or ``'desc'``.
        Exception: whatever building a ``CharTavernNode`` from a hit raises
            (the offending hit is logged first).
        ValueError: if a string ``lastUpdateAt`` cannot be parsed while
            applying ``since_date``.
    """
    # NOTE: kept as an assert so the exception type seen by callers does not
    # change; be aware it is skipped when Python runs with -O.
    assert direction in ['asc', 'desc']

    # First, fetch page 1 to get totalPages
    first_page = fetch_page(1, direction, auth_header)
    total_pages = first_page.get('totalPages', 1)
    _logger.debug(f"Total pages available: {total_pages}")

    # Apply the same "has hits" check to page 1 that every later page gets,
    # so a response without 'hits' is reported instead of raising KeyError below.
    pages = []
    if first_page.get('hits'):
        pages.append(first_page)
    else:
        _logger.warning("Page 1 returned no hits")

    if test_mode:
        total_pages = min(total_pages, 10)
        _logger.info(f"Test mode: limiting to {total_pages} pages")

    # Fetch remaining pages
    with concurrent.futures.ThreadPoolExecutor(max_workers=thread_count) as executor:
        # Map each future back to its page number for logging.
        future_to_page = {
            executor.submit(fetch_page, page, direction, auth_header): page
            for page in range(2, total_pages + 1)
        }

        for future in concurrent.futures.as_completed(future_to_page):
            page_number = future_to_page[future]
            try:
                response = future.result()
            except Exception as e:
                _logger.critical(f"Page {page_number} generated an exception: {e}\n{traceback.format_exc()}")
                # Presumably terminates the process; if it returns, the loop
                # simply moves on to the next completed future.
                watchdog_suicide()
            else:
                if response.get('hits'):
                    pages.append(response)
                else:
                    _logger.warning(f"Page {page_number} returned no hits")

    results = []
    for page in pages:
        for hit in page['hits']:
            try:
                hit['author'] = hit['path'].split('/')[0]
                results.append(CharTavernNode(**hit))
            except Exception:
                # Record the offending hit through the module logger, then
                # let the original exception propagate unchanged.
                _logger.warning(f"Failed to build CharTavernNode from hit: {hit}")
                raise

    # Sort by lastUpdateAt in descending order (most recent first)
    sorted_results = sorted(results, key=lambda node: node.lastUpdateAt, reverse=True)

    # Apply since_date filter if provided
    if since_date is not None:
        # Ensure since_date is timezone-aware so it can be compared with the
        # aware datetimes produced by _as_utc_datetime.
        if since_date.tzinfo is None:
            since_date = since_date.replace(tzinfo=timezone.utc)

        filtered_results = []
        for node in sorted_results:
            node_date = _as_utc_datetime(node.lastUpdateAt)
            if node_date is None:
                _logger.warning(f"Unexpected type for lastUpdateAt: {type(node.lastUpdateAt)} with value {node.lastUpdateAt}")
                continue

            if node_date > since_date:
                filtered_results.append(node)

        sorted_results = filtered_results

    _logger.info(f"Successfully fetched {len(pages)} pages with {len(sorted_results)} total results")
    return sorted_results
