from datetime import datetime, timezone
from typing import List

from bs4 import BeautifulSoup
from dateutil.parser import parse
from pydantic import BaseModel, Field, field_validator


class BooruComment(BaseModel):
    # One comment attached to a booru post.
    # `date` is when the comment was made; in this file it is filled from the
    # comment's <time datetime="..."> attribute by parse_booru_node.
    date: datetime
    # `text` is the comment body; in this file it is filled from the text of
    # the comment's `.comment-line` element.
    text: str


class BooruNode(BaseModel):
    # Validated record for a single booru post: the scraped metadata fields,
    # its tag list, its comments and the author's name.
    bpi: str
    date: datetime
    type: str
    rating: str
    tags: List[str] = Field(default_factory=list)
    comments: List[BooruComment]
    author: str

    # Pydantic v2 configuration. A plain dict is accepted for `model_config`
    # and replaces the nested `class Config`, which is deprecated in v2.
    model_config = {
        # Serialise datetimes as ISO-8601 strings converted to UTC.
        'json_encoders': {
            datetime: lambda v: v.astimezone(timezone.utc).isoformat(),
        },
        # Re-run validation whenever an attribute is assigned.
        'validate_assignment': True,
    }

    @field_validator('date', mode='before')
    @classmethod
    def parse_date(cls, v):
        """Substitute the Unix epoch for a falsy ``date`` input.

        Runs before pydantic's own datetime coercion. Truthy values are
        passed through untouched; falsy ones (``None``, ``''``, ``0``) are
        replaced by 1970-01-01T00:00:00 in UTC.

        The fallback is timezone-aware on purpose: a naive
        ``datetime.fromtimestamp(0)`` is expressed in the machine's local
        zone, so its wall-clock value differs between hosts and it cannot be
        compared with aware datetimes.
        """
        if v:
            return v
        return datetime.fromtimestamp(0, tz=timezone.utc)


def parse_booru_node(node: dict) -> 'BooruNode':
    """Build a ``BooruNode`` from a scraped booru post.

    Args:
        node: Mapping whose ``'html'`` entry holds the post's HTML markup.

    Returns:
        A ``BooruNode`` populated from the label/value spans inside the
        ``#information`` element, the ``.comment`` elements, the first
        ``.user-name`` element and every ``.tag-name`` element.

    Raises:
        KeyError: ``node`` has no ``'html'`` entry, or a comment's ``<time>``
            element has no ``datetime`` attribute.
        IndexError: the markup has no ``#information`` or ``.user-name``
            element, or a comment lacks a ``<time>`` or ``.comment-line``
            child.
    """
    soup = BeautifulSoup(node['html'], 'html.parser')

    information_div = soup.select('#information')
    spans = [span.text.strip('\xa0') for span in information_div[0].find_all('span')]
    # Spans alternate label, value, label, value, ...; pair them up. A trailing
    # label without a value is dropped, and a repeated label keeps its last value.
    info = {
        label.strip(':').lower(): value
        for label, value in zip(spans[::2], spans[1::2])
    }

    # A missing or empty date is passed on as None so that the model's own
    # falsy-date fallback applies, instead of failing here with KeyError or a
    # dateutil parse error.
    raw_date = info.get('date')
    info['date'] = parse(raw_date) if raw_date else None

    comments = [
        {
            'date': parse(comment.select('time')[0]['datetime']),
            'text': comment.select('.comment-line')[0].text,
        }
        for comment in soup.select('.comment')
    ]
    username = soup.select('.user-name')[0].text
    tags = [tag.text for tag in soup.select('.tag-name')]

    # Merge into one mapping so the explicitly extracted values win if the
    # information block happens to contain a "comments", "author" or "tags"
    # label; passing both as keywords would raise TypeError.
    fields = {**info, 'comments': comments, 'author': username, 'tags': tags}
    return BooruNode(**fields)
