"""Run the full pipeline once: seed tokens -> run spider(s) -> persist -> report -> basic analytics.

Usage (PowerShell):
  # Ensure deps
  # python -m pip install requests pymongo
  # Configure DB if needed
  # $env:DATABANK_DB_URI = "mongodb://localhost:27017"
  # $env:DATABANK_DB_NAME = "databank"
  python scripts/run_pipeline_once.py
"""

from __future__ import annotations

import os
from collections import Counter
from datetime import UTC, datetime
from typing import Dict, List

from databank.db import MongoDB
from databank.reporter.daily_file import DailyFileReporter
from databank.scheduler.simple_runner import SimpleRunner
from databank.spiders.base import BaseSpider, Task
from databank.spiders.get_league_match_list import GetLeagueMatchListSpider
from databank.core.tasks import MatchListTask


def pick_tokens(db: MongoDB, max_tokens: int = 3) -> list[MatchListTask]:
    """Build a short list of ``MatchListTask`` objects from DB seed data.

    Reads up to ten documents from each of the ``leagues`` and ``seasons``
    collections, picks the league with the smallest ``league_id`` and the
    first season returned, and creates one task per round starting at
    round 1.

    Args:
        db: Database handle; only its ``find`` method is used here.
        max_tokens: Upper bound on the number of tasks to build. Zero or a
            negative value yields an empty list.

    Returns:
        Tasks for rounds ``1..min(max_round, max_tokens)`` of the chosen
        league and season, where ``max_round`` is read from the league
        document and defaults to 1 when the key is absent.

    Raises:
        RuntimeError: If either collection returns no documents.
    """
    leagues = db.find("leagues", projection={"_id": 0}, limit=10)
    seasons = db.find("seasons", projection={"_id": 0}, limit=10)
    if not leagues:
        raise RuntimeError("No leagues found. Seed leagues first.")
    if not seasons:
        raise RuntimeError("No seasons found. Seed seasons first.")

    # min() returns the first minimal item, same as a stable sort followed by [0].
    league = min(leagues, key=lambda doc: doc.get("league_id", 0))
    max_round = int(league.get("max_round", 1))
    season_name = seasons[0]["season"]

    # Clamp the limit at zero: used as a raw slice bound, a negative value
    # would mean "drop the last N rounds" rather than "build no tasks".
    round_count = min(max_round, max(max_tokens, 0))
    if round_count < 1:
        return []

    league_id = int(league["league_id"])
    return [
        MatchListTask(league_id=league_id, season=season_name, round_no=round_no)
        for round_no in range(1, round_count + 1)
    ]


def main() -> None:
    """Run one full pipeline pass using structured tasks and SimpleRunner.

    Connection settings come from the ``DATABANK_DB_URI`` and
    ``DATABANK_DB_NAME`` environment variables, falling back to a local
    MongoDB instance and the ``databank`` database name. After the run, a
    finish timestamp, the persisted-document total and the three most
    frequent ``token`` values among the run's documents are printed to
    stdout.

    Raises:
        RuntimeError: Propagated from ``pick_tokens`` when the seed
            collections are empty.
    """
    uri = os.getenv("DATABANK_DB_URI", "mongodb://localhost:27017")
    name = os.getenv("DATABANK_DB_NAME", "databank")

    db = MongoDB(uri=uri, name=name)
    db.connect()

    reporter = DailyFileReporter(timezone="utc+8")
    runner = SimpleRunner(db=db, reporters=[reporter])

    spider = GetLeagueMatchListSpider()
    tasks = pick_tokens(db)
    spiders: Dict[BaseSpider, List[Task]] = {spider: tasks}
    summary = runner.run(spiders)

    # Basic analytics example: count docs per token for this run (from runner.last_docs)
    per_token = Counter(d.data.get("token", "unknown") for d in runner.last_docs)
    top = per_token.most_common(3)

    # An aware UTC datetime already serializes with a "+00:00" offset, so
    # appending "Z" would print two UTC designators ("...+00:00Z"). Swap the
    # offset for the "Z" shorthand instead.
    finished_at = datetime.now(UTC).isoformat().replace("+00:00", "Z")
    print(f"Run finished at {finished_at}")
    print(f"Total persisted: {summary.total_docs}")
    print("Top tokens (by docs):", top)


# Run the pipeline only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()