"""Run scheduler in full mode: all seasons and rounds for available leagues.""" from __future__ import annotations import os from typing import Dict, List from databank.db import MongoDB from databank.reporter.daily_file import DailyFileReporter from databank.spiders.get_league_match_list import GetLeagueMatchListSpider from databank.spiders.base import BaseSpider from databank.scheduler.orchestrator import DatabankScheduler, TaskProvider from databank.tasks.providers import league_matchlist_from_mongo from databank.analytics.simple_counts import PerTokenCounter essential_env = { "DATABANK_DB_URI": "mongodb://localhost:27017", "DATABANK_DB_NAME": "databank", } def main() -> None: """Entry point: run the scheduler in full mode (all seasons and rounds).""" uri = os.getenv("DATABANK_DB_URI", essential_env["DATABANK_DB_URI"]) name = os.getenv("DATABANK_DB_NAME", essential_env["DATABANK_DB_NAME"]) db = MongoDB( uri=uri, name=name, indexes={ "match": [ { "keys": [("match.matchId", 1)], "unique": True, "name": "uniq_match_matchId", } ] }, ) db.connect() db.ensure_indexes() # Spiders get_match = GetLeagueMatchListSpider() spiders: List[BaseSpider] = [get_match] # Reporters reporters = [DailyFileReporter(timezone="utc+8")] # Task providers wiring (full, no caps) tasks_provider: Dict[BaseSpider, TaskProvider] = { get_match: league_matchlist_from_mongo(mode="full"), } # Analyzers analyzers = [PerTokenCounter()] scheduler = DatabankScheduler( db=db, spiders=spiders, reporters=reporters, task_providers=tasks_provider, analyzers=analyzers, ) summary = scheduler.run_once() print("Scheduler(full) finished. Total persisted:", summary.total_docs) if __name__ == "__main__": main()