| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172 |
- """Run scheduler in full mode: all seasons and rounds for available leagues."""
- from __future__ import annotations
- import os
- from typing import Dict, List
- from databank.db import MongoDB
- from databank.reporter.daily_file import DailyFileReporter
- from databank.spiders.get_league_match_list import GetLeagueMatchListSpider
- from databank.spiders.base import BaseSpider
- from databank.scheduler.orchestrator import DatabankScheduler, TaskProvider
- from databank.tasks.providers import league_matchlist_from_mongo
- from databank.analytics.simple_counts import PerTokenCounter
# Fallback connection settings; main() uses these when the environment
# variables of the same name are not set.
essential_env: Dict[str, str] = {
    "DATABANK_DB_URI": "mongodb://localhost:27017",
    "DATABANK_DB_NAME": "databank",
}
def main() -> None:
    """Run a single full-mode scheduler pass and print the persisted total.

    The database URI and name are read from the ``DATABANK_DB_URI`` and
    ``DATABANK_DB_NAME`` environment variables, falling back to the entries
    of ``essential_env`` when a variable is unset.
    """
    mongo_uri = os.environ.get(
        "DATABANK_DB_URI", essential_env["DATABANK_DB_URI"]
    )
    db_name = os.environ.get(
        "DATABANK_DB_NAME", essential_env["DATABANK_DB_NAME"]
    )

    # Unique index spec on match.matchId, registered under the "match" key.
    match_indexes = [
        {
            "keys": [("match.matchId", 1)],
            "unique": True,
            "name": "uniq_match_matchId",
        }
    ]
    db = MongoDB(uri=mongo_uri, name=db_name, indexes={"match": match_indexes})
    db.connect()
    db.ensure_indexes()

    # One spider, with its task provider wired in "full" mode.
    match_list_spider = GetLeagueMatchListSpider()
    spiders: List[BaseSpider] = [match_list_spider]

    reporters = [DailyFileReporter(timezone="utc+8")]

    providers: Dict[BaseSpider, TaskProvider] = {
        match_list_spider: league_matchlist_from_mongo(mode="full"),
    }

    analyzers = [PerTokenCounter()]

    # Build the scheduler and execute exactly one run.
    result = DatabankScheduler(
        db=db,
        spiders=spiders,
        reporters=reporters,
        task_providers=providers,
        analyzers=analyzers,
    ).run_once()
    print("Scheduler(full) finished. Total persisted:", result.total_docs)


if __name__ == "__main__":
    main()
|