# run_scheduler_full.py
  1. """Run scheduler in full mode: all seasons and rounds for available leagues."""
  2. from __future__ import annotations
  3. import os
  4. from typing import Dict, List
  5. from databank.db import MongoDB
  6. from databank.reporter.daily_file import DailyFileReporter
  7. from databank.spiders.get_league_match_list import GetLeagueMatchListSpider
  8. from databank.spiders.base import BaseSpider
  9. from databank.scheduler.orchestrator import DatabankScheduler, TaskProvider
  10. from databank.tasks.providers import league_matchlist_from_mongo
  11. from databank.analytics.simple_counts import PerTokenCounter
  12. essential_env = {
  13. "DATABANK_DB_URI": "mongodb://localhost:27017",
  14. "DATABANK_DB_NAME": "databank",
  15. }
  16. def main() -> None:
  17. """Entry point: run the scheduler in full mode (all seasons and rounds)."""
  18. uri = os.getenv("DATABANK_DB_URI", essential_env["DATABANK_DB_URI"])
  19. name = os.getenv("DATABANK_DB_NAME", essential_env["DATABANK_DB_NAME"])
  20. db = MongoDB(uri=uri, name=name)
  21. db.connect()
  22. # Spiders
  23. get_match = GetLeagueMatchListSpider()
  24. spiders: List[BaseSpider] = [get_match]
  25. # Reporters
  26. reporters = [DailyFileReporter(timezone="utc+8")]
  27. # Task providers wiring (full, no caps)
  28. tasks_provider: Dict[BaseSpider, TaskProvider] = {
  29. get_match: league_matchlist_from_mongo(mode="full"),
  30. }
  31. # Analyzers
  32. analyzers = [PerTokenCounter()]
  33. scheduler = DatabankScheduler(
  34. db=db,
  35. spiders=spiders,
  36. reporters=reporters,
  37. task_providers=tasks_provider,
  38. analyzers=analyzers,
  39. )
  40. summary = scheduler.run_once()
  41. print("Scheduler(full) finished. Total persisted:", summary.total_docs)
  42. if __name__ == "__main__":
  43. main()