run_analyzers.py 2.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566
  1. """Run selected analyzers over existing DB data or last run results.
  2. This is a scaffold to wire analyzers end-to-end once implementations are ready.
  3. """
  4. from __future__ import annotations
  5. import os
  6. from typing import Iterable, List
  7. from databank.db import MongoDB
  8. from databank.analytics.base import AnalyticsBase
  9. from databank.analytics.teams import TeamExtractorAnalyzer
  10. from databank.analytics.elo import EloAnalyzer
  11. from databank.analytics.dixon_coles import DixonColesAnalyzer
  12. from databank.analytics.markov_chain import MarkovChainAnalyzer
  13. from databank.analytics.h2h import H2HAnalyzer
  14. from databank.analytics.calibration import CalibrationAnalyzer
  15. from databank.analytics.monte_carlo import SeasonMonteCarloAnalyzer
  16. from databank.analytics.sos import StrengthOfScheduleAnalyzer
  17. def load_data(db: MongoDB, limit: int | None = 2000) -> List[dict]:
  18. """Load match documents from DB as analyzer inputs (scaffold)."""
  19. # NOTE: Adjust projection to include what analyzers need.
  20. return db.find("match", projection={"_id": 0}, limit=limit)
  21. def main() -> None:
  22. """Build analyzers and run them on existing data (skeleton)."""
  23. uri = os.getenv("DATABANK_DB_URI", "mongodb://localhost:27017")
  24. name = os.getenv("DATABANK_DB_NAME", "databank")
  25. db = MongoDB(uri=uri, name=name)
  26. db.connect()
  27. data = load_data(db)
  28. analyzers: Iterable[AnalyticsBase] = [
  29. TeamExtractorAnalyzer(),
  30. EloAnalyzer(),
  31. DixonColesAnalyzer(),
  32. MarkovChainAnalyzer(),
  33. H2HAnalyzer(),
  34. CalibrationAnalyzer(),
  35. StrengthOfScheduleAnalyzer(),
  36. SeasonMonteCarloAnalyzer(),
  37. ]
  38. for analyzer in analyzers:
  39. print(f"Running analyzer: {analyzer.__class__.__name__}")
  40. try:
  41. analyzer.prepare(data)
  42. analyzer.validate(data)
  43. transformed = analyzer.transform(data)
  44. result = analyzer.compute(transformed, db=db, persist=True)
  45. analyzer.finalize(result)
  46. print(f" -> Done: {analyzer.__class__.__name__}")
  47. except NotImplementedError as exc:
  48. print(f" -> Skipped (not implemented): {exc}")
  49. except (RuntimeError, ValueError) as exc: # pragma: no cover - diagnostics only
  50. print(f" -> Error: {type(exc).__name__}: {exc}")
# Run the analyzers only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()