| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859 |
- """Run scheduler in full mode: all seasons and rounds for available leagues."""
- from __future__ import annotations
- import os
- from typing import Dict, List
- from databank.db import MongoDB
- from databank.reporter.daily_file import DailyFileReporter
- from databank.spiders.get_league_match_list import GetLeagueMatchListSpider
- from databank.spiders.base import BaseSpider
- from databank.scheduler.orchestrator import DatabankScheduler, TaskProvider
- from databank.tasks.providers import league_matchlist_from_mongo
- from databank.analytics.simple_counts import PerTokenCounter
# Fallback MongoDB connection settings; main() uses each value only when
# the environment variable of the same name is not set.
essential_env: Dict[str, str] = {
    "DATABANK_DB_URI": "mongodb://localhost:27017",
    "DATABANK_DB_NAME": "databank",
}
def main() -> None:
    """Entry point: run the scheduler once in full mode (all seasons and rounds).

    The database URI and name are read from the ``DATABANK_DB_URI`` and
    ``DATABANK_DB_NAME`` environment variables, falling back to the values
    in ``essential_env``. A single ``run_once()`` pass is executed and the
    ``total_docs`` field of the returned summary is printed.
    """
    # Database: environment overrides win, module defaults otherwise.
    database = MongoDB(
        uri=os.environ.get("DATABANK_DB_URI", essential_env["DATABANK_DB_URI"]),
        name=os.environ.get("DATABANK_DB_NAME", essential_env["DATABANK_DB_NAME"]),
    )
    database.connect()

    # Spiders: only the league match-list spider takes part in this run.
    match_list_spider = GetLeagueMatchListSpider()
    spiders: List[BaseSpider] = [match_list_spider]

    # Reporters
    reporters = [DailyFileReporter(timezone="utc+8")]

    # Task providers, keyed by the spider they feed; requested in "full" mode.
    providers_by_spider: Dict[BaseSpider, TaskProvider] = {
        match_list_spider: league_matchlist_from_mongo(mode="full"),
    }

    # Analyzers
    analyzers = [PerTokenCounter()]

    scheduler = DatabankScheduler(
        db=database,
        spiders=spiders,
        reporters=reporters,
        task_providers=providers_by_spider,
        analyzers=analyzers,
    )
    run_summary = scheduler.run_once()
    print("Scheduler(full) finished. Total persisted:", run_summary.total_docs)
# Run the full-mode scheduler only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|