"""Executable demo: run GetLeagueMatchListSpider for three requests.

Usage (PowerShell):
    python -m pip install requests pymongo
    $env:DATABANK_DB_URI = "mongodb://localhost:27017"
    $env:DATABANK_DB_NAME = "databank"
    python scripts/test_get_league_match_list.py
"""

from __future__ import annotations

import os
from collections import defaultdict

from databank.db import MongoDB
from databank.spiders.get_league_match_list import GetLeagueMatchListSpider


def pick_tokens(max_tokens: int = 3) -> list[str]:
    """Build up to ``max_tokens`` URL tokens from MongoDB collections.

    Connection settings are read from the ``DATABANK_DB_URI`` and
    ``DATABANK_DB_NAME`` environment variables, falling back to a local
    MongoDB instance and the ``databank`` database.

    The ``leagues`` and ``seasons`` collections are each queried with
    ``limit=10``. The league with the smallest ``league_id`` among the
    returned documents is combined with the ``season`` field of the first
    returned season document, and one token of the form
    ``"<league_id>|<season>|<round>"`` is produced per round, starting at
    round 1 and stopping at the league's ``max_round`` (default 1).

    Args:
        max_tokens: Maximum number of tokens to return. Values below zero
            are treated as zero.

    Returns:
        At most ``max_tokens`` tokens, in ascending round order.

    Raises:
        RuntimeError: If either the leagues or the seasons query comes
            back empty.
    """
    uri = os.getenv("DATABANK_DB_URI", "mongodb://localhost:27017")
    name = os.getenv("DATABANK_DB_NAME", "databank")

    db = MongoDB(uri=uri, name=name)
    db.connect()
    try:
        leagues = db.find("leagues", projection={"_id": 0}, limit=10)
        seasons = db.find("seasons", projection={"_id": 0}, limit=10)
        if not leagues:
            raise RuntimeError("No leagues found. Seed leagues first.")
        if not seasons:
            raise RuntimeError("No seasons found. Seed seasons first.")

        # min() returns the first minimal element, which is the same
        # document a stable sort would place at index 0.
        league = min(leagues, key=lambda doc: doc.get("league_id", 0))
        max_round = int(league.get("max_round", 1))
        season_name = seasons[0]["season"]

        # Clamp so that a negative max_tokens yields no tokens instead of
        # being interpreted as a "drop the last N rounds" slice.
        limit = max(max_tokens, 0)
        # Slicing a range is lazy; no need to materialize every round.
        rounds = range(1, max_round + 1)[:limit]
        return [
            f"{league['league_id']}|{season_name}|{round_no}"
            for round_no in rounds
        ]
    finally:
        # Always release the connection, including on the error paths.
        db.close()


def main() -> None:
    """Run the demo and print a compact summary to stdout.

    Builds tokens with :func:`pick_tokens`, passes them to
    ``GetLeagueMatchListSpider.run``, then prints the total number of
    returned documents followed by a per-token count and one sample
    document for each token.
    """
    spider = GetLeagueMatchListSpider()
    urls = pick_tokens()
    docs = spider.run(urls)
    print(f"Fetched {len(docs)} documents in total.")

    # Group the returned documents by the "token" entry of their data;
    # documents without one are collected under "unknown".
    per_token = defaultdict(list)
    for doc in docs:
        per_token[doc.data.get("token", "unknown")].append(doc)

    for token, items in per_token.items():
        print(f"Token: {token}, docs: {len(items)}")
        # Each group was created by an append, so it holds at least one
        # document. Prefer the "match" entry; fall back to the whole data.
        sample = items[0].data
        print("Sample:", sample.get("match") or sample)


if __name__ == "__main__":
    main()