| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465 |
"""Executable demo: run GetLeagueMatchListSpider for up to three requests.

Usage (PowerShell):
    python -m pip install requests pymongo
    $env:DATABANK_DB_URI = "mongodb://localhost:27017"
    $env:DATABANK_DB_NAME = "databank"
    python scripts/test_get_league_match_list.py
"""
- from __future__ import annotations
- import os
- from collections import defaultdict
- from databank.db import MongoDB
- from databank.spiders.get_league_match_list import GetLeagueMatchListSpider
def pick_tokens(max_tokens: int = 3) -> list[str]:
    """Build up to ``max_tokens`` URL tokens from MongoDB collections.

    Reads the connection settings from the ``DATABANK_DB_URI`` and
    ``DATABANK_DB_NAME`` environment variables (with local defaults),
    fetches up to ten documents from each of the ``leagues`` and
    ``seasons`` collections, and emits one token per round for the league
    with the smallest ``league_id`` combined with the first season
    returned. Each token has the form ``"<league_id>|<season>|<round>"``.

    Args:
        max_tokens: Upper bound on the number of tokens returned. Zero or
            a negative value yields an empty list.

    Returns:
        Tokens for rounds ``1`` through ``min(max_round, max_tokens)`` in
        ascending round order.

    Raises:
        RuntimeError: If no league or no season document is found.
    """
    uri = os.getenv("DATABANK_DB_URI", "mongodb://localhost:27017")
    name = os.getenv("DATABANK_DB_NAME", "databank")
    db = MongoDB(uri=uri, name=name)
    db.connect()
    try:
        leagues = db.find("leagues", projection={"_id": 0}, limit=10)
        seasons = db.find("seasons", projection={"_id": 0}, limit=10)
        if not leagues:
            raise RuntimeError("No leagues found. Seed leagues first.")
        if not seasons:
            raise RuntimeError("No seasons found. Seed seasons first.")

        # min() returns the first minimal element, i.e. the same document a
        # stable sort would place at index 0, without sorting the whole list.
        league = min(leagues, key=lambda doc: doc.get("league_id", 0))

        # A stored null would make int() raise; treat it like a missing
        # field and fall back to a single round.
        raw_max_round = league.get("max_round")
        max_round = 1 if raw_max_round is None else int(raw_max_round)

        # Looked up once, outside the comprehension (loop-invariant).
        league_id = league["league_id"]
        season_name = seasons[0]["season"]

        # Clamp at zero so a negative max_tokens cannot act as a
        # "drop from the end" slice bound and return a surprising subset.
        count = max(0, min(max_round, max_tokens))
        return [
            f"{league_id}|{season_name}|{round_no}"
            for round_no in range(1, count + 1)
        ]
    finally:
        # Always release the connection, even when seeding checks fail.
        db.close()
def main() -> None:
    """Run the spider on the picked tokens and print a short report.

    Prints the total number of fetched documents, then for every token the
    number of documents and one sample: the ``match`` entry of the first
    document's data when it is truthy, otherwise the whole data mapping.
    Documents whose data has no ``token`` entry are grouped under
    ``"unknown"``.
    """
    crawler = GetLeagueMatchListSpider()
    docs = crawler.run(pick_tokens())
    print(f"Fetched {len(docs)} documents in total.")

    # Bucket documents by token; a plain dict keeps first-seen order.
    grouped: dict = {}
    for doc in docs:
        grouped.setdefault(doc.data.get("token", "unknown"), []).append(doc)

    for token, items in grouped.items():
        print(f"Token: {token}, docs: {len(items)}")
        if not items:
            continue
        print("Sample:", items[0].data.get("match") or items[0].data)
if __name__ == "__main__":
    # Run the demo only when executed as a script, not when imported.
    main()
|