# test_get_league_match_list.py
"""Executable demo: run GetLeagueMatchListSpider for up to three requests.

Usage (PowerShell):
    python -m pip install requests pymongo
    $env:DATABANK_DB_URI = "mongodb://localhost:27017"
    $env:DATABANK_DB_NAME = "databank"
    python scripts/test_get_league_match_list.py
"""
  8. from __future__ import annotations
  9. import os
  10. from collections import defaultdict
  11. from databank.db import MongoDB
  12. from databank.spiders.get_league_match_list import GetLeagueMatchListSpider
  13. def pick_tokens(max_tokens: int = 3) -> list[str]:
  14. """Build up to ``max_tokens`` URL tokens from MongoDB collections."""
  15. uri = os.getenv("DATABANK_DB_URI", "mongodb://localhost:27017")
  16. name = os.getenv("DATABANK_DB_NAME", "databank")
  17. db = MongoDB(uri=uri, name=name)
  18. db.connect()
  19. try:
  20. leagues = db.find("leagues", projection={"_id": 0}, limit=10)
  21. seasons = db.find("seasons", projection={"_id": 0}, limit=10)
  22. if not leagues:
  23. raise RuntimeError("No leagues found. Seed leagues first.")
  24. if not seasons:
  25. raise RuntimeError("No seasons found. Seed seasons first.")
  26. league = sorted(leagues, key=lambda x: x.get("league_id", 0))[0]
  27. max_round = int(league.get("max_round", 1))
  28. season_name = seasons[0]["season"]
  29. tokens: list[str] = []
  30. rounds = list(range(1, max_round + 1))[:max_tokens]
  31. for r in rounds:
  32. tokens.append(f"{league['league_id']}|{season_name}|{r}")
  33. return tokens[:max_tokens]
  34. finally:
  35. db.close()
  36. def main() -> None:
  37. """Run the demo and print a compact summary to stdout."""
  38. spider = GetLeagueMatchListSpider()
  39. urls = pick_tokens()
  40. docs = spider.run(urls)
  41. print(f"Fetched {len(docs)} documents in total.")
  42. per_token = defaultdict(list)
  43. for d in docs:
  44. per_token[d.data.get("token", "unknown")].append(d)
  45. for token, items in per_token.items():
  46. print(f"Token: {token}, docs: {len(items)}")
  47. if items:
  48. print("Sample:", items[0].data.get("match") or items[0].data)
  49. if __name__ == "__main__":
  50. main()