seed_seasons_mongo.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104
"""Seed seasons into a local MongoDB instance with unique ``season``.

Rules:
- initial_year starts from 2016.
- Determine current_year by today's date:
  - If today > July 1 (strict), current_year = this_year + 1
  - Else, current_year = this_year
- Generate seasons from initial_year up to (but excluding) current_year, as
  strings like "YYYY-(YYYY+1)", e.g. "2016-2017". Stop when
  initial_year == current_year.
- Ensure unique index on ``season`` and upsert each document.

Environment variables (with defaults):
- DATABANK_DB_URI (default: mongodb://localhost:27017)
- DATABANK_DB_NAME (default: databank)

Usage (PowerShell):
    python -m pip install "pymongo>=4.7"
    $env:DATABANK_DB_URI = "mongodb://localhost:27017"
    $env:DATABANK_DB_NAME = "databank"
    python scripts/seed_seasons_mongo.py
"""
  19. from __future__ import annotations
  20. import os
  21. from datetime import date
  22. from typing import Any, Dict, Iterable, List, Tuple
  23. from pymongo import MongoClient
  24. def get_connection() -> Tuple[MongoClient, str]:
  25. """Create a MongoDB client and return ``(client, db_name)``."""
  26. uri = os.getenv("DATABANK_DB_URI", "mongodb://localhost:27017")
  27. name = os.getenv("DATABANK_DB_NAME", "databank")
  28. client = MongoClient(uri)
  29. return client, name
  30. def calc_current_year(today: date | None = None) -> int:
  31. """Calculate current_year per rule (> July 1 => year+1 else year)."""
  32. today = today or date.today()
  33. threshold = date(today.year, 7, 1)
  34. return today.year + 1 if today > threshold else today.year
  35. def generate_seasons(initial_year: int, current_year: int) -> List[Dict[str, Any]]:
  36. """Generate season documents from initial_year to current_year (exclusive)."""
  37. seasons: List[Dict[str, Any]] = []
  38. for start in range(initial_year, current_year):
  39. end = start + 1
  40. seasons.append(
  41. {
  42. "season": f"{start}-{end}",
  43. "start_year": start,
  44. "end_year": end,
  45. }
  46. )
  47. return seasons
  48. def ensure_unique_index_seasons(db) -> None:
  49. """Ensure a unique index on seasons.season."""
  50. seasons = db["seasons"]
  51. seasons.create_index("season", unique=True)
  52. def upsert_seasons(db, docs: Iterable[Dict[str, Any]]) -> Dict[str, int]:
  53. """Upsert provided season docs by ``season``; return counters."""
  54. seasons = db["seasons"]
  55. inserted = 0
  56. updated = 0
  57. for doc in docs:
  58. key = {"season": doc["season"]}
  59. result = seasons.replace_one(key, doc, upsert=True)
  60. if result.matched_count:
  61. updated += int(result.modified_count == 1)
  62. else:
  63. inserted += 1
  64. return {"inserted": inserted, "updated": updated}
  65. def main() -> None:
  66. """Entry point to seed season documents into MongoDB with unique ``season``."""
  67. client, db_name = get_connection()
  68. try:
  69. db = client[db_name]
  70. ensure_unique_index_seasons(db)
  71. initial_year = 2016
  72. current = calc_current_year()
  73. docs = generate_seasons(initial_year, current)
  74. stats = upsert_seasons(db, docs)
  75. print(
  76. f"Seasons seed done: range=[{initial_year}, {current}) "
  77. f"inserted={stats['inserted']}, updated={stats['updated']}"
  78. )
  79. finally:
  80. client.close()
  81. if __name__ == "__main__":
  82. main()