# test_analytics_team_elo.py
"""Basic unit tests for the TeamExtractor and Elo analyzers.

This module uses a simple in-memory FakeDB to validate:
- Team alias union behavior
- Elo snapshot and history persistence contracts
"""
  6. import unittest
  7. from typing import Any, Dict, List
  8. from databank.analytics.teams import TeamExtractorAnalyzer
  9. from databank.analytics.elo import EloAnalyzer
  10. from databank.core.models import Document
  11. class FakeDB:
  12. """A minimal in-memory DB stub implementing insert_many/find for tests."""
  13. def __init__(self) -> None:
  14. self.storage: Dict[str, List[Dict[str, Any]]] = {}
  15. def connect(self) -> None: # for parity
  16. """No-op connect to match real DB interface."""
  17. return None
  18. def insert_many(self, docs: List[Document]) -> int:
  19. """Append documents by kind; return inserted count."""
  20. count = 0
  21. for d in docs:
  22. kind = d.kind
  23. rec = {"_id": d.id, "kind": kind, "data": d.data}
  24. self.storage.setdefault(kind, []).append(rec)
  25. count += 1
  26. return count
  27. def find(
  28. self,
  29. kind: str,
  30. query: Dict[str, Any] | None = None,
  31. _projection=None,
  32. _limit=None,
  33. ): # noqa: A002 - filter naming for parity
  34. """Retrieve documents by kind; supports _id $in filter."""
  35. data = self.storage.get(kind, [])
  36. if not query:
  37. return list(data)
  38. # Support _id $in
  39. ids = None
  40. if "_id" in query and isinstance(query["_id"], dict) and "$in" in query["_id"]:
  41. ids = set(map(str, query["_id"]["$in"]))
  42. if ids is not None:
  43. return [d for d in data if str(d.get("_id")) in ids]
  44. return list(data)
  45. class TestAnalyticsTeamElo(unittest.TestCase):
  46. """Tests for TeamExtractorAnalyzer and EloAnalyzer integrations."""
  47. def test_team_extractor_alias_union(self) -> None:
  48. """Teams analyzer should union new aliases with existing ones."""
  49. db = FakeDB()
  50. # Existing team with alias
  51. db.insert_many(
  52. [
  53. Document(
  54. id="100",
  55. kind="teams",
  56. data={
  57. "team_id": "100",
  58. "name_canonical": "manchester city",
  59. "aliases": ["曼城"],
  60. },
  61. )
  62. ]
  63. )
  64. # Two matches with different alias spellings
  65. matches = [
  66. {
  67. "match": {
  68. "homeTeamId": 100,
  69. "homeTeamName": "Manchester City",
  70. "awayTeamId": 200,
  71. "awayTeamName": "Arsenal",
  72. }
  73. },
  74. {
  75. "match": {
  76. "homeTeamId": 300,
  77. "homeTeamName": "Chelsea",
  78. "awayTeamId": 100,
  79. "awayTeamName": "Man City",
  80. }
  81. },
  82. ]
  83. res = TeamExtractorAnalyzer().compute(matches, db=db)
  84. self.assertIn("upserted", res)
  85. # Check merged aliases contain existing + new
  86. teams = db.find("teams")
  87. # Find all records for _id 100 and pick the latest appended (upsert effect)
  88. mlist = [x for x in teams if str(x.get("_id")) == "100"]
  89. self.assertGreaterEqual(len(mlist), 1)
  90. mrec = mlist[-1]
  91. aliases = set(mrec["data"]["aliases"]) # type: ignore[index]
  92. self.assertTrue({"曼城", "Manchester City", "Man City"}.issubset(aliases))
  93. def test_elo_persist_snapshot_and_history(self) -> None:
  94. """Elo analyzer should persist snapshot and two history entries per match."""
  95. db = FakeDB()
  96. # Two matches: A beats B 1-0, then draw 0-0
  97. matches = [
  98. {
  99. "match": {
  100. "matchId": 1,
  101. "homeTeamId": "A",
  102. "awayTeamId": "B",
  103. "homeScore": 1,
  104. "awayScore": 0,
  105. "elapsedTime": "已完场",
  106. "matchTime": 1_700_000_000,
  107. }
  108. },
  109. {
  110. "match": {
  111. "matchId": 2,
  112. "homeTeamId": "A",
  113. "awayTeamId": "B",
  114. "homeScore": 0,
  115. "awayScore": 0,
  116. "elapsedTime": "已完场",
  117. "matchTime": 1_700_000_100,
  118. }
  119. },
  120. ]
  121. res = EloAnalyzer().compute(matches, db=db, persist=True)
  122. self.assertEqual(res["processed"], 2)
  123. # Snapshot should exist
  124. elo = db.find("elo_ratings")
  125. self.assertTrue(len(elo) >= 2)
  126. # History should have 4 entries (2 matches x 2 teams)
  127. hist = db.find("ratings_history")
  128. self.assertEqual(len(hist), 4)
  129. # Ensure history contains match_id and ts
  130. for h in hist:
  131. self.assertIn("match_id", h["data"]) # type: ignore[index]
  132. self.assertIn("ts", h["data"]) # type: ignore[index]
  133. if __name__ == "__main__":
  134. unittest.main()