|
|
@@ -7,7 +7,13 @@ Idea:
|
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
-from typing import Any
|
|
|
+from typing import Any, Dict, Iterable, Optional, Tuple
|
|
|
+import re
|
|
|
+import time as _t
|
|
|
+import calendar as _cal
|
|
|
+
|
|
|
+from databank.core.models import Document
|
|
|
+from databank.db.base import BaseDB
|
|
|
|
|
|
from .base import AnalyticsBase
|
|
|
|
|
|
@@ -25,4 +31,219 @@ class H2HAnalyzer(AnalyticsBase):
|
|
|
Returns:
|
|
|
A structure keyed by (team_a, team_b) with stats and optional rating.
|
|
|
"""
|
|
|
- raise NotImplementedError("H2H analyzer not implemented yet")
|
|
|
+
|
|
|
+ persist: bool = bool(kwargs.get("persist", True))
|
|
|
+ db: Optional[BaseDB] = kwargs.get("db")
|
|
|
+ recent_window: int = int(kwargs.get("recent_window", 20))
|
|
|
+ separate_home_away: bool = bool(kwargs.get("separate_home_away", False))
|
|
|
+ group_by: str = str(kwargs.get("group_by", "league"))
|
|
|
+
|
|
|
+ def _get_ts(match: dict) -> int:
|
|
|
+ for k in ("timestamp", "ts", "kickoffTs", "timeTs"):
|
|
|
+ v = match.get(k)
|
|
|
+ if isinstance(v, (int, float)):
|
|
|
+ return int(v)
|
|
|
+ date_s = (
|
|
|
+ match.get("matchDate") or match.get("date") or match.get("gameDate")
|
|
|
+ )
|
|
|
+ time_s = (
|
|
|
+ match.get("matchTime") or match.get("time") or match.get("gameTime")
|
|
|
+ )
|
|
|
+ if isinstance(date_s, str) and isinstance(time_s, str):
|
|
|
+ try:
|
|
|
+ y, m, d = [int(x) for x in re.split(r"[-/]", date_s.strip())[:3]]
|
|
|
+ hh, mm = [int(x) for x in time_s.strip().split(":")[:2]]
|
|
|
+ try:
|
|
|
+ struct = _t.struct_time((y, m, d, hh, mm, 0, 0, 0, 0))
|
|
|
+ return int(_cal.timegm(struct))
|
|
|
+ except (OverflowError, ValueError):
|
|
|
+ return 0
|
|
|
+ except (ValueError, TypeError):
|
|
|
+ return 0
|
|
|
+ return 0
|
|
|
+
|
|
|
+ def _get_team(match: dict, side: str) -> Optional[str]:
|
|
|
+ id_val = match.get(f"{side}TeamId") or match.get(f"{side}Id")
|
|
|
+ if side == "home":
|
|
|
+ id_val = id_val or match.get("hostTeamId") or match.get("hostId")
|
|
|
+ else:
|
|
|
+ id_val = (
|
|
|
+ id_val
|
|
|
+ or match.get("awayTeamId")
|
|
|
+ or match.get("guestTeamId")
|
|
|
+ or match.get("guestId")
|
|
|
+ )
|
|
|
+ return str(id_val) if id_val is not None else None
|
|
|
+
|
|
|
+ def _get_score(match: dict, side: str) -> Optional[int]:
|
|
|
+ keys = [
|
|
|
+ f"{side}Score",
|
|
|
+ f"{side}Goals",
|
|
|
+ f"{side}Goal",
|
|
|
+ f"{'hostScore' if side=='home' else 'guestScore'}",
|
|
|
+ ]
|
|
|
+ for k in keys:
|
|
|
+ if k in match and match[k] is not None:
|
|
|
+ try:
|
|
|
+ return int(match[k])
|
|
|
+ except (ValueError, TypeError):
|
|
|
+ continue
|
|
|
+ sc = match.get("score")
|
|
|
+ if isinstance(sc, list):
|
|
|
+ for s in reversed(sc):
|
|
|
+ if not isinstance(s, str):
|
|
|
+ continue
|
|
|
+ s2 = s.strip()
|
|
|
+ m = re.search(r"(\d+)\s*:\s*(\d+)", s2)
|
|
|
+ if m:
|
|
|
+ try:
|
|
|
+ h = int(m.group(1))
|
|
|
+ a = int(m.group(2))
|
|
|
+ return h if side == "home" else a
|
|
|
+ except (ValueError, TypeError):
|
|
|
+ pass
|
|
|
+ return None
|
|
|
+
|
|
|
+ def _norm_season(s: Optional[str]) -> Optional[str]:
|
|
|
+ if not isinstance(s, str):
|
|
|
+ return s
|
|
|
+ s2 = s.strip()
|
|
|
+ # Try to extract canonical season like '2024-2025' or '2024'
|
|
|
+ m = re.search(r"(20\d{2})\s*[-/~––]\s*(20\d{2})", s2)
|
|
|
+ if m:
|
|
|
+ return f"{m.group(1)}-{m.group(2)}"
|
|
|
+ m2 = re.search(r"(19\d{2}|20\d{2})", s2)
|
|
|
+ if m2:
|
|
|
+ return m2.group(1)
|
|
|
+ return s2
|
|
|
+
|
|
|
def _context(doc: dict) -> Tuple[Optional[str], Optional[str]]:
    """Extract ``(league_id, season)`` from a document's payload.

    The payload is taken from ``doc["data"]["payload"]`` when ``doc["data"]``
    is a non-empty dict; if that yields ``None``, a dict-valued
    ``doc["payload"]`` is used instead. From the payload, ``leagueId`` is
    stringified and ``seasonName`` (falling back to ``season``) is
    stringified and passed through ``_norm_season``.

    Returns:
        ``(league_id, season)``; either element is ``None`` when missing,
        and ``(None, None)`` when no dict payload is available.
    """
    if not isinstance(doc, dict):
        return (None, None)

    payload = None
    nested = doc.get("data")
    if isinstance(nested, dict) and nested:
        payload = nested.get("payload")
    if payload is None:
        top_level = doc.get("payload")
        if isinstance(top_level, dict):
            payload = top_level

    if not isinstance(payload, dict):
        return (None, None)

    league_id = payload.get("leagueId")
    season = payload.get("seasonName") or payload.get("season")
    return (
        None if league_id is None else str(league_id),
        None if season is None else _norm_season(str(season)),
    )
|
|
|
+
|
|
|
def _group_key(doc: dict) -> str:
    """Return the grouping label for ``doc`` according to ``group_by``.

    * ``"league_season"`` -> ``"<league>::<season>"``
    * ``"league"``        -> ``"<league>"``
    * anything else       -> ``"global"``

    Missing league or season values are rendered as ``"NA"``.
    """
    # NOTE(review): the "league_season" label contains ':' characters, while
    # the aggregation step later splits the composite key on ':' — confirm
    # the two agree.
    if group_by not in ("league_season", "league"):
        return "global"

    league, season = _context(doc)
    if group_by == "league_season":
        return f"{league or 'NA'}::{season or 'NA'}"
    return f"{league or 'NA'}"
|
|
|
+
|
|
|
+ # Collect finished matches
|
|
|
+ items = list(data) if isinstance(data, Iterable) else []
|
|
|
+ rows: Dict[str, list[dict]] = {}
|
|
|
+ for d in items:
|
|
|
+ m = None
|
|
|
+ if isinstance(d, dict):
|
|
|
+ m = d.get("match") or d.get("data", {}).get("match")
|
|
|
+ if not isinstance(m, dict):
|
|
|
+ continue
|
|
|
+ hs = _get_score(m, "home")
|
|
|
+ as_ = _get_score(m, "away")
|
|
|
+ if hs is None or as_ is None:
|
|
|
+ continue
|
|
|
+ h = _get_team(m, "home")
|
|
|
+ a = _get_team(m, "away")
|
|
|
+ if not h or not a:
|
|
|
+ continue
|
|
|
+ gk = _group_key(d)
|
|
|
+ key = (
|
|
|
+ f"{gk}:{h}:{a}"
|
|
|
+ if separate_home_away
|
|
|
+ else f"{gk}:{':'.join(sorted([h,a]))}"
|
|
|
+ )
|
|
|
+ rows.setdefault(key, []).append(
|
|
|
+ {
|
|
|
+ "group": gk,
|
|
|
+ "home": h,
|
|
|
+ "away": a,
|
|
|
+ "hs": int(hs),
|
|
|
+ "as": int(as_),
|
|
|
+ "ts": _get_ts(m),
|
|
|
+ }
|
|
|
+ )
|
|
|
+
|
|
|
+ # Aggregate per key
|
|
|
+ summaries: Dict[str, dict] = {}
|
|
|
+ docs: list[Document] = []
|
|
|
+ for key, lst in rows.items():
|
|
|
+ lst_sorted = sorted(lst, key=lambda r: r["ts"]) # ascending
|
|
|
+ n = len(lst_sorted)
|
|
|
+ gf = sum(r["hs"] for r in lst_sorted)
|
|
|
+ ga = sum(r["as"] for r in lst_sorted)
|
|
|
+ # From perspective of the listed order (home team first if separate_home_away)
|
|
|
+ # Define team1/team2 for symmetric key
|
|
|
+ parts = key.split(":")
|
|
|
+ gk = parts[0]
|
|
|
+ if separate_home_away:
|
|
|
+ team1, team2 = parts[1], parts[2]
|
|
|
+ else:
|
|
|
+ # Recover team ids from first row
|
|
|
+ team1 = lst_sorted[0]["home"]
|
|
|
+ team2 = lst_sorted[0]["away"]
|
|
|
+
|
|
|
+ wins = sum(1 for r in lst_sorted if r["hs"] > r["as"])
|
|
|
+ draws = sum(1 for r in lst_sorted if r["hs"] == r["as"])
|
|
|
+ losses = n - wins - draws
|
|
|
+ recent = lst_sorted[-recent_window:] if recent_window > 0 else lst_sorted
|
|
|
+ rw = sum(1 for r in recent if r["hs"] > r["as"])
|
|
|
+ rd = sum(1 for r in recent if r["hs"] == r["as"])
|
|
|
+ rl = len(recent) - rw - rd
|
|
|
+ rgf = sum(r["hs"] for r in recent)
|
|
|
+ rga = sum(r["as"] for r in recent)
|
|
|
+
|
|
|
+ summary = {
|
|
|
+ "group": gk,
|
|
|
+ "team1": team1,
|
|
|
+ "team2": team2,
|
|
|
+ "matches": n,
|
|
|
+ "wins": wins,
|
|
|
+ "draws": draws,
|
|
|
+ "losses": losses,
|
|
|
+ "goals_for": gf,
|
|
|
+ "goals_against": ga,
|
|
|
+ "recent_window": int(recent_window),
|
|
|
+ "recent": {
|
|
|
+ "matches": len(recent),
|
|
|
+ "wins": rw,
|
|
|
+ "draws": rd,
|
|
|
+ "losses": rl,
|
|
|
+ "goals_for": rgf,
|
|
|
+ "goals_against": rga,
|
|
|
+ },
|
|
|
+ "separate_home_away": bool(separate_home_away),
|
|
|
+ }
|
|
|
+
|
|
|
+ summaries[key] = summary
|
|
|
+ if persist and db:
|
|
|
+ docs.append(
|
|
|
+ Document(
|
|
|
+ id=f"{key}:sum",
|
|
|
+ kind="h2h_summary",
|
|
|
+ data=summary,
|
|
|
+ )
|
|
|
+ )
|
|
|
+
|
|
|
+ if persist and db and docs:
|
|
|
+ db.insert_many(docs)
|
|
|
+
|
|
|
+ return {
|
|
|
+ "pairs": list(summaries.keys()),
|
|
|
+ "summaries": summaries,
|
|
|
+ "persisted": len(docs) if (persist and db) else 0,
|
|
|
+ }
|