|
@@ -1,7 +1,8 @@
|
|
|
"""Elo rating analyzer skeleton for football.
|
|
"""Elo rating analyzer skeleton for football.
|
|
|
|
|
|
|
|
Features considered (to implement):
|
|
Features considered (to implement):
|
|
|
-- Base K, goal-difference scaling, home advantage offset, time decay, season reset.
|
|
|
|
|
|
|
+- Base K, goal-difference scaling, home advantage offset,
|
|
|
|
|
+ time decay, season reset.
|
|
|
- Probability mapping and optional calibration as a downstream step.
|
|
- Probability mapping and optional calibration as a downstream step.
|
|
|
"""
|
|
"""
|
|
|
|
|
|
|
@@ -44,6 +45,8 @@ class EloAnalyzer(AnalyticsBase):
|
|
|
home_adv: float = float(kwargs.get("home_adv", 60.0))
|
|
home_adv: float = float(kwargs.get("home_adv", 60.0))
|
|
|
persist: bool = bool(kwargs.get("persist", True))
|
|
persist: bool = bool(kwargs.get("persist", True))
|
|
|
return_expectations: bool = bool(kwargs.get("return_expectations", False))
|
|
return_expectations: bool = bool(kwargs.get("return_expectations", False))
|
|
|
|
|
+ incremental: bool = bool(kwargs.get("incremental", True))
|
|
|
|
|
+ seed_from_snapshot: bool = bool(kwargs.get("seed_from_snapshot", True))
|
|
|
db: Optional[BaseDB] = kwargs.get("db")
|
|
db: Optional[BaseDB] = kwargs.get("db")
|
|
|
|
|
|
|
|
# Helpers
|
|
# Helpers
|
|
@@ -55,12 +58,33 @@ class EloAnalyzer(AnalyticsBase):
|
|
|
|
|
|
|
|
def _get_team(match: dict, side: str) -> Tuple[Optional[str], Optional[str]]:
|
|
def _get_team(match: dict, side: str) -> Tuple[Optional[str], Optional[str]]:
|
|
|
# returns (team_id, team_name)
|
|
# returns (team_id, team_name)
|
|
|
|
|
+ # Try standard side-based keys
|
|
|
id_val = match.get(f"{side}TeamId") or match.get(f"{side}Id")
|
|
id_val = match.get(f"{side}TeamId") or match.get(f"{side}Id")
|
|
|
- name_val = (
|
|
|
|
|
- match.get(f"{side}TeamName")
|
|
|
|
|
- or match.get(f"{side}Name")
|
|
|
|
|
- or (match.get("homeName") if side == "home" else match.get("awayName"))
|
|
|
|
|
- )
|
|
|
|
|
|
|
+ name_val = match.get(f"{side}TeamName") or match.get(f"{side}Name")
|
|
|
|
|
+ # Try host/guest variants
|
|
|
|
|
+ if side == "home":
|
|
|
|
|
+ id_val = id_val or match.get("hostTeamId") or match.get("hostId")
|
|
|
|
|
+ name_val = (
|
|
|
|
|
+ name_val or match.get("hostTeamName") or match.get("hostName")
|
|
|
|
|
+ )
|
|
|
|
|
+ else:
|
|
|
|
|
+ id_val = (
|
|
|
|
|
+ id_val
|
|
|
|
|
+ or match.get("awayTeamId")
|
|
|
|
|
+ or match.get("guestTeamId")
|
|
|
|
|
+ or match.get("guestId")
|
|
|
|
|
+ )
|
|
|
|
|
+ name_val = (
|
|
|
|
|
+ name_val
|
|
|
|
|
+ or match.get("awayTeamName")
|
|
|
|
|
+ or match.get("guestTeamName")
|
|
|
|
|
+ or match.get("guestName")
|
|
|
|
|
+ )
|
|
|
|
|
+ # Final fallback to generic homeName/awayName
|
|
|
|
|
+ if name_val is None:
|
|
|
|
|
+ name_val = (
|
|
|
|
|
+ match.get("homeName") if side == "home" else match.get("awayName")
|
|
|
|
|
+ )
|
|
|
team_id = str(id_val) if id_val is not None else None
|
|
team_id = str(id_val) if id_val is not None else None
|
|
|
if team_id is None and isinstance(name_val, str):
|
|
if team_id is None and isinstance(name_val, str):
|
|
|
team_id = _norm(name_val)
|
|
team_id = _norm(name_val)
|
|
@@ -79,10 +103,37 @@ class EloAnalyzer(AnalyticsBase):
|
|
|
return int(match[k])
|
|
return int(match[k])
|
|
|
except (ValueError, TypeError):
|
|
except (ValueError, TypeError):
|
|
|
continue
|
|
continue
|
|
|
|
|
+ # Fallback: parse from a "score" list, scanning entries last-to-first for strings like "FT 2:1"
|
|
|
|
|
+ sc = match.get("score")
|
|
|
|
|
+ if isinstance(sc, list):
|
|
|
|
|
+ for s in reversed(sc):
|
|
|
|
|
+ if not isinstance(s, str):
|
|
|
|
|
+ continue
|
|
|
|
|
+ s2 = s.strip()
|
|
|
|
|
+ # Extract score pattern anywhere in the string
|
|
|
|
|
+ m = re.search(r"(\d+)\s*:\s*(\d+)", s2)
|
|
|
|
|
+ if m:
|
|
|
|
|
+ try:
|
|
|
|
|
+ h = int(m.group(1))
|
|
|
|
|
+ a = int(m.group(2))
|
|
|
|
|
+ return h if side == "home" else a
|
|
|
|
|
+ except (ValueError, TypeError):
|
|
|
|
|
+ continue
|
|
|
return None
|
|
return None
|
|
|
|
|
|
|
|
def _get_ts(match: dict) -> float:
|
|
def _get_ts(match: dict) -> float:
|
|
|
# Try common timestamp fields (ms or s)
|
|
# Try common timestamp fields (ms or s)
|
|
|
|
|
+ # 1) Combined matchDate + matchTime strings when both are present (interpreted as UTC)
|
|
|
|
|
+ md = match.get("matchDate")
|
|
|
|
|
+ mt = match.get("matchTime")
|
|
|
|
|
+ if isinstance(md, str) and isinstance(mt, str) and md and mt:
|
|
|
|
|
+ combo = f"{md.strip()} {mt.strip()}"
|
|
|
|
|
+ for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%d %H:%M"):
|
|
|
|
|
+ try:
|
|
|
|
|
+ dtc = datetime.strptime(combo, fmt).replace(tzinfo=timezone.utc)
|
|
|
|
|
+ return dtc.timestamp()
|
|
|
|
|
+ except ValueError:
|
|
|
|
|
+ continue
|
|
|
for k in ("matchTime", "startTime", "time", "date", "matchDate"):
|
|
for k in ("matchTime", "startTime", "time", "date", "matchDate"):
|
|
|
if k in match and match[k] is not None:
|
|
if k in match and match[k] is not None:
|
|
|
v = match[k]
|
|
v = match[k]
|
|
@@ -119,6 +170,7 @@ class EloAnalyzer(AnalyticsBase):
|
|
|
# Normalize input
|
|
# Normalize input
|
|
|
items = list(data) if isinstance(data, Iterable) else []
|
|
items = list(data) if isinstance(data, Iterable) else []
|
|
|
matches = []
|
|
matches = []
|
|
|
|
|
+ team_names: Dict[str, str] = {}
|
|
|
for d in items:
|
|
for d in items:
|
|
|
m = None
|
|
m = None
|
|
|
if isinstance(d, dict):
|
|
if isinstance(d, dict):
|
|
@@ -128,13 +180,38 @@ class EloAnalyzer(AnalyticsBase):
|
|
|
if not _finished(m):
|
|
if not _finished(m):
|
|
|
continue
|
|
continue
|
|
|
ts = _get_ts(m)
|
|
ts = _get_ts(m)
|
|
|
|
|
+ # Capture context from original document payload if available (nested or top-level)
|
|
|
|
|
+ payload_ctx = None
|
|
|
|
|
+ token = None
|
|
|
|
|
+ if isinstance(d, dict):
|
|
|
|
|
+ data_dict = d.get("data") if isinstance(d.get("data"), dict) else None
|
|
|
|
|
+ if data_dict:
|
|
|
|
|
+ payload_ctx = data_dict.get("payload")
|
|
|
|
|
+ token = data_dict.get("token")
|
|
|
|
|
+ # Top-level fallbacks
|
|
|
|
|
+ if payload_ctx is None and isinstance(d.get("payload"), dict):
|
|
|
|
|
+ payload_ctx = d.get("payload")
|
|
|
|
|
+ if token is None and isinstance(d.get("token"), str):
|
|
|
|
|
+ token = d.get("token")
|
|
|
mid = m.get("matchId") or m.get("id")
|
|
mid = m.get("matchId") or m.get("id")
|
|
|
- h_id, _ = _get_team(m, "home")
|
|
|
|
|
- a_id, _ = _get_team(m, "away")
|
|
|
|
|
|
|
+ h_id, h_name = _get_team(m, "home")
|
|
|
|
|
+ a_id, a_name = _get_team(m, "away")
|
|
|
hs = _get_score(m, "home")
|
|
hs = _get_score(m, "home")
|
|
|
as_ = _get_score(m, "away")
|
|
as_ = _get_score(m, "away")
|
|
|
if not h_id or not a_id or hs is None or as_ is None:
|
|
if not h_id or not a_id or hs is None or as_ is None:
|
|
|
continue
|
|
continue
|
|
|
|
|
+ if isinstance(h_name, str) and h_name:
|
|
|
|
|
+ team_names.setdefault(h_id, h_name)
|
|
|
|
|
+ if isinstance(a_name, str) and a_name:
|
|
|
|
|
+ team_names.setdefault(a_id, a_name)
|
|
|
|
|
+ # Normalize season/league/round context
|
|
|
|
|
+ league_id = None
|
|
|
|
|
+ season = None
|
|
|
|
|
+ round_no = None
|
|
|
|
|
+ if isinstance(payload_ctx, dict):
|
|
|
|
|
+ league_id = payload_ctx.get("leagueId")
|
|
|
|
|
+ season = payload_ctx.get("seasonName") or payload_ctx.get("season")
|
|
|
|
|
+ round_no = payload_ctx.get("round") or payload_ctx.get("round_no")
|
|
|
matches.append(
|
|
matches.append(
|
|
|
{
|
|
{
|
|
|
"ts": ts,
|
|
"ts": ts,
|
|
@@ -143,12 +220,70 @@ class EloAnalyzer(AnalyticsBase):
|
|
|
"away": a_id,
|
|
"away": a_id,
|
|
|
"hs": hs,
|
|
"hs": hs,
|
|
|
"as": as_,
|
|
"as": as_,
|
|
|
|
|
+ "league_id": league_id,
|
|
|
|
|
+ "season": season,
|
|
|
|
|
+ "round_no": round_no,
|
|
|
|
|
+ "token": token,
|
|
|
}
|
|
}
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
matches.sort(key=lambda x: x["ts"])
|
|
matches.sort(key=lambda x: x["ts"])
|
|
|
|
|
|
|
|
|
|
+ # Optional incremental skip:
|
|
|
|
|
+ # ignore matches already in ratings_history by match_id
|
|
|
|
|
+ if db and incremental:
|
|
|
|
|
+ mids = sorted(
|
|
|
|
|
+ {rec["match_id"] for rec in matches if rec.get("match_id") is not None}
|
|
|
|
|
+ )
|
|
|
|
|
+ if mids:
|
|
|
|
|
+ try:
|
|
|
|
|
+ db_any: Any = db # dynamic access to find
|
|
|
|
|
+ existing = db_any.find(
|
|
|
|
|
+ "ratings_history",
|
|
|
|
|
+ {"match_id": {"$in": mids}},
|
|
|
|
|
+ projection={"match_id": 1},
|
|
|
|
|
+ limit=None,
|
|
|
|
|
+ )
|
|
|
|
|
+ processed_mids = {doc.get("match_id") for doc in (existing or [])}
|
|
|
|
|
+ matches = [
|
|
|
|
|
+ rec
|
|
|
|
|
+ for rec in matches
|
|
|
|
|
+ if rec.get("match_id") not in processed_mids
|
|
|
|
|
+ ]
|
|
|
|
|
+ except (RuntimeError, ValueError, TypeError, AttributeError):
|
|
|
|
|
+ # Best-effort: if backend doesn't support find, process all
|
|
|
|
|
+ pass
|
|
|
|
|
+
|
|
|
ratings: Dict[str, float] = {}
|
|
ratings: Dict[str, float] = {}
|
|
|
|
|
+ # Seed ratings from existing elo_ratings snapshot for true incremental behavior
|
|
|
|
|
+ if db and seed_from_snapshot:
|
|
|
|
|
+ try:
|
|
|
|
|
+ db_any: Any = db
|
|
|
|
|
+ snaps = db_any.find(
|
|
|
|
|
+ "elo_ratings",
|
|
|
|
|
+ projection={"_id": 1, "team_id": 1, "rating": 1},
|
|
|
|
|
+ limit=None,
|
|
|
|
|
+ )
|
|
|
|
|
+ for s in snaps or []:
|
|
|
|
|
+ # Support both flat and nested shapes
|
|
|
|
|
+ tid = (
|
|
|
|
|
+ s.get("_id")
|
|
|
|
|
+ or s.get("team_id")
|
|
|
|
|
+ or (s.get("data", {}) or {}).get("team_id")
|
|
|
|
|
+ )
|
|
|
|
|
+ r = (
|
|
|
|
|
+ s.get("rating")
|
|
|
|
|
+ if "rating" in s
|
|
|
|
|
+ else (s.get("data", {}) or {}).get("rating")
|
|
|
|
|
+ )
|
|
|
|
|
+ if tid is not None and r is not None:
|
|
|
|
|
+ try:
|
|
|
|
|
+ ratings[str(tid)] = float(r)
|
|
|
|
|
+ except (ValueError, TypeError):
|
|
|
|
|
+ continue
|
|
|
|
|
+ except (RuntimeError, ValueError, TypeError, AttributeError):
|
|
|
|
|
+ # If snapshot read fails, continue with default ratings
|
|
|
|
|
+ pass
|
|
|
expectations = []
|
|
expectations = []
|
|
|
|
|
|
|
|
def _expected(r_home: float, r_away: float) -> float:
|
|
def _expected(r_home: float, r_away: float) -> float:
|
|
@@ -180,19 +315,43 @@ class EloAnalyzer(AnalyticsBase):
|
|
|
ra2 = ra - change
|
|
ra2 = ra - change
|
|
|
ratings[h] = rh2
|
|
ratings[h] = rh2
|
|
|
ratings[a] = ra2
|
|
ratings[a] = ra2
|
|
|
- # history for this match (no id to allow multiple entries)
|
|
|
|
|
|
|
+ # history for this match; ids are deterministic ("<team_id>:<match_id>") for idempotency, or None when the match has no id
|
|
|
|
|
+ league_id = rec.get("league_id")
|
|
|
|
|
+ season = rec.get("season")
|
|
|
|
|
+ round_no = rec.get("round_no")
|
|
|
|
|
+ token = rec.get("token")
|
|
|
|
|
+ common_ctx = {
|
|
|
|
|
+ "match_id": mid,
|
|
|
|
|
+ "ts": ts,
|
|
|
|
|
+ "league_id": league_id,
|
|
|
|
|
+ "season": season,
|
|
|
|
|
+ "round_no": round_no,
|
|
|
|
|
+ "token": token,
|
|
|
|
|
+ }
|
|
|
|
|
+ hid = f"{h}:{mid}" if mid is not None else None
|
|
|
|
|
+ aid = f"{a}:{mid}" if mid is not None else None
|
|
|
history_docs.append(
|
|
history_docs.append(
|
|
|
Document(
|
|
Document(
|
|
|
- id=None,
|
|
|
|
|
|
|
+ id=hid,
|
|
|
kind="ratings_history",
|
|
kind="ratings_history",
|
|
|
- data={"team_id": h, "rating": rh2, "ts": ts, "match_id": mid},
|
|
|
|
|
|
|
+ data={
|
|
|
|
|
+ "team_id": h,
|
|
|
|
|
+ "team_name": team_names.get(h),
|
|
|
|
|
+ "rating": rh2,
|
|
|
|
|
+ **common_ctx,
|
|
|
|
|
+ },
|
|
|
)
|
|
)
|
|
|
)
|
|
)
|
|
|
history_docs.append(
|
|
history_docs.append(
|
|
|
Document(
|
|
Document(
|
|
|
- id=None,
|
|
|
|
|
|
|
+ id=aid,
|
|
|
kind="ratings_history",
|
|
kind="ratings_history",
|
|
|
- data={"team_id": a, "rating": ra2, "ts": ts, "match_id": mid},
|
|
|
|
|
|
|
+ data={
|
|
|
|
|
+ "team_id": a,
|
|
|
|
|
+ "team_name": team_names.get(a),
|
|
|
|
|
+ "rating": ra2,
|
|
|
|
|
+ **common_ctx,
|
|
|
|
|
+ },
|
|
|
)
|
|
)
|
|
|
)
|
|
)
|
|
|
if return_expectations:
|
|
if return_expectations:
|
|
@@ -206,7 +365,15 @@ class EloAnalyzer(AnalyticsBase):
|
|
|
}
|
|
}
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
- result: Dict[str, Any] = {"ratings": ratings, "processed": len(matches)}
|
|
|
|
|
|
|
+ # Compute ranks across all teams after processing
|
|
|
|
|
+ ranked = sorted(ratings.items(), key=lambda x: x[1], reverse=True)
|
|
|
|
|
+ ranks: Dict[str, int] = {tid: idx + 1 for idx, (tid, _) in enumerate(ranked)}
|
|
|
|
|
+
|
|
|
|
|
+ result: Dict[str, Any] = {
|
|
|
|
|
+ "ratings": ratings,
|
|
|
|
|
+ "processed": len(matches),
|
|
|
|
|
+ "ranks": ranks,
|
|
|
|
|
+ }
|
|
|
if return_expectations:
|
|
if return_expectations:
|
|
|
result["expectations"] = expectations
|
|
result["expectations"] = expectations
|
|
|
|
|
|
|
@@ -215,7 +382,14 @@ class EloAnalyzer(AnalyticsBase):
|
|
|
if ratings:
|
|
if ratings:
|
|
|
docs = [
|
|
docs = [
|
|
|
Document(
|
|
Document(
|
|
|
- id=tid, kind="elo_ratings", data={"team_id": tid, "rating": r}
|
|
|
|
|
|
|
+ id=tid,
|
|
|
|
|
+ kind="elo_ratings",
|
|
|
|
|
+ data={
|
|
|
|
|
+ "team_id": tid,
|
|
|
|
|
+ "team_name": team_names.get(tid),
|
|
|
|
|
+ "rating": r,
|
|
|
|
|
+ "rank": ranks.get(tid),
|
|
|
|
|
+ },
|
|
|
)
|
|
)
|
|
|
for tid, r in ratings.items()
|
|
for tid, r in ratings.items()
|
|
|
]
|
|
]
|