Browse source

Add environment-variable configuration to the Dixon-Coles analyzer, with support for time decay and parameter persistence; optimize the computation logic and extend history recording with prediction and snapshot modes.

admin 1 month ago
parent
commit
65e72b89ba
2 files changed with 429 additions and 88 deletions
  1. +54 −1   scripts/run_analyzers.py
  2. +375 −87 src/databank/analytics/dixon_coles.py
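For reference, a minimal sketch of driving the script with the new environment variables (names and defaults are taken from the diff below; launching the entry point via subprocess is an assumption about local usage):

    import os
    import subprocess

    # Hypothetical driver: configure the DC analyzer through the environment,
    # then run the analyzer script in a child process that inherits os.environ.
    os.environ.update(
        {
            "DATABANK_DC_HALFLIFE_DAYS": "120",    # faster decay than the 180-day default
            "DATABANK_DC_RHO_RANGE": "-0.2,0.2",   # parsed as "lo,hi"
            "DATABANK_DC_RHO_STEP": "0.005",
            "DATABANK_DC_MAX_ITERS": "30",
            "DATABANK_DC_HISTORY": "predictions",  # "none" | "predictions" | "snapshots"
            "DATABANK_DC_SNAPSHOT_EVERY": "5",
            "DATABANK_DC_MAX_GOALS": "10",
        }
    )
    subprocess.run(["python", "scripts/run_analyzers.py"], check=True)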

+ 54 - 1
scripts/run_analyzers.py

@@ -88,13 +88,59 @@ def main() -> None:
         season_mc_cls(),
     ]
 
+    # Prepare optional DC config from environment
+    def _env_float(name: str, default: float) -> float:
+        raw = os.getenv(name)
+        try:
+            # Fall back to the default when unset or unparseable.
+            return float(raw) if raw is not None else default
+        except ValueError:
+            return default
+
+    def _env_int(name: str, default: int) -> int:
+        raw = os.getenv(name)
+        try:
+            return int(raw) if raw is not None else default
+        except ValueError:
+            return default
+
+    def _env_rho_range(name: str, default: tuple[float, float]) -> tuple[float, float]:
+        s = os.getenv(name)
+        if not s:
+            return default
+        try:
+            lo_str, hi_str = s.split(",", 1)
+            return float(lo_str), float(hi_str)
+        except (ValueError, TypeError):
+            return default
+
+    dc_kwargs = {
+        "halflife_days": _env_float("DATABANK_DC_HALFLIFE_DAYS", 180.0),
+        "rho_range": _env_rho_range("DATABANK_DC_RHO_RANGE", (-0.3, 0.3)),
+        "rho_step": _env_float("DATABANK_DC_RHO_STEP", 0.01),
+        "max_iters": _env_int("DATABANK_DC_MAX_ITERS", 20),
+        "tol": _env_float("DATABANK_DC_TOL", 1e-4),
+    }
+
+    # Optional history configuration for DC
+    history_mode = os.getenv("DATABANK_DC_HISTORY", "none").strip().lower()
+    if history_mode in {"none", "predictions", "snapshots"}:
+        dc_kwargs["history"] = history_mode
+    dc_kwargs["snapshot_every"] = _env_int("DATABANK_DC_SNAPSHOT_EVERY", 10)
+    dc_kwargs["max_iters_history"] = _env_int("DATABANK_DC_MAX_ITERS_HISTORY", 10)
+    dc_kwargs["max_goals"] = _env_int("DATABANK_DC_MAX_GOALS", 8)
+
     for analyzer in analyzers:
         print(f"Running analyzer: {analyzer.__class__.__name__}")
         try:
             analyzer.prepare(data)
             analyzer.validate(data)
             transformed = analyzer.transform(data)
-            result = analyzer.compute(transformed, db=db, persist=True)
+            if isinstance(analyzer, dixon_coles_cls):
+                # Pass DC-specific kwargs from environment
+                result = analyzer.compute(transformed, db=db, persist=True, **dc_kwargs)
+                print("    DC config:", dc_kwargs)
+            else:
+                result = analyzer.compute(transformed, db=db, persist=True)
             analyzer.finalize(result)
             print(f" -> Done: {analyzer.__class__.__name__}")
 
@@ -124,6 +170,13 @@ def main() -> None:
                     print(
                         f"    DC matches_used: {mu}; persisted docs in this run: {persisted}"
                     )
+                # Optional extra collections
+                preds_cnt = _safe_count(db, "dc_predictions")
+                snaps_cnt = _safe_count(db, "dc_params_history")
+                if preds_cnt:
+                    print("    DC predictions count:", preds_cnt)
+                if snaps_cnt:
+                    print("    DC params history count:", snaps_cnt)
         except NotImplementedError as exc:
             print(f" -> Skipped (not implemented): {exc}")
         except (RuntimeError, ValueError) as exc:  # pragma: no cover - diagnostics only
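All three env helpers above degrade to their defaults on unset or malformed values instead of raising. A standalone illustration of that contract (re-implemented here for demonstration, not imported from the script):

    import os

    def env_float(name: str, default: float) -> float:
        # Mirrors _env_float: default on unset or unparseable input.
        raw = os.getenv(name)
        try:
            return float(raw) if raw is not None else default
        except ValueError:
            return default

    def env_rho_range(name: str, default: tuple[float, float]) -> tuple[float, float]:
        # Mirrors _env_rho_range: expects "lo,hi", else the default pair.
        raw = os.getenv(name)
        if not raw:
            return default
        try:
            lo, hi = raw.split(",", 1)
            return float(lo), float(hi)
        except ValueError:
            return default

    os.environ["DATABANK_DC_RHO_RANGE"] = "-0.25,0.25"
    os.environ["DATABANK_DC_HALFLIFE_DAYS"] = "oops"
    assert env_rho_range("DATABANK_DC_RHO_RANGE", (-0.3, 0.3)) == (-0.25, 0.25)
    assert env_float("DATABANK_DC_HALFLIFE_DAYS", 180.0) == 180.0  # malformed -> default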

+ 375 - 87
src/databank/analytics/dixon_coles.py

@@ -13,10 +13,11 @@ from typing import Any, Dict, Iterable, Optional, Tuple
 from collections import defaultdict
 import re
 import math
+import time as _t
+import calendar as _cal
 
 from databank.core.models import Document
 from databank.db.base import BaseDB
-
 from .base import AnalyticsBase
 
 
@@ -24,26 +25,57 @@ class DixonColesAnalyzer(AnalyticsBase):
     """Estimate DC parameters with time-decayed likelihood and rho correlation."""
 
     def compute(self, data: Any, **kwargs: Any) -> Any:  # noqa: D401
-        """Fit/estimate Poisson-style parameters with DC correlation and decay.
+        """Fit Poisson-style parameters with DC correlation and time decay.
 
         Args:
-            data: Iterable of match-like docs (same shape as Elo input).
-            **kwargs: group_by ("league_season"|"global"), persist (bool), db (BaseDB),
-                halflife_days (float, default 180), rho_range (tuple[float,float], default (-0.3,0.3)),
-                rho_step (float, default 0.01), max_iters (int, default 20), tol (float, default 1e-4).
-
-        Returns:
-            dict with per-group parameters summary. If persist=True, writes
-            documents to 'dc_params'.
+            data: Iterable of match-like docs (same as Elo input).
+            group_by: "league_season" (default) or "global".
+            persist: Whether to persist results (default True).
+            db: Database handle when persist=True.
+            halflife_days: Time-decay half-life in days (default 180).
+            rho_range: Search interval for rho, e.g. (-0.3, 0.3).
+            rho_step: Grid step for rho search (default 0.01).
+            max_iters: IPF max iterations (default 20).
+            tol: IPF convergence tolerance (default 1e-4).
+            optimize: "ipf" (default) or "coord" (joint log-domain updates).
+            l2_attack: L2 regularization on log attack (default 0.0).
+            l2_defense: L2 regularization on log defense (default 0.0).
+            l2_base: L2 regularization on log base means (default 0.0).
+            step_size: Learning rate for "coord" optimizer (default 0.1).
+            outer_iters: Max outer iterations for "coord" (default 10).
+            history: "none" (default), "predictions" (per-match), or "snapshots".
+                - predictions: persist pre-match H/D/A probs and mu/nu for each match.
+                - snapshots: persist team parameter snapshots at cutoffs.
+            snapshot_every: For snapshots, persist every Nth match (default 10).
+            max_iters_history: Iterations for history fits (default max(5, max_iters // 2)).
+            max_goals: Max goals cap for probability table (default 8).
+
+        Returns:
+            Dict with a per-group parameter summary. When persist=True,
+            documents are written to 'dc_params' (plus 'dc_predictions' or
+            'dc_params_history' when history is enabled).
+        """
+
         group_by = str(kwargs.get("group_by", "league_season"))
         persist = bool(kwargs.get("persist", True))
         db: Optional[BaseDB] = kwargs.get("db")
         halflife_days: float = float(kwargs.get("halflife_days", 180.0))
-        rho_range: Tuple[float, float] = tuple(kwargs.get("rho_range", (-0.3, 0.3)))  # type: ignore[assignment]
+        rho_range: Tuple[float, float] = tuple(
+            kwargs.get("rho_range", (-0.3, 0.3))
+        )  # type: ignore[assignment]
         rho_step: float = float(kwargs.get("rho_step", 0.01))
         max_iters: int = int(kwargs.get("max_iters", 20))
         tol: float = float(kwargs.get("tol", 1e-4))
+        optimize: str = str(kwargs.get("optimize", "ipf"))
+        l2_attack: float = float(kwargs.get("l2_attack", 0.0))
+        l2_defense: float = float(kwargs.get("l2_defense", 0.0))
+        l2_base: float = float(kwargs.get("l2_base", 0.0))
+        step_size: float = float(kwargs.get("step_size", 0.1))
+        outer_iters: int = int(kwargs.get("outer_iters", 10))
+        history: str = str(kwargs.get("history", "none"))
+        snapshot_every: int = int(kwargs.get("snapshot_every", 10))
+        max_iters_history: Optional[int] = kwargs.get("max_iters_history")
+        if max_iters_history is not None:
+            try:
+                max_iters_history = int(max_iters_history)
+            except (ValueError, TypeError):
+                max_iters_history = None
+        max_goals: int = int(kwargs.get("max_goals", 8))
 
         # Helpers
         def _get_ts(match: dict) -> int:
@@ -64,11 +96,8 @@ class DixonColesAnalyzer(AnalyticsBase):
                 try:
                     y, m, d = [int(x) for x in re.split(r"[-/]", date_s.strip())[:3]]
                     hh, mm = [int(x) for x in time_s.strip().split(":")[:2]]
-                    # naive epoch assume UTC; avoid importing datetime to keep deps minimal
-                    # Use a rough conversion: days since epoch * 86400 + seconds
+                    # Naive epoch, assumed UTC; rough conversion via calendar.timegm.
                     # Fall back to 0 if parsing fails at any step.
-                    import time as _t
-                    import calendar as _cal
 
                     try:
                         struct = _t.struct_time((y, m, d, hh, mm, 0, 0, 0, 0))
@@ -179,12 +208,17 @@ class DixonColesAnalyzer(AnalyticsBase):
                 }
             )
 
-        def _fit_group(rows: list[dict]) -> tuple[dict, dict]:
+        def _fit_group(
+            rows: list[dict],
+            max_iters_override: Optional[int] = None,
+            outer_iters_override: Optional[int] = None,
+            ref_ts: Optional[int] = None,
+        ) -> tuple[dict, dict]:
             if not rows:
                 return {}, {"matches": 0}
 
-            # Time decay weights
-            max_ts = max(r["ts"] for r in rows)
+            # Time decay weights (use provided ref_ts if given; else latest in rows)
+            max_ts = int(ref_ts) if ref_ts is not None else max(r["ts"] for r in rows)
             lam = math.log(2.0) / max(1.0, halflife_days)
             for r in rows:
                 age_days = max(0.0, (max_ts - r["ts"]) / 86400.0)
@@ -237,75 +271,173 @@ class DixonColesAnalyzer(AnalyticsBase):
                 nu = base_a * att_a[r["away"]] * def_h[r["home"]]
                 return (max(1e-9, mu), max(1e-9, nu))
 
-            # IPF-like alternating updates
-            for _ in range(max_iters):
-                delta = 0.0
-                # Update attack_home
-                for t in teams:
-                    num = 0.0
-                    den = 0.0
-                    for r in rows:
-                        if r["home"] != t:
-                            continue
-                        mu, _ = _expected(r)
-                        num += r["w"] * r["hs"]
-                        den += r["w"] * mu
-                    if den > 0:
-                        factor = num / den
-                        delta = max(delta, abs(1 - factor))
-                        att_h[t] *= factor
-                _normalize()
-
-                # Update attack_away
-                for t in teams:
-                    num = 0.0
-                    den = 0.0
-                    for r in rows:
-                        if r["away"] != t:
-                            continue
-                        _, nu = _expected(r)
-                        num += r["w"] * r["as"]
-                        den += r["w"] * nu
-                    if den > 0:
-                        factor = num / den
-                        delta = max(delta, abs(1 - factor))
-                        att_a[t] *= factor
-                _normalize()
-
-                # Update defense_away (affects mu)
-                for t in teams:
-                    num = 0.0
-                    den = 0.0
-                    for r in rows:
-                        if r["away"] != t:
-                            continue
-                        mu, _ = _expected(r)
-                        num += r["w"] * r["hs"]
-                        den += r["w"] * mu
-                    if den > 0:
-                        factor = num / den
-                        delta = max(delta, abs(1 - factor))
-                        def_a[t] *= factor
-                _normalize()
-
-                # Update defense_home (affects nu)
-                for t in teams:
-                    num = 0.0
-                    den = 0.0
+            # Select iteration caps (allow overrides for history runs)
+            local_max_iters = (
+                int(max_iters_override) if max_iters_override is not None else max_iters
+            )
+            local_outer_iters = (
+                int(outer_iters_override)
+                if outer_iters_override is not None
+                else outer_iters
+            )
+
+            if optimize == "ipf":
+                # IPF-like alternating updates
+                for _ in range(local_max_iters):
+                    delta = 0.0
+                    # Update attack_home
+                    for t in teams:
+                        num = 0.0
+                        den = 0.0
+                        for r in rows:
+                            if r["home"] != t:
+                                continue
+                            mu, _ = _expected(r)
+                            num += r["w"] * r["hs"]
+                            den += r["w"] * mu
+                        if den > 0:
+                            factor = num / den
+                            delta = max(delta, abs(1 - factor))
+                            att_h[t] *= factor
+                    _normalize()
+
+                    # Update attack_away
+                    for t in teams:
+                        num = 0.0
+                        den = 0.0
+                        for r in rows:
+                            if r["away"] != t:
+                                continue
+                            _, nu = _expected(r)
+                            num += r["w"] * r["as"]
+                            den += r["w"] * nu
+                        if den > 0:
+                            factor = num / den
+                            delta = max(delta, abs(1 - factor))
+                            att_a[t] *= factor
+                    _normalize()
+
+                    # Update defense_away (affects mu)
+                    for t in teams:
+                        num = 0.0
+                        den = 0.0
+                        for r in rows:
+                            if r["away"] != t:
+                                continue
+                            mu, _ = _expected(r)
+                            num += r["w"] * r["hs"]
+                            den += r["w"] * mu
+                        if den > 0:
+                            factor = num / den
+                            delta = max(delta, abs(1 - factor))
+                            def_a[t] *= factor
+                    _normalize()
+
+                    # Update defense_home (affects nu)
+                    for t in teams:
+                        num = 0.0
+                        den = 0.0
+                        for r in rows:
+                            if r["home"] != t:
+                                continue
+                            _, nu = _expected(r)
+                            num += r["w"] * r["as"]
+                            den += r["w"] * nu
+                        if den > 0:
+                            factor = num / den
+                            delta = max(delta, abs(1 - factor))
+                            def_h[t] *= factor
+                    _normalize()
+
+                    if delta < tol:
+                        break
+            else:
+                # Coordinate updates in log-domain with L2 regularization
+                # Initialize logs at 0 (since params at 1.0)
+                log_att_h = {t: 0.0 for t in teams}
+                log_att_a = {t: 0.0 for t in teams}
+                log_def_h = {t: 0.0 for t in teams}
+                log_def_a = {t: 0.0 for t in teams}
+                log_base_h = math.log(max(1e-9, base_h))
+                log_base_a = math.log(max(1e-9, base_a))
+
+                def _sync_from_logs() -> None:
+                    nonlocal base_h, base_a
+                    for t in teams:
+                        att_h[t] = math.exp(log_att_h[t])
+                        att_a[t] = math.exp(log_att_a[t])
+                        def_h[t] = math.exp(log_def_h[t])
+                        def_a[t] = math.exp(log_def_a[t])
+                    base_h = math.exp(log_base_h)
+                    base_a = math.exp(log_base_a)
+
+                def _center_logs() -> None:
+                    # Enforce identifiability: mean of logs = 0 per block
+                    def _center(d: dict[str, float]) -> None:
+                        if len(d) == 0:
+                            return
+                        mean = sum(d.values()) / len(d)
+                        for k in d:
+                            d[k] -= mean
+
+                    _center(log_att_h)
+                    _center(log_att_a)
+                    _center(log_def_h)
+                    _center(log_def_a)
+
+                for _ in range(max(1, local_outer_iters)):
+                    # attack_home gradients
+                    for t in teams:
+                        grad = -l2_attack * log_att_h[t]
+                        for r in rows:
+                            if r["home"] != t:
+                                continue
+                            mu, _ = _expected(r)
+                            grad += r["w"] * (r["hs"] - mu)
+                        log_att_h[t] += step_size * grad / (sum_w + 1e-9)
+
+                    # attack_away gradients
+                    for t in teams:
+                        grad = -l2_attack * log_att_a[t]
+                        for r in rows:
+                            if r["away"] != t:
+                                continue
+                            _, nu = _expected(r)
+                            grad += r["w"] * (r["as"] - nu)
+                        log_att_a[t] += step_size * grad / (sum_w + 1e-9)
+
+                    # defense_away gradients (mu)
+                    for t in teams:
+                        grad = -l2_defense * log_def_a[t]
+                        for r in rows:
+                            if r["away"] != t:
+                                continue
+                            mu, _ = _expected(r)
+                            grad += r["w"] * (r["hs"] - mu)
+                        log_def_a[t] += step_size * grad / (sum_w + 1e-9)
+
+                    # defense_home gradients (nu)
+                    for t in teams:
+                        grad = -l2_defense * log_def_h[t]
+                        for r in rows:
+                            if r["home"] != t:
+                                continue
+                            _, nu = _expected(r)
+                            grad += r["w"] * (r["as"] - nu)
+                        log_def_h[t] += step_size * grad / (sum_w + 1e-9)
+
+                    # base means
+                    grad_bh = -l2_base * log_base_h
+                    grad_ba = -l2_base * log_base_a
                     for r in rows:
-                        if r["home"] != t:
-                            continue
-                        _, nu = _expected(r)
-                        num += r["w"] * r["as"]
-                        den += r["w"] * nu
-                    if den > 0:
-                        factor = num / den
-                        delta = max(delta, abs(1 - factor))
-                        def_h[t] *= factor
-                _normalize()
-
-                if delta < tol:
-                    break
+                        mu, nu = _expected(r)
+                        grad_bh += r["w"] * (r["hs"] - mu)
+                        grad_ba += r["w"] * (r["as"] - nu)
+                    log_base_h += step_size * grad_bh / (sum_w + 1e-9)
+                    log_base_a += step_size * grad_ba / (sum_w + 1e-9)
+
+                    _center_logs()
+                    _sync_from_logs()
 
             # Given parameters, grid-search rho for DC correlation
             def _dc_phi(hg: int, ag: int, mu: float, nu: float, rho: float) -> float:
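In the "coord" branch above, each log-parameter takes a damped gradient step: the weighted Poisson score sum of w * (observed - expected), minus an L2 pull of l2 * theta, scaled by step_size / sum_w. A single-parameter sketch of that update rule (rows and values here are made up for illustration):

    import math

    # One log-domain step for a single home-attack parameter, mirroring the
    # "coord" update; rows carry weight w, observed home goals hs, expected mu.
    rows = [
        {"w": 1.0, "hs": 2, "mu": 1.4},
        {"w": 0.5, "hs": 0, "mu": 1.1},
    ]
    log_att = 0.0                   # parameters start at 1.0, so log = 0
    l2_attack, step_size = 0.1, 0.1
    sum_w = sum(r["w"] for r in rows)

    grad = -l2_attack * log_att     # L2 regularization pulls the log toward 0
    for r in rows:
        grad += r["w"] * (r["hs"] - r["mu"])   # Poisson score term
    log_att += step_size * grad / (sum_w + 1e-9)

    print(math.exp(log_att))        # multiplicative attack factor after one step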
@@ -324,7 +456,8 @@ class DixonColesAnalyzer(AnalyticsBase):
                 s = 0.0
                 for r in rows:
                     mu, nu = _expected(r)
-                    # Poisson log pmf (ignoring constant factorial by Stirling or exact; include exact via math.lgamma)
+                    # Exact Poisson log pmf; the log-factorial term comes
+                    # from math.lgamma(k + 1) below.
                     x = r["hs"]
                     y = r["as"]
                     log_px = x * math.log(mu) - mu - math.lgamma(x + 1)
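The exact Poisson log-pmf used here is log P(X = k) = k*log(mu) - mu - lgamma(k + 1), where lgamma(k + 1) = log(k!). A quick numerical sanity check:

    import math

    lam, k = 1.5, 2
    log_p = k * math.log(lam) - lam - math.lgamma(k + 1)
    direct = lam**k * math.exp(-lam) / math.factorial(k)
    assert abs(math.exp(log_p) - direct) < 1e-12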
@@ -413,6 +546,161 @@ class DixonColesAnalyzer(AnalyticsBase):
                     )
                 )
 
+            # Optional history: pre-match predictions or parameter snapshots
+            if history in ("predictions", "snapshots") and persist and db:
+                # Helper for DC-phi adjusted joint probabilities
+                def _probs_hda(
+                    mu: float, nu: float, rho: float
+                ) -> tuple[float, float, float]:
+                    def _pois(k: int, lam: float) -> float:
+                        return math.exp(k * math.log(lam) - lam - math.lgamma(k + 1))
+
+                    # Same phi adjustment as used in the likelihood.
+                    def _phi(x: int, y: int) -> float:
+                        if x > 1 or y > 1:
+                            return 1.0
+                        if x == 0 and y == 0:
+                            return max(1e-9, 1.0 - mu * nu * rho)
+                        if x == 0 and y == 1:
+                            return max(1e-9, 1.0 + mu * rho)
+                        if x == 1 and y == 0:
+                            return max(1e-9, 1.0 + nu * rho)
+                        return max(1e-9, 1.0 - rho)  # x == 1 and y == 1
+
+                    total = 0.0
+                    ph, pd, pa = 0.0, 0.0, 0.0
+                    cap = max(0, int(max_goals))
+                    for x in range(cap + 1):
+                        px = _pois(x, mu)
+                        for y in range(cap + 1):
+                            py = _pois(y, nu)
+                            pr = px * py * _phi(x, y)
+                            total += pr
+                            if x > y:
+                                ph += pr
+                            elif x == y:
+                                pd += pr
+                            else:
+                                pa += pr
+                    if total <= 0:
+                        return 0.0, 0.0, 0.0
+                    inv = 1.0 / total
+                    return ph * inv, pd * inv, pa * inv
+
+                # Iterate matches chronologically; fit using only prior matches
+                rows_sorted = sorted(rows, key=lambda r: r["ts"])  # ascending
+                hist_iters = (
+                    int(max_iters_history)
+                    if isinstance(max_iters_history, int)
+                    else max(5, max_iters // 2)
+                )
+                hist_outer = max(1, outer_iters // 2)
+
+                for i, r in enumerate(rows_sorted):
+                    prior = rows_sorted[:i]
+                    if not prior:
+                        continue
+                    p_h, st_h = _fit_group(
+                        prior,
+                        max_iters_override=hist_iters,
+                        outer_iters_override=hist_outer,
+                        ref_ts=int(r["ts"]),
+                    )
+
+                    # Build parameter maps
+                    base_h = float(st_h.get("base_home", 1.0))
+                    base_a = float(st_h.get("base_away", 1.0))
+                    rho_h = float(st_h.get("rho", 0.0))
+                    att_h_map = {
+                        k: float(v.get("attack_home", 1.0)) for k, v in p_h.items()
+                    }
+                    att_a_map = {
+                        k: float(v.get("attack_away", 1.0)) for k, v in p_h.items()
+                    }
+                    def_h_map = {
+                        k: float(v.get("defense_home", 1.0)) for k, v in p_h.items()
+                    }
+                    def_a_map = {
+                        k: float(v.get("defense_away", 1.0)) for k, v in p_h.items()
+                    }
+
+                    mu = (
+                        base_h
+                        * att_h_map.get(r["home"], 1.0)
+                        * def_a_map.get(r["away"], 1.0)
+                    )
+                    nu = (
+                        base_a
+                        * att_a_map.get(r["away"], 1.0)
+                        * def_h_map.get(r["home"], 1.0)
+                    )
+
+                    if history == "predictions":
+                        ph, pd, pa = _probs_hda(mu, nu, rho_h)
+                        docs.append(
+                            Document(
+                                id=f"{gk}:{r['ts']}:{r['home']}:{r['away']}",
+                                kind="dc_predictions",
+                                data={
+                                    "group": gk,
+                                    "home": r["home"],
+                                    "away": r["away"],
+                                    "cutoff_ts": int(r["ts"]),
+                                    "train_matches": int(len(prior)),
+                                    "mu": float(mu),
+                                    "nu": float(nu),
+                                    "rho": float(rho_h),
+                                    "p_home": float(ph),
+                                    "p_draw": float(pd),
+                                    "p_away": float(pa),
+                                    "observed_h": int(r["hs"]),
+                                    "observed_a": int(r["as"]),
+                                    "model": "dixon_coles",
+                                    "halflife_days": float(halflife_days),
+                                },
+                            )
+                        )
+
+                    if history == "snapshots" and (i % max(1, snapshot_every) == 0):
+                        cut_ts = int(r["ts"])
+                        for team_id, vals in p_h.items():
+                            docs.append(
+                                Document(
+                                    id=f"{gk}:{cut_ts}:{team_id}",
+                                    kind="dc_params_history",
+                                    data={
+                                        "group": gk,
+                                        "team_id": team_id,
+                                        "cutoff_ts": cut_ts,
+                                        "train_matches": int(len(prior)),
+                                        "attack_home": float(
+                                            vals.get("attack_home", 1.0)
+                                        ),
+                                        "attack_away": float(
+                                            vals.get("attack_away", 1.0)
+                                        ),
+                                        "defense_home": float(
+                                            vals.get("defense_home", 1.0)
+                                        ),
+                                        "defense_away": float(
+                                            vals.get("defense_away", 1.0)
+                                        ),
+                                        "league_home_avg": float(base_h),
+                                        "league_away_avg": float(base_a),
+                                        "rho": float(rho_h),
+                                        "halflife_days": float(halflife_days),
+                                    },
+                                )
+                            )
+
         if persist and db and docs:
             db.insert_many(docs)
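For context on what the predictions history stores: p_home/p_draw/p_away come from renormalizing a DC-adjusted Poisson grid up to max_goals, as in _probs_hda above. A self-contained re-derivation with illustrative parameter values:

    import math

    def dc_hda(mu: float, nu: float, rho: float, max_goals: int = 8):
        """H/D/A probabilities from a DC-adjusted Poisson grid (sketch of _probs_hda)."""
        def pois(k: int, lam: float) -> float:
            return math.exp(k * math.log(lam) - lam - math.lgamma(k + 1))

        def tau(x: int, y: int) -> float:
            # Dixon-Coles low-score correction; identity outside {0,1} x {0,1}.
            if x > 1 or y > 1:
                return 1.0
            if (x, y) == (0, 0):
                return max(1e-9, 1.0 - mu * nu * rho)
            if (x, y) == (0, 1):
                return max(1e-9, 1.0 + mu * rho)
            if (x, y) == (1, 0):
                return max(1e-9, 1.0 + nu * rho)
            return max(1e-9, 1.0 - rho)

        ph = pd = pa = total = 0.0
        for x in range(max_goals + 1):
            for y in range(max_goals + 1):
                pr = pois(x, mu) * pois(y, nu) * tau(x, y)
                total += pr
                if x > y:
                    ph += pr
                elif x == y:
                    pd += pr
                else:
                    pa += pr
        return ph / total, pd / total, pa / total

    # Illustrative values only; real mu/nu/rho come from the fitted parameters.
    print(dc_hda(mu=1.5, nu=1.1, rho=-0.05))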