|
@@ -30,6 +30,7 @@ import json
|
|
|
import random
|
|
import random
|
|
|
import time
|
|
import time
|
|
|
from typing import Any, Mapping
|
|
from typing import Any, Mapping
|
|
|
|
|
+import os
|
|
|
from datetime import datetime, timedelta, timezone
|
|
from datetime import datetime, timedelta, timezone
|
|
|
|
|
|
|
|
try: # Optional dependency; guide user to install if missing
|
|
try: # Optional dependency; guide user to install if missing
|
|
@@ -58,7 +59,7 @@ class GetLeagueMatchListSpider(BaseSpider):
|
|
|
# UTC+8 timezone for date comparisons
|
|
# UTC+8 timezone for date comparisons
|
|
|
tz = timezone(timedelta(hours=8))
|
|
tz = timezone(timedelta(hours=8))
|
|
|
# When future-dated matches exceed this number in a round, suggest stopping the season
|
|
# When future-dated matches exceed this number in a round, suggest stopping the season
|
|
|
- future_exceed_limit: int = 5
|
|
|
|
|
|
|
+ future_exceed_limit: int = int(os.getenv("DATABANK_FUTURE_EXCEED_LIMIT", "5"))
|
|
|
|
|
|
|
|
def build_payload(self, task: Task) -> Payload:
|
|
def build_payload(self, task: Task) -> Payload:
|
|
|
"""Build JSON payload from a MatchListTask (structured input)."""
|
|
"""Build JSON payload from a MatchListTask (structured input)."""
|
|
@@ -198,16 +199,14 @@ class GetLeagueMatchListSpider(BaseSpider):
|
|
|
)
|
|
)
|
|
|
]
|
|
]
|
|
|
|
|
|
|
|
- # First stage: league-only and finished-only (elapsedTime == '已完场')
|
|
|
|
|
- stage1 = [
|
|
|
|
|
|
|
+ # Stage 1: league-only (do NOT filter by elapsedTime yet, to allow future-count)
|
|
|
|
|
+ league_items = [
|
|
|
item
|
|
item
|
|
|
for item in match_list
|
|
for item in match_list
|
|
|
- if isinstance(item, Mapping)
|
|
|
|
|
- and item.get("groupName") == "联赛"
|
|
|
|
|
- and item.get("elapsedTime") == "已完场"
|
|
|
|
|
|
|
+ if isinstance(item, Mapping) and item.get("groupName") == "联赛"
|
|
|
]
|
|
]
|
|
|
|
|
|
|
|
- # Second stage: drop future-dated matches and count them
|
|
|
|
|
|
|
+ # Helper for date extraction
|
|
|
today = datetime.now(self.tz).date()
|
|
today = datetime.now(self.tz).date()
|
|
|
|
|
|
|
|
def _extract_dt(it: Mapping[str, Any]) -> datetime | None:
|
|
def _extract_dt(it: Mapping[str, Any]) -> datetime | None:
|
|
@@ -219,6 +218,10 @@ class GetLeagueMatchListSpider(BaseSpider):
|
|
|
"beginTime",
|
|
"beginTime",
|
|
|
"match_time",
|
|
"match_time",
|
|
|
"start_time",
|
|
"start_time",
|
|
|
|
|
+ "kickOffTime",
|
|
|
|
|
+ "kickoffTime",
|
|
|
|
|
+ "matchStartTime",
|
|
|
|
|
+ "matchBeginTime",
|
|
|
):
|
|
):
|
|
|
if key not in it:
|
|
if key not in it:
|
|
|
continue
|
|
continue
|
|
@@ -235,6 +238,15 @@ class GetLeagueMatchListSpider(BaseSpider):
|
|
|
# String formats
|
|
# String formats
|
|
|
if isinstance(val, str):
|
|
if isinstance(val, str):
|
|
|
s = val.strip()
|
|
s = val.strip()
|
|
|
|
|
+ # Numeric string timestamps (sec/ms)
|
|
|
|
|
+ if s.isdigit():
|
|
|
|
|
+ try:
|
|
|
|
|
+ ts = float(s)
|
|
|
|
|
+ if ts > 1e12:
|
|
|
|
|
+ ts /= 1000.0
|
|
|
|
|
+ return datetime.fromtimestamp(ts, tz=self.tz)
|
|
|
|
|
+ except (ValueError, OSError):
|
|
|
|
|
+ pass
|
|
|
for fmt in (
|
|
for fmt in (
|
|
|
"%Y-%m-%d %H:%M:%S",
|
|
"%Y-%m-%d %H:%M:%S",
|
|
|
"%Y-%m-%d %H:%M",
|
|
"%Y-%m-%d %H:%M",
|
|
@@ -258,12 +270,21 @@ class GetLeagueMatchListSpider(BaseSpider):
|
|
|
continue
|
|
continue
|
|
|
return None
|
|
return None
|
|
|
|
|
|
|
|
|
|
+ # Count future-dated matches across ALL league items (regardless of finished)
|
|
|
future_count = 0
|
|
future_count = 0
|
|
|
- filtered: list[Mapping[str, Any]] = []
|
|
|
|
|
- for it in stage1:
|
|
|
|
|
|
|
+ for it in league_items:
|
|
|
dt = _extract_dt(it)
|
|
dt = _extract_dt(it)
|
|
|
if dt is not None and dt.date() > today:
|
|
if dt is not None and dt.date() > today:
|
|
|
future_count += 1
|
|
future_count += 1
|
|
|
|
|
+
|
|
|
|
|
+ # Stage 2: keep only finished items for persistence, while still guarding against future-dated items
|
|
|
|
|
+ filtered: list[Mapping[str, Any]] = []
|
|
|
|
|
+ for it in league_items:
|
|
|
|
|
+ if it.get("elapsedTime") != "已完场":
|
|
|
|
|
+ continue
|
|
|
|
|
+ dt = _extract_dt(it)
|
|
|
|
|
+ if dt is not None and dt.date() > today:
|
|
|
|
|
+ # Defensive: a finished item should never be future-dated; skip it if that happens
|
|
|
continue
|
|
continue
|
|
|
filtered.append(it)
|
|
filtered.append(it)
|
|
|
|
|
|