"""HTTPSpider integration test with mocked network calls."""

from __future__ import annotations

import types
from typing import Any, Dict

from claudia.spiders import HTTPSpider, HTTPConfig
from claudia.db import MemoryDB
from claudia.scheduler import Runner
from claudia.reporter import MemoryReporter


def test_http_spider_parse_and_runner_integration():
    """Drive two URLs through a Runner with the spider's fetch stubbed out.

    The spider instance gets a replacement ``fetch`` that returns a fixed
    string, so no real network call is made. The test then checks the
    summary returned by ``Runner.run`` and the events captured by the
    in-memory reporter.
    """

    def _canned_fetch(self, url: str, payload: Dict[str, Any]) -> str:
        # Both arguments are ignored on purpose: every call yields the same body.
        return "hello"

    target = HTTPSpider(HTTPConfig(method="GET"))
    # Bind the stub to this one instance only; the HTTPSpider class is untouched.
    target.fetch = types.MethodType(_canned_fetch, target)

    store = MemoryDB()
    recorder = MemoryReporter()
    pipeline = Runner(db=store, reporters=[recorder])

    outcome = pipeline.run(
        {target: ["https://example.com/a", "https://example.com/b"]}
    )

    # One document per URL, all attributed to this spider, with no errors.
    assert outcome.total_docs == 2
    assert outcome.per_spider.get(target.name) == 2
    assert not outcome.errors

    # The reporter must have recorded at least one event of each kind.
    for prefix in ("start:", "success:", "summary:"):
        assert any(event.startswith(prefix) for event in recorder.events)