| 123456789101112131415161718192021222324252627282930313233343536373839 |
- """HTTPSpider integration test with mocked network calls."""
- from __future__ import annotations
- from typing import Any, Dict
- import types
- from claudia.spiders import HTTPSpider, HTTPConfig
- from claudia.db import MemoryDB
- from claudia.scheduler import Runner
- from claudia.reporter import MemoryReporter
def test_http_spider_parse_and_runner_integration():
    """Drive two URLs through Runner with HTTPSpider.fetch stubbed out.

    The spider's ``fetch`` attribute is replaced on the instance with a stub
    that returns a fixed HTML string, which is intended to keep the test off
    the real network. The test then checks the run summary (document counts,
    no errors) and that the in-memory reporter recorded at least one start,
    one success and one summary event.
    """
    canned_html = "<html><body>hello</body></html>"

    def _stub_fetch(self, url: str, payload: Dict[str, Any]) -> str:
        # The arguments exist only to match the (url, payload) call shape;
        # every request gets the same canned response.
        return canned_html

    spider = HTTPSpider(HTTPConfig(method="GET"))
    # Bind the stub to this one instance so the HTTPSpider class is untouched.
    spider.fetch = types.MethodType(_stub_fetch, spider)

    store = MemoryDB()
    recorder = MemoryReporter()
    runner = Runner(db=store, reporters=[recorder])

    targets = ["https://example.com/a", "https://example.com/b"]
    outcome = runner.run({spider: targets})

    # One document per URL, all attributed to this spider, with no errors.
    assert outcome.total_docs == 2
    assert outcome.per_spider.get(spider.name) == 2
    assert not outcome.errors

    # Reporter events were recorded for each phase of the run.
    for prefix in ("start:", "success:", "summary:"):
        assert any(event.startswith(prefix) for event in recorder.events)
|