test_http_spider.py 1.3 KB

123456789101112131415161718192021222324252627282930313233343536373839
  1. """HTTPSpider integration test with mocked network calls."""
  2. from __future__ import annotations
  3. from typing import Any, Dict
  4. import types
  5. from claudia.spiders import HTTPSpider, HTTPConfig
  6. from claudia.db import MemoryDB
  7. from claudia.scheduler import Runner
  8. from claudia.reporter import MemoryReporter
  9. def test_http_spider_parse_and_runner_integration():
  10. """Mock HTTPSpider.fetch to avoid real network and validate pipeline flow."""
  11. spider = HTTPSpider(HTTPConfig(method="GET"))
  12. def fake_fetch(_url: str, _payload: Dict[str, Any]) -> str:
  13. return "<html><body>hello</body></html>"
  14. # Monkeypatch the instance method
  15. spider.fetch = types.MethodType(lambda self, url, payload: fake_fetch(url, payload), spider)
  16. db = MemoryDB()
  17. reporter = MemoryReporter()
  18. runner = Runner(db=db, reporters=[reporter])
  19. urls = ["https://example.com/a", "https://example.com/b"]
  20. summary = runner.run({spider: urls})
  21. assert summary.total_docs == 2
  22. assert summary.per_spider.get(spider.name) == 2
  23. assert not summary.errors
  24. # Reporter events were recorded
  25. assert any(e.startswith("start:") for e in reporter.events)
  26. assert any(e.startswith("success:") for e in reporter.events)
  27. assert any(e.startswith("summary:") for e in reporter.events)