test_pipeline.py 1.0 KB

1234567891011121314151617181920212223242526272829303132
  1. """End-to-end pipeline test for Claudia runner and components."""
  2. from __future__ import annotations
  3. from claudia.db import MemoryDB
  4. from claudia.reporter import MemoryReporter
  5. from claudia.scheduler import Runner
  6. from claudia.spiders import ExampleSpider
  7. def test_pipeline_end_to_end():
  8. """Run an example spider through the runner and assert outcomes."""
  9. spider = ExampleSpider()
  10. db = MemoryDB()
  11. reporter = MemoryReporter()
  12. runner = Runner(db=db, reporters=[reporter])
  13. urls = [
  14. "https://example.com/a",
  15. "https://example.com/b",
  16. ]
  17. summary = runner.run({spider: urls})
  18. # 插入应等于解析产出(每个 URL 一个文档)
  19. assert summary.total_docs == 2
  20. assert summary.per_spider.get(spider.name) == 2
  21. assert not summary.errors
  22. # reporter 至少包含 start/success/summary 事件
  23. assert any(e.startswith("start:") for e in reporter.events)
  24. assert any(e.startswith("success:") for e in reporter.events)
  25. assert any(e.startswith("summary:") for e in reporter.events)