gogs
/
claudia


			
"""HTTPSpider integration test with mocked network calls."""

from __future__ import annotations

from typing import Any, Dict

import types

from claudia.spiders import HTTPSpider, HTTPConfig
from claudia.db import MemoryDB
from claudia.scheduler import Runner
from claudia.reporter import MemoryReporter


def test_http_spider_parse_and_runner_integration():
    """Run an HTTPSpider through Runner with ``fetch`` stubbed out.

    The spider's ``fetch`` attribute is replaced on the instance with a bound
    stub that returns a fixed HTML string, so no real request is issued. The
    test then checks the run summary counts and that the in-memory reporter
    recorded start, success and summary events.
    """
    canned_html = "<html><body>hello</body></html>"

    def stub_fetch(self, url: str, payload: Dict[str, Any]) -> str:
        # Arguments are accepted only to mirror the call shape; the response
        # is the same for every URL.
        return canned_html

    spider = HTTPSpider(HTTPConfig(method="GET"))
    # Bind the stub to this one instance; the class itself is left untouched.
    spider.fetch = types.MethodType(stub_fetch, spider)

    db = MemoryDB()
    reporter = MemoryReporter()
    runner = Runner(db=db, reporters=[reporter])

    urls = ["https://example.com/a", "https://example.com/b"]
    expected_docs = len(urls)
    summary = runner.run({spider: urls})

    # One document per URL, all attributed to this spider, and no errors.
    assert summary.total_docs == expected_docs
    assert summary.per_spider.get(spider.name) == expected_docs
    assert not summary.errors

    # At least one recorded event must carry each of these prefixes.
    for prefix in ("start:", "success:", "summary:"):
        assert any(event.startswith(prefix) for event in reporter.events)