"""Export MongoDB info_items collection to an Excel file.""" from __future__ import annotations import os import sys from pathlib import Path from typing import Any, Dict, Iterable, List, Optional import pandas as pd from pymongo import MongoClient from pymongo.errors import PyMongoError MONGO_URI = os.getenv("MONGO_URI", "mongodb://localhost:27017") MONGO_DB = os.getenv("MONGO_DB", "haier") MONGO_COLLECTION = os.getenv("MONGO_COLLECTION", "info_items") DEFAULT_OUTPUT = Path(__file__).resolve().parent.parent / "exports" / "info_items.xlsx" FETCH_BATCH_SIZE = 2000 def _ensure_output_dir(path: Path) -> None: path.parent.mkdir(parents=True, exist_ok=True) def _iter_docs( collection, query: Optional[Dict[str, Any]] = None ) -> Iterable[Dict[str, Any]]: cursor = collection.find(query or {}, batch_size=FETCH_BATCH_SIZE) for doc in cursor: # Convert ObjectId to string for Excel friendliness. if "_id" in doc: doc["_id"] = str(doc["_id"]) yield doc def export_to_excel(output_path: Path, query: Optional[Dict[str, Any]] = None) -> Path: """Fetch documents and write them to an Excel file. Parameters ---------- output_path : Path Destination Excel path. Directories are created if missing. query : Optional[Dict[str, Any]] Optional MongoDB query filter. """ client = MongoClient(MONGO_URI) collection = client[MONGO_DB][MONGO_COLLECTION] _ensure_output_dir(output_path) docs: List[Dict[str, Any]] = list(_iter_docs(collection, query)) if not docs: # Create an empty file with a header to indicate no data. pd.DataFrame().to_excel(output_path, index=False) return output_path df = pd.DataFrame(docs) df.to_excel(output_path, index=False) return output_path def main(argv: List[str]) -> int: """CLI entry point. Usage: python export_info_to_excel.py [output_path]""" output_path = Path(argv[1]) if len(argv) > 1 else DEFAULT_OUTPUT try: result_path = export_to_excel(output_path) except ( PyMongoError, OSError, ValueError, ) as exc: # pragma: no cover - defensive runtime path print(f"Export failed: {exc}", file=sys.stderr) return 1 print(f"Export completed: {result_path}") return 0 if __name__ == "__main__": raise SystemExit(main(sys.argv))