| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283 |
- """Export MongoDB info_items collection to an Excel file."""
- from __future__ import annotations
- import os
- import sys
- from pathlib import Path
- from typing import Any, Dict, Iterable, List, Optional
- import pandas as pd
- from pymongo import MongoClient
- from pymongo.errors import PyMongoError
# Connection settings; each can be overridden through an environment variable
# of the same name, otherwise the literal fallback below is used.
MONGO_URI = os.environ.get("MONGO_URI", "mongodb://localhost:27017")
MONGO_DB = os.environ.get("MONGO_DB", "haier")
MONGO_COLLECTION = os.environ.get("MONGO_COLLECTION", "info_items")

# Default workbook location: <directory above this file's directory>/exports/info_items.xlsx
DEFAULT_OUTPUT = Path(__file__).resolve().parent.parent.joinpath(
    "exports", "info_items.xlsx"
)

# Batch size handed to the cursor when reading documents.
FETCH_BATCH_SIZE = 2000
def _ensure_output_dir(path: Path) -> None:
    """Create the directory that will contain *path* if it is missing.

    Intermediate directories are created as needed, and an already existing
    directory is not treated as an error. The file at *path* itself is not
    created.
    """
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
def _iter_docs(
    collection, query: Optional[Dict[str, Any]] = None
) -> Iterable[Dict[str, Any]]:
    """Yield documents from *collection*, with ``_id`` converted to ``str``.

    Parameters
    ----------
    collection
        Object exposing ``find(filter, batch_size=...)``; presumably a
        pymongo collection -- confirm against the caller.
    query : Optional[Dict[str, Any]]
        Filter passed to ``find``. ``None`` or an empty dict matches
        everything.

    Yields
    ------
    Dict[str, Any]
        Each document returned by the cursor. The document is modified in
        place: when it has an ``_id`` key, that value is replaced by its
        string form so it can be written to a spreadsheet cell.
    """
    mongo_filter = query if query else {}
    for record in collection.find(mongo_filter, batch_size=FETCH_BATCH_SIZE):
        has_id = "_id" in record
        if has_id:
            # Stringify the identifier (typically a BSON ObjectId) for Excel.
            record["_id"] = str(record["_id"])
        yield record
def export_to_excel(output_path: Path, query: Optional[Dict[str, Any]] = None) -> Path:
    """Fetch documents and write them to an Excel file.

    Parameters
    ----------
    output_path : Path
        Destination Excel path (a plain string is accepted as well).
        Directories are created if missing.
    query : Optional[Dict[str, Any]]
        Optional MongoDB query filter. ``None`` exports every document.

    Returns
    -------
    Path
        The path the workbook was written to.

    Notes
    -----
    When the query matches nothing, an empty workbook (no columns, no rows)
    is still written so that the output file always exists.
    """
    output_path = Path(output_path)
    _ensure_output_dir(output_path)

    # Use the client as a context manager so its connections are closed on
    # both the success and the error path; the original never closed it.
    with MongoClient(MONGO_URI) as client:
        collection = client[MONGO_DB][MONGO_COLLECTION]
        # Materialise every document before leaving the ``with`` block: the
        # cursor cannot be read once the client is closed.
        docs: List[Dict[str, Any]] = list(_iter_docs(collection, query))

    # An empty list yields an empty DataFrame, so no separate branch is
    # needed for the "no documents" case.
    pd.DataFrame(docs).to_excel(output_path, index=False)
    return output_path
def main(argv: List[str]) -> int:
    """CLI entry point. Usage: python export_info_to_excel.py [output_path]

    Parameters
    ----------
    argv : List[str]
        Command-line arguments including the program name at index 0. When a
        second element is present it is used as the output path; otherwise
        ``DEFAULT_OUTPUT`` is used.

    Returns
    -------
    int
        ``0`` when the export succeeds, ``1`` when it fails with a MongoDB,
        OS, or value error (the failure is reported on stderr).
    """
    if len(argv) > 1:
        destination = Path(argv[1])
    else:
        destination = DEFAULT_OUTPUT

    try:
        written = export_to_excel(destination)
    except (PyMongoError, OSError, ValueError) as exc:  # pragma: no cover - defensive runtime path
        print(f"Export failed: {exc}", file=sys.stderr)
        return 1
    else:
        print(f"Export completed: {written}")
        return 0
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))
|