export_info_to_excel.py 2.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283
  1. """Export MongoDB info_items collection to an Excel file."""
  2. from __future__ import annotations
  3. import os
  4. import sys
  5. from pathlib import Path
  6. from typing import Any, Dict, Iterable, List, Optional
  7. import pandas as pd
  8. from pymongo import MongoClient
  9. from pymongo.errors import PyMongoError
  10. MONGO_URI = os.getenv("MONGO_URI", "mongodb://localhost:27017")
  11. MONGO_DB = os.getenv("MONGO_DB", "haier")
  12. MONGO_COLLECTION = os.getenv("MONGO_COLLECTION", "info_items")
  13. DEFAULT_OUTPUT = Path(__file__).resolve().parent.parent / "exports" / "info_items.xlsx"
  14. FETCH_BATCH_SIZE = 2000
  15. def _ensure_output_dir(path: Path) -> None:
  16. path.parent.mkdir(parents=True, exist_ok=True)
  17. def _iter_docs(
  18. collection, query: Optional[Dict[str, Any]] = None
  19. ) -> Iterable[Dict[str, Any]]:
  20. cursor = collection.find(query or {}, batch_size=FETCH_BATCH_SIZE)
  21. for doc in cursor:
  22. # Convert ObjectId to string for Excel friendliness.
  23. if "_id" in doc:
  24. doc["_id"] = str(doc["_id"])
  25. yield doc
  26. def export_to_excel(output_path: Path, query: Optional[Dict[str, Any]] = None) -> Path:
  27. """Fetch documents and write them to an Excel file.
  28. Parameters
  29. ----------
  30. output_path : Path
  31. Destination Excel path. Directories are created if missing.
  32. query : Optional[Dict[str, Any]]
  33. Optional MongoDB query filter.
  34. """
  35. client = MongoClient(MONGO_URI)
  36. collection = client[MONGO_DB][MONGO_COLLECTION]
  37. _ensure_output_dir(output_path)
  38. docs: List[Dict[str, Any]] = list(_iter_docs(collection, query))
  39. if not docs:
  40. # Create an empty file with a header to indicate no data.
  41. pd.DataFrame().to_excel(output_path, index=False)
  42. return output_path
  43. df = pd.DataFrame(docs)
  44. df.to_excel(output_path, index=False)
  45. return output_path
  46. def main(argv: List[str]) -> int:
  47. """CLI entry point. Usage: python export_info_to_excel.py [output_path]"""
  48. output_path = Path(argv[1]) if len(argv) > 1 else DEFAULT_OUTPUT
  49. try:
  50. result_path = export_to_excel(output_path)
  51. except (
  52. PyMongoError,
  53. OSError,
  54. ValueError,
  55. ) as exc: # pragma: no cover - defensive runtime path
  56. print(f"Export failed: {exc}", file=sys.stderr)
  57. return 1
  58. print(f"Export completed: {result_path}")
  59. return 0
  60. if __name__ == "__main__":
  61. raise SystemExit(main(sys.argv))