utils.py 2.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273
  1. # -*- coding: utf-8 -*-
  2. """文档对话检索共享辅助函数。"""
  3. from __future__ import annotations
  4. from typing import Any, Dict, List
  5. def to_int(value: Any, default: int) -> int:
  6. """安全整数转换。"""
  7. try:
  8. return int(value)
  9. except (TypeError, ValueError):
  10. return default
  11. def to_float(value: Any, default: float = 0.0) -> float:
  12. """安全浮点数转换。"""
  13. try:
  14. return float(value)
  15. except (TypeError, ValueError):
  16. return default
  17. def escape_milvus_string(value: str) -> str:
  18. """转义 Milvus 字符串中的特殊字符(反斜杠、单引号、双引号)。"""
  19. return str(value).replace("\\", "\\\\").replace("'", "\\'").replace('"', '\\"')
  20. def combine_expr(*exprs: str) -> str:
  21. """用 AND 连接多个过滤表达式,每个子表达式加括号。"""
  22. parts = [f"({expr})" for expr in exprs if str(expr or "").strip()]
  23. return " and ".join(parts)
  24. def pack_log_items(items: List[Dict[str, Any]], limit: int = 20, text_limit: int = 1500) -> List[Dict[str, Any]]:
  25. """打包候选条目为日志格式,限制条数和文本长度。"""
  26. packed = []
  27. for item in (items or [])[:limit]:
  28. if not isinstance(item, dict):
  29. continue
  30. metadata = item.get("metadata") if isinstance(item.get("metadata"), dict) else {}
  31. text = str(item.get("text") or item.get("text_content") or item.get("content") or "").strip()
  32. packed.append(
  33. {
  34. "candidate_key": item.get("candidate_key"),
  35. "source": item.get("source") or metadata.get("file_name") or "",
  36. "text": text[:text_limit],
  37. "vector_similarity": to_float(item.get("vector_similarity", item.get("similarity")), 0.0),
  38. "fusion_score": to_float(item.get("fusion_score"), 0.0),
  39. "rerank_score": to_float(item.get("rerank_score"), 0.0) if "rerank_score" in item else None,
  40. "source_hits": item.get("source_hits") if isinstance(item.get("source_hits"), dict) else {},
  41. "metadata": {
  42. key: metadata.get(key)
  43. for key in (
  44. "document_id",
  45. "parent_id",
  46. "file_name",
  47. "chapter_title",
  48. "chapter_level_1",
  49. "chapter_level_2",
  50. "chapter_level_3",
  51. "parent_count",
  52. "child_hit_count",
  53. "matched_child_texts",
  54. "tag_match_terms",
  55. "source_scope_valid",
  56. )
  57. if metadata.get(key) not in (None, "")
  58. },
  59. }
  60. )
  61. return packed