| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273 |
- # -*- coding: utf-8 -*-
- """文档对话检索共享辅助函数。"""
- from __future__ import annotations
- from typing import Any, Dict, List
def to_int(value: Any, default: int) -> int:
    """Convert *value* to ``int``, falling back to *default* on failure.

    Args:
        value: Any object to pass to ``int()``.
        default: Value returned when the conversion fails.

    Returns:
        ``int(value)``, or *default* when ``int()`` raises ``TypeError``,
        ``ValueError`` or ``OverflowError``.
    """
    try:
        return int(value)
    # OverflowError is raised for float infinities (NaN already raises
    # ValueError); without it the "safe" conversion would still propagate.
    except (TypeError, ValueError, OverflowError):
        return default
def to_float(value: Any, default: float = 0.0) -> float:
    """Convert *value* to ``float``, falling back to *default* on failure.

    Args:
        value: Any object to pass to ``float()``.
        default: Value returned when the conversion fails (``0.0`` if omitted).

    Returns:
        ``float(value)``, or *default* when ``float()`` raises ``TypeError``,
        ``ValueError`` or ``OverflowError``.
    """
    try:
        return float(value)
    # OverflowError is raised for integers too large to represent as a float.
    except (TypeError, ValueError, OverflowError):
        return default
def escape_milvus_string(value: str) -> str:
    """Escape special characters for use inside a Milvus string.

    A backslash is placed in front of every backslash, single quote and
    double quote. The argument is passed through ``str()`` first.
    """
    # One pass over the text; each special character maps to its escaped form.
    escapes = str.maketrans({"\\": "\\\\", "'": "\\'", '"': '\\"'})
    return str(value).translate(escapes)
def combine_expr(*exprs: str) -> str:
    """Join filter expressions with ``and``, wrapping each in parentheses.

    Falsy or whitespace-only arguments are skipped. Kept expressions are
    wrapped verbatim (they are not stripped). Returns an empty string when
    no expression is kept.
    """
    wrapped = []
    for candidate in exprs:
        # Skip None / "" / other falsy values and blank strings.
        if not (candidate and str(candidate).strip()):
            continue
        wrapped.append(f"({candidate})")
    return " and ".join(wrapped)
def pack_log_items(items: List[Dict[str, Any]], limit: int = 20, text_limit: int = 1500) -> List[Dict[str, Any]]:
    """Pack candidate items into a compact form for logging.

    Only the first *limit* entries of *items* are considered, and entries
    among them that are not dicts are skipped. Each packed entry's text is
    truncated to *text_limit* characters.

    Args:
        items: Candidate dicts; a falsy value is treated as an empty list.
        limit: Maximum number of leading entries to examine.
        text_limit: Maximum number of characters of text kept per entry.

    Returns:
        A list of dicts with the keys ``candidate_key``, ``source``, ``text``,
        ``vector_similarity``, ``fusion_score``, ``rerank_score``,
        ``source_hits`` and ``metadata``.
    """
    # Metadata fields copied into the log entry when they are non-empty.
    logged_metadata_keys = (
        "document_id",
        "parent_id",
        "file_name",
        "chapter_title",
        "chapter_level_1",
        "chapter_level_2",
        "chapter_level_3",
        "parent_count",
        "child_hit_count",
        "matched_child_texts",
        "tag_match_terms",
        "source_scope_valid",
    )

    summaries = []
    for entry in (items or [])[:limit]:
        if not isinstance(entry, dict):
            continue

        raw_meta = entry.get("metadata")
        meta = raw_meta if isinstance(raw_meta, dict) else {}

        # First non-empty text field wins.
        raw_text = entry.get("text") or entry.get("text_content") or entry.get("content") or ""
        body = str(raw_text).strip()

        # "similarity" is consulted only when "vector_similarity" is absent.
        similarity = entry.get("vector_similarity", entry.get("similarity"))

        # rerank_score stays None when the key is missing from the entry.
        if "rerank_score" in entry:
            rerank = to_float(entry.get("rerank_score"), 0.0)
        else:
            rerank = None

        hits = entry.get("source_hits")
        if not isinstance(hits, dict):
            hits = {}

        kept_meta = {}
        for field in logged_metadata_keys:
            field_value = meta.get(field)
            if field_value not in (None, ""):
                kept_meta[field] = field_value

        summaries.append(
            {
                "candidate_key": entry.get("candidate_key"),
                "source": entry.get("source") or meta.get("file_name") or "",
                "text": body[:text_limit],
                "vector_similarity": to_float(similarity, 0.0),
                "fusion_score": to_float(entry.get("fusion_score"), 0.0),
                "rerank_score": rerank,
                "source_hits": hits,
                "metadata": kept_meta,
            }
        )
    return summaries
|