# -*- coding: utf-8 -*-
"""Shared helper functions for document-chat retrieval."""

from __future__ import annotations

from typing import Any, Dict, List

# Metadata fields copied into each log entry by ``pack_log_items``.
# Fields whose value is ``None`` or an empty string are left out of the log.
_LOG_METADATA_KEYS = (
    "document_id",
    "parent_id",
    "file_name",
    "chapter_title",
    "chapter_level_1",
    "chapter_level_2",
    "chapter_level_3",
    "parent_count",
    "child_hit_count",
    "matched_child_texts",
    "tag_match_terms",
    "source_scope_valid",
)


def to_int(value: Any, default: int) -> int:
    """Convert ``value`` to ``int``, returning ``default`` on failure.

    Args:
        value: Anything accepted by ``int()``.
        default: Value returned when the conversion is not possible.

    Returns:
        ``int(value)``, or ``default`` if ``int()`` raises ``TypeError``,
        ``ValueError`` or ``OverflowError``.
    """
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: int(float("inf")) -- infinities have no int value.
        return default


def to_float(value: Any, default: float = 0.0) -> float:
    """Convert ``value`` to ``float``, returning ``default`` on failure.

    Args:
        value: Anything accepted by ``float()``.
        default: Value returned when the conversion is not possible.

    Returns:
        ``float(value)``, or ``default`` if ``float()`` raises ``TypeError``,
        ``ValueError`` or ``OverflowError``.
    """
    try:
        return float(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: an int too large to be represented as a float.
        return default


def escape_milvus_string(value: str) -> str:
    """Escape backslashes, single quotes and double quotes in ``value``.

    The input is passed through ``str()`` first, so non-string values are
    accepted as well.

    Args:
        value: Text to embed inside a quoted Milvus string literal.

    Returns:
        The text with each ``\\``, ``'`` and ``"`` prefixed by a backslash.
    """
    # Backslashes must be doubled first; otherwise the backslashes inserted
    # in front of the quotes would themselves be escaped a second time.
    return str(value).replace("\\", "\\\\").replace("'", "\\'").replace('"', '\\"')


def combine_expr(*exprs: str) -> str:
    """Join filter expressions with ``and``, parenthesising each one.

    Expressions that are falsy or contain only whitespace are skipped.

    Args:
        *exprs: Individual filter expressions.

    Returns:
        The combined expression, or an empty string when no usable
        expression was supplied.
    """
    parts = [f"({expr})" for expr in exprs if str(expr or "").strip()]
    return " and ".join(parts)


def pack_log_items(
    items: List[Dict[str, Any]],
    limit: int = 20,
    text_limit: int = 1500,
) -> List[Dict[str, Any]]:
    """Pack candidate items into a compact, size-bounded form for logging.

    Args:
        items: Candidate dicts. ``None`` or an empty list yields ``[]``.
            Entries that are not dicts are skipped.
        limit: Number of leading entries of ``items`` to consider. The cut is
            applied before non-dict entries are skipped, so the result may
            contain fewer than ``limit`` entries.
        text_limit: Maximum number of characters of text kept per entry.

    Returns:
        A list of new dicts, one per retained candidate, with the keys
        ``candidate_key``, ``source``, ``text``, ``vector_similarity``,
        ``fusion_score``, ``rerank_score``, ``source_hits`` and ``metadata``.
    """
    packed: List[Dict[str, Any]] = []
    for item in (items or [])[:limit]:
        if not isinstance(item, dict):
            continue

        # Anything other than a dict is treated as "no metadata".
        raw_metadata = item.get("metadata")
        metadata = raw_metadata if isinstance(raw_metadata, dict) else {}

        raw_source_hits = item.get("source_hits")
        source_hits = raw_source_hits if isinstance(raw_source_hits, dict) else {}

        # First non-empty text field wins.
        text = str(
            item.get("text") or item.get("text_content") or item.get("content") or ""
        ).strip()

        packed.append(
            {
                "candidate_key": item.get("candidate_key"),
                "source": item.get("source") or metadata.get("file_name") or "",
                "text": text[:text_limit],
                # "similarity" is consulted only when the "vector_similarity"
                # key is absent from the item altogether.
                "vector_similarity": to_float(
                    item.get("vector_similarity", item.get("similarity")), 0.0
                ),
                "fusion_score": to_float(item.get("fusion_score"), 0.0),
                # None (rather than 0.0) marks a candidate without the key.
                "rerank_score": (
                    to_float(item.get("rerank_score"), 0.0)
                    if "rerank_score" in item
                    else None
                ),
                "source_hits": source_hits,
                "metadata": {
                    key: metadata.get(key)
                    for key in _LOG_METADATA_KEYS
                    if metadata.get(key) not in (None, "")
                },
            }
        )
    return packed