# -*- coding: utf-8 -*-
"""Similar-fragment retrieval API.

Exposes a POST endpoint that looks up similar text fragments through
``search_similar_fragments`` and a lightweight health endpoint that reports
the configured collection names.
"""

from typing import List, Optional

from pydantic import BaseModel, Field
from fastapi import APIRouter, HTTPException
from starlette.concurrency import run_in_threadpool

from foundation.observability.logger.loggering import write_logger as logger
from foundation.infrastructure.tracing import TraceContext, auto_trace
from core.construction_write.component.similar_fragment_service import (
    CHILD_COLLECTION,
    PARENT_COLLECTION,
    search_similar_fragments,
)

# ==================== Router ====================

similar_fragment_router = APIRouter(prefix="/sgbx", tags=["施工方案编写"])

# ==================== Tunables ====================

# Number of fragments requested from the search service per call.
_TOP_K = 5
# Minimum length of the stripped search text accepted by the endpoint.
_MIN_SEARCH_TEXT_LENGTH = 3
# Number of leading characters of the search text written to the log.
_LOG_PREVIEW_CHARS = 50


# ==================== Data models ====================
# NOTE: the models are documented with comments rather than class docstrings
# so that the generated schema descriptions stay exactly as they were.

# Request payload for the similar-fragment search. Unknown fields are ignored.
class SimilarFragmentSearchRequest(BaseModel):
    title_level_1: Optional[str] = Field(None, description="一级标题展示文本", example="施工工艺技术")
    title_level_2: Optional[str] = Field(None, description="二级标题展示文本", example="主要施工方法概述")
    chapter_level_1: str = Field(..., description="一级章节类型,需与向量库字段匹配", example="technology")
    chapter_level_2: str = Field(..., description="二级章节类型,需与向量库字段匹配", example="MethodsOverview")
    chapter_id: str = Field(..., description="当前章节ID(原样回传)")
    project_id: str = Field(..., description="方案ID(原样回传)")
    sgbx_code: Optional[str] = Field(None, description="施工编写章节编码")
    search_text: str = Field(..., description="用户输入的检索信息")

    class Config:
        extra = "ignore"


# One search hit. chapter_id / project_id are echoed back from the request;
# the remaining fields are taken from the search result.
class SimilarFragmentItem(BaseModel):
    chapter_level_1: str = Field(..., description="一级标题")
    chapter_level_2: str = Field(..., description="二级标题")
    chapter_id: str = Field(..., description="请求传入的章节ID,原样回传")
    project_id: str = Field(..., description="请求传入的方案ID,原样回传")
    text: str = Field(..., description="相似片段内容")
    file_name: str = Field(..., description="文件名称")
    similarity_percent: float = Field(..., description="相似度百分比", ge=0.0, le=100.0)


# Response envelope: a status code, a message and the list of hits.
class SimilarFragmentSearchResponse(BaseModel):
    code: int
    message: str
    data: List[SimilarFragmentItem] = Field(default_factory=list)


# ==================== Helpers ====================

def _validation_error(level1: str, level2: str, search_text: str) -> Optional[str]:
    """Return the rejection message for already-stripped inputs, or None if valid."""
    if not level1 or not level2:
        return "章节类型 chapter_level_1 和 chapter_level_2 不能为空"
    if not search_text:
        return "检索信息不能为空"
    if len(search_text) < _MIN_SEARCH_TEXT_LENGTH:
        return f"检索信息过短,请输入至少{_MIN_SEARCH_TEXT_LENGTH}个字符"
    return None


def _to_similarity_percent(similarity: float) -> float:
    """Convert a similarity score to a percentage rounded to 2 decimals.

    The result is clamped to [0, 100], the range ``SimilarFragmentItem``
    accepts, so that a single out-of-range score does not make model
    validation fail and turn the whole request into a 500.
    """
    percent = round(similarity * 100, 2)
    return min(100.0, max(0.0, percent))


# ==================== API routes ====================

@similar_fragment_router.post(
    "/similar_fragment_search",
    response_model=SimilarFragmentSearchResponse,
)
@auto_trace(generate_if_missing=True)
async def similar_fragment_search(
    request: SimilarFragmentSearchRequest,
) -> SimilarFragmentSearchResponse:
    """
    Similar-fragment search endpoint.

    Validates the request, runs ``search_similar_fragments`` in the thread
    pool and wraps the hits in a ``SimilarFragmentSearchResponse``.

    Args:
        request: Search request carrying the chapter types, the chapter and
            project IDs (echoed back in every item) and the search text.

    Returns:
        A response with ``code=200`` and the matching fragments, or a response
        with ``code=400`` and an explanatory message when validation fails.
        Validation failures are reported in the response body; they are not
        raised as ``HTTPException``.

    Raises:
        HTTPException: Status 500 when the search or the result assembly fails.
    """
    trace_id = TraceContext.get_trace_id()
    logger.info(
        f"[{trace_id}] 相似片段检索: title_level_1={request.title_level_1}, "
        f"title_level_2={request.title_level_2}, chapter_level_1={request.chapter_level_1}, "
        f"chapter_level_2={request.chapter_level_2}, 检索文本={request.search_text[:_LOG_PREVIEW_CHARS]}..."
    )

    # Normalise once so the values that are validated are the values searched.
    level1 = request.chapter_level_1.strip()
    level2 = request.chapter_level_2.strip()
    search_text = request.search_text.strip()

    error_message = _validation_error(level1, level2, search_text)
    if error_message is not None:
        return SimilarFragmentSearchResponse(code=400, message=error_message, data=[])

    try:
        # The search function is called synchronously, so keep it off the event loop.
        raw_results = await run_in_threadpool(
            search_similar_fragments,
            level1=level1,
            level2=level2,
            search_text=search_text,
            top_k=_TOP_K,
        )

        # `or []` treats a None result as "no hits" instead of failing on iteration.
        items = [
            SimilarFragmentItem(
                chapter_level_1=hit["chapter_level_1"],
                chapter_level_2=hit["chapter_level_2"],
                chapter_id=request.chapter_id,
                project_id=request.project_id,
                text=hit["text"],
                file_name=hit["file_name"],
                similarity_percent=_to_similarity_percent(hit["similarity"]),
            )
            for hit in raw_results or []
        ]

        return SimilarFragmentSearchResponse(code=200, message="success", data=items)

    except Exception as e:
        # Endpoint boundary: log with traceback, then surface as HTTP 500.
        logger.error(f"[{trace_id}] 相似片段检索异常: {e}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"相似片段检索失败: {str(e)}"
        ) from e


@similar_fragment_router.get("/similar_fragment_search_health")
async def health_check() -> dict:
    """Health endpoint for the similar-fragment search.

    Returns a static descriptor containing the configured collection names;
    it does not contact the vector database.
    """
    return {
        "status": "healthy",
        "vector_db": "Milvus (lq_db)",
        "child_collection": CHILD_COLLECTION,
        "parent_collection": PARENT_COLLECTION,
    }