| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145 |
# -*- coding: utf-8 -*-
"""Similar-fragment retrieval API."""
- from typing import List, Optional
- from pydantic import BaseModel, Field
- from fastapi import APIRouter, HTTPException
- from starlette.concurrency import run_in_threadpool
- from foundation.observability.logger.loggering import write_logger as logger
- from foundation.infrastructure.tracing import TraceContext, auto_trace
- from core.construction_write.component.similar_fragment_service import (
- CHILD_COLLECTION,
- PARENT_COLLECTION,
- search_similar_fragments,
- )
# ==================== Router ====================
# Router that groups the similar-fragment endpoints under the "/sgbx" prefix.
similar_fragment_router = APIRouter(
    prefix="/sgbx",
    tags=["施工方案编写"],
)
# ==================== Data models ====================
class SimilarFragmentSearchRequest(BaseModel):
    # Request payload for the similar-fragment search endpoint.
    # (Plain comments are used instead of a class docstring so the generated
    # schema description is left exactly as it was.)

    # Display titles; optional, only used for logging by the endpoint in this file.
    title_level_1: Optional[str] = Field(
        default=None,
        description="一级标题展示文本",
        example="施工工艺技术",
    )
    title_level_2: Optional[str] = Field(
        default=None,
        description="二级标题展示文本",
        example="主要施工方法概述",
    )

    # Chapter type keys; required and forwarded to the search service.
    chapter_level_1: str = Field(
        default=...,
        description="一级章节类型,需与向量库字段匹配",
        example="technology",
    )
    chapter_level_2: str = Field(
        default=...,
        description="二级章节类型,需与向量库字段匹配",
        example="MethodsOverview",
    )

    # Identifiers echoed back unchanged on every result item.
    chapter_id: str = Field(default=..., description="当前章节ID(原样回传)")
    project_id: str = Field(default=..., description="方案ID(原样回传)")

    sgbx_code: Optional[str] = Field(default=None, description="施工编写章节编码")

    # Free-text query entered by the user.
    search_text: str = Field(default=..., description="用户输入的检索信息")

    class Config:
        # Unknown fields in the incoming payload are dropped rather than rejected.
        extra = "ignore"
class SimilarFragmentItem(BaseModel):
    # One retrieved fragment as returned to the client.
    # (Plain comments are used instead of a class docstring so the generated
    # schema description is left exactly as it was.)

    chapter_level_1: str = Field(default=..., description="一级标题")
    chapter_level_2: str = Field(default=..., description="二级标题")

    # Identifiers copied from the request, not from the search hit.
    chapter_id: str = Field(default=..., description="请求传入的章节ID,原样回传")
    project_id: str = Field(default=..., description="请求传入的方案ID,原样回传")

    text: str = Field(default=..., description="相似片段内容")
    file_name: str = Field(default=..., description="文件名称")

    # Validation rejects values outside the closed range [0, 100].
    similarity_percent: float = Field(
        default=...,
        description="相似度百分比",
        ge=0.0,
        le=100.0,
    )
class SimilarFragmentSearchResponse(BaseModel):
    # Response envelope: a business status code, a message and the result list.
    # (Plain comments are used instead of a class docstring so the generated
    # schema description is left exactly as it was.)

    code: int
    message: str
    # Defaults to a fresh empty list when no items are supplied.
    data: List[SimilarFragmentItem] = Field(
        default_factory=list,
    )
# ==================== API routes ====================
def _similarity_to_percent(similarity):
    """Convert a raw similarity score to a percentage clamped to [0, 100]."""
    percent = round(similarity * 100, 2)
    # SimilarFragmentItem.similarity_percent is validated with ge=0.0 / le=100.0.
    # Without clamping, a single hit whose score is marginally above 1.0 or
    # below 0 would raise a validation error and fail the whole request.
    # NOTE(review): the score range produced by search_similar_fragments is not
    # visible from this file - confirm that clamping is the desired policy.
    return min(100.0, max(0.0, percent))


@similar_fragment_router.post("/similar_fragment_search", response_model=SimilarFragmentSearchResponse)
@auto_trace(generate_if_missing=True)
async def similar_fragment_search(request: SimilarFragmentSearchRequest):
    """
    Search for fragments similar to the user's query text.

    The chapter type keys and the query text from the request are forwarded to
    ``search_similar_fragments`` (executed through ``run_in_threadpool``), and
    each returned hit is converted into a ``SimilarFragmentItem``.

    Args:
        request: Search request carrying the chapter type keys, the chapter and
            project IDs (echoed back on every item) and the query text.

    Returns:
        A ``SimilarFragmentSearchResponse``. On success ``code`` is 200 and
        ``data`` holds the converted hits. When the chapter keys are blank or
        the query text is blank / shorter than 3 characters after stripping,
        ``code`` is 400 and ``data`` is empty; this is reported in the response
        body, not via an HTTP error.

    Raises:
        HTTPException: With status 500 when the search call or the conversion
            of its results raises.
    """
    trace_id = TraceContext.get_trace_id()
    logger.info(
        f"[{trace_id}] 相似片段检索: title_level_1={request.title_level_1}, "
        f"title_level_2={request.title_level_2}, chapter_level_1={request.chapter_level_1}, "
        f"chapter_level_2={request.chapter_level_2}, 检索文本={request.search_text[:50]}..."
    )

    # --- Parameter validation (failures are returned in the body as code=400) ---
    if not request.chapter_level_1.strip() or not request.chapter_level_2.strip():
        return SimilarFragmentSearchResponse(
            code=400,
            message="章节类型 chapter_level_1 和 chapter_level_2 不能为空",
            data=[],
        )

    # Strip once and reuse for both the emptiness and the minimum-length check.
    stripped_text = request.search_text.strip()
    if not stripped_text:
        return SimilarFragmentSearchResponse(
            code=400,
            message="检索信息不能为空",
            data=[],
        )
    if len(stripped_text) < 3:
        return SimilarFragmentSearchResponse(
            code=400,
            message="检索信息过短,请输入至少3个字符",
            data=[],
        )

    try:
        # The search function is synchronous; run it off the event loop.
        # The original (unstripped) text is forwarded, as before.
        raw_results = await run_in_threadpool(
            search_similar_fragments,
            level1=request.chapter_level_1,
            level2=request.chapter_level_2,
            search_text=request.search_text,
            top_k=5,
        )

        # Assemble the response items; the IDs come from the request, not the hit.
        items = [
            SimilarFragmentItem(
                chapter_level_1=r["chapter_level_1"],
                chapter_level_2=r["chapter_level_2"],
                chapter_id=request.chapter_id,
                project_id=request.project_id,
                text=r["text"],
                file_name=r["file_name"],
                similarity_percent=_similarity_to_percent(r["similarity"]),
            )
            for r in raw_results
        ]
    except Exception as e:
        logger.error(f"[{trace_id}] 相似片段检索异常: {e}", exc_info=True)
        # NOTE(review): the raw exception text is exposed to the client here;
        # kept for backward compatibility, but consider a generic message.
        raise HTTPException(
            status_code=500,
            detail=f"相似片段检索失败: {str(e)}",
        ) from e

    return SimilarFragmentSearchResponse(
        code=200,
        message="success",
        data=items,
    )
@similar_fragment_router.get("/similar_fragment_search_health")
async def health_check():
    """Return a static health payload naming the vector store and its collections.

    No connectivity probe is performed here; the payload only reports fixed
    labels and the collection names imported by this module.
    """
    payload = {
        "status": "healthy",
        "vector_db": "Milvus (lq_db)",
        "child_collection": CHILD_COLLECTION,
        "parent_collection": PARENT_COLLECTION,
    }
    return payload
|