Просмотр исходного кода

v0.0.4-功能优化
- 待实现父子文档功能,增加父子文档召回基础实现逻辑

WangXuMing 2 месяцев назад
Родитель
Коммит
0f9ea1c882

+ 4 - 0
core/construction_review/component/ai_review_engine.py

@@ -347,6 +347,10 @@ class AIReviewEngine(BaseReviewer):
         logger.info("构建查询对")
         query_pairs = query_rewrite_manager.query_extract(query_content)
         bfp_result_lists =entity_enhance.entities_enhance_retrieval(query_pairs)
+        # 使用 bfp_result_lists 获取 parent_id,通过 parent_id 获取父文档内容(参考 utils_test\Milvus_Test\test_查询接口.py)
+        # llm 异步相关度分析  判断父文档是否与query_content 审查条文相关
+        # 如果相关,则追加到 bfp_result;如果不相关,则跳过
+        # 如果 len(bfp_result) > 0,则进行 RAG 增强;否则返回空
 
         logger.info(f"bfp_result_lists{bfp_result_lists}")
         # 检查是否有检索结果

+ 3 - 1
foundation/ai/rag/retrieval/retrieval.py

@@ -221,7 +221,7 @@ class RetrievalManager:
         self.logger.info(f"[async_bfp_recall] 开始召回, top_k={top_k}, 实体数量={len(entity_list)}, 背景='{background[:50]}...'")
 
         # 异步并发召回编制依据
-        collection_name = "first_bfp_collection_test"
+        collection_name = "rag_children_hybrid"
 
         gather_start = time.time()
         # 优化:降低hybrid_top_k参数从50到20,减少混合搜索时间
@@ -284,11 +284,13 @@ class RetrievalManager:
         # 处理二次重排序的高分文档
         for rerank_item in bfp_rerank_result:
             text = rerank_item.get('text', '')
+            parent_id = rerank_item.get('parent_id', '')
             score = rerank_item.get('score', 0.0)
 
             if text in text_to_metadata:
                 original_item = text_to_metadata[text].copy()
                 original_item['bfp_rerank_score'] = score
+                original_item['bfp_rerank_parent_id'] = parent_id
                 final_results.append(original_item)
 
         reorganize_end = time.time()