|
@@ -60,7 +60,7 @@ from foundation.ai.rag.retrieval.entities_enhance import entity_enhance
|
|
|
from core.construction_review.component.reviewers.base_reviewer import BaseReviewer
|
|
from core.construction_review.component.reviewers.base_reviewer import BaseReviewer
|
|
|
from core.construction_review.component.reviewers.outline_reviewer import OutlineReviewer
|
|
from core.construction_review.component.reviewers.outline_reviewer import OutlineReviewer
|
|
|
from core.construction_review.component.reviewers.utils.text_split import split_text
|
|
from core.construction_review.component.reviewers.utils.text_split import split_text
|
|
|
-
|
|
|
|
|
|
|
+from core.construction_review.component.infrastructure.milvus import MilvusManager, MilvusConfig
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -130,6 +130,8 @@ class AIReviewEngine(BaseReviewer):
|
|
|
self.milvus_collection = config_handler.get('milvus', 'MILVUS_COLLECTION', 'default')
|
|
self.milvus_collection = config_handler.get('milvus', 'MILVUS_COLLECTION', 'default')
|
|
|
self.outline_reviewer = OutlineReviewer()
|
|
self.outline_reviewer = OutlineReviewer()
|
|
|
|
|
|
|
|
|
|
+ self.milvus = MilvusManager(MilvusConfig())
|
|
|
|
|
+
|
|
|
def _process_review_result(self, result):
|
|
def _process_review_result(self, result):
|
|
|
"""
|
|
"""
|
|
|
处理审查结果,统一转换为字典格式
|
|
处理审查结果,统一转换为字典格式
|
|
@@ -348,11 +350,6 @@ class AIReviewEngine(BaseReviewer):
|
|
|
logger.info("构建查询对")
|
|
logger.info("构建查询对")
|
|
|
query_pairs = query_rewrite_manager.query_extract(query_content)
|
|
query_pairs = query_rewrite_manager.query_extract(query_content)
|
|
|
bfp_result_lists =entity_enhance.entities_enhance_retrieval(query_pairs)
|
|
bfp_result_lists =entity_enhance.entities_enhance_retrieval(query_pairs)
|
|
|
- # 使用bfp_result_list 获取 parent_id ,通过parent_id 获取父文档内容 utils_test\Milvus_Test\test_查询接口.py
|
|
|
|
|
- # llm 异步相关度分析 判断父文档是否与query_content 审查条文相关
|
|
|
|
|
- # 如果相关,则追加到 bfp_result,如果不相关则,则跳过
|
|
|
|
|
- # 如果len(bfp_result) > 0 则进行RAG增强,否则 则返回空
|
|
|
|
|
-
|
|
|
|
|
logger.info(f"bfp_result_lists{bfp_result_lists}")
|
|
logger.info(f"bfp_result_lists{bfp_result_lists}")
|
|
|
# 检查是否有检索结果
|
|
# 检查是否有检索结果
|
|
|
if not bfp_result_lists:
|
|
if not bfp_result_lists:
|
|
@@ -364,6 +361,98 @@ class AIReviewEngine(BaseReviewer):
|
|
|
'text_content': '',
|
|
'text_content': '',
|
|
|
'metadata': {}
|
|
'metadata': {}
|
|
|
}
|
|
}
|
|
|
|
|
+ #todo
|
|
|
|
|
+ #异步调用查询。查出所有的
|
|
|
|
|
+
|
|
|
|
|
+ #todo
|
|
|
|
|
+ # 使用bfp_result_list 获取 parent_id ,通过parent_id 获取父文档内容 utils_test\Milvus_Test\test_查询接口.py
|
|
|
|
|
+ # llm 异步相关度分析 判断父文档是否与query_content 审查条文相关
|
|
|
|
|
+ # 如果相关,则追加到 bfp_result,如果不相关则,则跳过
|
|
|
|
|
+ import asyncio
|
|
|
|
|
+ import concurrent.futures
|
|
|
|
|
+ from typing import Any, Dict, List, Optional, Sequence
|
|
|
|
|
+ from core.construction_review.component.infrastructure.relevance import is_relevant_async
|
|
|
|
|
+ PARENT_COLLECTION = "rag_parent_hybrid" # TODO: 改成你的父段 collection
|
|
|
|
|
+ PARENT_TEXT_FIELD = "text" # TODO: 改成你的父段字段名
|
|
|
|
|
+ PARENT_OUTPUT_FIELDS: Sequence[str] = ["parent_id", PARENT_TEXT_FIELD]
|
|
|
|
|
+
|
|
|
|
|
def run_async(coro):
    """Run the coroutine ``coro`` to completion from synchronous code.

    Works both when no event loop is running in the current thread and when
    one already is (in which case ``asyncio.run`` cannot be called directly).

    Args:
        coro: the coroutine object to execute.

    Returns:
        Whatever ``coro`` returns.

    Raises:
        Any exception raised by ``coro`` is propagated unchanged.
    """
    try:
        # Only the probe lives inside the ``try``: a RuntimeError raised by
        # the coroutine itself must not be mistaken for "no running loop",
        # otherwise the already-consumed coroutine would be run a second time
        # and the real error would be masked.
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread, so asyncio.run() is safe here.
        return asyncio.run(coro)

    # A loop is already running in this thread and asyncio.run() refuses to
    # nest, so drive the coroutine on a fresh loop in a worker thread and
    # block until it finishes.
    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
        return executor.submit(asyncio.run, coro).result()
|
|
|
|
|
+
|
|
|
|
|
async def _async_condition_query_one(pid: str) -> Optional[Dict[str, Any]]:
    """Fetch the parent-segment row for ``pid`` without blocking the event loop.

    ``self.milvus.condition_query`` is synchronous, so the call is dispatched
    to the running loop's default executor and awaited.

    Args:
        pid: value matched against the ``parent_id`` field of
            ``PARENT_COLLECTION``.

    Returns:
        A dict limited to the keys of ``PARENT_OUTPUT_FIELDS`` that are present
        in the first returned row, or ``None`` when the query returns no rows.
    """
    loop = asyncio.get_running_loop()

    # Escape backslashes and single quotes so an id containing either cannot
    # terminate the string literal early and corrupt the filter expression.
    quoted_pid = str(pid).replace("\\", "\\\\").replace("'", "\\'")

    def _call():
        rows = self.milvus.condition_query(
            collection_name=PARENT_COLLECTION,
            filter=f"parent_id == '{quoted_pid}'",
            output_fields=PARENT_OUTPUT_FIELDS,
            limit=1,
        )
        if not rows:
            return None
        row0 = rows[0] or {}
        # Whitelist projection: drop extra fields such as pk/id.
        return {k: row0.get(k) for k in PARENT_OUTPUT_FIELDS if k in row0}

    return await loop.run_in_executor(None, _call)
|
|
|
|
|
+
|
|
|
|
|
async def _enhance_all():
    """Append relevant parent-segment text to the retrieved results, in place.

    Groups every entry of ``bfp_result_lists`` by its ``metadata["parent_id"]``,
    looks up each parent segment once, asks ``is_relevant_async`` whether the
    parent text is relevant to ``query_content``, and, only when it is,
    appends that text to the ``text_content`` of every entry sharing the
    parent id. Entries without a parent id are left untouched.

    Returns:
        None. The dicts inside ``bfp_result_lists`` are mutated directly.
    """
    # 1) Collect parent_id -> the result entries that should receive its text.
    pid_to_results: Dict[str, List[Dict[str, Any]]] = {}

    for result_list in bfp_result_lists:
        for r in (result_list or []):
            md = r.get("metadata") or {}
            pid = md.get("parent_id")
            if not pid:
                continue
            pid_to_results.setdefault(str(pid), []).append(r)

    if not pid_to_results:
        return

    # 2) One parent_id at a time, serially: fetch parent -> LLM relevance
    #    check -> append to the matching results.
    for pid, results in pid_to_results.items():
        parent_doc = await _async_condition_query_one(pid)
        if not parent_doc:
            continue

        parent_text = (parent_doc.get(PARENT_TEXT_FIELD) or "").strip()
        if not parent_text:
            continue

        # The verdict is interpreted by truthiness; a falsy value means the
        # parent segment is unrelated to the clause under review.
        relevant = await is_relevant_async(query_content, parent_text)
        logger.debug(f"parent_id={pid} relevant={relevant}")
        if not relevant:
            # Skip unrelated parents so they do not pollute the RAG context.
            continue

        extra = f"{parent_text}\n"

        # 3) Append to every entry that belongs to this parent_id.
        for r in results:
            r["text_content"] = (r.get("text_content") or "") + extra
|
|
|
|
|
+
|
|
|
|
|
+ run_async(_enhance_all())
|
|
|
logger.info(f"RAG检索返回了 {len(bfp_result_lists)} 个查询对结果")
|
|
logger.info(f"RAG检索返回了 {len(bfp_result_lists)} 个查询对结果")
|
|
|
# 获取第一个查询对的第一个结果
|
|
# 获取第一个查询对的第一个结果
|
|
|
first_result_list = bfp_result_lists[0]
|
|
first_result_list = bfp_result_lists[0]
|
|
@@ -1051,6 +1140,4 @@ class AIReviewEngine(BaseReviewer):
|
|
|
"execution_time": execution_time,
|
|
"execution_time": execution_time,
|
|
|
"error_message": error_msg
|
|
"error_message": error_msg
|
|
|
}
|
|
}
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
|
|
+ }
|