|
|
@@ -1,61 +1,25 @@
|
|
|
-import os
|
|
|
-import sys
|
|
|
+from __future__ import annotations
|
|
|
+
|
|
|
import json
|
|
|
-import re
|
|
|
import time
|
|
|
-from typing import Any, Dict, List, Optional
|
|
|
import asyncio
|
|
|
-
|
|
|
-project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../../'))
|
|
|
-# 将根目录添加到sys.path
|
|
|
-sys.path.append(project_root)
|
|
|
-
|
|
|
-# 导入必要的依赖
|
|
|
-try:
|
|
|
- from pymilvus import connections, Collection
|
|
|
- from foundation.infrastructure.config.config import config_handler
|
|
|
- from foundation.ai.models.model_handler import model_handler as mh
|
|
|
- from foundation.ai.agent.generate.model_generate import generate_model_client
|
|
|
- from core.construction_review.component.reviewers.utils.prompt_loader import prompt_loader
|
|
|
- from core.construction_review.component.reviewers.utils.inter_tool import InterTool
|
|
|
- from foundation.observability.logger.loggering import server_logger as logger
|
|
|
- from langchain_core.prompts import ChatPromptTemplate
|
|
|
- from langchain_milvus import Milvus, BM25BuiltInFunction
|
|
|
- from functools import partial
|
|
|
-
|
|
|
-except ImportError as e:
|
|
|
- logger.warning(f"Warning: 无法导入依赖: {e}")
|
|
|
- # 设置默认值,避免程序崩溃
|
|
|
- mh = None
|
|
|
- generate_model_client = None
|
|
|
- prompt_loader = None
|
|
|
- logger = None
|
|
|
- InterTool = None
|
|
|
-
|
|
|
-
|
|
|
-class TextProcessor:
|
|
|
- """文本处理工具类"""
|
|
|
-
|
|
|
- @staticmethod
|
|
|
- def extract_basis(text: str) -> List[str]:
|
|
|
- """从文本中提取编制依据,支持:《》《》(…)《》【…】"""
|
|
|
- pattern = re.compile(
|
|
|
- r'《[^》]+》' # 《标题》
|
|
|
- r'(?:([^)]+)|【[^】]+】)?' # 可选的(…)或【…】
|
|
|
- )
|
|
|
- return pattern.findall(text)
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
+from typing import Any, Dict, List
|
|
|
+from functools import partial
|
|
|
+
|
|
|
+from langchain_milvus import Milvus, BM25BuiltInFunction
|
|
|
+from foundation.infrastructure.config.config import config_handler
|
|
|
+from foundation.ai.models.model_handler import model_handler as mh
|
|
|
+from core.construction_review.component.reviewers.utils.inter_tool import InterTool
|
|
|
+from core.construction_review.component.reviewers.utils.directory_extraction import BasisItems, BasisItem
|
|
|
+from foundation.observability.logger.loggering import server_logger as logger
|
|
|
+from core.construction_review.component.reviewers.utils.reference_matcher import match_reference_files
|
|
|
+from core.construction_review.component.reviewers.utils.timeliness_determiner import determine_timeliness_issue
|
|
|
|
|
|
class StandardizedResponseProcessor:
|
|
|
- """标准化响应处理器 - 统一为outline_reviewer.py格式"""
|
|
|
+ """标准化响应处理器"""
|
|
|
|
|
|
def __init__(self):
|
|
|
- if InterTool:
|
|
|
- self.inter_tool = InterTool()
|
|
|
- else:
|
|
|
- self.inter_tool = None
|
|
|
+ self.inter_tool = InterTool()
|
|
|
|
|
|
def process_llm_response(self, response_text: str, check_name: str , chapter_code: str ,check_item_code:str) -> List[Dict[str, Any]]:
|
|
|
"""
|
|
|
@@ -70,10 +34,6 @@ class StandardizedResponseProcessor:
|
|
|
Returns:
|
|
|
List[Dict]: 标准格式的审查结果列表
|
|
|
"""
|
|
|
- if not self.inter_tool:
|
|
|
- logger.warning("InterTool未初始化,返回空结果")
|
|
|
- return []
|
|
|
-
|
|
|
try:
|
|
|
json_data = response_text
|
|
|
|
|
|
@@ -104,23 +64,6 @@ class StandardizedResponseProcessor:
|
|
|
}]
|
|
|
|
|
|
|
|
|
-class MessageBuilder:
|
|
|
- """消息构建工具类"""
|
|
|
-
|
|
|
- def __init__(self, prompt_loader_instance=None):
|
|
|
- self.prompt_loader = prompt_loader_instance
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
- def get_prompt_template(self):
|
|
|
- import yaml
|
|
|
- with open("core/construction_review/component/reviewers/prompt/timeliness_basis_reviewer.yaml", "r", encoding="utf-8") as f:
|
|
|
- data = yaml.safe_load(f)
|
|
|
- return ChatPromptTemplate.from_messages([
|
|
|
- ("system", data["timeliness_basis_reviewer"]["system_prompt"]),
|
|
|
- ("user", data["timeliness_basis_reviewer"]["user_prompt_template"])
|
|
|
- ])
|
|
|
-
|
|
|
class BasisSearchEngine:
|
|
|
"""编制依据向量搜索引擎"""
|
|
|
|
|
|
@@ -138,11 +81,8 @@ class BasisSearchEngine:
|
|
|
self.password = config_handler.get('milvus', 'MILVUS_PASSWORD')
|
|
|
|
|
|
# 初始化嵌入模型
|
|
|
- if mh:
|
|
|
- self.emdmodel = mh._get_lq_qwen3_8b_emd()
|
|
|
- logger.info(" 嵌入模型初始化成功")
|
|
|
- else:
|
|
|
- raise ImportError("无法获取嵌入模型")
|
|
|
+ self.emdmodel = mh._get_lq_qwen3_8b_emd()
|
|
|
+ logger.info("嵌入模型初始化成功")
|
|
|
|
|
|
except Exception as e:
|
|
|
logger.error(f" BasisSearchEngine 初始化失败: {e}")
|
|
|
@@ -205,45 +145,12 @@ class BasisSearchEngine:
|
|
|
logger.error(f" 搜索失败: {e}")
|
|
|
|
|
|
|
|
|
-class LLMReviewClient:
|
|
|
- """LLM审查客户端"""
|
|
|
-
|
|
|
- async def review_basis(self, Message: str, trace_id: str = None) -> str:
|
|
|
- try:
|
|
|
-
|
|
|
- task_prompt_info = {
|
|
|
- "task_prompt": Message,
|
|
|
- "task_name": "规范性引用文件识别与状态判断"
|
|
|
- }
|
|
|
- logger.info(f" 模型调用准备阶段: {task_prompt_info}")
|
|
|
-
|
|
|
- # 调用统一模型客户端 - 编制依据审查设置90秒超时
|
|
|
- response = await generate_model_client.get_model_generate_invoke(
|
|
|
- trace_id=trace_id,
|
|
|
- task_prompt_info=task_prompt_info,
|
|
|
- timeout=90
|
|
|
- )
|
|
|
- return response
|
|
|
-
|
|
|
- except Exception as e:
|
|
|
- logger.error(f" 模型调用准备阶段失败: {e}")
|
|
|
- # 返回空JSON数组字符串以防解析崩溃
|
|
|
- return "[]"
|
|
|
- # ==================== 修复结束 ====================
|
|
|
-
|
|
|
-
|
|
|
class BasisReviewService:
|
|
|
"""编制依据审查服务核心类"""
|
|
|
|
|
|
def __init__(self, max_concurrent: int = 4):
|
|
|
self.search_engine = BasisSearchEngine()
|
|
|
- self.llm_client = LLMReviewClient()
|
|
|
- self.text_processor = TextProcessor()
|
|
|
- self.response_processor = StandardizedResponseProcessor() # 标准化处理器
|
|
|
- # 确保使用最新的prompt_loader实例
|
|
|
- from core.construction_review.component.reviewers.utils.prompt_loader import PromptLoader
|
|
|
- fresh_prompt_loader = PromptLoader()
|
|
|
- self.message_builder = MessageBuilder(fresh_prompt_loader)
|
|
|
+ self.response_processor = StandardizedResponseProcessor()
|
|
|
self.max_concurrent = max_concurrent
|
|
|
self._semaphore = None
|
|
|
|
|
|
@@ -261,8 +168,6 @@ class BasisReviewService:
|
|
|
self,
|
|
|
basis_items: List[str],
|
|
|
collection_name: str = "first_bfp_collection_status",
|
|
|
- filters: Optional[Dict[str, Any]] = None,
|
|
|
- min_score: float = 0.3,
|
|
|
top_k_each: int = 3,
|
|
|
) -> List[Dict[str, Any]]:
|
|
|
"""异步批次审查(通常3条)"""
|
|
|
@@ -292,24 +197,10 @@ class BasisReviewService:
|
|
|
# result 是 List[dict],需要遍历
|
|
|
texts = [item["text_content"] for item in result if "text_content" in item]
|
|
|
grouped_candidates.append(texts)
|
|
|
- print("搜索结果:\n"+str(grouped_candidates))
|
|
|
-
|
|
|
- # # 构建提示词模板和用户内容
|
|
|
- # prompt_template = self.message_builder.get_prompt_template()
|
|
|
- # message = prompt_template.partial(reference_content=grouped_candidates, check_content=basis_items)
|
|
|
- # trace_id = f"prep_basis_batch_{int(time.time())}"
|
|
|
- # llm_out = await self.llm_client.review_basis(message, trace_id)
|
|
|
-
|
|
|
-
|
|
|
- from core.construction_review.component.reviewers.utils.reference_matcher import match_reference_files
|
|
|
- from core.construction_review.component.reviewers.utils.timeliness_determiner import determine_timeliness_issue
|
|
|
|
|
|
llm_out = await determine_timeliness_issue(await match_reference_files(reference_text=grouped_candidates, review_text=basis_items))
|
|
|
|
|
|
-
|
|
|
- standardized_result = self.response_processor.process_llm_response(llm_out, "timeliness_check", "basis","basis_timeliness_check")
|
|
|
- print("标准化处理器处理响应:\n")
|
|
|
- print(standardized_result)
|
|
|
+ standardized_result = self.response_processor.process_llm_response(llm_out, "timeliness_check", "basis", "basis_timeliness_check")
|
|
|
# 统计问题数量
|
|
|
issue_count = sum(1 for item in standardized_result if item.get('exist_issue', False))
|
|
|
logger.info(f"编制依据批次审查完成:总计 {len(basis_items)} 项,发现问题 {issue_count} 项")
|
|
|
@@ -317,7 +208,7 @@ class BasisReviewService:
|
|
|
return standardized_result
|
|
|
|
|
|
except Exception as e:
|
|
|
- logger.error(f" 批次处理失败1: {e}")
|
|
|
+ logger.error(f" 批次处理失败: {e}")
|
|
|
return [{
|
|
|
"check_item": "timeliness_check",
|
|
|
"chapter_code": "basis",
|
|
|
@@ -355,12 +246,13 @@ class BasisReviewService:
|
|
|
return []
|
|
|
|
|
|
|
|
|
- async def review_all(self, text: str, collection_name: str = "first_bfp_collection_status",
|
|
|
+ async def review_all(self, basis_items: BasisItems, collection_name: str = "first_bfp_collection_status",
|
|
|
progress_manager=None, callback_task_id: str = None) -> List[List[Dict[str, Any]]]:
|
|
|
- """异步批量审查所有编制依据"""
|
|
|
- from core.construction_review.component.reviewers.utils.directory_extraction import extract_basis_with_langchain_qwen
|
|
|
- items = [item.raw for item in extract_basis_with_langchain_qwen(text).items]
|
|
|
- #items = self.text_processor.extract_basis(text)
|
|
|
+ """异步批量审查所有编制依据(入参为 BasisItems)"""
|
|
|
+ if not basis_items or not getattr(basis_items, "items", None):
|
|
|
+ return []
|
|
|
+
|
|
|
+ items = [item.raw for item in basis_items.items if getattr(item, "raw", None)]
|
|
|
if not items:
|
|
|
return []
|
|
|
|
|
|
@@ -519,16 +411,28 @@ async def review_basis_batch_async(basis_items: List[str], max_concurrent: int =
|
|
|
return await service.review_batch(basis_items)
|
|
|
|
|
|
|
|
|
-async def review_all_basis_async(text: str, max_concurrent: int = 4) -> List[List[Dict[str, Any]]]:
|
|
|
- """异步全部审查便捷函数"""
|
|
|
+async def review_all_basis_async(basis_items: BasisItems, max_concurrent: int = 4) -> List[List[Dict[str, Any]]]:
|
|
|
+ """异步全部审查便捷函数(BasisItems 入参)"""
|
|
|
async with BasisReviewService(max_concurrent=max_concurrent) as service:
|
|
|
- return await service.review_all(text)
|
|
|
+ return await service.review_all(basis_items)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
- # 简单测试
|
|
|
- test_text = """
|
|
|
-使用要求:按照《坠落防护水平生命线装置》GB 38454 或《电力高处作业防坠
|
|
|
-器》DL/T 1147 中的规定进行现场实验,实验结果符合《坠落防护挂点装置》GB
|
|
|
-30862 中的规定
|
|
|
- """
|
|
|
- result = asyncio.run(review_all_basis_async(test_text))
|
|
|
+ # 直接构造 BasisItems 测试 review_all
|
|
|
+ test_basis_items = BasisItems(items=[
|
|
|
+ BasisItem(title="坠落防护水平生命线装置", suffix="GB 38454", raw="《坠落防护水平生命线装置》GB 38454"),
|
|
|
+ BasisItem(title="电力高处作业防坠器", suffix="DL/T 1147", raw="《电力高处作业防坠器》DL/T 1147"),
|
|
|
+ BasisItem(title="坠落防护挂点装置", suffix="GB 30862", raw="《坠落防护挂点装置》GB 30862"),
|
|
|
+ BasisItem(title="混凝土结构设计规范", suffix="GB 50010-2010", raw="《混凝土结构设计规范》GB 50010-2010"),
|
|
|
+ BasisItem(title="建筑施工组织设计规范", suffix="GB/T 50502-2015", raw="《建筑施工组织设计规范》GB/T 50502-2015"),
|
|
|
+ ])
|
|
|
+
|
|
|
+ print(f"测试 {len(test_basis_items.items)} 项编制依据:")
|
|
|
+ for idx, item in enumerate(test_basis_items.items, 1):
|
|
|
+ print(f" {idx}. {item.raw}")
|
|
|
+
|
|
|
+ print("\n开始异步审查...")
|
|
|
+ result = asyncio.run(review_all_basis_async(test_basis_items))
|
|
|
+
|
|
|
+ print("\n审查结果:")
|
|
|
+ print(json.dumps(result, ensure_ascii=False, indent=2))
|
|
|
+
|