Преглед изворни кода

v0.0.0.4 功能优化 时效性判断优化

ZengChao пре 1 месец
родитељ
комит
e85e0a6bcc

+ 16 - 16
core/construction_review/component/ai_review_engine.py

@@ -1151,7 +1151,7 @@ class AIReviewEngine(BaseReviewer):
             if not review_content or not review_content.strip():
                 logger.warning("没有可执行的编制依据审查任务")
                 return {
-                    "prep_basis_review_results": {
+                    "reference_basis_review_results": {
                         "review_results": [],
                         "review_content": review_content,
                         "total_basis_items": 0,
@@ -1175,21 +1175,21 @@ class AIReviewEngine(BaseReviewer):
                     # 调用带有SSE推送功能的review_all方法
                     from core.construction_review.component.reviewers.reference_basis_reviewer import BasisReviewService
                     async with BasisReviewService(max_concurrent=max_concurrent) as service:
-                        prep_basis_review_results = await service.review_all(
+                        reference_basis_review_results = await service.review_all(
                             review_content,
                             collection_name="first_bfp_collection_status",
                             progress_manager=progress_manager,
                             callback_task_id=callback_task_id
                         )
 
-                    logger.info(f"编制依据审查完成,批次数量: {len(prep_basis_review_results)}")
+                    logger.info(f"编制依据审查完成,批次数量: {len(reference_basis_review_results)}")
 
                     # 统计审查结果
                     total_items = 0
                     valid_items = 0
                     standard_items = 0
 
-                    for batch in prep_basis_review_results:
+                    for batch in reference_basis_review_results:
                         if isinstance(batch, list):
                             total_items += len(batch)
                             for item in batch:
@@ -1203,7 +1203,7 @@ class AIReviewEngine(BaseReviewer):
             except Exception as e:
                 logger.error(f"编制依据异步审查失败: {str(e)}")
                 return {
-                    "prep_basis_review_results": {
+                    "reference_basis_review_results": {
                         "review_results": [],
                         "review_content": review_content,
                         "total_basis_items": 0,
@@ -1216,8 +1216,8 @@ class AIReviewEngine(BaseReviewer):
 
             # 返回完整结果
             return {
-                "prep_basis_review_results": {
-                    "review_results": prep_basis_review_results,
+                "reference_basis_review_results": {
+                    "review_results": reference_basis_review_results,
                     "review_content": review_content,
                     "total_basis_items": total_items,
                     "valid_items": valid_items,
@@ -1233,7 +1233,7 @@ class AIReviewEngine(BaseReviewer):
             logger.error(error_msg, exc_info=True)
 
             return {
-                "prep_basis_review_results": {
+                "reference_basis_review_results": {
                     "review_results": [],
                     "review_content": review_data.get('content', ''),
                     "total_basis_items": 0,
@@ -1277,7 +1277,7 @@ class AIReviewEngine(BaseReviewer):
             if not review_content or not review_content.strip():
                 logger.warning("没有可执行的编制依据审查任务")
                 return {
-                    "prep_basis_review_results": {
+                    "timeliness_basis_review_results": {
                         "review_results": [],
                         "review_content": review_content,
                         "total_basis_items": 0,
@@ -1301,21 +1301,21 @@ class AIReviewEngine(BaseReviewer):
                     # 调用带有SSE推送功能的review_all方法
                     from core.construction_review.component.reviewers.timeliness_basis_reviewer import BasisReviewService
                     async with BasisReviewService(max_concurrent=max_concurrent) as service:
-                        prep_basis_review_results = await service.review_all(
+                        timeliness_basis_review_results = await service.review_all(
                             review_content,
                             collection_name="first_bfp_collection_status",
                             progress_manager=progress_manager,
                             callback_task_id=callback_task_id
                         )
 
-                    logger.info(f"编制依据审查完成,批次数量: {len(prep_basis_review_results)}")
+                    logger.info(f"编制依据审查完成,批次数量: {len(timeliness_basis_review_results)}")
 
                     # 统计审查结果
                     total_items = 0
                     valid_items = 0
                     standard_items = 0
 
-                    for batch in prep_basis_review_results:
+                    for batch in timeliness_basis_review_results:
                         if isinstance(batch, list):
                             total_items += len(batch)
                             for item in batch:
@@ -1329,7 +1329,7 @@ class AIReviewEngine(BaseReviewer):
             except Exception as e:
                 logger.error(f"编制依据异步审查失败: {str(e)}")
                 return {
-                    "prep_basis_review_results": {
+                    "timeliness_basis_review_results": {
                         "review_results": [],
                         "review_content": review_content,
                         "total_basis_items": 0,
@@ -1342,8 +1342,8 @@ class AIReviewEngine(BaseReviewer):
 
             # 返回完整结果
             return {
-                "prep_basis_review_results": {
-                    "review_results": prep_basis_review_results,
+                "timeliness_basis_review_results": {
+                    "review_results": timeliness_basis_review_results,
                     "review_content": review_content,
                     "total_basis_items": total_items,
                     "valid_items": valid_items,
@@ -1359,7 +1359,7 @@ class AIReviewEngine(BaseReviewer):
             logger.error(error_msg, exc_info=True)
 
             return {
-                "prep_basis_review_results": {
+                "timeliness_basis_review_results": {
                     "review_results": [],
                     "review_content": review_data.get('content', ''),
                     "total_basis_items": 0,

+ 5 - 1
core/construction_review/component/reviewers/reference_basis_reviewer.py

@@ -242,7 +242,9 @@ class BasisReviewService:
     async def review_all(self, text: str, collection_name: str = "first_bfp_collection_status",
                         progress_manager=None, callback_task_id: str = None) -> List[List[Dict[str, Any]]]:
         """异步批量审查所有编制依据"""
-        items = self.text_processor.extract_basis(text)
+        # items = self.text_processor.extract_basis(text)
+        from core.construction_review.component.reviewers.utils.directory_extraction import extract_basis_with_langchain_qwen
+        items = [item.raw for item in extract_basis_with_langchain_qwen(text).items]
         if not items:
             return []
 
@@ -390,6 +392,8 @@ class BasisReviewService:
 
         logger.info(f" 异步审查完成,耗时: {elapsed_time:.4f} 秒")
         logger.info(f" 总编制依据: {total_items}, 问题项: {issue_items}, 成功批次: {successful_batches}/{total_batches}")
+        print("final_results:\n")
+        print(final_results)    
         return final_results
 
 

+ 11 - 10
core/construction_review/component/reviewers/timeliness_basis_reviewer.py

@@ -294,15 +294,17 @@ class BasisReviewService:
                         grouped_candidates.append(texts)
                 print("搜索结果:\n"+str(grouped_candidates))
 
-                # 构建提示词模板和用户内容
-                prompt_template = self.message_builder.get_prompt_template()
-                message = prompt_template.partial(reference_content=grouped_candidates, check_content=basis_items)
-                trace_id = f"prep_basis_batch_{int(time.time())}"
-                llm_out = await self.llm_client.review_basis(message, trace_id)
+                # # 构建提示词模板和用户内容
+                # prompt_template = self.message_builder.get_prompt_template()
+                # message = prompt_template.partial(reference_content=grouped_candidates, check_content=basis_items)
+                # trace_id = f"prep_basis_batch_{int(time.time())}"
+                # llm_out = await self.llm_client.review_basis(message, trace_id)
                 
 
-                # from core.construction_review.component.reviewers.utils.timeliness import review_reference_timeliness
-                # llm_out = await review_reference_timeliness(reference_text=grouped_candidates, review_text=basis_items)
+                from core.construction_review.component.reviewers.utils.reference_matcher import match_reference_files
+                from core.construction_review.component.reviewers.utils.timeliness_determiner import determine_timeliness_issue
+                
+                llm_out = await determine_timeliness_issue(await match_reference_files(reference_text=grouped_candidates, review_text=basis_items))
                 
                 
                 standardized_result = self.response_processor.process_llm_response(llm_out, "timeliness_check", "basis","basis_timeliness_check")
@@ -525,8 +527,7 @@ async def review_all_basis_async(text: str, max_concurrent: int = 4) -> List[Lis
 if __name__ == "__main__":
     # 简单测试
     test_text = """
-(16)《公路工程施工现场安全防护技术要求》(JTT1508-2024);
-(17)《公路水运工程临时用电技术 规程》(JTT1499-2024);
-(18)《坠落防护 水平生命线装置》(GB 38454-2019);
+《中华人民共和国水土保持法》2010年12月25日修订
+《中华人民共和国环境保护法》2021年修订版
     """
     result = asyncio.run(review_all_basis_async(test_text))

+ 181 - 0
core/construction_review/component/reviewers/utils/reference_matcher.py

@@ -0,0 +1,181 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import json
+from typing import List
+
+from pydantic import BaseModel, Field, ValidationError
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.output_parsers import PydanticOutputParser, StrOutputParser
+from langchain_openai import ChatOpenAI
+
+
+# ===== 1) 定义结构 =====
+# One match record for a single reviewed standard, validated from the matching model's JSON output.
+# NOTE(review): kept as comments rather than a class docstring because this model's schema is
+# fed to the LLM through parser.get_format_instructions(); a docstring would presumably show up
+# in that schema as a description — confirm before converting.
+class MatchResult(BaseModel):
+    # The reviewed standard's text, expected to be identical to the input.
+    review_item: str = Field(..., description="审查的规范原文,与输入完全一致")
+    # Whether any related file exists.
+    has_related_file: bool = Field(..., description="是否有相关文件")
+    # Whether a file with the same name and the same number exists.
+    has_exact_match: bool = Field(..., description="是否有名称编号都相同的文件")
+    # The same-name-and-number file together with its status; defaults to an empty string.
+    exact_match_info: str = Field("", description="名称编号相同的文件及状态,格式:《名称》(编号)状态为XXX,可为空")
+    # A same-name file whose status is "现行" (current); defaults to an empty string.
+    same_name_current: str = Field("", description="名称相同的现行文件,格式:《名称》(编号)状态为现行,可为空")
+
+
+# Top-level wrapper the model output is validated against: an object with an "items" list.
+class MatchResults(BaseModel):
+    # One MatchResult per reviewed standard.
+    items: List[MatchResult]
+
+
+# ===== 2) SYSTEM Prompt =====
+SYSTEM = """
+/no_think
+你是【规范文件匹配助手】。
+
+【任务】
+从参考规范库中查找每个审查规范的匹配信息。
+
+【重要说明(必须严格遵守)】
+- 不用管格式问题,灵活判断编号是否一致
+
+【输出要求】
+- 为每个审查规范输出一个匹配结果
+- 确保输出数量与输入的审查规范数量一致
+- review_item 必须与输入完全一致
+- exact_match_info 和 same_name_current 可以为空字符串
+"""
+
+HUMAN = """
+请从参考规范库中查找每个审查规范的匹配信息:
+
+【匹配规则】
+1. **review_item**(审查的规范原文)
+   - 必须与输入的审查规范完全一致,逐字复制
+   - 不得修改或改写
+
+2. **has_related_file**(是否有相关文件)
+   - 在参考规范库中找到名称相似或相关的文件,返回 true
+   - 完全找不到任何相关文件,返回 false
+
+3. **has_exact_match**(是否有名称编号都相同的文件)
+   - 找到名称和编号完全一致的文件,返回 true
+   - 否则返回 false
+
+4. **exact_match_info**(名称编号相同的文件及状态)
+   - 如果 has_exact_match 为 true,返回该文件的完整信息
+   - 格式:《规范名称》(规范编号)状态为XXX
+   - 如果没有完全匹配,返回空字符串 ""
+
+5. **same_name_current**(名称相同的现行文件)
+   - 在参考规范库中查找与审查规范名称相同且状态为"现行"的文件
+   - 格式:《规范名称》(规范编号)状态为现行
+   - 如果没有找到,返回空字符串 ""
+
+【参考规范库】
+{reference_text}
+
+【审查规范】
+{review_text}
+
+【输出格式要求】
+{format_instructions}
+/no_think
+"""
+
+# ===== 3) Output Parser =====
+parser = PydanticOutputParser(pydantic_object=MatchResults)
+
+# ===== 4) Prompt =====
+prompt = ChatPromptTemplate.from_messages([
+    ("system", SYSTEM),
+    ("human", HUMAN)
+])
+
+# ===== 5) LLM =====
+# from foundation.ai.models.model_handler import model_handler
+# llm = model_handler.get_model_by_name("qwen3_30b")
+
+# Module-level chat model shared by every call in this module.
+# NOTE(review): the endpoint URL and API key are hard-coded literals; consider loading them
+# from configuration or the environment. The commented-out model_handler lookup above
+# suggests a central model registry exists — confirm and prefer it if so.
+llm = ChatOpenAI(
+    model="qwen3-30b",
+    base_url="http://192.168.91.253:8003/v1",
+    api_key="sk-123456",
+    temperature=0,
+)
+
+# ===== 6) 提取第一个 JSON =====
+def extract_first_json(text: str) -> dict:
+    """从任意模型输出中提取第一个完整 JSON 对象 { ... }"""
+    start = text.find("{")
+    if start == -1:
+        raise ValueError("未找到 JSON 起始 '{'")
+
+    depth = 0
+    for i in range(start, len(text)):
+        ch = text[i]
+        if ch == "{":
+            depth += 1
+        elif ch == "}":
+            depth -= 1
+            if depth == 0:
+                return json.loads(text[start:i + 1])
+
+    raise ValueError("JSON 花括号未闭合")
+
+
+# ===== 7) 核心方法 =====
+async def match_reference_files(reference_text: str, review_text: str) -> str:
+    """
+    从参考规范库中查找审查规范的匹配信息
+    
+    Args:
+        reference_text: 参考规范库内容
+        review_text: 审查规范内容
+        
+    Returns:
+        匹配结果的JSON字符串
+    """
+    chain = prompt | llm | StrOutputParser()
+    format_instructions = parser.get_format_instructions()
+
+    payload = {
+        "reference_text": reference_text,
+        "review_text": review_text,
+        "format_instructions": format_instructions
+    }
+
+    last_err = None
+
+    for _ in range(2):
+        try:
+            raw = await chain.ainvoke(payload)
+            print(f"[规范匹配] 模型输出: {raw}...")
+            data = extract_first_json(raw)
+            findings = MatchResults.model_validate(data)
+            result = [x.model_dump() for x in findings.items]
+            return json.dumps(result, ensure_ascii=False, indent=2)
+        except (Exception, ValidationError, json.JSONDecodeError) as e:
+            last_err = e
+
+    raise RuntimeError(f"规范匹配失败:{last_err}") from last_err
+
+
+# ===== 8) 示例 =====
+# Manual smoke run: sends one sample reference library and one sample review list
+# through match_reference_files and prints the returned JSON string.
+if __name__ == "__main__":
+    import asyncio
+
+    # Sample reference library: one standard per line, each followed by its status.
+    reference_file = """
+    《混凝土结构设计规范》(GB 50010-2010)状态为现行
+    《混凝土结构设计规范》(GB 50010-2015)状态为废止
+    《建筑施工组织设计规范》(GB/T 50502-2015)状态为废止
+    《建筑施工组织设计规范》(GB/T 50502-2020)状态为现行
+    《建筑抗震设计规范》(GB 50011-2001)状态为废止
+    """
+
+    # Sample standards under review, written as a JSON-style array inside a string.
+    review_file = """
+    [
+        "《混凝土结构设计规范》(GB 50010-2029)",
+        "《建筑施工组织设计规范》(GB/T 50502-2015)",
+        "《建筑抗震设计规范》(GB 50011-2001)",
+        "《城市道路工程设计规范》(CJJ 37-2012)"
+    ]
+    """
+
+    result = asyncio.run(match_reference_files(reference_file, review_file))
+    print("\n匹配结果:")
+    print(result)

+ 197 - 0
core/construction_review/component/reviewers/utils/timeliness_determiner.py

@@ -0,0 +1,197 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import json
+from typing import List, Literal
+
+from pydantic import BaseModel, Field, ValidationError
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.output_parsers import PydanticOutputParser, StrOutputParser
+from langchain_openai import ChatOpenAI
+
+
+# ===== 1) 定义结构 =====
+RiskLevel = Literal["无风险", "高风险"]
+
+
+# One timeliness verdict for a single reviewed standard, validated from the model's JSON output.
+# NOTE(review): kept as comments rather than a class docstring because this model's schema is
+# fed to the LLM through parser.get_format_instructions(); a docstring would presumably show up
+# in that schema as a description — confirm before converting.
+class TimelinessResult(BaseModel):
+    # The timeliness verdict type.
+    issue_point: str = Field(..., description="时效性判定类型")
+    # The reviewed standard's text, expected to be identical to the input.
+    review_item: str = Field(..., description="审查的规范原文,与输入完全一致")
+    # Suggested action.
+    suggestion: str = Field(..., description="建议")
+    # Reason behind the verdict.
+    reason: str = Field(..., description="原因")
+    # Risk level; validation only accepts the two RiskLevel literals.
+    risk_level: RiskLevel = Field(..., description='风险水平,只能是 "无风险" / "高风险"')
+
+
+# Top-level wrapper the model output is validated against: an object with an "items" list.
+class TimelinessResults(BaseModel):
+    # One TimelinessResult per reviewed standard.
+    items: List[TimelinessResult]
+
+
+# ===== 2) SYSTEM Prompt =====
+SYSTEM = """
+你是【规范时效性判定助手】。
+
+【任务】
+根据规范匹配结果,判定每个审查规范的时效性问题类型。
+
+【重要说明(必须严格遵守)】
+- 如果有具体的文件将回答中的“XXX”替换为具体文件,如果没有具体文件回答内容中不要出现“XXX”字样
+
+【输出要求】
+- 为每个审查规范输出一个判定结果
+- 确保输出数量与输入一致
+- review_item 必须与输入完全一致
+- 严格按照判定规则的优先级顺序进行判断
+"""
+
+HUMAN = """
+请根据以下规范匹配结果,判定每个审查规范的时效性问题类型:
+
+【判定规则(按优先级从高到低)】
+
+1. **无参考规范**(无风险)
+   - 条件:has_related_file = false
+   - 原因:在参考规范库中完全找不到相关文件
+   - 建议:当前引用未在参考规范库中发现,建议人工核实其有效性
+
+2. **规范编号错误**(高风险)
+   - 条件:has_related_file = true 且 has_exact_match = false
+   - 原因:与参考文件XXX编号不一致
+   - 建议:建议核实并更正为参考库中的正确编号XXX
+
+3. **规范编号正确**(无风险)
+   - 条件:has_exact_match = true 且 exact_match_info 中状态为"现行"
+   - 原因:与参考文件XXX名称编号一致,且文件状态为现行
+   - 建议:引用规范为现行有效版本,无需调整
+
+4. **引用已废止的规范**(高风险)
+   - 条件:has_exact_match = true 且 exact_match_info 中状态为"废止" 且 same_name_current 为空
+   - 原因:参考文件显示XXX已废止,且无明确替代版本
+   - 建议:建议删除该引用或咨询最新替代规范
+
+5. **引用已被替代的规范**(高风险)
+   - 条件:has_exact_match = true 且 exact_match_info 中状态为"废止" 且 same_name_current 不为空
+   - 原因:参考文件显示XXX已废止,但存在XXX现行版本
+   - 建议:建议更新为现行替代标准
+
+【规范匹配结果】
+{match_results}
+
+【输出格式要求】
+{format_instructions}
+/no_think
+"""
+
+# ===== 3) Output Parser =====
+parser = PydanticOutputParser(pydantic_object=TimelinessResults)
+
+# ===== 4) Prompt =====
+prompt = ChatPromptTemplate.from_messages([
+    ("system", SYSTEM),
+    ("human", HUMAN)
+])
+
+# ===== 5) LLM =====
+# from foundation.ai.models.model_handler import model_handler
+# llm = model_handler.get_model_by_name("qwen3_30b")
+# Module-level chat model shared by every call in this module.
+# NOTE(review): the endpoint URL and API key are hard-coded literals; consider loading them
+# from configuration or the environment. The commented-out model_handler lookup above
+# suggests a central model registry exists — confirm and prefer it if so.
+llm = ChatOpenAI(
+    model="qwen3-30b",
+    base_url="http://192.168.91.253:8003/v1",
+    api_key="sk-123456",
+    temperature=0,
+)
+
+# ===== 6) 提取第一个 JSON =====
+def extract_first_json(text: str) -> dict:
+    """从任意模型输出中提取第一个完整 JSON 对象 { ... }"""
+    start = text.find("{")
+    if start == -1:
+        raise ValueError("未找到 JSON 起始 '{'")
+
+    depth = 0
+    for i in range(start, len(text)):
+        ch = text[i]
+        if ch == "{":
+            depth += 1
+        elif ch == "}":
+            depth -= 1
+            if depth == 0:
+                return json.loads(text[start:i + 1])
+
+    raise ValueError("JSON 花括号未闭合")
+
+
+# ===== 7) 核心方法 =====
+async def determine_timeliness_issue(match_results: str) -> str:
+    """
+    根据规范匹配结果判定时效性问题类型
+    
+    Args:
+        match_results: match_reference_files 的返回结果(JSON字符串)
+        
+    Returns:
+        时效性判定结果的JSON字符串
+    """
+    chain = prompt | llm | StrOutputParser()
+    format_instructions = parser.get_format_instructions()
+
+    payload = {
+        "match_results": match_results,
+        "format_instructions": format_instructions
+    }
+
+    last_err = None
+
+    for _ in range(2):
+        try:
+            raw = await chain.ainvoke(payload)
+            print(f"[时效性判定] 模型输出: {raw}...")
+            data = extract_first_json(raw)
+            findings = TimelinessResults.model_validate(data)
+            result = [x.model_dump() for x in findings.items]
+            return json.dumps(result, ensure_ascii=False, indent=2)
+        except (Exception, ValidationError, json.JSONDecodeError) as e:
+            last_err = e
+
+    raise RuntimeError(f"时效性判定失败:{last_err}") from last_err
+
+
+# ===== 8) 示例 =====
+# Manual smoke run: feeds a hand-written match result list through
+# determine_timeliness_issue and prints the returned JSON string.
+if __name__ == "__main__":
+    import asyncio
+
+    # Simulated return value of match_reference_files
+    match_results = json.dumps([
+        {
+            "review_item": "《混凝土结构设计规范》(GB 50010-2010)",
+            "has_related_file": True,
+            "has_exact_match": True,
+            "exact_match_info": "《混凝土结构设计规范》(GB 50010-2010)状态为现行",
+            "same_name_current": "《混凝土结构设计规范》(GB 50010-2010)状态为现行"
+        },
+        {
+            "review_item": "《建筑施工组织设计规范》(GB/T 50502-2015)",
+            "has_related_file": True,
+            "has_exact_match": True,
+            "exact_match_info": "《建筑施工组织设计规范》(GB/T 50502-2015)状态为废止",
+            "same_name_current": "《建筑施工组织设计规范》(GB/T 50502-2020)状态为现行"
+        },
+        {
+            "review_item": "《建筑抗震设计规范》(GB 50011-2001)",
+            "has_related_file": True,
+            "has_exact_match": True,
+            "exact_match_info": "《建筑抗震设计规范》(GB 50011-2001)状态为废止",
+            "same_name_current": ""
+        },
+        {
+            "review_item": "《城市道路工程设计规范》(CJJ 37-2012)",
+            "has_related_file": False,
+            "has_exact_match": False,
+            "exact_match_info": "",
+            "same_name_current": ""
+        }
+    ], ensure_ascii=False)
+
+    result = asyncio.run(determine_timeliness_issue(match_results))
+    print("\n时效性判定结果:")
+    print(result)
+