Просмотр исходного кода

dev:扩展了目录分类逻辑的兼容性,添加了第十一个收容项;

ChenJiSheng 1 месяц назад
Родитель
Commit
7b96b1c1ce

+ 12 - 8
core/construction_review/component/ai_review_engine.py

@@ -732,15 +732,20 @@ class AIReviewEngine(BaseReviewer):
                 max_concurrent=concurrent_workers
             )
             logger.info("  组件初始化完成")
-            
+
 
             # 3. 执行审查
             logger.info("\n[4/5] 开始执行审查...")
             logger.info(f"  使用模型: {llm_client.model_type}")
             logger.info(f"  最大并发数: {concurrent_workers}")
-            
+
             review_results = await review_pipeline.review(documents, specification)
-            review_results_flag = pd.DataFrame(review_results)["chapter_classification"].unique().tolist()
+            review_results_df = pd.DataFrame(review_results)
+            df_section_label = review_results_df['section_label'].str.split('->').str[0]
+            review_results_df['title'] = df_section_label
+            review_results_df.to_csv(Path('temp') / 'document_temp' / '2_spec_review_results.csv', encoding='utf-8-sig', index=False)
+            review_results_flag = review_results_df["chapter_classification"].unique().tolist()
+
             # with open(r'temp\document_temp\1_spec_review_results.json', 'w', encoding='utf-8') as f:
             #     json.dump(review_results, f, ensure_ascii=False, indent=4)
             # 统计结果
@@ -754,16 +759,15 @@ class AIReviewEngine(BaseReviewer):
             processed_results = analyzer.process_results(review_results)
             spec_summary_csv_path = Path('temp') / 'document_temp' / '3_spec_review_summary.csv'
             summary_rows = analyzer.build_spec_summary(processed_results)
-            logger.info(f"  规范覆盖汇总结果已保存至: {spec_summary_csv_path}")
+            # logger.info(f"  规范覆盖汇总结果已保存至: {spec_summary_csv_path}")
             summary_rows = pd.DataFrame(summary_rows)
             summary_rows = summary_rows[summary_rows['标签'].isin(review_results_flag)]
             # summary_rows.to_csv(str(spec_summary_csv_path), encoding='utf-8-sig', index=False)
             summary_rows = summary_rows.to_dict('records')
             # 生成缺失要点 JSON 列表,便于前端消费
-            #missing_issue_json_path = Path(r'temp\document_temp') / 'spec_review_missing_issues.json'
+
             issues = analyzer.build_missing_issue_list(summary_rows)
-            # with open(r'temp\document_temp\4_spec_review_missing_issues.json', 'w', encoding='utf-8') as f:
-            #     json.dump(issues, f, ensure_ascii=False, indent=4)
+
             # 包装成外层格式化期望的结构
             execution_time = time.time() - start_time
             return {
@@ -776,7 +780,7 @@ class AIReviewEngine(BaseReviewer):
                 },
                 "success": True,
                 "execution_time": execution_time
-            }
+            } 
         except Exception as e:
             execution_time = time.time() - start_time
             error_msg = f"{name} 审查失败: {str(e)}"

+ 5 - 2
core/construction_review/component/doc_worker/classification/hierarchy_classifier.py

@@ -8,6 +8,7 @@ from __future__ import annotations
 
 from collections import Counter
 import asyncio
+import json
 from typing import Any, Dict, List, Optional
 
 from ..interfaces import HierarchyClassifier as IHierarchyClassifier
@@ -99,7 +100,8 @@ class HierarchyClassifier(IHierarchyClassifier):
                 level1_title=level1_item["title"],
                 level2_titles=level2_titles
             )
-            
+            with open('temp/document_temp/prompt.txt', "w", encoding="utf-8") as f:
+                f.write(prompt["user"])
             # 构建消息列表
             messages = [
                 {"role": "system", "content": prompt["system"]},
@@ -110,7 +112,8 @@ class HierarchyClassifier(IHierarchyClassifier):
         
         # 批量异步调用LLM API
         llm_results = await self.llm_client.batch_call_async(llm_requests)
-        
+        with open('temp/document_temp/llm_results.json', "w", encoding="utf-8") as f:
+            json.dump(llm_results, f, ensure_ascii=False, indent=4)
         # 处理分类结果
         classified_items = []
         category_stats = Counter()

+ 4 - 2
core/construction_review/component/doc_worker/config/prompt.yaml

@@ -6,6 +6,7 @@ toc_classification:
     - 一级目录名称本身是重要的分类依据,即使没有二级目录,也要根据一级目录名称进行分类;
     - 必须从提供的标准类别中选择一个,所有标准类别都是平等的,没有偏好,不能创建新类别;
     - 如果待分类的目录与多个标准类别都相关,选择最匹配的一个;
+    - 注意:其他资料类别有自己标准,要严格符合其他资料类别的标准才能分到这个类别;
 
     - /no_think
   user_template: |
@@ -17,8 +18,9 @@ toc_classification:
     {{ level2_titles }}
 
     分类标准(一级目录名称及其包含的二级目录集合):
+
     {{ classification_standards }}
-    
+    - 十一、非标准项(用于接收不符合前十项类别的目录项)
 
     输出要求(只输出 JSON):
     {
@@ -38,7 +40,7 @@ toc_classification:
     - 施工管理及作业人员配备与分工 -> management
     - 验收要求 -> acceptance
     - 其他资料 -> other
-
+    - 非标准项 -> no_standard
 
 
 

+ 3 - 4
core/construction_review/component/doc_worker/utils/prompt_loader.py

@@ -103,15 +103,14 @@ class PromptLoader:
             level2_count = len(level2_list)
             level2_text = "、".join(level2_list)
             
-            # 将一级目录名称和二级目录集合都包含在分类标准中
-            # 强调:匹配时只看核心标题名称,忽略编号前缀
+            # 简化格式,只包含核心标题和二级目录列表
             if level2_count > 0:
                 standards_lines.append(
-                    f"    - {number_prefix}、{level1}(核心标题名称:{level1};包含的二级目录:{level2_text}等{level2_count}个方面):匹配核心标题「{level1}」,包含{level2_text}等{level2_count}个方面。"
+                    f"    - {number_prefix}、{level1}(包含{level2_text}等{level2_count}个方面)"
                 )
             else:
                 standards_lines.append(
-                    f"    - {number_prefix}、{level1}(核心标题名称:{level1}):匹配核心标题「{level1}」。"
+                    f"    - {number_prefix}、{level1}"
                 )
         
         self._classification_standards = "\n".join(standards_lines)

BIN
core/construction_review/component/reviewers/check_completeness/config/Construction_Plan_Content_Specification.csv


+ 2 - 2
core/construction_review/workflows/ai_review_workflow.py

@@ -490,8 +490,8 @@ class AIReviewWorkflow:
                     state = state,
                     stage_name = state.get("stage_name", "完整性审查")
                 )
-                with open(r"temp\document_temp\4_check_completeness_result.json", "w", encoding="utf-8") as f:
-                    json.dump(check_completeness_result, f, ensure_ascii=False, indent=4)
+                # with open(r"temp\document_temp\4_check_completeness_result.json", "w", encoding="utf-8") as f:
+                #     json.dump(check_completeness_result, f, ensure_ascii=False, indent=4)
 
 
             # # 4. 执行编制依据审查