Kaynağa Gözat

dev:目录审查模块的接入整体框架;

ChenJiSheng 1 ay önce
ebeveyn
işleme
b5dc39393d
41 değiştirilmiş dosya ile 2914 ekleme ve 21 silme
  1. 78 14
      core/construction_review/component/ai_review_engine.py
  2. 20 2
      core/construction_review/component/document_processor.py
  3. 499 0
      core/construction_review/component/reviewers/catalogues_check/catalogues_check.py
  4. BIN
      core/construction_review/component/reviewers/catalogues_check/config/Construction_Plan_Content_Specification.csv
  5. 33 0
      core/construction_review/component/reviewers/catalogues_check/config/llm_api.yaml
  6. 42 0
      core/construction_review/component/reviewers/catalogues_check/config/prompt.yaml
  7. 34 0
      core/construction_review/component/reviewers/catalogues_check/config/prompts/catalog_check.yaml
  8. 8 0
      core/construction_review/component/reviewers/catalogues_check/config/prompts/catalog_check_chain.yaml
  9. 18 0
      core/construction_review/component/reviewers/catalogues_check/config/prompts/chain_config.yaml
  10. 24 0
      core/construction_review/component/reviewers/catalogues_check/config/prompts/step1_extract.yaml
  11. 24 0
      core/construction_review/component/reviewers/catalogues_check/config/prompts/step2_analyze.yaml
  12. 24 0
      core/construction_review/component/reviewers/catalogues_check/config/prompts/step3_summary.yaml
  13. 186 0
      core/construction_review/component/reviewers/catalogues_check/utils/README.md
  14. 9 0
      core/construction_review/component/reviewers/catalogues_check/utils/__init__.py
  15. 373 0
      core/construction_review/component/reviewers/catalogues_check/utils/redis_utils.py
  16. 4 4
      core/construction_review/component/reviewers/check_completeness/components/result_analyzer.py
  17. 20 0
      core/construction_review/component/reviewers/outline_check.py
  18. 400 0
      core/construction_review/component/reviewers/utils/llm_chain_client/README.md
  19. 36 0
      core/construction_review/component/reviewers/utils/llm_chain_client/__init__.py
  20. 189 0
      core/construction_review/component/reviewers/utils/llm_chain_client/bootstrap.py
  21. 1 0
      core/construction_review/component/reviewers/utils/llm_chain_client/implementations/__init__.py
  22. 4 0
      core/construction_review/component/reviewers/utils/llm_chain_client/implementations/chains/__init__.py
  23. 178 0
      core/construction_review/component/reviewers/utils/llm_chain_client/implementations/chains/async_chain_executor.py
  24. 14 0
      core/construction_review/component/reviewers/utils/llm_chain_client/implementations/clients/__init__.py
  25. 129 0
      core/construction_review/component/reviewers/utils/llm_chain_client/implementations/clients/base_client.py
  26. 22 0
      core/construction_review/component/reviewers/utils/llm_chain_client/implementations/clients/deepseek_client.py
  27. 22 0
      core/construction_review/component/reviewers/utils/llm_chain_client/implementations/clients/doubao_client.py
  28. 22 0
      core/construction_review/component/reviewers/utils/llm_chain_client/implementations/clients/gemini_client.py
  29. 22 0
      core/construction_review/component/reviewers/utils/llm_chain_client/implementations/clients/qwen_client.py
  30. 4 0
      core/construction_review/component/reviewers/utils/llm_chain_client/implementations/loaders/__init__.py
  31. 105 0
      core/construction_review/component/reviewers/utils/llm_chain_client/implementations/loaders/yaml_prompt_loader.py
  32. 6 0
      core/construction_review/component/reviewers/utils/llm_chain_client/interfaces/__init__.py
  33. 46 0
      core/construction_review/component/reviewers/utils/llm_chain_client/interfaces/chain_executor.py
  34. 35 0
      core/construction_review/component/reviewers/utils/llm_chain_client/interfaces/llm_client.py
  35. 62 0
      core/construction_review/component/reviewers/utils/llm_chain_client/interfaces/prompt_loader.py
  36. 68 0
      core/construction_review/component/reviewers/utils/llm_chain_client/main.py
  37. 4 0
      core/construction_review/component/reviewers/utils/llm_chain_client/orchestration/__init__.py
  38. 86 0
      core/construction_review/component/reviewers/utils/llm_chain_client/orchestration/prompt_chain_processor.py
  39. 10 0
      core/construction_review/component/reviewers/utils/llm_chain_client/requirements.txt
  40. 28 1
      core/construction_review/workflows/ai_review_workflow.py
  41. 25 0
      prompts.txt

+ 78 - 14
core/construction_review/component/ai_review_engine.py

@@ -90,10 +90,13 @@ from .reviewers.check_completeness.components.result_saver import ResultSaver
 from .reviewers.check_completeness.components.result_analyzer import ResultAnalyzer
 from .reviewers.check_completeness.utils.file_utils import write_json
 from core.construction_review.component.reviewers.base_reviewer import ReviewResult
-from .reviewers.outline_check import outline_review_results_df
+from .reviewers.outline_check import outline_review_results_df, get_empty_list_keys
 from .reviewers.check_completeness.utils.redis_csv_utils import (
     get_redis_connection,
 )
+from .reviewers.catalogues_check.utils import get_redis_manager
+from .reviewers.catalogues_check.catalogues_check import CatalogCheckProcessor, remove_common_elements_between_dataframes
+
 @dataclass
 class ReviewResult:
     """审查结果"""
@@ -786,11 +789,11 @@ class AIReviewEngine(BaseReviewer):
             analyzer = ResultAnalyzer(str(csv_path))
             processed_results = analyzer.process_results(review_results)
             spec_summary_csv_path = Path('temp') / 'document_temp' / '3_spec_review_summary.csv'
-            summary_rows = analyzer.build_spec_summary(processed_results, spec_summary_csv_path)
+            summary_rows = analyzer.build_spec_summary(processed_results)
             # logger.info(f"  规范覆盖汇总结果已保存至: {spec_summary_csv_path}")
             summary_rows = pd.DataFrame(summary_rows)
             summary_rows = summary_rows[summary_rows['标签'].isin(review_results_flag)]
-            summary_rows.to_csv(str(spec_summary_csv_path), encoding='utf-8-sig', index=False)
+            # summary_rows.to_csv(str(spec_summary_csv_path), encoding='utf-8-sig', index=False)
             summary_rows = summary_rows.to_dict('records')
             # 生成缺失要点 JSON 列表,便于前端消费
 
@@ -1001,19 +1004,80 @@ class AIReviewEngine(BaseReviewer):
                 df = read_from_redis_and_save_csv(self.redis_client, bind_id=trace_id_idx)
             else:
                 df = outline_content
-
+            
             # df = merge_results_by_classification(rows_df)
-            if df is not None:
-                df.to_csv(csv_path, encoding='utf-8-sig', index=False)
-            # 检查 df 是否为 None
-            if df is None:
-                logger.error(f"[大纲审查] Redis中不存在ID '{trace_id_idx}' 的数据,无法进行大纲审查")
-                return {
-                    'outline_review_result': {
-                        "response": [],
-                    },
-                    'error': f'Redis中不存在ID \'{trace_id_idx}\' 的数据'
+            # 兼容新旧字段名
+            review_results_col = 'review_results_summary' if 'review_results_summary' in df.columns else ('merged_review_results' if 'merged_review_results' in df.columns else 'review_result')
+            df['miss_outline'] = df[review_results_col].apply(get_empty_list_keys)
+            
+            # 兼容 chapter_label 字段名
+            chapter_label_col = 'chapter_label' if 'chapter_label' in df.columns else ('section_label_first' if 'section_label_first' in df.columns else 'section_label')
+            df['chapter_label'] = df[chapter_label_col]
+            
+            miss_outline_df = df[['chapter_label', 'chapter_classification', 'miss_outline']]
+            
+            # 从 Redis 查询目录审查结果,使用 chapter_label 作为 title 查询
+            redis_manager = get_redis_manager()
+            chapter_labels = miss_outline_df['chapter_label'].unique().tolist()
+            redis_data = redis_manager.read_catalogues_data_by_chapters(state['callback_task_id'], chapter_labels)
+            path_redis = 'temp/document_temp/redis_data.csv'
+
+            # 去除两个DataFrame中相同chapter_label行的miss_outline列与missing_items列的公共元素
+            miss_outline_df, common_elements_dict = remove_common_elements_between_dataframes(miss_outline_df, redis_data)
+            
+            # 更新 redis_data,使用apply函数去除公共元素
+            redis_data = redis_manager.update_redis_data_with_common_elements_removed(redis_data, common_elements_dict)
+            
+            # 将更新后的数据写回Redis
+            for index, row in redis_data.iterrows():
+                chapter_label = row['chapter_label']
+                # 准备要更新的数据
+                update_data = {
+                    'title': chapter_label,
+                    'chapter_label': chapter_label,
+                    'chapter_classification': row.get('chapter_classification', ''),
+                    'missing_items': row.get('missing_items', []),
+                    'miss_outline': miss_outline_df.loc[miss_outline_df['chapter_label'] == chapter_label, 'miss_outline'].values[0]
+                                   if len(miss_outline_df.loc[miss_outline_df['chapter_label'] == chapter_label]) > 0 else []
                 }
+                # 使用 update_row_by_title 方法更新Redis中的数据
+                update_success = redis_manager.update_row_by_title(state['callback_task_id'], chapter_label, update_data)
+                if update_success:
+                    logger.info(f"[大纲审查] 成功将章节 '{chapter_label}' 的更新数据写回Redis")
+                else:
+                    logger.warning(f"[大纲审查] 未能将章节 '{chapter_label}' 的数据写回Redis")
+            
+            if os.path.exists(path_redis):
+                # 文件已存在,追加时不写表头
+                redis_data.to_csv(path_redis, mode='a', encoding='utf-8-sig', index=False, header=False)
+                # df.to_csv(csv_path, mode='a', encoding='utf-8-sig', index=False, header=False)
+
+            else:
+                # 文件不存在,首次写入时写表头
+                redis_data.to_csv(path_redis, mode='w', encoding='utf-8-sig', index=False, header=True)
+
+
+            # 判断文件是否存在,决定是否写入表头
+            if os.path.exists(csv_path):
+                # 文件已存在,追加时不写表头
+                miss_outline_df.to_csv(csv_path, mode='a', encoding='utf-8-sig', index=False, header=False)
+                # df.to_csv(csv_path, mode='a', encoding='utf-8-sig', index=False, header=False)
+
+            else:
+                # 文件不存在,首次写入时写表头
+                miss_outline_df.to_csv(csv_path, mode='w', encoding='utf-8-sig', index=False, header=True)
+                # df.to_csv(csv_path, mode='w', encoding='utf-8-sig', index=False, header=True)
+            
+            # df['']
+            # 检查 df 是否为 None
+            # if df is None:
+            #     logger.error(f"[大纲审查] Redis中不存在ID '{trace_id_idx}' 的数据,无法进行大纲审查")
+            #     return {
+            #         'outline_review_result': {
+            #             "response": [],
+            #         },
+            #         'error': f'Redis中不存在ID \'{trace_id_idx}\' 的数据'
+            #     }
             
             logger.info(f"[大纲审查] 成功从Redis读取数据,共 {len(df)} 行")
 

+ 20 - 2
core/construction_review/component/document_processor.py

@@ -528,7 +528,10 @@ class DocumentProcessor:
             
             # 如果使用了智能处理,保留额外信息
             if is_smart_processing:
-                result['outline'] = self._create_outline_from_toc(raw_content.get('toc_info', {}))
+                result['outline'] = self._create_outline_from_toc(
+                    raw_content.get('toc_info', {}),
+                    raw_content.get('classification')
+                )
 
             # with open(rf"temp\document_temp\文档切分预处理结果.json", 'w', encoding='utf-8') as f:
             #     json.dump(result, f, ensure_ascii=False, indent=4)
@@ -538,14 +541,16 @@ class DocumentProcessor:
             logger.error(f"内容结构化失败: {str(e)}")
             raise
 
-    def _create_outline_from_toc(self, toc_info: Dict[str, Any]) -> Dict[str, Any]:
+    def _create_outline_from_toc(self, toc_info: Dict[str, Any], classification: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
         """
         从toc_info创建简化的大纲结构,只包含:
         1. 所有的1级标题(章节目录)
         2. 各个章节的次级目录
+        3. 各个章节的分类信息(chapter_classification)
 
         Args:
             toc_info: doc_worker返回的目录信息
+            classification: 分类信息,包含已分类的目录项
 
         Returns:
             Dict: 简化的大纲数据
@@ -561,16 +566,29 @@ class DocumentProcessor:
             # 提取所有1级标题(章节目录)
             level1_items = [item for item in toc_items if item.get('level') == 1]
 
+            # 构建一级目录标题到分类信息的映射
+            classification_map = {}
+            if classification and 'items' in classification:
+                for item in classification['items']:
+                    if item.get('level') == 1:
+                        title = item.get('title', '')
+                        classification_map[title] = item.get('category_code', '')
+
             chapters = []
             for idx, level1_item in enumerate(level1_items, 1):
                 # 查找当前1级标题下的所有次级目录
                 sub_items = self._find_sub_items(toc_items, level1_item, level1_item)
 
+                # 获取一级目录的分类信息
+                title = level1_item.get('title', '')
+                chapter_classification = classification_map.get(title, '')
+
                 chapter_info = {
                     'index': idx,
                     'title': level1_item['title'],
                     'page': level1_item['page'],
                     'original': level1_item.get('original', level1_item['title']),
+                    'chapter_classification': chapter_classification,  # 一级目录的所属分类
                     'subsections': sub_items  # 次级目录
                 }
                 chapters.append(chapter_info)

+ 499 - 0
core/construction_review/component/reviewers/catalogues_check/catalogues_check.py

@@ -0,0 +1,499 @@
+"""
+目录审查主程序
+使用llm_chain_client模块审查施工方案目录,找出缺失的目录项
+"""
+import asyncio
+import json
+import csv
+import logging
+import ast
+from pathlib import Path
+from typing import Dict, List, Any
+import sys
+from typing import List, Dict, Any, Union
+
+import pandas as pd
+
+from ..utils.llm_chain_client.bootstrap import Bootstrap
+from ..utils.llm_chain_client.orchestration import PromptChainProcessor
+
+# 配置日志
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+
+class CatalogCheckProcessor:
+    """目录审查处理器"""
+
+    def __init__(self, processor: PromptChainProcessor):
+        """
+        初始化处理器
+
+        Args:
+            processor: 提示链处理器实例
+        """
+        self.processor = processor
+
+    def load_specifications(self, spec_file: str) -> Dict[str, Dict[str, Any]]:
+        """
+        加载规范要求文件
+
+        Args:
+            spec_file: 规范文件路径
+
+        Returns:
+            按标签分组的规范要求字典
+            {
+                "basis": {
+                    "一级目录": "编制依据",
+                    "二级目录": ["法律法规", "标准规范", ...]
+                },
+                ...
+            }
+        """
+        logger.info(f"加载规范要求文件: {spec_file}")
+
+        specifications = {}
+
+        # 尝试不同的编码读取文件:先尝试 utf-8-sig(支持 BOM),然后尝试 utf-16,最后尝试中文编码(GBK/GB2312/GB18030)
+        encodings = ['utf-8-sig', 'utf-16', 'gbk', 'gb2312', 'gb18030']
+        content = None
+        used_encoding = None
+
+        for encoding in encodings:
+            try:
+                with open(spec_file, 'r', encoding=encoding) as f:
+                    content = f.read()
+                used_encoding = encoding
+                logger.info(f"成功使用 {encoding} 编码读取文件")
+                break
+            except UnicodeDecodeError:
+                continue
+
+        if content is None:
+            raise ValueError(f"无法使用常见编码读取文件: {spec_file}")
+
+        # 解析CSV内容(使用制表符作为分隔符)
+        lines = content.strip().split('\n')
+        reader = csv.reader(lines, delimiter='\t')
+
+        # 跳过标题行
+        next(reader, None)
+
+        for row in reader:
+            if len(row) >= 3:
+                label = row[0].strip()
+                primary_dir = row[1].strip()
+                secondary_dir = row[2].strip()
+
+                if label not in specifications:
+                    specifications[label] = {
+                        "一级目录": primary_dir,
+                        "二级目录": []
+                    }
+
+                # 避免重复添加
+                if secondary_dir not in specifications[label]["二级目录"]:
+                    specifications[label]["二级目录"].append(secondary_dir)
+
+        logger.info(f"加载规范要求完成,共 {len(specifications)} 个标签")
+        return specifications
+
+    def load_catalog_data(self, csv_file: str) -> List[Dict[str, Any]]:
+        """
+        加载待审查目录数据
+
+        Args:
+            csv_file: CSV文件路径
+
+        Returns:
+            目录数据列表
+        """
+        logger.info(f"加载待审查目录文件: {csv_file}")
+
+        catalog_data = []
+
+        with open(csv_file, 'r', encoding='utf-8-sig') as f:
+            reader = csv.DictReader(f)
+
+            for row in reader:
+                # 解析subsections列(Python字典字符串,使用单引号)
+                subsections_str = row.get('subsections', '[]')
+                try:
+                    # 尝试使用 ast.literal_eval 解析(支持单引号)
+                    subsections = ast.literal_eval(subsections_str)
+                    # 确保结果是列表
+                    if not isinstance(subsections, list):
+                        subsections = []
+                except (ValueError, SyntaxError):
+                    # 如果解析失败,尝试使用 json.loads(需要双引号)
+                    try:
+                        subsections = json.loads(subsections_str)
+                    except json.JSONDecodeError:
+                        subsections = []
+
+                catalog_data.append({
+                    'index': row.get('index', ''),
+                    'title': row.get('title', ''),
+                    'page': row.get('page', ''),
+                    'chapter_classification': row.get('chapter_classification', ''),
+                    'subsections': subsections
+                })
+
+        logger.info(f"加载待审查目录完成,共 {len(catalog_data)} 个章节")
+        return catalog_data
+
+    def build_requirements_text(self, spec: Dict[str, Any]) -> str:
+        """
+        构造规范要求文本
+
+        Args:
+            spec: 规范要求字典
+
+        Returns:
+            规范要求文本
+            例如: "编制依据章节应包含1.法律法规、2.标准规范、3.文件制度等方面的内容"
+        """
+        primary_dir = spec["一级目录"]
+        secondary_dirs = spec["二级目录"]
+
+        # 构造二级目录列表,带序号
+        secondary_list = [
+            f"{i+1}.{item}"
+            for i, item in enumerate(secondary_dirs)
+        ]
+
+        # 用顿号连接
+        secondary_text = "、".join(secondary_list)
+
+        return f"{primary_dir}章节应包含{secondary_text}等方面的内容"
+
+    def build_catalog_content_text(self, subsections: List[Dict[str, Any]]) -> str:
+        """
+        构造待审查目录文本
+
+        Args:
+            subsections: 二级目录项列表
+
+        Returns:
+            目录文本
+        """
+        if not subsections:
+            return "待审查目录为空"
+
+        titles = [item.get('title', '') for item in subsections]
+        return f"待审查目录包含:{'、'.join(titles)}"
+
+    async def check_catalog(
+        self,
+        chapter_title: str,
+        catalog_content: str,
+        requirements: str
+    ) -> str:
+        """
+        检查目录,找出缺失的目录项
+
+        Args:
+            chapter_title: 章节标题
+            catalog_content: 待审查目录内容
+            requirements: 规范要求
+
+        Returns:
+            缺失的目录项序号(如:"3,5" 或 "无缺失")
+        """
+        # 准备输入数据
+        input_data = {
+            "chapter_title": chapter_title,
+            "catalog_content": catalog_content,
+            "requirements": requirements
+        }
+
+        # 执行提示链 - 使用绝对路径
+        current_dir = Path(__file__).parent
+        chain_config_path = str(current_dir / "config" / "prompts" / "catalog_check_chain.yaml")
+
+        try:
+            result = await self.processor.process(
+                chain_config_path=chain_config_path,
+                input_data=input_data
+            )
+
+            # 获取最终结果
+            missing_items = result.get("final_result", "")
+            return missing_items
+
+        except Exception as e:
+            logger.error(f"目录检查失败: {e}")
+            return f"检查失败: {str(e)}"
+
+    async def process_all_catalogs(
+        self,
+        spec_file: str | Any,
+        catalog_file: str | Any
+    ) -> List[Dict[str, Any]]:
+        """
+        处理所有章节的目录审查
+
+        Args:
+            spec_file: 规范要求文件路径
+            catalog_file: 待审查目录文件路径
+
+        Returns:
+            审查结果列表
+        """
+        # 加载数据
+        if type(spec_file) == str:
+            specifications = self.load_specifications(spec_file)
+        else:
+            specifications = spec_file
+        
+        if type(catalog_file) == str:
+            catalog_data = self.load_catalog_data(catalog_file)
+        else:
+            # catalog_file 是列表(如 original_outline),转换为 DataFrame
+            catalog_data = pd.DataFrame(catalog_file)
+            # 确保数据格式正确,转换为字典列表
+            catalog_data = catalog_data.to_dict('records')
+
+        results = []
+
+        for i, catalog in enumerate(catalog_data):
+            logger.info(f"处理第 {i+1}/{len(catalog_data)} 个章节: {catalog['title']}")
+
+            # 获取章节分类
+            label = catalog.get('chapter_classification', '')
+
+            # 查找规范要求
+            if label not in specifications:
+                logger.warning(f"未找到标签 '{label}' 的规范要求")
+                results.append({
+                    'index': catalog['index'],
+                    'title': catalog['title'],
+                    'chapter_classification': label,
+                    'missing_items': f"未找到标签 '{label}' 的规范要求"
+                })
+                continue
+
+            spec = specifications[label]
+
+            # 构造规范要求文本
+            requirements = self.build_requirements_text(spec)
+
+            # 构造待审查目录文本
+            catalog_content = self.build_catalog_content_text(catalog['subsections'])
+
+            # 检查目录
+            missing_items = await self.check_catalog(
+                chapter_title=catalog['title'],
+                catalog_content=catalog_content,
+                requirements=requirements
+            )
+
+            # 记录结果
+            results.append({
+                'index': catalog['index'],
+                'title': catalog['title'],
+                'chapter_classification': label,
+                'missing_items': missing_items
+            })
+
+            logger.info(f"审查结果: {missing_items}")
+
+        # 将缺失项的数字替换为对应的项名称
+        results = self._replace_missing_numbers_with_names(results, specifications)
+
+        return results
+
+    def _replace_missing_numbers_with_names(
+        self,
+        results: List[Dict[str, Any]],
+        specifications: Dict[str, Dict[str, Any]]
+    ) -> List[Dict[str, Any]]:
+        """
+        将缺失项的数字替换为对应的项名称
+
+        Args:
+            results: 审查结果列表
+            specifications: 规范要求字典
+
+        Returns:
+            替换后的结果列表
+        """
+        for result in results:
+            label = result.get('chapter_classification', '')
+            missing_items_str = result.get('missing_items', '')
+
+            # 如果没有缺失项,跳过
+            if not missing_items_str or missing_items_str == '无缺失':
+                result['missing_items'] = json.dumps([], ensure_ascii=False)
+                continue
+
+            # 获取对应的规范
+            if label not in specifications:
+                logger.warning(f"未找到标签 '{label}' 的规范要求,无法替换缺失项")
+                continue
+
+            spec = specifications[label]
+            secondary_dirs = spec.get('二级目录', [])
+
+            # 解析缺失项数字
+            try:
+                missing_numbers = [int(x.strip()) for x in missing_items_str.split(',')]
+            except (ValueError, AttributeError):
+                logger.warning(f"无法解析缺失项: {missing_items_str}")
+                continue
+
+            # 将数字替换为对应的项名称
+            missing_names = []
+            for num in missing_numbers:
+                if 1 <= num <= len(secondary_dirs):
+                    missing_names.append(secondary_dirs[num - 1])
+                else:
+                    logger.warning(f"缺失项编号 {num} 超出范围,标签 '{label}' 只有 {len(secondary_dirs)} 项")
+
+            # 更新结果,保存为JSON列表字符串
+            result['missing_items'] = json.dumps(missing_names, ensure_ascii=False)
+
+        return results
+
+    def save_results(self, results: List[Dict[str, Any]], output_file: str):
+        """
+        保存审查结果
+
+        Args:
+            results: 审查结果列表
+            output_file: 输出文件路径
+        """
+        logger.info(f"保存审查结果到: {output_file}")
+
+        with open(output_file, 'w', encoding='utf-8-sig', newline='') as f:
+            writer = csv.DictWriter(f, fieldnames=['index', 'title', 'chapter_classification', 'missing_items'])
+            writer.writeheader()
+            writer.writerows(results)
+
+        logger.info(f"审查结果已保存,共 {len(results)} 条记录")
+
+
+def remove_common_elements_between_dataframes(
+    miss_outline_df: pd.DataFrame,
+    redis_data: pd.DataFrame
+) -> tuple[pd.DataFrame, Dict[str, set]]:
+    """
+    去除两个DataFrame中相同chapter_label行的miss_outline列与missing_items列的公共元素
+
+    Args:
+        miss_outline_df: 包含miss_outline列的DataFrame
+        redis_data: 包含missing_items列的DataFrame
+
+    Returns:
+        tuple: (更新后的miss_outline_df, 公共元素字典)
+    """
+    # 合并两个DataFrame,基于chapter_label
+    merged_df = pd.merge(miss_outline_df, redis_data, on='chapter_label', how='inner', suffixes=('_outline', '_redis'))
+    
+    # 创建一个字典来存储公共元素
+    common_elements_dict = {}
+    
+    # 遍历合并后的DataFrame,计算公共元素
+    for index, row in merged_df.iterrows():
+        chapter_label = row['chapter_label']
+        miss_outline_list = row['miss_outline']
+        missing_items_list = row['missing_items']
+        
+        # 确保列表类型正确
+        if not isinstance(miss_outline_list, list):
+            miss_outline_list = []
+        if not isinstance(missing_items_list, list):
+            missing_items_list = []
+        
+        # 转换为集合以便计算差集
+        miss_outline_set = set(miss_outline_list)
+        missing_items_set = set(missing_items_list)
+        
+        # 计算公共元素
+        common_elements = miss_outline_set & missing_items_set
+        
+        # 存储公共元素
+        common_elements_dict[chapter_label] = common_elements
+        
+        logger.info(f"[目录审查] 章节: {chapter_label}, 公共元素: {common_elements}")
+    
+    # 更新 miss_outline_df,使用apply函数去除公共元素
+    miss_outline_df['miss_outline'] = miss_outline_df.apply(
+        lambda row: list(set(row['miss_outline']) - common_elements_dict.get(row['chapter_label'], set()))
+        if isinstance(row['miss_outline'], list) else [],
+        axis=1
+    )
+    
+    logger.info(f"[目录审查] 已去除公共元素,更新后的miss_outline_df: {miss_outline_df.to_dict('records')}")
+    
+    return miss_outline_df, common_elements_dict
+
+
+async def catalogues_check(catalog_file = None):
+    """主函数"""
+    # 获取当前文件所在目录
+    current_dir = Path(__file__).parent
+    
+    # 创建提示链处理器
+    processor = Bootstrap.create_processor(
+        model_type=None,  # 从配置文件读取
+        prompts_dir=str(current_dir / "config" / "prompts"),
+        config_path=str(current_dir / "config" / "llm_api.yaml")
+    )
+
+    # 创建目录审查处理器
+    catalog_processor = CatalogCheckProcessor(processor)
+
+    # 定义文件路径
+    spec_file = str(current_dir / "config" / "Construction_Plan_Content_Specification.csv")
+
+    # 处理所有章节
+    results = await catalog_processor.process_all_catalogs(
+        spec_file=spec_file,
+        catalog_file=catalog_file
+    )
+    return results
+
+async def main():
+    """主函数"""
+    # 获取当前文件所在目录
+    current_dir = Path(__file__).parent
+    
+    # 创建提示链处理器
+    processor = Bootstrap.create_processor(
+        model_type=None,  # 从配置文件读取
+        prompts_dir=str(current_dir / "config" / "prompts"),
+        config_path=str(current_dir / "config" / "llm_api.yaml")
+    )
+
+    # 创建目录审查处理器
+    catalog_processor = CatalogCheckProcessor(processor)
+
+    # 定义文件路径
+    spec_file = str(current_dir / "config" / "Construction_Plan_Content_Specification.csv")
+    catalog_file = "文档切分预处理结果.csv"
+    output_file = "catalog_check_results.csv"
+
+    # 处理所有章节
+    results = await catalog_processor.process_all_catalogs(
+        spec_file=spec_file,
+        catalog_file=catalog_file
+    )
+
+    # 保存结果
+    catalog_processor.save_results(results, output_file)
+
+    # 打印摘要
+    logger.info("=" * 50)
+    logger.info("目录审查完成")
+    logger.info(f"共处理 {len(results)} 个章节")
+    logger.info(f"结果已保存到: {output_file}")
+    logger.info("=" * 50)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())

BIN
core/construction_review/component/reviewers/catalogues_check/config/Construction_Plan_Content_Specification.csv


+ 33 - 0
core/construction_review/component/reviewers/catalogues_check/config/llm_api.yaml

@@ -0,0 +1,33 @@
+MODEL_TYPE: qwen
+
+gemini:
+  GEMINI_SERVER_URL: https://generativelanguage.googleapis.com/v1beta/openai/
+  GEMINI_MODEL_ID: gemini-2.0-flash
+  GEMINI_API_KEY: YOUR_GEMINI_API_KEY_FOR_RAG_EVAL
+
+deepseek:
+  DEEPSEEK_SERVER_URL: https://api.deepseek.com
+  DEEPSEEK_MODEL_ID: deepseek-chat
+  DEEPSEEK_API_KEY: YOUR_DEEPSEEK_API_KEY_FOR_RAG_EVAL
+
+doubao:
+  DOUBAO_SERVER_URL: https://ark.cn-beijing.volces.com/api/v3/
+  DOUBAO_MODEL_ID: doubao-seed-1-6-flash-250715
+  DOUBAO_API_KEY: YOUR_DOUBAO_API_KEY_FOR_RAG_EVAL
+
+qwen:
+  QWEN_SERVER_URL: http://192.168.91.253:8003/v1/
+  QWEN_MODEL_ID: qwen3-30b
+  QWEN_API_KEY: sk-123456
+  # QWEN_SERVER_URL: http://192.168.91.253:9002/v1/
+  # QWEN_MODEL_ID: Qwen3-8B
+  # QWEN_API_KEY: sk-123456
+
+keywords:
+  timeout: 30
+  max_retries: 2
+  concurrent_workers: 20
+  stream: false
+  request_payload:
+    temperature: 0.3
+    max_tokens: 1024

+ 42 - 0
core/construction_review/component/reviewers/catalogues_check/config/prompt.yaml

@@ -0,0 +1,42 @@
+content_review:
+  system: |
+    你是一名工程与施工领域的专业文档审查专家,负责审查施工方案文档的内容完整性。
+    - 仔细分析待审查文本内容,识别文本中实际包含的审查要点;
+    - 对于每个二级目录,检查文本中包含了哪些编号的要点,将这些编号记录在列表中;
+    - 如果某个二级目录的要点一个都没有包含,则返回空列表[];
+    - 判断要严格但合理,如果文本内容能够满足要点的核心要求,即使表述方式不同,也应判定为已包含;
+    - 每个二级目录的要点编号必须严格在给定范围内(例如“只允许使用编号1~2”),严禁编造超出范围的编号(如3、4、5等);
+    - 不得跳过不存在的编号,也不得添加规范中未定义的额外要点编号;
+    - 只输出JSON格式,不要添加任何解释性文字;
+
+    - /no_think
+  user_template: |
+    任务:审查施工方案文档内容,识别文本中实际包含的审查要点。
+
+    待审查文本内容:
+    {{ content }}
+
+    审查要点要求:
+    {{ requirements }}
+
+    输出格式:必须严格按照以下JSON格式输出审查结果:
+    {
+      "二级目录名称1": [要点编号列表,如: [1, 2]],
+      "二级目录名称2": [要点编号列表,如: [1]],
+      "二级目录名称3": []
+    }
+    
+    说明:
+    - JSON对象的字段名必须是二级目录名称(如"法律法规"、"标准规范"等);
+    - 每个字段的值是一个整数数组,表示文本中包含的要点编号;
+    - 每个二级目录的要点编号必须在对应说明中给出的范围之内(例如“只允许使用编号1~2”时,只能使用1或2),不能发明更大的编号;
+    - 如果某个二级目录的要点一个都没有包含,该字段的值应为空数组[];
+    - 只输出JSON对象,不要添加任何解释性文字。
+
+
+
+
+
+
+
+

+ 34 - 0
core/construction_review/component/reviewers/catalogues_check/config/prompts/catalog_check.yaml

@@ -0,0 +1,34 @@
+# 目录审查提示词模板
+name: "目录审查"
+description: "根据规范要求审查施工方案目录,找出缺失的目录项"
+
+system: |
+  你是一名专业的施工方案文档审查专家,负责审查施工方案目录的完整性。
+  - 仔细分析待审查的目录内容,识别其中实际包含的二级目录项;
+  - 根据规范要求,对比待审查目录与规范要求的差异;
+  - 找出规范要求中存在但待审查目录中缺失的二级目录项;
+  - 只输出缺失项的序号数字,不要添加任何解释性文字;
+  - 如果所有规范要求的目录项都已包含,则输出"无缺失";
+  - 严格基于提供的规范要求进行判断,不要添加额外的要求。
+
+  - /no_think
+
+user_template: |
+  任务:审查施工方案目录,找出缺失的目录项。
+
+  待审查章节标题:
+  {{ chapter_title }}
+
+  待审查目录内容:
+  {{ catalog_content }}
+
+  规范要求:
+  {{ requirements }}
+
+  输出格式:
+  - 只输出缺失的目录项序号数字,多个数字用逗号分隔(如:3,5)
+  - 如果所有规范要求的目录项都已包含,则输出"无缺失"
+  - 不要添加任何解释性文字
+
+output_parser:
+  type: "text"

+ 8 - 0
core/construction_review/component/reviewers/catalogues_check/config/prompts/catalog_check_chain.yaml

@@ -0,0 +1,8 @@
+# 目录审查提示链配置文件
+chain_name: "目录审查链"
+description: "根据规范要求审查施工方案目录,找出缺失的目录项"
+
+steps:
+  - name: "catalog_check"
+    prompt_file: "catalog_check"
+    output_key: "missing_items"

+ 18 - 0
core/construction_review/component/reviewers/catalogues_check/config/prompts/chain_config.yaml

@@ -0,0 +1,18 @@
+# 提示链配置文件
+chain_name: "文档处理链"
+description: "处理文档的完整流程:提取、分析、总结"
+
+steps:
+  - name: "step1_extract"
+    prompt_file: "step1_extract.yaml"
+    output_key: "extracted_data"
+
+  - name: "step2_analyze"
+    prompt_file: "step2_analyze.yaml"
+    input_from: "extracted_data"
+    output_key: "analysis_result"
+
+  - name: "step3_summary"
+    prompt_file: "step3_summary.yaml"
+    input_from: "analysis_result"
+    output_key: "final_summary"

+ 24 - 0
core/construction_review/component/reviewers/catalogues_check/config/prompts/step1_extract.yaml

@@ -0,0 +1,24 @@
+# 第一步:提取关键信息
+name: "提取关键信息"
+description: "从文本中提取关键信息"
+
+system: |
+  你是一名专业的信息提取专家,负责从文本中提取关键信息。
+
+user_template: |
+  请从以下文本中提取关键信息:
+
+  文本内容:
+  {{ content }}
+
+  请提取以下字段:
+  - title: 文本标题或主题
+  - summary: 文本摘要(不超过100字)
+  - keywords: 关键词列表(3-5个)
+  - category: 文本分类
+
+  输出格式为JSON。
+
+output_parser:
+  type: "json"
+  required_fields: ["title", "summary", "keywords", "category"]

+ 24 - 0
core/construction_review/component/reviewers/catalogues_check/config/prompts/step2_analyze.yaml

@@ -0,0 +1,24 @@
+# 第二步:分析内容
+name: "分析内容"
+description: "对提取的信息进行深度分析"
+
+system: |
+  你是一名专业的文本分析师,负责对文本信息进行深度分析和评估。
+
+user_template: |
+  请对以下提取的信息进行深度分析:
+
+  提取的信息:
+  {{ input }}
+
+  请分析以下方面:
+  - relevance: 内容相关性评分(1-10分)
+  - quality: 内容质量评分(1-10分)
+  - insights: 关键洞察(3-5条)
+  - suggestions: 改进建议(2-3条)
+
+  输出格式为JSON。
+
+output_parser:
+  type: "json"
+  required_fields: ["relevance", "quality", "insights", "suggestions"]

+ 24 - 0
core/construction_review/component/reviewers/catalogues_check/config/prompts/step3_summary.yaml

@@ -0,0 +1,24 @@
+# 第三步:生成总结
+name: "生成总结"
+description: "基于分析结果生成最终总结"
+
+system: |
+  你是一名专业的总结专家,负责基于分析结果生成清晰、简洁的总结报告。
+
+user_template: |
+  请基于以下分析结果生成最终总结:
+
+  分析结果:
+  {{ input }}
+
+  请生成包含以下内容的总结:
+  - executive_summary: 执行摘要(50-100字)
+  - key_findings: 关键发现(3-5条)
+  - overall_score: 综合评分(1-10分)
+  - recommendation: 最终建议
+
+  输出格式为JSON。
+
+output_parser:
+  type: "json"
+  required_fields: ["executive_summary", "key_findings", "overall_score", "recommendation"]

+ 186 - 0
core/construction_review/component/reviewers/catalogues_check/utils/README.md

@@ -0,0 +1,186 @@
+# Redis 目录审查结果处理器使用说明
+
+## 功能特性
+
+1. **动态任务 ID 支持** - 使用 `callback_task_id` 作为 Redis key
+2. **按 title 筛选读取** - 直接使用 title 作为 Redis Hash field,O(1) 查询
+3. **单行更新覆盖** - 支持按 row_index 或 title 更新单行数据
+
+## 快速开始
+
+### 1. 导入模块
+
+```python
+from core.construction_review.component.reviewers.catalogues_check.utils import get_redis_manager
+import pandas as pd
+```
+
+### 2. 获取 Redis 管理器
+
+```python
+redis_manager = get_redis_manager()
+```
+
+### 3. 存储 DataFrame 数据
+
+```python
+# 在 ai_review_workflow.py 中使用
+outline_results = await catalogues_check(original_outline)
+outline_results = pd.DataFrame(outline_results)
+
+# 存储到 Redis,使用 callback_task_id 作为任务 ID
+task_id = self.task_info.callback_task_id
+redis_manager.store_dataframe(outline_results, task_id)
+```
+
+### 4. 读取所有数据
+
+```python
+# 读取所有数据
+df = redis_manager.read_all(task_id)
+print(df)
+```
+
+### 5. 按 title 筛选读取单行
+
+```python
+# 读取单行数据
+row = redis_manager.read_by_title(task_id, "第一章编制依据")
+print(row)
+```
+
+### 6. 按 title 筛选读取多行
+
+```python
+# 读取多行数据
+titles = ["第一章编制依据", "第二章工程概况"]
+df = redis_manager.read_by_titles(task_id, titles)
+print(df)
+```
+
+### 7. 按行号更新单行
+
+```python
+# 更新第 1 行数据
+new_data = {
+    "index": 1,
+    "title": "第一章编制依据",
+    "chapter_classification": "basis",
+    "missing_items": '["法律法规", "标准规范", "新增项"]'
+}
+redis_manager.update_row_by_index(task_id, 1, new_data)
+```
+
+### 8. 按 title 更新单行
+
+```python
+# 按 title 更新数据
+new_data = {
+    "index": 1,
+    "title": "第一章编制依据",
+    "chapter_classification": "basis",
+    "missing_items": '["法律法规", "标准规范", "更新项"]'
+}
+redis_manager.update_row_by_title(task_id, "第一章编制依据", new_data)
+```
+
+### 9. 删除任务数据
+
+```python
+# 删除指定任务的所有数据
+redis_manager.delete_task_data(task_id)
+```
+
+### 10. 检查数据是否存在
+
+```python
+# 检查数据是否存在
+if redis_manager.exists(task_id):
+    print("数据存在")
+else:
+    print("数据不存在")
+```
+
+### 11. 获取行数
+
+```python
+# 获取行数
+count = redis_manager.get_row_count(task_id)
+print(f"共 {count} 行数据")
+```
+
+## Redis 数据结构
+
+### 数据存储结构(直接使用 title 作为 field)
+```
+catalogues_check:{task_id}:data (Hash)
+├── "第一章编制依据": {"index": 1, "title": "第一章编制依据", "chapter_classification": "basis", "missing_items": "..."}
+├── "第二章工程概况": {"index": 2, "title": "第二章工程概况", "chapter_classification": "overview", "missing_items": "..."}
+├── "第三章施工计划": {"index": 3, "title": "第三章施工计划", ...}
+├── ...
+└── "_row_count": 12
+```
+
+**优势**:
+- 直接使用 title 作为 field key,无需额外索引
+- 按 title 读取和更新都是 O(1) 时间复杂度
+- 数据结构更简洁直观
+
+## 完整示例
+
+```python
+import asyncio
+import pandas as pd
+from core.construction_review.component.reviewers.catalogues_check.utils import get_redis_manager
+
+async def example_usage():
+    # 获取 Redis 管理器
+    redis_manager = get_redis_manager()
+
+    # 模拟数据
+    data = [
+        {"index": 1, "title": "第一章编制依据", "chapter_classification": "basis", "missing_items": '["法律法规", "标准规范"]'},
+        {"index": 2, "title": "第二章工程概况", "chapter_classification": "overview", "missing_items": '["设计概况"]'},
+    ]
+    df = pd.DataFrame(data)
+
+    # 存储数据
+    task_id = "test-task-123"
+    redis_manager.store_dataframe(df, task_id)
+    print(f"存储了 {redis_manager.get_row_count(task_id)} 行数据")
+
+    # 读取所有数据
+    print("\n所有数据:")
+    print(redis_manager.read_all(task_id))
+
+    # 按 title 读取单行
+    print("\n按 title 读取:")
+    print(redis_manager.read_by_title(task_id, "第一章编制依据"))
+
+    # 更新数据
+    print("\n更新前:")
+    print(redis_manager.read_by_title(task_id, "第一章编制依据"))
+
+    redis_manager.update_row_by_title(task_id, "第一章编制依据", {
+        "index": 1,
+        "title": "第一章编制依据",
+        "chapter_classification": "basis",
+        "missing_items": '["法律法规", "标准规范", "新增项"]'
+    })
+
+    print("\n更新后:")
+    print(redis_manager.read_by_title(task_id, "第一章编制依据"))
+
+    # 清理数据
+    redis_manager.delete_task_data(task_id)
+
+if __name__ == "__main__":
+    asyncio.run(example_usage())
+```
+
+## 注意事项
+
+1. Redis 配置从 `config/config.ini` 文件中读取
+2. 使用单例模式管理 Redis 连接,避免重复创建连接
+3. 直接使用 title 作为 field key,按 title 读取和更新都是 O(1) 时间复杂度
+4. 所有操作都有详细的日志记录

+ 9 - 0
core/construction_review/component/reviewers/catalogues_check/utils/__init__.py

@@ -0,0 +1,9 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+catalogues_check 模块工具包
+"""
+
+from .redis_utils import CataloguesRedisManager, get_redis_manager
+
+__all__ = ['CataloguesRedisManager', 'get_redis_manager']

+ 373 - 0
core/construction_review/component/reviewers/catalogues_check/utils/redis_utils.py

@@ -0,0 +1,373 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+Redis 目录审查结果处理器
+功能:专门为 catalogues_check 模块设计的 Redis 读写工具
+支持:
+1. 动态任务 ID 支持 - 使用 callback_task_id 作为 Redis key
+2. 按 title 筛选读取 - 直接使用 title 作为 Redis Hash field,O(1) 查询
+3. 单行更新覆盖 - 支持按 row_index 或 title 更新单行数据
+"""
+
+import json
+import pandas as pd
+import redis
+import configparser
+import os
+import ast
+from typing import Optional, Dict, Any, List, Union
+
+from foundation.observability.logger.loggering import server_logger as logger
+
+
class CataloguesRedisManager:
    """Redis manager for catalogue-review (catalogues_check) results.

    All rows for one task live in a single Redis hash
    (``catalogues_check:{task_id}:data``), keyed directly by each row's
    ``title`` value, so per-title reads and updates are O(1). A special
    ``_row_count`` field holds the number of stored data rows.
    """

    def __init__(self):
        """Load connection settings; the Redis client itself is created lazily."""
        self._load_redis_config()
        self._redis_client = None

    def _load_redis_config(self):
        """Read Redis connection settings from ``config/config.ini``."""
        config = configparser.ConfigParser()
        # NOTE(review): resolved relative to this file with six ".." hops;
        # if the package moves, config.read silently finds nothing and the
        # fallbacks below apply.
        config_path = os.path.join(os.path.dirname(__file__), '../../../../../../config/config.ini')
        config.read(config_path, encoding='utf-8')

        self.REDIS_HOST = config.get('redis', 'REDIS_HOST', fallback='localhost')
        self.REDIS_PORT = config.getint('redis', 'REDIS_PORT', fallback=6379)
        self.REDIS_PASSWORD = config.get('redis', 'REDIS_PASSWORD', fallback='')
        self.REDIS_DB = config.getint('redis', 'REDIS_DB', fallback=0)

    def get_redis_client(self):
        """Return the cached Redis client, creating and ping-testing it once.

        Raises:
            Exception: propagated from redis when the server is unreachable.
        """
        if self._redis_client is None:
            try:
                client = redis.Redis(
                    host=self.REDIS_HOST,
                    port=self.REDIS_PORT,
                    password=self.REDIS_PASSWORD,
                    db=self.REDIS_DB,
                    decode_responses=True
                )
                # Verify connectivity BEFORE caching: the previous version
                # assigned first, so a failed ping left a broken client
                # cached and returned by every later call without re-check.
                client.ping()
                self._redis_client = client
                logger.info(f"Redis 连接成功: {self.REDIS_HOST}:{self.REDIS_PORT}")
            except Exception as e:
                logger.error(f"Redis 连接失败: {e}")
                raise
        return self._redis_client

    def _get_data_key(self, task_id: str) -> str:
        """Build the Redis hash key holding all rows of *task_id*."""
        return f"catalogues_check:{task_id}:data"

    def store_dataframe(self, df: pd.DataFrame, task_id: str) -> int:
        """Store *df* into Redis, one hash field per row keyed by ``title``.

        Rows with an empty ``title`` are skipped and duplicate titles
        overwrite each other, so ``_row_count`` is taken from the hash size
        after writing (previously it recorded ``len(df)``, which could
        disagree with the data actually stored).

        Args:
            df: DataFrame to store; each row should carry a ``title`` column.
            task_id: task identifier (callback_task_id).

        Returns:
            int: number of rows actually stored.
        """
        redis_client = self.get_redis_client()
        data_key = self._get_data_key(task_id)

        logger.info(f"开始存储目录审查结果到 Redis,task_id: {task_id}")

        # Drop stale data for this task before writing the new snapshot.
        redis_client.delete(data_key)

        for row in df.to_dict('records'):
            title = row.get('title', '')
            if title:
                redis_client.hset(data_key, title, json.dumps(row, ensure_ascii=False))

        # hlen reflects the distinct titles actually written.
        stored = redis_client.hlen(data_key)
        redis_client.hset(data_key, "_row_count", stored)

        logger.info(f"数据存储完成,共 {stored} 行")
        return stored

    def read_all(self, task_id: str) -> pd.DataFrame:
        """Read every stored row of *task_id* into a DataFrame.

        Row order follows Redis hash enumeration and is not guaranteed.

        Returns:
            pd.DataFrame: all rows, or an empty DataFrame if nothing stored.
        """
        redis_client = self.get_redis_client()
        data_key = self._get_data_key(task_id)

        logger.info(f"从 Redis 读取所有数据,task_id: {task_id}")

        if not redis_client.exists(data_key):
            logger.warning(f"Redis 中不存在 task_id '{task_id}' 的数据,返回空 DataFrame")
            return pd.DataFrame()

        # Single HGETALL round trip instead of one HGET per field.
        stored = redis_client.hgetall(data_key)
        rows = [json.loads(value) for field, value in stored.items()
                if field != "_row_count" and value]

        logger.info(f"读取完成,共 {len(rows)} 行")
        return pd.DataFrame(rows)

    def read_by_title(self, task_id: str, title: str) -> Optional[Dict[str, Any]]:
        """Read a single row by its ``title`` (O(1) hash lookup).

        Args:
            task_id: task identifier.
            title: title value to look up.

        Returns:
            The row dict, or None when the title is absent.
        """
        redis_client = self.get_redis_client()
        data_key = self._get_data_key(task_id)

        logger.info(f"按 title 读取数据,task_id: {task_id}, title: {title}")

        row_json = redis_client.hget(data_key, title)
        if row_json:
            return json.loads(row_json)

        logger.warning(f"未找到 title '{title}' 的数据")
        return None

    def read_by_titles(self, task_id: str, titles: List[str]) -> pd.DataFrame:
        """Read several rows by title; missing titles are silently skipped.

        Returns:
            pd.DataFrame: the matched rows (possibly empty).
        """
        rows = []
        for title in titles:
            row = self.read_by_title(task_id, title)
            if row:
                rows.append(row)

        logger.info(f"按 titles 读取完成,找到 {len(rows)} 行")
        return pd.DataFrame(rows)

    def update_row_by_index(self, task_id: str, row_index: int, row_data: Dict[str, Any]) -> bool:
        """Update one row addressed by its logical row number (1-based).

        The target is located by matching each stored row's ``index`` field;
        if no row carries that index the method falls back to hash-field
        enumeration order (the previous version relied on that order alone,
        but Redis does not guarantee a stable hash field order).

        Args:
            task_id: task identifier.
            row_index: 1-based row number.
            row_data: replacement row content.

        Returns:
            bool: True on success, False when *row_index* is out of range.
        """
        redis_client = self.get_redis_client()
        data_key = self._get_data_key(task_id)

        logger.info(f"按行号更新数据,task_id: {task_id}, row_index: {row_index}")

        stored = redis_client.hgetall(data_key)
        fields = [f for f in stored if f != "_row_count"]

        if row_index < 1 or row_index > len(fields):
            logger.warning(f"行号 {row_index} 超出范围")
            return False

        # Prefer the row whose stored 'index' field matches row_index.
        title = None
        for field in fields:
            try:
                if json.loads(stored[field]).get('index') == row_index:
                    title = field
                    break
            except (ValueError, TypeError):
                continue
        if title is None:
            # Best-effort positional fallback (field order is unspecified).
            title = fields[row_index - 1]

        redis_client.hset(data_key, title, json.dumps(row_data, ensure_ascii=False))

        logger.info(f"行号 {row_index} (title: {title}) 更新成功")
        return True

    def update_row_by_title(self, task_id: str, title: str, row_data: Dict[str, Any]) -> bool:
        """Overwrite the row stored under *title*.

        Returns:
            bool: True on success, False when the title does not exist.
        """
        redis_client = self.get_redis_client()
        data_key = self._get_data_key(task_id)

        logger.info(f"按 title 更新数据,task_id: {task_id}, title: {title}")

        if not redis_client.hexists(data_key, title):
            logger.warning(f"未找到 title '{title}' 的数据")
            return False

        redis_client.hset(data_key, title, json.dumps(row_data, ensure_ascii=False))

        logger.info(f"title '{title}' 更新成功")
        return True

    def delete_task_data(self, task_id: str) -> bool:
        """Delete all stored data of *task_id*; always returns True."""
        redis_client = self.get_redis_client()
        data_key = self._get_data_key(task_id)

        logger.info(f"删除任务数据,task_id: {task_id}")

        redis_client.delete(data_key)

        return True

    def get_row_count(self, task_id: str) -> int:
        """Return the stored ``_row_count`` for *task_id* (0 when absent)."""
        redis_client = self.get_redis_client()
        data_key = self._get_data_key(task_id)

        if not redis_client.exists(data_key):
            return 0

        return int(redis_client.hget(data_key, "_row_count") or 0)

    def exists(self, task_id: str) -> bool:
        """Return True when any data is stored for *task_id*."""
        redis_client = self.get_redis_client()
        data_key = self._get_data_key(task_id)
        return redis_client.exists(data_key) > 0

    def read_catalogues_data_by_chapters(self, task_id: str, chapter_labels: List[str]) -> pd.DataFrame:
        """Collect catalogue-review rows for the given chapter labels.

        Each label is first tried as a ``title`` (O(1) lookup); on a miss the
        whole task data is scanned for a row whose ``chapter_label`` column
        matches. The row's ``missing_items`` string is parsed back into a
        Python list.

        Returns:
            pd.DataFrame: columns chapter_label / chapter_classification /
            missing_items, one row per label that was found.
        """
        catalogues_data_list = []
        for chapter_label in chapter_labels:
            # Fast path: label used directly as the hash field (title).
            redis_row = self.read_by_title(task_id, chapter_label)

            if not redis_row:
                logger.warning(f"[Redis] 使用 chapter_label '{chapter_label}' 作为 title 查询失败,尝试匹配 chapter_label 字段")
                all_data = self.read_all(task_id)
                # Guard on the column's presence: DataFrame.get(...) with a
                # default returns a scalar when the column is missing, and
                # boolean-indexing on that scalar raises (previous bug).
                if not all_data.empty and 'chapter_label' in all_data.columns:
                    matched_rows = all_data[all_data['chapter_label'] == chapter_label]
                    if not matched_rows.empty:
                        redis_row = matched_rows.iloc[0].to_dict()
                        logger.info(f"[Redis] 通过 chapter_label 字段匹配到数据: {chapter_label}")

            if redis_row:
                # missing_items is stored as a Python-literal string; fall
                # back to [] when it cannot be parsed.
                missing_items_str = redis_row.get('missing_items', '[]')
                if isinstance(missing_items_str, str):
                    try:
                        missing_items = ast.literal_eval(missing_items_str)
                    except (ValueError, SyntaxError):
                        missing_items = []
                else:
                    missing_items = missing_items_str

                catalogues_data_list.append({
                    'chapter_label': chapter_label,
                    'chapter_classification': redis_row.get('chapter_classification', ''),
                    'missing_items': missing_items
                })
                logger.info(f"[Redis] 从Redis查询到章节: {chapter_label}, 缺失项: {missing_items}")
            else:
                logger.warning(f"[Redis] 未找到章节 '{chapter_label}' 的数据")

        return pd.DataFrame(catalogues_data_list)

    def update_redis_data_with_common_elements_removed(self, redis_data: pd.DataFrame, common_elements_dict: Dict[str, set]) -> pd.DataFrame:
        """Remove per-chapter common elements from the ``missing_items`` column.

        Non-list ``missing_items`` values are replaced by []. Note the set
        round-trip does not preserve the original item order.

        Args:
            redis_data: DataFrame with chapter_label / missing_items columns.
            common_elements_dict: chapter_label -> set of items to remove.

        Returns:
            pd.DataFrame: *redis_data* mutated in place and returned.
        """
        redis_data['missing_items'] = redis_data.apply(
            lambda row: list(set(row['missing_items']) - common_elements_dict.get(row['chapter_label'], set()))
            if isinstance(row['missing_items'], list) else [],
            axis=1
        )
        logger.info(f"[Redis] 已去除公共元素,更新后的redis_data: {redis_data.to_dict('records')}")
        return redis_data
+
+
# Module-level singleton holder.
_redis_manager = None


def get_redis_manager() -> CataloguesRedisManager:
    """Return the process-wide CataloguesRedisManager, creating it on first call."""
    global _redis_manager
    if _redis_manager is not None:
        return _redis_manager
    _redis_manager = CataloguesRedisManager()
    return _redis_manager

+ 4 - 4
core/construction_review/component/reviewers/check_completeness/components/result_analyzer.py

@@ -275,8 +275,8 @@ class ResultAnalyzer(IResultAnalyzer):
                 "reference_source": reference_source
             }
             all_issues.append(issue_item)
-            with open("temp/document_temp/missing_points.json", "w", encoding="utf-8") as f:
-                json.dump(all_issues, f, ensure_ascii=False, indent=4)
+            # with open("temp/document_temp/missing_points.json", "w", encoding="utf-8") as f:
+            #     json.dump(all_issues, f, ensure_ascii=False, indent=4)
             # 收集元数据(从第一行获取)
             if not metadata:
                 metadata = {
@@ -284,8 +284,8 @@ class ResultAnalyzer(IResultAnalyzer):
                     "chapter_code": row.get("标签", ""),
                     "original_content": row.get("content", "")
                 }
-            with open("temp/document_temp/missing_points_metadata.json", "w", encoding="utf-8") as f:
-                json.dump(metadata, f, ensure_ascii=False, indent=4)
+            # with open("temp/document_temp/missing_points_metadata.json", "w", encoding="utf-8") as f:
+            #     json.dump(metadata, f, ensure_ascii=False, indent=4)
         logger.debug(f"build_missing_issue_list_all_issues:{len(all_issues)}")
         # 返回包含问题和元数据的字典,由外层统一格式化
         return {

+ 20 - 0
core/construction_review/component/reviewers/outline_check.py

@@ -177,6 +177,26 @@ def merge_results_by_classification(df):
         raise
 
 
+
# Helper: collect the keys of a dict whose values are empty lists.
def get_empty_list_keys(dict_data):
    """Return the keys in *dict_data* whose value is an empty list.

    Args:
        dict_data: mapping to inspect; any other type yields [].

    Returns:
        list: keys whose value is an empty list, in dict order.
    """
    # Non-dict input produces no keys instead of raising.
    if not isinstance(dict_data, dict):
        return []

    return [k for k, v in dict_data.items() if isinstance(v, list) and not v]
+
 if __name__ == '__main__':
     csv_file = rf'temp\document_temp\2_spec_review_results.csv'
     path2 = rf'temp\document_temp\outlines_review_results.csv'

+ 400 - 0
core/construction_review/component/reviewers/utils/llm_chain_client/README.md

@@ -0,0 +1,400 @@
+# LLM 链式客户端 (LLM Chain Client)
+
+一个通用的异步 LLM 提示链执行框架,支持多模型 API 调用、提示词模板渲染和链式任务编排。该模块设计为可独立复用的组件,可在任何需要调用大模型进行链式处理的 Python 项目中使用。
+
+## 功能特性
+
+- ✅ **多模型支持**:内置 Qwen、Gemini、DeepSeek、Doubao 等模型客户端
+- ✅ **异步执行**:基于 `asyncio` 和 `aiohttp` 的高并发 API 调用
+- ✅ **提示链编排**:支持多步骤链式任务,步骤间可传递数据
+- ✅ **模板渲染**:基于 Jinja2 的提示词模板引擎
+- ✅ **配置驱动**:提示词和链配置通过 YAML 文件管理
+- ✅ **完全解耦**:接口驱动设计,易于扩展和测试
+- ✅ **独立可用**:无父项目依赖,可独立安装使用
+
+## 架构设计
+
+```
+llm_chain_client/
+├── interfaces/          # 接口层 - 定义组件契约
+│   ├── llm_client.py    # LLM 客户端接口
+│   ├── prompt_loader.py # 提示词加载器接口
+│   └── chain_executor.py # 链执行器接口
+├── implementations/     # 实现层 - 具体实现
+│   ├── clients/        # 各模型客户端实现
+│   │   ├── base_client.py
+│   │   ├── qwen_client.py
+│   │   ├── gemini_client.py
+│   │   ├── deepseek_client.py
+│   │   └── doubao_client.py
+│   ├── loaders/        # 提示词加载器实现
+│   │   └── yaml_prompt_loader.py
+│   └── chains/         # 链执行器实现
+│       └── async_chain_executor.py
+├── orchestration/      # 编排层 - 业务流程编排
+│   └── prompt_chain_processor.py
+├── bootstrap.py         # 初始化层 - 依赖注入容器
+├── main.py            # 入口文件
+├── requirements.txt    # 依赖列表
+└── README.md          # 本文档
+```
+
+### 分层说明
+
+| 层级 | 职责 | 说明 |
+|-----|------|------|
+| **接口层** | 定义契约 | LLMClient、PromptLoader、ChainExecutor 抽象接口 |
+| **实现层** | 具体实现 | 各模型客户端、提示词加载器、链执行器的具体实现 |
+| **编排层** | 流程编排 | PromptChainProcessor 组装组件,定义业务流程 |
+| **初始化层** | 依赖注入 | Bootstrap 容器,统一创建和配置组件 |
+
+## 安装依赖
+
+```bash
+pip install -r requirements.txt
+```
+
+依赖项:
+- `aiohttp>=3.9.0` - 异步 HTTP 客户端
+- `pyyaml>=6.0` - YAML 配置解析
+- `jinja2>=3.1.0` - 模板引擎
+
+## 快速开始
+
+### 1. 配置 LLM API
+
+创建配置文件 `config/llm_api.yaml`:
+
+```yaml
+# 模型类型:qwen/gemini/deepseek/doubao
+MODEL_TYPE: qwen
+
+# 通用配置
+keywords:
+  timeout: 30
+  max_retries: 2
+  request_payload:
+    temperature: 0.3
+    max_tokens: 1024
+
+# 各模型配置
+qwen:
+  server_url: "https://dashscope.aliyuncs.com/compatible-mode/v1"
+  model_id: "qwen-plus"
+  api_key: "your-api-key"
+
+gemini:
+  server_url: "https://generativelanguage.googleapis.com/v1beta"
+  model_id: "gemini-pro"
+  api_key: "your-api-key"
+
+deepseek:
+  server_url: "https://api.deepseek.com"
+  model_id: "deepseek-chat"
+  api_key: "your-api-key"
+
+doubao:
+  server_url: "https://ark.cn-beijing.volces.com/api/v3"
+  model_id: "ep-xxxxx"
+  api_key: "your-api-key"
+```
+
+### 2. 创建提示词
+
+在 `config/prompts/` 目录下创建提示词 YAML 文件:
+
+**step1_extract.yaml**
+```yaml
+name: step1_extract
+description: 提取关键信息
+system: 你是一个信息提取助手,擅长从文本中提取关键信息。
+user_template: |
+  请从以下文本中提取关键信息:
+  
+  文本内容:{{ content }}
+  
+  请以 JSON 格式返回结果。
+output_parser:
+  type: json
+```
+
+### 3. 创建提示链配置
+
+**chain_config.yaml**
+```yaml
+chain_name: 信息提取链
+description: 从文本中提取并分析信息
+steps:
+  - name: extract
+    prompt_file: step1_extract.yaml
+    output_key: extracted_data
+  - name: analyze
+    prompt_file: step2_analyze.yaml
+    input_from: extracted_data
+    output_key: analysis_result
+```
+
+### 4. 使用代码
+
+```python
+import asyncio
+import sys
+from pathlib import Path
+
+# 添加模块路径
+sys.path.insert(0, "path/to/llm_chain_client")
+
+from llm_chain_client.bootstrap import Bootstrap
+
+async def main():
+    # 创建处理器
+    processor = Bootstrap.create_processor(
+        model_type="qwen",              # 可选:指定模型类型
+        prompts_dir="config/prompts",   # 提示词目录
+        config_path="config/llm_api.yaml", # API配置文件
+        temperature=0.5,                # 可选:覆盖默认温度
+        max_tokens=2048                 # 可选:覆盖默认token数
+    )
+    
+    # 执行提示链
+    result = await processor.process(
+        chain_config_path="config/prompts/chain_config.yaml",
+        input_data={
+            "content": "这是待处理的文本内容..."
+        }
+    )
+    
+    # 获取结果
+    print("最终结果:", result["final_result"])
+    print("各步骤结果:")
+    for step in result["steps"]:
+        print(f"  {step['name']}: {step['result']}")
+
+asyncio.run(main())
+```
+
+## 配置说明
+
+### LLM API 配置
+
+配置文件位置可通过 `config_path` 参数指定,默认为 `config/llm_api.yaml`。
+
+| 配置项 | 说明 | 默认值 |
+|-------|------|--------|
+| `MODEL_TYPE` | 默认模型类型 | `qwen` |
+| `keywords.timeout` | 请求超时时间(秒) | `30` |
+| `keywords.max_retries` | 最大重试次数 | `2` |
+| `keywords.request_payload.temperature` | 默认温度参数 | `0.3` |
+| `keywords.request_payload.max_tokens` | 默认最大token数 | `1024` |
+
+### 提示词配置
+
+提示词文件使用 YAML 格式,支持以下字段:
+
+| 字段 | 说明 | 必填 |
+|-----|------|------|
+| `name` | 提示词名称 | 否 |
+| `description` | 提示词描述 | 否 |
+| `system` | 系统提示词 | 否 |
+| `user_template` | 用户提示词模板(支持 Jinja2 语法) | 是 |
+| `output_parser` | 输出解析器配置 | 否 |
+
+输出解析器类型:
+- `text`:返回原始文本(默认)
+- `json`:尝试解析为 JSON 格式
+
+### 提示链配置
+
+提示链配置文件定义步骤序列:
+
+| 字段 | 说明 | 必填 |
+|-----|------|------|
+| `chain_name` | 链名称 | 否 |
+| `description` | 链描述 | 否 |
+| `steps` | 步骤列表 | 是 |
+
+步骤配置:
+| 字段 | 说明 | 必填 |
+|-----|------|------|
+| `name` | 步骤名称 | 否 |
+| `prompt_file` | 提示词文件名 | 是 |
+| `output_key` | 输出结果的键名 | 否 |
+| `input_from` | 从上一步获取输入的键名 | 否 |
+
+## 支持的模型
+
+| 模型 | 标识符 | 说明 |
+|-----|--------|------|
+| 通义千问 | `qwen` | 阿里云大模型 |
+| Gemini | `gemini` | Google 大模型 |
+| DeepSeek | `deepseek` | DeepSeek 大模型 |
+| 豆包 | `doubao` | 字节跳动大模型 |
+
+获取支持的模型列表:
+```python
+from llm_chain_client.bootstrap import Bootstrap
+
+models = Bootstrap.get_supported_models()
+print(models)  # ['qwen', 'gemini', 'deepseek', 'doubao']
+```
+
+## API 文档
+
+### Bootstrap 类
+
+依赖注入容器,用于创建处理器。
+
+#### `create_processor()`
+
+创建提示链处理器实例。
+
+```python
+processor = Bootstrap.create_processor(
+    model_type: str = None,
+    prompts_dir: str = "config/prompts",
+    config_path: str = "config/llm_api.yaml",
+    temperature: float = None,
+    max_tokens: int = None
+) -> PromptChainProcessor
+```
+
+**参数:**
+- `model_type` - 模型类型,为 None 时从配置文件读取
+- `prompts_dir` - 提示词目录路径
+- `config_path` - API 配置文件路径
+- `temperature` - 温度参数(可选,覆盖默认值)
+- `max_tokens` - 最大 token 数(可选,覆盖默认值)
+
+#### `get_supported_models()`
+
+获取支持的模型类型列表。
+
+```python
+models: list[str] = Bootstrap.get_supported_models()
+```
+
+### PromptChainProcessor 类
+
+提示链处理流程编排类。
+
+#### `process()`
+
+执行完整的提示链处理流程。
+
+```python
+result = await processor.process(
+    chain_config_path: str,
+    input_data: Dict[str, Any],
+    temperature: float = None,
+    max_tokens: int = None
+) -> Dict[str, Any]
+```
+
+**参数:**
+- `chain_config_path` - 提示链配置文件路径
+- `input_data` - 输入数据字典
+- `temperature` - 温度参数(可选)
+- `max_tokens` - 最大 token 数(可选)
+
+**返回值:**
+```python
+{
+    "final_result": {...},      # 最终结果
+    "steps": [                  # 各步骤结果
+        {
+            "name": "step1",
+            "prompt_file": "step1.yaml",
+            "result": {...},
+            "raw_content": "...",
+            "usage": {...}
+        },
+        ...
+    ],
+    "context": {...}            # 完整上下文
+}
+```
+
+#### `get_model_info()`
+
+获取当前模型信息。
+
+```python
+info = processor.get_model_info()
+# {"model_id": "qwen-plus", "server_url": "https://..."}
+```
+
+## 独立使用说明
+
+本模块设计为完全独立可用,可在任何 Python 项目中复用。
+
+### 方式一:直接复制
+
+将 `llm_chain_client/` 目录复制到你的项目中:
+
+```python
+import sys
+from pathlib import Path
+
+# 添加模块路径
+sys.path.insert(0, str(Path(__file__).parent / "llm_chain_client"))
+
+from llm_chain_client.bootstrap import Bootstrap
+```
+
+### 方式二:作为包安装
+
+创建 `setup.py` 后安装:
+
+```bash
+pip install -e path/to/llm_chain_client
+```
+
+然后在代码中直接导入:
+
+```python
+from llm_chain_client.bootstrap import Bootstrap
+```
+
+## 扩展开发
+
+### 添加新的 LLM 客户端
+
+1. 在 `implementations/clients/` 下创建新文件
+2. 继承 `BaseLLMClient` 类
+3. 实现必要的配置解析逻辑
+4. 在 `bootstrap.py` 的 `_CLIENT_MAP` 中注册
+
+示例:
+
+```python
+from implementations.clients.base_client import BaseLLMClient
+
+class CustomClient(BaseLLMClient):
+    def __init__(self, config: Dict[str, Any]):
+        super().__init__(
+            server_url=config["server_url"],
+            model_id=config["model_id"],
+            api_key=config["api_key"]
+        )
+```
+
+### 添加新的提示词加载器
+
+1. 在 `implementations/loaders/` 下创建新文件
+2. 继承 `PromptLoader` 接口
+3. 实现抽象方法
+
+## 注意事项
+
+1. **API 密钥安全**:不要将包含真实 API 密钥的配置文件提交到版本控制系统
+2. **超时设置**:根据网络情况调整 `timeout` 参数
+3. **重试机制**:默认重试 2 次,可根据需要调整
+4. **Token 限制**:注意各模型的 token 限制,合理设置 `max_tokens`
+
+## 许可证
+
+本项目为内部组件,仅供项目内部使用。
+
+## 联系方式
+
+如有问题或建议,请联系项目维护者。

+ 36 - 0
core/construction_review/component/reviewers/utils/llm_chain_client/__init__.py

@@ -0,0 +1,36 @@
+"""
+LLM 提示链系统 - 异步调用大模型API进行提示链任务
+
+架构分层说明:
+
+1. 接口层 (interfaces/)
+   - 定义组件契约:LLMClient、PromptLoader、ChainExecutor
+   - 实现解耦和可扩展性
+
+2. 实现层 (implementations/)
+   - clients/: 各模型API客户端实现(Qwen、Gemini、DeepSeek、Doubao)
+   - loaders/: 提示词加载器实现(YAML格式)
+   - chains/: 提示链执行器实现
+   - 按功能分类,内聚存放
+
+3. 编排层 (orchestration/)
+   - PromptChainProcessor: 组装组件,定义业务流程
+   - 通过接口进行依赖注入
+
+4. 初始化层 (bootstrap.py)
+   - Bootstrap 容器
+   - 统一的依赖注入和实例化
+   - 工厂方法创建不同配置的处理器
+
+5. 入口层 (main.py)
+   - 应用启动点
+   - 通过 Bootstrap 创建处理器并执行
+
+核心优势:
+  ✓ 完全解耦:组件通过接口交互
+  ✓ 易于扩展:新增实现无需修改现有代码
+  ✓ 易于测试:可轻松替换为 Mock 实现
+  ✓ 职责清晰:各层职责明确,便于维护
+  ✓ 异步支持:支持高并发API调用
+  ✓ 配置驱动:提示词和链配置通过YAML管理
+"""

+ 189 - 0
core/construction_review/component/reviewers/utils/llm_chain_client/bootstrap.py

@@ -0,0 +1,189 @@
+"""初始化层 - 依赖注入和启动"""
+import yaml
+from pathlib import Path
+from typing import Dict, Any
+
+from .interfaces.llm_client import LLMClient
+from .interfaces.prompt_loader import PromptLoader
+from .interfaces.chain_executor import ChainExecutor
+
+from .implementations.clients import (
+    QwenClient,
+    GeminiClient,
+    DeepSeekClient,
+    DoubaoClient
+)
+from .implementations.loaders import YamlPromptLoader
+from .implementations.chains import AsyncChainExecutor
+from .orchestration import PromptChainProcessor
+
+
class Bootstrap:
    """Dependency-injection container for the LLM prompt-chain system.

    Loads YAML configuration, instantiates the model client, prompt
    loader and chain executor, and assembles them into a
    ``PromptChainProcessor``.
    """

    # Registry of supported model-type keys -> client classes.
    _CLIENT_MAP = {
        "qwen": QwenClient,
        "gemini": GeminiClient,
        "deepseek": DeepSeekClient,
        "doubao": DoubaoClient
    }

    @staticmethod
    def _load_llm_config(config_path: str = "config/llm_api.yaml") -> Dict[str, Any]:
        """
        Load the LLM API configuration.

        Args:
            config_path: Path to the YAML config file (resolved against CWD).

        Returns:
            Parsed configuration dictionary.

        Raises:
            FileNotFoundError: If the config file does not exist.
        """
        config_file = Path(config_path)
        if not config_file.exists():
            raise FileNotFoundError(f"配置文件不存在: {config_file}")

        with open(config_file, "r", encoding="utf-8") as f:
            config = yaml.safe_load(f)

        return config

    @staticmethod
    def _create_llm_client(model_type: str, config: Dict[str, Any]) -> LLMClient:
        """
        Create an LLM client for the given model type.

        Args:
            model_type: Model type (qwen/gemini/deepseek/doubao),
                case-insensitive.
            config: Full configuration dictionary.

        Returns:
            An instantiated LLM client.

        Raises:
            ValueError: If the model type is not registered.
        """
        model_type = model_type.lower()

        if model_type not in Bootstrap._CLIENT_MAP:
            raise ValueError(
                f"不支持的模型类型: {model_type},"
                f"支持的类型: {', '.join(Bootstrap._CLIENT_MAP.keys())}"
            )

        # Copy the per-model section so the caller's config dict is not
        # mutated as a side effect of client creation (the original
        # update() wrote timeout/max_retries back into `config`).
        model_config = dict(config.get(model_type, {}))
        keywords = config.get("keywords", {})
        # Merge the shared timeout / retry policy.
        model_config.update({
            "timeout": keywords.get("timeout", 30),
            "max_retries": keywords.get("max_retries", 2)
        })

        client_class = Bootstrap._CLIENT_MAP[model_type]
        return client_class(model_config)

    @staticmethod
    def _create_prompt_loader(
        prompts_dir: str = "config/prompts"
    ) -> PromptLoader:
        """
        Create the YAML prompt loader.

        Args:
            prompts_dir: Directory containing prompt YAML files.

        Returns:
            A prompt loader instance.
        """
        return YamlPromptLoader(prompts_dir)

    @staticmethod
    def _create_chain_executor(
        llm_client: LLMClient,
        prompt_loader: PromptLoader,
        temperature: float = None,
        max_tokens: int = None,
        config_path: str = "config/llm_api.yaml"
    ) -> ChainExecutor:
        """
        Create the prompt-chain executor.

        Args:
            llm_client: LLM API client.
            prompt_loader: Prompt loader.
            temperature: Optional sampling-temperature override.
            max_tokens: Optional max-token override.
            config_path: Config file supplying the fallback defaults.

        Returns:
            A configured chain executor.
        """
        config = Bootstrap._load_llm_config(config_path)
        keywords = config.get("keywords", {})
        request_payload = keywords.get("request_payload", {})

        # Explicit None checks: the previous `temperature or default`
        # silently discarded a caller-supplied 0.0 (and max_tokens of 0).
        default_temperature = (
            temperature if temperature is not None
            else request_payload.get("temperature", 0.3)
        )
        default_max_tokens = (
            max_tokens if max_tokens is not None
            else request_payload.get("max_tokens", 1024)
        )

        return AsyncChainExecutor(
            llm_client=llm_client,
            prompt_loader=prompt_loader,
            default_temperature=default_temperature,
            default_max_tokens=default_max_tokens
        )

    @staticmethod
    def create_processor(
        model_type: str = None,
        prompts_dir: str = "config/prompts",
        config_path: str = "config/llm_api.yaml",
        temperature: float = None,
        max_tokens: int = None
    ) -> PromptChainProcessor:
        """
        Create a fully wired prompt-chain processor.

        Args:
            model_type: Model type (qwen/gemini/deepseek/doubao); when
                None it is read from the config file's MODEL_TYPE key.
            prompts_dir: Directory containing prompt YAML files.
            config_path: LLM API config file path.
            temperature: Optional sampling-temperature override.
            max_tokens: Optional max-token override.

        Returns:
            A ready-to-use prompt-chain processor.
        """
        config = Bootstrap._load_llm_config(config_path)

        # Fall back to the configured default model.
        if model_type is None:
            model_type = config.get("MODEL_TYPE", "qwen")

        # Assemble the component graph.
        llm_client = Bootstrap._create_llm_client(model_type, config)
        prompt_loader = Bootstrap._create_prompt_loader(prompts_dir)
        chain_executor = Bootstrap._create_chain_executor(
            llm_client,
            prompt_loader,
            temperature,
            max_tokens,
            config_path
        )

        return PromptChainProcessor(
            llm_client=llm_client,
            prompt_loader=prompt_loader,
            chain_executor=chain_executor
        )

    @staticmethod
    def get_supported_models() -> list[str]:
        """
        Return the list of supported model-type keys.

        Returns:
            Model-type key list.
        """
        return list(Bootstrap._CLIENT_MAP.keys())

+ 1 - 0
core/construction_review/component/reviewers/utils/llm_chain_client/implementations/__init__.py

@@ -0,0 +1 @@
+"""实现层 - 具体实现"""

+ 4 - 0
core/construction_review/component/reviewers/utils/llm_chain_client/implementations/chains/__init__.py

@@ -0,0 +1,4 @@
+"""实现层 - 提示链执行器"""
+from .async_chain_executor import AsyncChainExecutor
+
+__all__ = ["AsyncChainExecutor"]

+ 178 - 0
core/construction_review/component/reviewers/utils/llm_chain_client/implementations/chains/async_chain_executor.py

@@ -0,0 +1,178 @@
+"""实现层 - 异步提示链执行器"""
+import json
+import logging
+from typing import Dict, Any, List
+from ...interfaces.llm_client import LLMClient
+from ...interfaces.prompt_loader import PromptLoader
+from ...interfaces.chain_executor import ChainExecutor
+
+
+logger = logging.getLogger(__name__)
+
+
class AsyncChainExecutor(ChainExecutor):
    """Asynchronous prompt-chain executor.

    Runs the configured steps sequentially: each step loads a prompt,
    renders it with the accumulated context, calls the LLM, parses the
    response, and publishes the parsed result into the context under
    the step's ``output_key`` for later steps to consume.
    """

    def __init__(
        self,
        llm_client: LLMClient,
        prompt_loader: PromptLoader,
        default_temperature: float = 0.3,
        default_max_tokens: int = 1024
    ):
        """
        Initialize the executor.

        Args:
            llm_client: LLM API client.
            prompt_loader: Prompt loader.
            default_temperature: Default sampling temperature.
            default_max_tokens: Default max token count.
        """
        self.llm_client = llm_client
        self.prompt_loader = prompt_loader
        self.default_temperature = default_temperature
        self.default_max_tokens = default_max_tokens

    async def execute_chain(
        self,
        chain_steps: List[Dict[str, Any]],
        initial_input: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Execute the prompt chain.

        Args:
            chain_steps: Ordered list of step descriptors (name,
                prompt_file, output_key, optional input_from).
            initial_input: Initial template variables.

        Returns:
            Dict with ``final_result`` (last step's parsed output),
            ``steps`` (per-step results) and the accumulated ``context``.
        """
        step_results = []
        # Copy so the caller's input dict is never mutated.
        context = initial_input.copy()

        for step in chain_steps:
            step_name = step.get("name", "unknown")
            prompt_file = step.get("prompt_file", "")
            output_key = step.get("output_key", "")
            input_from = step.get("input_from", None)

            logger.info(f"执行步骤: {step_name}")

            # Load the prompt definition (system + user template).
            prompt_data = self.prompt_loader.load_prompt(prompt_file)

            # Expose the whole context plus, optionally, a previous
            # step's output routed into the generic "input" slot.
            variables = context.copy()
            if input_from and input_from in context:
                variables["input"] = context[input_from]

            user_prompt = self.prompt_loader.render_template(
                prompt_data["user_template"],
                variables
            )

            # Build the chat message list.
            messages = []
            if prompt_data["system"]:
                messages.append({"role": "system", "content": prompt_data["system"]})
            messages.append({"role": "user", "content": user_prompt})
            # Removed leftover debug code that dumped every rendered
            # prompt to `prompts.txt` in the CWD; use debug logging instead.
            logger.debug("rendered messages for step %s: %s", step_name, messages)

            # Call the model.
            response = await self.llm_client.chat_completion(
                messages,
                temperature=self.default_temperature,
                max_tokens=self.default_max_tokens
            )

            # Parse the response per the prompt's output_parser config.
            content = response.get("content", "")
            parsed_result = self._parse_response(content, prompt_data.get("output_parser", {}))

            step_result = {
                "name": step_name,
                "prompt_file": prompt_file,
                "result": parsed_result,
                "raw_content": content,
                "usage": response.get("usage", {})
            }
            step_results.append(step_result)

            # Publish this step's output for downstream steps.
            if output_key:
                context[output_key] = parsed_result

        final_result = step_results[-1]["result"] if step_results else {}

        return {
            "final_result": final_result,
            "steps": step_results,
            "context": context
        }

    def _parse_response(
        self,
        content: str,
        output_parser: Dict[str, Any]
    ) -> Any:
        """
        Parse the API response content.

        Args:
            content: Raw text returned by the API.
            output_parser: Parser config; ``type`` may be "json" or "text".

        Returns:
            Parsed JSON object for "json" parsers (falling back to the
            raw text on decode failure), otherwise the raw text.
        """
        parser_type = output_parser.get("type", "text")

        if parser_type == "json":
            try:
                # JSON may be wrapped in a markdown code fence.
                json_content = self._extract_json(content)
                return json.loads(json_content)
            except json.JSONDecodeError as e:
                logger.warning(f"JSON解析失败: {e}, 返回原始内容")
                return content
        else:
            return content

    def _extract_json(self, content: str) -> str:
        """
        Extract a JSON payload from possibly fenced text.

        Args:
            content: Text that may contain a ```json fenced block.

        Returns:
            The fenced block's body if found, otherwise the stripped text.
        """
        import re

        # Prefer an explicit ```json ... ``` fence.
        json_pattern = r'```json\s*(.*?)\s*```'
        match = re.search(json_pattern, content, re.DOTALL)
        if match:
            return match.group(1)

        # Fall back to any ``` ... ``` fence.
        code_pattern = r'```\s*(.*?)\s*```'
        match = re.search(code_pattern, content, re.DOTALL)
        if match:
            return match.group(1)

        # No fence: assume the whole content is the payload.
        return content.strip()

+ 14 - 0
core/construction_review/component/reviewers/utils/llm_chain_client/implementations/clients/__init__.py

@@ -0,0 +1,14 @@
+"""实现层 - 各模型API客户端"""
+from .base_client import BaseLLMClient
+from .qwen_client import QwenClient
+from .gemini_client import GeminiClient
+from .deepseek_client import DeepSeekClient
+from .doubao_client import DoubaoClient
+
+__all__ = [
+    "BaseLLMClient",
+    "QwenClient",
+    "GeminiClient",
+    "DeepSeekClient",
+    "DoubaoClient"
+]

+ 129 - 0
core/construction_review/component/reviewers/utils/llm_chain_client/implementations/clients/base_client.py

@@ -0,0 +1,129 @@
+"""实现层 - 大模型API客户端基类"""
+import aiohttp
+from typing import Dict, Any
+from ...interfaces.llm_client import LLMClient
+
+
class BaseLLMClient(LLMClient):
    """Base implementation for OpenAI-compatible chat-completion APIs.

    Handles auth headers, request-body construction, retries and
    response unwrapping; concrete subclasses only map their own
    config keys into the constructor.
    """

    def __init__(
        self,
        server_url: str,
        model_id: str,
        api_key: str,
        timeout: int = 30,
        max_retries: int = 2
    ):
        """
        Initialize the client.

        Args:
            server_url: API base URL (trailing slash is stripped).
            model_id: Model identifier.
            api_key: API key used as a Bearer token.
            timeout: Total request timeout in seconds.
            max_retries: Number of retries after the first attempt.
        """
        self.server_url = server_url.rstrip("/")
        self.model_id = model_id
        self.api_key = api_key
        self.timeout = timeout
        self.max_retries = max_retries

    def get_model_id(self) -> str:
        """Return the model identifier."""
        return self.model_id

    def get_server_url(self) -> str:
        """Return the API server base URL."""
        return self.server_url

    def _get_headers(self) -> Dict[str, str]:
        """Build the JSON + Bearer-auth request headers."""
        return {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.api_key}"
        }

    def _build_request_body(
        self,
        messages: list[Dict[str, str]],
        **kwargs
    ) -> Dict[str, Any]:
        """
        Build the chat-completion request body.

        Args:
            messages: Message list.
            **kwargs: Optional temperature / max_tokens / stream.

        Returns:
            Request-body dictionary.
        """
        body = {
            "model": self.model_id,
            "messages": messages
        }

        # Only forward parameters the caller actually supplied.
        if "temperature" in kwargs:
            body["temperature"] = kwargs["temperature"]
        if "max_tokens" in kwargs:
            body["max_tokens"] = kwargs["max_tokens"]
        if "stream" in kwargs:
            body["stream"] = kwargs["stream"]

        return body

    async def chat_completion(
        self,
        messages: list[Dict[str, str]],
        **kwargs
    ) -> Dict[str, Any]:
        """
        Call the chat-completion API asynchronously with retries.

        Args:
            messages: Message list.
            **kwargs: Optional temperature / max_tokens / stream.

        Returns:
            Dict with ``content``, ``usage``, ``model`` and
            ``raw_response``.

        Raises:
            RuntimeError: After exhausting retries, or on an
                unexpected error (with the cause chained).
        """
        import asyncio  # local import: aiohttp timeouts surface as asyncio.TimeoutError

        url = f"{self.server_url}/chat/completions"
        headers = self._get_headers()
        body = self._build_request_body(messages, **kwargs)

        timeout = aiohttp.ClientTimeout(total=self.timeout)

        for attempt in range(self.max_retries + 1):
            try:
                async with aiohttp.ClientSession(timeout=timeout) as session:
                    async with session.post(url, json=body, headers=headers) as response:
                        response.raise_for_status()
                        data = await response.json()

                        # Unwrap the first choice when present.
                        if "choices" in data and len(data["choices"]) > 0:
                            content = data["choices"][0].get("message", {}).get("content", "")
                            return {
                                "content": content,
                                "usage": data.get("usage", {}),
                                "model": data.get("model", self.model_id),
                                "raw_response": data
                            }
                        else:
                            return {
                                "content": "",
                                "usage": {},
                                "model": self.model_id,
                                "raw_response": data
                            }

            except (aiohttp.ClientError, asyncio.TimeoutError) as e:
                # Timeouts raise asyncio.TimeoutError (not ClientError);
                # previously they escaped the retry loop entirely.
                if attempt == self.max_retries:
                    raise RuntimeError(f"API调用失败(重试{self.max_retries}次后): {e}") from e
                continue
            except Exception as e:
                # Chain the cause so the original traceback is preserved.
                raise RuntimeError(f"API调用异常: {e}") from e

+ 22 - 0
core/construction_review/component/reviewers/utils/llm_chain_client/implementations/clients/deepseek_client.py

@@ -0,0 +1,22 @@
+"""实现层 - DeepSeek模型客户端"""
+from typing import Dict, Any
+from .base_client import BaseLLMClient
+
+
class DeepSeekClient(BaseLLMClient):
    """LLM client preconfigured for the DeepSeek API endpoints."""

    def __init__(self, config: Dict[str, Any]):
        """
        Build the client from a configuration dictionary.

        Args:
            config: Expects DEEPSEEK_SERVER_URL, DEEPSEEK_MODEL_ID and
                DEEPSEEK_API_KEY, plus optional timeout / max_retries.
        """
        server_url = config.get("DEEPSEEK_SERVER_URL", "")
        model_id = config.get("DEEPSEEK_MODEL_ID", "")
        api_key = config.get("DEEPSEEK_API_KEY", "")
        super().__init__(
            server_url=server_url,
            model_id=model_id,
            api_key=api_key,
            timeout=config.get("timeout", 30),
            max_retries=config.get("max_retries", 2),
        )

+ 22 - 0
core/construction_review/component/reviewers/utils/llm_chain_client/implementations/clients/doubao_client.py

@@ -0,0 +1,22 @@
+"""实现层 - Doubao模型客户端"""
+from typing import Dict, Any
+from .base_client import BaseLLMClient
+
+
class DoubaoClient(BaseLLMClient):
    """LLM client preconfigured for the Doubao API endpoints."""

    def __init__(self, config: Dict[str, Any]):
        """
        Build the client from a configuration dictionary.

        Args:
            config: Expects DOUBAO_SERVER_URL, DOUBAO_MODEL_ID and
                DOUBAO_API_KEY, plus optional timeout / max_retries.
        """
        init_kwargs = {
            "server_url": config.get("DOUBAO_SERVER_URL", ""),
            "model_id": config.get("DOUBAO_MODEL_ID", ""),
            "api_key": config.get("DOUBAO_API_KEY", ""),
            "timeout": config.get("timeout", 30),
            "max_retries": config.get("max_retries", 2),
        }
        super().__init__(**init_kwargs)

+ 22 - 0
core/construction_review/component/reviewers/utils/llm_chain_client/implementations/clients/gemini_client.py

@@ -0,0 +1,22 @@
+"""实现层 - Gemini模型客户端"""
+from typing import Dict, Any
+from .base_client import BaseLLMClient
+
+
class GeminiClient(BaseLLMClient):
    """LLM client preconfigured for the Gemini API endpoints."""

    def __init__(self, config: Dict[str, Any]):
        """
        Build the client from a configuration dictionary.

        Args:
            config: Expects GEMINI_SERVER_URL, GEMINI_MODEL_ID and
                GEMINI_API_KEY, plus optional timeout / max_retries.
        """
        # Positional order matches BaseLLMClient.__init__:
        # server_url, model_id, api_key, timeout, max_retries.
        super().__init__(
            config.get("GEMINI_SERVER_URL", ""),
            config.get("GEMINI_MODEL_ID", ""),
            config.get("GEMINI_API_KEY", ""),
            config.get("timeout", 30),
            config.get("max_retries", 2),
        )

+ 22 - 0
core/construction_review/component/reviewers/utils/llm_chain_client/implementations/clients/qwen_client.py

@@ -0,0 +1,22 @@
+"""实现层 - Qwen模型客户端"""
+from typing import Dict, Any
+from .base_client import BaseLLMClient
+
+
class QwenClient(BaseLLMClient):
    """LLM client preconfigured for the Qwen API endpoints."""

    def __init__(self, config: Dict[str, Any]):
        """
        Build the client from a configuration dictionary.

        Args:
            config: Expects QWEN_SERVER_URL, QWEN_MODEL_ID and
                QWEN_API_KEY, plus optional timeout / max_retries.
        """
        server_url, model_id, api_key = (
            config.get(key, "")
            for key in ("QWEN_SERVER_URL", "QWEN_MODEL_ID", "QWEN_API_KEY")
        )
        super().__init__(
            server_url=server_url,
            model_id=model_id,
            api_key=api_key,
            timeout=config.get("timeout", 30),
            max_retries=config.get("max_retries", 2),
        )

+ 4 - 0
core/construction_review/component/reviewers/utils/llm_chain_client/implementations/loaders/__init__.py

@@ -0,0 +1,4 @@
+"""实现层 - 提示词加载器"""
+from .yaml_prompt_loader import YamlPromptLoader
+
+__all__ = ["YamlPromptLoader"]

+ 105 - 0
core/construction_review/component/reviewers/utils/llm_chain_client/implementations/loaders/yaml_prompt_loader.py

@@ -0,0 +1,105 @@
+"""实现层 - YAML提示词加载器"""
+import yaml
+import os
+from pathlib import Path
+from typing import Dict, Any
+from jinja2 import Template
+from ...interfaces.prompt_loader import PromptLoader
+
+
class YamlPromptLoader(PromptLoader):
    """Prompt loader backed by per-prompt YAML files in a directory.

    Loaded prompts are cached in memory; call :meth:`clear_cache` to
    force a re-read from disk.
    """

    def __init__(self, prompts_dir: str = "config/prompts"):
        """
        Initialize the loader.

        Args:
            prompts_dir: Directory containing the prompt YAML files.
        """
        self.prompts_dir = Path(prompts_dir)
        self._prompt_cache: Dict[str, Dict[str, str]] = {}

    def load_prompt(self, prompt_name: str) -> Dict[str, str]:
        """
        Load a prompt definition by name (the YAML file's stem).

        Args:
            prompt_name: Prompt name without the .yaml extension.

        Returns:
            Dict with name / description / system / user_template /
            output_parser keys.

        Raises:
            FileNotFoundError: If the YAML file does not exist.
        """
        cached = self._prompt_cache.get(prompt_name)
        if cached is not None:
            return cached

        prompt_file = self.prompts_dir / f"{prompt_name}.yaml"
        if not prompt_file.exists():
            raise FileNotFoundError(f"提示词文件不存在: {prompt_file}")

        with open(prompt_file, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)

        # Normalize missing keys to stable defaults.
        defaults = (
            ("name", prompt_name),
            ("description", ""),
            ("system", ""),
            ("user_template", ""),
            ("output_parser", {}),
        )
        result = {key: data.get(key, fallback) for key, fallback in defaults}

        self._prompt_cache[prompt_name] = result
        return result

    def render_template(
        self,
        template: str,
        variables: Dict[str, Any]
    ) -> str:
        """
        Render a prompt template.

        Args:
            template: Template string using {{ variable }} syntax.
            variables: Variable dictionary.

        Returns:
            The rendered string.
        """
        return Template(template).render(**variables)

    def load_chain_config(self, config_path: str) -> Dict[str, Any]:
        """
        Load a prompt-chain configuration file.

        Args:
            config_path: Path to the chain config YAML.

        Returns:
            Dict with chain_name / description / steps keys.

        Raises:
            FileNotFoundError: If the config file does not exist.
        """
        config_file = Path(config_path)
        if not config_file.exists():
            raise FileNotFoundError(f"提示链配置文件不存在: {config_file}")

        with open(config_file, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)

        return {
            "chain_name": data.get("chain_name", ""),
            "description": data.get("description", ""),
            "steps": data.get("steps", []),
        }

    def clear_cache(self):
        """Drop all cached prompt definitions."""
        self._prompt_cache.clear()

+ 6 - 0
core/construction_review/component/reviewers/utils/llm_chain_client/interfaces/__init__.py

@@ -0,0 +1,6 @@
+"""接口层 - 定义组件契约"""
+from .llm_client import LLMClient
+from .prompt_loader import PromptLoader
+from .chain_executor import ChainExecutor
+
+__all__ = ["LLMClient", "PromptLoader", "ChainExecutor"]

+ 46 - 0
core/construction_review/component/reviewers/utils/llm_chain_client/interfaces/chain_executor.py

@@ -0,0 +1,46 @@
+"""接口层 - 提示链执行接口"""
+from abc import ABC, abstractmethod
+from typing import Dict, Any, List
+
+
class ChainExecutor(ABC):
    """Abstract contract for executing a multi-step prompt chain."""

    @abstractmethod
    async def execute_chain(
        self,
        chain_steps: List[Dict[str, Any]],
        initial_input: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Execute the prompt chain steps in order.

        Args:
            chain_steps: Step descriptors, each with keys such as
                ``name``, ``prompt_file``, ``output_key`` and optionally
                ``input_from``, e.g.::

                    [
                        {"name": "step1_extract",
                         "prompt_file": "step1_extract.yaml",
                         "output_key": "extracted_data"},
                        {"name": "step2_analyze",
                         "prompt_file": "step2_analyze.yaml",
                         "input_from": "extracted_data",
                         "output_key": "analysis_result"},
                    ]
            initial_input: Initial template variables, e.g.
                ``{"content": "text to process...", "other_var": "value"}``.

        Returns:
            Final and intermediate results::

                {"final_result": {...},
                 "steps": [{"name": "step1_extract", "result": {...}},
                           {"name": "step2_analyze", "result": {...}}]}
        """

+ 35 - 0
core/construction_review/component/reviewers/utils/llm_chain_client/interfaces/llm_client.py

@@ -0,0 +1,35 @@
+"""接口层 - 大模型API调用接口"""
+from abc import ABC, abstractmethod
+from typing import Dict, Any
+
+
class LLMClient(ABC):
    """Abstract contract for asynchronous LLM chat-completion clients."""

    @abstractmethod
    async def chat_completion(
        self,
        messages: list[Dict[str, str]],
        **kwargs
    ) -> Dict[str, Any]:
        """
        Call the chat-completion API asynchronously.

        Args:
            messages: Message list shaped like
                ``[{"role": "user", "content": "..."}]``.
            **kwargs: Extra parameters (temperature, max_tokens, ...).

        Returns:
            Response dict containing at least ``content`` and ``usage``.
        """

    @abstractmethod
    def get_model_id(self) -> str:
        """Return the model identifier."""

    @abstractmethod
    def get_server_url(self) -> str:
        """Return the API server base URL."""

+ 62 - 0
core/construction_review/component/reviewers/utils/llm_chain_client/interfaces/prompt_loader.py

@@ -0,0 +1,62 @@
+"""接口层 - 提示词加载接口"""
+from abc import ABC, abstractmethod
+from typing import Dict, Any
+
+
class PromptLoader(ABC):
    """Abstract contract for loading and rendering prompt definitions."""

    @abstractmethod
    def load_prompt(self, prompt_name: str) -> Dict[str, str]:
        """
        Load a prompt definition by name.

        Args:
            prompt_name: Prompt name (YAML file name without extension).

        Returns:
            Dict with the prompt's parts::

                {"system": "...", "user_template": "...",
                 "description": "...", "name": "..."}
        """

    @abstractmethod
    def render_template(
        self,
        template: str,
        variables: Dict[str, Any]
    ) -> str:
        """
        Render a prompt template.

        Args:
            template: Template string using {{ variable }} syntax.
            variables: Variable dictionary.

        Returns:
            The rendered string.
        """

    @abstractmethod
    def load_chain_config(self, config_path: str) -> Dict[str, Any]:
        """
        Load a prompt-chain configuration.

        Args:
            config_path: Path to the chain config file.

        Returns:
            Chain config with a ``steps`` list::

                {"chain_name": "...", "description": "...", "steps": [...]}
        """

+ 68 - 0
core/construction_review/component/reviewers/utils/llm_chain_client/main.py

@@ -0,0 +1,68 @@
+"""入口文件 - 应用启动"""
+import asyncio
+import logging
+from .bootstrap import Bootstrap
+
+
# Configure root logging once at import time for this demo entry point.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
+
+
async def main():
    """Demo entry point: build a processor and run an example prompt chain.

    Requires config/llm_api.yaml and config/prompts/chain_config.yaml to
    exist relative to the working directory; logs a warning otherwise.
    """

    # Report which model types are registered.
    supported_models = Bootstrap.get_supported_models()
    logger.info(f"支持的模型类型: {', '.join(supported_models)}")

    # Build a processor using the config file's default model.
    processor = Bootstrap.create_processor()

    # Report the active model and endpoint.
    model_info = processor.get_model_info()
    logger.info(f"当前模型: {model_info['model_id']}")
    logger.info(f"服务器地址: {model_info['server_url']}")

    # Example chain run; needs the config and prompt files created first.
    try:
        result = await processor.process(
            chain_config_path="config/prompts/chain_config.yaml",
            input_data={
                "content": "这是一段待处理的文本内容..."
            }
        )

        logger.info("处理完成")
        logger.info(f"最终结果: {result['final_result']}")

        # Log each intermediate step's result.
        for step in result['steps']:
            logger.info(f"步骤 {step['name']}: {step['result']}")

    except FileNotFoundError as e:
        logger.warning(f"配置文件未找到: {e}")
        logger.info("请先创建 config/prompts/ 目录和相应的配置文件")
    except Exception as e:
        logger.error(f"处理失败: {e}")
+
+
async def example_custom_model():
    """Example: build a processor pinned to a specific model and overrides."""
    # Create a processor for an explicitly chosen model.
    processor = Bootstrap.create_processor(
        model_type="qwen",  # optional: qwen, gemini, deepseek, doubao
        temperature=0.5,      # optional: overrides the default temperature
        max_tokens=2048      # optional: overrides the default max tokens
    )

    logger.info(f"使用模型: {processor.get_model_info()['model_id']}")
+
+
+if __name__ == "__main__":
+    # 运行主程序
+    asyncio.run(main())

+ 4 - 0
core/construction_review/component/reviewers/utils/llm_chain_client/orchestration/__init__.py

@@ -0,0 +1,4 @@
+"""编排层 - 业务流程编排"""
+from .prompt_chain_processor import PromptChainProcessor
+
+__all__ = ["PromptChainProcessor"]

+ 86 - 0
core/construction_review/component/reviewers/utils/llm_chain_client/orchestration/prompt_chain_processor.py

@@ -0,0 +1,86 @@
+"""编排层 - 提示链处理流程编排"""
+import json
+import logging
+from typing import Dict, Any
+from ..interfaces.llm_client import LLMClient
+from ..interfaces.prompt_loader import PromptLoader
+from ..interfaces.chain_executor import ChainExecutor
+
+
+logger = logging.getLogger(__name__)
+
+
class PromptChainProcessor:
    """Orchestrates a full prompt-chain run.

    Wires the LLM client, prompt loader and chain executor together
    and exposes a single :meth:`process` entry point.
    """

    def __init__(
        self,
        llm_client: LLMClient,
        prompt_loader: PromptLoader,
        chain_executor: ChainExecutor
    ):
        """
        Initialize the processor.

        Args:
            llm_client: LLM API client.
            prompt_loader: Prompt loader.
            chain_executor: Chain executor.
        """
        self.llm_client = llm_client
        self.prompt_loader = prompt_loader
        self.chain_executor = chain_executor

    async def process(
        self,
        chain_config_path: str,
        input_data: Dict[str, Any],
        temperature: float = None,
        max_tokens: int = None
    ) -> Dict[str, Any]:
        """
        Run the prompt chain described by *chain_config_path*.

        Args:
            chain_config_path: Path of the chain config YAML.
            input_data: Initial input variables for the chain.
            temperature: Optional per-call temperature override.
            max_tokens: Optional per-call max-token override.

        Returns:
            The executor's result dict (final_result / steps / context).
        """
        logger.info(f"开始处理提示链: {chain_config_path}")

        chain_config = self.prompt_loader.load_chain_config(chain_config_path)
        logger.info(f"提示链名称: {chain_config['chain_name']}")
        logger.info(f"步骤数量: {len(chain_config['steps'])}")

        # Apply per-call overrides temporarily: previously they were
        # written into the executor permanently and leaked into every
        # subsequent call.
        # NOTE(review): assumes the executor exposes default_temperature /
        # default_max_tokens attributes (AsyncChainExecutor does).
        prev_temperature = self.chain_executor.default_temperature
        prev_max_tokens = self.chain_executor.default_max_tokens
        if temperature is not None:
            self.chain_executor.default_temperature = temperature
        if max_tokens is not None:
            self.chain_executor.default_max_tokens = max_tokens

        try:
            result = await self.chain_executor.execute_chain(
                chain_config["steps"],
                input_data
            )
        finally:
            # Restore the executor's defaults even if the chain failed.
            self.chain_executor.default_temperature = prev_temperature
            self.chain_executor.default_max_tokens = prev_max_tokens

        logger.info(f"提示链处理完成,共执行 {len(result['steps'])} 个步骤")

        return result

    def get_model_info(self) -> Dict[str, str]:
        """
        Return the active model's id and server URL.

        Returns:
            Dict with ``model_id`` and ``server_url`` keys.
        """
        return {
            "model_id": self.llm_client.get_model_id(),
            "server_url": self.llm_client.get_server_url()
        }

+ 10 - 0
core/construction_review/component/reviewers/utils/llm_chain_client/requirements.txt

@@ -0,0 +1,10 @@
+# LLM 提示链系统依赖项
+
+# 异步HTTP客户端
+aiohttp>=3.9.0
+
+# YAML配置文件解析
+pyyaml>=6.0
+
+# 模板引擎(用于提示词渲染)
+jinja2>=3.1.0

+ 28 - 1
core/construction_review/workflows/ai_review_workflow.py

@@ -47,6 +47,7 @@ from typing import Optional, Callable, Dict, Any, TypedDict, Annotated, List
 from langgraph.graph import StateGraph, END
 from langgraph.graph.message import add_messages
 from langchain_core.messages import BaseMessage, HumanMessage, AIMessage
+import pandas as pd
 from foundation.observability.logger.loggering import server_logger as logger
 from core.construction_review.component.reviewers.utils.directory_extraction import (
     extract_basis_with_langchain_qwen,
@@ -57,7 +58,8 @@ from ..component.reviewers.utils.inter_tool import InterTool
 from core.base.task_models import TaskFileInfo
 from .core_functions import AIReviewCoreFun
 from .types import AIReviewState
-
+from ..component.reviewers.catalogues_check.catalogues_check import catalogues_check
+from ..component.reviewers.catalogues_check.utils import get_redis_manager
 
 
 # @dataclass
@@ -314,12 +316,37 @@ class AIReviewWorkflow:
 
             # 3️ 获取结构化内容
             original_chunks = state.get("structured_content", {}).get("chunks", [])
+            original_outline = state.get("structured_content", {}).get("outline", {}).get("chapters", [])
+            # with open(r"temp/document_temp/original_outline.json", "w", encoding="utf-8") as f:
+            #     json.dump(original_outline, f, ensure_ascii=False, indent=2)
+            logger.info("开始目录审查")
+            outline_results = await catalogues_check(original_outline)
+            outline_results = pd.DataFrame(outline_results)
+            # outline_results[['title', 'chapter_classification', 'missing_items']].to_csv(r"temp/document_temp/outline_results.csv", encoding='utf-8-sig', index=False)
+            
+            outline_results['miss_outline'] = None
+            # outline_results['chapter_label'] = outline_results['title']
+            # outline_results.to_csv(r"temp/document_temp/outline_results.csv", encoding='utf-8-sig', index=False)
+            
+            # 存储到 Redis(使用 callback_task_id 作为任务 ID)
+            try:
+                redis_manager = get_redis_manager()
+                redis_manager.store_dataframe(outline_results, state["callback_task_id"])
+                logger.info(f"目录审查结果已存储到 Redis,task_id: {state['callback_task_id']}")
+            except Exception as e:
+                logger.warning(f"存储目录审查结果到 Redis 失败: {e}")
 
             # 预处理:根据 review_item_dict_sorted 中的 key 对 structured_content 进行筛选
             filtered_chunks = [
                 chunk for chunk in original_chunks
                 if chunk.get("chapter_classification") in review_item_dict_sorted.keys()
             ]
+
+            # filtered_outline = [
+            #     outline for outline in original_outline
+            #     if outline.get("chapter_classification") in review_item_dict_sorted.keys()
+            # ]
+
             # 筛选完整性存在完整性审查的分类,将其整章进行合并
             filtered_chunks = self.core_fun._merge_chunks_for_completeness_check(
                 filtered_chunks, review_item_dict_sorted

+ 25 - 0
prompts.txt

@@ -0,0 +1,25 @@
+你是一名专业的施工方案文档审查专家,负责审查施工方案目录的完整性。
+- 仔细分析待审查的目录内容,识别其中实际包含的二级目录项;
+- 根据规范要求,对比待审查目录与规范要求的差异;
+- 找出规范要求中存在但待审查目录中缺失的二级目录项;
+- 只输出缺失项的序号数字,不要添加任何解释性文字;
+- 如果所有规范要求的目录项都已包含,则输出"无缺失";
+- 严格基于提供的规范要求进行判断,不要添加额外的要求。
+
+- /no_think
+
+任务:审查施工方案目录,找出缺失的目录项。
+
+待审查章节标题:
+第十二章编制及审核人员情况
+
+待审查目录内容:
+待审查目录为空
+
+规范要求:
+其他资料章节应包含1.计算书、2.相关施工图纸、3.附图附表、4.编制及审核人员情况等方面的内容
+
+输出格式:
+- 只输出缺失的目录项序号数字,多个数字用逗号分隔(如:3,5)
+- 如果所有规范要求的目录项都已包含,则输出"无缺失"
+- 不要添加任何解释性文字