| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177 |
- import uuid
- from foundation.observability.logger.loggering import review_logger as server_logger
- from foundation.ai.agent.generate.model_generate import generate_model_client
class QueryRewriteManager:
    """Query-rewrite manager: extracts review points from construction-scheme text.

    The manager renders a prompt from the review text, sends it to the
    generation model client, parses the model's JSON answer, and normalises
    every review point so that both the new field names (``label``,
    ``search_queries``, ``original_text``) and the legacy ones (``entity``,
    ``search_keywords``, ``background``) are present.
    """

    # (new-format key, legacy key) pairs kept in sync by
    # ``_add_backward_compat_aliases``.
    _ALIAS_PAIRS = (
        ("label", "entity"),
        ("search_queries", "search_keywords"),
        ("original_text", "background"),
    )

    def __init__(self):
        self.generate_model_client = generate_model_client

    @property
    def prompt_loader(self):
        """Return the prompt loader, imported lazily to avoid a circular import."""
        from core.construction_review.component.reviewers.utils import prompt_loader
        return prompt_loader

    def query_extract(self, review_content):
        """Extract review points from the given review text.

        Args:
            review_content: Text of the content under review.

        Returns:
            A list of review points, or ``None`` when extraction fails
            (any exception is logged and swallowed). An empty parse result
            is returned as-is after a warning is logged. Each point is
            expected to look like::

                {
                    "label": str,            # review-point label
                    "search_queries": list,  # specification search queries
                    "original_text": str,    # excerpt of the source text
                    "parameter": str,        # technical parameter
                    # Backward-compatible aliases added automatically by
                    # _add_backward_compat_aliases:
                    "entity": str,           # = label
                    "search_keywords": list, # = search_queries
                    "background": str,       # = original_text
                }
        """
        try:
            # Render the prompt template. Only the "review_point_extract"
            # template is requested; there is no fallback to another key.
            task_prompt = self.prompt_loader.get_prompt_template(
                reviewer_type="review_point_extract",
                prompt_name="review_point_extract",
                review_content=review_content,
            )
            task_prompt_info = {
                "task_prompt": task_prompt,
                "task_name": "review_point_extract",
            }
            trace_id = str(uuid.uuid4())

            # Per the original author's note, ``function_name`` selects the
            # matching entry in model_setting.yaml.
            model_response = self.generate_model_client.get_model_generate_invoke_sync(
                trace_id=trace_id,
                task_prompt_info=task_prompt_info,
                timeout=60,
                function_name="review_point_extract",
            )

            formatted_response = self.ai_respose_format(model_response)
            if formatted_response:
                # Make the result usable by both new-style and legacy consumers.
                formatted_response = self._add_backward_compat_aliases(formatted_response)
                server_logger.info(f"审查要点提取完成, 提取到 {len(formatted_response)} 个要点")
            else:
                server_logger.warning("审查要点提取失败, 格式化后为空")
            return formatted_response
        except Exception as e:
            # Best-effort boundary: callers receive None instead of an exception.
            server_logger.error(f"审查要点提取失败: {e}")
            return None

    def _add_backward_compat_aliases(self, review_points):
        """Add two-way field aliases to every review point, in place.

        New -> legacy: label->entity, search_queries->search_keywords,
        original_text->background.
        Legacy -> new: entity->label, search_keywords->search_queries,
        background->original_text.

        A key that is already present is never overwritten. Elements that are
        not dicts (for example a stray number or ``null`` in the model's
        array) are left untouched instead of raising, so one malformed entry
        no longer discards every valid review point.

        Args:
            review_points: List of review points as parsed from the model.

        Returns:
            The same list object, with dict elements updated in place.
        """
        for point in review_points:
            if not isinstance(point, dict):
                # Cannot carry aliases; keep the element as it is.
                continue
            # New -> legacy (the model answered in the new format).
            for new_key, old_key in self._ALIAS_PAIRS:
                if new_key in point:
                    point.setdefault(old_key, point[new_key])
            # Legacy -> new (the model answered in the legacy format).
            for new_key, old_key in self._ALIAS_PAIRS:
                if old_key in point:
                    point.setdefault(new_key, point[old_key])
        return review_points

    def ai_respose_format(self, model_response):
        """Normalise the raw model response into a list of review points.

        Args:
            model_response: Raw model output; either a JSON string
                (optionally wrapped in a Markdown code fence) or an
                already-parsed ``list`` / ``dict``.

        Returns:
            A list of review points, or ``None`` when the response type is
            unsupported or the JSON cannot be parsed.
        """
        import re
        import json

        try:
            # 1. Already a list: return unchanged.
            if isinstance(model_response, list):
                server_logger.info(f"模型响应已是list格式, 包含 {len(model_response)} 个要点")
                return model_response

            # 2. A single dict: wrap it in a list.
            if isinstance(model_response, dict):
                server_logger.info("模型响应是dict格式, 包装为list")
                return [model_response]

            # 3. A string: strip the optional code fence, then parse as JSON.
            if isinstance(model_response, str):
                response_text = model_response.strip()

                # 3.1 Use the content of a ```json ... ``` (or bare ```) fence
                #     when one is present, otherwise the whole text.
                json_pattern = r'```(?:json)?\s*\n?(.*?)\n?```'
                json_match = re.search(json_pattern, response_text, re.DOTALL | re.IGNORECASE)
                if json_match:
                    json_str = json_match.group(1).strip()
                else:
                    json_str = response_text

                # 3.2 Collapse runs of blank lines and trim surrounding whitespace.
                json_str = re.sub(r'\n+', '\n', json_str)
                json_str = json_str.strip()

                # 3.3 Parse the JSON payload.
                parsed_data = json.loads(json_str)

                # 3.4 Always hand back a list.
                if isinstance(parsed_data, list):
                    server_logger.info(f"JSON解析成功, 提取到 {len(parsed_data)} 个审查要点")
                    return parsed_data
                elif isinstance(parsed_data, dict):
                    server_logger.info("JSON解析成功, 单个要点包装为list")
                    return [parsed_data]

                # Valid JSON, but a scalar rather than a list or dict.
                server_logger.warning(f"无法识别的JSON格式: {type(parsed_data)}")
                return None

            server_logger.warning(f"无法识别的响应类型: {type(model_response)}")
            return None
        except json.JSONDecodeError as e:
            server_logger.error(f"JSON解析失败: {e}")
            # Log only the first 500 characters of the raw response.
            server_logger.error(f"原始响应: {str(model_response)[:500]}")
            return None
        except Exception as e:
            server_logger.error(f"响应格式化异常: {e}")
            server_logger.error(f"原始响应: {str(model_response)[:500]}")
            return None
# Module-level shared instance, created once when this module is imported.
query_rewrite_manager = QueryRewriteManager()
|