import json
import re
import uuid

from foundation.observability.logger.loggering import review_logger as server_logger
from foundation.ai.agent.generate.model_generate import generate_model_client

# Matches a Markdown code fence (optionally tagged "json") and captures its content.
_CODE_FENCE_PATTERN = re.compile(
    r'```(?:json)?\s*\n?(.*?)\n?```',
    re.DOTALL | re.IGNORECASE,
)


class QueryRewriteManager():
    """
    Query rewrite manager: extracts review points from construction-plan text.
    """

    # (new field name, legacy field name) pairs that are kept in sync on every
    # review point so that both the new and the legacy schema can be consumed.
    _FIELD_ALIASES = (
        ("label", "entity"),
        ("search_queries", "search_keywords"),
        ("original_text", "background"),
    )

    def __init__(self):
        self.generate_model_client = generate_model_client

    @property
    def prompt_loader(self):
        """Load prompt_loader lazily to avoid a circular import."""
        from core.construction_review.component.reviewers.utils import prompt_loader
        return prompt_loader

    def query_extract(self, review_content):
        """
        Extract review points from the text under review.

        Args:
            review_content: Text of the content to review.

        Returns:
            list: Review points, each a dict of the form
                {
                    "label": str,             # review point label
                    "search_queries": list,   # retrieval queries
                    "original_text": str,     # excerpt of the source text
                    "parameter": str,         # technical parameter
                    # Backward-compatible aliases added by
                    # _add_backward_compat_aliases:
                    "entity": str,            # = label
                    "search_keywords": list,  # = search_queries
                    "background": str,        # = original_text
                }
            The list is empty when the model returned no usable points.
            None is returned when the response could not be parsed or any
            exception was raised during extraction.
        """
        try:
            # Render the prompt template with the content under review.
            task_prompt = self.prompt_loader.get_prompt_template(
                reviewer_type="review_point_extract",
                prompt_name="review_point_extract",
                review_content=review_content
            )
            task_prompt_info = {
                "task_prompt": task_prompt,
                "task_name": "review_point_extract"
            }
            trace_id = str(uuid.uuid4())

            # Invoke the model synchronously under the "review_point_extract"
            # function name.
            model_response = self.generate_model_client.get_model_generate_invoke_sync(
                trace_id=trace_id,
                task_prompt_info=task_prompt_info,
                timeout=60,
                function_name="review_point_extract"
            )

            formatted_response = self.ai_respose_format(model_response)
            if formatted_response:
                # One malformed entry (e.g. null or a number) must not cause
                # every valid review point to be discarded.
                formatted_response = self._drop_malformed_points(formatted_response)

            if formatted_response:
                formatted_response = self._add_backward_compat_aliases(formatted_response)
                server_logger.info(f"审查要点提取完成, 提取到 {len(formatted_response)} 个要点")
            else:
                server_logger.warning("审查要点提取失败, 格式化后为空")
            return formatted_response
        except Exception as e:
            # Best-effort boundary: callers receive None instead of an exception.
            server_logger.error(f"审查要点提取失败: {str(e)}")
            return None

    @staticmethod
    def _drop_malformed_points(review_points):
        """Return only the dict entries of review_points, logging how many were dropped."""
        valid_points = [point for point in review_points if isinstance(point, dict)]
        dropped = len(review_points) - len(valid_points)
        if dropped:
            server_logger.warning(f"忽略 {dropped} 个非dict格式的审查要点")
        return valid_points

    def _add_backward_compat_aliases(self, review_points):
        """
        Add bidirectional field aliases to every review point (in place).

        New -> legacy: label -> entity, search_queries -> search_keywords,
        original_text -> background.
        Legacy -> new: entity -> label, search_keywords -> search_queries,
        background -> original_text.

        A key that is already present is never overwritten. Entries that are
        not dicts are left untouched.

        Args:
            review_points: List of review point dicts.

        Returns:
            list: The same list object that was passed in.
        """
        for point in review_points:
            if not isinstance(point, dict):
                continue
            for new_key, legacy_key in self._FIELD_ALIASES:
                if new_key in point:
                    # New format: expose the legacy name unless already set.
                    point.setdefault(legacy_key, point[new_key])
                elif legacy_key in point:
                    # Legacy format: expose the new name.
                    point[new_key] = point[legacy_key]
        return review_points

    @staticmethod
    def _looks_like_points(value):
        """Return True if value is a dict or a list containing at least one dict."""
        if isinstance(value, dict):
            return True
        return isinstance(value, list) and any(isinstance(item, dict) for item in value)

    def _parse_json_lenient(self, text):
        """
        Parse text as JSON, tolerating prose around the JSON payload.

        Strict parsing is tried first. If it fails, the text is scanned for the
        first '[' or '{' from which a JSON value can be decoded that is a dict
        or a list containing a dict. Candidates such as a citation marker
        "[1]" are skipped by that shape check.

        Raises:
            json.JSONDecodeError: The original strict-parse error when no
                embedded JSON payload is found.
        """
        try:
            return json.loads(text)
        except json.JSONDecodeError:
            decoder = json.JSONDecoder()
            for opener in re.finditer(r'[\[{]', text):
                try:
                    candidate, _ = decoder.raw_decode(text, opener.start())
                except json.JSONDecodeError:
                    continue
                if self._looks_like_points(candidate):
                    server_logger.warning("响应不是纯JSON, 已从文本中提取内嵌JSON")
                    return candidate
            # Nothing usable found: surface the original strict-parse error.
            raise

    def ai_respose_format(self, model_response):
        """
        Normalize the raw model response into a list of review points.

        Args:
            model_response: Raw model response; a list, a dict, or a string
                containing JSON (optionally wrapped in a Markdown code fence
                or surrounded by prose).

        Returns:
            list: The review points. A single dict is wrapped in a list.
            None: When the response type is unsupported, the JSON cannot be
                parsed, or the parsed value is neither a list nor a dict.
        """
        try:
            # 1. Already a list: return it unchanged.
            if isinstance(model_response, list):
                server_logger.info(f"模型响应已是list格式, 包含 {len(model_response)} 个要点")
                return model_response

            # 2. A single dict: wrap it in a list.
            if isinstance(model_response, dict):
                server_logger.info("模型响应是dict格式, 包装为list")
                return [model_response]

            # 3. A string: locate and parse the JSON payload.
            if isinstance(model_response, str):
                response_text = model_response.strip()

                # 3.1 Prefer the content of a Markdown code fence when present.
                fence_match = _CODE_FENCE_PATTERN.search(response_text)
                json_str = fence_match.group(1).strip() if fence_match else response_text

                # 3.2 Collapse runs of blank lines and trim surrounding whitespace.
                json_str = re.sub(r'\n+', '\n', json_str).strip()

                # 3.3 Parse, falling back to JSON embedded in surrounding prose.
                parsed_data = self._parse_json_lenient(json_str)

                # 3.4 Always hand back a list.
                if isinstance(parsed_data, list):
                    server_logger.info(f"JSON解析成功, 提取到 {len(parsed_data)} 个审查要点")
                    return parsed_data
                if isinstance(parsed_data, dict):
                    server_logger.info("JSON解析成功, 单个要点包装为list")
                    return [parsed_data]

                server_logger.warning(f"无法识别的JSON格式: {type(parsed_data)}")
                return None

            server_logger.warning(f"无法识别的响应类型: {type(model_response)}")
            return None
        except json.JSONDecodeError as e:
            server_logger.error(f"JSON解析失败: {e}")
            server_logger.error(f"原始响应: {str(model_response)[:500]}")
            return None
        except Exception as e:
            server_logger.error(f"响应格式化异常: {e}")
            server_logger.error(f"原始响应: {str(model_response)[:500]}")
            return None


query_rewrite_manager = QueryRewriteManager()