""" 搜索选项验证器 提供搜索功能的模型兼容性验证和参数验证逻辑 需求: 10.1, 10.2, 10.4 """ import logging from typing import List, Optional, Set from sqlalchemy.orm import Session from app.schemas.llm_schema import SearchOptions from app.models.model import ModelNew, ModelCategory logger = logging.getLogger(__name__) class SearchOptionsValidator: """搜索选项验证器""" def __init__(self, db: Optional[Session] = None): """ 初始化验证器 Args: db: 数据库会话(可选,用于动态查询模型信息) """ self.db = db # 支持联网搜索的模型列表(基于阿里云百炼文档)- 作为备用静态配置 SUPPORTED_SEARCH_MODELS: Set[str] = { # 通义千问系列 "qwen3-max", "qwen3-max-2025-09-23", "qwen3-max-preview", "qwen-max", "qwen-max-latest", "qwen-max-2024-09-19", "qwen-plus", "qwen-plus-latest", "qwen-plus-2025-07-14", "qwen-flash", "qwen-flash-2025-07-28", "qwen-turbo", "qwen-turbo-latest", "qwen-turbo-2025-07-15", "qwq-plus", # 第三方模型 "deepseek-v3.2", "deepseek-v3.2-exp", "deepseek-v3.1", "deepseek-r1-0528", "deepseek-r1", "deepseek-v3", "Moonshot-Kimi-K2-Instruct" } # 支持时效性参数的模型(仅turbo策略) FRESHNESS_SUPPORTED_MODELS: Set[str] = { "qwen3-max", "qwen3-max-preview", "qwen-plus", "qwen-flash" } # 有效的搜索策略 VALID_SEARCH_STRATEGIES: Set[str] = {"turbo", "max", "agent"} # 有效的时效性值(天数) VALID_FRESHNESS_VALUES: Set[int] = {7, 30, 180, 365} # 有效的引用格式 VALID_CITATION_FORMATS: Set[str] = {"[]", "[ref_]"} @classmethod def validate_model_compatibility(cls, model: str, search_options: SearchOptions, db: Optional[Session] = None) -> bool: """ 验证模型是否支持联网搜索功能 Args: model: 模型名称 search_options: 搜索选项 db: 数据库会话(可选,用于动态查询) Returns: 是否支持搜索功能 Raises: ValueError: 当模型不支持搜索功能时 """ if not search_options.enable_search: return True # 未启用搜索,无需验证 # 优先从数据库查询 is_supported = False if db: try: model_obj = db.query(ModelNew).filter( ModelNew.model_code == model, ModelNew.categories.any(int(ModelCategory.LLM)) ).first() is_supported = model_obj.is_search if model_obj else False except Exception as e: logger.warning(f"数据库查询模型搜索支持状态失败,使用静态配置: {e}") is_supported = model in cls.SUPPORTED_SEARCH_MODELS else: # 使用静态配置作为备用 is_supported = model in cls.SUPPORTED_SEARCH_MODELS if not is_supported: # 获取支持的模型列表用于错误提示 supported_models = cls.get_supported_models(db) if db else sorted(cls.SUPPORTED_SEARCH_MODELS) raise ValueError( f"模型 '{model}' 不支持联网搜索功能。" f"支持的模型: {', '.join(supported_models)}" ) # 验证时效性参数兼容性 if (search_options.freshness is not None and search_options.search_strategy == "turbo" and model not in cls.FRESHNESS_SUPPORTED_MODELS): logger.warning( f"模型 '{model}' 不支持时效性参数,将忽略 freshness 设置" ) return True @classmethod def validate_search_params(cls, search_options: SearchOptions) -> SearchOptions: """ 验证和标准化搜索参数 Args: search_options: 原始搜索选项 Returns: 验证并标准化后的搜索选项 Raises: ValueError: 当参数组合不兼容时 """ if not search_options.enable_search: return search_options # 时效性参数仅对turbo策略生效 if (search_options.freshness is not None and search_options.search_strategy != "turbo"): logger.warning( f"时效性参数仅对 'turbo' 策略生效,当前策略: '{search_options.search_strategy}'," "将忽略 freshness 设置" ) # 创建新的对象,重置freshness search_options = SearchOptions( enable_search=search_options.enable_search, search_strategy=search_options.search_strategy, forced_search=search_options.forced_search, enable_search_extension=search_options.enable_search_extension, freshness=None, # 重置为None enable_source=search_options.enable_source, enable_citation=search_options.enable_citation, citation_format=search_options.citation_format, prepend_search_result=search_options.prepend_search_result, intention_options=search_options.intention_options ) # 验证引用相关参数的逻辑一致性 if search_options.enable_citation and not search_options.enable_source: logger.warning( "启用角标引用时建议同时启用搜索来源 (enable_source=True)" ) # 验证提前返回搜索结果的前提条件 if search_options.prepend_search_result and not search_options.enable_source: raise ValueError( "启用提前返回搜索结果 (prepend_search_result=True) 时," "必须同时启用搜索来源 (enable_source=True)" ) # 验证自然语言搜索控制参数 if search_options.intention_options: # 验证 prompt_intervene 参数 if "prompt_intervene" in search_options.intention_options: prompt_intervene = search_options.intention_options["prompt_intervene"] if not isinstance(prompt_intervene, str) or not prompt_intervene.strip(): raise ValueError("prompt_intervene 必须是非空字符串") # 验证指导语句长度(避免过长的指导语句) if len(prompt_intervene.strip()) > 500: raise ValueError("prompt_intervene 长度不能超过500个字符") # 验证不支持的字段 supported_fields = {"prompt_intervene"} unsupported_fields = set(search_options.intention_options.keys()) - supported_fields if unsupported_fields: logger.warning(f"intention_options 中包含不支持的字段: {unsupported_fields}") # 创建新的对象,只保留支持的字段 filtered_intention_options = { k: v for k, v in search_options.intention_options.items() if k in supported_fields } search_options = SearchOptions( enable_search=search_options.enable_search, search_strategy=search_options.search_strategy, forced_search=search_options.forced_search, enable_search_extension=search_options.enable_search_extension, freshness=search_options.freshness, enable_source=search_options.enable_source, enable_citation=search_options.enable_citation, citation_format=search_options.citation_format, prepend_search_result=search_options.prepend_search_result, intention_options=filtered_intention_options if filtered_intention_options else None ) return search_options @classmethod def get_supported_models(cls, db: Optional[Session] = None) -> List[str]: """ 获取支持搜索的模型列表 Args: db: 数据库会话(可选,用于动态查询) Returns: 支持搜索的模型名称列表(按字母顺序排序) """ # 优先从数据库查询 if db: try: models = db.query(ModelNew).filter( ModelNew.categories.any(int(ModelCategory.LLM)), ModelNew.is_search == True ).all() return sorted([model.model_code for model in models]) except Exception as e: logger.warning(f"数据库查询支持搜索的模型失败,使用静态配置: {e}") # 使用静态配置作为备用 return sorted(cls.SUPPORTED_SEARCH_MODELS) @classmethod def get_freshness_supported_models(cls) -> List[str]: """ 获取支持时效性参数的模型列表 Returns: 支持时效性参数的模型名称列表(按字母顺序排序) """ return sorted(cls.FRESHNESS_SUPPORTED_MODELS) @classmethod def is_search_supported(cls, model: str, db: Optional[Session] = None) -> bool: """ 检查指定模型是否支持搜索功能 Args: model: 模型名称 db: 数据库会话(可选,用于动态查询) Returns: 是否支持搜索功能 """ # 优先从数据库查询 if db: try: model_obj = db.query(ModelNew).filter( ModelNew.model_code == model, ModelNew.categories.any(int(ModelCategory.LLM)) ).first() return model_obj.is_search if model_obj else False except Exception as e: logger.warning(f"数据库查询模型搜索支持状态失败,使用静态配置: {e}") # 使用静态配置作为备用 return model in cls.SUPPORTED_SEARCH_MODELS @classmethod def is_freshness_supported(cls, model: str) -> bool: """ 检查指定模型是否支持时效性参数 Args: model: 模型名称 Returns: 是否支持时效性参数 """ return model in cls.FRESHNESS_SUPPORTED_MODELS @classmethod def create_default_search_options(cls) -> SearchOptions: """ 创建默认的搜索选项 Returns: 默认搜索选项实例 """ return SearchOptions( enable_search=False, search_strategy="turbo", forced_search=False, enable_search_extension=False, freshness=None, enable_source=False, enable_citation=False, citation_format="[]", prepend_search_result=False, intention_options=None ) @classmethod def validate_and_normalize(cls, model: str, search_options: Optional[SearchOptions], db: Optional[Session] = None) -> SearchOptions: """ 验证并标准化搜索选项的完整流程 Args: model: 模型名称 search_options: 搜索选项(可选) db: 数据库会话(可选,用于动态查询) Returns: 验证并标准化后的搜索选项 Raises: ValueError: 当模型不支持或参数无效时 """ # 如果未提供搜索选项,使用默认值 if search_options is None: search_options = cls.create_default_search_options() # 验证模型兼容性 cls.validate_model_compatibility(model, search_options, db) # 验证和标准化参数 validated_options = cls.validate_search_params(search_options) logger.info( f"搜索选项验证完成: model={model}, " f"enable_search={validated_options.enable_search}, " f"strategy={validated_options.search_strategy}" ) return validated_options