| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327 |
- """
- 搜索选项验证器
- 提供搜索功能的模型兼容性验证和参数验证逻辑
- 需求: 10.1, 10.2, 10.4
- """
- import logging
- from typing import List, Optional, Set
- from sqlalchemy.orm import Session
- from app.schemas.llm_schema import SearchOptions
- from app.models.model import ModelNew, ModelCategory
- logger = logging.getLogger(__name__)
class SearchOptionsValidator:
    """Validate and normalize web-search options for an LLM request.

    Search support for a model is looked up in the database when a session
    is supplied; the static sets below are used when no session is given or
    when the query raises. Every validation entry point is a classmethod
    that receives ``db`` explicitly.
    """

    # Static fallback list of models that support web search
    # (per the original comment: based on the Alibaba Cloud Bailian docs).
    SUPPORTED_SEARCH_MODELS: Set[str] = {
        # Qwen family
        "qwen3-max", "qwen3-max-2025-09-23", "qwen3-max-preview",
        "qwen-max", "qwen-max-latest", "qwen-max-2024-09-19",
        "qwen-plus", "qwen-plus-latest", "qwen-plus-2025-07-14",
        "qwen-flash", "qwen-flash-2025-07-28",
        "qwen-turbo", "qwen-turbo-latest", "qwen-turbo-2025-07-15",
        "qwq-plus",

        # Third-party models
        "deepseek-v3.2", "deepseek-v3.2-exp", "deepseek-v3.1",
        "deepseek-r1-0528", "deepseek-r1", "deepseek-v3",
        "Moonshot-Kimi-K2-Instruct",
    }

    # Models that accept the freshness parameter (turbo strategy only).
    FRESHNESS_SUPPORTED_MODELS: Set[str] = {
        "qwen3-max", "qwen3-max-preview",
        "qwen-plus", "qwen-flash",
    }

    # NOTE(review): the three sets below are declared for reference but no
    # method of this class checks against them - presumably the
    # SearchOptions schema enforces these values; confirm.

    # Valid search strategies.
    VALID_SEARCH_STRATEGIES: Set[str] = {"turbo", "max", "agent"}

    # Valid freshness values (in days).
    VALID_FRESHNESS_VALUES: Set[int] = {7, 30, 180, 365}

    # Valid citation formats.
    VALID_CITATION_FORMATS: Set[str] = {"[<number>]", "[ref_<number>]"}

    def __init__(self, db: Optional["Session"] = None):
        """Create a validator.

        Args:
            db: Optional database session. It is stored on the instance,
                but the classmethods of this class do not read it; they
                take ``db`` as an explicit argument instead.
        """
        self.db = db

    @staticmethod
    def _copy_with(search_options: "SearchOptions", **overrides) -> "SearchOptions":
        """Build a new SearchOptions from ``search_options`` with overrides.

        The input object is never mutated; a fresh instance is constructed
        from its ten known fields, with ``overrides`` taking precedence.
        """
        fields = {
            "enable_search": search_options.enable_search,
            "search_strategy": search_options.search_strategy,
            "forced_search": search_options.forced_search,
            "enable_search_extension": search_options.enable_search_extension,
            "freshness": search_options.freshness,
            "enable_source": search_options.enable_source,
            "enable_citation": search_options.enable_citation,
            "citation_format": search_options.citation_format,
            "prepend_search_result": search_options.prepend_search_result,
            "intention_options": search_options.intention_options,
        }
        fields.update(overrides)
        return SearchOptions(**fields)

    @classmethod
    def validate_model_compatibility(
        cls,
        model: str,
        search_options: "SearchOptions",
        db: Optional["Session"] = None,
    ) -> bool:
        """Check that ``model`` supports web search when search is enabled.

        Args:
            model: Model name.
            search_options: Search options for the request.
            db: Optional database session used for the dynamic lookup.

        Returns:
            Always ``True`` when the check passes (including when search is
            disabled, in which case nothing is checked).

        Raises:
            ValueError: If search is enabled and the model does not
                support it.
        """
        if not search_options.enable_search:
            return True  # search disabled, nothing to verify

        # Same lookup (database first, static set as fallback) that
        # is_search_supported exposes publicly.
        if not cls.is_search_supported(model, db):
            # get_supported_models copes with db=None and with a failing
            # query on its own, so it can be called unconditionally.
            supported_models = cls.get_supported_models(db)
            raise ValueError(
                f"模型 '{model}' 不支持联网搜索功能。"
                f"支持的模型: {', '.join(supported_models)}"
            )

        # Freshness only matters for the turbo strategy; this method cannot
        # alter the options, so it only warns here.
        if (
            search_options.freshness is not None
            and search_options.search_strategy == "turbo"
            and not cls.is_freshness_supported(model)
        ):
            logger.warning(
                f"模型 '{model}' 不支持时效性参数,将忽略 freshness 设置"
            )

        return True

    @classmethod
    def validate_search_params(cls, search_options: "SearchOptions") -> "SearchOptions":
        """Validate and normalize search parameters (model-independent).

        Args:
            search_options: The options as supplied by the caller.

        Returns:
            ``search_options`` itself when nothing needs normalizing,
            otherwise a new SearchOptions with ``freshness`` cleared (for
            non-turbo strategies) and/or unsupported ``intention_options``
            keys removed.

        Raises:
            ValueError: If ``prepend_search_result`` is set without
                ``enable_source``, or if ``prompt_intervene`` is not a
                non-blank string or is longer than 500 characters after
                stripping.
        """
        if not search_options.enable_search:
            return search_options

        # Freshness only takes effect with the turbo strategy.
        if (
            search_options.freshness is not None
            and search_options.search_strategy != "turbo"
        ):
            logger.warning(
                f"时效性参数仅对 'turbo' 策略生效,当前策略: '{search_options.search_strategy}',"
                "将忽略 freshness 设置"
            )
            search_options = cls._copy_with(search_options, freshness=None)

        # Citations without sources are allowed but discouraged.
        if search_options.enable_citation and not search_options.enable_source:
            logger.warning(
                "启用角标引用时建议同时启用搜索来源 (enable_source=True)"
            )

        # Prepending search results requires sources to be enabled.
        if search_options.prepend_search_result and not search_options.enable_source:
            raise ValueError(
                "启用提前返回搜索结果 (prepend_search_result=True) 时,"
                "必须同时启用搜索来源 (enable_source=True)"
            )

        # Natural-language search control parameters.
        if search_options.intention_options:
            if "prompt_intervene" in search_options.intention_options:
                prompt_intervene = search_options.intention_options["prompt_intervene"]
                if not isinstance(prompt_intervene, str) or not prompt_intervene.strip():
                    raise ValueError("prompt_intervene 必须是非空字符串")

                # Guard against overly long guidance text.
                if len(prompt_intervene.strip()) > 500:
                    raise ValueError("prompt_intervene 长度不能超过500个字符")

            # Unknown keys are dropped with a warning rather than rejected.
            supported_fields = {"prompt_intervene"}
            unsupported_fields = set(search_options.intention_options.keys()) - supported_fields
            if unsupported_fields:
                logger.warning(f"intention_options 中包含不支持的字段: {unsupported_fields}")
                filtered_intention_options = {
                    k: v for k, v in search_options.intention_options.items()
                    if k in supported_fields
                }
                # An empty result collapses to None.
                search_options = cls._copy_with(
                    search_options,
                    intention_options=filtered_intention_options or None,
                )

        return search_options

    @classmethod
    def get_supported_models(cls, db: Optional["Session"] = None) -> List[str]:
        """Return the names of models that support search, sorted.

        Args:
            db: Optional database session used for the dynamic lookup.

        Returns:
            Sorted model codes from the database when ``db`` is given and
            the query succeeds, otherwise the sorted static list.
        """
        if db is not None:
            try:
                models = db.query(ModelNew).filter(
                    ModelNew.categories.any(int(ModelCategory.LLM)),
                    ModelNew.is_search == True,  # noqa: E712 - SQLAlchemy column comparison
                ).all()
                return sorted([model.model_code for model in models])
            except Exception as e:
                # Best-effort: any failure falls through to the static list.
                logger.warning(f"数据库查询支持搜索的模型失败,使用静态配置: {e}")

        return sorted(cls.SUPPORTED_SEARCH_MODELS)

    @classmethod
    def get_freshness_supported_models(cls) -> List[str]:
        """Return the sorted names of models that accept ``freshness``."""
        return sorted(cls.FRESHNESS_SUPPORTED_MODELS)

    @classmethod
    def is_search_supported(cls, model: str, db: Optional["Session"] = None) -> bool:
        """Tell whether ``model`` supports web search.

        Args:
            model: Model name.
            db: Optional database session used for the dynamic lookup.

        Returns:
            With a session: the truthiness of the matching row's
            ``is_search`` (``False`` when no row matches). Without a
            session, or when the query raises: membership in
            ``SUPPORTED_SEARCH_MODELS``.
        """
        if db is not None:
            try:
                model_obj = db.query(ModelNew).filter(
                    ModelNew.model_code == model,
                    ModelNew.categories.any(int(ModelCategory.LLM)),
                ).first()
                # bool() keeps the declared return type even if the
                # attribute holds None.
                return bool(model_obj.is_search) if model_obj else False
            except Exception as e:
                # Best-effort: any failure falls through to the static set.
                logger.warning(f"数据库查询模型搜索支持状态失败,使用静态配置: {e}")

        return model in cls.SUPPORTED_SEARCH_MODELS

    @classmethod
    def is_freshness_supported(cls, model: str) -> bool:
        """Tell whether ``model`` is in ``FRESHNESS_SUPPORTED_MODELS``."""
        return model in cls.FRESHNESS_SUPPORTED_MODELS

    @classmethod
    def create_default_search_options(cls) -> "SearchOptions":
        """Return a SearchOptions with search disabled and default values."""
        return SearchOptions(
            enable_search=False,
            search_strategy="turbo",
            forced_search=False,
            enable_search_extension=False,
            freshness=None,
            enable_source=False,
            enable_citation=False,
            citation_format="[<number>]",
            prepend_search_result=False,
            intention_options=None,
        )

    @classmethod
    def validate_and_normalize(
        cls,
        model: str,
        search_options: Optional["SearchOptions"],
        db: Optional["Session"] = None,
    ) -> "SearchOptions":
        """Run the full validation and normalization pipeline.

        Args:
            model: Model name.
            search_options: Search options, or ``None`` to use the defaults
                from ``create_default_search_options``.
            db: Optional database session used for the dynamic lookup.

        Returns:
            The validated, normalized search options.

        Raises:
            ValueError: If the model does not support search or a
                parameter combination is invalid.
        """
        if search_options is None:
            search_options = cls.create_default_search_options()

        # Model check first: it raises for unsupported models and logs the
        # freshness warning.
        cls.validate_model_compatibility(model, search_options, db)

        validated_options = cls.validate_search_params(search_options)

        # Enforce what the warning in validate_model_compatibility
        # announces: a freshness value is dropped for models that do not
        # accept it. (Non-turbo strategies were already cleared above.)
        if (
            validated_options.enable_search
            and validated_options.freshness is not None
            and not cls.is_freshness_supported(model)
        ):
            validated_options = cls._copy_with(validated_options, freshness=None)

        logger.info(
            f"搜索选项验证完成: model={model}, "
            f"enable_search={validated_options.enable_search}, "
            f"strategy={validated_options.search_strategy}"
        )

        return validated_options
|