"""Search options validator (search_options_validator.py).

Provides model-compatibility validation and parameter validation logic
for the search feature.

Requirements: 10.1, 10.2, 10.4
"""
import logging
from typing import ClassVar, List, Optional, Set

from sqlalchemy.orm import Session

from app.models.model import ModelNew, ModelCategory
from app.schemas.llm_schema import SearchOptions
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
  12. class SearchOptionsValidator:
  13. """搜索选项验证器"""
  14. def __init__(self, db: Optional[Session] = None):
  15. """
  16. 初始化验证器
  17. Args:
  18. db: 数据库会话(可选,用于动态查询模型信息)
  19. """
  20. self.db = db
  21. # 支持联网搜索的模型列表(基于阿里云百炼文档)- 作为备用静态配置
  22. SUPPORTED_SEARCH_MODELS: Set[str] = {
  23. # 通义千问系列
  24. "qwen3-max", "qwen3-max-2025-09-23", "qwen3-max-preview",
  25. "qwen-max", "qwen-max-latest", "qwen-max-2024-09-19",
  26. "qwen-plus", "qwen-plus-latest", "qwen-plus-2025-07-14",
  27. "qwen-flash", "qwen-flash-2025-07-28",
  28. "qwen-turbo", "qwen-turbo-latest", "qwen-turbo-2025-07-15",
  29. "qwq-plus",
  30. # 第三方模型
  31. "deepseek-v3.2", "deepseek-v3.2-exp", "deepseek-v3.1",
  32. "deepseek-r1-0528", "deepseek-r1", "deepseek-v3",
  33. "Moonshot-Kimi-K2-Instruct"
  34. }
  35. # 支持时效性参数的模型(仅turbo策略)
  36. FRESHNESS_SUPPORTED_MODELS: Set[str] = {
  37. "qwen3-max", "qwen3-max-preview",
  38. "qwen-plus", "qwen-flash"
  39. }
  40. # 有效的搜索策略
  41. VALID_SEARCH_STRATEGIES: Set[str] = {"turbo", "max", "agent"}
  42. # 有效的时效性值(天数)
  43. VALID_FRESHNESS_VALUES: Set[int] = {7, 30, 180, 365}
  44. # 有效的引用格式
  45. VALID_CITATION_FORMATS: Set[str] = {"[<number>]", "[ref_<number>]"}
  46. @classmethod
  47. def validate_model_compatibility(cls, model: str, search_options: SearchOptions, db: Optional[Session] = None) -> bool:
  48. """
  49. 验证模型是否支持联网搜索功能
  50. Args:
  51. model: 模型名称
  52. search_options: 搜索选项
  53. db: 数据库会话(可选,用于动态查询)
  54. Returns:
  55. 是否支持搜索功能
  56. Raises:
  57. ValueError: 当模型不支持搜索功能时
  58. """
  59. if not search_options.enable_search:
  60. return True # 未启用搜索,无需验证
  61. # 优先从数据库查询
  62. is_supported = False
  63. if db:
  64. try:
  65. model_obj = db.query(ModelNew).filter(
  66. ModelNew.model_code == model,
  67. ModelNew.categories.any(int(ModelCategory.LLM))
  68. ).first()
  69. is_supported = model_obj.is_search if model_obj else False
  70. except Exception as e:
  71. logger.warning(f"数据库查询模型搜索支持状态失败,使用静态配置: {e}")
  72. is_supported = model in cls.SUPPORTED_SEARCH_MODELS
  73. else:
  74. # 使用静态配置作为备用
  75. is_supported = model in cls.SUPPORTED_SEARCH_MODELS
  76. if not is_supported:
  77. # 获取支持的模型列表用于错误提示
  78. supported_models = cls.get_supported_models(db) if db else sorted(cls.SUPPORTED_SEARCH_MODELS)
  79. raise ValueError(
  80. f"模型 '{model}' 不支持联网搜索功能。"
  81. f"支持的模型: {', '.join(supported_models)}"
  82. )
  83. # 验证时效性参数兼容性
  84. if (search_options.freshness is not None and
  85. search_options.search_strategy == "turbo" and
  86. model not in cls.FRESHNESS_SUPPORTED_MODELS):
  87. logger.warning(
  88. f"模型 '{model}' 不支持时效性参数,将忽略 freshness 设置"
  89. )
  90. return True
  91. @classmethod
  92. def validate_search_params(cls, search_options: SearchOptions) -> SearchOptions:
  93. """
  94. 验证和标准化搜索参数
  95. Args:
  96. search_options: 原始搜索选项
  97. Returns:
  98. 验证并标准化后的搜索选项
  99. Raises:
  100. ValueError: 当参数组合不兼容时
  101. """
  102. if not search_options.enable_search:
  103. return search_options
  104. # 时效性参数仅对turbo策略生效
  105. if (search_options.freshness is not None and
  106. search_options.search_strategy != "turbo"):
  107. logger.warning(
  108. f"时效性参数仅对 'turbo' 策略生效,当前策略: '{search_options.search_strategy}',"
  109. "将忽略 freshness 设置"
  110. )
  111. # 创建新的对象,重置freshness
  112. search_options = SearchOptions(
  113. enable_search=search_options.enable_search,
  114. search_strategy=search_options.search_strategy,
  115. forced_search=search_options.forced_search,
  116. enable_search_extension=search_options.enable_search_extension,
  117. freshness=None, # 重置为None
  118. enable_source=search_options.enable_source,
  119. enable_citation=search_options.enable_citation,
  120. citation_format=search_options.citation_format,
  121. prepend_search_result=search_options.prepend_search_result,
  122. intention_options=search_options.intention_options
  123. )
  124. # 验证引用相关参数的逻辑一致性
  125. if search_options.enable_citation and not search_options.enable_source:
  126. logger.warning(
  127. "启用角标引用时建议同时启用搜索来源 (enable_source=True)"
  128. )
  129. # 验证提前返回搜索结果的前提条件
  130. if search_options.prepend_search_result and not search_options.enable_source:
  131. raise ValueError(
  132. "启用提前返回搜索结果 (prepend_search_result=True) 时,"
  133. "必须同时启用搜索来源 (enable_source=True)"
  134. )
  135. # 验证自然语言搜索控制参数
  136. if search_options.intention_options:
  137. # 验证 prompt_intervene 参数
  138. if "prompt_intervene" in search_options.intention_options:
  139. prompt_intervene = search_options.intention_options["prompt_intervene"]
  140. if not isinstance(prompt_intervene, str) or not prompt_intervene.strip():
  141. raise ValueError("prompt_intervene 必须是非空字符串")
  142. # 验证指导语句长度(避免过长的指导语句)
  143. if len(prompt_intervene.strip()) > 500:
  144. raise ValueError("prompt_intervene 长度不能超过500个字符")
  145. # 验证不支持的字段
  146. supported_fields = {"prompt_intervene"}
  147. unsupported_fields = set(search_options.intention_options.keys()) - supported_fields
  148. if unsupported_fields:
  149. logger.warning(f"intention_options 中包含不支持的字段: {unsupported_fields}")
  150. # 创建新的对象,只保留支持的字段
  151. filtered_intention_options = {
  152. k: v for k, v in search_options.intention_options.items()
  153. if k in supported_fields
  154. }
  155. search_options = SearchOptions(
  156. enable_search=search_options.enable_search,
  157. search_strategy=search_options.search_strategy,
  158. forced_search=search_options.forced_search,
  159. enable_search_extension=search_options.enable_search_extension,
  160. freshness=search_options.freshness,
  161. enable_source=search_options.enable_source,
  162. enable_citation=search_options.enable_citation,
  163. citation_format=search_options.citation_format,
  164. prepend_search_result=search_options.prepend_search_result,
  165. intention_options=filtered_intention_options if filtered_intention_options else None
  166. )
  167. return search_options
  168. @classmethod
  169. def get_supported_models(cls, db: Optional[Session] = None) -> List[str]:
  170. """
  171. 获取支持搜索的模型列表
  172. Args:
  173. db: 数据库会话(可选,用于动态查询)
  174. Returns:
  175. 支持搜索的模型名称列表(按字母顺序排序)
  176. """
  177. # 优先从数据库查询
  178. if db:
  179. try:
  180. models = db.query(ModelNew).filter(
  181. ModelNew.categories.any(int(ModelCategory.LLM)),
  182. ModelNew.is_search == True
  183. ).all()
  184. return sorted([model.model_code for model in models])
  185. except Exception as e:
  186. logger.warning(f"数据库查询支持搜索的模型失败,使用静态配置: {e}")
  187. # 使用静态配置作为备用
  188. return sorted(cls.SUPPORTED_SEARCH_MODELS)
  189. @classmethod
  190. def get_freshness_supported_models(cls) -> List[str]:
  191. """
  192. 获取支持时效性参数的模型列表
  193. Returns:
  194. 支持时效性参数的模型名称列表(按字母顺序排序)
  195. """
  196. return sorted(cls.FRESHNESS_SUPPORTED_MODELS)
  197. @classmethod
  198. def is_search_supported(cls, model: str, db: Optional[Session] = None) -> bool:
  199. """
  200. 检查指定模型是否支持搜索功能
  201. Args:
  202. model: 模型名称
  203. db: 数据库会话(可选,用于动态查询)
  204. Returns:
  205. 是否支持搜索功能
  206. """
  207. # 优先从数据库查询
  208. if db:
  209. try:
  210. model_obj = db.query(ModelNew).filter(
  211. ModelNew.model_code == model,
  212. ModelNew.categories.any(int(ModelCategory.LLM))
  213. ).first()
  214. return model_obj.is_search if model_obj else False
  215. except Exception as e:
  216. logger.warning(f"数据库查询模型搜索支持状态失败,使用静态配置: {e}")
  217. # 使用静态配置作为备用
  218. return model in cls.SUPPORTED_SEARCH_MODELS
  219. @classmethod
  220. def is_freshness_supported(cls, model: str) -> bool:
  221. """
  222. 检查指定模型是否支持时效性参数
  223. Args:
  224. model: 模型名称
  225. Returns:
  226. 是否支持时效性参数
  227. """
  228. return model in cls.FRESHNESS_SUPPORTED_MODELS
  229. @classmethod
  230. def create_default_search_options(cls) -> SearchOptions:
  231. """
  232. 创建默认的搜索选项
  233. Returns:
  234. 默认搜索选项实例
  235. """
  236. return SearchOptions(
  237. enable_search=False,
  238. search_strategy="turbo",
  239. forced_search=False,
  240. enable_search_extension=False,
  241. freshness=None,
  242. enable_source=False,
  243. enable_citation=False,
  244. citation_format="[<number>]",
  245. prepend_search_result=False,
  246. intention_options=None
  247. )
  248. @classmethod
  249. def validate_and_normalize(cls, model: str, search_options: Optional[SearchOptions], db: Optional[Session] = None) -> SearchOptions:
  250. """
  251. 验证并标准化搜索选项的完整流程
  252. Args:
  253. model: 模型名称
  254. search_options: 搜索选项(可选)
  255. db: 数据库会话(可选,用于动态查询)
  256. Returns:
  257. 验证并标准化后的搜索选项
  258. Raises:
  259. ValueError: 当模型不支持或参数无效时
  260. """
  261. # 如果未提供搜索选项,使用默认值
  262. if search_options is None:
  263. search_options = cls.create_default_search_options()
  264. # 验证模型兼容性
  265. cls.validate_model_compatibility(model, search_options, db)
  266. # 验证和标准化参数
  267. validated_options = cls.validate_search_params(search_options)
  268. logger.info(
  269. f"搜索选项验证完成: model={model}, "
  270. f"enable_search={validated_options.enable_search}, "
  271. f"strategy={validated_options.search_strategy}"
  272. )
  273. return validated_options