llm_service.py 34 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794
  1. """
  2. LLM服务层
  3. 提供LLM对话的业务逻辑处理,集成会话和消息管理
  4. 需求: 1.4, 2.2, 2.3, 3.1, 3.2
  5. 支持: 流式输出、Token消耗输出、思考模式、图像输入、联网搜索
  6. 搜索需求: 1.1, 1.3, 1.4, 8.1, 8.4, 8.5
  7. """
  8. import json
  9. import logging
  10. from decimal import Decimal
  11. from typing import AsyncGenerator, List, Optional, Union
  12. from sqlalchemy.orm import Session
  13. from sqlalchemy import or_
  14. from fastapi import HTTPException
  15. from app.models.model import ModelNew, ModelCategory
  16. from app.schemas.llm_schema import (
  17. ChatRequest, ChatResponse, StreamChunk, UsageInfo,
  18. EnhancedChatRequest, EnhancedChatResponse, EnhancedStreamChunk,
  19. SearchOptions, SearchResult
  20. )
  21. from app.schemas.model_schema import ModelResponse
  22. from app.services.dashscope_client import DashScopeClient
  23. from app.services.search_options_validator import SearchOptionsValidator
  24. from app.services.citation_formatter import CitationFormatter
  25. from app.services.vertical_domain_processor import VerticalDomainProcessor
  26. from app.services.stream_search_state import StreamSearchStateManager
  27. logger = logging.getLogger(__name__)
  28. class LLMService:
  29. """LLM业务服务类"""
  30. def __init__(self, db: Session, api_key: str = None, user_id: str = None):
  31. """
  32. 初始化LLM服务
  33. Args:
  34. db: 数据库会话
  35. api_key: 用户的API密钥(从用户数据动态加载)
  36. user_id: 用户ID(用于会话管理)
  37. """
  38. self.db = db
  39. self.api_key = api_key
  40. self.user_id = user_id
  41. self.client = DashScopeClient(api_key) if api_key else None
  42. self.search_validator = SearchOptionsValidator(db)
  43. self.citation_formatter = CitationFormatter()
  44. self.vertical_processor = VerticalDomainProcessor()
  45. def validate_model(self, model_name: str) -> bool:
  46. """
  47. 验证模型是否为有效的LLM或多模态模型(category=0 or 1),
  48. 排除 OCR 专用模型。
  49. """
  50. OCR_ONLY_MODELS = {'qwen-vl-ocr', 'qwen-vl-ocr-latest'}
  51. if model_name in OCR_ONLY_MODELS:
  52. return False
  53. model = self.db.query(ModelNew).filter(
  54. ModelNew.model_code == model_name,
  55. or_(
  56. ModelNew.categories.any(int(ModelCategory.LLM)),
  57. ModelNew.categories.any(int(ModelCategory.MULTIMODAL))
  58. )
  59. ).first()
  60. return model is not None
  61. def get_model_by_title(self, model_title: str) -> Optional[ModelNew]:
  62. return self.db.query(ModelNew).filter(ModelNew.model_code == model_title).first()
  63. def _convert_messages(self, messages) -> List[dict]:
  64. """
  65. 将请求消息转换为API调用格式
  66. 支持纯文本和多模态内容
  67. """
  68. result = []
  69. for msg in messages:
  70. content = msg.content
  71. if isinstance(content, str):
  72. result.append({"role": msg.role, "content": content})
  73. elif isinstance(content, list):
  74. # 多模态内容
  75. converted_content = []
  76. for item in content:
  77. if hasattr(item, 'type'):
  78. if item.type == 'text':
  79. converted_content.append({"type": "text", "text": item.text})
  80. elif item.type == 'image_url':
  81. converted_content.append({"type": "image_url", "image_url": item.image_url})
  82. elif isinstance(item, dict):
  83. converted_content.append(item)
  84. result.append({"role": msg.role, "content": converted_content})
  85. else:
  86. result.append({"role": msg.role, "content": str(content)})
  87. return result
  88. def _get_text_content(self, content) -> str:
  89. """从消息内容中提取文本"""
  90. if isinstance(content, str):
  91. return content
  92. elif isinstance(content, list):
  93. texts = []
  94. for item in content:
  95. if hasattr(item, 'type') and item.type == 'text':
  96. texts.append(item.text)
  97. elif isinstance(item, dict) and item.get('type') == 'text':
  98. texts.append(item.get('text', ''))
  99. return ' '.join(texts)
  100. return str(content)
  101. def chat(
  102. self,
  103. request: ChatRequest,
  104. conversation_id: Optional[int] = None
  105. ) -> ChatResponse:
  106. """
  107. 非流式对话
  108. Args:
  109. request: 对话请求
  110. conversation_id: 会话ID(可选,如果提供则记录消息)
  111. Returns:
  112. 对话响应
  113. """
  114. if not self.client:
  115. raise HTTPException(status_code=403, detail="未配置API密钥,请在用户设置中配置apikey")
  116. if not self.validate_model(request.model):
  117. raise HTTPException(status_code=400, detail=f"无效的模型名称或模型类型不匹配: {request.model}")
  118. messages = self._convert_messages(request.messages)
  119. response = self.client.call(
  120. model=request.model,
  121. messages=messages,
  122. temperature=request.temperature,
  123. top_p=request.top_p,
  124. max_tokens=request.max_tokens,
  125. enable_thinking=request.enable_thinking,
  126. thinking_budget=request.thinking_budget
  127. )
  128. if response.status_code != 200:
  129. raise HTTPException(status_code=502, detail=f"百炼平台返回错误: {response.message}")
  130. choice = response.output.choices[0]
  131. usage = response.usage
  132. # 提取思考内容(安全获取,qwen3 等模型可能返回字典格式)
  133. try:
  134. reasoning_content = choice.message.get('reasoning_content', '') if hasattr(choice.message, 'get') else getattr(choice.message, 'reasoning_content', None)
  135. except Exception:
  136. reasoning_content = None
  137. # 提取文本内容(处理多模态/思考模式返回的列表格式)
  138. msg_content = choice.message.content
  139. if isinstance(msg_content, list):
  140. content_text = ''.join(
  141. item.get('text', '') if isinstance(item, dict) else str(item)
  142. for item in msg_content
  143. )
  144. elif isinstance(msg_content, dict):
  145. content_text = msg_content.get('text', str(msg_content))
  146. else:
  147. content_text = msg_content or ""
  148. return ChatResponse(
  149. content=content_text,
  150. finish_reason=choice.finish_reason,
  151. usage=UsageInfo(
  152. input_tokens=usage.input_tokens,
  153. output_tokens=usage.output_tokens,
  154. total_tokens=usage.total_tokens
  155. ),
  156. reasoning_content=reasoning_content
  157. )
  158. async def chat_stream(
  159. self,
  160. request: ChatRequest,
  161. conversation_id: Optional[int] = None
  162. ) -> AsyncGenerator[str, None]:
  163. """
  164. 流式对话,返回SSE事件流
  165. Args:
  166. request: 对话请求
  167. conversation_id: 会话ID(可选,如果提供则记录消息)
  168. Yields:
  169. SSE格式的响应块
  170. """
  171. if not self.client:
  172. error_data = {"error": "未配置API密钥,请在用户设置中配置apikey"}
  173. yield f"data: {json.dumps(error_data, ensure_ascii=False)}\n\n"
  174. yield "data: [DONE]\n\n"
  175. return
  176. if not self.validate_model(request.model):
  177. error_data = {"error": f"无效的模型名称或模型类型不匹配: {request.model}"}
  178. yield f"data: {json.dumps(error_data, ensure_ascii=False)}\n\n"
  179. yield "data: [DONE]\n\n"
  180. return
  181. messages = self._convert_messages(request.messages)
  182. # 用于收集完整响应内容
  183. full_content = ""
  184. full_reasoning_content = ""
  185. final_usage = None
  186. try:
  187. for response in self.client.call_stream(
  188. model=request.model,
  189. messages=messages,
  190. temperature=request.temperature,
  191. top_p=request.top_p,
  192. max_tokens=request.max_tokens,
  193. enable_thinking=request.enable_thinking,
  194. thinking_budget=request.thinking_budget
  195. ):
  196. if response.status_code != 200:
  197. error_msg = getattr(response, 'message', str(response))
  198. logger.error(f"DashScope流式调用错误: status={response.status_code}, message={error_msg}")
  199. error_data = {"error": error_msg}
  200. yield f"data: {json.dumps(error_data, ensure_ascii=False)}\n\n"
  201. break
  202. choice = response.output.choices[0]
  203. # 获取内容(处理多模态响应)
  204. msg_content = choice.message.content
  205. if isinstance(msg_content, list):
  206. # 多模态响应,提取文本
  207. text_parts = []
  208. for item in msg_content:
  209. if isinstance(item, dict) and 'text' in item:
  210. text_parts.append(item['text'])
  211. content_text = ''.join(text_parts)
  212. else:
  213. content_text = msg_content or ""
  214. # 累积内容
  215. if content_text:
  216. full_content += content_text
  217. # 获取思考内容(增量)- 安全获取,字段可能不存在
  218. reasoning_content = ""
  219. try:
  220. reasoning_content = choice.message.get('reasoning_content', '') if hasattr(choice.message, 'get') else getattr(choice.message, 'reasoning_content', '') or ''
  221. except (KeyError, AttributeError):
  222. pass
  223. if reasoning_content:
  224. full_reasoning_content += reasoning_content
  225. chunk = StreamChunk(
  226. content=content_text,
  227. finish_reason=choice.finish_reason if choice.finish_reason else None,
  228. reasoning_content=reasoning_content if reasoning_content else None
  229. )
  230. if choice.finish_reason:
  231. usage = response.usage
  232. final_usage = usage
  233. chunk.usage = UsageInfo(
  234. input_tokens=usage.input_tokens,
  235. output_tokens=usage.output_tokens,
  236. total_tokens=usage.total_tokens
  237. )
  238. yield f"data: {chunk.model_dump_json()}\n\n"
  239. # 流式完成后记录 token 统计
  240. if conversation_id and self.user_id and final_usage:
  241. pass # conversation tracking removed
  242. except Exception as e:
  243. logger.error(f"DashScope流式调用异常: {type(e).__name__}: {str(e)}")
  244. error_data = {"error": f"调用失败: {str(e)}"}
  245. yield f"data: {json.dumps(error_data, ensure_ascii=False)}\n\n"
  246. yield "data: [DONE]\n\n"
  247. def chat_with_search(
  248. self,
  249. request: EnhancedChatRequest,
  250. conversation_id: Optional[int] = None
  251. ) -> EnhancedChatResponse:
  252. """
  253. 支持搜索的非流式对话
  254. Args:
  255. request: 增强的对话请求(包含搜索选项)
  256. conversation_id: 会话ID(可选,如果提供则记录消息)
  257. Returns:
  258. 增强的对话响应(包含搜索信息)
  259. """
  260. if not self.client:
  261. raise HTTPException(status_code=403, detail="未配置API密钥,请在用户设置中配置apikey")
  262. if not self.validate_model(request.model):
  263. raise HTTPException(status_code=400, detail=f"无效的模型名称或模型类型不匹配: {request.model}")
  264. # 验证和标准化搜索选项
  265. validated_search_options = None
  266. if request.search_options:
  267. try:
  268. validated_search_options = self.search_validator.validate_and_normalize(
  269. request.model, request.search_options, self.db
  270. )
  271. except ValueError as e:
  272. raise HTTPException(status_code=400, detail=f"搜索选项验证失败: {str(e)}")
  273. messages = self._convert_messages(request.messages)
  274. try:
  275. response = self.client.call(
  276. model=request.model,
  277. messages=messages,
  278. temperature=request.temperature,
  279. top_p=request.top_p,
  280. max_tokens=request.max_tokens,
  281. enable_thinking=request.enable_thinking,
  282. thinking_budget=request.thinking_budget,
  283. search_options=validated_search_options
  284. )
  285. if response.status_code != 200:
  286. # 搜索失败时的降级处理
  287. if validated_search_options and validated_search_options.enable_search:
  288. logger.warning(f"搜索功能调用失败,降级到普通对话: {response.message}")
  289. # 重试不带搜索的调用
  290. response = self.client.call(
  291. model=request.model,
  292. messages=messages,
  293. temperature=request.temperature,
  294. top_p=request.top_p,
  295. max_tokens=request.max_tokens,
  296. enable_thinking=request.enable_thinking,
  297. thinking_budget=request.thinking_budget
  298. )
  299. if response.status_code != 200:
  300. raise HTTPException(status_code=502, detail=f"百炼平台返回错误: {response.message}")
  301. else:
  302. raise HTTPException(status_code=502, detail=f"百炼平台返回错误: {response.message}")
  303. choice = response.output.choices[0]
  304. usage = response.usage
  305. # 提取思考内容
  306. reasoning_content = getattr(choice.message, 'reasoning_content', None)
  307. # 提取搜索信息
  308. search_info = self.client._extract_search_info(response)
  309. search_results = []
  310. # 处理搜索结果和引用格式化
  311. content = choice.message.content
  312. if search_info and validated_search_options:
  313. search_results = self.citation_formatter.extract_search_results(search_info)
  314. # 处理垂直领域搜索结果
  315. if validated_search_options.enable_search_extension and VerticalDomainProcessor.is_vertical_domain_result(search_info):
  316. structured_data = search_info.get("vertical_structured_data")
  317. content = VerticalDomainProcessor.format_vertical_domain_response(
  318. content, search_info, structured_data
  319. )
  320. # 格式化引用和来源
  321. if validated_search_options.enable_citation or validated_search_options.enable_source:
  322. content, search_results = self.citation_formatter.format_content_with_citations_and_sources(
  323. content=content,
  324. search_info=search_info,
  325. enable_citation=validated_search_options.enable_citation,
  326. citation_format=validated_search_options.citation_format,
  327. enable_source=validated_search_options.enable_source
  328. )
  329. return EnhancedChatResponse(
  330. content=content,
  331. finish_reason=choice.finish_reason,
  332. usage=UsageInfo(
  333. input_tokens=usage.input_tokens,
  334. output_tokens=usage.output_tokens,
  335. total_tokens=usage.total_tokens
  336. ),
  337. reasoning_content=reasoning_content,
  338. search_info=search_info,
  339. search_results=search_results
  340. )
  341. except HTTPException:
  342. raise
  343. except Exception as e:
  344. logger.error(f"搜索增强对话调用异常: {type(e).__name__}: {str(e)}")
  345. # 错误处理和降级逻辑
  346. if validated_search_options and validated_search_options.enable_search:
  347. logger.warning("搜索功能异常,尝试降级到普通对话")
  348. try:
  349. # 降级到普通对话
  350. fallback_request = ChatRequest(
  351. model=request.model,
  352. messages=request.messages,
  353. stream=False,
  354. temperature=request.temperature,
  355. top_p=request.top_p,
  356. max_tokens=request.max_tokens,
  357. conversation_id=conversation_id,
  358. enable_thinking=request.enable_thinking,
  359. thinking_budget=request.thinking_budget
  360. )
  361. fallback_response = self.chat(fallback_request, conversation_id)
  362. # 转换为增强响应格式
  363. return EnhancedChatResponse(
  364. content=fallback_response.content,
  365. finish_reason=fallback_response.finish_reason,
  366. usage=fallback_response.usage,
  367. reasoning_content=fallback_response.reasoning_content,
  368. search_info=None,
  369. search_results=[]
  370. )
  371. except Exception as fallback_error:
  372. logger.error(f"降级对话也失败: {fallback_error}")
  373. raise HTTPException(status_code=500, detail=f"对话服务异常: {str(e)}")
  374. else:
  375. raise HTTPException(status_code=500, detail=f"对话服务异常: {str(e)}")
  376. async def chat_stream_with_search(
  377. self,
  378. request: EnhancedChatRequest,
  379. conversation_id: Optional[int] = None
  380. ) -> AsyncGenerator[str, None]:
  381. """
  382. 支持搜索的流式对话,返回SSE事件流
  383. Args:
  384. request: 增强的对话请求(包含搜索选项)
  385. conversation_id: 会话ID(可选,如果提供则记录消息)
  386. Yields:
  387. SSE格式的增强响应块
  388. """
  389. if not self.client:
  390. error_data = {"error": "未配置API密钥,请在用户设置中配置apikey"}
  391. yield f"data: {json.dumps(error_data, ensure_ascii=False)}\n\n"
  392. yield "data: [DONE]\n\n"
  393. return
  394. if not self.validate_model(request.model):
  395. error_data = {"error": f"无效的模型名称或模型类型不匹配: {request.model}"}
  396. yield f"data: {json.dumps(error_data, ensure_ascii=False)}\n\n"
  397. yield "data: [DONE]\n\n"
  398. return
  399. # 验证和标准化搜索选项
  400. validated_search_options = None
  401. if request.search_options:
  402. try:
  403. validated_search_options = self.search_validator.validate_and_normalize(
  404. request.model, request.search_options, self.db
  405. )
  406. except ValueError as e:
  407. error_data = {"error": f"搜索选项验证失败: {str(e)}"}
  408. yield f"data: {json.dumps(error_data, ensure_ascii=False)}\n\n"
  409. yield "data: [DONE]\n\n"
  410. return
  411. messages = self._convert_messages(request.messages)
  412. # 初始化流式搜索状态管理器
  413. state_manager = StreamSearchStateManager(validated_search_options)
  414. try:
  415. for response in self.client.call_stream(
  416. model=request.model,
  417. messages=messages,
  418. temperature=request.temperature,
  419. top_p=request.top_p,
  420. max_tokens=request.max_tokens,
  421. enable_thinking=request.enable_thinking,
  422. thinking_budget=request.thinking_budget,
  423. search_options=validated_search_options
  424. ):
  425. if response.status_code != 200:
  426. error_msg = getattr(response, 'message', str(response))
  427. logger.error(f"DashScope流式调用错误: status={response.status_code}, message={error_msg}")
  428. # 记录错误到状态管理器
  429. state_manager.handle_error(error_msg)
  430. # 搜索失败时的降级处理
  431. if validated_search_options and validated_search_options.enable_search:
  432. logger.warning("搜索功能流式调用失败,尝试降级到普通流式对话")
  433. try:
  434. # 降级到普通流式对话
  435. fallback_request = ChatRequest(
  436. model=request.model,
  437. messages=request.messages,
  438. stream=True,
  439. temperature=request.temperature,
  440. top_p=request.top_p,
  441. max_tokens=request.max_tokens,
  442. conversation_id=conversation_id,
  443. enable_thinking=request.enable_thinking,
  444. thinking_budget=request.thinking_budget
  445. )
  446. async for chunk in self.chat_stream(fallback_request, conversation_id):
  447. yield chunk
  448. return
  449. except Exception as fallback_error:
  450. logger.error(f"降级流式对话也失败: {fallback_error}")
  451. error_data = {"error": error_msg}
  452. yield f"data: {json.dumps(error_data, ensure_ascii=False)}\n\n"
  453. break
  454. choice = response.output.choices[0]
  455. # 获取内容(处理多模态响应)
  456. msg_content = choice.message.content
  457. if isinstance(msg_content, list):
  458. # 多模态响应,提取文本
  459. text_parts = []
  460. for item in msg_content:
  461. if isinstance(item, dict) and 'text' in item:
  462. text_parts.append(item['text'])
  463. content_text = ''.join(text_parts)
  464. else:
  465. content_text = msg_content or ""
  466. # 获取思考内容(增量)
  467. reasoning_content = ""
  468. try:
  469. reasoning_content = choice.message.get('reasoning_content', '') if hasattr(choice.message, 'get') else getattr(choice.message, 'reasoning_content', '') or ''
  470. except (KeyError, AttributeError):
  471. pass
  472. # 提取搜索信息(如果存在)
  473. current_search_info = self.client._extract_search_info(response)
  474. current_search_results = []
  475. if current_search_info:
  476. current_search_results = self.citation_formatter.extract_search_results(current_search_info)
  477. # 使用状态管理器处理响应块
  478. state_info = state_manager.process_response_chunk(
  479. response=response,
  480. content=content_text,
  481. reasoning_content=reasoning_content,
  482. search_info=current_search_info,
  483. search_results=current_search_results
  484. )
  485. # 构建增强的流式响应块
  486. chunk = EnhancedStreamChunk(
  487. content=content_text,
  488. finish_reason=choice.finish_reason if choice.finish_reason else None,
  489. reasoning_content=reasoning_content if reasoning_content else None
  490. )
  491. # 如果应该提前返回搜索结果
  492. if state_info['should_prepend']:
  493. chunk.search_info = state_info['search_info']
  494. chunk.search_results = state_info['search_results']
  495. state_manager.mark_search_results_sent()
  496. logger.info(f"提前返回搜索结果: {len(state_info['search_results'])} 个结果")
  497. # 如果是最后一个块且应该返回搜索结果
  498. if choice.finish_reason and state_info['should_append']:
  499. chunk.search_info = state_info['search_info']
  500. chunk.search_results = state_info['search_results']
  501. logger.info(f"在最后块中返回搜索结果: {len(state_info['search_results'])} 个结果")
  502. # 添加使用统计(如果是最后一个块)
  503. if state_info['is_completed'] and state_info['final_usage']:
  504. usage_data = state_info['final_usage']
  505. chunk.usage = UsageInfo(
  506. input_tokens=usage_data['input_tokens'],
  507. output_tokens=usage_data['output_tokens'],
  508. total_tokens=usage_data['total_tokens']
  509. )
  510. yield f"data: {chunk.model_dump_json()}\n\n"
  511. # 流式完成后记录消息
  512. if conversation_id and self.user_id:
  513. final_state = state_manager.get_final_state()
  514. if final_state.final_usage:
  515. model_obj = self.get_model_by_title(request.model)
  516. model_id = model_obj.id if model_obj else None
  517. # 处理搜索结果和引用格式化
  518. final_content = final_state.full_content
  519. final_search_results = final_state.search_results
  520. if final_state.search_info and validated_search_options:
  521. # 处理垂直领域搜索结果
  522. if (validated_search_options.enable_search_extension and
  523. VerticalDomainProcessor.is_vertical_domain_result(final_state.search_info)):
  524. structured_data = final_state.search_info.get("vertical_structured_data")
  525. final_content = VerticalDomainProcessor.format_vertical_domain_response(
  526. final_content, final_state.search_info, structured_data
  527. )
  528. # 格式化引用和来源
  529. if validated_search_options.enable_citation or validated_search_options.enable_source:
  530. final_content, final_search_results = self.citation_formatter.format_content_with_citations_and_sources(
  531. content=final_state.full_content,
  532. search_info=final_state.search_info,
  533. enable_citation=validated_search_options.enable_citation,
  534. citation_format=validated_search_options.citation_format,
  535. enable_source=validated_search_options.enable_source
  536. )
  537. # 流式完成后完成(conversation tracking removed)
  538. except Exception as e:
  539. logger.error(f"DashScope搜索增强流式调用异常: {type(e).__name__}: {str(e)}")
  540. # 错误处理和降级逻辑
  541. if validated_search_options and validated_search_options.enable_search:
  542. logger.warning("搜索功能流式调用异常,尝试降级到普通流式对话")
  543. try:
  544. # 降级到普通流式对话
  545. fallback_request = ChatRequest(
  546. model=request.model,
  547. messages=request.messages,
  548. stream=True,
  549. temperature=request.temperature,
  550. top_p=request.top_p,
  551. max_tokens=request.max_tokens,
  552. conversation_id=conversation_id,
  553. enable_thinking=request.enable_thinking,
  554. thinking_budget=request.thinking_budget
  555. )
  556. async for chunk in self.chat_stream(fallback_request, conversation_id):
  557. yield chunk
  558. return
  559. except Exception as fallback_error:
  560. logger.error(f"降级流式对话也失败: {fallback_error}")
  561. error_data = {"error": f"调用失败: {str(e)}"}
  562. yield f"data: {json.dumps(error_data, ensure_ascii=False)}\n\n"
  563. yield "data: [DONE]\n\n"
  564. def get_llm_models(self) -> List[ModelResponse]:
  565. """获取所有LLM和多模态模型(均可用于对话)
  566. 排除 OCR 专用模型(qwen-vl-ocr 系列),这类模型必须传图片,
  567. 不适合作为通用对话模型使用。
  568. """
  569. # OCR 专用模型不适合出现在对话列表
  570. OCR_ONLY_MODELS = {'qwen-vl-ocr', 'qwen-vl-ocr-latest'}
  571. models = self.db.query(ModelNew).filter(
  572. or_(
  573. ModelNew.categories.any(int(ModelCategory.LLM)),
  574. ModelNew.categories.any(int(ModelCategory.MULTIMODAL))
  575. ),
  576. ModelNew.is_show_enabled == True,
  577. ~ModelNew.model_code.in_(OCR_ONLY_MODELS)
  578. ).all()
  579. result = []
  580. for m in models:
  581. resp = ModelResponse.model_validate(m)
  582. resp.title = m.model_code
  583. resp.name = m.display_name or m.model_code
  584. resp.keyword = m.keywords
  585. result.append(resp)
  586. return result
  587. def get_search_supported_models(self) -> List[str]:
  588. models = self.db.query(ModelNew).filter(
  589. or_(
  590. ModelNew.categories.any(int(ModelCategory.LLM)),
  591. ModelNew.categories.any(int(ModelCategory.MULTIMODAL))
  592. ),
  593. ModelNew.is_search == True,
  594. ModelNew.is_show_enabled == True
  595. ).all()
  596. return sorted([model.model_code for model in models])
  597. def get_thinking_supported_models(self) -> List[str]:
  598. models = self.db.query(ModelNew).filter(
  599. or_(
  600. ModelNew.categories.any(int(ModelCategory.LLM)),
  601. ModelNew.categories.any(int(ModelCategory.MULTIMODAL))
  602. ),
  603. ModelNew.is_thinking == True,
  604. ModelNew.is_show_enabled == True
  605. ).all()
  606. return sorted([model.model_code for model in models])
  607. def is_search_supported(self, model: str) -> bool:
  608. model_obj = self.db.query(ModelNew).filter(
  609. ModelNew.model_code == model,
  610. or_(
  611. ModelNew.categories.any(int(ModelCategory.LLM)),
  612. ModelNew.categories.any(int(ModelCategory.MULTIMODAL))
  613. )
  614. ).first()
  615. return model_obj.is_search if model_obj else False
  616. def is_thinking_supported(self, model: str) -> bool:
  617. model_obj = self.db.query(ModelNew).filter(
  618. ModelNew.model_code == model,
  619. or_(
  620. ModelNew.categories.any(int(ModelCategory.LLM)),
  621. ModelNew.categories.any(int(ModelCategory.MULTIMODAL))
  622. )
  623. ).first()
  624. return model_obj.is_thinking if model_obj else False
  625. def validate_search_options(self, model: str, search_options: Optional[SearchOptions]) -> SearchOptions:
  626. """
  627. 验证并标准化搜索选项
  628. Args:
  629. model: 模型名称
  630. search_options: 搜索选项
  631. Returns:
  632. 验证并标准化后的搜索选项
  633. Raises:
  634. ValueError: 当模型不支持或参数无效时
  635. """
  636. return self.search_validator.validate_and_normalize(model, search_options, self.db)
  637. def create_search_fallback_response(
  638. self,
  639. original_request: EnhancedChatRequest,
  640. error_message: str,
  641. conversation_id: Optional[int] = None
  642. ) -> EnhancedChatResponse:
  643. """
  644. 创建搜索功能降级响应
  645. Args:
  646. original_request: 原始请求
  647. error_message: 错误信息
  648. conversation_id: 会话ID
  649. Returns:
  650. 降级后的响应
  651. """
  652. try:
  653. # 创建不带搜索的请求
  654. fallback_request = ChatRequest(
  655. model=original_request.model,
  656. messages=original_request.messages,
  657. stream=False,
  658. temperature=original_request.temperature,
  659. top_p=original_request.top_p,
  660. max_tokens=original_request.max_tokens,
  661. conversation_id=conversation_id,
  662. enable_thinking=original_request.enable_thinking,
  663. thinking_budget=original_request.thinking_budget
  664. )
  665. # 调用普通对话
  666. fallback_response = self.chat(fallback_request, conversation_id)
  667. # 在响应内容中添加搜索功能不可用的说明
  668. content_with_notice = (
  669. f"{fallback_response.content}\n\n"
  670. f"*注意:由于搜索功能暂时不可用({error_message}),"
  671. f"以上回复基于模型训练数据,可能不包含最新信息。*"
  672. )
  673. # 转换为增强响应格式
  674. return EnhancedChatResponse(
  675. content=content_with_notice,
  676. finish_reason=fallback_response.finish_reason,
  677. usage=fallback_response.usage,
  678. reasoning_content=fallback_response.reasoning_content,
  679. search_info=None,
  680. search_results=[]
  681. )
  682. except Exception as e:
  683. logger.error(f"创建搜索降级响应失败: {e}")
  684. raise HTTPException(
  685. status_code=500,
  686. detail=f"搜索功能不可用且降级失败: {error_message}"
  687. )