|
|
@@ -34,7 +34,7 @@ class GenerateModelClient:
|
|
|
# 保存model_handler引用,用于动态获取模型
|
|
|
self.model_handler = model_handler
|
|
|
|
|
|
- async def _retry_with_backoff(self, func: Callable, *args, timeout: Optional[int] = None, **kwargs):
|
|
|
+ async def _retry_with_backoff(self, func: Callable, *args, timeout: Optional[int] = None, trace_id: Optional[str] = None, model_name: Optional[str] = None, **kwargs):
|
|
|
"""
|
|
|
带指数退避的重试机制,每次重试都有独立的超时控制
|
|
|
|
|
|
@@ -42,6 +42,7 @@ class GenerateModelClient:
|
|
|
避免在服务端过载时继续加重负载。
|
|
|
"""
|
|
|
current_timeout = timeout or self.default_timeout
|
|
|
+ model_info = model_name or "default"
|
|
|
|
|
|
def _is_server_unavailable_error(error: Exception) -> bool:
|
|
|
"""判断是否为服务端不可用错误(应立即失败)"""
|
|
|
@@ -57,28 +58,28 @@ class GenerateModelClient:
|
|
|
func(*args, **kwargs),
|
|
|
timeout=current_timeout
|
|
|
)
|
|
|
- except asyncio.TimeoutError:
|
|
|
+ except asyncio.TimeoutError as e:
|
|
|
if attempt == self.max_retries:
|
|
|
- logger.error(f"[模型调用] 达到最大重试次数 {self.max_retries},最终超时")
|
|
|
+ logger.error(f"[模型调用] 达到最大重试次数 {self.max_retries},最终超时 | trace_id: {trace_id}, model: {model_info}, timeout: {current_timeout}s, error_type: {type(e).__name__}, error_msg: {str(e)}")
|
|
|
raise TimeoutError(f"模型调用在 {self.max_retries} 次重试后均超时")
|
|
|
|
|
|
wait_time = self.backoff_factor * (2 ** attempt)
|
|
|
- logger.warning(f"[模型调用] 第 {attempt + 1} 次超时, {wait_time}秒后重试...")
|
|
|
+ logger.warning(f"[模型调用] 第 {attempt + 1} 次超时, {wait_time}秒后重试... | trace_id: {trace_id}, model: {model_info}, timeout: {current_timeout}s, error_type: {type(e).__name__}, error_msg: {str(e)}")
|
|
|
await asyncio.sleep(wait_time)
|
|
|
except Exception as e:
|
|
|
error_str = str(e)
|
|
|
|
|
|
# 服务端不可用错误(502/503/504)立即失败,不重试
|
|
|
if _is_server_unavailable_error(e):
|
|
|
- logger.error(f"[模型调用] 服务端不可用,立即失败: {error_str}")
|
|
|
+ logger.error(f"[模型调用] 服务端不可用,立即失败: {error_str} | trace_id: {trace_id}, model: {model_info}")
|
|
|
raise
|
|
|
|
|
|
if attempt == self.max_retries:
|
|
|
- logger.error(f"[模型调用] 达到最大重试次数 {self.max_retries},最终失败: {error_str}")
|
|
|
+ logger.error(f"[模型调用] 达到最大重试次数 {self.max_retries},最终失败: {error_str} | trace_id: {trace_id}, model: {model_info}")
|
|
|
raise
|
|
|
|
|
|
wait_time = self.backoff_factor * (2 ** attempt)
|
|
|
- logger.warning(f"[模型调用] 第 {attempt + 1} 次尝试失败: {error_str}, {wait_time}秒后重试...")
|
|
|
+ logger.warning(f"[模型调用] 第 {attempt + 1} 次尝试失败: {error_str}, {wait_time}秒后重试... | trace_id: {trace_id}, model: {model_info}")
|
|
|
await asyncio.sleep(wait_time)
|
|
|
|
|
|
async def get_model_generate_invoke(
|
|
|
@@ -159,7 +160,7 @@ class GenerateModelClient:
|
|
|
return await llm_to_use.ainvoke(final_messages)
|
|
|
|
|
|
# 调用带重试机制
|
|
|
- response = await self._retry_with_backoff(_invoke, timeout=current_timeout)
|
|
|
+ response = await self._retry_with_backoff(_invoke, timeout=current_timeout, trace_id=trace_id, model_name=model_name or "default")
|
|
|
|
|
|
elapsed_time = time.time() - start_time
|
|
|
logger.info(f"[模型调用] 成功 trace_id: {trace_id}, 耗时: {elapsed_time:.2f}s")
|