| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734 |
- """
- 通义多模态翻译客户端
- 基于阿里云 AnyTrans API (2025-07-07版本)
- 文档:https://help.aliyun.com/zh/model-studio/api-anytrans-2025-07-07
- """
- import logging
- from typing import Optional, List, Dict, Any
- from alibabacloud_anytrans20250707.client import Client as AnyTransClient
- from alibabacloud_tea_openapi import models as open_api_models
- from alibabacloud_anytrans20250707 import models as anytrans_models
- from alibabacloud_tea_util import models as util_models
logger = logging.getLogger(__name__)


class TongyiAnyTransClient:
    """Thin wrapper around the Alibaba Cloud AnyTrans SDK client.

    Each public method builds the matching SDK request, sends it through
    ``self.client``, checks the HTTP status code and converts the SDK
    response object into plain dictionaries / lists.

    Failures are logged and re-raised as a generic ``Exception`` whose
    message is prefixed with the failing operation; the original error is
    attached as ``__cause__``.
    """

    def __init__(self, access_key_id: str, access_key_secret: str, workspace_id: str):
        """
        Initialize the client.

        Args:
            access_key_id: Alibaba Cloud AccessKey ID
            access_key_secret: Alibaba Cloud AccessKey Secret
            workspace_id: Bailian workspace ID

        Raises:
            ValueError: if any of the three arguments is empty.
        """
        if not all([access_key_id, access_key_secret, workspace_id]):
            raise ValueError("access_key_id, access_key_secret and workspace_id are required")

        self.workspace_id = workspace_id

        # SDK configuration (credentials + fixed endpoint)
        config = open_api_models.Config(
            access_key_id=access_key_id,
            access_key_secret=access_key_secret,
            endpoint='anytrans.cn-beijing.aliyuncs.com'
        )

        # Underlying SDK client used by every method below
        self.client = AnyTransClient(config)

        logger.info("TongyiAnyTransClient initialized successfully")

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _ensure_ok(response: Any) -> None:
        """Raise if the response status code is not 200."""
        if response.status_code != 200:
            raise Exception(f"API调用失败: {response.status_code}")

    @staticmethod
    def _usage_dict(holder: Any) -> Dict[str, Any]:
        """Return the token usage of ``holder`` as a dict.

        A missing ``usage`` attribute and a ``usage`` attribute that is
        ``None`` both yield zero counts (a bare ``hasattr`` check would let
        the ``None`` case through and fail on the nested attribute access).
        """
        usage = getattr(holder, 'usage', None)
        return {
            "input_tokens": getattr(usage, 'input_tokens', 0),
            "output_tokens": getattr(usage, 'output_tokens', 0)
        }

    @staticmethod
    def _build_terms(term_cls: Any, terminologies: List[Dict[str, str]]) -> List[Any]:
        """Convert ``{"src": ..., "tgt": ...}`` dicts into ``term_cls`` objects."""
        term_objects = []
        for entry in terminologies:
            term_obj = term_cls()
            term_obj.src = entry["src"]
            term_obj.tgt = entry["tgt"]
            term_objects.append(term_obj)
        return term_objects

    @staticmethod
    def _point(box: Any, corner: str) -> Optional[Dict[str, Any]]:
        """Return ``{"x", "y"}`` for the named corner, or None if absent/None."""
        point = getattr(box, corner, None)
        if point is None:
            return None
        return {"x": point.x, "y": point.y}

    @classmethod
    def _box_to_dict(cls, box: Any) -> Dict[str, Any]:
        """Convert one bounding-box object of an image result into a dict."""
        return {
            "text": getattr(box, 'text', ""),
            "translation": getattr(box, 'translation', {}),
            "confidence": getattr(box, 'confidence', 0),
            "up_left": cls._point(box, 'up_left'),
            "up_right": cls._point(box, 'up_right'),
            "down_left": cls._point(box, 'down_left'),
            "down_right": cls._point(box, 'down_right')
        }

    @staticmethod
    def _terms_to_dicts(data: Any) -> List[Dict[str, str]]:
        """Convert ``data.terms`` into dicts; missing or None terms give ``[]``."""
        return [
            {"term_id": term.term_id, "src": term.src, "tgt": term.tgt}
            for term in (getattr(data, 'terms', None) or [])
        ]

    # ------------------------------------------------------------------
    # Text translation
    # ------------------------------------------------------------------

    def text_translate(
        self,
        source_text: str,
        target_language: str,
        source_language: str = "auto",
        scene: str = "mt-turbo",
        terminologies: Optional[List[Dict[str, str]]] = None,
        domain_hint: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Translate a piece of text.

        Args:
            source_text: source text
            target_language: target language (e.g. en, zh, ja)
            source_language: source language (default "auto" = auto-detect)
            scene: model type (mt-plus professional / mt-turbo lightweight)
            terminologies: terminology intervention list of
                ``{"src": ..., "tgt": ...}`` dicts
            domain_hint: domain hint

        Returns:
            Dict with ``translated_text``, ``detected_language`` and ``usage``
            (``input_tokens`` / ``output_tokens``).

        Raises:
            Exception: on a non-200 status or any error while calling the API.
        """
        try:
            # Base request
            request = anytrans_models.TextTranslateRequest(
                workspace_id=self.workspace_id,
                source_language=source_language,
                target_language=target_language,
                text=source_text,
                scene=scene
            )

            # Optional extension parameters
            if terminologies or domain_hint:
                ext = anytrans_models.TextTranslateRequestExt()
                if terminologies:
                    ext.terminologies = self._build_terms(
                        anytrans_models.TextTranslateRequestExtTerminologies,
                        terminologies
                    )
                if domain_hint:
                    ext.domain_hint = domain_hint
                request.ext = ext

            response = self.client.text_translate(request)
            self._ensure_ok(response)

            data = response.body.data
            return {
                "translated_text": data.translation,
                "detected_language": getattr(data, 'detected_language', None),
                "usage": self._usage_dict(data)
            }

        except Exception as e:
            logger.error("文本翻译失败: %s", e)
            raise Exception(f"文本翻译失败: {e}") from e

    def batch_translate(
        self,
        texts: List[str],
        target_language: str,
        source_language: str = "auto",
        scene: str = "mt-turbo"
    ) -> List[Dict[str, Any]]:
        """
        Translate a batch of texts.

        Args:
            texts: list of texts (up to 100 items)
            target_language: target language
            source_language: source language
            scene: model type

        Returns:
            One dict per returned translation with ``index``, ``source_text``,
            ``translated_text``, ``status`` and ``usage``. An empty ``texts``
            returns ``[]`` without calling the API.

        Raises:
            Exception: on a non-200 status or any error while calling the API.
        """
        # Nothing to translate: skip the remote call entirely.
        if not texts:
            return []

        try:
            request = anytrans_models.BatchTranslateRequest(
                workspace_id=self.workspace_id,
                source_language=source_language,
                target_language=target_language,
                source_texts=texts,
                scene=scene
            )

            runtime = util_models.RuntimeOptions()
            response = self.client.batch_translate_with_options(request, runtime)
            self._ensure_ok(response)

            results = []
            for idx, item in enumerate(response.body.data.translations):
                results.append({
                    "index": idx,
                    # Guard against a response longer than the input list.
                    "source_text": texts[idx] if idx < len(texts) else None,
                    "translated_text": item.translated_text,
                    "status": "completed",
                    "usage": self._usage_dict(item)
                })
            return results

        except Exception as e:
            logger.error("批量翻译失败: %s", e)
            raise Exception(f"批量翻译失败: {e}") from e

    # ------------------------------------------------------------------
    # Long-text translation (asynchronous task)
    # ------------------------------------------------------------------

    def submit_long_text_task(
        self,
        source_text: str,
        target_language: str,
        source_language: str = "auto",
        scene: str = "mt-turbo"
    ) -> str:
        """
        Submit a long-text translation task.

        Args:
            source_text: source text (up to 1,000,000 characters)
            target_language: target language
            source_language: source language
            scene: model type

        Returns:
            Task ID

        Raises:
            Exception: on a non-200 status or any error while calling the API.
        """
        try:
            request = anytrans_models.SubmitLongTextTranslateTaskRequest(
                workspace_id=self.workspace_id,
                source_language=source_language,
                target_language=target_language,
                source_text=source_text,
                scene=scene
            )

            runtime = util_models.RuntimeOptions()
            response = self.client.submit_long_text_translate_task_with_options(request, runtime)
            self._ensure_ok(response)

            return response.body.data.task_id

        except Exception as e:
            logger.error("提交长文本任务失败: %s", e)
            raise Exception(f"提交长文本任务失败: {e}") from e

    def get_long_text_result(self, task_id: str) -> Dict[str, Any]:
        """
        Fetch the result of a long-text translation task.

        Args:
            task_id: task ID

        Returns:
            Dict with ``task_id``, ``status``, ``translated_text``,
            ``progress``, ``error_message`` and ``usage`` (``None`` when the
            response carries no usage information).

        Raises:
            Exception: on a non-200 status or any error while calling the API.
        """
        try:
            request = anytrans_models.GetLongTextTranslateTaskRequest(
                workspace_id=self.workspace_id,
                task_id=task_id
            )

            runtime = util_models.RuntimeOptions()
            response = self.client.get_long_text_translate_task_with_options(request, runtime)
            self._ensure_ok(response)

            data = response.body.data
            has_usage = getattr(data, 'usage', None) is not None

            return {
                "task_id": task_id,
                "status": data.status,
                "translated_text": getattr(data, 'translated_text', None),
                "progress": getattr(data, 'progress', 0),
                "error_message": getattr(data, 'error_message', None),
                "usage": self._usage_dict(data) if has_usage else None
            }

        except Exception as e:
            logger.error("获取长文本结果失败: %s", e)
            raise Exception(f"获取长文本结果失败: {e}") from e

    # ------------------------------------------------------------------
    # Image translation (asynchronous task)
    # ------------------------------------------------------------------

    def submit_image_task(
        self,
        image_url: str,
        target_languages: List[str],
        source_language: str = "auto",
        scene: str = "flash",
        terminologies: Optional[List[Dict[str, str]]] = None,
        domain_hint: Optional[str] = None
    ) -> str:
        """
        Submit an image translation task.

        Args:
            image_url: image URL
            target_languages: list of target languages
            source_language: source language
            scene: model type (general professional / flash lightweight)
            terminologies: terminology intervention list
            domain_hint: domain hint

        Returns:
            Task ID

        Raises:
            Exception: on a non-200 status or any error while calling the API.
        """
        try:
            # Base request; the image URL travels in the ``text`` field and
            # the language list in ``target_language``.
            request = anytrans_models.SubmitImageTranslateTaskRequest(
                workspace_id=self.workspace_id,
                source_language=source_language,
                target_language=target_languages,
                text=image_url,
                scene=scene
            )

            # Optional extension parameters
            if terminologies or domain_hint:
                ext = anytrans_models.SubmitImageTranslateTaskRequestExt()
                if terminologies:
                    ext.terminologies = self._build_terms(
                        anytrans_models.SubmitImageTranslateTaskRequestExtTerminologies,
                        terminologies
                    )
                if domain_hint:
                    ext.domain_hint = domain_hint
                request.ext = ext

            response = self.client.submit_image_translate_task(request)
            self._ensure_ok(response)

            return response.body.data.task_id

        except Exception as e:
            logger.error("提交图片翻译任务失败: %s", e)
            raise Exception(f"提交图片翻译任务失败: {e}") from e

    def get_image_result(self, task_id: str) -> Dict[str, Any]:
        """
        Fetch the result of an image translation task.

        Args:
            task_id: task ID

        Returns:
            Dict with ``task_id``, ``status`` ("completed" once the response
            carries a translation, otherwise "processing") and
            ``translation`` (``None`` while processing, else a dict with
            ``angle``, ``width``, ``height``, ``boxes_count`` and
            ``bounding_boxes``).

        Raises:
            Exception: on a non-200 status or any error while calling the API.
        """
        try:
            request = anytrans_models.GetImageTranslateTaskRequest(
                workspace_id=self.workspace_id,
                task_id=task_id
            )

            response = self.client.get_image_translate_task(request)
            self._ensure_ok(response)

            translation = getattr(response.body.data, 'translation', None)

            result = {
                "task_id": task_id,
                "status": "completed" if translation else "processing",
                "translation": None
            }

            if translation:
                # A missing or None box list is treated as "no boxes".
                boxes = getattr(translation, 'bounding_boxes', None) or []
                result["translation"] = {
                    "angle": getattr(translation, 'angle', 0),
                    "width": getattr(translation, 'width', 0),
                    "height": getattr(translation, 'height', 0),
                    "boxes_count": getattr(translation, 'boxes_count', 0),
                    "bounding_boxes": [self._box_to_dict(box) for box in boxes]
                }

            return result

        except Exception as e:
            logger.error("获取图片翻译结果失败: %s", e)
            raise Exception(f"获取图片翻译结果失败: {e}") from e

    # ------------------------------------------------------------------
    # Document translation (asynchronous task)
    # ------------------------------------------------------------------

    def submit_doc_task(
        self,
        doc_url: str,
        target_language: str,
        source_language: str = "auto",
        scene: str = "mt-turbo",
        terminologies: Optional[List[Dict[str, str]]] = None,
        domain_hint: Optional[str] = None,
        skip_img_trans: bool = False
    ) -> str:
        """
        Submit a document translation task.

        Args:
            doc_url: document URL
            target_language: target language
            source_language: source language
            scene: model type
            terminologies: terminology intervention list
            domain_hint: domain hint
            skip_img_trans: whether to skip translating images inside PDFs

        Returns:
            Task ID

        Raises:
            Exception: on a non-200 status or any error while calling the API.
        """
        try:
            # Base request; the document URL travels in the ``text`` field.
            request = anytrans_models.SubmitDocTranslateTaskRequest(
                workspace_id=self.workspace_id,
                source_language=source_language,
                target_language=target_language,
                text=doc_url,
                scene=scene
            )

            # Optional extension parameters
            if terminologies or domain_hint or skip_img_trans:
                ext = anytrans_models.SubmitDocTranslateTaskRequestExt()
                if terminologies:
                    ext.terminologies = self._build_terms(
                        anytrans_models.SubmitDocTranslateTaskRequestExtTerminologies,
                        terminologies
                    )
                if domain_hint:
                    ext.domain_hint = domain_hint

                # The config object is attached whenever ext is sent.
                config = anytrans_models.SubmitDocTranslateTaskRequestExtConfig()
                config.skip_img_trans = skip_img_trans
                ext.config = config

                request.ext = ext

            response = self.client.submit_doc_translate_task(request)
            self._ensure_ok(response)

            return response.body.data.task_id

        except Exception as e:
            logger.error("提交文档翻译任务失败: %s", e)
            raise Exception(f"提交文档翻译任务失败: {e}") from e

    def get_doc_result(self, task_id: str) -> Dict[str, Any]:
        """
        Fetch the result of a document translation task.

        Args:
            task_id: task ID

        Returns:
            Dict with ``task_id`` and ``status``; when the response carries a
            non-empty ``translate_file_url`` it also contains
            ``translate_file_url``, ``characters_count`` and ``page_count``.

        Raises:
            Exception: on a non-200 status or any error while calling the API.
        """
        try:
            request = anytrans_models.GetDocTranslateTaskRequest(
                workspace_id=self.workspace_id,
                task_id=task_id
            )

            response = self.client.get_doc_translate_task(request)
            self._ensure_ok(response)

            data = response.body.data

            result = {
                "task_id": task_id,
                "status": getattr(data, 'status', "processing")
            }

            # Result details are only added once a file URL is available.
            file_url = getattr(data, 'translate_file_url', None)
            if file_url:
                result["translate_file_url"] = file_url
                result["characters_count"] = getattr(data, 'characters_count', 0)
                result["page_count"] = getattr(data, 'page_count', 0)

            return result

        except Exception as e:
            logger.error("获取文档翻译结果失败: %s", e)
            raise Exception(f"获取文档翻译结果失败: {e}") from e

    # ------------------------------------------------------------------
    # HTML translation (asynchronous task)
    # ------------------------------------------------------------------

    def submit_html_task(
        self,
        html_content: str,
        target_language: str,
        source_language: str = "auto",
        scene: str = "mt-turbo",
        terminologies: Optional[List[Dict[str, str]]] = None,
        domain_hint: Optional[str] = None
    ) -> str:
        """
        Submit an HTML translation task.

        Args:
            html_content: HTML content
            target_language: target language
            source_language: source language
            scene: model type
            terminologies: terminology intervention list
            domain_hint: domain hint

        Returns:
            Task ID

        Raises:
            Exception: on a non-200 status or any error while calling the API.
        """
        try:
            # Base request
            request = anytrans_models.SubmitHtmlTranslateTaskRequest(
                workspace_id=self.workspace_id,
                source_language=source_language,
                target_language=target_language,
                text=html_content,
                scene=scene
            )

            # Optional extension parameters
            if terminologies or domain_hint:
                ext = anytrans_models.SubmitHtmlTranslateTaskRequestExt()
                if terminologies:
                    ext.terminologies = self._build_terms(
                        anytrans_models.SubmitHtmlTranslateTaskRequestExtTerminologies,
                        terminologies
                    )
                if domain_hint:
                    ext.domain_hint = domain_hint
                request.ext = ext

            response = self.client.submit_html_translate_task(request)
            self._ensure_ok(response)

            return response.body.data.task_id

        except Exception as e:
            logger.error("提交HTML任务失败: %s", e)
            raise Exception(f"提交HTML任务失败: {e}") from e

    def get_html_result(self, task_id: str) -> Dict[str, Any]:
        """
        Fetch the result of an HTML translation task.

        Args:
            task_id: task ID

        Returns:
            ``{"task_id", "status": "processing"}`` while no translation is
            available; otherwise a dict with ``status`` "completed",
            ``translated_html`` and, when present, ``usage``.

        Raises:
            Exception: on failure, except for errors whose message contains
                "Server.Internal.Error" or "服务内部异常", which are reported
                as a "processing" result instead.
        """
        try:
            request = anytrans_models.GetHtmlTranslateTaskRequest(
                workspace_id=self.workspace_id,
                task_id=task_id
            )

            response = self.client.get_html_translate_task(request)
            self._ensure_ok(response)

            body = response.body

            # No data field at all: nothing to report yet.
            if not hasattr(body, 'data'):
                logger.warning("HTML翻译任务 %s 暂无结果数据", task_id)
                return {
                    "task_id": task_id,
                    "status": "processing"
                }

            data = body.data
            translated = getattr(data, 'translation', None)

            # An empty/missing translation means the task is still running.
            if not translated:
                return {
                    "task_id": task_id,
                    "status": "processing"
                }

            result = {
                "task_id": task_id,
                "status": "completed",
                "translated_html": translated
            }

            # Token usage statistics
            if hasattr(data, 'usage'):
                usage = data.usage
                result["usage"] = {
                    "input_tokens": getattr(usage, 'input_tokens', 0),
                    "output_tokens": getattr(usage, 'output_tokens', 0),
                    "total_tokens": getattr(usage, 'total_tokens', 0)
                }

            return result

        except Exception as e:
            error_msg = str(e)
            logger.error("获取HTML结果失败: %s", error_msg)

            # An internal server error may just mean the task is still
            # being processed, so it is reported as such instead of raised.
            if "Server.Internal.Error" in error_msg or "服务内部异常" in error_msg:
                logger.info("HTML翻译任务 %s 可能还在处理中", task_id)
                return {
                    "task_id": task_id,
                    "status": "processing"
                }

            raise Exception(f"获取HTML结果失败: {error_msg}") from e

    # ------------------------------------------------------------------
    # Terminology management
    # ------------------------------------------------------------------

    def term_query(
        self,
        source_language: str,
        target_language: str,
        scene: str = "mt-turbo",
        text: Optional[str] = None
    ) -> List[Dict[str, str]]:
        """
        Query the terminology library.

        Args:
            source_language: source language
            target_language: target language
            scene: model type
            text: sentence containing the terms (optional)

        Returns:
            List of ``{"term_id", "src", "tgt"}`` dicts (empty when the
            response carries no terms).

        Raises:
            Exception: on a non-200 status or any error while calling the API.
        """
        try:
            request = anytrans_models.TermQueryRequest(
                workspace_id=self.workspace_id,
                source_language=source_language,
                target_language=target_language,
                scene=scene,
                text=text
            )

            runtime = util_models.RuntimeOptions()
            response = self.client.term_query_with_options(request, runtime)
            self._ensure_ok(response)

            return self._terms_to_dicts(response.body.data)

        except Exception as e:
            logger.error("查询术语失败: %s", e)
            raise Exception(f"查询术语失败: {e}") from e

    def term_edit(
        self,
        action: str,
        source_language: str,
        target_language: str,
        scene: str,
        terms: List[Dict[str, str]]
    ) -> List[Dict[str, str]]:
        """
        Edit the terminology library.

        Args:
            action: operation type (ADD/DELETE/MODIFY)
            source_language: source language
            target_language: target language
            scene: model type
            terms: term list, e.g.
                ``[{"src": "source text", "tgt": "target text", "term_id": "term ID"}]``
                (``term_id`` is optional per entry)

        Returns:
            List of ``{"term_id", "src", "tgt"}`` dicts taken from the
            response (empty when the response carries no terms).

        Raises:
            Exception: on a non-200 status or any error while calling the API.
        """
        try:
            # Build the SDK term objects; term_id is only set when supplied.
            term_objects = []
            for entry in terms:
                term_obj = anytrans_models.TermEditRequestTerms()
                term_obj.src = entry["src"]
                term_obj.tgt = entry["tgt"]
                if "term_id" in entry:
                    term_obj.term_id = entry["term_id"]
                term_objects.append(term_obj)

            request = anytrans_models.TermEditRequest(
                workspace_id=self.workspace_id,
                action=action,
                source_language=source_language,
                target_language=target_language,
                scene=scene,
                terms=term_objects
            )

            runtime = util_models.RuntimeOptions()
            response = self.client.term_edit_with_options(request, runtime)
            self._ensure_ok(response)

            return self._terms_to_dicts(response.body.data)

        except Exception as e:
            logger.error("编辑术语失败: %s", e)
            raise Exception(f"编辑术语失败: {e}") from e
|