# translation_memory_service.py
"""
Translation memory service.

Handles creating, reading, updating and deleting translation memories, and
matching similar sentences against them.
"""
import logging
from datetime import datetime
from typing import Optional, List

from sqlalchemy.orm import Session
from sqlalchemy import and_, func

from app.models.translation import TranslationMemory
from app.schemas.translation_schema import (
    MemoryCreate,
    MemoryResponse,
    MemoryListResponse,
    MemoryMatch,
    MemoryMatchResponse
)

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
  19. class TranslationMemoryService:
  20. """翻译记忆服务类"""
  21. def __init__(self, db: Session):
  22. """
  23. 初始化服务
  24. Args:
  25. db: 数据库会话
  26. """
  27. self.db = db
  28. def add_memory(
  29. self,
  30. user_id: str,
  31. memory: MemoryCreate,
  32. source_type: str = "manual",
  33. task_id: Optional[str] = None
  34. ) -> MemoryResponse:
  35. """
  36. 添加翻译记忆
  37. Args:
  38. user_id: 用户ID
  39. memory: 翻译记忆创建请求
  40. source_type: 来源类型
  41. task_id: 关联的任务ID
  42. Returns:
  43. 翻译记忆响应
  44. """
  45. try:
  46. # 检查是否已存在相同的记忆
  47. existing = self.db.query(TranslationMemory).filter(
  48. and_(
  49. TranslationMemory.user_id == user_id,
  50. TranslationMemory.source_lang == memory.source_lang,
  51. TranslationMemory.target_lang == memory.target_lang,
  52. TranslationMemory.source_text == memory.source_text,
  53. TranslationMemory.is_deleted == False
  54. )
  55. ).first()
  56. if existing:
  57. # 更新已存在的记忆
  58. existing.target_text = memory.target_text
  59. existing.updated_at = datetime.now()
  60. self.db.commit()
  61. self.db.refresh(existing)
  62. logger.info(f"更新翻译记忆: user_id={user_id}, id={existing.id}")
  63. return MemoryResponse(
  64. id=existing.id,
  65. source_lang=existing.source_lang,
  66. target_lang=existing.target_lang,
  67. source_text=existing.source_text,
  68. target_text=existing.target_text,
  69. source_type=existing.source_type,
  70. usage_count=existing.usage_count,
  71. created_at=existing.created_at.strftime("%Y-%m-%d %H:%M:%S")
  72. )
  73. # 创建新记忆
  74. new_memory = TranslationMemory(
  75. user_id=user_id,
  76. source_lang=memory.source_lang,
  77. target_lang=memory.target_lang,
  78. source_text=memory.source_text,
  79. target_text=memory.target_text,
  80. source_type=source_type,
  81. task_id=task_id
  82. )
  83. self.db.add(new_memory)
  84. self.db.commit()
  85. self.db.refresh(new_memory)
  86. logger.info(f"添加翻译记忆成功: user_id={user_id}, id={new_memory.id}")
  87. return MemoryResponse(
  88. id=new_memory.id,
  89. source_lang=new_memory.source_lang,
  90. target_lang=new_memory.target_lang,
  91. source_text=new_memory.source_text,
  92. target_text=new_memory.target_text,
  93. source_type=new_memory.source_type,
  94. usage_count=new_memory.usage_count,
  95. created_at=new_memory.created_at.strftime("%Y-%m-%d %H:%M:%S")
  96. )
  97. except Exception as e:
  98. logger.error(f"添加翻译记忆失败: {str(e)}")
  99. self.db.rollback()
  100. raise Exception(f"添加翻译记忆失败: {str(e)}")
  101. def get_user_memories(
  102. self,
  103. user_id: str,
  104. source_lang: Optional[str] = None,
  105. target_lang: Optional[str] = None,
  106. keyword: Optional[str] = None
  107. ) -> MemoryListResponse:
  108. """
  109. 查询用户翻译记忆
  110. Args:
  111. user_id: 用户ID
  112. source_lang: 源语言筛选
  113. target_lang: 目标语言筛选
  114. keyword: 关键词搜索
  115. Returns:
  116. 翻译记忆列表响应
  117. """
  118. query = self.db.query(TranslationMemory).filter(
  119. TranslationMemory.user_id == user_id,
  120. TranslationMemory.is_deleted == False
  121. )
  122. # 应用筛选条件
  123. if source_lang:
  124. query = query.filter(TranslationMemory.source_lang == source_lang)
  125. if target_lang:
  126. query = query.filter(TranslationMemory.target_lang == target_lang)
  127. if keyword:
  128. query = query.filter(
  129. (TranslationMemory.source_text.ilike(f"%{keyword}%")) |
  130. (TranslationMemory.target_text.ilike(f"%{keyword}%"))
  131. )
  132. # 排序(按使用次数和创建时间)
  133. query = query.order_by(
  134. TranslationMemory.usage_count.desc(),
  135. TranslationMemory.created_at.desc()
  136. )
  137. items = query.all()
  138. # 构建响应
  139. memory_items = [
  140. MemoryResponse(
  141. id=item.id,
  142. source_lang=item.source_lang,
  143. target_lang=item.target_lang,
  144. source_text=item.source_text,
  145. target_text=item.target_text,
  146. source_type=item.source_type,
  147. usage_count=item.usage_count,
  148. created_at=item.created_at.strftime("%Y-%m-%d %H:%M:%S")
  149. )
  150. for item in items
  151. ]
  152. return MemoryListResponse(items=memory_items)
  153. def delete_memory(
  154. self,
  155. user_id: str,
  156. memory_id: int
  157. ) -> bool:
  158. """
  159. 删除翻译记忆(软删除)
  160. Args:
  161. user_id: 用户ID
  162. memory_id: 记忆ID
  163. Returns:
  164. 是否删除成功
  165. """
  166. try:
  167. memory = self.db.query(TranslationMemory).filter(
  168. TranslationMemory.id == memory_id,
  169. TranslationMemory.user_id == user_id,
  170. TranslationMemory.is_deleted == False
  171. ).first()
  172. if not memory:
  173. logger.warning(f"翻译记忆不存在或已删除: memory_id={memory_id}, user_id={user_id}")
  174. return False
  175. # 软删除
  176. memory.is_deleted = True
  177. memory.deleted_at = datetime.now()
  178. self.db.commit()
  179. logger.info(f"软删除翻译记忆成功: memory_id={memory_id}, user_id={user_id}")
  180. return True
  181. except Exception as e:
  182. logger.error(f"软删除翻译记忆失败: {str(e)}")
  183. self.db.rollback()
  184. return False
  185. def find_similar(
  186. self,
  187. user_id: str,
  188. source_text: str,
  189. source_lang: str,
  190. target_lang: str,
  191. threshold: float = 0.8,
  192. limit: int = 5
  193. ) -> MemoryMatchResponse:
  194. """
  195. 查找相似翻译
  196. 使用简单的文本相似度算法(基于字符串包含关系)
  197. Args:
  198. user_id: 用户ID
  199. source_text: 源文本
  200. source_lang: 源语言
  201. target_lang: 目标语言
  202. threshold: 相似度阈值
  203. limit: 返回结果数量限制
  204. Returns:
  205. 相似句子匹配响应
  206. """
  207. try:
  208. # 查询相同语言对的翻译记忆
  209. memories = self.db.query(TranslationMemory).filter(
  210. TranslationMemory.user_id == user_id,
  211. TranslationMemory.source_lang == source_lang,
  212. TranslationMemory.target_lang == target_lang,
  213. TranslationMemory.is_deleted == False
  214. ).all()
  215. matches = []
  216. for memory in memories:
  217. # 计算简单的相似度(基于字符串包含和长度)
  218. similarity = self._calculate_similarity(source_text, memory.source_text)
  219. if similarity >= threshold:
  220. matches.append({
  221. "memory": memory,
  222. "similarity": similarity
  223. })
  224. # 按相似度排序
  225. matches.sort(key=lambda x: x["similarity"], reverse=True)
  226. # 限制返回数量
  227. matches = matches[:limit]
  228. # 更新使用统计
  229. for match in matches:
  230. memory = match["memory"]
  231. memory.usage_count += 1
  232. memory.last_used_at = datetime.now()
  233. self.db.commit()
  234. # 构建响应
  235. match_results = [
  236. MemoryMatch(
  237. source_text=match["memory"].source_text,
  238. target_text=match["memory"].target_text,
  239. similarity=match["similarity"]
  240. )
  241. for match in matches
  242. ]
  243. return MemoryMatchResponse(matches=match_results)
  244. except Exception as e:
  245. logger.error(f"查找相似翻译失败: {str(e)}")
  246. return MemoryMatchResponse(matches=[])
  247. def _calculate_similarity(self, text1: str, text2: str) -> float:
  248. """
  249. 计算文本相似度(简单算法)
  250. 基于最长公共子序列和字符串长度
  251. Args:
  252. text1: 文本1
  253. text2: 文本2
  254. Returns:
  255. 相似度(0-1)
  256. """
  257. if text1 == text2:
  258. return 1.0
  259. # 完全包含关系
  260. if text1 in text2 or text2 in text1:
  261. shorter = min(len(text1), len(text2))
  262. longer = max(len(text1), len(text2))
  263. return shorter / longer
  264. # 计算最长公共子序列长度
  265. lcs_length = self._lcs_length(text1, text2)
  266. # 相似度 = LCS长度 / 较长字符串长度
  267. max_length = max(len(text1), len(text2))
  268. if max_length == 0:
  269. return 0.0
  270. return lcs_length / max_length
  271. def _lcs_length(self, text1: str, text2: str) -> int:
  272. """
  273. 计算最长公共子序列长度
  274. Args:
  275. text1: 文本1
  276. text2: 文本2
  277. Returns:
  278. LCS长度
  279. """
  280. m, n = len(text1), len(text2)
  281. # 创建DP表
  282. dp = [[0] * (n + 1) for _ in range(m + 1)]
  283. # 填充DP表
  284. for i in range(1, m + 1):
  285. for j in range(1, n + 1):
  286. if text1[i - 1] == text2[j - 1]:
  287. dp[i][j] = dp[i - 1][j - 1] + 1
  288. else:
  289. dp[i][j] = max(dp[i - 1][j], dp[i][j - 1])
  290. return dp[m][n]