tag.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259
  1. # coding=utf-8
  2. """
  3. @project: maxkb
  4. @Author:AI Assistant
  5. @file: tag.py
  6. @date:2025/10/13
  7. @desc: 标签系统相关序列化器
  8. """
  9. from collections import defaultdict
  10. from typing import Dict
  11. import uuid_utils.compat as uuid
  12. from django.db import transaction
  13. from django.db.models import QuerySet
  14. from django.db.models.aggregates import Count
  15. from django.db.models.query_utils import Q
  16. from django.utils.translation import gettext_lazy as _
  17. from rest_framework import serializers
  18. from common.exception.app_exception import AppApiException
  19. from knowledge.models import Tag, Knowledge, DocumentTag
  20. class TagModelSerializer(serializers.ModelSerializer):
  21. """标签模型序列化器"""
  22. class Meta:
  23. model = Tag
  24. fields = ['id', 'knowledge_id', 'key', 'value', 'create_time', 'update_time']
  25. read_only_fields = ['id', 'create_time', 'update_time']
  26. class TagCreateSerializer(serializers.Serializer):
  27. """创建标签序列化器"""
  28. key = serializers.CharField(required=True, max_length=64, label=_('Tag Key'))
  29. value = serializers.CharField(required=True, max_length=128, label=_('Tag Value'))
  30. class TagEditSerializer(serializers.Serializer):
  31. key = serializers.CharField(required=False, max_length=64, label=_('Tag Key'))
  32. value = serializers.CharField(required=False, max_length=128, label=_('Tag Value'))
  33. class TagSerializers(serializers.Serializer):
  34. class Create(serializers.Serializer):
  35. workspace_id = serializers.CharField(required=True, label=_('Workspace ID'))
  36. knowledge_id = serializers.UUIDField(required=True, label=_('Knowledge ID'))
  37. tags = serializers.ListField(required=True, label=_('Tags'), child=TagCreateSerializer())
  38. def is_valid(self, *, raise_exception=False):
  39. super().is_valid(raise_exception=True)
  40. workspace_id = self.data.get('workspace_id')
  41. query_set = QuerySet(Knowledge).filter(id=self.data.get('knowledge_id'))
  42. if workspace_id and workspace_id != 'None':
  43. query_set = query_set.filter(workspace_id=workspace_id)
  44. if not query_set.exists():
  45. raise AppApiException(500, _('Knowledge id does not exist'))
  46. def insert(self):
  47. self.is_valid(raise_exception=True)
  48. knowledge_id = self.data.get('knowledge_id')
  49. # 获取数据库中已存在的key-value组合
  50. existing_tags = set(
  51. QuerySet(Tag).filter(knowledge_id=knowledge_id)
  52. .values_list('key', 'value', named=False)
  53. )
  54. # 过滤掉已存在的标签
  55. tag_objects = []
  56. for tag_data in self.data.get('tags', []):
  57. key = tag_data.get('key')
  58. value = tag_data.get('value')
  59. # 检查key-value组合是否已存在
  60. if (key, value) not in existing_tags:
  61. tag = Tag(
  62. id=uuid.uuid7(),
  63. knowledge_id=knowledge_id,
  64. key=key,
  65. value=value
  66. )
  67. tag_objects.append(tag)
  68. # 将新标签添加到已存在集合中,避免本次批量插入中的重复
  69. existing_tags.add((key, value))
  70. # 批量插入未重复的标签
  71. if tag_objects:
  72. Tag.objects.bulk_create(tag_objects)
  73. class Operate(serializers.Serializer):
  74. workspace_id = serializers.CharField(required=True, label=_('Workspace ID'))
  75. knowledge_id = serializers.UUIDField(required=True, label=_('Knowledge ID'))
  76. tag_id = serializers.UUIDField(required=True, label=_('Tag ID'))
  77. def is_valid(self, *, raise_exception=False):
  78. super().is_valid(raise_exception=True)
  79. workspace_id = self.data.get('workspace_id')
  80. query_set = QuerySet(Knowledge).filter(id=self.data.get('knowledge_id'))
  81. if workspace_id and workspace_id != 'None':
  82. query_set = query_set.filter(workspace_id=workspace_id)
  83. if not query_set.exists():
  84. raise AppApiException(500, _('Knowledge id does not exist'))
  85. @transaction.atomic
  86. def edit(self, instance: Dict):
  87. self.is_valid(raise_exception=True)
  88. tag = QuerySet(Tag).get(id=self.data.get('tag_id'))
  89. if tag is None:
  90. raise AppApiException(500, _('Tag id does not exist'))
  91. # 如果key发生变化,更新所有相同key的标签
  92. if instance.get('key') and instance.get('key') != tag.key:
  93. old_key = tag.key
  94. new_key = instance.get('key')
  95. # 检查新key是否已存在于同一个knowledge中
  96. existing_key_exists = QuerySet(Tag).filter(
  97. knowledge_id=tag.knowledge_id,
  98. key=new_key
  99. ).exists()
  100. if existing_key_exists:
  101. raise AppApiException(500, _('Tag key already exists'))
  102. # 批量更新所有具有相同old_key的标签
  103. QuerySet(Tag).filter(
  104. knowledge_id=tag.knowledge_id,
  105. key=old_key
  106. ).update(key=new_key)
  107. # 如果只是value变化,只更新当前标签
  108. if instance.get('value') and instance.get('value') != tag.value:
  109. # 检查新key是否已存在于同一个knowledge中
  110. existing_value_exists = QuerySet(Tag).filter(
  111. knowledge_id=tag.knowledge_id,
  112. key=instance.get('key'),
  113. value=instance.get('value')
  114. ).exists()
  115. if existing_value_exists:
  116. raise AppApiException(500, _('Tag value already exists'))
  117. QuerySet(Tag).filter(
  118. id=tag.id
  119. ).update(value=instance.get('value'))
  120. @transaction.atomic
  121. def delete(self, delete_type: str):
  122. self.is_valid(raise_exception=True)
  123. if delete_type == 'key':
  124. # 删除同一knowledge_id下相同key的所有标签
  125. tag = QuerySet(Tag).get(id=self.data.get('tag_id'))
  126. if tag is None:
  127. raise AppApiException(500, _('Tag id does not exist'))
  128. QuerySet(Tag).filter(
  129. knowledge_id=tag.knowledge_id,
  130. key=tag.key
  131. ).delete()
  132. QuerySet(DocumentTag).filter(tag_id=tag.id).delete()
  133. else:
  134. # 仅删除当前标签
  135. QuerySet(Tag).filter(id=self.data.get('tag_id')).delete()
  136. QuerySet(DocumentTag).filter(tag_id=self.data.get('tag_id')).delete()
  137. class BatchDelete(serializers.Serializer):
  138. workspace_id = serializers.CharField(required=True, label=_('Workspace ID'))
  139. knowledge_id = serializers.UUIDField(required=True, label=_('Knowledge ID'))
  140. tag_ids = serializers.ListField(required=True, label=_('Tag IDs'), child=serializers.UUIDField())
  141. def is_valid(self, *, raise_exception=False):
  142. super().is_valid(raise_exception=True)
  143. workspace_id = self.data.get('workspace_id')
  144. query_set = QuerySet(Knowledge).filter(id=self.data.get('knowledge_id'))
  145. if workspace_id and workspace_id != 'None':
  146. query_set = query_set.filter(workspace_id=workspace_id)
  147. if not query_set.exists():
  148. raise AppApiException(500, _('Knowledge id does not exist'))
  149. @transaction.atomic
  150. def batch_delete(self):
  151. self.is_valid(raise_exception=True)
  152. tag_ids = self.data.get('tag_ids', [])
  153. if not tag_ids:
  154. return
  155. # 获取要删除的标签的key
  156. tags_to_delete = QuerySet(Tag).filter(id__in=tag_ids)
  157. keys_to_delete = set(tags_to_delete.values_list('key', flat=True))
  158. # 删除具有相同key的所有标签
  159. QuerySet(Tag).filter(
  160. knowledge_id=self.data.get('knowledge_id'),
  161. key__in=keys_to_delete
  162. ).delete()
  163. # 删除关联的DocumentTag
  164. QuerySet(DocumentTag).filter(tag_id__in=tag_ids).delete()
  165. class Query(serializers.Serializer):
  166. workspace_id = serializers.CharField(required=True, label=_('Workspace ID'))
  167. knowledge_id = serializers.UUIDField(required=True, label=_('Knowledge ID'))
  168. name = serializers.CharField(required=False, allow_null=True, allow_blank=True, label=_('search value'))
  169. def is_valid(self, *, raise_exception=False):
  170. super().is_valid(raise_exception=True)
  171. workspace_id = self.data.get('workspace_id')
  172. query_set = QuerySet(Knowledge).filter(id=self.data.get('knowledge_id'))
  173. if workspace_id and workspace_id != 'None':
  174. query_set = query_set.filter(workspace_id=workspace_id)
  175. if not query_set.exists():
  176. raise AppApiException(500, _('Knowledge id does not exist'))
  177. def list(self):
  178. self.is_valid(raise_exception=True)
  179. if self.data.get('name'):
  180. name = self.data.get('name')
  181. tags = QuerySet(Tag).filter(
  182. knowledge_id=self.data.get('knowledge_id')
  183. ).filter(
  184. Q(key__icontains=name) | Q(value__icontains=name)
  185. ).values('key', 'value', 'id', 'create_time', 'update_time').order_by('create_time', 'key', 'value')
  186. else:
  187. # 获取所有标签,按创建时间排序保持稳定顺序
  188. tags = QuerySet(Tag).filter(
  189. knowledge_id=self.data.get('knowledge_id')
  190. ).values('key', 'value', 'id', 'create_time', 'update_time').order_by('create_time', 'key', 'value')
  191. tag_ids = [tag['id'] for tag in tags]
  192. tag_doc_count_map = {row['tag_id']: row['doc_count'] for row in
  193. QuerySet(DocumentTag).filter(tag_id__in=tag_ids)
  194. .values('tag_id').annotate(doc_count=Count('document_id'))
  195. }
  196. # 按key分组
  197. grouped_tags = defaultdict(list)
  198. for tag in tags:
  199. grouped_tags[tag['key']].append({
  200. 'id': tag['id'],
  201. 'value': tag['value'],
  202. 'doc_count': tag_doc_count_map.get(tag['id'],0),
  203. 'create_time': tag['create_time'],
  204. 'update_time': tag['update_time']
  205. })
  206. # 转换为期望的格式,保持key的顺序
  207. result = []
  208. # 按key排序以确保结果顺序一致
  209. for key in sorted(grouped_tags.keys()):
  210. values = grouped_tags[key]
  211. # 按创建时间对values进行排序
  212. values.sort(key=lambda x: x['create_time'])
  213. result.append({
  214. 'key': key,
  215. 'values': values,
  216. })
  217. return result