# vector_models.py
"""
Vector data model definitions.

Provides the data structures used when working with the vector database.
"""
  5. from typing import Optional, Dict, Any, List
  6. from dataclasses import dataclass
  7. from datetime import datetime
  8. @dataclass
  9. class VectorEmbedding:
  10. """向量嵌入数据模型"""
  11. id: Optional[str] = None
  12. text: str = ""
  13. vector: List[float] = None
  14. embedding_model: str = ""
  15. dimension: int = 0
  16. metadata: Optional[Dict[str, Any]] = None
  17. created_at: Optional[datetime] = None
  18. def __post_init__(self):
  19. if self.vector is None:
  20. self.vector = []
  21. if self.metadata is None:
  22. self.metadata = {}
  23. def to_dict(self) -> Dict[str, Any]:
  24. """转换为字典"""
  25. return {
  26. 'id': self.id,
  27. 'text': self.text,
  28. 'vector': self.vector,
  29. 'embedding_model': self.embedding_model,
  30. 'dimension': self.dimension,
  31. 'metadata': self.metadata,
  32. 'created_at': self.created_at.isoformat() if self.created_at else None
  33. }
  34. @classmethod
  35. def from_dict(cls, data: Dict[str, Any]) -> 'VectorEmbedding':
  36. """从字典创建实例"""
  37. return cls(
  38. id=data.get('id'),
  39. text=data.get('text', ''),
  40. vector=data.get('vector', []),
  41. embedding_model=data.get('embedding_model', ''),
  42. dimension=data.get('dimension', 0),
  43. metadata=data.get('metadata', {}),
  44. created_at=datetime.fromisoformat(data['created_at']) if data.get('created_at') else None
  45. )
  46. @dataclass
  47. class VectorDocument:
  48. """向量文档数据模型"""
  49. id: Optional[str] = None
  50. text_content: str = ""
  51. doc_id: Optional[str] = None
  52. doc_type: str = ""
  53. category: Optional[str] = None
  54. embedding: Optional[VectorEmbedding] = None
  55. metadata: Optional[Dict[str, Any]] = None
  56. created_at: Optional[datetime] = None
  57. updated_at: Optional[datetime] = None
  58. def __post_init__(self):
  59. if self.metadata is None:
  60. self.metadata = {}
  61. def to_dict(self) -> Dict[str, Any]:
  62. """转换为字典"""
  63. return {
  64. 'id': self.id,
  65. 'text_content': self.text_content,
  66. 'doc_id': self.doc_id,
  67. 'doc_type': self.doc_type,
  68. 'category': self.category,
  69. 'embedding': self.embedding.to_dict() if self.embedding else None,
  70. 'metadata': self.metadata,
  71. 'created_at': self.created_at.isoformat() if self.created_at else None,
  72. 'updated_at': self.updated_at.isoformat() if self.updated_at else None
  73. }
  74. @classmethod
  75. def from_dict(cls, data: Dict[str, Any]) -> 'VectorDocument':
  76. """从字典创建实例"""
  77. embedding_data = data.get('embedding')
  78. embedding = VectorEmbedding.from_dict(embedding_data) if embedding_data else None
  79. return cls(
  80. id=data.get('id'),
  81. text_content=data.get('text_content', ''),
  82. doc_id=data.get('doc_id'),
  83. doc_type=data.get('doc_type', ''),
  84. category=data.get('category'),
  85. embedding=embedding,
  86. metadata=data.get('metadata', {}),
  87. created_at=datetime.fromisoformat(data['created_at']) if data.get('created_at') else None,
  88. updated_at=datetime.fromisoformat(data['updated_at']) if data.get('updated_at') else None
  89. )
  90. @dataclass
  91. class VectorSearchResult:
  92. """向量搜索结果数据模型"""
  93. id: Optional[str] = None
  94. text_content: Optional[str] = None
  95. score: float = 0.0
  96. distance: Optional[float] = None
  97. metadata: Optional[Dict[str, Any]] = None
  98. doc_id: Optional[str] = None
  99. doc_type: Optional[str] = None
  100. category: Optional[str] = None
  101. def __post_init__(self):
  102. if self.metadata is None:
  103. self.metadata = {}
  104. def to_dict(self) -> Dict[str, Any]:
  105. """转换为字典"""
  106. return {
  107. 'id': self.id,
  108. 'text_content': self.text_content,
  109. 'score': self.score,
  110. 'distance': self.distance,
  111. 'metadata': self.metadata,
  112. 'doc_id': self.doc_id,
  113. 'doc_type': self.doc_type,
  114. 'category': self.category
  115. }
  116. @classmethod
  117. def from_dict(cls, data: Dict[str, Any]) -> 'VectorSearchResult':
  118. """从字典创建实例"""
  119. return cls(
  120. id=data.get('id'),
  121. text_content=data.get('text_content'),
  122. score=data.get('score', 0.0),
  123. distance=data.get('distance'),
  124. metadata=data.get('metadata', {}),
  125. doc_id=data.get('doc_id'),
  126. doc_type=data.get('doc_type'),
  127. category=data.get('category')
  128. )
  129. __all__ = [
  130. "VectorEmbedding",
  131. "VectorDocument",
  132. "VectorSearchResult"
  133. ]