| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153 |
"""
Vector data model definitions.

Provides the data structure definitions related to the vector database.
"""
- from typing import Optional, Dict, Any, List
- from dataclasses import dataclass
- from datetime import datetime
@dataclass
class VectorEmbedding:
    """Embedding vector for a piece of text, plus descriptive fields.

    Attributes:
        id: Optional identifier.
        text: Text associated with the embedding.
        vector: Embedding values; ``None`` is replaced by a fresh empty list.
        embedding_model: Model identifier, stored as given.
        dimension: Vector dimension, stored as given (it is not derived
            from ``vector``).
        metadata: Extra key/value data; ``None`` is replaced by a fresh
            empty dict.
        created_at: Creation timestamp, if any.
    """

    id: Optional[str] = None
    text: str = ""
    # None is only a sentinel: a literal [] default would be rejected by
    # @dataclass, so __post_init__ creates the per-instance list instead.
    vector: Optional[List[float]] = None
    embedding_model: str = ""
    dimension: int = 0
    metadata: Optional[Dict[str, Any]] = None
    created_at: Optional[datetime] = None

    def __post_init__(self) -> None:
        """Replace ``None`` containers with fresh, per-instance empties."""
        if self.vector is None:
            self.vector = []
        if self.metadata is None:
            self.metadata = {}

    @staticmethod
    def _parse_datetime(value: Any) -> Optional[datetime]:
        """Return *value* as a ``datetime``; falsy values become ``None``.

        ``datetime`` instances are passed through unchanged, anything else
        is handed to ``datetime.fromisoformat`` (which raises for
        non-string or malformed input).
        """
        if not value:
            return None
        if isinstance(value, datetime):
            return value
        return datetime.fromisoformat(value)

    def to_dict(self) -> Dict[str, Any]:
        """Convert the embedding to a plain dictionary.

        Returns:
            A dict keyed by field name. ``created_at`` is rendered as an
            ISO-8601 string (or ``None``); ``vector`` and ``metadata`` are
            the same objects held by the instance, not copies.
        """
        return {
            'id': self.id,
            'text': self.text,
            'vector': self.vector,
            'embedding_model': self.embedding_model,
            'dimension': self.dimension,
            'metadata': self.metadata,
            'created_at': self.created_at.isoformat() if self.created_at else None,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'VectorEmbedding':
        """Create an instance from a dictionary.

        Args:
            data: Mapping with any subset of the field names as keys.
                Missing keys fall back to the field defaults.
                ``created_at`` may be an ISO-8601 string or an existing
                ``datetime``.

        Returns:
            The populated ``VectorEmbedding``.

        Raises:
            ValueError: If ``created_at`` is a string that is not valid
                ISO-8601.
            TypeError: If ``created_at`` is truthy but neither a string
                nor a ``datetime``.
        """
        return cls(
            id=data.get('id'),
            text=data.get('text', ''),
            vector=data.get('vector', []),
            embedding_model=data.get('embedding_model', ''),
            dimension=data.get('dimension', 0),
            metadata=data.get('metadata', {}),
            created_at=cls._parse_datetime(data.get('created_at')),
        )
@dataclass
class VectorDocument:
    """Document record with an optional attached embedding.

    Attributes:
        id: Optional identifier of this record.
        text_content: Text content of the document.
        doc_id: Optional document identifier, stored as given.
        doc_type: Document type label, stored as given.
        category: Optional category label.
        embedding: Optional ``VectorEmbedding`` attached to the document.
        metadata: Extra key/value data; ``None`` is replaced by a fresh
            empty dict.
        created_at: Creation timestamp, if any.
        updated_at: Last-update timestamp, if any.
    """

    id: Optional[str] = None
    text_content: str = ""
    doc_id: Optional[str] = None
    doc_type: str = ""
    category: Optional[str] = None
    embedding: Optional[VectorEmbedding] = None
    metadata: Optional[Dict[str, Any]] = None
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None

    def __post_init__(self) -> None:
        """Replace a ``None`` metadata with a fresh, per-instance dict."""
        if self.metadata is None:
            self.metadata = {}

    @staticmethod
    def _parse_datetime(value: Any) -> Optional[datetime]:
        """Return *value* as a ``datetime``; falsy values become ``None``.

        ``datetime`` instances are passed through unchanged, anything else
        is handed to ``datetime.fromisoformat`` (which raises for
        non-string or malformed input).
        """
        if not value:
            return None
        if isinstance(value, datetime):
            return value
        return datetime.fromisoformat(value)

    def to_dict(self) -> Dict[str, Any]:
        """Convert the document to a plain dictionary.

        Returns:
            A dict keyed by field name. The embedding is rendered through
            its own ``to_dict`` (or ``None``), and both timestamps are
            rendered as ISO-8601 strings (or ``None``). ``metadata`` is
            the same object held by the instance, not a copy.
        """
        return {
            'id': self.id,
            'text_content': self.text_content,
            'doc_id': self.doc_id,
            'doc_type': self.doc_type,
            'category': self.category,
            'embedding': self.embedding.to_dict() if self.embedding else None,
            'metadata': self.metadata,
            'created_at': self.created_at.isoformat() if self.created_at else None,
            'updated_at': self.updated_at.isoformat() if self.updated_at else None,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'VectorDocument':
        """Create an instance from a dictionary.

        Args:
            data: Mapping with any subset of the field names as keys.
                Missing keys fall back to the field defaults.
                ``embedding`` may be a dict (built via
                ``VectorEmbedding.from_dict``) or an existing
                ``VectorEmbedding``. ``created_at`` / ``updated_at`` may
                be ISO-8601 strings or existing ``datetime`` objects.

        Returns:
            The populated ``VectorDocument``.

        Raises:
            ValueError: If a timestamp string is not valid ISO-8601.
            TypeError: If a timestamp is truthy but neither a string nor
                a ``datetime``.
        """
        raw_embedding = data.get('embedding')
        if isinstance(raw_embedding, VectorEmbedding):
            # Already materialised: use it as-is instead of calling
            # from_dict on something that is not a mapping.
            embedding = raw_embedding
        elif raw_embedding:
            embedding = VectorEmbedding.from_dict(raw_embedding)
        else:
            embedding = None

        return cls(
            id=data.get('id'),
            text_content=data.get('text_content', ''),
            doc_id=data.get('doc_id'),
            doc_type=data.get('doc_type', ''),
            category=data.get('category'),
            embedding=embedding,
            metadata=data.get('metadata', {}),
            created_at=cls._parse_datetime(data.get('created_at')),
            updated_at=cls._parse_datetime(data.get('updated_at')),
        )
@dataclass
class VectorSearchResult:
    """A single hit returned by a vector search.

    Attributes:
        id: Optional identifier of the hit.
        text_content: Optional text content of the hit.
        score: Score of the hit (defaults to ``0.0``).
        distance: Optional distance value.
        metadata: Extra key/value data; ``None`` is replaced by a fresh
            empty dict.
        doc_id: Optional document identifier.
        doc_type: Optional document type label.
        category: Optional category label.
    """

    id: Optional[str] = None
    text_content: Optional[str] = None
    score: float = 0.0
    distance: Optional[float] = None
    metadata: Optional[Dict[str, Any]] = None
    doc_id: Optional[str] = None
    doc_type: Optional[str] = None
    category: Optional[str] = None

    def __post_init__(self):
        """Guarantee that ``metadata`` is a dict rather than ``None``."""
        self.metadata = {} if self.metadata is None else self.metadata

    def to_dict(self) -> Dict[str, Any]:
        """Convert the result to a plain dictionary keyed by field name.

        Values are the attribute values themselves (no copies are made).
        """
        exported = (
            'id',
            'text_content',
            'score',
            'distance',
            'metadata',
            'doc_id',
            'doc_type',
            'category',
        )
        return {key: getattr(self, key) for key in exported}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'VectorSearchResult':
        """Create an instance from a dictionary.

        Missing keys become ``None``, except ``score`` (``0.0``) and
        ``metadata`` (empty dict).
        """
        # Fields whose fallback for a missing key is simply None.
        nullable = ('id', 'text_content', 'distance', 'doc_id', 'doc_type', 'category')
        kwargs = {key: data.get(key) for key in nullable}
        kwargs['score'] = data.get('score', 0.0)
        kwargs['metadata'] = data.get('metadata', {})
        return cls(**kwargs)
# Names exported by a star-import of this module.
__all__ = ["VectorEmbedding", "VectorDocument", "VectorSearchResult"]
|