""" 图数据模型定义 提供知识图谱相关的通用数据结构定义 """ from typing import Optional, Dict, Any, List, Union from dataclasses import dataclass from datetime import datetime from enum import Enum class NodeType(Enum): """节点类型枚举""" PERSON = "person" ORGANIZATION = "organization" LOCATION = "location" CONCEPT = "concept" EVENT = "event" DOCUMENT = "document" UNKNOWN = "unknown" class RelationType(Enum): """关系类型枚举""" BELONGS_TO = "belongs_to" LOCATED_IN = "located_in" RELATED_TO = "related_to" PART_OF = "part_of" INSTANCE_OF = "instance_of" KNOWS = "knows" WORKS_FOR = "works_for" UNKNOWN = "unknown" @dataclass class GraphNode: """图节点数据模型""" id: Optional[str] = None label: str = "" node_type: NodeType = NodeType.UNKNOWN properties: Optional[Dict[str, Any]] = None embeddings: Optional[List[float]] = None created_at: Optional[datetime] = None updated_at: Optional[datetime] = None def __post_init__(self): if self.properties is None: self.properties = {} if isinstance(self.node_type, str): self.node_type = NodeType(self.node_type) def to_dict(self) -> Dict[str, Any]: """转换为字典""" return { 'id': self.id, 'label': self.label, 'node_type': self.node_type.value if self.node_type else None, 'properties': self.properties, 'embeddings': self.embeddings, 'created_at': self.created_at.isoformat() if self.created_at else None, 'updated_at': self.updated_at.isoformat() if self.updated_at else None } @classmethod def from_dict(cls, data: Dict[str, Any]) -> 'GraphNode': """从字典创建实例""" node_type = data.get('node_type') if isinstance(node_type, str): node_type = NodeType(node_type) return cls( id=data.get('id'), label=data.get('label', ''), node_type=node_type, properties=data.get('properties', {}), embeddings=data.get('embeddings', []), created_at=datetime.fromisoformat(data['created_at']) if data.get('created_at') else None, updated_at=datetime.fromisoformat(data['updated_at']) if data.get('updated_at') else None ) @dataclass class GraphEdge: """图边数据模型""" id: Optional[str] = None source_id: str = "" target_id: str = "" relation_type: RelationType = RelationType.UNKNOWN weight: float = 1.0 properties: Optional[Dict[str, Any]] = None created_at: Optional[datetime] = None def __post_init__(self): if self.properties is None: self.properties = {} if isinstance(self.relation_type, str): self.relation_type = RelationType(self.relation_type) def to_dict(self) -> Dict[str, Any]: """转换为字典""" return { 'id': self.id, 'source_id': self.source_id, 'target_id': self.target_id, 'relation_type': self.relation_type.value if self.relation_type else None, 'weight': self.weight, 'properties': self.properties, 'created_at': self.created_at.isoformat() if self.created_at else None } @classmethod def from_dict(cls, data: Dict[str, Any]) -> 'GraphEdge': """从字典创建实例""" relation_type = data.get('relation_type') if isinstance(relation_type, str): relation_type = RelationType(relation_type) return cls( id=data.get('id'), source_id=data.get('source_id', ''), target_id=data.get('target_id', ''), relation_type=relation_type, weight=data.get('weight', 1.0), properties=data.get('properties', {}), created_at=datetime.fromisoformat(data['created_at']) if data.get('created_at') else None ) @dataclass class GraphEntity: """图实体数据模型(扩展的节点模型)""" node: GraphNode entity_type: str = "" confidence: float = 1.0 source_document: Optional[str] = None extraction_method: Optional[str] = None def to_dict(self) -> Dict[str, Any]: """转换为字典""" return { 'node': self.node.to_dict(), 'entity_type': self.entity_type, 'confidence': self.confidence, 'source_document': self.source_document, 'extraction_method': self.extraction_method } @classmethod def from_dict(cls, data: Dict[str, Any]) -> 'GraphEntity': """从字典创建实例""" node_data = data.get('node', {}) node = GraphNode.from_dict(node_data) return cls( node=node, entity_type=data.get('entity_type', ''), confidence=data.get('confidence', 1.0), source_document=data.get('source_document'), extraction_method=data.get('extraction_method') ) @dataclass class GraphRelation: """图关系数据模型(扩展的边模型)""" edge: GraphEdge relation_subtype: Optional[str] = None confidence: float = 1.0 source_sentence: Optional[str] = None extraction_method: Optional[str] = None def to_dict(self) -> Dict[str, Any]: """转换为字典""" return { 'edge': self.edge.to_dict(), 'relation_subtype': self.relation_subtype, 'confidence': self.confidence, 'source_sentence': self.source_sentence, 'extraction_method': self.extraction_method } @classmethod def from_dict(cls, data: Dict[str, Any]) -> 'GraphRelation': """从字典创建实例""" edge_data = data.get('edge', {}) edge = GraphEdge.from_dict(edge_data) return cls( edge=edge, relation_subtype=data.get('relation_subtype'), confidence=data.get('confidence', 1.0), source_sentence=data.get('source_sentence'), extraction_method=data.get('extraction_method') ) @dataclass class KnowledgeGraph: """知识图谱数据模型""" id: Optional[str] = None name: str = "" description: Optional[str] = None nodes: List[GraphEntity] = None relations: List[GraphRelation] = None metadata: Optional[Dict[str, Any]] = None created_at: Optional[datetime] = None updated_at: Optional[datetime] = None def __post_init__(self): if self.nodes is None: self.nodes = [] if self.relations is None: self.relations = [] if self.metadata is None: self.metadata = {} def to_dict(self) -> Dict[str, Any]: """转换为字典""" return { 'id': self.id, 'name': self.name, 'description': self.description, 'nodes': [node.to_dict() for node in self.nodes], 'relations': [relation.to_dict() for relation in self.relations], 'metadata': self.metadata, 'created_at': self.created_at.isoformat() if self.created_at else None, 'updated_at': self.updated_at.isoformat() if self.updated_at else None } @classmethod def from_dict(cls, data: Dict[str, Any]) -> 'KnowledgeGraph': """从字典创建实例""" nodes_data = data.get('nodes', []) relations_data = data.get('relations', []) nodes = [GraphEntity.from_dict(node_data) for node_data in nodes_data] relations = [GraphRelation.from_dict(relation_data) for relation_data in relations_data] return cls( id=data.get('id'), name=data.get('name', ''), description=data.get('description'), nodes=nodes, relations=relations, metadata=data.get('metadata', {}), created_at=datetime.fromisoformat(data['created_at']) if data.get('created_at') else None, updated_at=datetime.fromisoformat(data['updated_at']) if data.get('updated_at') else None ) __all__ = [ "NodeType", "RelationType", "GraphNode", "GraphEdge", "GraphEntity", "GraphRelation", "KnowledgeGraph" ]