|
|
@@ -0,0 +1,260 @@
|
|
|
+"""
|
|
|
+图数据模型定义
|
|
|
+
|
|
|
+提供知识图谱相关的通用数据结构定义
|
|
|
+"""
|
|
|
+
|
|
|
+from typing import Optional, Dict, Any, List, Union
|
|
|
+from dataclasses import dataclass
|
|
|
+from datetime import datetime
|
|
|
+from enum import Enum
|
|
|
+
|
|
|
+
|
|
|
class NodeType(Enum):
    """Closed set of categories a graph node can be tagged with.

    Each member's value is the lowercase string form of its name, so a
    member can be looked up from that string via ``NodeType("person")``.
    """

    # Concrete categories.
    PERSON = "person"
    ORGANIZATION = "organization"
    LOCATION = "location"
    CONCEPT = "concept"
    EVENT = "event"
    DOCUMENT = "document"

    # Catch-all category.
    UNKNOWN = "unknown"
|
|
|
+
|
|
|
+
|
|
|
class RelationType(Enum):
    """Closed set of relation kinds an edge between two nodes can carry.

    Each member's value is the lowercase string form of its name, so a
    member can be looked up from that string via
    ``RelationType("part_of")``.
    """

    # Concrete relation kinds.
    BELONGS_TO = "belongs_to"
    LOCATED_IN = "located_in"
    RELATED_TO = "related_to"
    PART_OF = "part_of"
    INSTANCE_OF = "instance_of"
    KNOWS = "knows"
    WORKS_FOR = "works_for"

    # Catch-all relation kind.
    UNKNOWN = "unknown"
|
|
|
+
|
|
|
+
|
|
|
@dataclass
class GraphNode:
    """Graph node data model.

    Attributes:
        id: Optional node identifier; ``None`` when not assigned.
        label: Label text of the node.
        node_type: Category of the node. A string is converted to the
            matching ``NodeType`` member and ``None`` becomes
            ``NodeType.UNKNOWN``.
        properties: Free-form key/value attributes; ``None`` is replaced
            by an empty dict on construction.
        embeddings: Optional list of floats attached to the node.
        created_at: Optional creation timestamp.
        updated_at: Optional last-update timestamp.
    """

    id: Optional[str] = None
    label: str = ""
    node_type: NodeType = NodeType.UNKNOWN
    properties: Optional[Dict[str, Any]] = None
    embeddings: Optional[List[float]] = None
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None

    def __post_init__(self):
        """Normalize ``properties`` and ``node_type`` after construction.

        Raises:
            ValueError: If ``node_type`` is a string that matches no
                ``NodeType`` value.
        """
        if self.properties is None:
            self.properties = {}
        self.node_type = self._coerce_node_type(self.node_type)

    @staticmethod
    def _coerce_node_type(value: Any) -> Any:
        """Map ``None`` to ``NodeType.UNKNOWN`` and strings to members."""
        if value is None:
            # A missing type must not override the declared UNKNOWN default.
            return NodeType.UNKNOWN
        if isinstance(value, str):
            return NodeType(value)
        return value

    @staticmethod
    def _parse_timestamp(value: Any) -> Optional[datetime]:
        """Return a ``datetime`` for an ISO string or ``datetime`` input.

        Falsy input (missing key, ``None``, empty string) yields ``None``.
        """
        if not value:
            return None
        if isinstance(value, datetime):
            # Already parsed; fromisoformat() would raise TypeError on it.
            return value
        return datetime.fromisoformat(value)

    def to_dict(self) -> Dict[str, Any]:
        """Convert the node to a plain dictionary.

        Returns:
            A dict with the enum rendered as its string value and the
            timestamps rendered as ISO-8601 strings (``None`` when unset).
        """
        return {
            'id': self.id,
            'label': self.label,
            # Guard kept so a node whose type was reset to None still serializes.
            'node_type': self.node_type.value if self.node_type else None,
            'properties': self.properties,
            'embeddings': self.embeddings,
            'created_at': self.created_at.isoformat() if self.created_at else None,
            'updated_at': self.updated_at.isoformat() if self.updated_at else None,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'GraphNode':
        """Create an instance from a dictionary.

        Missing or ``None`` values for ``label`` and ``node_type`` fall
        back to the field defaults (``""`` and ``NodeType.UNKNOWN``).

        Args:
            data: Mapping using the same keys that ``to_dict`` produces.

        Returns:
            The reconstructed ``GraphNode``.

        Raises:
            ValueError: If ``node_type`` is an unrecognized string or a
                timestamp string is not valid ISO format.
        """
        return cls(
            id=data.get('id'),
            label=data.get('label') or '',
            node_type=cls._coerce_node_type(data.get('node_type')),
            properties=data.get('properties', {}),
            embeddings=data.get('embeddings', []),
            created_at=cls._parse_timestamp(data.get('created_at')),
            updated_at=cls._parse_timestamp(data.get('updated_at')),
        )
|
|
|
+
|
|
|
+
|
|
|
@dataclass
class GraphEdge:
    """Graph edge data model.

    Attributes:
        id: Optional edge identifier; ``None`` when not assigned.
        source_id: Identifier stored as the edge's source.
        target_id: Identifier stored as the edge's target.
        relation_type: Kind of relation. A string is converted to the
            matching ``RelationType`` member and ``None`` becomes
            ``RelationType.UNKNOWN``.
        weight: Numeric weight of the edge; defaults to ``1.0``.
        properties: Free-form key/value attributes; ``None`` is replaced
            by an empty dict on construction.
        created_at: Optional creation timestamp.
    """

    id: Optional[str] = None
    source_id: str = ""
    target_id: str = ""
    relation_type: RelationType = RelationType.UNKNOWN
    weight: float = 1.0
    properties: Optional[Dict[str, Any]] = None
    created_at: Optional[datetime] = None

    def __post_init__(self):
        """Normalize ``properties`` and ``relation_type`` after construction.

        Raises:
            ValueError: If ``relation_type`` is a string that matches no
                ``RelationType`` value.
        """
        if self.properties is None:
            self.properties = {}
        self.relation_type = self._coerce_relation_type(self.relation_type)

    @staticmethod
    def _coerce_relation_type(value: Any) -> Any:
        """Map ``None`` to ``RelationType.UNKNOWN`` and strings to members."""
        if value is None:
            # A missing type must not override the declared UNKNOWN default.
            return RelationType.UNKNOWN
        if isinstance(value, str):
            return RelationType(value)
        return value

    @staticmethod
    def _parse_timestamp(value: Any) -> Optional[datetime]:
        """Return a ``datetime`` for an ISO string or ``datetime`` input.

        Falsy input (missing key, ``None``, empty string) yields ``None``.
        """
        if not value:
            return None
        if isinstance(value, datetime):
            # Already parsed; fromisoformat() would raise TypeError on it.
            return value
        return datetime.fromisoformat(value)

    def to_dict(self) -> Dict[str, Any]:
        """Convert the edge to a plain dictionary.

        Returns:
            A dict with the enum rendered as its string value and
            ``created_at`` rendered as an ISO-8601 string (``None`` when
            unset).
        """
        return {
            'id': self.id,
            'source_id': self.source_id,
            'target_id': self.target_id,
            # Guard kept so an edge whose type was reset to None still serializes.
            'relation_type': self.relation_type.value if self.relation_type else None,
            'weight': self.weight,
            'properties': self.properties,
            'created_at': self.created_at.isoformat() if self.created_at else None,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'GraphEdge':
        """Create an instance from a dictionary.

        Missing or ``None`` values for ``source_id``, ``target_id``,
        ``relation_type`` and ``weight`` fall back to the field defaults.

        Args:
            data: Mapping using the same keys that ``to_dict`` produces.

        Returns:
            The reconstructed ``GraphEdge``.

        Raises:
            ValueError: If ``relation_type`` is an unrecognized string or
                the timestamp string is not valid ISO format.
        """
        weight = data.get('weight')
        return cls(
            id=data.get('id'),
            source_id=data.get('source_id') or '',
            target_id=data.get('target_id') or '',
            relation_type=cls._coerce_relation_type(data.get('relation_type')),
            # Explicit None check so a legitimate weight of 0 is preserved.
            weight=1.0 if weight is None else weight,
            properties=data.get('properties', {}),
            created_at=cls._parse_timestamp(data.get('created_at')),
        )
|
|
|
+
|
|
|
+
|
|
|
@dataclass
class GraphEntity:
    """Graph entity data model (an extended node model).

    Wraps a ``GraphNode`` together with extra fields stored alongside it.

    Attributes:
        node: The wrapped ``GraphNode``.
        entity_type: Free-form entity type string; defaults to ``""``.
        confidence: Numeric confidence value; defaults to ``1.0``.
        source_document: Optional source-document reference.
        extraction_method: Optional name of the extraction method.
    """

    node: GraphNode
    entity_type: str = ""
    confidence: float = 1.0
    source_document: Optional[str] = None
    extraction_method: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Convert the entity to a plain dictionary.

        Returns:
            A dict whose ``'node'`` entry is the wrapped node's own
            ``to_dict()`` output; the other fields are copied as-is.
        """
        return {
            'node': self.node.to_dict(),
            'entity_type': self.entity_type,
            'confidence': self.confidence,
            'source_document': self.source_document,
            'extraction_method': self.extraction_method,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'GraphEntity':
        """Create an instance from a dictionary.

        A missing or ``None`` ``'node'`` entry produces a default
        ``GraphNode``; missing or ``None`` ``entity_type`` / ``confidence``
        fall back to the field defaults.

        Args:
            data: Mapping using the same keys that ``to_dict`` produces.

        Returns:
            The reconstructed ``GraphEntity``.
        """
        confidence = data.get('confidence')
        return cls(
            # `or {}` also covers an explicit null, which `.get(k, {})` lets through.
            node=GraphNode.from_dict(data.get('node') or {}),
            entity_type=data.get('entity_type') or '',
            # Explicit None check so a legitimate confidence of 0 is preserved.
            confidence=1.0 if confidence is None else confidence,
            source_document=data.get('source_document'),
            extraction_method=data.get('extraction_method'),
        )
|
|
|
+
|
|
|
+
|
|
|
@dataclass
class GraphRelation:
    """Graph relation data model (an extended edge model).

    Wraps a ``GraphEdge`` together with extra fields stored alongside it.

    Attributes:
        edge: The wrapped ``GraphEdge``.
        relation_subtype: Optional free-form subtype string.
        confidence: Numeric confidence value; defaults to ``1.0``.
        source_sentence: Optional source sentence text.
        extraction_method: Optional name of the extraction method.
    """

    edge: GraphEdge
    relation_subtype: Optional[str] = None
    confidence: float = 1.0
    source_sentence: Optional[str] = None
    extraction_method: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Convert the relation to a plain dictionary.

        Returns:
            A dict whose ``'edge'`` entry is the wrapped edge's own
            ``to_dict()`` output; the other fields are copied as-is.
        """
        return {
            'edge': self.edge.to_dict(),
            'relation_subtype': self.relation_subtype,
            'confidence': self.confidence,
            'source_sentence': self.source_sentence,
            'extraction_method': self.extraction_method,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'GraphRelation':
        """Create an instance from a dictionary.

        A missing or ``None`` ``'edge'`` entry produces a default
        ``GraphEdge``; a missing or ``None`` ``confidence`` falls back to
        ``1.0``.

        Args:
            data: Mapping using the same keys that ``to_dict`` produces.

        Returns:
            The reconstructed ``GraphRelation``.
        """
        confidence = data.get('confidence')
        return cls(
            # `or {}` also covers an explicit null, which `.get(k, {})` lets through.
            edge=GraphEdge.from_dict(data.get('edge') or {}),
            relation_subtype=data.get('relation_subtype'),
            # Explicit None check so a legitimate confidence of 0 is preserved.
            confidence=1.0 if confidence is None else confidence,
            source_sentence=data.get('source_sentence'),
            extraction_method=data.get('extraction_method'),
        )
|
|
|
+
|
|
|
+
|
|
|
@dataclass
class KnowledgeGraph:
    """Knowledge graph data model.

    Attributes:
        id: Optional graph identifier; ``None`` when not assigned.
        name: Name of the graph; defaults to ``""``.
        description: Optional description text.
        nodes: ``GraphEntity`` items of the graph; ``None`` is replaced by
            an empty list on construction.
        relations: ``GraphRelation`` items of the graph; ``None`` is
            replaced by an empty list on construction.
        metadata: Free-form key/value data; ``None`` is replaced by an
            empty dict on construction.
        created_at: Optional creation timestamp.
        updated_at: Optional last-update timestamp.
    """

    id: Optional[str] = None
    name: str = ""
    description: Optional[str] = None
    # Annotated Optional because None is the declared default; a fresh
    # container is created per instance in __post_init__.
    nodes: Optional[List[GraphEntity]] = None
    relations: Optional[List[GraphRelation]] = None
    metadata: Optional[Dict[str, Any]] = None
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None

    def __post_init__(self):
        """Replace ``None`` containers with fresh empty ones."""
        if self.nodes is None:
            self.nodes = []
        if self.relations is None:
            self.relations = []
        if self.metadata is None:
            self.metadata = {}

    @staticmethod
    def _parse_timestamp(value: Any) -> Optional[datetime]:
        """Return a ``datetime`` for an ISO string or ``datetime`` input.

        Falsy input (missing key, ``None``, empty string) yields ``None``.
        """
        if not value:
            return None
        if isinstance(value, datetime):
            # Already parsed; fromisoformat() would raise TypeError on it.
            return value
        return datetime.fromisoformat(value)

    def to_dict(self) -> Dict[str, Any]:
        """Convert the graph to a plain dictionary.

        Returns:
            A dict in which every node and relation is rendered through
            its own ``to_dict()`` and the timestamps are rendered as
            ISO-8601 strings (``None`` when unset).
        """
        return {
            'id': self.id,
            'name': self.name,
            'description': self.description,
            'nodes': [node.to_dict() for node in self.nodes],
            'relations': [relation.to_dict() for relation in self.relations],
            'metadata': self.metadata,
            'created_at': self.created_at.isoformat() if self.created_at else None,
            'updated_at': self.updated_at.isoformat() if self.updated_at else None,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'KnowledgeGraph':
        """Create an instance from a dictionary.

        Missing or ``None`` ``nodes`` / ``relations`` entries are treated
        as empty lists, and a missing or ``None`` ``name`` as ``""``.

        Args:
            data: Mapping using the same keys that ``to_dict`` produces.

        Returns:
            The reconstructed ``KnowledgeGraph``.

        Raises:
            ValueError: If a timestamp string is not valid ISO format.
        """
        # `or []` also covers an explicit null, which would otherwise
        # raise TypeError when iterated.
        nodes_data = data.get('nodes') or []
        relations_data = data.get('relations') or []

        return cls(
            id=data.get('id'),
            name=data.get('name') or '',
            description=data.get('description'),
            nodes=[GraphEntity.from_dict(item) for item in nodes_data],
            relations=[GraphRelation.from_dict(item) for item in relations_data],
            metadata=data.get('metadata', {}),
            created_at=cls._parse_timestamp(data.get('created_at')),
            updated_at=cls._parse_timestamp(data.get('updated_at')),
        )
|
|
|
+
|
|
|
+
|
|
|
# Names exported by ``from <module> import *``.
__all__ = [
    "NodeType",
    "RelationType",
    "GraphNode",
    "GraphEdge",
    "GraphEntity",
    "GraphRelation",
    "KnowledgeGraph",
]
|