| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260 |
- """
- 图数据模型定义
- 提供知识图谱相关的通用数据结构定义
- """
- from typing import Optional, Dict, Any, List, Union
- from dataclasses import dataclass
- from datetime import datetime
- from enum import Enum
class NodeType(Enum):
    """Closed set of node categories.

    Each member's value is the lowercase string emitted when a node is
    serialized (``GraphNode.to_dict`` writes ``node_type.value``).
    """

    # Concrete categories.
    PERSON = "person"
    ORGANIZATION = "organization"
    LOCATION = "location"
    CONCEPT = "concept"
    EVENT = "event"
    DOCUMENT = "document"

    # Default category used by GraphNode when no type is supplied.
    UNKNOWN = "unknown"
class RelationType(Enum):
    """Closed set of relation categories.

    Each member's value is the lowercase string emitted when an edge is
    serialized (``GraphEdge.to_dict`` writes ``relation_type.value``).
    """

    # Concrete categories.
    BELONGS_TO = "belongs_to"
    LOCATED_IN = "located_in"
    RELATED_TO = "related_to"
    PART_OF = "part_of"
    INSTANCE_OF = "instance_of"
    KNOWS = "knows"
    WORKS_FOR = "works_for"

    # Default category used by GraphEdge when no type is supplied.
    UNKNOWN = "unknown"
@dataclass
class GraphNode:
    """Graph node data model.

    ``__post_init__`` replaces a ``None`` ``properties`` with an empty dict
    and converts a string ``node_type`` into the matching ``NodeType``.
    """

    id: Optional[str] = None
    label: str = ""
    node_type: NodeType = NodeType.UNKNOWN
    properties: Optional[Dict[str, Any]] = None  # becomes {} in __post_init__
    embeddings: Optional[List[float]] = None
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None

    def __post_init__(self):
        """Normalize ``properties`` and ``node_type`` after construction.

        Raises:
            ValueError: if ``node_type`` is a string that is not the value
                of any ``NodeType`` member.
        """
        if self.properties is None:
            self.properties = {}
        if isinstance(self.node_type, str):
            self.node_type = NodeType(self.node_type)

    @staticmethod
    def _parse_timestamp(value: Union[str, datetime, None]) -> Optional[datetime]:
        """Return ``value`` as a datetime; falsy input yields ``None``."""
        if not value:
            return None
        # Already-parsed timestamps are passed through instead of being fed
        # to fromisoformat(), which only accepts strings.
        if isinstance(value, datetime):
            return value
        return datetime.fromisoformat(value)

    def to_dict(self) -> Dict[str, Any]:
        """Convert the node to a plain dictionary.

        The enum is written as its string value and timestamps as ISO 8601
        strings; unset values are written as ``None``.
        """
        return {
            'id': self.id,
            'label': self.label,
            'node_type': self.node_type.value if self.node_type else None,
            'properties': self.properties,
            'embeddings': self.embeddings,
            'created_at': self.created_at.isoformat() if self.created_at else None,
            'updated_at': self.updated_at.isoformat() if self.updated_at else None
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'GraphNode':
        """Create an instance from a dictionary.

        Args:
            data: mapping using the keys written by ``to_dict``. Every key
                is optional.

        Returns:
            A new ``GraphNode``. A missing or ``None`` ``node_type`` becomes
            ``NodeType.UNKNOWN``.

        Raises:
            ValueError: if ``node_type`` is an unrecognized string or a
                timestamp string is not valid ISO 8601.
        """
        node_type = data.get('node_type')
        if node_type is None:
            # Keep the field's declared default rather than storing None.
            node_type = NodeType.UNKNOWN
        return cls(
            id=data.get('id'),
            label=data.get('label', ''),
            node_type=node_type,  # string values are converted in __post_init__
            properties=data.get('properties', {}),
            embeddings=data.get('embeddings', []),
            created_at=cls._parse_timestamp(data.get('created_at')),
            updated_at=cls._parse_timestamp(data.get('updated_at'))
        )
@dataclass
class GraphEdge:
    """Graph edge data model.

    ``__post_init__`` replaces a ``None`` ``properties`` with an empty dict
    and converts a string ``relation_type`` into the matching
    ``RelationType``.
    """

    id: Optional[str] = None
    source_id: str = ""
    target_id: str = ""
    relation_type: RelationType = RelationType.UNKNOWN
    weight: float = 1.0
    properties: Optional[Dict[str, Any]] = None  # becomes {} in __post_init__
    created_at: Optional[datetime] = None

    def __post_init__(self):
        """Normalize ``properties`` and ``relation_type`` after construction.

        Raises:
            ValueError: if ``relation_type`` is a string that is not the
                value of any ``RelationType`` member.
        """
        if self.properties is None:
            self.properties = {}
        if isinstance(self.relation_type, str):
            self.relation_type = RelationType(self.relation_type)

    @staticmethod
    def _parse_timestamp(value: Union[str, datetime, None]) -> Optional[datetime]:
        """Return ``value`` as a datetime; falsy input yields ``None``."""
        if not value:
            return None
        # Already-parsed timestamps are passed through instead of being fed
        # to fromisoformat(), which only accepts strings.
        if isinstance(value, datetime):
            return value
        return datetime.fromisoformat(value)

    def to_dict(self) -> Dict[str, Any]:
        """Convert the edge to a plain dictionary.

        The enum is written as its string value and the timestamp as an
        ISO 8601 string; unset values are written as ``None``.
        """
        return {
            'id': self.id,
            'source_id': self.source_id,
            'target_id': self.target_id,
            'relation_type': self.relation_type.value if self.relation_type else None,
            'weight': self.weight,
            'properties': self.properties,
            'created_at': self.created_at.isoformat() if self.created_at else None
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'GraphEdge':
        """Create an instance from a dictionary.

        Args:
            data: mapping using the keys written by ``to_dict``. Every key
                is optional.

        Returns:
            A new ``GraphEdge``. A missing or ``None`` ``relation_type``
            becomes ``RelationType.UNKNOWN``.

        Raises:
            ValueError: if ``relation_type`` is an unrecognized string or
                the timestamp string is not valid ISO 8601.
        """
        relation_type = data.get('relation_type')
        if relation_type is None:
            # Keep the field's declared default rather than storing None.
            relation_type = RelationType.UNKNOWN
        return cls(
            id=data.get('id'),
            source_id=data.get('source_id', ''),
            target_id=data.get('target_id', ''),
            relation_type=relation_type,  # string values are converted in __post_init__
            weight=data.get('weight', 1.0),
            properties=data.get('properties', {}),
            created_at=cls._parse_timestamp(data.get('created_at'))
        )
@dataclass
class GraphEntity:
    """Graph entity data model: a ``GraphNode`` plus extraction details."""

    node: GraphNode
    entity_type: str = ""
    confidence: float = 1.0
    source_document: Optional[str] = None
    extraction_method: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Convert the entity to a dictionary, nesting the node's own dict."""
        payload: Dict[str, Any] = {'node': self.node.to_dict()}
        payload['entity_type'] = self.entity_type
        payload['confidence'] = self.confidence
        payload['source_document'] = self.source_document
        payload['extraction_method'] = self.extraction_method
        return payload

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'GraphEntity':
        """Create an instance from a dictionary.

        The nested ``'node'`` mapping (an empty dict when the key is absent)
        is handed to ``GraphNode.from_dict``; the remaining keys fall back
        to the field defaults.
        """
        fetch = data.get
        wrapped_node = GraphNode.from_dict(fetch('node', {}))
        kwargs = {
            'node': wrapped_node,
            'entity_type': fetch('entity_type', ''),
            'confidence': fetch('confidence', 1.0),
            'source_document': fetch('source_document'),
            'extraction_method': fetch('extraction_method'),
        }
        return cls(**kwargs)
@dataclass
class GraphRelation:
    """Graph relation data model: a ``GraphEdge`` plus extraction details."""

    edge: GraphEdge
    relation_subtype: Optional[str] = None
    confidence: float = 1.0
    source_sentence: Optional[str] = None
    extraction_method: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Convert the relation to a dictionary, nesting the edge's own dict."""
        payload: Dict[str, Any] = {'edge': self.edge.to_dict()}
        payload['relation_subtype'] = self.relation_subtype
        payload['confidence'] = self.confidence
        payload['source_sentence'] = self.source_sentence
        payload['extraction_method'] = self.extraction_method
        return payload

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'GraphRelation':
        """Create an instance from a dictionary.

        The nested ``'edge'`` mapping (an empty dict when the key is absent)
        is handed to ``GraphEdge.from_dict``; the remaining keys fall back
        to the field defaults.
        """
        fetch = data.get
        wrapped_edge = GraphEdge.from_dict(fetch('edge', {}))
        kwargs = {
            'edge': wrapped_edge,
            'relation_subtype': fetch('relation_subtype'),
            'confidence': fetch('confidence', 1.0),
            'source_sentence': fetch('source_sentence'),
            'extraction_method': fetch('extraction_method'),
        }
        return cls(**kwargs)
@dataclass
class KnowledgeGraph:
    """Knowledge graph data model.

    Holds a list of ``GraphEntity`` and a list of ``GraphRelation`` together
    with descriptive fields. ``__post_init__`` replaces ``None`` for
    ``nodes``, ``relations`` and ``metadata`` with empty containers.
    """

    id: Optional[str] = None
    name: str = ""
    description: Optional[str] = None
    # The three container fields default to None so that each instance gets
    # its own fresh container in __post_init__.
    nodes: Optional[List[GraphEntity]] = None
    relations: Optional[List[GraphRelation]] = None
    metadata: Optional[Dict[str, Any]] = None
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None

    def __post_init__(self):
        """Replace ``None`` containers with empty ones."""
        if self.nodes is None:
            self.nodes = []
        if self.relations is None:
            self.relations = []
        if self.metadata is None:
            self.metadata = {}

    @staticmethod
    def _parse_timestamp(value: Union[str, datetime, None]) -> Optional[datetime]:
        """Return ``value`` as a datetime; falsy input yields ``None``."""
        if not value:
            return None
        # Already-parsed timestamps are passed through instead of being fed
        # to fromisoformat(), which only accepts strings.
        if isinstance(value, datetime):
            return value
        return datetime.fromisoformat(value)

    def to_dict(self) -> Dict[str, Any]:
        """Convert the graph to a plain dictionary.

        Entities and relations are serialized through their own ``to_dict``;
        timestamps are written as ISO 8601 strings or ``None``.
        """
        return {
            'id': self.id,
            'name': self.name,
            'description': self.description,
            'nodes': [node.to_dict() for node in self.nodes],
            'relations': [relation.to_dict() for relation in self.relations],
            'metadata': self.metadata,
            'created_at': self.created_at.isoformat() if self.created_at else None,
            'updated_at': self.updated_at.isoformat() if self.updated_at else None
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'KnowledgeGraph':
        """Create an instance from a dictionary.

        Args:
            data: mapping using the keys written by ``to_dict``. Every key
                is optional; ``nodes`` / ``relations`` that are missing or
                ``None`` are read as empty lists.

        Returns:
            A new ``KnowledgeGraph``.

        Raises:
            ValueError: if a timestamp string is not valid ISO 8601.
        """
        # ``or []`` mirrors __post_init__, which also treats None as empty.
        nodes_data = data.get('nodes') or []
        relations_data = data.get('relations') or []
        nodes = [GraphEntity.from_dict(node_data) for node_data in nodes_data]
        relations = [GraphRelation.from_dict(relation_data) for relation_data in relations_data]
        return cls(
            id=data.get('id'),
            name=data.get('name', ''),
            description=data.get('description'),
            nodes=nodes,
            relations=relations,
            metadata=data.get('metadata', {}),
            created_at=cls._parse_timestamp(data.get('created_at')),
            updated_at=cls._parse_timestamp(data.get('updated_at'))
        )
# Names exported by a star-import of this module.
__all__ = [
    "NodeType",
    "RelationType",
    "GraphNode",
    "GraphEdge",
    "GraphEntity",
    "GraphRelation",
    "KnowledgeGraph"
]
|