本设计文档描述了标注平台对外开放API及内部配套管理功能的技术实现方案。主要包括:
对外API统一使用 /api/external/ 前缀,供样本中心等外部系统调用。外部系统通过管理员Token调用对外接口,实现项目初始化、进度查询和数据导出功能;管理员通过内部界面完成项目配置和任务分发。
graph TB
subgraph "外部系统"
SC[样本中心]
end
subgraph "标注平台"
subgraph "Frontend"
PM[项目管理页面]
PC[项目配置页面]
TD[任务分发页面]
end
subgraph "API Layer"
EA[External API Router<br/>/api/external/*]
PA[Project API Router<br/>/api/projects/*]
TA[Task API Router<br/>/api/tasks/*]
end
subgraph "Middleware"
AM[Auth Middleware]
end
subgraph "Services"
ES[External Service]
PS[Project Service]
TS[Task Service]
AS[Assignment Service]
EXS[Export Service]
end
subgraph "Data Layer"
DB[(Database)]
end
end
SC -->|Admin Token| EA
PM --> PA
PC --> PA
TD --> TA
EA --> AM
PA --> AM
TA --> AM
AM --> ES
AM --> PS
AM --> TS
ES --> PS
ES --> TS
ES --> EXS
TS --> AS
PS --> DB
TS --> DB
AS --> DB
EXS --> DB
/api/external/ # 对外API
├── /projects/init POST - 项目初始化
├── /projects/{id}/progress GET - 进度查询
└── /projects/{id}/export POST - 数据导出
/api/projects/ # 内部项目API(扩展)
├── /{id}/config PUT - 更新项目配置
├── /{id}/status PUT - 更新项目状态
└── /{id}/dispatch POST - 一键任务分发
/api/tasks/ # 内部任务API(扩展)
├── /preview-assignment POST - 预览任务分配
└── /batch-assign POST - 批量分配(已有)
stateDiagram-v2
[*] --> draft: 外部系统创建项目
draft --> configuring: 管理员开始配置
configuring --> ready: 管理员完成配置
ready --> in_progress: 管理员分发任务
in_progress --> completed: 所有任务完成
configuring --> draft: 重置配置
ready --> configuring: 修改配置
External API Router(routers/external.py):负责处理所有对外API请求的路由模块。
from fastapi import APIRouter, HTTPException, status, Request
from schemas.external import (
ProjectInitRequest, ProjectInitResponse,
ProgressResponse, ExternalExportRequest, ExternalExportResponse
)
from services.external_service import ExternalService
# Router for the external API; every route shares the /api/external prefix.
router = APIRouter(prefix="/api/external", tags=["external"])
External Service(services/external_service.py):封装对外API的业务逻辑。
class ExternalService:
    """Business logic behind the external API.

    Interface sketch only: every method body is a placeholder.
    """

    @staticmethod
    def init_project(request: ProjectInitRequest, user_id: str) -> ProjectInitResponse:
        """Initialize a project and create its tasks."""
        ...

    @staticmethod
    def get_project_progress(project_id: str) -> ProgressResponse:
        """Return the progress of a project."""
        ...

    @staticmethod
    def export_project_data(project_id: str, request: ExternalExportRequest) -> ExternalExportResponse:
        """Export the data of a project."""
        ...
Assignment Service(services/assignment_service.py):封装任务分配的业务逻辑。
class AssignmentService:
    """Business logic for task assignment.

    Interface sketch only: every method body is a placeholder.
    """

    # NOTE(review): AssignmentPreview, DispatchResult and AnnotatorWorkload are
    # not among the schemas shown in this document (which defines
    # AssignmentPreviewResponse and DispatchResponse) - confirm the intended
    # return types.

    @staticmethod
    def preview_assignment(project_id: str, user_ids: List[str]) -> AssignmentPreview:
        """Preview the result of assigning the project's tasks."""
        ...

    @staticmethod
    def dispatch_tasks(project_id: str, user_ids: List[str], mode: str = "equal") -> DispatchResult:
        """Run the one-click task dispatch."""
        ...

    @staticmethod
    def get_annotator_workload(user_id: str) -> AnnotatorWorkload:
        """Return the annotator's current workload."""
        ...
对外API数据模型(schemas/external.py)
class TaskType(str, Enum):
TEXT_CLASSIFICATION = "text_classification"
IMAGE_CLASSIFICATION = "image_classification"
OBJECT_DETECTION = "object_detection"
NER = "ner"
class TaskDataItem(BaseModel):
    """A single task data item."""

    # Data ID assigned by the external system.
    id: Optional[str] = None
    # Text content or image URL.
    content: str
    # Extra metadata.
    metadata: Optional[dict] = None
class ProjectInitRequest(BaseModel):
    """Request body for project initialization."""

    name: str
    description: Optional[str] = ""
    task_type: TaskType
    data: List[TaskDataItem]
    # Custom XML config; when empty, the default empty template is used.
    config: Optional[str] = None
    # Project ID in the external system, used for correlation.
    external_id: Optional[str] = None
class ProjectInitResponse(BaseModel):
    """Response body for project initialization."""

    # Project ID on the annotation platform; the sample center must store it
    # for later callbacks.
    project_id: str
    project_name: str
    task_count: int
    # "draft"
    status: str
    created_at: datetime
    # XML config template actually in use.
    config: str
    # External ID passed in by the sample center, if any.
    external_id: Optional[str] = None
class AnnotatorProgress(BaseModel):
    """Progress of a single annotator within a project.

    The per-annotator correctness property in this document requires
    assigned_count == completed_count + in_progress_count + pending_count,
    so the pending counter is part of the model.
    """

    user_id: str
    username: str
    assigned_count: int
    completed_count: int
    in_progress_count: int
    # Assigned tasks not yet started. Defaults to 0 so callers that do not
    # pass the field still construct a valid model.
    pending_count: int = 0
    completion_rate: float
class ProgressResponse(BaseModel):
    """Response body for a project progress query."""

    project_id: str
    project_name: str

    # Task counters.
    total_tasks: int
    completed_tasks: int
    in_progress_tasks: int
    pending_tasks: int
    completion_percentage: float

    # Per-annotator breakdown.
    annotators: List[AnnotatorProgress]
    last_updated: datetime
class ExternalExportFormat(str, Enum):
    """Export formats accepted by the external export endpoint."""

    JSON = "json"
    CSV = "csv"
    # ShareGPT conversation format.
    SHAREGPT = "sharegpt"
    # YOLO object-detection format.
    YOLO = "yolo"
    # COCO dataset format.
    COCO = "coco"
    # Alpaca instruction-tuning format.
    ALPACA = "alpaca"
class ExternalExportRequest(BaseModel):
    """Request body for an export."""

    format: ExternalExportFormat = ExternalExportFormat.JSON
    # Whether to export only completed tasks.
    completed_only: bool = True
    # Callback URL; the sample center is notified there once the export finishes.
    callback_url: Optional[str] = None
class ExportedTaskData(BaseModel):
    """Data of one exported task.

    The nullable fields carry an explicit ``None`` default, like the optional
    fields of the other models in this document. Without it pydantic v2
    treats ``Optional[...]`` fields as required.
    """

    task_id: str
    # Data ID assigned by the external system.
    external_id: Optional[str] = None
    original_data: dict
    annotations: List[dict]
    status: str
    annotator: Optional[str] = None
    completed_at: Optional[datetime] = None
class ExternalExportResponse(BaseModel):
    """Response body for an export."""

    project_id: str
    format: str
    total_exported: int
    # Download URL of the exported file.
    file_url: str
    # File name.
    file_name: str
    # File size in bytes.
    file_size: Optional[int] = None
    # Expiry time of the download link.
    expires_at: Optional[datetime] = None
class ExportCallbackPayload(BaseModel):
    """Export-completion callback payload.

    ``status`` may be "failed", in which case no file exists. The file
    fields are therefore nullable and ``total_exported`` defaults to 0, so a
    failure callback can be built with only ``error_message`` filled in.
    Payloads that supply every field are unaffected.
    """

    project_id: str
    export_id: str
    # "completed" or "failed"
    status: str
    format: str
    total_exported: int = 0
    file_url: Optional[str] = None
    file_name: Optional[str] = None
    file_size: Optional[int] = None
    error_message: Optional[str] = None
项目数据模型(schemas/project.py 扩展)
class ProjectStatus(str, Enum):
DRAFT = "draft"
CONFIGURING = "configuring"
READY = "ready"
IN_PROGRESS = "in_progress"
COMPLETED = "completed"
class ProjectSource(str, Enum):
    """Origin of a project: internal or external."""

    INTERNAL = "internal"
    EXTERNAL = "external"
class ProjectStatusUpdate(BaseModel):
    """Request body for a project status update."""

    # Status carried by the update request.
    status: ProjectStatus
# LabelConfig is declared first: ProjectConfigUpdate references it in a field
# annotation, which is evaluated when the class body runs, so the reverse
# order raises NameError at import time.
class LabelConfig(BaseModel):
    """Configuration of a single label."""

    name: str
    color: Optional[str] = None
    hotkey: Optional[str] = None


class ProjectConfigUpdate(BaseModel):
    """Request body for a project configuration update."""

    # XML config.
    config: str
    labels: Optional[List[LabelConfig]] = None
class DispatchRequest(BaseModel):
    """Request body for one-click dispatch."""

    user_ids: List[str]
    # Either "equal" or "round_robin".
    mode: str = "equal"
class AssignmentPreviewRequest(BaseModel):
    """Request body for an assignment preview."""

    # IDs of the users included in the preview.
    user_ids: List[str]
class AnnotatorAssignment(BaseModel):
    """Assignment information for a single annotator."""

    user_id: str
    username: str
    task_count: int
    percentage: float
    # Number of tasks the annotator already has.
    current_workload: int
class AssignmentPreviewResponse(BaseModel):
    """Response body for an assignment preview."""

    project_id: str
    total_tasks: int
    # One entry per annotator.
    assignments: List[AnnotatorAssignment]
class DispatchResponse(BaseModel):
    """Response body describing a dispatch result."""

    project_id: str
    success: bool
    total_assigned: int
    assignments: List[AnnotatorAssignment]
    # Project status after the update.
    project_status: str
class ProjectResponseExtended(BaseModel):
    """Extended project response (includes status and source)."""

    # Core project fields.
    id: str
    name: str
    description: str
    config: str
    task_type: Optional[str] = None

    # Lifecycle status and origin.
    status: ProjectStatus = ProjectStatus.DRAFT
    source: ProjectSource = ProjectSource.INTERNAL

    # Timestamps.
    created_at: datetime
    updated_at: Optional[datetime] = None

    # Task counters.
    task_count: int = 0
    completed_task_count: int = 0
    assigned_task_count: int = 0
需要在 projects 表中添加以下字段:
-- Add the new columns to the projects table, one ALTER statement per column.
ALTER TABLE projects ADD COLUMN status VARCHAR(20) DEFAULT 'draft';
ALTER TABLE projects ADD COLUMN source VARCHAR(20) DEFAULT 'internal';
ALTER TABLE projects ADD COLUMN task_type VARCHAR(50);
ALTER TABLE projects ADD COLUMN updated_at TIMESTAMP;
ALTER TABLE projects ADD COLUMN external_id VARCHAR(100); -- project ID in the external system
# Default config templates, keyed by task type. They contain no labels;
# an administrator configures the labels later.
DEFAULT_CONFIGS = {
"text_classification": """
<View>
<Text name="text" value="$text"/>
<Choices name="label" toName="text" choice="single">
<!-- 标签由管理员配置 -->
</Choices>
</View>
""",
"image_classification": """
<View>
<Image name="image" value="$image"/>
<Choices name="label" toName="image" choice="single">
<!-- 标签由管理员配置 -->
</Choices>
</View>
""",
"object_detection": """
<View>
<Image name="image" value="$image"/>
<RectangleLabels name="label" toName="image">
<!-- 标签由管理员配置 -->
</RectangleLabels>
</View>
""",
"ner": """
<View>
<Text name="text" value="$text"/>
<Labels name="label" toName="text">
<!-- 标签由管理员配置 -->
</Labels>
</View>
"""
}
根据任务类型,任务数据的存储格式:
# 文本分类/NER
{
"items": [
{
"id": "item_001",
"external_id": "ext_123",
"text": "这是一段待标注的文本"
}
]
}
# 图像分类/目标检测
{
"items": [
{
"id": "item_001",
"external_id": "ext_123",
"image": "https://example.com/image.jpg"
}
]
}
A property is a characteristic or behavior that should hold true across all valid executions of a system — essentially, a formal statement about what the system should do. Properties serve as the bridge between human-readable specifications and machine-verifiable correctness guarantees.
For any API请求,如果提供的Token无效或过期,系统应返回401未授权错误;如果Token有效但用户不是管理员,系统应返回403禁止访问错误。
Validates: Requirements 1.5, 2.6, 3.8, 4.2, 4.4, 4.5, 4.6
For any 有效的项目初始化请求,创建的任务数量应等于请求中提供的数据项数量,且返回的响应应包含有效的项目ID和正确的任务计数。
Validates: Requirements 1.2, 1.3, 1.4
For any 项目,返回的完成百分比应等于已完成任务数除以总任务数,且总任务数应等于已完成、进行中和待处理任务数之和。
Validates: Requirements 2.2, 2.3
For any 项目进度查询,返回的标注人员统计中,每个人员的任务总数(assigned_count)应等于其completed_count + in_progress_count + pending_count。
Validates: Requirements 2.4
For any 导出请求,如果指定completed_only=true,则导出的所有任务状态都应为"completed";如果指定completed_only=false,则导出的任务数量应等于项目的总任务数。
Validates: Requirements 3.5, 3.6
For any 导出的任务数据,应包含原始数据(original_data)和对应的标注结果(annotations),且original_data应与创建时的输入数据一致。
Validates: Requirements 3.4
For any 项目初始化请求,系统应根据task_type生成对应的默认XML配置模板(不含标签),标签由管理员后续配置。
Validates: Requirements 5.5
For any 进度查询或导出请求,如果项目ID不存在,系统应返回404错误。
Validates: Requirements 2.5, 3.7
For any 项目状态更新操作,系统应只允许符合状态流转规则的转换:正向流转 draft→configuring→ready→in_progress→completed,以及回退 configuring→draft(重置配置)和 ready→configuring(修改配置);其他转换均应被拒绝。
Validates: Requirements 10.2, 10.3, 10.4, 10.5, 10.6, 10.7
For any 一键分发操作,分配给所有标注人员的任务总数应等于项目的总任务数,且每个人分配的任务数量差异不超过1。
Validates: Requirements 8.6, 8.7
For any 任务分发操作,如果使用相同的参数,预览结果中的任务分配数量应与实际分配结果一致。
Validates: Requirements 9.1, 9.2, 9.5
class ErrorResponse(BaseModel):
    """Unified error response format."""

    error_code: str
    message: str
    # Optional extra detail about the error.
    details: Optional[dict] = None
# Error code definitions: maps each error code to its message text.
ERROR_CODES = {
    # Authentication / authorization.
    "INVALID_TOKEN": "Token无效或已过期",
    "PERMISSION_DENIED": "权限不足,需要管理员权限",
    # Lookup and request validation.
    "PROJECT_NOT_FOUND": "项目不存在",
    "INVALID_REQUEST": "请求参数无效",
    "INVALID_TASK_TYPE": "不支持的任务类型",
    "INVALID_STATUS_TRANSITION": "无效的状态转换",
    # Dispatch preconditions.
    "PROJECT_NOT_READY": "项目尚未就绪,无法分发任务",
    "NO_TASKS_TO_ASSIGN": "没有可分配的任务",
    "NO_USERS_SELECTED": "未选择标注人员",
    # Server-side failures.
    "EXPORT_FAILED": "导出失败",
    "INTERNAL_ERROR": "内部服务器错误",
}
| 错误码 | HTTP状态码 | 说明 |
|---|---|---|
| INVALID_TOKEN | 401 | Token验证失败 |
| PERMISSION_DENIED | 403 | 非管理员用户 |
| PROJECT_NOT_FOUND | 404 | 项目不存在 |
| INVALID_REQUEST | 400 | 请求参数错误 |
| INVALID_TASK_TYPE | 400 | 任务类型不支持 |
| INVALID_STATUS_TRANSITION | 400 | 状态转换不合法 |
| PROJECT_NOT_READY | 400 | 项目未就绪 |
| NO_TASKS_TO_ASSIGN | 400 | 无可分配任务 |
| NO_USERS_SELECTED | 400 | 未选择用户 |
| EXPORT_FAILED | 500 | 导出过程出错 |
| INTERNAL_ERROR | 500 | 服务器内部错误 |
// Project list item, extended to show the project's status and source.
interface ProjectListItem {
id: string;
name: string;
description: string;
taskType: string;
status: 'draft' | 'configuring' | 'ready' | 'in_progress' | 'completed';
source: 'internal' | 'external';
taskCount: number;
completedTaskCount: number;
createdAt: string;
}
// Status filter: 'all' plus each project status value.
type StatusFilter = 'all' | 'draft' | 'configuring' | 'ready' | 'in_progress' | 'completed';
// Props of the label configuration component.
interface LabelEditorProps {
labels: LabelConfig[];
onLabelsChange: (labels: LabelConfig[]) => void;
taskType: string;
}
// Props of the XML config preview component.
interface ConfigPreviewProps {
config: string;
onConfigChange: (config: string) => void;
}
// Props of the dispatch dialog component.
interface DispatchDialogProps {
projectId: string;
totalTasks: number;
onDispatch: (userIds: string[]) => Promise<void>;
}
// Props of the annotator selection component.
interface AnnotatorSelectorProps {
annotators: AnnotatorInfo[];
selectedIds: string[];
onSelectionChange: (ids: string[]) => void;
}
// Annotator entry, as used by AnnotatorSelectorProps.annotators.
interface AnnotatorInfo {
id: string;
username: string;
currentWorkload: number; // current number of tasks
completedToday: number; // number completed today
}
// Props of the assignment preview component.
interface AssignmentPreviewProps {
preview: AssignmentPreviewResponse;
}
使用 hypothesis 库进行属性测试:
# pytest.ini configuration
[pytest]
testpaths = test
python_files = test_*.py
python_functions = test_*
# hypothesis configuration: 100 examples per property, no per-example deadline.
hypothesis_settings = dict(max_examples=100, deadline=None)