CRBC-MaaS-Platform-Project
/
LabelingSystem


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966
							"""
External API Service.
Provides business logic for external system integration.
"""
import uuid
import json
import logging
import random
from datetime import datetime
from typing import Optional, List, Dict, Any

from database import get_db_connection
from schemas.external import (
    TaskType, ProjectInitRequest, ProjectInitResponse,
    ProgressResponse, AnnotatorProgress,
    ExternalExportFormat, ExternalExportRequest, ExternalExportResponse,
    TaskDataItem, TagItem
)

logger = logging.getLogger(__name__)


def generate_random_color() -> str:
    """
    生成随机颜色
    
    Returns:
        str: #RRGGBB 格式的颜色字符串
    """
    return f"#{random.randint(0, 0xFFFFFF):06x}"


# 预定义的颜色列表，用于生成更美观的颜色
PRESET_COLORS = [
    "#FF5733", "#33FF57", "#3357FF", "#FF33F5", "#F5FF33",
    "#33FFF5", "#FF8C33", "#8C33FF", "#33FF8C", "#FF338C",
    "#5733FF", "#57FF33", "#FF3357", "#33F5FF", "#F533FF",
    "#8CFF33", "#338CFF", "#FF338C", "#33FF57", "#5733FF"
]


def get_color_for_tag(index: int, specified_color: Optional[str] = None) -> str:
    """
    获取标签颜色
    
    Args:
        index: 标签索引，用于从预设颜色中选择
        specified_color: 指定的颜色，如果有则直接使用
        
    Returns:
        str: #RRGGBB 格式的颜色字符串
    """
    if specified_color:
        return specified_color
    if index < len(PRESET_COLORS):
        return PRESET_COLORS[index]
    return generate_random_color()


# 默认XML配置模板（不含标签，由管理员后续配置）
DEFAULT_CONFIGS = {
    TaskType.TEXT_CLASSIFICATION: """<View>
  <Text name="text" value="$text"/>
  <Choices name="label" toName="text" choice="single">
    <!-- 标签由管理员配置 -->
  </Choices>
</View>""",
    TaskType.IMAGE_CLASSIFICATION: """<View>
  <Image name="image" value="$image"/>
  <Choices name="label" toName="image" choice="single">
    <!-- 标签由管理员配置 -->
  </Choices>
</View>""",
    TaskType.OBJECT_DETECTION: """<View>
  <Image name="image" value="$image"/>
  <RectangleLabels name="label" toName="image">
    <!-- 标签由管理员配置 -->
  </RectangleLabels>
</View>""",
    TaskType.NER: """<View>
  <Text name="text" value="$text"/>
  <Labels name="label" toName="text">
    <!-- 标签由管理员配置 -->
  </Labels>
</View>""",
    TaskType.POLYGON: """<View>
  <Image name="image" value="$image"/>
  <PolygonLabels name="label" toName="image">
    <!-- 标签由管理员配置 -->
  </PolygonLabels>
</View>"""
}


def generate_config_with_tags(task_type: TaskType, tags: Optional[List[TagItem]] = None) -> str:
    """
    根据任务类型和标签生成XML配置
    
    Args:
        task_type: 任务类型
        tags: 标签列表，可选
        
    Returns:
        str: 生成的XML配置字符串
    """
    if not tags or len(tags) == 0:
        # 没有标签，返回默认配置
        return DEFAULT_CONFIGS.get(task_type, DEFAULT_CONFIGS[TaskType.TEXT_CLASSIFICATION])
    
    # 根据任务类型生成带标签的配置
    if task_type == TaskType.TEXT_CLASSIFICATION:
        labels_xml = "\n".join([
            f'    <Choice value="{tag.tag}" style="background-color: {get_color_for_tag(i, tag.color)}"/>'
            for i, tag in enumerate(tags)
        ])
        return f"""<View>
  <Text name="text" value="$text"/>
  <Choices name="label" toName="text" choice="single">
{labels_xml}
  </Choices>
</View>"""
    
    elif task_type == TaskType.IMAGE_CLASSIFICATION:
        labels_xml = "\n".join([
            f'    <Choice value="{tag.tag}" style="background-color: {get_color_for_tag(i, tag.color)}"/>'
            for i, tag in enumerate(tags)
        ])
        return f"""<View>
  <Image name="image" value="$image"/>
  <Choices name="label" toName="image" choice="single">
{labels_xml}
  </Choices>
</View>"""
    
    elif task_type == TaskType.OBJECT_DETECTION:
        labels_xml = "\n".join([
            f'    <Label value="{tag.tag}" background="{get_color_for_tag(i, tag.color)}"/>'
            for i, tag in enumerate(tags)
        ])
        return f"""<View>
  <Image name="image" value="$image"/>
  <RectangleLabels name="label" toName="image">
{labels_xml}
  </RectangleLabels>
</View>"""
    
    elif task_type == TaskType.NER:
        labels_xml = "\n".join([
            f'    <Label value="{tag.tag}" background="{get_color_for_tag(i, tag.color)}"/>'
            for i, tag in enumerate(tags)
        ])
        return f"""<View>
  <Text name="text" value="$text"/>
  <Labels name="label" toName="text">
{labels_xml}
  </Labels>
</View>"""
    
    elif task_type == TaskType.POLYGON:
        labels_xml = "\n".join([
            f'    <Label value="{tag.tag}" background="{get_color_for_tag(i, tag.color)}"/>'
            for i, tag in enumerate(tags)
        ])
        return f"""<View>
  <Image name="image" value="$image"/>
  <PolygonLabels name="label" toName="image">
{labels_xml}
  </PolygonLabels>
</View>"""
    
    else:
        return DEFAULT_CONFIGS.get(task_type, DEFAULT_CONFIGS[TaskType.TEXT_CLASSIFICATION])


class ExternalService:
    """对外API服务类"""
    
    @staticmethod
    def get_default_config(task_type: TaskType) -> str:
        """获取任务类型对应的默认XML配置"""
        return DEFAULT_CONFIGS.get(task_type, DEFAULT_CONFIGS[TaskType.TEXT_CLASSIFICATION])
    
    @staticmethod
    def init_project(request: ProjectInitRequest, user_id: str) -> ProjectInitResponse:
        """
        初始化项目并创建任务
        
        Args:
            request: 项目初始化请求
            user_id: 创建者用户ID
            
        Returns:
            ProjectInitResponse: 项目初始化响应
        """
        # 生成项目ID
        project_id = f"proj_{uuid.uuid4().hex[:12]}"
        
        # 根据是否有标签生成配置
        if request.tags and len(request.tags) > 0:
            config = generate_config_with_tags(request.task_type, request.tags)
        else:
            config = ExternalService.get_default_config(request.task_type)
        
        with get_db_connection() as conn:
            cursor = conn.cursor()
            
            # 创建项目
            cursor.execute("""
                INSERT INTO projects (id, name, description, config, status, source, task_type, external_id, updated_at)
                VALUES (?, ?, ?, ?, 'draft', 'external', ?, ?, CURRENT_TIMESTAMP)
            """, (
                project_id,
                request.name,
                request.description or "",
                config,
                request.task_type.value,
                request.external_id
            ))
            
            # 创建任务
            task_count = 0
            for i, item in enumerate(request.data):
                task_id = f"task_{uuid.uuid4().hex[:12]}"
                task_name = f"Task {i + 1}"
                
                # 根据任务类型构建数据格式
                if request.task_type in [TaskType.TEXT_CLASSIFICATION, TaskType.NER]:
                    task_data = {
                        "text": item.content,
                        "external_id": item.id,
                        "metadata": item.metadata or {}
                    }
                else:
                    task_data = {
                        "image": item.content,
                        "external_id": item.id,
                        "metadata": item.metadata or {}
                    }
                
                cursor.execute("""
                    INSERT INTO tasks (id, project_id, name, data, status)
                    VALUES (?, ?, ?, ?, 'pending')
                """, (
                    task_id,
                    project_id,
                    task_name,
                    json.dumps(task_data)
                ))
                task_count += 1
            
            # 获取创建时间
            cursor.execute("SELECT created_at FROM projects WHERE id = ?", (project_id,))
            row = cursor.fetchone()
            created_at = row["created_at"] if row else datetime.now()
            
            return ProjectInitResponse(
                project_id=project_id,
                project_name=request.name,
                task_count=task_count,
                status="draft",
                created_at=created_at,
                config=config,
                external_id=request.external_id
            )
    
    @staticmethod
    def get_project_progress(project_id: str) -> Optional[ProgressResponse]:
        """
        获取项目进度
        
        Args:
            project_id: 项目ID
            
        Returns:
            ProgressResponse: 进度响应，如果项目不存在返回None
        """
        with get_db_connection() as conn:
            cursor = conn.cursor()
            
            # 获取项目信息
            cursor.execute("""
                SELECT id, name, status, updated_at
                FROM projects
                WHERE id = ?
            """, (project_id,))
            
            project = cursor.fetchone()
            if not project:
                return None
            
            # 获取任务统计
            cursor.execute("""
                SELECT 
                    COUNT(*) as total,
                    SUM(CASE WHEN status = 'completed' THEN 1 ELSE 0 END) as completed,
                    SUM(CASE WHEN status = 'in_progress' THEN 1 ELSE 0 END) as in_progress,
                    SUM(CASE WHEN status = 'pending' THEN 1 ELSE 0 END) as pending
                FROM tasks
                WHERE project_id = ?
            """, (project_id,))
            
            stats = cursor.fetchone()
            total_tasks = stats["total"] or 0
            completed_tasks = stats["completed"] or 0
            in_progress_tasks = stats["in_progress"] or 0
            pending_tasks = stats["pending"] or 0
            
            # 计算完成百分比
            completion_percentage = 0.0
            if total_tasks > 0:
                completion_percentage = round((completed_tasks / total_tasks) * 100, 2)
            
            # 获取标注人员统计
            cursor.execute("""
                SELECT 
                    t.assigned_to,
                    u.username,
                    COUNT(*) as assigned_count,
                    SUM(CASE WHEN t.status = 'completed' THEN 1 ELSE 0 END) as completed_count,
                    SUM(CASE WHEN t.status = 'in_progress' THEN 1 ELSE 0 END) as in_progress_count
                FROM tasks t
                LEFT JOIN users u ON t.assigned_to = u.id
                WHERE t.project_id = ? AND t.assigned_to IS NOT NULL
                GROUP BY t.assigned_to, u.username
            """, (project_id,))
            
            annotators = []
            for row in cursor.fetchall():
                assigned_count = row["assigned_count"] or 0
                completed_count = row["completed_count"] or 0
                completion_rate = 0.0
                if assigned_count > 0:
                    completion_rate = round((completed_count / assigned_count) * 100, 2)
                
                annotators.append(AnnotatorProgress(
                    user_id=row["assigned_to"] or "",
                    username=row["username"] or "Unknown",
                    assigned_count=assigned_count,
                    completed_count=completed_count,
                    in_progress_count=row["in_progress_count"] or 0,
                    completion_rate=completion_rate
                ))
            
            return ProgressResponse(
                project_id=project_id,
                project_name=project["name"],
                status=project["status"] or "draft",
                total_tasks=total_tasks,
                completed_tasks=completed_tasks,
                in_progress_tasks=in_progress_tasks,
                pending_tasks=pending_tasks,
                completion_percentage=completion_percentage,
                annotators=annotators,
                last_updated=project["updated_at"]
            )
    
    @staticmethod
    def check_project_exists(project_id: str) -> bool:
        """检查项目是否存在"""
        with get_db_connection() as conn:
            cursor = conn.cursor()
            cursor.execute("SELECT id FROM projects WHERE id = ?", (project_id,))
            return cursor.fetchone() is not None


    @staticmethod
    def export_project_data(
        project_id: str,
        request: ExternalExportRequest,
        base_url: str = ""
    ) -> Optional[ExternalExportResponse]:
        """
        导出项目数据
        
        Args:
            project_id: 项目ID
            request: 导出请求
            base_url: 基础URL，用于生成下载链接
            
        Returns:
            ExternalExportResponse: 导出响应，如果项目不存在返回None
        """
        import os
        from datetime import timedelta
        
        with get_db_connection() as conn:
            cursor = conn.cursor()
            
            # 检查项目是否存在
            cursor.execute("SELECT id, name FROM projects WHERE id = ?", (project_id,))
            project = cursor.fetchone()
            if not project:
                return None
            
            # 构建查询条件
            status_filter = ""
            if request.completed_only:
                status_filter = "AND t.status = 'completed'"
            
            # 获取任务和标注数据
            cursor.execute(f"""
                SELECT 
                    t.id as task_id,
                    t.data,
                    t.status,
                    t.assigned_to,
                    u.username as annotator_name,
                    a.id as annotation_id,
                    a.result as annotation_result,
                    a.updated_at as annotation_time
                FROM tasks t
                LEFT JOIN users u ON t.assigned_to = u.id
                LEFT JOIN annotations a ON t.id = a.task_id
                WHERE t.project_id = ? {status_filter}
                ORDER BY t.id
            """, (project_id,))
            
            rows = cursor.fetchall()
            
            # 组织数据
            tasks_data = {}
            for row in rows:
                task_id = row["task_id"]
                if task_id not in tasks_data:
                    task_data = json.loads(row["data"]) if row["data"] else {}
                    tasks_data[task_id] = {
                        "task_id": task_id,
                        "external_id": task_data.get("external_id"),
                        "original_data": task_data,
                        "annotations": [],
                        "status": row["status"],
                        "annotator": row["annotator_name"],
                        "completed_at": None
                    }
                
                if row["annotation_id"]:
                    annotation_result = json.loads(row["annotation_result"]) if row["annotation_result"] else {}
                    tasks_data[task_id]["annotations"].append(annotation_result)
                    if row["annotation_time"]:
                        tasks_data[task_id]["completed_at"] = row["annotation_time"]
            
            # 转换为列表
            export_data = list(tasks_data.values())
            total_exported = len(export_data)
            
            # 生成导出文件
            export_id = f"export_{uuid.uuid4().hex[:12]}"
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            
            # 确保导出目录存在
            export_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "exports")
            os.makedirs(export_dir, exist_ok=True)
            
            # 根据格式生成文件
            file_name, file_content = ExternalService._generate_export_file(
                export_data,
                request.format,
                project_id,
                timestamp
            )
            
            file_path = os.path.join(export_dir, file_name)
            
            # 写入文件
            if isinstance(file_content, str):
                with open(file_path, 'w', encoding='utf-8') as f:
                    f.write(file_content)
            else:
                with open(file_path, 'wb') as f:
                    f.write(file_content)
            
            file_size = os.path.getsize(file_path)
            
            # 记录导出任务
            cursor.execute("""
                INSERT INTO export_jobs (id, project_id, format, status, status_filter, file_path, total_tasks, exported_tasks, completed_at)
                VALUES (?, ?, ?, 'completed', ?, ?, ?, ?, CURRENT_TIMESTAMP)
            """, (
                export_id,
                project_id,
                request.format.value,
                'completed' if request.completed_only else 'all',
                file_path,
                total_exported,
                total_exported
            ))
            
            # 计算过期时间（7天后）
            expires_at = datetime.now() + timedelta(days=7)
            
            return ExternalExportResponse(
                project_id=project_id,
                format=request.format.value,
                total_exported=total_exported,
                file_url=f"/api/exports/{export_id}/download",
                file_name=file_name,
                file_size=file_size,
                expires_at=expires_at
            )
    
    @staticmethod
    def _generate_export_file(
        data: List[Dict],
        format: ExternalExportFormat,
        project_id: str,
        timestamp: str
    ) -> tuple:
        """
        根据格式生成导出文件
        
        Returns:
            tuple: (文件名, 文件内容)
        """
        if format == ExternalExportFormat.JSON:
            return ExternalService._export_json(data, project_id, timestamp)
        elif format == ExternalExportFormat.CSV:
            return ExternalService._export_csv(data, project_id, timestamp)
        elif format == ExternalExportFormat.SHAREGPT:
            return ExternalService._export_sharegpt(data, project_id, timestamp)
        elif format == ExternalExportFormat.YOLO:
            return ExternalService._export_yolo(data, project_id, timestamp)
        elif format == ExternalExportFormat.COCO:
            return ExternalService._export_coco(data, project_id, timestamp)
        elif format == ExternalExportFormat.ALPACA:
            return ExternalService._export_alpaca(data, project_id, timestamp)
        elif format == ExternalExportFormat.PASCAL_VOC:
            return ExternalService._export_pascal_voc(data, project_id, timestamp)
        else:
            return ExternalService._export_json(data, project_id, timestamp)
    
    @staticmethod
    def _export_json(data: List[Dict], project_id: str, timestamp: str) -> tuple:
        """导出JSON格式"""
        file_name = f"export_{project_id}_{timestamp}.json"
        content = json.dumps(data, ensure_ascii=False, indent=2)
        return file_name, content
    
    @staticmethod
    def _export_csv(data: List[Dict], project_id: str, timestamp: str) -> tuple:
        """导出CSV格式"""
        import csv
        import io
        
        file_name = f"export_{project_id}_{timestamp}.csv"
        
        output = io.StringIO()
        writer = csv.writer(output)
        
        # 写入表头
        writer.writerow(['task_id', 'external_id', 'status', 'annotator', 'original_data', 'annotations'])
        
        # 写入数据
        for item in data:
            writer.writerow([
                item.get('task_id', ''),
                item.get('external_id', ''),
                item.get('status', ''),
                item.get('annotator', ''),
                json.dumps(item.get('original_data', {}), ensure_ascii=False),
                json.dumps(item.get('annotations', []), ensure_ascii=False)
            ])
        
        return file_name, output.getvalue()
    
    @staticmethod
    def _export_sharegpt(data: List[Dict], project_id: str, timestamp: str) -> tuple:
        """导出ShareGPT对话格式"""
        file_name = f"export_{project_id}_sharegpt_{timestamp}.json"
        
        conversations = []
        for item in data:
            original = item.get('original_data', {})
            annotations = item.get('annotations', [])
            
            # 获取原始文本
            text = original.get('text', original.get('image', ''))
            
            # 获取标注结果
            label = ""
            if annotations:
                for ann in annotations:
                    if isinstance(ann, list):
                        for a in ann:
                            if 'value' in a and 'choices' in a['value']:
                                label = ', '.join(a['value']['choices'])
                                break
                    elif isinstance(ann, dict):
                        if 'value' in ann and 'choices' in ann['value']:
                            label = ', '.join(ann['value']['choices'])
            
            if text and label:
                conversations.append({
                    "conversations": [
                        {"from": "human", "value": text},
                        {"from": "gpt", "value": label}
                    ]
                })
        
        content = json.dumps(conversations, ensure_ascii=False, indent=2)
        return file_name, content
    
    @staticmethod
    def _export_yolo(data: List[Dict], project_id: str, timestamp: str) -> tuple:
        """导出YOLO格式（简化版，返回JSON描述）"""
        file_name = f"export_{project_id}_yolo_{timestamp}.json"
        
        yolo_data = []
        for item in data:
            original = item.get('original_data', {})
            annotations = item.get('annotations', [])
            
            image_url = original.get('image', '')
            boxes = []
            polygons = []
            
            for ann in annotations:
                if isinstance(ann, list):
                    for a in ann:
                        if a.get('type') == 'rectanglelabels':
                            value = a.get('value', {})
                            boxes.append({
                                "label": value.get('rectanglelabels', [''])[0],
                                "x": value.get('x', 0) / 100,
                                "y": value.get('y', 0) / 100,
                                "width": value.get('width', 0) / 100,
                                "height": value.get('height', 0) / 100
                            })
                        elif a.get('type') == 'polygonlabels':
                            value = a.get('value', {})
                            points = value.get('points', [])
                            # 将点坐标归一化到0-1范围
                            normalized_points = [[p[0] / 100, p[1] / 100] for p in points]
                            polygons.append({
                                "label": value.get('polygonlabels', [''])[0],
                                "points": normalized_points
                            })
            
            if image_url:
                entry = {"image": image_url}
                if boxes:
                    entry["boxes"] = boxes
                if polygons:
                    entry["polygons"] = polygons
                yolo_data.append(entry)
        
        content = json.dumps(yolo_data, ensure_ascii=False, indent=2)
        return file_name, content
    
    @staticmethod
    def _export_coco(data: List[Dict], project_id: str, timestamp: str) -> tuple:
        """导出COCO格式"""
        file_name = f"export_{project_id}_coco_{timestamp}.json"
        
        coco_data = {
            "images": [],
            "annotations": [],
            "categories": []
        }
        
        category_map = {}
        annotation_id = 1
        
        for idx, item in enumerate(data):
            original = item.get('original_data', {})
            annotations = item.get('annotations', [])
            
            image_url = original.get('image', '')
            
            # 添加图像
            coco_data["images"].append({
                "id": idx + 1,
                "file_name": image_url,
                "width": 0,
                "height": 0
            })
            
            # 处理标注
            for ann in annotations:
                if isinstance(ann, list):
                    for a in ann:
                        ann_type = a.get('type', '')
                        value = a.get('value', {})
                        
                        if ann_type == 'rectanglelabels':
                            label = value.get('rectanglelabels', [''])[0]
                            
                            # 添加类别
                            if label and label not in category_map:
                                cat_id = len(category_map) + 1
                                category_map[label] = cat_id
                                coco_data["categories"].append({
                                    "id": cat_id,
                                    "name": label
                                })
                            
                            if label:
                                coco_data["annotations"].append({
                                    "id": annotation_id,
                                    "image_id": idx + 1,
                                    "category_id": category_map.get(label, 0),
                                    "bbox": [
                                        value.get('x', 0),
                                        value.get('y', 0),
                                        value.get('width', 0),
                                        value.get('height', 0)
                                    ],
                                    "area": value.get('width', 0) * value.get('height', 0),
                                    "iscrowd": 0
                                })
                                annotation_id += 1
                        
                        elif ann_type == 'polygonlabels':
                            label = value.get('polygonlabels', [''])[0]
                            points = value.get('points', [])
                            
                            # 添加类别
                            if label and label not in category_map:
                                cat_id = len(category_map) + 1
                                category_map[label] = cat_id
                                coco_data["categories"].append({
                                    "id": cat_id,
                                    "name": label
                                })
                            
                            if label and points:
                                # 将点列表转换为COCO segmentation格式 [x1, y1, x2, y2, ...]
                                segmentation = []
                                for p in points:
                                    segmentation.extend([p[0], p[1]])
                                
                                # 计算边界框
                                x_coords = [p[0] for p in points]
                                y_coords = [p[1] for p in points]
                                x_min, x_max = min(x_coords), max(x_coords)
                                y_min, y_max = min(y_coords), max(y_coords)
                                width = x_max - x_min
                                height = y_max - y_min
                                
                                # 计算面积（使用鞋带公式）
                                n = len(points)
                                area = 0
                                for i in range(n):
                                    j = (i + 1) % n
                                    area += points[i][0] * points[j][1]
                                    area -= points[j][0] * points[i][1]
                                area = abs(area) / 2
                                
                                coco_data["annotations"].append({
                                    "id": annotation_id,
                                    "image_id": idx + 1,
                                    "category_id": category_map.get(label, 0),
                                    "segmentation": [segmentation],
                                    "bbox": [x_min, y_min, width, height],
                                    "area": area,
                                    "iscrowd": 0
                                })
                                annotation_id += 1
        
        content = json.dumps(coco_data, ensure_ascii=False, indent=2)
        return file_name, content
    
    @staticmethod
    def _export_alpaca(data: List[Dict], project_id: str, timestamp: str) -> tuple:
        """导出Alpaca指令微调格式"""
        file_name = f"export_{project_id}_alpaca_{timestamp}.json"
        
        alpaca_data = []
        for item in data:
            original = item.get('original_data', {})
            annotations = item.get('annotations', [])
            
            # 获取原始文本
            text = original.get('text', '')
            
            # 获取标注结果
            label = ""
            if annotations:
                for ann in annotations:
                    if isinstance(ann, list):
                        for a in ann:
                            if 'value' in a and 'choices' in a['value']:
                                label = ', '.join(a['value']['choices'])
                                break
                    elif isinstance(ann, dict):
                        if 'value' in ann and 'choices' in ann['value']:
                            label = ', '.join(ann['value']['choices'])
            
            if text:
                alpaca_data.append({
                    "instruction": "请对以下文本进行分类",
                    "input": text,
                    "output": label or "未标注"
                })
        
        content = json.dumps(alpaca_data, ensure_ascii=False, indent=2)
        return file_name, content

    @staticmethod
    def _export_pascal_voc(data: List[Dict], project_id: str, timestamp: str) -> tuple:
        """
        导出PascalVOC XML格式
        
        PascalVOC格式是一种常用的目标检测数据集格式，每张图片对应一个XML文件。
        由于我们需要返回单个文件，这里返回一个包含所有标注的JSON文件，
        其中每个条目包含对应的PascalVOC XML内容。
        """
        file_name = f"export_{project_id}_pascal_voc_{timestamp}.json"
        
        voc_data = []
        
        for idx, item in enumerate(data):
            original = item.get('original_data', {})
            annotations = item.get('annotations', [])
            
            image_url = original.get('image', '')
            # 从URL中提取文件名
            image_filename = image_url.split('/')[-1] if image_url else f"image_{idx + 1}.jpg"
            
            # 获取图像尺寸（如果有的话）
            img_width = original.get('width', 0)
            img_height = original.get('height', 0)
            
            objects = []
            
            # 处理标注
            for ann in annotations:
                if isinstance(ann, list):
                    for a in ann:
                        ann_type = a.get('type', '')
                        value = a.get('value', {})
                        
                        if ann_type == 'rectanglelabels':
                            label = value.get('rectanglelabels', [''])[0]
                            if label:
                                # 转换百分比坐标为像素坐标
                                x_pct = value.get('x', 0)
                                y_pct = value.get('y', 0)
                                w_pct = value.get('width', 0)
                                h_pct = value.get('height', 0)
                                
                                # 如果有图像尺寸，转换为像素；否则保持百分比
                                if img_width > 0 and img_height > 0:
                                    xmin = int(x_pct * img_width / 100)
                                    ymin = int(y_pct * img_height / 100)
                                    xmax = int((x_pct + w_pct) * img_width / 100)
                                    ymax = int((y_pct + h_pct) * img_height / 100)
                                else:
                                    xmin = x_pct
                                    ymin = y_pct
                                    xmax = x_pct + w_pct
                                    ymax = y_pct + h_pct
                                
                                objects.append({
                                    "name": label,
                                    "pose": "Unspecified",
                                    "truncated": 0,
                                    "difficult": 0,
                                    "bndbox": {
                                        "xmin": xmin,
                                        "ymin": ymin,
                                        "xmax": xmax,
                                        "ymax": ymax
                                    }
                                })
                        
                        elif ann_type == 'polygonlabels':
                            label = value.get('polygonlabels', [''])[0]
                            points = value.get('points', [])
                            
                            if label and points:
                                # 计算边界框
                                x_coords = [p[0] for p in points]
                                y_coords = [p[1] for p in points]
                                
                                if img_width > 0 and img_height > 0:
                                    xmin = int(min(x_coords) * img_width / 100)
                                    ymin = int(min(y_coords) * img_height / 100)
                                    xmax = int(max(x_coords) * img_width / 100)
                                    ymax = int(max(y_coords) * img_height / 100)
                                else:
                                    xmin = min(x_coords)
                                    ymin = min(y_coords)
                                    xmax = max(x_coords)
                                    ymax = max(y_coords)
                                
                                # 转换多边形点坐标
                                if img_width > 0 and img_height > 0:
                                    polygon_points = [[int(p[0] * img_width / 100), int(p[1] * img_height / 100)] for p in points]
                                else:
                                    polygon_points = points
                                
                                objects.append({
                                    "name": label,
                                    "pose": "Unspecified",
                                    "truncated": 0,
                                    "difficult": 0,
                                    "bndbox": {
                                        "xmin": xmin,
                                        "ymin": ymin,
                                        "xmax": xmax,
                                        "ymax": ymax
                                    },
                                    "polygon": polygon_points
                                })
            
            # 生成PascalVOC XML内容
            xml_content = ExternalService._generate_voc_xml(
                image_filename,
                img_width or 0,
                img_height or 0,
                objects
            )
            
            voc_data.append({
                "image": image_url,
                "filename": image_filename,
                "xml_content": xml_content,
                "objects": objects
            })
        
        content = json.dumps(voc_data, ensure_ascii=False, indent=2)
        return file_name, content
    
    @staticmethod
    def _generate_voc_xml(filename: str, width: int, height: int, objects: List[Dict]) -> str:
        """生成PascalVOC格式的XML字符串"""
        xml_lines = [
            '<?xml version="1.0" encoding="UTF-8"?>',
            '<annotation>',
            f'  <filename>{filename}</filename>',
            '  <source>',
            '    <database>Annotation Platform</database>',
            '  </source>',
            '  <size>',
            f'    <width>{width}</width>',
            f'    <height>{height}</height>',
            '    <depth>3</depth>',
            '  </size>',
            '  <segmented>0</segmented>'
        ]
        
        for obj in objects:
            xml_lines.append('  <object>')
            xml_lines.append(f'    <name>{obj["name"]}</name>')
            xml_lines.append(f'    <pose>{obj.get("pose", "Unspecified")}</pose>')
            xml_lines.append(f'    <truncated>{obj.get("truncated", 0)}</truncated>')
            xml_lines.append(f'    <difficult>{obj.get("difficult", 0)}</difficult>')
            xml_lines.append('    <bndbox>')
            xml_lines.append(f'      <xmin>{obj["bndbox"]["xmin"]}</xmin>')
            xml_lines.append(f'      <ymin>{obj["bndbox"]["ymin"]}</ymin>')
            xml_lines.append(f'      <xmax>{obj["bndbox"]["xmax"]}</xmax>')
            xml_lines.append(f'      <ymax>{obj["bndbox"]["ymax"]}</ymax>')
            xml_lines.append('    </bndbox>')
            
            # 如果有多边形数据，也添加进去
            if 'polygon' in obj:
                xml_lines.append('    <polygon>')
                for point in obj['polygon']:
                    xml_lines.append(f'      <pt><x>{point[0]}</x><y>{point[1]}</y></pt>')
                xml_lines.append('    </polygon>')
            
            xml_lines.append('  </object>')
        
        xml_lines.append('</annotation>')
        
        return '\n'.join(xml_lines)