ソースを参照

新增部署功能,修复知识库字段不匹配问题

lxylxy123321 2 日 前
コミット
c6950e60fb

+ 3 - 1
CLAUDE.md

@@ -46,4 +46,6 @@ docker stop finetune-trainer && docker rename finetune-trainer finetune-trainer-
 ```bash
 docker exec -it finetune-trainer /opt/conda/bin/pip install peft trl accelerate bitsandbytes datasets
 docker exec -it finetune-trainer /opt/conda/bin/pip install --no-deps --upgrade transformers huggingface-hub
-```
+```
+
+> **注意**: 253 容器不需要安装 fastapi/uvicorn。推理 worker(inference_worker.py)只用 Python 标准库 + torch/transformers,API 代理由 151 主节点提供。

+ 56 - 0
backend/app/api/api_keys.py

@@ -0,0 +1,56 @@
+"""API Key 管理端点。
+
+路由前缀: /api/v1/api-keys
+"""
+from fastapi import APIRouter, Depends, HTTPException
+from pydantic import BaseModel
+
+from app.core.auth import get_current_user
+from app.services import api_key_service
+
+router = APIRouter()
+
+
+class CreateApiKeyRequest(BaseModel):
+    name: str = "default"
+
+
+class ApiKeyResponse(BaseModel):
+    id: str
+    key: str       # 创建时返回完整 key,列表时返回 masked
+    name: str
+    status: str | None = None
+    last_used_at: str | None = None
+    created_at: str | None = None
+
+
+@router.post("/", response_model=ApiKeyResponse)
+async def create_key(
+    req: CreateApiKeyRequest,
+    current_user: dict = Depends(get_current_user),
+):
+    """创建新的 API Key(完整 key 仅返回一次)。"""
+    user_id = current_user.get("sub")
+    result = await api_key_service.create_api_key(user_id, req.name)
+    return ApiKeyResponse(**result)
+
+
+@router.get("/", response_model=list[ApiKeyResponse])
+async def list_keys(current_user: dict = Depends(get_current_user)):
+    """列出当前用户的所有 API Key。"""
+    user_id = current_user.get("sub")
+    keys = await api_key_service.list_api_keys(user_id)
+    return [ApiKeyResponse(**k) for k in keys]
+
+
+@router.delete("/{key_id}")
+async def revoke_key(
+    key_id: str,
+    current_user: dict = Depends(get_current_user),
+):
+    """吊销指定的 API Key。"""
+    user_id = current_user.get("sub")
+    result = await api_key_service.revoke_api_key(key_id, user_id)
+    if "error" in result:
+        raise HTTPException(status_code=404, detail=result["error"])
+    return result

+ 252 - 5
backend/app/api/deployment.py

@@ -1,23 +1,270 @@
-from fastapi import APIRouter
+"""部署 API —— 导出、在线服务管理、OpenAI 兼容代理。
 
-from app.schemas.deployment import DeployConfig, DeployResponse
-from app.services import deploy_service
+路由前缀:
+  /api/v1/deployment         - 管理端点(JWT 认证)
+  /api/v1/deployment/proxy   - 代理端点(API Key 认证)
 
+代理路由:
+    POST /proxy/{task_id}/v1/chat/completions   - OpenAI 兼容聊天补全
+    POST /proxy/{task_id}/v1/completions         - OpenAI 兼容文本补全
+    GET  /proxy/{task_id}/v1/models              - 模型列表
+    GET  /proxy/{task_id}/health                 - 健康检查
+"""
+import time
+import uuid
+
+from fastapi import APIRouter, Depends, HTTPException, Request
+from fastapi.responses import JSONResponse
+
+from app.core.auth import get_current_user
+from app.schemas.deployment import (
+    DeployConfig, DeployResponse, DeployServeConfig, DeployedServiceInfo,
+)
+from app.services import api_key_service, deploy_service
+
+# 管理端点(需要 JWT 登录)
 router = APIRouter()
 
+# 代理端点(需要 API Key,不需要 JWT)
+proxy_router = APIRouter()
+
+
+# ---------------------------------------------------------------------------
+#  API Key 验证(代理端点专用)
+# ---------------------------------------------------------------------------
+
+async def _extract_api_key(request: Request) -> str | None:
+    """Extract an ``sk-`` API key from the ``Authorization`` header.
+
+    The ``Bearer`` auth scheme is matched case-insensitively, as HTTP
+    requires, so ``bearer sk-...`` is accepted as well as ``Bearer sk-...``.
+
+    Args:
+        request: Incoming request whose headers are inspected.
+
+    Returns:
+        The key string, or ``None`` when the header is absent, uses a
+        different scheme, or carries a token without the ``sk-`` prefix.
+    """
+    auth_header = request.headers.get("Authorization", "")
+    scheme, _, token = auth_header.strip().partition(" ")
+    if scheme.lower() != "bearer":
+        return None
+    token = token.strip()
+    return token if token.startswith("sk-") else None
+
+
+async def _validate_proxy_auth(task_id: str, request: Request) -> None:
+    """Authenticate a proxy request by API key and check deployment ownership.
+
+    Args:
+        task_id: Deployment task the request is addressed to.
+        request: Incoming request carrying the ``Authorization`` header.
+
+    Raises:
+        HTTPException: 401 when no API key can be extracted or when
+            ``validate_api_key`` does not recognise it; 403 when the key's
+            user does not own ``task_id``.
+    """
+    api_key = await _extract_api_key(request)
+    if not api_key:
+        raise HTTPException(
+            status_code=401,
+            detail={"error": {"message": "Missing API key. Use Authorization: Bearer sk-xxx", "type": "auth_error"}},
+        )
+
+    key_info = await api_key_service.validate_api_key(api_key)
+    if not key_info:
+        raise HTTPException(
+            status_code=401,
+            detail={"error": {"message": "Invalid or revoked API key", "type": "auth_error"}},
+        )
+
+    # Make sure the key's user owns this deployment task
+    if not await api_key_service.check_deploy_ownership(task_id, key_info["user_id"]):
+        raise HTTPException(
+            status_code=403,
+            detail={"error": {"message": "Access denied: you do not own this deployment", "type": "permission_error"}},
+        )
+
+
+# ---------------------------------------------------------------------------
+#  管理端点(JWT 认证)
+# ---------------------------------------------------------------------------
 
 @router.post("/export", response_model=DeployResponse)
-async def export_adapter(config: DeployConfig):
-    """启动导出后台任务,立即返回 task_id。"""
+async def export_adapter(
+    config: DeployConfig,
+    current_user: dict = Depends(get_current_user),
+):
+    """启动导出后台任务(导出模型文件),立即返回 task_id。"""
+    user_id = current_user.get("sub")
     result = await deploy_service.export_adapter(
         config.job_id,
         {"merge_with_base": config.merge_with_base, "export_format": config.export_format},
+        user_id=user_id,
     )
     return DeployResponse(**result)
 
 
+@router.post("/serve", response_model=DeployResponse)
+async def serve_model(
+    config: DeployServeConfig,
+    current_user: dict = Depends(get_current_user),
+):
+    """部署为在线推理服务(OpenAI 兼容 API)。
+
+    151 提供代理 API,253 运行纯推理 worker。
+    启动后通过 base_url 调用 /v1/chat/completions 等接口。
+    """
+    user_id = current_user.get("sub")
+    try:
+        result = await deploy_service.start_serving(
+            config.job_id,
+            {"merge_with_base": config.merge_with_base, "port": config.port, "host": config.host},
+            user_id=user_id,
+        )
+        return DeployResponse(**result)
+    except RuntimeError as e:
+        raise HTTPException(status_code=400, detail=str(e))
+
+
+@router.get("/services", response_model=list[DeployedServiceInfo])
+async def list_services(current_user: dict = Depends(get_current_user)):
+    """列出当前用户已部署的在线服务。"""
+    user_id = current_user.get("sub")
+    services = await deploy_service.list_deployed_services(user_id)
+    return [DeployedServiceInfo(**s) for s in services]
+
+
+@router.post("/{task_id}/stop")
+async def stop_serving(
+    task_id: str,
+    current_user: dict = Depends(get_current_user),
+):
+    """停止已部署的在线服务。"""
+    user_id = current_user.get("sub")
+    result = await deploy_service.stop_serving(task_id, user_id)
+    if "error" in result:
+        raise HTTPException(status_code=400, detail=result["error"])
+    return result
+
+
 @router.get("/{deploy_id}/status", response_model=DeployResponse)
 async def get_deployment_status(deploy_id: str):
     """获取导出/部署任务状态。"""
     result = await deploy_service.get_deploy_status(deploy_id)
     return DeployResponse(**result)
+
+
+# ---------------------------------------------------------------------------
+#  OpenAI 兼容代理端点(API Key 认证)
+# ---------------------------------------------------------------------------
+
+async def _read_json_object(request: Request) -> dict:
+    """Parse the request body, requiring a JSON object at the top level.
+
+    Raises:
+        HTTPException: 400 when the body is not valid JSON, or is valid JSON
+            but not an object (e.g. a list or a bare string).
+    """
+    try:
+        body = await request.json()
+    except Exception:
+        raise HTTPException(status_code=400, detail="Invalid JSON")
+    if not isinstance(body, dict):
+        raise HTTPException(status_code=400, detail="Request body must be a JSON object")
+    return body
+
+
+def _sampling_params(body: dict) -> dict:
+    """Build the generation parameters forwarded to the worker.
+
+    A missing or ``null`` temperature falls back to 0.7; a non-numeric one is
+    rejected with 400 instead of crashing on the ``> 0`` comparison.
+    """
+    temperature = body.get("temperature")
+    if temperature is None:
+        temperature = 0.7
+    if not isinstance(temperature, (int, float)):
+        raise HTTPException(status_code=400, detail="temperature must be a number")
+    return {
+        "max_tokens": body.get("max_tokens", 512),
+        "temperature": temperature,
+        "top_p": body.get("top_p", 0.9),
+        # temperature == 0 means greedy decoding
+        "do_sample": temperature > 0,
+        "repetition_penalty": body.get("repetition_penalty", 1.0),
+    }
+
+
+def _upstream_error(worker_resp: dict) -> JSONResponse:
+    """Wrap a worker-reported error as a 502 response."""
+    return JSONResponse(
+        status_code=502,
+        content={"error": {"message": worker_resp["error"], "type": "upstream_error"}},
+    )
+
+
+def _usage(worker_resp: dict) -> dict:
+    """Copy the token counters from the worker response, defaulting to 0."""
+    return {
+        "prompt_tokens": worker_resp.get("prompt_tokens", 0),
+        "completion_tokens": worker_resp.get("completion_tokens", 0),
+        "total_tokens": worker_resp.get("total_tokens", 0),
+    }
+
+
+@proxy_router.post("/proxy/{task_id}/v1/chat/completions")
+async def proxy_chat_completions(task_id: str, request: Request):
+    """OpenAI-compatible chat completion proxy.
+
+    Authenticates by API key, forwards ``messages`` and the sampling
+    parameters to the worker, and wraps the worker's reply as a
+    ``chat.completion`` object.
+
+    Raises:
+        HTTPException: 401/403 from authentication; 400 when the body is not
+            a JSON object, ``messages`` is missing, empty or not a list, or
+            ``temperature`` is not numeric.
+
+    Returns:
+        The completion dict, or a 502 ``JSONResponse`` when the worker
+        reports an error.
+    """
+    await _validate_proxy_auth(task_id, request)
+    body = await _read_json_object(request)
+
+    messages = body.get("messages", [])
+    if not isinstance(messages, list) or not messages:
+        raise HTTPException(status_code=400, detail="messages is required")
+
+    worker_req = {"messages": messages, **_sampling_params(body)}
+    worker_resp = await deploy_service.proxy_to_worker(task_id, worker_req)
+    if "error" in worker_resp:
+        return _upstream_error(worker_resp)
+
+    return {
+        "id": f"chatcmpl-{uuid.uuid4().hex[:12]}",
+        "object": "chat.completion",
+        "created": int(time.time()),
+        "model": body.get("model", "local-model"),
+        "choices": [{
+            "index": 0,
+            "message": {
+                "role": "assistant",
+                "content": worker_resp.get("generated_text", ""),
+            },
+            "finish_reason": "stop",
+        }],
+        "usage": _usage(worker_resp),
+    }
+
+
+@proxy_router.post("/proxy/{task_id}/v1/completions")
+async def proxy_completions(task_id: str, request: Request):
+    """OpenAI-compatible text completion proxy.
+
+    Same flow as the chat endpoint, but forwards a raw ``prompt`` and wraps
+    the worker's reply as a ``text_completion`` object.
+
+    Raises:
+        HTTPException: 401/403 from authentication; 400 when the body is not
+            a JSON object, ``prompt`` is missing or empty, or ``temperature``
+            is not numeric.
+
+    Returns:
+        The completion dict, or a 502 ``JSONResponse`` when the worker
+        reports an error.
+    """
+    await _validate_proxy_auth(task_id, request)
+    body = await _read_json_object(request)
+
+    prompt = body.get("prompt", "")
+    if not prompt:
+        raise HTTPException(status_code=400, detail="prompt is required")
+
+    worker_req = {"prompt": prompt, **_sampling_params(body)}
+    worker_resp = await deploy_service.proxy_to_worker(task_id, worker_req)
+    if "error" in worker_resp:
+        return _upstream_error(worker_resp)
+
+    return {
+        "id": f"cmpl-{uuid.uuid4().hex[:12]}",
+        "object": "text_completion",
+        "created": int(time.time()),
+        "model": body.get("model", "local-model"),
+        "choices": [{
+            "index": 0,
+            "text": worker_resp.get("generated_text", ""),
+            "finish_reason": "stop",
+        }],
+        "usage": _usage(worker_resp),
+    }
+
+
+@proxy_router.get("/proxy/{task_id}/v1/models")
+async def proxy_models(task_id: str, request: Request):
+    """返回模型列表(代理)。"""
+    await _validate_proxy_auth(task_id, request)
+    result = await deploy_service.get_deploy_status(task_id)
+    model_name = f"finetuned-{result.get('job_id', '')[:8]}" if result.get("job_id") else "local-model"
+    return {
+        "object": "list",
+        "data": [{
+            "id": model_name,
+            "object": "model",
+            "created": int(time.time()),
+            "owned_by": "local",
+        }],
+    }
+
+
+@proxy_router.get("/proxy/{task_id}/health")
+async def proxy_health(task_id: str, request: Request):
+    """健康检查。"""
+    await _validate_proxy_auth(task_id, request)
+    result = await deploy_service.get_deploy_status(task_id)
+    if result.get("status") != "running":
+        return {"status": "error", "message": f"服务状态: {result.get('status', 'unknown')}"}
+    return {"status": "ok", "task_id": task_id}

+ 39 - 0
backend/app/core/db.py

@@ -42,6 +42,28 @@ async def init_db():
     """创建所有表(首次启动时调用)。"""
     async with _get_engine().begin() as conn:
         await conn.run_sync(Base.metadata.create_all)
+    # 自动迁移:为已有表补齐新字段
+    await _migrate_tables()
+
+
+async def _migrate_tables():
+    """Add newly introduced columns to tables that already exist.
+
+    Called by ``init_db`` after ``create_all`` so that databases created with
+    an older schema pick up the new ``deploy_tasks`` columns. Each ``ALTER``
+    is best effort: a failure (typically "column already exists") is logged
+    at debug level and skipped.
+    """
+    import logging
+
+    from sqlalchemy import text
+    from sqlalchemy.exc import ProgrammingError, OperationalError
+    alter_stmts = [
+        # columns added to deploy_tasks
+        "ALTER TABLE deploy_tasks ADD COLUMN deploy_mode VARCHAR(16) DEFAULT 'export'",
+        "ALTER TABLE deploy_tasks ADD COLUMN endpoint_url VARCHAR(256)",
+        "ALTER TABLE deploy_tasks ADD COLUMN port INTEGER",
+        "ALTER TABLE deploy_tasks ADD COLUMN pid VARCHAR(32)",
+        "ALTER TABLE deploy_tasks ADD COLUMN user_id VARCHAR(36)",
+    ]
+    engine = _get_engine()
+    for stmt in alter_stmts:
+        try:
+            # One transaction per statement: on backends with transactional
+            # DDL (e.g. PostgreSQL) a failed ALTER aborts its transaction, so
+            # sharing one transaction would make every later ALTER fail too.
+            async with engine.begin() as conn:
+                await conn.execute(text(stmt))
+        except (ProgrammingError, OperationalError) as exc:
+            logging.getLogger(__name__).debug("Skipped migration %r: %s", stmt, exc)
 
 
 class TrainingJobModel(Base):
@@ -134,8 +156,13 @@ class DeployTaskModel(Base):
 
     id = Column(String(36), primary_key=True)
     job_id = Column(String(36), nullable=False)
+    user_id = Column(String(36), nullable=True)               # 部署任务所属用户
     status = Column(String(32), default="pending")
+    deploy_mode = Column(String(16), default="export")  # export | serve
     output_path = Column(String(512), nullable=True)
+    endpoint_url = Column(String(256), nullable=True)  # serve 模式下的 base_url
+    port = Column(Integer, nullable=True)               # serve 模式分配的端口
+    pid = Column(String(32), nullable=True)             # serve 模式远程进程 PID
     error = Column(Text, nullable=True)
     progress = Column(Float, default=0.0)
     finished_at = Column(DateTime, nullable=True)
@@ -199,6 +226,18 @@ class RefreshTokenModel(Base):
     created_at = Column(DateTime, default=datetime.utcnow)
 
 
+class ApiKeyModel(Base):
+    # One row per issued API key, owned by a single user.
+    # NOTE(review): ``key`` stores the full key value and lookups compare
+    # against it directly; consider storing only a hash — confirm threat model.
+    __tablename__ = "api_keys"
+
+    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
+    user_id = Column(String(36), nullable=False, index=True)
+    key = Column(String(128), unique=True, nullable=False, index=True)  # sk-xxx
+    name = Column(String(128), nullable=False, default="default")
+    status = Column(String(16), default="active")  # active | revoked
+    last_used_at = Column(DateTime, nullable=True)
+    created_at = Column(DateTime, default=datetime.utcnow)
+
+
 async def get_db() -> AsyncSession:
     async with async_session() as session:
         yield session

+ 244 - 0
backend/app/core/deploy_server_template.py

@@ -0,0 +1,244 @@
+"""OpenAI 兼容的模型推理服务器。
+
+使用方法:
+    python server.py --port 8000 --host 0.0.0.0
+
+API 端点:
+    POST /v1/chat/completions   - OpenAI 兼容的聊天补全接口
+    POST /v1/completions        - 文本补全接口
+    GET  /v1/models             - 模型列表
+    GET  /health                - 健康检查
+
+调用示例:
+    curl http://localhost:8000/v1/chat/completions \
+        -H "Content-Type: application/json" \
+        -d '{
+            "model": "local-model",
+            "messages": [{"role": "user", "content": "你好"}],
+            "max_tokens": 512,
+            "temperature": 0.7
+        }'
+"""
+import argparse
+import json
+import time
+import uuid
+from pathlib import Path
+
+import torch
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+app = FastAPI(title="Model Serving API", version="1.0.0")
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+model = None
+tokenizer = None
+model_name = "local-model"
+
+
+# --- Request / Response schemas ---
+
+class Message(BaseModel):
+    role: str
+    content: str
+
+
+class ChatRequest(BaseModel):
+    model: str = "local-model"
+    messages: list[Message]
+    max_tokens: int = 512
+    temperature: float = 0.7
+    top_p: float = 0.9
+    stream: bool = False
+
+
+class CompletionRequest(BaseModel):
+    model: str = "local-model"
+    prompt: str
+    max_tokens: int = 512
+    temperature: float = 0.7
+    top_p: float = 0.9
+    stream: bool = False
+
+
+class ChoiceMessage(BaseModel):
+    role: str = "assistant"
+    content: str
+
+
+class Choice(BaseModel):
+    index: int = 0
+    message: ChoiceMessage
+    finish_reason: str = "stop"
+
+
+class Usage(BaseModel):
+    prompt_tokens: int
+    completion_tokens: int
+    total_tokens: int
+
+
+class ChatResponse(BaseModel):
+    id: str
+    object: str = "chat.completion"
+    created: int
+    model: str
+    choices: list[Choice]
+    usage: Usage
+
+
+class CompletionChoice(BaseModel):
+    index: int = 0
+    text: str
+    finish_reason: str = "stop"
+
+
+class CompletionResponse(BaseModel):
+    id: str
+    object: str = "text_completion"
+    created: int
+    model: str
+    choices: list[CompletionChoice]
+    usage: Usage
+
+
+class ModelInfo(BaseModel):
+    id: str
+    object: str = "model"
+    created: int = 0
+    owned_by: str = "local"
+
+
+class ModelList(BaseModel):
+    object: str = "list"
+    data: list[ModelInfo]
+
+
+# --- Endpoints ---
+
+@app.post("/v1/chat/completions", response_model=ChatResponse)
+async def chat_completions(req: ChatRequest):
+    prompt = _build_prompt(req.messages)
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    prompt_tokens = inputs["input_ids"].shape[1]
+
+    with torch.no_grad():
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=req.max_tokens,
+            temperature=max(req.temperature, 0.01),
+            top_p=req.top_p,
+            do_sample=req.temperature > 0,
+            pad_token_id=tokenizer.eos_token_id,
+        )
+
+    generated = tokenizer.decode(outputs[0][prompt_tokens:], skip_special_tokens=True)
+    completion_tokens = outputs.shape[1] - prompt_tokens
+
+    return ChatResponse(
+        id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
+        created=int(time.time()),
+        model=req.model,
+        choices=[Choice(message=ChoiceMessage(content=generated))],
+        usage=Usage(
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+            total_tokens=prompt_tokens + completion_tokens,
+        ),
+    )
+
+
+@app.post("/v1/completions", response_model=CompletionResponse)
+async def completions(req: CompletionRequest):
+    inputs = tokenizer(req.prompt, return_tensors="pt").to(model.device)
+    prompt_tokens = inputs["input_ids"].shape[1]
+
+    with torch.no_grad():
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=req.max_tokens,
+            temperature=max(req.temperature, 0.01),
+            top_p=req.top_p,
+            do_sample=req.temperature > 0,
+            pad_token_id=tokenizer.eos_token_id,
+        )
+
+    generated = tokenizer.decode(outputs[0][prompt_tokens:], skip_special_tokens=True)
+    completion_tokens = outputs.shape[1] - prompt_tokens
+
+    return CompletionResponse(
+        id=f"cmpl-{uuid.uuid4().hex[:12]}",
+        created=int(time.time()),
+        model=req.model,
+        choices=[CompletionChoice(text=generated)],
+        usage=Usage(
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+            total_tokens=prompt_tokens + completion_tokens,
+        ),
+    )
+
+
+@app.get("/v1/models", response_model=ModelList)
+async def list_models():
+    return ModelList(data=[ModelInfo(id=model_name)])
+
+
+@app.get("/health")
+async def health():
+    return {"status": "ok", "model": model_name}
+
+
+def _build_prompt(messages: list[Message]) -> str:
+    """将 OpenAI 消息格式转为模型输入文本。"""
+    parts = []
+    for msg in messages:
+        if msg.role == "system":
+            parts.append(f"<|system|>\n{msg.content}")
+        elif msg.role == "user":
+            parts.append(f"<|user|>\n{msg.content}")
+        elif msg.role == "assistant":
+            parts.append(f"<|assistant|>\n{msg.content}")
+    parts.append("<|assistant|>\n")
+    return "\n".join(parts)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Model Serving API Server")
+    parser.add_argument("--model-path", type=str, default="./model", help="模型目录路径")
+    parser.add_argument("--host", type=str, default="0.0.0.0", help="监听地址")
+    parser.add_argument("--port", type=int, default=8000, help="监听端口")
+    parser.add_argument("--device", type=str, default="auto", help="设备 (auto/cuda/cpu)")
+    args = parser.parse_args()
+
+    model_path = Path(args.model_path)
+    model_name = model_path.name
+    print(f"Loading model from: {model_path}")
+
+    if args.device == "auto":
+        device_map = {"": 0} if torch.cuda.is_available() else "auto"
+    elif args.device == "cuda":
+        device_map = {"": 0}
+    else:
+        device_map = "cpu"
+
+    tokenizer = AutoTokenizer.from_pretrained(str(model_path), trust_remote_code=True)
+    if tokenizer.pad_token is None:
+        tokenizer.pad_token = tokenizer.eos_token
+
+    model = AutoModelForCausalLM.from_pretrained(
+        str(model_path), torch_dtype=torch.float16, device_map=device_map,
+    )
+    model.eval()
+    print(f"Model loaded. Starting server on {args.host}:{args.port}")
+
+    import uvicorn
+    uvicorn.run(app, host=args.host, port=args.port)

+ 207 - 0
backend/app/core/inference_worker.py

@@ -0,0 +1,207 @@
+"""轻量推理 worker —— 在算力节点(253)上运行。
+
+只依赖 Python 标准库 + torch + transformers(不需要 fastapi/uvicorn)。
+通过 TCP 接收 JSON 请求,返回 JSON 响应。
+
+协议:4 字节大端长度前缀 + JSON body
+
+启动:
+    python inference_worker.py --model-path /path/to/merged/model --port 8100
+
+请求格式:
+    {
+        "prompt": "<|user|>\\n你好\\n<|assistant|>\\n",
+        "max_new_tokens": 512,
+        "temperature": 0.7,
+        "top_p": 0.9,
+        "do_sample": true,
+        "repetition_penalty": 1.0
+    }
+
+响应格式:
+    {
+        "generated_text": "你好!有什么可以帮你的吗?",
+        "prompt_tokens": 12,
+        "completion_tokens": 15,
+        "total_tokens": 27
+    }
+"""
+import argparse
+import json
+import socket
+import struct
+import threading
+import sys
+
+
+def _build_prompt_from_messages(messages: list[dict]) -> str:
+    """Render OpenAI-style chat messages as a single prompt string.
+
+    Each system/user/assistant message becomes ``<|role|>\\ncontent``;
+    messages with any other role are dropped. A trailing ``<|assistant|>``
+    header is always appended so the model continues as the assistant.
+    """
+    known_roles = ("system", "user", "assistant")
+    segments = [
+        f"<|{entry.get('role', '')}|>\n{entry.get('content', '')}"
+        for entry in messages
+        if entry.get("role", "") in known_roles
+    ]
+    segments.append("<|assistant|>\n")
+    return "\n".join(segments)
+
+
+class InferenceWorker:
+    """Holds a loaded causal LM and its tokenizer and serves generation requests."""
+
+    def __init__(self, model_path: str):
+        """Load the tokenizer and model from ``model_path``.
+
+        torch/transformers are imported here rather than at module level, so
+        importing this module does not require them.
+        """
+        import torch
+        from transformers import AutoModelForCausalLM, AutoTokenizer
+
+        print(f"[worker] Loading tokenizer from: {model_path}", flush=True)
+        self.tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        if self.tokenizer.pad_token is None:
+            self.tokenizer.pad_token = self.tokenizer.eos_token
+
+        print(f"[worker] Loading model from: {model_path}", flush=True)
+        # Pin to GPU 0 when CUDA is available, otherwise let the loader decide.
+        device_map = {"": 0} if torch.cuda.is_available() else "auto"
+        self.model = AutoModelForCausalLM.from_pretrained(
+            model_path, torch_dtype=torch.float16, device_map=device_map,
+        )
+        self.model.eval()
+        self.torch = torch
+        print("[worker] Model loaded successfully.", flush=True)
+
+    def generate(self, request: dict) -> dict:
+        """Run one generation request.
+
+        Args:
+            request: Dict with either ``messages`` (OpenAI chat format) or a
+                raw ``prompt``, plus optional ``max_tokens`` /
+                ``max_new_tokens``, ``temperature``, ``top_p``, ``do_sample``
+                and ``repetition_penalty``.
+
+        Returns:
+            Dict with ``generated_text`` and the ``prompt_tokens`` /
+            ``completion_tokens`` / ``total_tokens`` counters.
+        """
+        # Two input forms: messages (OpenAI format) or prompt (raw text)
+        messages = request.get("messages")
+        if messages:
+            prompt = _build_prompt_from_messages(messages)
+        else:
+            prompt = request.get("prompt", "")
+
+        max_new_tokens = request.get("max_tokens", request.get("max_new_tokens", 512))
+
+        requested_temperature = request.get("temperature")
+        if requested_temperature is None:
+            requested_temperature = 0.7
+        # Decide on sampling from the temperature the caller asked for, BEFORE
+        # clamping: after the clamp the value is always > 0, which would make
+        # temperature=0 sample instead of decoding greedily.
+        do_sample = request.get("do_sample")
+        if do_sample is None:
+            do_sample = requested_temperature > 0
+        temperature = max(requested_temperature, 0.01)
+
+        top_p = request.get("top_p", 0.9)
+        repetition_penalty = request.get("repetition_penalty", 1.0)
+
+        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
+        prompt_tokens = inputs["input_ids"].shape[1]
+
+        with self.torch.no_grad():
+            outputs = self.model.generate(
+                **inputs,
+                max_new_tokens=max_new_tokens,
+                temperature=temperature,
+                top_p=top_p,
+                do_sample=do_sample,
+                repetition_penalty=repetition_penalty,
+                pad_token_id=self.tokenizer.eos_token_id,
+            )
+
+        # The output sequence starts with the prompt; decode only what follows.
+        generated = self.tokenizer.decode(
+            outputs[0][prompt_tokens:], skip_special_tokens=True
+        )
+        completion_tokens = outputs.shape[1] - prompt_tokens
+
+        return {
+            "generated_text": generated,
+            "prompt_tokens": int(prompt_tokens),
+            "completion_tokens": int(completion_tokens),
+            "total_tokens": int(prompt_tokens + completion_tokens),
+        }
+
+
+def _recv_exact(sock: socket.socket, n: int) -> bytes:
+    """Read exactly ``n`` bytes from ``sock``.
+
+    Raises:
+        ConnectionError: If the peer closes the connection before ``n``
+            bytes have arrived.
+    """
+    pieces = []
+    remaining = n
+    while remaining > 0:
+        piece = sock.recv(remaining)
+        if not piece:
+            raise ConnectionError("Connection closed while reading")
+        pieces.append(piece)
+        remaining -= len(piece)
+    return b"".join(pieces)
+
+
+def handle_client(
+    worker: InferenceWorker,
+    conn: socket.socket,
+    addr,
+    max_request_bytes: int = 16 * 1024 * 1024,
+):
+    """Serve one request on a TCP connection, then close it.
+
+    Wire format in both directions: a 4-byte big-endian length prefix
+    followed by a UTF-8 JSON body. Any failure is reported back to the peer
+    as ``{"error": ...}`` when the socket is still writable.
+
+    Args:
+        worker: Worker whose ``generate`` handles the decoded request.
+        conn: Accepted client socket; always closed on return.
+        addr: Peer address, used for log output only.
+        max_request_bytes: Upper bound on the declared body length. The
+            prefix comes from the peer, so it is checked before the body is
+            read; otherwise a single connection could announce up to 4 GiB.
+    """
+    try:
+        # Read the 4-byte length prefix
+        len_data = _recv_exact(conn, 4)
+        length = struct.unpack(">I", len_data)[0]
+        if length > max_request_bytes:
+            raise ValueError(
+                f"Request too large: {length} bytes (limit {max_request_bytes})"
+            )
+
+        # Read the JSON body
+        body_data = _recv_exact(conn, length)
+        request = json.loads(body_data.decode("utf-8"))
+
+        print(f"[worker] Request from {addr}: {list(request.keys())}", flush=True)
+
+        # Run inference
+        response = worker.generate(request)
+        print(
+            f"[worker] Response: {response['completion_tokens']} tokens generated",
+            flush=True,
+        )
+
+        # Send the response
+        resp_bytes = json.dumps(response, ensure_ascii=False).encode("utf-8")
+        conn.sendall(struct.pack(">I", len(resp_bytes)))
+        conn.sendall(resp_bytes)
+
+    except Exception as e:
+        print(f"[worker] Error handling {addr}: {e}", flush=True)
+        try:
+            error_resp = json.dumps({"error": str(e)}).encode("utf-8")
+            conn.sendall(struct.pack(">I", len(error_resp)))
+            conn.sendall(error_resp)
+        except Exception:
+            # Best effort only: the peer may already be gone.
+            pass
+    finally:
+        conn.close()
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Lightweight Inference Worker")
+    parser.add_argument("--model-path", type=str, required=True, help="模型目录路径")
+    parser.add_argument("--port", type=int, required=True, help="监听端口")
+    parser.add_argument("--host", type=str, default="0.0.0.0", help="监听地址")
+    args = parser.parse_args()
+
+    print(f"[worker] Initializing...", flush=True)
+    worker = InferenceWorker(args.model_path)
+
+    server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+    server.bind((args.host, args.port))
+    server.listen(2)
+    print(
+        f"[worker] Listening on {args.host}:{args.port} (TCP, length-prefixed JSON)",
+        flush=True,
+    )
+
+    # 通知启动脚本:服务已就绪
+    print("[worker] READY", flush=True)
+
+    def accept_loop():
+        while True:
+            try:
+                conn, addr = server.accept()
+                t = threading.Thread(target=handle_client, args=(worker, conn, addr))
+                t.daemon = True
+                t.start()
+            except OSError:
+                break  # server closed
+            except Exception as e:
+                print(f"[worker] Accept error: {e}", flush=True)
+
+    accept_thread = threading.Thread(target=accept_loop, daemon=True)
+    accept_thread.start()
+
+    try:
+        accept_thread.join()
+    except KeyboardInterrupt:
+        print("[worker] Shutting down...", flush=True)
+        server.close()
+
+
+if __name__ == "__main__":
+    main()

+ 25 - 0
backend/app/schemas/deployment.py

@@ -7,9 +7,34 @@ class DeployConfig(BaseModel):
     export_format: str = "safetensors"  # safetensors | gguf
 
 
+class DeployServeConfig(BaseModel):
+    """部署为在线服务的配置。"""
+    job_id: str
+    merge_with_base: bool = True  # 部署为服务时默认合并基础模型
+    port: int | None = None       # 可选指定端口,为空则自动分配
+    host: str = "0.0.0.0"         # 监听地址
+
+
 class DeployResponse(BaseModel):
+    task_id: str | None = None  # 部署任务 ID(用于轮询状态)
     job_id: str
     status: str
+    deploy_mode: str = "export"   # export | serve
     progress: float = 0.0
     output_path: str | None = None
+    endpoint_url: str | None = None  # serve 模式下的 base_url
+    port: int | None = None
+    error: str | None = None
+
+
+class DeployedServiceInfo(BaseModel):
+    """已部署的在线服务信息。"""
+    task_id: str
+    job_id: str
+    status: str                   # running | stopped | error
+    endpoint_url: str | None = None
+    base_url: str | None = None   # 同 endpoint_url,方便 OpenAI SDK 使用
+    port: int | None = None
+    output_path: str | None = None
+    created_at: str | None = None
     error: str | None = None

+ 2 - 2
backend/app/schemas/sample_center.py

@@ -7,7 +7,7 @@ class KnowledgeBaseItem(BaseModel):
     parent_table: str
     child_table: str
     document_count: int
-    status: int
+    status: str
     created_at: str
     created_by: str
     metadata_schema: list[dict] = []
@@ -27,7 +27,7 @@ class KnowledgeBaseDetailResponse(BaseModel):
     parent_table: str
     child_table: str
     document_count: int
-    status: int
+    status: str
     created_at: str
     updated_at: str = ""
     created_by: str

+ 122 - 0
backend/app/services/api_key_service.py

@@ -0,0 +1,122 @@
+"""API Key 管理服务。
+
+功能:
+  - 创建 API Key(格式 sk-xxxxx)
+  - 列出用户的 API Key
+  - 吊销 API Key
+  - 验证 API Key
+"""
+import secrets
+import uuid
+from datetime import datetime
+from typing import Any
+
+from sqlalchemy import select, update
+
+from app.core.db import ApiKeyModel, DeployTaskModel, async_session
+from app.core.logging import logger
+
+
+def _generate_api_key() -> str:
+    """生成 sk- 前缀的 API Key,类似 OpenAI 格式。"""
+    return f"sk-{secrets.token_urlsafe(32)}"
+
+
+async def create_api_key(user_id: str, name: str = "default") -> dict[str, Any]:
+    """Create a new API key and return it in full (shown only this once).
+
+    The id and creation time are generated here and returned from local
+    variables, so the result never reads ORM attributes after the commit or
+    after the session has closed. It therefore does not depend on how the
+    session factory configures ``expire_on_commit``.
+
+    Args:
+        user_id: Owner of the new key.
+        name: Display name for the key.
+
+    Returns:
+        Dict with ``id``, the full ``key``, ``name``, ``status`` and
+        ``created_at`` (ISO-8601 string).
+    """
+    key = _generate_api_key()
+    key_id = str(uuid.uuid4())
+    created_at = datetime.utcnow()
+    api_key = ApiKeyModel(
+        id=key_id,
+        user_id=user_id,
+        key=key,
+        name=name,
+        status="active",
+        created_at=created_at,
+    )
+    async with async_session() as session:
+        session.add(api_key)
+        await session.commit()
+
+    logger.info(f"API key created: user={user_id} name={name}")
+    return {
+        "id": key_id,
+        "key": key,  # full key, returned only at creation time
+        "name": name,
+        "status": "active",
+        "created_at": created_at.isoformat(),
+    }
+
+
+async def list_api_keys(user_id: str) -> list[dict[str, Any]]:
+    """Return the user's API keys, newest first, with the key value masked."""
+
+    def _mask(raw: str) -> str:
+        # Show the first 7 and last 4 characters; hide short keys entirely.
+        if len(raw) > 11:
+            return raw[:7] + "****" + raw[-4:]
+        return "****"
+
+    def _iso(moment):
+        return moment.isoformat() if moment else None
+
+    query = (
+        select(ApiKeyModel)
+        .where(ApiKeyModel.user_id == user_id)
+        .order_by(ApiKeyModel.created_at.desc())
+    )
+    async with async_session() as session:
+        rows = (await session.execute(query)).scalars().all()
+
+    return [
+        {
+            "id": row.id,
+            "key": _mask(row.key),
+            "name": row.name,
+            "status": row.status,
+            "last_used_at": _iso(row.last_used_at),
+            "created_at": _iso(row.created_at),
+        }
+        for row in rows
+    ]
+
+
+async def revoke_api_key(key_id: str, user_id: str) -> dict[str, Any]:
+    """吊销指定 API Key。"""
+    async with async_session() as session:
+        result = await session.execute(
+            select(ApiKeyModel).where(
+                ApiKeyModel.id == key_id,
+                ApiKeyModel.user_id == user_id,
+            )
+        )
+        record = result.scalar_one_or_none()
+        if not record:
+            return {"error": "API Key 不存在"}
+
+        record.status = "revoked"
+        await session.commit()
+
+    logger.info(f"API key revoked: id={key_id} user={user_id}")
+    return {"id": key_id, "status": "revoked"}
+
+
+async def validate_api_key(key: str) -> dict[str, Any] | None:
+    """Look up an active API key and record that it was used.
+
+    Args:
+        key: Full key value as presented by the client.
+
+    Returns:
+        ``{"user_id": ..., "key_id": ...}`` for an active key, or ``None``
+        when no active key matches.
+    """
+    async with async_session() as session:
+        result = await session.execute(
+            select(ApiKeyModel).where(
+                ApiKeyModel.key == key,
+                ApiKeyModel.status == "active",
+            )
+        )
+        record = result.scalar_one_or_none()
+        if not record:
+            return None
+
+        # Capture the values before committing: a commit may expire the
+        # loaded attributes, and re-reading them afterwards would need
+        # implicit I/O that an async session cannot do on attribute access.
+        key_info = {"user_id": record.user_id, "key_id": record.id}
+
+        # Update the last-used timestamp
+        record.last_used_at = datetime.utcnow()
+        await session.commit()
+
+    return key_info
+
+
+async def check_deploy_ownership(task_id: str, user_id: str) -> bool:
+    """检查用户是否拥有指定的部署任务。"""
+    async with async_session() as session:
+        result = await session.execute(
+            select(DeployTaskModel).where(
+                DeployTaskModel.id == task_id,
+                DeployTaskModel.user_id == user_id,
+            )
+        )
+        return result.scalar_one_or_none() is not None

+ 470 - 84
backend/app/services/deploy_service.py

@@ -1,4 +1,13 @@
+"""部署服务 —— 导出模型 / 部署为在线推理服务。
+
+架构:
+  - 253 算力节点运行轻量 inference_worker.py(纯 stdlib + torch/transformers,不需要 fastapi/uvicorn)
+  - 151 主节点对外提供 OpenAI 兼容代理 API,通过 TCP 转发请求到 253
+"""
+import asyncio
 import json
+import socket
+import struct
 import uuid
 from datetime import datetime, timezone
 from pathlib import Path
@@ -13,88 +22,400 @@ from sqlalchemy import select
 
 settings = get_settings()
 
+# 253 上 worker 的 TCP 端口范围
+_SERVE_PORT_MIN = 8100
+_SERVE_PORT_MAX = 8199
+
+
+# ---------------------------------------------------------------------------
+#  TCP 代理:151 → 253 inference_worker
+# ---------------------------------------------------------------------------
+
+async def proxy_to_worker(task_id: str, request: dict) -> dict:
+    """Forward an inference request over TCP to the worker of a deploy task.
+
+    Wire protocol: 4-byte big-endian length prefix followed by a JSON body.
+
+    Returns the worker's decoded response, or an ``{"error": ...}`` dict when
+    the task does not exist, is not in the "running" state, or has no port.
+    """
+    # Look up the port the worker listens on.
+    lookup = select(DeployTaskModel).where(DeployTaskModel.id == task_id)
+    async with async_session() as session:
+        deploy_task = (await session.execute(lookup)).scalar_one_or_none()
+        if not deploy_task:
+            return {"error": "部署任务不存在"}
+        if deploy_task.status != "running":
+            return {"error": f"服务未运行(当前状态: {deploy_task.status})"}
+        worker_port = deploy_task.port
+
+    if not worker_port:
+        return {"error": "未找到 worker 端口"}
+
+    # The TCP exchange is synchronous, so run it in a worker thread.
+    return await asyncio.to_thread(
+        _tcp_request, settings.compute_node_host, worker_port, request
+    )
 
-async def export_adapter(job_id: str, config: dict[str, Any]) -> dict[str, Any]:
+
+def _tcp_request(
+    host: str,
+    port: int,
+    request: dict,
+    max_response_bytes: int = 64 * 1024 * 1024,
+) -> dict:
+    """Send one request to the worker over TCP and return its JSON reply.
+
+    Both directions use a 4-byte big-endian length prefix followed by a
+    UTF-8 JSON body. A 120-second socket timeout applies because inference
+    can be slow.
+
+    Args:
+        host: Worker host to connect to.
+        port: Worker TCP port.
+        request: JSON-serialisable request payload.
+        max_response_bytes: Upper bound accepted for the announced response
+            length. A larger value is treated as a protocol error rather
+            than being read, so a garbled prefix cannot make the proxy wait
+            for (or buffer) gigabytes of data.
+
+    Returns:
+        The decoded response, or an ``{"error": ...}`` dict on timeout,
+        refused connection, oversized response or any other failure.
+    """
+    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    sock.settimeout(120)  # inference may take a long time
+    try:
+        sock.connect((host, port))
+
+        # Send: 4-byte length + JSON
+        body = json.dumps(request, ensure_ascii=False).encode("utf-8")
+        sock.sendall(struct.pack(">I", len(body)))
+        sock.sendall(body)
+
+        # Receive: 4-byte length + JSON
+        len_data = _recv_exact(sock, 4)
+        resp_len = struct.unpack(">I", len_data)[0]
+        if resp_len > max_response_bytes:
+            return {"error": f"代理请求失败: worker 响应过大({resp_len} 字节)"}
+        resp_data = _recv_exact(sock, resp_len)
+        return json.loads(resp_data.decode("utf-8"))
+    except socket.timeout:
+        return {"error": "推理超时(120s)"}
+    except ConnectionRefusedError:
+        return {"error": f"无法连接到推理 worker({host}:{port}),服务可能已停止"}
+    except Exception as e:
+        # Boundary of the proxy: report every other failure to the caller
+        # as an error payload instead of raising.
+        return {"error": f"代理请求失败: {e}"}
+    finally:
+        sock.close()
+
+
+def _recv_exact(sock: socket.socket, n: int) -> bytes:
+    """Read exactly ``n`` bytes from ``sock``.
+
+    Raises ConnectionError if the peer closes the connection before ``n``
+    bytes have arrived.
+    """
+    pieces = []
+    remaining = n
+    while remaining > 0:
+        piece = sock.recv(remaining)
+        if not piece:
+            raise ConnectionError("Connection closed while reading")
+        pieces.append(piece)
+        remaining -= len(piece)
+    return b"".join(pieces)
+
+
+# ---------------------------------------------------------------------------
+#  导出 Adapter(导出文件模式)
+# ---------------------------------------------------------------------------
+
+async def export_adapter(job_id: str, config: dict[str, Any], user_id: str = "") -> dict[str, Any]:
     """启动导出后台任务,立即返回 task_id。"""
     task_id = str(uuid.uuid4())
     merge_with_base = config.get("merge_with_base", False)
     export_format = config.get("export_format", "safetensors")
 
-    # 写 DB
     task = DeployTaskModel(
         id=task_id,
         job_id=job_id,
+        user_id=user_id or None,
         status="pending",
+        deploy_mode="export",
     )
     async with async_session() as session:
         session.add(task)
         await session.commit()
 
-    # 注册并启动
     background_task_manager.register_task(task_id, "deployment", {"job_id": job_id})
     await background_task_manager.run(
         task_id, "deployment", _execute_export(task_id, job_id, merge_with_base, export_format)
     )
 
     logger.info(f"Deploy task started: job={job_id} (task_id={task_id})")
-    return {"job_id": job_id, "status": "pending"}
+    return {"task_id": task_id, "job_id": job_id, "status": "pending", "deploy_mode": "export"}
 
 
 async def _execute_export(task_id: str, job_id: str, merge_with_base: bool, export_format: str) -> dict:
     """后台执行导出。"""
     try:
-        # 远程模式:通过 SSH 在算力节点执行
         if settings.use_remote_compute:
             result = await _run_remote_export(task_id, job_id, merge_with_base, export_format)
-            return result
+        else:
+            result = await _run_local_export(task_id, job_id, merge_with_base)
 
-        # 本地模式
-        adapter_path = settings.adapters_dir / job_id
-        if not adapter_path.exists():
-            raise ValueError("Adapter not found")
+        output_path = result.get("output_path")
 
-        import torch
-        from transformers import AutoModelForCausalLM, AutoTokenizer
+        # 把 inference_worker.py 和启动脚本复制到输出目录
+        if output_path and settings.use_remote_compute:
+            _copy_worker_template_remote(output_path)
+
+        await _update_deploy_status(task_id, "completed", output_path=output_path)
+        return {"output_path": output_path}
+
+    except Exception as e:
+        logger.error(f"Export failed for job {job_id}: {e}")
+        await _update_deploy_status(task_id, "failed", error=str(e))
+        return {"error": str(e)}
+
+
+# ---------------------------------------------------------------------------
+#  部署为在线服务(serve 模式)
+# ---------------------------------------------------------------------------
+
+async def start_serving(job_id: str, config: dict[str, Any], user_id: str = "") -> dict[str, Any]:
+    """部署为在线推理服务,151 代理对外,253 worker 做推理。"""
+    task_id = str(uuid.uuid4())
+    merge_with_base = config.get("merge_with_base", True)
+    port = config.get("port")
+
+    if not port:
+        port = await _allocate_port()
 
-        output_path = settings.adapters_dir / f"{job_id}_merged"
-
-        if merge_with_base:
-            base_model_id = _get_base_model_id_local(job_id)
-            if base_model_id:
-                base_model = AutoModelForCausalLM.from_pretrained(
-                    base_model_id, torch_dtype=torch.float16, device_map="auto"
-                )
-            else:
-                from peft import PeftModel
-                merged = PeftModel.from_pretrained(
-                    AutoModelForCausalLM.from_pretrained(
-                        str(adapter_path), torch_dtype=torch.float16
-                    ),
-                    str(adapter_path),
-                )
-                merged = merged.merge_and_unload()
-                merged.save_pretrained(output_path)
-                tokenizer = AutoTokenizer.from_pretrained(adapter_path)
-                tokenizer.save_pretrained(output_path)
+    task = DeployTaskModel(
+        id=task_id,
+        job_id=job_id,
+        user_id=user_id or None,
+        status="pending",
+        deploy_mode="serve",
+        port=port,
+    )
+    async with async_session() as session:
+        session.add(task)
+        await session.commit()
+
+    background_task_manager.register_task(task_id, "deployment", {"job_id": job_id, "mode": "serve"})
+    await background_task_manager.run(
+        task_id, "deployment", _execute_serve(task_id, job_id, merge_with_base, port)
+    )
+
+    logger.info(f"Serve task started: job={job_id} port={port} (task_id={task_id})")
+    return {"task_id": task_id, "job_id": job_id, "status": "pending", "deploy_mode": "serve", "port": port}
+
+
+async def _execute_serve(task_id: str, job_id: str, merge_with_base: bool, port: int) -> dict:
+    """后台执行:导出模型 → 复制 worker → 启动 TCP 推理 worker。"""
+    try:
+        # 第一步:导出(合并 adapter)
+        if settings.use_remote_compute:
+            export_result = await _run_remote_export(task_id, job_id, merge_with_base, "safetensors")
+            output_path = export_result.get("output_path")
         else:
-            import shutil
-            shutil.copytree(adapter_path, output_path)
+            export_result = await _run_local_export(task_id, job_id, merge_with_base)
+            output_path = export_result.get("output_path")
 
-        if export_format == "gguf":
-            gguf_path = output_path.with_suffix(".gguf")
-            _export_to_gguf_local(output_path, gguf_path)
+        if not output_path:
+            raise RuntimeError("导出失败,无法获取输出路径")
 
-        await _update_deploy_status(task_id, "completed", output_path=str(output_path))
-        return {"output_path": str(output_path)}
+        # 第二步:启动推理 worker
+        if settings.use_remote_compute:
+            pid = await _launch_remote_worker(task_id, output_path, port)
+        else:
+            pid = await _launch_local_worker(task_id, output_path, port)
+
+        # endpoint_url 是 151 上的代理路径(相对路径,前端拼接 origin)
+        endpoint_url = f"/api/v1/deployment/proxy/{task_id}/v1"
+        await _update_deploy_status(
+            task_id, "running",
+            output_path=output_path,
+            endpoint_url=endpoint_url,
+            port=port,
+            pid=pid,
+        )
+        return {"endpoint_url": endpoint_url, "port": port, "pid": pid}
 
     except Exception as e:
-        logger.error(f"Export failed for job {job_id}: {e}")
+        logger.error(f"Serve failed for job {job_id}: {e}")
         await _update_deploy_status(task_id, "failed", error=str(e))
         return {"error": str(e)}
 
 
+async def _launch_remote_worker(task_id: str, model_path: str, port: int) -> str:
+    """Start inference_worker.py inside the remote compute container.
+
+    Copies the worker script into ``model_path``, launches it detached with
+    its output redirected to ``/tmp/serve_{task_id}.log``, then polls that
+    log every 5 seconds (at most 60 times, ~5 minutes) for a ``READY``
+    marker. Per the original author's note, the worker only needs
+    torch + transformers (no fastapi/uvicorn).
+
+    Returns:
+        The PID string echoed by the launch command. It is also returned,
+        after a warning, when READY never appears within the polling window.
+
+    Raises:
+        RuntimeError: if copying or launching fails, or if the worker
+            process is found to have exited before READY shows up.
+    """
+    from app.core.remote_executor import is_process_running
+
+    # Path of the worker script inside the container.
+    worker_template = f"{settings.compute_node_workdir}/app/core/inference_worker.py"
+
+    # Copy the worker next to the model.
+    copy_cmd = (
+        f"docker exec {settings.compute_node_docker_container} "
+        f"bash -c 'cp {worker_template} {model_path}/inference_worker.py'"
+    )
+    # ssh_exec is synchronous; run it in a thread so the event loop keeps
+    # serving other requests while SSH is in flight.
+    code, _, stderr = await asyncio.to_thread(ssh_exec, copy_cmd, timeout=30)
+    if code != 0:
+        raise RuntimeError(f"复制 inference_worker.py 失败: {stderr}")
+
+    # Launch the worker in the background inside the container.
+    launch_cmd = (
+        f"docker exec "
+        f"-e MACA_MPS_MODE=1 "
+        f"-e CUDA_VISIBLE_DEVICES=3 "
+        f"-w {model_path} "
+        f"{settings.compute_node_docker_container} "
+        f"bash -c '"
+        f"setsid {settings.compute_node_python} inference_worker.py "
+        f"--model-path {model_path} "
+        f"--port {port} "
+        f"</dev/null >/tmp/serve_{task_id}.log 2>&1 &"
+        f" disown; echo $!'"
+    )
+
+    code, stdout, stderr = await asyncio.to_thread(ssh_exec, launch_cmd, timeout=30)
+    if code != 0:
+        raise RuntimeError(f"启动推理 worker 失败: {stderr}")
+
+    pid = stdout.strip()
+    logger.info(f"Remote worker launched: task={task_id} port={port} pid={pid}")
+
+    # Readiness probe. `grep -q` prints nothing itself, so the output is
+    # exactly "1" or "0". (`grep -c ... || echo 0` printed "0" twice when the
+    # log existed without a match, which was mistaken for "ready".)
+    check_cmd = (
+        f"docker exec {settings.compute_node_docker_container} "
+        f"bash -c 'grep -q READY /tmp/serve_{task_id}.log 2>/dev/null && echo 1 || echo 0'"
+    )
+    log_cmd = (
+        f"docker exec {settings.compute_node_docker_container} "
+        f"bash -c 'tail -20 /tmp/serve_{task_id}.log 2>/dev/null'"
+    )
+
+    # Model loading can be slow: wait up to 5 minutes (60 * 5s).
+    for attempt in range(60):
+        await asyncio.sleep(5)
+        code, stdout, _ = await asyncio.to_thread(ssh_exec, check_cmd, timeout=10)
+        if code == 0 and stdout.strip() == "1":
+            logger.info(f"Worker ready: task={task_id} (after ~{(attempt+1)*5}s)")
+            return pid
+
+        # Not ready yet: make sure the process is still alive.
+        if not await asyncio.to_thread(is_process_running, pid):
+            # Surface the tail of the log so the failure reason is visible.
+            _, log_stdout, _ = await asyncio.to_thread(ssh_exec, log_cmd, timeout=10)
+            raise RuntimeError(f"Worker 进程已退出: {log_stdout}")
+
+    logger.warning(f"Worker not ready after 5min: task={task_id}, proceeding anyway")
+    return pid
+
+
+async def _launch_local_worker(task_id: str, model_path: str, port: int) -> str:
+    """Start the inference worker as a local subprocess (development use).
+
+    Copies inference_worker.py into ``model_path`` and runs it there with the
+    current interpreter, discarding its stdout/stderr. Returns the child's
+    PID as a string. ``task_id`` is not used here.
+    """
+    import shutil
+    import subprocess
+    import sys
+
+    app_dir = Path(__file__).resolve().parent.parent
+    template = app_dir / "core" / "inference_worker.py"
+    shutil.copy(template, Path(model_path) / "inference_worker.py")
+
+    argv = [
+        sys.executable, "inference_worker.py",
+        "--model-path", model_path,
+        "--port", str(port),
+    ]
+    worker = subprocess.Popen(
+        argv,
+        cwd=model_path,
+        stdout=subprocess.DEVNULL,
+        stderr=subprocess.DEVNULL,
+    )
+    return str(worker.pid)
+
+
+# ---------------------------------------------------------------------------
+#  停止服务 / 列表 / 状态
+# ---------------------------------------------------------------------------
+
+async def stop_serving(task_id: str, user_id: str = "") -> dict[str, Any]:
+    """Stop a deployed online inference service.
+
+    Args:
+        task_id: Deploy task to stop.
+        user_id: When non-empty and the task has an owner, the two must match.
+
+    Returns:
+        ``{"task_id": ..., "status": "stopped"}`` on success, or an
+        ``{"error": ...}`` dict when the task is missing, is not a "serve"
+        task, or belongs to another user.
+    """
+    async with async_session() as session:
+        result = await session.execute(select(DeployTaskModel).where(DeployTaskModel.id == task_id))
+        record = result.scalar_one_or_none()
+        if not record:
+            return {"error": "任务不存在"}
+        if record.deploy_mode != "serve":
+            return {"error": "该任务不是在线服务"}
+        if user_id and record.user_id and record.user_id != user_id:
+            return {"error": "无权操作此任务"}
+
+        pid = record.pid
+        if pid and settings.use_remote_compute:
+            # Kill the remote worker process and its children; the trailing
+            # `true` keeps the command's exit status at 0 either way.
+            kill_cmd = (
+                f"docker exec {settings.compute_node_docker_container} "
+                f"bash -c 'kill {pid} 2>/dev/null; pkill -P {pid} 2>/dev/null; true'"
+            )
+            # ssh_exec is synchronous; run it in a thread so the event loop
+            # is not blocked for up to the 15s timeout.
+            code, _, _ = await asyncio.to_thread(ssh_exec, kill_cmd, timeout=15)
+            logger.info(f"Stop serving: task={task_id} pid={pid} kill_code={code}")
+        # NOTE(review): when use_remote_compute is false nothing is killed
+        # here, so a locally launched worker keeps running after the record
+        # is marked stopped -- confirm whether that is intended.
+
+        record.status = "stopped"
+        record.pid = None
+        record.finished_at = datetime.utcnow()
+        await session.commit()
+
+    background_task_manager.update_task(task_id, status="stopped")
+    return {"task_id": task_id, "status": "stopped"}
+
+
+async def list_deployed_services(user_id: str = "") -> list[dict[str, Any]]:
+    """List deploy tasks in "serve" mode, newest first.
+
+    Args:
+        user_id: When non-empty, only tasks owned by this user are returned.
+
+    Returns:
+        One dict per task. For a task recorded as "running" with a PID in
+        remote-compute mode, the process is probed; if it is gone, the task
+        is reported (and persisted) as "stopped".
+    """
+    async with async_session() as session:
+        query = select(DeployTaskModel).where(DeployTaskModel.deploy_mode == "serve")
+        if user_id:
+            query = query.where(DeployTaskModel.user_id == user_id)
+        query = query.order_by(DeployTaskModel.created_at.desc())
+        result = await session.execute(query)
+        records = result.scalars().all()
+
+    services = []
+    for r in records:
+        status = r.status
+        # For "running" tasks, check that the remote process is still alive.
+        if status == "running" and r.pid and settings.use_remote_compute:
+            from app.core.remote_executor import is_process_running
+            # The probe is a synchronous call; run it in a thread so listing
+            # services does not stall the event loop once per running task.
+            if not await asyncio.to_thread(is_process_running, r.pid):
+                status = "stopped"
+                await _update_deploy_status(r.id, "stopped", error="进程已退出")
+
+        services.append({
+            "task_id": r.id,
+            "job_id": r.job_id,
+            "status": status,
+            "endpoint_url": r.endpoint_url,
+            "base_url": r.endpoint_url,
+            "port": r.port,
+            "output_path": r.output_path,
+            "created_at": r.created_at.isoformat() if r.created_at else None,
+            "error": r.error,
+        })
+    return services
+
+
+async def get_deploy_status(task_id: str) -> dict[str, Any]:
+    """Return the stored state of a deploy task.
+
+    When no task has this id, a placeholder dict with status "not_found" is
+    returned instead. A missing deploy_mode is reported as "export".
+    """
+    lookup = select(DeployTaskModel).where(DeployTaskModel.id == task_id)
+    async with async_session() as session:
+        task = (await session.execute(lookup)).scalar_one_or_none()
+        if not task:
+            return {
+                "task_id": None,
+                "job_id": "",
+                "status": "not_found",
+                "deploy_mode": "export",
+                "progress": 0.0,
+                "output_path": None,
+                "endpoint_url": None,
+                "port": None,
+                "error": None,
+            }
+        return {
+            "task_id": task.id,
+            "job_id": task.job_id,
+            "status": task.status,
+            "deploy_mode": task.deploy_mode or "export",
+            "progress": task.progress,
+            "output_path": task.output_path,
+            "endpoint_url": task.endpoint_url,
+            "port": task.port,
+            "error": task.error,
+        }
+
+
+# ---------------------------------------------------------------------------
+#  辅助函数
+# ---------------------------------------------------------------------------
+
+async def _allocate_port() -> int:
+    """Pick the lowest port in the serve pool not held by a live task.
+
+    A port counts as taken when a "serve" task in status "pending" or
+    "running" has it recorded. Raises RuntimeError when the whole pool is
+    taken.
+    """
+    in_use_query = select(DeployTaskModel.port).where(
+        DeployTaskModel.deploy_mode == "serve",
+        DeployTaskModel.status.in_(["pending", "running"]),
+        DeployTaskModel.port.isnot(None),
+    )
+    async with async_session() as session:
+        rows = (await session.execute(in_use_query)).all()
+    taken = {row[0] for row in rows}
+
+    pool = range(_SERVE_PORT_MIN, _SERVE_PORT_MAX + 1)
+    free_port = next((candidate for candidate in pool if candidate not in taken), None)
+    if free_port is None:
+        raise RuntimeError(f"无可用端口({_SERVE_PORT_MIN}-{_SERVE_PORT_MAX} 全部占用)")
+    return free_port
+
+
 async def _run_remote_export(task_id: str, job_id: str, merge_with_base: bool, export_format: str) -> dict:
-    """通过 SSH 在远程容器执行部署。"""
+    """通过 SSH 在远程容器执行模型合并/导出。"""
     remote_cmd = (
         f"docker exec "
         f"-e MACA_MPS_MODE=1 "
@@ -120,69 +441,122 @@ async def _run_remote_export(task_id: str, job_id: str, merge_with_base: bool, e
                 result = json.loads(line)
                 if "error" in result:
                     raise RuntimeError(result["error"])
-                await _update_deploy_status(task_id, "completed", output_path=result.get("output_path"))
-                return {"output_path": result.get("output_path")}
+                return result
             except json.JSONDecodeError:
                 continue
 
     raise RuntimeError(f"Invalid response: {stdout[:500]}")
 
 
-async def _update_deploy_status(task_id: str, status: str, output_path: str = None, error: str = None):
-    async with async_session() as session:
-        result = await session.execute(select(DeployTaskModel).where(DeployTaskModel.id == task_id))
-        record = result.scalar_one_or_none()
-        if record:
-            record.status = status
-            if output_path:
-                record.output_path = output_path
-            if error:
-                record.error = error
-            if status in ("completed", "failed"):
-                record.finished_at = datetime.utcnow()
-            await session.commit()
+async def _run_local_export(task_id: str, job_id: str, merge_with_base: bool) -> dict:
+    """本地执行导出(开发用)。"""
+    adapter_path = settings.adapters_dir / job_id
+    if not adapter_path.exists():
+        raise ValueError("Adapter not found")
 
-    background_task_manager.update_task(
-        task_id, status=status, output_path=output_path, error=error,
+    output_path = settings.adapters_dir / f"{job_id}_merged"
+
+    if merge_with_base:
+        import torch
+        from transformers import AutoModelForCausalLM, AutoTokenizer
+
+        base_model_id = _get_base_model_id_local(job_id)
+        if base_model_id:
+            from peft import PeftModel
+            base_model = AutoModelForCausalLM.from_pretrained(
+                base_model_id, torch_dtype=torch.float16, device_map="auto"
+            )
+            peft_model = PeftModel.from_pretrained(base_model, adapter_path)
+            merged = peft_model.merge_and_unload()
+            merged.save_pretrained(output_path)
+            tokenizer = AutoTokenizer.from_pretrained(adapter_path)
+            tokenizer.save_pretrained(output_path)
+        else:
+            from peft import PeftModel
+            merged = PeftModel.from_pretrained(
+                AutoModelForCausalLM.from_pretrained(
+                    str(adapter_path), torch_dtype=torch.float16
+                ),
+                str(adapter_path),
+            )
+            merged = merged.merge_and_unload()
+            merged.save_pretrained(output_path)
+            tokenizer = AutoTokenizer.from_pretrained(adapter_path)
+            tokenizer.save_pretrained(output_path)
+    else:
+        import shutil
+        if output_path.exists():
+            shutil.rmtree(output_path)
+        shutil.copytree(adapter_path, output_path)
+
+    return {"output_path": str(output_path)}
+
+
+def _copy_worker_template_remote(output_path: str):
+    """Copy inference_worker.py and a start script into the remote model dir.
+
+    Both steps run through ``ssh_exec`` inside the compute container. They
+    are best-effort: a non-zero exit code is only logged as a warning and
+    nothing is raised or returned.
+    """
+    worker_template = f"{settings.compute_node_workdir}/app/core/inference_worker.py"
+    copy_cmd = (
+        f"docker exec {settings.compute_node_docker_container} "
+        f"bash -c 'cp {worker_template} {output_path}/inference_worker.py'"
+    )
+    code, _, stderr = ssh_exec(copy_cmd, timeout=30)
+    if code != 0:
+        logger.warning(f"复制 inference_worker.py 到 {output_path} 失败: {stderr}")
+
+    # Generate a convenience start script; note it hard-codes port 8100.
+    start_script = (
+        f"#!/bin/bash\n"
+        f"cd {output_path}\n"
+        f"CUDA_VISIBLE_DEVICES=3 MACA_MPS_MODE=1 "
+        f"{settings.compute_node_python} inference_worker.py "
+        f"--model-path . --port 8100\n"
     )
+    # Write start.sh through a heredoc in the container and make it executable.
+    script_cmd = (
+        f"docker exec {settings.compute_node_docker_container} "
+        f"bash -c 'cat > {output_path}/start.sh << \"EOF\"\n{start_script}EOF\n"
+        f"chmod +x {output_path}/start.sh'"
+    )
+    code, _, _ = ssh_exec(script_cmd, timeout=15)
+    if code != 0:
+        logger.warning(f"生成 start.sh 失败")
 
 
 def _get_base_model_id_local(job_id: str):
     config_path = settings.adapters_dir / job_id / "adapter_config.json"
     if config_path.exists():
-        import json
         with open(config_path) as f:
             return json.load(f).get("base_model_name_or_path")
     return None
 
 
-def _export_to_gguf_local(model_path: Path, output_path: Path):
-    try:
-        import subprocess
-        result = subprocess.run(
-            ["python", "-m", "llama_cpp.convert_hf_to_gguf", str(model_path), "--outfile", str(output_path)],
-            capture_output=True, text=True, timeout=600,
-        )
-        if result.returncode != 0:
-            logger.error(f"GGUF export failed: {result.stderr}")
-    except Exception as e:
-        logger.warning(f"GGUF export not available: {e}")
-
-
-async def get_deploy_status(task_id: str) -> dict[str, Any]:
-    """获取部署任务状态。"""
+async def _update_deploy_status(
+    task_id: str, status: str,
+    output_path: str = None, error: str = None,
+    endpoint_url: str = None, port: int = None, pid: str = None,
+):
     async with async_session() as session:
         result = await session.execute(select(DeployTaskModel).where(DeployTaskModel.id == task_id))
         record = result.scalar_one_or_none()
         if record:
-            return {
-                "job_id": record.job_id,
-                "status": record.status,
-                "progress": record.progress,
-                "output_path": record.output_path,
-                "error": record.error,
-            }
-    return {"job_id": "", "status": "not_found", "progress": 0.0, "output_path": None, "error": None}
+            record.status = status
+            if output_path:
+                record.output_path = output_path
+            if error:
+                record.error = error
+            if endpoint_url:
+                record.endpoint_url = endpoint_url
+            if port:
+                record.port = port
+            if pid:
+                record.pid = pid
+            if status in ("completed", "failed", "stopped"):
+                record.finished_at = datetime.utcnow()
+            await session.commit()
+
+    background_task_manager.update_task(
+        task_id, status=status, output_path=output_path, error=error,
+        endpoint_url=endpoint_url,
+    )
 
 
 async def recover_stale_deploys() -> None:
@@ -194,8 +568,20 @@ async def recover_stale_deploys() -> None:
         )
         records = result.scalars().all()
         for record in records:
-            record.status = "failed"
-            record.error = "Server restarted, task interrupted"
+            if record.deploy_mode == "export":
+                record.status = "failed"
+                record.error = "Server restarted, task interrupted"
+            elif record.deploy_mode == "serve":
+                if record.pid and settings.use_remote_compute:
+                    from app.core.remote_executor import is_process_running
+                    if not is_process_running(record.pid):
+                        record.status = "stopped"
+                        record.error = "Server restarted, process no longer running"
+                    else:
+                        continue  # 进程还在,保持 running
+                else:
+                    record.status = "stopped"
+                    record.error = "Server restarted, process state unknown"
             record.finished_at = datetime.utcnow()
         if records:
             await session.commit()

+ 11 - 0
backend/main.py

@@ -78,11 +78,18 @@ def create_app() -> FastAPI:
     from app.api import inference as inference_api
     from app.api import auth as auth_api
     from app.api import sample_center as sample_center_api
+    from app.api import api_keys as api_keys_api
     from app.core.auth import get_current_active_user
 
     # 认证路由(无 prefix,端点自带完整路径)
     app.include_router(auth_api.router)
 
+    # API Key 管理路由
+    app.include_router(
+        api_keys_api.router, prefix="/api/v1/api-keys", tags=["api-keys"],
+        dependencies=[Depends(get_current_active_user)],
+    )
+
     # 已有路由:添加认证依赖保护
     app.include_router(
         models_api.router, prefix="/api/v1/models", tags=["models"],
@@ -104,6 +111,10 @@ def create_app() -> FastAPI:
         deployment_api.router, prefix="/api/v1/deployment", tags=["deployment"],
         dependencies=[Depends(get_current_active_user)],
     )
+    # 代理端点不需要 JWT,使用 API Key 认证
+    app.include_router(
+        deployment_api.proxy_router, prefix="/api/v1/deployment", tags=["deployment-proxy"],
+    )
     app.include_router(
         inference_api.router, prefix="/api/v1/inference", tags=["inference"],
         dependencies=[Depends(get_current_active_user)],

+ 65 - 1
frontend/src/api/client.ts

@@ -165,6 +165,16 @@ const api = {
         headers: { 'Content-Type': 'application/json' },
         body: JSON.stringify(cfg),
       }).then(r => r.json()) as Promise<DeployResponse>,
+    serve: (cfg: DeployServeConfig) =>
+      apiFetch('/api/v1/deployment/serve', {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify(cfg),
+      }).then(r => r.json()) as Promise<DeployResponse>,
+    stop: (taskId: string) =>
+      apiFetch(`/api/v1/deployment/${taskId}/stop`, { method: 'POST' }).then(r => r.json()),
+    services: () =>
+      apiFetch('/api/v1/deployment/services').then(r => r.json()) as Promise<DeployedServiceInfo[]>,
     status: (id: string) =>
       apiFetch(`/api/v1/deployment/${id}/status`).then(r => r.json()) as Promise<DeployResponse>,
   },
@@ -183,6 +193,20 @@ const api = {
       }).then(r => r.json()) as Promise<KbImportResponse>,
   },
 
+  // --- API Keys ---
+  apiKeys: {
+    create: (name = 'default') =>
+      apiFetch('/api/v1/api-keys/', {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({ name }),
+      }).then(r => r.json()) as Promise<ApiKeyCreateResponse>,
+    list: () =>
+      apiFetch('/api/v1/api-keys/').then(r => r.json()) as Promise<ApiKeyInfo[]>,
+    revoke: (keyId: string) =>
+      apiFetch(`/api/v1/api-keys/${keyId}`, { method: 'DELETE' }).then(r => r.json()),
+  },
+
   // --- Inference ---
   inference: {
     generate: (req: InferenceRequest) =>
@@ -348,10 +372,34 @@ interface DeployConfig {
   export_format?: string
 }
 
+interface DeployServeConfig {
+  job_id: string
+  merge_with_base?: boolean
+  port?: number
+  host?: string
+}
+
 interface DeployResponse {
+  task_id?: string
   job_id: string
   status: string
+  deploy_mode?: string
+  progress?: number
   output_path?: string
+  endpoint_url?: string
+  port?: number
+  error?: string
+}
+
+interface DeployedServiceInfo {
+  task_id: string
+  job_id: string
+  status: string
+  endpoint_url?: string
+  base_url?: string
+  port?: number
+  output_path?: string
+  created_at?: string
   error?: string
 }
 
@@ -424,4 +472,20 @@ interface KbImportResponse {
   child_table: string
 }
 
-export type { ModelInfo, ModelTestRequest, ModelTestResponse, ModelDownloadResponse, ModelDownloadTaskResponse, DatasetInfo, DatasetDownloadResponse, DatasetDownloadTaskResponse, DatasetPreview, DatasetValidation, TrainingJob, TrainingConfig, EvalConfig, EvalResult, DeployConfig, DeployResponse, AdapterInfo, InferenceRequest, InferenceResponse, KnowledgeBaseItem, KnowledgeBaseListResponse, KnowledgeBaseDetailResponse, KbImportResponse }
+interface ApiKeyCreateResponse {
+  id: string
+  key: string        // 完整 key,仅创建时返回一次
+  name: string
+  created_at?: string
+}
+
+interface ApiKeyInfo {
+  id: string
+  key: string        // masked(sk-xxx****xxxx)
+  name: string
+  status: string     // active | revoked
+  last_used_at?: string
+  created_at?: string
+}
+
+export type { ModelInfo, ModelTestRequest, ModelTestResponse, ModelDownloadResponse, ModelDownloadTaskResponse, DatasetInfo, DatasetDownloadResponse, DatasetDownloadTaskResponse, DatasetPreview, DatasetValidation, TrainingJob, TrainingConfig, EvalConfig, EvalResult, DeployConfig, DeployServeConfig, DeployResponse, DeployedServiceInfo, AdapterInfo, InferenceRequest, InferenceResponse, KnowledgeBaseItem, KnowledgeBaseListResponse, KnowledgeBaseDetailResponse, KbImportResponse, ApiKeyCreateResponse, ApiKeyInfo }

+ 1 - 1
frontend/src/components/layout/Layout.tsx

@@ -8,8 +8,8 @@ const NAV_ITEMS = [
   { path: '/datasets', label: '数据集', icon: Database },
   { path: '/training', label: '训练', icon: Train },
   { path: '/evaluation', label: '评估', icon: TrendingUp },
-  { path: '/deployment', label: '部署', icon: CloudUpload },
   { path: '/inference', label: '推理', icon: MessageSquare },
+  { path: '/deployment', label: '部署', icon: CloudUpload },
 ]
 
 export function Layout({ children }: { children: React.ReactNode }) {

+ 694 - 111
frontend/src/pages/Deployment.tsx

@@ -1,153 +1,736 @@
-import { useState, useEffect } from 'react'
-import api, { DeployResponse, TrainingJob } from '../api/client'
+import { useState, useEffect, useRef, useCallback } from 'react'
+import api, { DeployResponse, DeployedServiceInfo, TrainingJob, ApiKeyInfo, ApiKeyCreateResponse } from '../api/client'
 
-const EXPORT_FORMATS = [
-  { value: 'safetensors', label: 'SafeTensors (推荐)' },
-  { value: 'pytorch', label: 'PyTorch (.bin)' },
-  { value: 'gguf', label: 'GGUF (llama.cpp)' },
-]
+type Tab = 'serve' | 'export'
 
 export function Deployment() {
+  const [tab, setTab] = useState<Tab>('serve')
   const [jobs, setJobs] = useState<TrainingJob[]>([])
-  const [jobId, setJobId] = useState('')
-  const [mergeWithBase, setMergeWithBase] = useState(false)
+  const [services, setServices] = useState<DeployedServiceInfo[]>([])
+  const [loadingServices, setLoadingServices] = useState(false)
+
+  // 导出状态
+  const [exportJobId, setExportJobId] = useState('')
+  const [exportMerge, setExportMerge] = useState(false)
   const [exportFormat, setExportFormat] = useState('safetensors')
-  const [running, setRunning] = useState(false)
-  const [result, setResult] = useState<DeployResponse | null>(null)
-  const [error, setError] = useState('')
+  const [exportRunning, setExportRunning] = useState(false)
+  const [exportResult, setExportResult] = useState<DeployResponse | null>(null)
+  const [exportError, setExportError] = useState('')
+
+  // 部署状态
+  const [serveJobId, setServeJobId] = useState('')
+  const [serveMerge, setServeMerge] = useState(true)
+  const [serveRunning, setServeRunning] = useState(false)
+  const [serveResult, setServeResult] = useState<DeployResponse | null>(null)
+  const [serveError, setServeError] = useState('')
+
+  // API Key 状态
+  const [apiKeys, setApiKeys] = useState<ApiKeyInfo[]>([])
+  const [newKeyName, setNewKeyName] = useState('')
+  const [creatingKey, setCreatingKey] = useState(false)
+  const [justCreatedKey, setJustCreatedKey] = useState<ApiKeyCreateResponse | null>(null)
 
+  const pollingRef = useRef<ReturnType<typeof setInterval> | null>(null)
+  const servicesPollingRef = useRef<ReturnType<typeof setInterval> | null>(null)
+
+  // 加载 API Keys
+  const loadApiKeys = useCallback(() => {
+    api.apiKeys.list().then(setApiKeys).catch(() => setApiKeys([]))
+  }, [])
+
+  // 加载已完成训练任务
   useEffect(() => {
     api.training.list()
       .then(data => setJobs(data.filter(j => j.status === 'completed')))
       .catch(() => setJobs([]))
   }, [])
 
+  // 加载已部署服务列表
+  const loadServices = useCallback(() => {
+    setLoadingServices(true)
+    api.deployment.services()
+      .then(setServices)
+      .catch(() => setServices([]))
+      .finally(() => setLoadingServices(false))
+  }, [])
+
+  useEffect(() => {
+    loadServices()
+    loadApiKeys()
+    // 每 10 秒刷新服务列表
+    servicesPollingRef.current = setInterval(loadServices, 10000)
+    return () => {
+      if (servicesPollingRef.current) clearInterval(servicesPollingRef.current)
+      if (pollingRef.current) clearInterval(pollingRef.current)
+    }
+  }, [loadServices, loadApiKeys])
+
+  // Poll a deployment task every 3 s until it reports a terminal status.
+  // NOTE(review): pollingRef is shared by both modes, so starting a poll for
+  // one mode cancels the other's poll without resetting its running flag.
+  const startPolling = useCallback((taskId: string, mode: Tab) => {
+    const isServe = mode === 'serve'
+
+    const tick = async () => {
+      try {
+        const res = await api.deployment.status(taskId)
+        if (isServe) setServeResult(res)
+        else setExportResult(res)
+
+        const finished =
+          res.status === 'completed' || res.status === 'running' || res.status === 'failed'
+        if (!finished) return
+
+        // Terminal status: stop the timer and release the busy flag.
+        if (pollingRef.current) clearInterval(pollingRef.current)
+        pollingRef.current = null
+        if (isServe) {
+          setServeRunning(false)
+          loadServices()
+        } else {
+          setExportRunning(false)
+        }
+      } catch {
+        // A failed poll is ignored; the next tick simply tries again.
+      }
+    }
+
+    // Only one poll may be active at a time: drop any previous timer first.
+    if (pollingRef.current) clearInterval(pollingRef.current)
+    pollingRef.current = setInterval(tick, 3000)
+  }, [loadServices])
+
   const handleExport = () => {
+    // Start an export task for the selected job. Tasks that come back as
+    // 'pending' are tracked by polling; anything else is already final.
-    if (!jobId.trim()) return
-    setRunning(true)
-    setError('')
-    setResult(null)
+    if (!exportJobId.trim()) return
+    setExportRunning(true)
+    setExportError('')
+    setExportResult(null)
     api.deployment.export({
-      job_id: jobId,
-      merge_with_base: mergeWithBase,
+      job_id: exportJobId,
+      merge_with_base: exportMerge,
       export_format: exportFormat,
     })
-      .then(setResult)
-      .catch(err => setError(err instanceof Error ? err.message : '导出失败'))
-      .finally(() => setRunning(false))
+      .then(res => {
+        setExportResult(res)
+        if (res.status === 'pending' && res.task_id) {
+          startPolling(res.task_id, 'export')
+        } else {
+          // No poll will run (task already finished/failed, or there is no
+          // task_id to poll), so nothing else would clear the busy flag.
+          setExportRunning(false)
+        }
+      })
+      .catch(err => {
+        setExportError(err instanceof Error ? err.message : '导出失败')
+        setExportRunning(false)
+      })
+  }
+
+  const handleServe = () => {
+    // Start a serving task for the selected job. Tasks that come back as
+    // 'pending' are tracked by polling; anything else is already final.
+    if (!serveJobId.trim()) return
+    setServeRunning(true)
+    setServeError('')
+    setServeResult(null)
+    api.deployment.serve({
+      job_id: serveJobId,
+      merge_with_base: serveMerge,
+    })
+      .then(res => {
+        setServeResult(res)
+        if (res.status === 'pending' && res.task_id) {
+          startPolling(res.task_id, 'serve')
+        } else {
+          // No poll will run (task already finished/failed, or there is no
+          // task_id to poll), so clear the busy flag here and refresh the
+          // service list the same way the poller does on completion.
+          setServeRunning(false)
+          loadServices()
+        }
+      })
+      .catch(err => {
+        setServeError(err instanceof Error ? err.message : '部署失败')
+        setServeRunning(false)
+      })
+  }
+
+  const handleStop = (taskId: string) => {
+    // Ask the backend to stop the service, then refresh the list either way
+    // so the card reflects the real state even when the stop call fails.
+    api.deployment.stop(taskId)
+      .catch(err => console.error('Failed to stop deployed service', err))
+      .finally(() => loadServices())
+  }
+
+  const tabStyle = (active: boolean): React.CSSProperties => ({
+    padding: '8px 20px',
+    borderRadius: 8,
+    border: 'none',
+    background: active ? '#14b8a6' : 'transparent',
+    color: active ? '#fff' : '#64748b',
+    cursor: 'pointer',
+    fontSize: 14,
+    fontWeight: active ? 600 : 400,
+    transition: 'all 0.15s ease',
+  })
+
+  const selectStyle: React.CSSProperties = {
+    width: '100%', padding: '10px 12px', borderRadius: 8,
+    border: '1px solid #d0d0d0', boxSizing: 'border-box',
+    fontSize: 14, outline: 'none', background: '#fff',
+    transition: 'border-color 0.2s',
+  }
+
+  const btnPrimary: React.CSSProperties = {
+    padding: '10px 32px', borderRadius: 8, border: 'none',
+    background: '#14b8a6', color: '#fff', cursor: 'pointer',
+    fontSize: 14, fontWeight: 600, transition: 'all 0.2s ease',
   }
 
   return (
     <div>
       <h1 style={{ margin: 0, fontSize: 22, fontWeight: 700 }}>模型部署</h1>
-      <p style={{ color: '#888', fontSize: 13, margin: '4px 0 16px' }}>导出训练好的模型用于生产部署</p>
+      <p style={{ color: '#888', fontSize: 13, margin: '4px 0 16px' }}>
+        将训练好的模型部署为 OpenAI 兼容的推理服务,或导出模型文件
+      </p>
+
+      {/* Tab 切换 */}
+      <div style={{ display: 'flex', gap: 4, marginBottom: 20, background: '#f1f5f9', borderRadius: 10, padding: 4, width: 'fit-content' }}>
+        <button style={tabStyle(tab === 'serve')} onClick={() => setTab('serve')}>部署为服务</button>
+        <button style={tabStyle(tab === 'export')} onClick={() => setTab('export')}>导出文件</button>
+      </div>
+
+      {/* 部署为服务 */}
+      {tab === 'serve' && (
+        <div style={{
+          background: '#fff', borderRadius: 12, padding: 24,
+          boxShadow: '0 1px 3px rgba(0,0,0,0.06)', border: '1px solid rgba(0,0,0,0.04)',
+        }}>
+          <h2 style={{ margin: '0 0 4px', fontSize: 15, fontWeight: 600 }}>部署为在线推理服务</h2>
+          <p style={{ margin: '0 0 16px', fontSize: 12, color: '#94a3b8' }}>
+            启动 OpenAI 兼容 API 服务,可通过 base_url 在外部系统调用
+          </p>
+          <div style={{ display: 'grid', gridTemplateColumns: '1fr 1fr', gap: 16 }}>
+            <div>
+              <label style={{ display: 'block', fontSize: 12, color: '#666', marginBottom: 6, fontWeight: 500 }}>训练任务</label>
+              <select
+                value={serveJobId}
+                onChange={e => setServeJobId(e.target.value)}
+                style={selectStyle}
+                onFocus={e => { e.currentTarget.style.borderColor = '#14b8a6' }}
+                onBlur={e => { e.currentTarget.style.borderColor = '#cbd5e1' }}
+              >
+                <option value="" disabled>选择已完成的训练任务</option>
+                {jobs.map(j => (
+                  <option key={j.id} value={j.id}>{j.id.slice(0, 8)}... — {j.model_id}</option>
+                ))}
+              </select>
+            </div>
+            <div style={{ display: 'flex', alignItems: 'end' }}>
+              <label style={{
+                display: 'flex', alignItems: 'center', gap: 8, fontSize: 14, cursor: 'pointer',
+                padding: '10px 14px', background: '#fafbfc', borderRadius: 8, border: '1px solid #f0f0f0',
+              }}>
+                <input type="checkbox" checked={serveMerge} onChange={e => setServeMerge(e.target.checked)} />
+                合并基础模型(推荐)
+              </label>
+            </div>
+          </div>
+
+          {serveError && (
+            <div style={{ marginTop: 16, padding: 12, background: '#fff1f2', borderRadius: 8, fontSize: 13, color: '#e11d48', border: '1px solid #fecdd3' }}>
+              {serveError}
+            </div>
+          )}
+
+          <button
+            onClick={handleServe}
+            disabled={serveRunning || !serveJobId}
+            style={{ ...btnPrimary, marginTop: 20, opacity: (serveRunning || !serveJobId) ? 0.5 : 1 }}
+          >
+            {serveRunning ? '部署中...' : '启动服务'}
+          </button>
+        </div>
+      )}
+
+      {/* 导出文件 */}
+      {tab === 'export' && (
+        <div style={{
+          background: '#fff', borderRadius: 12, padding: 24,
+          boxShadow: '0 1px 3px rgba(0,0,0,0.06)', border: '1px solid rgba(0,0,0,0.04)',
+        }}>
+          <h2 style={{ margin: '0 0 4px', fontSize: 15, fontWeight: 600 }}>导出模型文件</h2>
+          <p style={{ margin: '0 0 16px', fontSize: 12, color: '#94a3b8' }}>
+            导出合并后的模型文件,同时附带 server.py 和 start.sh 启动脚本
+          </p>
+          <div style={{ display: 'grid', gridTemplateColumns: '1fr 1fr 1fr', gap: 16, alignItems: 'end' }}>
+            <div>
+              <label style={{ display: 'block', fontSize: 12, color: '#666', marginBottom: 6, fontWeight: 500 }}>训练任务</label>
+              <select
+                value={exportJobId}
+                onChange={e => setExportJobId(e.target.value)}
+                style={selectStyle}
+                onFocus={e => { e.currentTarget.style.borderColor = '#14b8a6' }}
+                onBlur={e => { e.currentTarget.style.borderColor = '#cbd5e1' }}
+              >
+                <option value="" disabled>选择已完成的训练任务</option>
+                {jobs.map(j => (
+                  <option key={j.id} value={j.id}>{j.id.slice(0, 8)}... — {j.model_id}</option>
+                ))}
+              </select>
+            </div>
+            <div>
+              <label style={{ display: 'block', fontSize: 12, color: '#666', marginBottom: 6, fontWeight: 500 }}>导出格式</label>
+              <select
+                value={exportFormat}
+                onChange={e => setExportFormat(e.target.value)}
+                style={selectStyle}
+                onFocus={e => { e.currentTarget.style.borderColor = '#14b8a6' }}
+                onBlur={e => { e.currentTarget.style.borderColor = '#cbd5e1' }}
+              >
+                <option value="safetensors">SafeTensors (推荐)</option>
+                <option value="pytorch">PyTorch (.bin)</option>
+                <option value="gguf">GGUF (llama.cpp)</option>
+              </select>
+            </div>
+            <div>
+              <label style={{
+                display: 'flex', alignItems: 'center', gap: 8, fontSize: 14, cursor: 'pointer',
+                padding: '10px 14px', background: '#fafbfc', borderRadius: 8, border: '1px solid #f0f0f0',
+              }}>
+                <input type="checkbox" checked={exportMerge} onChange={e => setExportMerge(e.target.checked)} />
+                合并基础模型
+              </label>
+            </div>
+          </div>
+
+          {exportError && (
+            <div style={{ marginTop: 16, padding: 12, background: '#fff1f2', borderRadius: 8, fontSize: 13, color: '#e11d48', border: '1px solid #fecdd3' }}>
+              {exportError}
+            </div>
+          )}
+
+          <button
+            onClick={handleExport}
+            disabled={exportRunning || !exportJobId}
+            style={{ ...btnPrimary, marginTop: 20, opacity: (exportRunning || !exportJobId) ? 0.5 : 1 }}
+          >
+            {exportRunning ? '导出中...' : '开始导出'}
+          </button>
+        </div>
+      )}
+
+      {/* 当前任务状态 */}
+      {(serveResult || exportResult) && (
+        <div style={{
+          marginTop: 20, background: '#fff', borderRadius: 12, padding: 20,
+          boxShadow: '0 1px 3px rgba(0,0,0,0.06)', border: '1px solid rgba(0,0,0,0.04)',
+        }}>
+          <h3 style={{ margin: '0 0 12px', fontSize: 14, fontWeight: 600 }}>任务状态</h3>
+          <TaskStatus result={serveResult || exportResult} />
+        </div>
+      )}
 
+      {/* API Key 管理 */}
       <div style={{
-        background: '#fff', borderRadius: 12, padding: 24,
+        marginTop: 24, background: '#fff', borderRadius: 12, padding: 24,
         boxShadow: '0 1px 3px rgba(0,0,0,0.06)', border: '1px solid rgba(0,0,0,0.04)',
       }}>
-        <h2 style={{ margin: '0 0 16px', fontSize: 15, fontWeight: 600 }}>导出 Adapter</h2>
-        <div style={{ display: 'grid', gridTemplateColumns: '1fr 1fr 1fr', gap: 16, alignItems: 'end' }}>
-          <div>
-            <label style={{ display: 'block', fontSize: 12, color: '#666', marginBottom: 6, fontWeight: 500 }}>训练任务</label>
-            <select
-              value={jobId}
-              onChange={e => setJobId(e.target.value)}
-              style={{
-                width: '100%', padding: '10px 12px', borderRadius: 8,
-                border: '1px solid #d0d0d0', boxSizing: 'border-box',
-                fontSize: 14, outline: 'none', background: '#fff',
-                transition: 'border-color 0.2s',
-              }}
-              onFocus={e => { e.currentTarget.style.borderColor = '#14b8a6' }}
-              onBlur={e => { e.currentTarget.style.borderColor = '#cbd5e1' }}
-            >
-              <option value="" disabled>选择已完成的训练任务</option>
-              {jobs.map(j => (
-                <option key={j.id} value={j.id}>{j.id.slice(0, 8)}... — {j.model_id}</option>
-              ))}
-            </select>
-          </div>
-          <div>
-            <label style={{ display: 'block', fontSize: 12, color: '#666', marginBottom: 6, fontWeight: 500 }}>导出格式</label>
-            <select
-              value={exportFormat}
-              onChange={e => setExportFormat(e.target.value)}
-              style={{
-                width: '100%', padding: '10px 12px', borderRadius: 8,
-                border: '1px solid #d0d0d0', boxSizing: 'border-box',
-                fontSize: 14, outline: 'none', background: '#fff',
-                transition: 'border-color 0.2s',
-              }}
-              onFocus={e => { e.currentTarget.style.borderColor = '#14b8a6' }}
-              onBlur={e => { e.currentTarget.style.borderColor = '#cbd5e1' }}
-            >
-              {EXPORT_FORMATS.map(f => (
-                <option key={f.value} value={f.value}>{f.label}</option>
-              ))}
-            </select>
+        <h2 style={{ margin: '0 0 4px', fontSize: 15, fontWeight: 600 }}>API Key 管理</h2>
+        <p style={{ margin: '0 0 16px', fontSize: 12, color: '#94a3b8' }}>
+          创建 API Key 用于外部系统调用已部署的推理服务
+        </p>
+
+        {/* 创建新 Key */}
+        <div style={{ display: 'flex', gap: 10, marginBottom: 16 }}>
+          <input
+            value={newKeyName}
+            onChange={e => setNewKeyName(e.target.value)}
+            placeholder="Key 名称(如:生产环境、测试)"
+            style={{
+              flex: 1, padding: '8px 12px', borderRadius: 8,
+              border: '1px solid #d0d0d0', fontSize: 13, outline: 'none',
+            }}
+            onFocus={e => { e.currentTarget.style.borderColor = '#14b8a6' }}
+            onBlur={e => { e.currentTarget.style.borderColor = '#d0d0d0' }}
+          />
+          <button
+            onClick={() => {
+              setCreatingKey(true)
+              api.apiKeys.create(newKeyName || 'default')
+                .then(res => {
+                  setJustCreatedKey(res)
+                  setNewKeyName('')
+                  loadApiKeys()
+                })
+                .catch(() => {})
+                .finally(() => setCreatingKey(false))
+            }}
+            disabled={creatingKey}
+            style={{
+              padding: '8px 20px', borderRadius: 8, border: 'none',
+              background: '#14b8a6', color: '#fff', cursor: 'pointer',
+              fontSize: 13, fontWeight: 500, opacity: creatingKey ? 0.5 : 1,
+            }}
+          >
+            {creatingKey ? '创建中...' : '创建 Key'}
+          </button>
+        </div>
+
+        {/* 新创建的 Key 提示 */}
+        {justCreatedKey && (
+          <div style={{
+            padding: 14, background: '#f0fdfa', borderRadius: 8, border: '1px solid #ccfbf1',
+            marginBottom: 16,
+          }}>
+            <div style={{ fontSize: 12, color: '#0d9488', fontWeight: 600, marginBottom: 6 }}>
+              API Key 创建成功(仅显示一次,请立即保存)
+            </div>
+            <div style={{ display: 'flex', alignItems: 'center', gap: 8 }}>
+              <code style={{
+                flex: 1, padding: '6px 10px', background: '#fff', borderRadius: 6,
+                border: '1px solid #e2e8f0', fontSize: 13, fontFamily: 'monospace',
+                wordBreak: 'break-all',
+              }}>
+                {justCreatedKey.key}
+              </code>
+              <button
+                onClick={() => {
+                  navigator.clipboard.writeText(justCreatedKey.key)
+                }}
+                style={{
+                  padding: '6px 12px', borderRadius: 6, border: '1px solid #e2e8f0',
+                  background: '#fff', cursor: 'pointer', fontSize: 12, whiteSpace: 'nowrap',
+                }}
+              >
+                复制
+              </button>
+              <button
+                onClick={() => setJustCreatedKey(null)}
+                style={{
+                  padding: '6px 12px', borderRadius: 6, border: '1px solid #e2e8f0',
+                  background: '#fff', cursor: 'pointer', fontSize: 12, color: '#64748b',
+                }}
+              >
+                关闭
+              </button>
+            </div>
           </div>
-          <div>
-            <label style={{
-              display: 'flex', alignItems: 'center', gap: 8, fontSize: 14, cursor: 'pointer',
-              padding: '10px 14px', background: '#fafbfc', borderRadius: 8, border: '1px solid #f0f0f0',
-            }}>
-              <input type="checkbox" checked={mergeWithBase} onChange={e => setMergeWithBase(e.target.checked)} />
-              合并基础模型
-            </label>
+        )}
+
+        {/* 已有 Key 列表 */}
+        {apiKeys.length === 0 ? (
+          <p style={{ color: '#94a3b8', fontSize: 13, margin: 0 }}>暂无 API Key</p>
+        ) : (
+          <div style={{ display: 'flex', flexDirection: 'column', gap: 8 }}>
+            {apiKeys.map(k => (
+              <div key={k.id} style={{
+                display: 'flex', alignItems: 'center', justifyContent: 'space-between',
+                padding: '10px 14px', borderRadius: 8,
+                background: k.status === 'active' ? '#fafbfc' : '#f8f8f8',
+                border: `1px solid ${k.status === 'active' ? '#e2e8f0' : '#eee'}`,
+              }}>
+                <div style={{ flex: 1 }}>
+                  <div style={{ display: 'flex', alignItems: 'center', gap: 8 }}>
+                    <span style={{ fontSize: 13, fontWeight: 500 }}>{k.name}</span>
+                    <span style={{
+                      fontSize: 11, padding: '1px 6px', borderRadius: 10,
+                      background: k.status === 'active' ? '#dcfce7' : '#f1f5f9',
+                      color: k.status === 'active' ? '#16a34a' : '#94a3b8',
+                    }}>
+                      {k.status === 'active' ? '有效' : '已吊销'}
+                    </span>
+                  </div>
+                  <div style={{ fontSize: 12, color: '#94a3b8', fontFamily: 'monospace', marginTop: 2 }}>
+                    {k.key}
+                  </div>
+                  <div style={{ fontSize: 11, color: '#cbd5e1', marginTop: 2 }}>
+                    创建于 {k.created_at ? new Date(k.created_at).toLocaleDateString() : '-'}
+                    {k.last_used_at && ` · 最后使用 ${new Date(k.last_used_at).toLocaleDateString()}`}
+                  </div>
+                </div>
+                {k.status === 'active' && (
+                  <button
+                    onClick={() => {
+                      if (confirm('确定吊销此 API Key?吊销后不可恢复。')) {
+                        api.apiKeys.revoke(k.id).then(() => loadApiKeys())
+                      }
+                    }}
+                    style={{
+                      padding: '4px 10px', borderRadius: 6, border: '1px solid #fca5a5',
+                      background: '#fff', color: '#dc2626', cursor: 'pointer', fontSize: 12,
+                    }}
+                  >
+                    吊销
+                  </button>
+                )}
+              </div>
+            ))}
           </div>
+        )}
+      </div>
+
+      {/* 已部署服务列表 */}
+      <div style={{
+        marginTop: 24, background: '#fff', borderRadius: 12, padding: 24,
+        boxShadow: '0 1px 3px rgba(0,0,0,0.06)', border: '1px solid rgba(0,0,0,0.04)',
+      }}>
+        <div style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center', marginBottom: 16 }}>
+          <h2 style={{ margin: 0, fontSize: 15, fontWeight: 600 }}>已部署服务</h2>
+          <button
+            onClick={loadServices}
+            style={{ padding: '6px 12px', borderRadius: 6, border: '1px solid #e2e8f0', background: '#fff', cursor: 'pointer', fontSize: 12, color: '#64748b' }}
+          >
+            {loadingServices ? '刷新中...' : '刷新'}
+          </button>
         </div>
 
-        {error && (
-          <div style={{ marginTop: 16, padding: 12, background: '#fff1f2', borderRadius: 8, fontSize: 13, color: '#e11d48', border: '1px solid #fecdd3' }}>
-            {error}
+        {services.length === 0 ? (
+          <p style={{ color: '#94a3b8', fontSize: 13, margin: 0 }}>暂无已部署的服务</p>
+        ) : (
+          <div style={{ display: 'flex', flexDirection: 'column', gap: 12 }}>
+            {services.map(svc => (
+              <ServiceCard key={svc.task_id} service={svc} onStop={() => handleStop(svc.task_id)} />
+            ))}
           </div>
         )}
+      </div>
 
+      <style>{`
+        @keyframes spin {
+          to { transform: rotate(360deg); }
+        }
+        @keyframes pulse {
+          0%, 100% { opacity: 1; }
+          50% { opacity: 0.4; }
+        }
+      `}</style>
+    </div>
+  )
+}
+
+// Renders the status grid (id, status, endpoint, output path, error) for a
+// deploy/export task response.
+function TaskStatus({ result }: { result: DeployResponse }) {
+  // 'running' is a success state here (it is coloured as success below and
+  // the status poller stops on it), so only 'pending' is still in progress.
+  // Counting 'running' as pending left a spinner that never went away.
+  const isPending = result.status === 'pending'
+  const isFailed = result.status === 'failed'
+  const isSuccess = result.status === 'completed' || result.status === 'running'
+
+  return (
+    <div style={{ display: 'grid', gridTemplateColumns: 'repeat(2, 1fr)', gap: 10 }}>
+      <InfoBox label="任务 ID" value={result.job_id} mono />
+      <InfoBox
+        label="状态"
+        value={result.status}
+        color={isFailed ? '#e11d48' : isSuccess ? '#059669' : '#d97706'}
+        bg={isFailed ? '#fff1f2' : isSuccess ? '#ecfdf5' : '#fffbeb'}
+        borderColor={isFailed ? '#fecdd3' : isSuccess ? '#d1fae5' : '#fde68a'}
+      />
+      {result.endpoint_url && (
+        <InfoBox label="Endpoint URL" value={result.endpoint_url} mono wide />
+      )}
+      {result.output_path && (
+        <InfoBox label="输出路径" value={result.output_path} mono wide />
+      )}
+      {result.error && (
+        <InfoBox label="错误" value={result.error} color="#e11d48" bg="#fff1f2" borderColor="#fecdd3" wide />
+      )}
+      {isPending && (
+        <div style={{ gridColumn: '1 / -1', padding: 12, background: '#f0fdfa', borderRadius: 8, border: '1px solid #ccfbf1', display: 'flex', alignItems: 'center', gap: 10 }}>
+          <Spinner />
+          <span style={{ fontSize: 13, color: '#134e4a' }}>
+            任务排队中...
+          </span>
+        </div>
+      )}
+    </div>
+  )
+}
+
+// Card for one deployed service: status badge, job id, callable base_url,
+// and (while running) buttons to toggle usage examples or stop the service.
+function ServiceCard({ service, onStop }: { service: DeployedServiceInfo; onStop: () => void }) {
+  const [showUsage, setShowUsage] = useState(false)
+  const isRunning = service.status === 'running'
+  // endpoint_url is normally a relative path (e.g.
+  // /api/v1/deployment/proxy/{task_id}/v1). Resolve it against the page
+  // origin with URL() rather than string concatenation, so an already
+  // absolute URL or a path without a leading slash still yields a valid URL.
+  const relativeUrl = service.endpoint_url || service.base_url || ''
+  let baseUrl = ''
+  if (relativeUrl) {
+    try {
+      baseUrl = new URL(relativeUrl, window.location.origin).toString()
+    } catch {
+      // Unparseable value: fall back to the plain concatenation.
+      baseUrl = `${window.location.origin}${relativeUrl}`
+    }
+  }
+
+  return (
+    <div style={{
+      padding: 16, borderRadius: 10,
+      border: `1px solid ${isRunning ? '#ccfbf1' : '#f1f5f9'}`,
+      background: isRunning ? '#f0fdfa' : '#fafbfc',
+    }}>
+      <div style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'flex-start' }}>
+        <div style={{ flex: 1 }}>
+          <div style={{ display: 'flex', alignItems: 'center', gap: 8, marginBottom: 6 }}>
+            <StatusBadge status={service.status} />
+            <span style={{ fontSize: 13, color: '#64748b', fontFamily: 'monospace' }}>
+              Job: {service.job_id.slice(0, 8)}...
+            </span>
+          </div>
+          {baseUrl && isRunning && (
+            <div style={{ marginTop: 6 }}>
+              <span style={{ fontSize: 11, color: '#94a3b8', textTransform: 'uppercase', letterSpacing: '0.5px' }}>base_url</span>
+              <div style={{
+                fontFamily: 'monospace', fontSize: 14, fontWeight: 600, color: '#134e4a',
+                background: '#fff', padding: '6px 10px', borderRadius: 6, marginTop: 3,
+                border: '1px solid #e2e8f0', display: 'inline-block',
+              }}>
+                {baseUrl}
+              </div>
+            </div>
+          )}
+          {service.error && (
+            <div style={{ marginTop: 8, fontSize: 12, color: '#e11d48' }}>{service.error}</div>
+          )}
+        </div>
+        <div style={{ display: 'flex', gap: 6 }}>
+          {isRunning && (
+            <>
+              <button
+                onClick={() => setShowUsage(!showUsage)}
+                style={{
+                  padding: '6px 12px', borderRadius: 6,
+                  border: '1px solid #14b8a6', background: showUsage ? '#14b8a6' : '#fff',
+                  color: showUsage ? '#fff' : '#14b8a6',
+                  cursor: 'pointer', fontSize: 12, fontWeight: 500,
+                }}
+              >
+                {showUsage ? '收起示例' : '调用示例'}
+              </button>
+              <button
+                onClick={onStop}
+                style={{
+                  padding: '6px 12px', borderRadius: 6,
+                  border: '1px solid #fca5a5', background: '#fff', color: '#dc2626',
+                  cursor: 'pointer', fontSize: 12, fontWeight: 500,
+                }}
+              >
+                停止
+              </button>
+            </>
+          )}
+        </div>
+      </div>
+
+      {showUsage && baseUrl && (
+        <div style={{ marginTop: 14, borderTop: '1px solid #e2e8f0', paddingTop: 14 }}>
+          <UsageExamples baseUrl={baseUrl} />
+        </div>
+      )}
+    </div>
+  )
+}
+
+// Shows copyable curl / Python / JavaScript snippets that call the deployed
+// service through its OpenAI-compatible base URL.
+function UsageExamples({ baseUrl }: { baseUrl: string }) {
+  // One entry per snippet, rendered in this order.
+  const snippets: { title: string; code: string }[] = [
+    {
+      title: 'curl',
+      code: `curl ${baseUrl}/chat/completions \\
+  -H "Content-Type: application/json" \\
+  -H "Authorization: Bearer YOUR_API_KEY" \\
+  -d '{
+    "model": "local-model",
+    "messages": [{"role": "user", "content": "你好"}],
+    "max_tokens": 512,
+    "temperature": 0.7
+  }'`,
+    },
+    {
+      title: 'Python (openai SDK)',
+      code: `from openai import OpenAI
+
+client = OpenAI(
+    base_url="${baseUrl}",
+    api_key="sk-xxx"  # 替换为你的 API Key
+)
+
+response = client.chat.completions.create(
+    model="local-model",
+    messages=[{"role": "user", "content": "你好"}],
+    max_tokens=512,
+    temperature=0.7
+)
+print(response.choices[0].message.content)`,
+    },
+    {
+      title: 'JavaScript / TypeScript',
+      code: `import OpenAI from 'openai'
+
+const client = new OpenAI({
+  baseURL: '${baseUrl}',
+  apiKey: 'sk-xxx'  // 替换为你的 API Key
+})
+
+const response = await client.chat.completions.create({
+  model: 'local-model',
+  messages: [{ role: 'user', content: '你好' }],
+  max_tokens: 512,
+  temperature: 0.7
+})
+console.log(response.choices[0].message.content)`,
+    },
+  ]
+
+  return (
+    <div>
+      <p style={{ margin: '0 0 10px', fontSize: 12, color: '#64748b' }}>
+        使用 OpenAI 兼容接口调用,需要在请求头中携带 API Key(在上方「API Key 管理」中创建):
+      </p>
+      {snippets.map(snippet => (
+        <CodeBlock key={snippet.title} title={snippet.title} code={snippet.code} />
+      ))}
+    </div>
+  )
+}
+
+// Titled, dark-themed code snippet with a copy-to-clipboard button.
+function CodeBlock({ title, code }: { title: string; code: string }) {
+  const [copied, setCopied] = useState(false)
+
+  const handleCopy = () => {
+    // navigator.clipboard is only exposed in secure contexts (HTTPS or
+    // localhost); on plain HTTP it is undefined and calling it would throw.
+    if (!navigator.clipboard) {
+      console.warn('Clipboard API unavailable; cannot copy snippet')
+      return
+    }
+    navigator.clipboard.writeText(code)
+      .then(() => {
+        setCopied(true)
+        // Revert the button label after two seconds.
+        setTimeout(() => setCopied(false), 2000)
+      })
+      .catch(err => console.warn('Failed to copy snippet', err))
+  }
+
+  return (
+    <div style={{ marginBottom: 10 }}>
+      <div style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center', marginBottom: 4 }}>
+        <span style={{ fontSize: 11, color: '#94a3b8', fontWeight: 500 }}>{title}</span>
         <button
-          onClick={handleExport}
-          disabled={running || !jobId}
+          onClick={handleCopy}
           style={{
-            marginTop: 20, padding: '10px 32px', borderRadius: 8, border: 'none',
-            background: '#14b8a6', color: '#fff', cursor: 'pointer',
-            opacity: (running || !jobId) ? 0.5 : 1, fontSize: 14, fontWeight: 600,
-            transition: 'all 0.2s ease',
+            padding: '2px 8px', borderRadius: 4, border: '1px solid #e2e8f0',
+            background: '#fff', cursor: 'pointer', fontSize: 11, color: '#64748b',
           }}
         >
-          {running ? '导出中...' : '开始导出'}
+          {copied ? '已复制' : '复制'}
         </button>
       </div>
+      <pre style={{
+        background: '#1e293b', color: '#e2e8f0', padding: 12, borderRadius: 8,
+        fontSize: 12, margin: 0, overflow: 'auto', fontFamily: 'monospace',
+        lineHeight: 1.5,
+      }}>
+        {code}
+      </pre>
+    </div>
+  )
+}
 
-      {result && (
-        <div style={{
-          marginTop: 24, background: '#fff', borderRadius: 12, padding: 24,
-          boxShadow: '0 1px 3px rgba(0,0,0,0.06)', border: '1px solid rgba(0,0,0,0.04)',
-        }}>
-          <h3 style={{ margin: '0 0 16px', fontSize: 15, fontWeight: 600 }}>导出状态</h3>
-          <div style={{ display: 'grid', gridTemplateColumns: 'repeat(2, 1fr)', gap: 12 }}>
-            <div style={{ padding: '14px 16px', background: '#fafbfc', borderRadius: 8, border: '1px solid #f0f0f0' }}>
-              <div style={{ fontSize: 12, color: '#888', marginBottom: 4 }}>任务 ID</div>
-              <div style={{ fontSize: 14, fontFamily: 'monospace' }}>{result.job_id}</div>
-            </div>
-            <div style={{ padding: '14px 16px', background: result.error ? '#fff1f2' : '#ecfdf5', borderRadius: 8, border: `1px solid ${result.error ? '#fecdd3' : '#d1fae5'}` }}>
-              <div style={{ fontSize: 12, color: '#94a3b8', marginBottom: 4 }}>状态</div>
-              <div style={{ color: result.error ? '#e11d48' : '#059669', fontWeight: 600, fontSize: 14 }}>{result.status}</div>
-            </div>
-            {result.output_path && (
-              <div style={{ padding: '14px 16px', background: '#fafbfc', borderRadius: 8, border: '1px solid #f0f0f0' }}>
-                <div style={{ fontSize: 12, color: '#888', marginBottom: 4 }}>输出路径</div>
-                <div style={{ fontSize: 13, fontFamily: 'monospace', wordBreak: 'break-all' }}>{result.output_path}</div>
-              </div>
-            )}
-            {result.error && (
-              <div style={{ padding: '14px 16px', background: '#fff1f2', borderRadius: 8, border: '1px solid #fecdd3' }}>
-                <div style={{ fontSize: 12, color: '#94a3b8', marginBottom: 4 }}>错误</div>
-                <div style={{ color: '#e11d48', fontSize: 13 }}>{result.error}</div>
-              </div>
-            )}
-          </div>
-        </div>
-      )}
+/**
+ * Small pill-shaped badge that renders a status string with a label and colours.
+ *
+ * Known statuses (`running`, `pending`, `completed`, `stopped`, `failed`) get a
+ * dedicated label and colour pair. Any other value is shown verbatim in a
+ * neutral grey style. A `running` status additionally shows a small dot in
+ * front of the label.
+ *
+ * @param status - Raw status string to display.
+ * @returns A `<span>` element styled as a badge.
+ */
+function StatusBadge({ status }: { status: string }) {
+  const map: Record<string, { label: string; bg: string; color: string }> = {
+    running: { label: '运行中', bg: '#dcfce7', color: '#16a34a' },
+    pending: { label: '排队中', bg: '#fef9c3', color: '#a16207' },
+    completed: { label: '已完成', bg: '#dcfce7', color: '#16a34a' },
+    stopped: { label: '已停止', bg: '#f1f5f9', color: '#64748b' },
+    failed: { label: '失败', bg: '#fee2e2', color: '#dc2626' },
+  }
+  // Only honour keys defined on the table itself. A bare `map[status]` also
+  // resolves inherited names such as "constructor" or "toString", which are
+  // truthy and would bypass the fallback, leaving label/bg/color undefined.
+  const isKnown = Object.prototype.hasOwnProperty.call(map, status)
+  const { label, bg, color } = (isKnown && map[status]) || { label: status, bg: '#f1f5f9', color: '#64748b' }
+  return (
+    <span style={{
+      display: 'inline-flex', alignItems: 'center', gap: 4,
+      padding: '2px 8px', borderRadius: 12, fontSize: 11, fontWeight: 600,
+      background: bg, color,
+    }}>
+      {/* The dot references a `pulse` animation; its keyframes are not defined in this component. */}
+      {status === 'running' && <span style={{ width: 6, height: 6, borderRadius: '50%', background: color, animation: 'pulse 2s infinite' }} />}
+      {label}
+    </span>
+  )
+}
+
+/**
+ * Labelled value tile: a small muted caption above a value.
+ *
+ * @param label - Caption shown above the value.
+ * @param value - Text shown as the tile's main content; long values may break anywhere.
+ * @param mono - When truthy, the value is rendered in a monospace font.
+ * @param color - Value text colour; defaults to `#1e293b`.
+ * @param bg - Tile background; defaults to `#fafbfc`.
+ * @param borderColor - Tile border colour; defaults to `#f0f0f0`.
+ * @param wide - When truthy, the tile spans every column of its parent grid.
+ */
+function InfoBox({ label, value, mono, color, bg, borderColor, wide }: {
+  label: string; value: string; mono?: boolean
+  color?: string; bg?: string; borderColor?: string; wide?: boolean
+}) {
+  // Outer tile; `1 / -1` stretches it from the first to the last grid line.
+  const boxStyle = {
+    gridColumn: wide ? '1 / -1' : undefined,
+    padding: '10px 14px',
+    borderRadius: 8,
+    background: bg || '#fafbfc',
+    border: `1px solid ${borderColor || '#f0f0f0'}`,
+  }
+  const captionStyle = { fontSize: 11, color: '#94a3b8', marginBottom: 3 }
+  const valueStyle = {
+    fontSize: 13,
+    fontWeight: 500,
+    fontFamily: mono ? 'monospace' : undefined,
+    color: color || '#1e293b',
+    // `as const` keeps the literal type so the object stays assignable to the style prop.
+    wordBreak: 'break-all' as const,
+  }
+  return (
+    <div style={boxStyle}>
+      <div style={captionStyle}>{label}</div>
+      <div style={valueStyle}>{value}</div>
     </div>
   )
 }
+
+/**
+ * 16×16 circular loading indicator: a teal ring with a transparent top edge.
+ *
+ * The ring references a `spin` animation; its keyframes are not defined in
+ * this component.
+ */
+function Spinner() {
+  // Order matters: `border` is set first so `borderTopColor` can override one side.
+  const ringStyle = {
+    width: 16,
+    height: 16,
+    border: '2px solid #14b8a6',
+    borderTopColor: 'transparent',
+    borderRadius: '50%',
+    animation: 'spin 1s linear infinite',
+  }
+  return <div style={ringStyle} />
+}