Przeglądaj źródła

修复253服务器端口占用问题

lxylxy123321 2 dni temu
rodzic
commit
5691abc452
4 zmienionych plików z 201 dodań i 260 usunięć
  1. 23 16
      backend/app/core/remote_executor.py
  2. 23 3
      backend/app/services/deploy_service.py
  3. 141 241
      result.txt
  4. 14 0
      test.py

+ 23 - 16
backend/app/core/remote_executor.py

@@ -204,27 +204,34 @@ def run_training_remote(
     return pid
 
 
-def is_process_running(pid: str) -> bool:
+def is_process_running(pid: str, retries: int = 3) -> bool:
     """检查远程训练/推理进程是否还在运行。
 
     通过 docker exec 进入容器,用 kill -0 检查指定 PID 是否存在,
     再读取 /proc/<pid>/stat 的状态字段,僵尸进程(Z)视为已停止。
+    失败时重试,避免因单次 SSH 超时误判。
     """
-    cmd = (
-        f"docker exec {settings.compute_node_docker_container} bash -c '"
-        f"if kill -0 {pid} 2>/dev/null; then "
-        f"  state=$(cat /proc/{pid}/stat 2>/dev/null | awk \"{{{{print \\$3}}}}\"); "
-        f"  if [ \"$state\" = \"Z\" ]; then echo stopped; else echo running; "
-        f"  fi; "
-        f"else "
-        f"  echo stopped; "
-        f"fi'"
-    )
-    code, stdout, stderr = ssh_exec(cmd, timeout=30)
-    if code != 0:
-        # SSH/docker exec 本身失败(容器可能挂了),视为进程不存活
-        return False
-    return "running" in stdout
+    for attempt in range(retries):
+        cmd = (
+            f"docker exec {settings.compute_node_docker_container} bash -c '"
+            f"if kill -0 {pid} 2>/dev/null; then "
+            f"  state=$(cat /proc/{pid}/stat 2>/dev/null | awk \"{{{{print \\$3}}}}\"); "
+            f"  if [ \"$state\" = \"Z\" ]; then echo stopped; else echo running; "
+            f"  fi; "
+            f"else "
+            f"  echo stopped; "
+            f"fi'"
+        )
+        code, stdout, stderr = ssh_exec(cmd, timeout=30)
+        if code != 0:
+            # SSH/docker exec 本身失败(容器可能挂了或网络抖动),重试
+            if attempt < retries - 1:
+                import time
+                time.sleep(2)
+                continue
+            return False
+        return "running" in stdout
+    return False
 
 
 def get_remote_stderr(job_id: str) -> str | None:

+ 23 - 3
backend/app/services/deploy_service.py

@@ -222,6 +222,13 @@ async def _launch_remote_worker(task_id: str, model_path: str, port: int) -> str
 
     只依赖 torch + transformers(不需要 fastapi/uvicorn)。
     """
+    # 启动前先清理端口占用,确保不会有旧进程残留
+    kill_cmd = (
+        f"docker exec {settings.compute_node_docker_container} "
+        f"bash -c 'fuser -k {port}/tcp 2>/dev/null; sleep 1; true'"
+    )
+    ssh_exec(kill_cmd, timeout=15)
+
     # worker 脚本在容器内的路径
     worker_template = f"{settings.compute_node_workdir}/app/core/inference_worker.py"
 
@@ -235,7 +242,6 @@ async def _launch_remote_worker(task_id: str, model_path: str, port: int) -> str
         raise RuntimeError(f"复制 inference_worker.py 失败: {stderr}")
 
     # 在容器内后台启动 worker
-    # 直接使用 & 后台启动,$! 捕获的是 Python 进程本身的 PID(不是 nohup/setsid 的包装 PID)
     launch_cmd = (
         f"docker exec "
         f"-e MACA_MPS_MODE=1 "
@@ -327,12 +333,19 @@ async def stop_serving(task_id: str, user_id: str = "") -> dict[str, Any]:
 
         pid = record.pid
         if pid and settings.use_remote_compute:
-            # 杀掉远程 worker 进程及其子线
+            # 方式1: kill -9 主进程及其子进程
             kill_cmd = (
                 f"docker exec {settings.compute_node_docker_container} "
-                f"bash -c 'kill {pid} 2>/dev/null; pkill -P {pid} 2>/dev/null; true'"
+                f"bash -c 'kill -9 {pid} 2>/dev/null; pkill -9 -P {pid} 2>/dev/null; true'"
             )
             code, _, _ = ssh_exec(kill_cmd, timeout=15)
+            # 方式2: fuser 兜底清理端口(防止进程 kill 失败仍占着端口)
+            if record.port:
+                fuser_cmd = (
+                    f"docker exec {settings.compute_node_docker_container} "
+                    f"bash -c 'fuser -k {record.port}/tcp 2>/dev/null; sleep 1; true'"
+                )
+                ssh_exec(fuser_cmd, timeout=15)
             logger.info(f"Stop serving: task={task_id} pid={pid} kill_code={code}")
 
         record.status = "stopped"
@@ -363,6 +376,9 @@ async def list_deployed_services(user_id: str = "") -> list[dict[str, Any]]:
             if not is_process_running(r.pid):
                 status = "stopped"
                 await _update_deploy_status(r.id, "stopped", error="进程已退出")
+                # 释放端口和 PID,确保下次分配时可用
+                r.port = None
+                r.pid = None
 
         services.append({
             "task_id": r.id,
@@ -589,6 +605,10 @@ async def recover_stale_deploys() -> None:
                 else:
                     record.status = "stopped"
                     record.error = "Server restarted, process state unknown"
+            # 释放端口和 PID,确保下次分配时可用
+            if record.status == "stopped":
+                record.port = None
+                record.pid = None
             record.finished_at = datetime.utcnow()
         if records:
             await session.commit()

+ 141 - 241
result.txt

@@ -1,241 +1,141 @@
-lq@lq:~/Fine-tuning$ sudo docker logs -f finetune-backend
-=> Syncing backend code to compute node 192.168.91.253 ...
-Warning: Permanently added '192.168.91.253' (ED25519) to the list of known hosts.
-sending incremental file list
-./
-.dockerignore
-.env.docker
-.env.example
-.python-version
-Dockerfile
-entrypoint.sh
-main.py
-pyproject.toml
-requirements.txt
-app/
-app/__init__.py
-app/config.py
-app/__pycache__/
-app/__pycache__/__init__.cpython-310.pyc
-app/__pycache__/config.cpython-310.pyc
-app/api/
-app/api/__init__.py
-app/api/api_keys.py
-app/api/auth.py
-app/api/datasets.py
-app/api/deployment.py
-app/api/evaluation.py
-app/api/inference.py
-app/api/models.py
-app/api/sample_center.py
-app/api/training.py
-app/api/__pycache__/__init__.cpython-310.pyc
-app/api/__pycache__/api_keys.cpython-310.pyc
-app/api/__pycache__/auth.cpython-310.pyc
-app/api/__pycache__/datasets.cpython-310.pyc
-app/api/__pycache__/deployment.cpython-310.pyc
-app/api/__pycache__/evaluation.cpython-310.pyc
-app/api/__pycache__/inference.cpython-310.pyc
-app/api/__pycache__/models.cpython-310.pyc
-app/api/__pycache__/sample_center.cpython-310.pyc
-app/api/__pycache__/training.cpython-310.pyc
-app/core/
-app/core/__init__.py
-app/core/auth.py
-app/core/background_tasks.py
-app/core/db.py
-app/core/deploy_server_template.py
-app/core/inference_worker.py
-app/core/job_queue.py
-app/core/logging.py
-app/core/remote_deploy.py
-app/core/remote_eval.py
-app/core/remote_executor.py
-app/core/security.py
-app/core/sso_client.py
-app/core/websocket.py
-app/core/__pycache__/
-app/core/__pycache__/__init__.cpython-310.pyc
-app/core/__pycache__/auth.cpython-310.pyc
-app/core/__pycache__/background_tasks.cpython-310.pyc
-app/core/__pycache__/db.cpython-310.pyc
-app/core/__pycache__/job_queue.cpython-310.pyc
-app/core/__pycache__/logging.cpython-310.pyc
-app/core/__pycache__/remote_deploy.cpython-310.pyc
-app/core/__pycache__/remote_eval.cpython-310.pyc
-app/core/__pycache__/remote_executor.cpython-310.pyc
-app/core/__pycache__/security.cpython-310.pyc
-app/core/__pycache__/sso_client.cpython-310.pyc
-app/core/__pycache__/websocket.cpython-310.pyc
-app/engines/
-app/engines/__init__.py
-app/engines/__main__.py
-app/engines/base.py
-app/engines/multimodal_engine.py
-app/engines/remote_train.py
-app/engines/text_engine.py
-app/engines/vision_engine.py
-app/engines/__pycache__/__init__.cpython-310.pyc
-app/engines/__pycache__/base.cpython-310.pyc
-app/engines/__pycache__/remote_train.cpython-310.pyc
-app/engines/__pycache__/text_engine.cpython-310.pyc
-app/peft/
-app/peft/__init__.py
-app/peft/__pycache__/__init__.cpython-310.pyc
-app/preprocessors/
-app/preprocessors/__init__.py
-app/preprocessors/__pycache__/__init__.cpython-310.pyc
-app/schemas/
-app/schemas/__init__.py
-app/schemas/background_task.py
-app/schemas/common.py
-app/schemas/dataset.py
-app/schemas/deployment.py
-app/schemas/evaluation.py
-app/schemas/model.py
-app/schemas/model_test.py
-app/schemas/sample_center.py
-app/schemas/training.py
-app/schemas/__pycache__/__init__.cpython-310.pyc
-app/schemas/__pycache__/background_task.cpython-310.pyc
-app/schemas/__pycache__/common.cpython-310.pyc
-app/schemas/__pycache__/dataset.cpython-310.pyc
-app/schemas/__pycache__/deployment.cpython-310.pyc
-app/schemas/__pycache__/evaluation.cpython-310.pyc
-app/schemas/__pycache__/model.cpython-310.pyc
-app/schemas/__pycache__/model_test.cpython-310.pyc
-app/schemas/__pycache__/sample_center.cpython-310.pyc
-app/schemas/__pycache__/training.cpython-310.pyc
-app/services/
-app/services/api_key_service.py
-app/services/dataset_service.py
-app/services/deploy_service.py
-app/services/eval_service.py
-app/services/inference_service.py
-app/services/model_service.py
-app/services/model_test_service.py
-app/services/sample_center_service.py
-app/services/training_service.py
-app/services/__pycache__/api_key_service.cpython-310.pyc
-app/services/__pycache__/dataset_service.cpython-310.pyc
-app/services/__pycache__/deploy_service.cpython-310.pyc
-app/services/__pycache__/eval_service.cpython-310.pyc
-app/services/__pycache__/inference_service.cpython-310.pyc
-app/services/__pycache__/model_service.cpython-310.pyc
-app/services/__pycache__/model_test_service.cpython-310.pyc
-app/services/__pycache__/sample_center_service.cpython-310.pyc
-app/services/__pycache__/training_service.cpython-310.pyc
-
-sent 8,248 bytes  received 6,977 bytes  822.97 bytes/sec
-total size is 519,183  speedup is 34.10
-=> Sync done.
-INFO:     Started server process [1]
-INFO:     Waiting for application startup.
-2026-05-26 01:57:06 | INFO     | peft-platform | JobQueue started with 2 workers
-INFO:     Application startup complete.
-INFO:     Uvicorn running on http://0.0.0.0:8010 (Press CTRL+C to quit)
-INFO:     172.20.0.4:46460 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
-INFO:     127.0.0.1:37172 - "GET /health HTTP/1.1" 200 OK
-INFO:     172.20.0.4:47912 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
-INFO:     172.20.0.4:40496 - "GET /api/v1/models/ HTTP/1.0" 200 OK
-INFO:     172.20.0.4:40502 - "GET /api/v1/datasets/ HTTP/1.0" 200 OK
-INFO:     172.20.0.4:40506 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
-INFO:     127.0.0.1:59614 - "GET /health HTTP/1.1" 200 OK
-INFO:     172.20.0.4:45828 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
-INFO:     172.20.0.4:45836 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
-INFO:     172.20.0.4:45848 - "GET /api/v1/api-keys/ HTTP/1.0" 200 OK
-INFO:     172.20.0.4:59154 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
-INFO:     172.20.0.4:44642 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
-INFO:     127.0.0.1:58614 - "GET /health HTTP/1.1" 200 OK
-INFO:     172.20.0.4:54690 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
-INFO:     172.20.0.4:45240 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
-2026-05-26 01:58:25 | INFO     | peft-platform | Serve task started: job=3819e7af-6c9b-4fde-88d0-35784e6afeda port=8100 (task_id=5ade7a70-4564-4f5c-b0cf-257e67a2ee17)
-INFO:     172.20.0.4:33464 - "POST /api/v1/deployment/serve HTTP/1.0" 200 OK
-2026-05-26 02:00:25 | INFO     | peft-platform | Remote worker launched: task=5ade7a70-4564-4f5c-b0cf-257e67a2ee17 port=8100 pid=92955
-INFO:     127.0.0.1:57188 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:46232 - "GET /health HTTP/1.1" 200 OK
-INFO:     127.0.0.1:42886 - "GET /health HTTP/1.1" 200 OK
-INFO:     172.20.0.4:33468 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:33474 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
-INFO:     172.20.0.4:54934 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:40846 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:40860 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
-INFO:     172.20.0.4:40850 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:35794 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:35806 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
-INFO:     172.20.0.4:35822 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:35834 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:35848 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:35852 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:35858 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:35874 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:35892 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:35890 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
-INFO:     172.20.0.4:35906 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:35916 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:35940 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:35930 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
-INFO:     172.20.0.4:35952 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:35962 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:35990 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:35986 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
-INFO:     172.20.0.4:36000 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:36008 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:36020 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
-INFO:     172.20.0.4:35976 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:36026 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:36028 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:36058 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
-INFO:     172.20.0.4:36042 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:36070 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:36072 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:36076 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:36084 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
-INFO:     172.20.0.4:36092 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:36094 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:36098 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:36102 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:36114 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
-INFO:     172.20.0.4:36118 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:36140 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
-INFO:     172.20.0.4:36136 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:36124 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:36146 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:36148 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
-INFO:     172.20.0.4:36154 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:36156 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:36158 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:36174 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:36182 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:36190 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-2026-05-26 02:00:47 | INFO     | peft-platform | Worker ready: task=5ade7a70-4564-4f5c-b0cf-257e67a2ee17 (after ~5s)
-INFO:     127.0.0.1:45552 - "GET /health HTTP/1.1" 200 OK
-INFO:     172.20.0.4:36198 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
-INFO:     172.20.0.4:36204 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:57910 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:57918 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:57938 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     127.0.0.1:45574 - "GET /health HTTP/1.1" 200 OK
-INFO:     172.20.0.4:57924 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
-INFO:     172.20.0.4:39078 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:39108 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:39118 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:39120 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:39092 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
-INFO:     172.20.0.4:41448 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:41450 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:41454 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
-INFO:     172.20.0.4:41476 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:41468 - "GET /api/v1/deployment/5ade7a70-4564-4f5c-b0cf-257e67a2ee17/status HTTP/1.0" 200 OK
-INFO:     172.20.0.4:41488 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
-INFO:     172.20.0.4:41496 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
-INFO:     172.20.0.4:41510 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
-INFO:     172.20.0.4:41522 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
-INFO:     172.20.0.4:41524 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
-INFO:     172.20.0.4:41538 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
-INFO:     172.20.0.4:41552 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
-INFO:     172.20.0.4:34580 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
-INFO:     172.20.0.4:33068 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
-INFO:     172.20.0.4:43700 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
-INFO:     127.0.0.1:45014 - "GET /health HTTP/1.1" 200 OK
-INFO:     172.20.0.4:43706 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+(base) [root@localhost ~]# docker exec finetune-trainer ps aux | grep python | grep -v grep
+root        8196  0.2  0.0      0     0 ?        Zs   May25   2:48 [python] <defunct>
+root       25181  0.2  0.0      0     0 ?        Zs   May25   2:35 [python] <defunct>
+root       36002  0.2  0.0      0     0 ?        Zs   May25   2:31 [python] <defunct>
+root       42795  0.2  0.0      0     0 ?        Zs   May25   3:10 [python] <defunct>
+root       63097  0.0  0.0      0     0 ?        Z    May25   0:12 [python] <defunct>
+root       63127  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63129  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63131  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63133  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63135  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63137  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63139  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63141  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63143  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63145  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63147  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63149  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63151  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63154  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63156  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63158  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63160  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63162  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63164  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63166  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63168  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63172  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63175  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63182  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63186  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63190  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63194  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63198  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63202  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63206  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63210  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63214  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63860  0.0  0.0      0     0 ?        Z    May25   0:12 [python] <defunct>
+root       63896  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63898  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63900  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63902  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63904  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63906  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63908  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63910  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63912  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63914  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63916  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63918  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63920  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63922  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63924  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63926  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63928  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63930  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63932  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63934  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63936  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63938  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63940  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63943  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63945  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63947  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63949  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63951  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63953  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63955  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63957  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       63959  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       76608  0.0  0.0      0     0 ?        Z    May25   0:11 [python] <defunct>
+root       76644  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       76646  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       76648  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       76650  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       76652  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       76654  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       76656  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       76658  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       76660  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       76662  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       76664  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       76666  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       76668  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       76670  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       76673  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       76675  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       76677  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       76679  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       76681  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       76683  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       76685  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       76687  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       76689  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       76691  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       76693  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       76695  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       76698  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       76702  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       76706  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       76710  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       76714  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       76720  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
+root       90890  0.4  1.8 42079812 9990072 ?    Ssl  May25   2:47 /opt/conda/bin/python inference_worker.py --model-path /root/Fine-tuning/backend/data/adapters/3819e7af-6c9b-4fde-88d0-35784e6afeda_merged --port 8100
+root       91022  0.0  1.6 17399152 8922772 ?    Sl   May25   0:19 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root       91351  0.0  0.9 17102100 4843800 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root       91353  0.0  0.9 17102100 4843600 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root       91355  0.0  0.9 17102100 4843600 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root       91357  0.0  0.9 17102100 4843604 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root       91359  0.0  0.9 17102100 4843604 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root       91361  0.0  0.9 17102100 4843608 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root       91363  0.0  0.9 17102100 4843612 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root       91365  0.0  0.9 17102100 4843612 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root       91367  0.0  0.9 17102100 4842020 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root       91369  0.0  0.9 17102100 4842020 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root       91371  0.0  0.9 17102100 4842024 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root       91374  0.0  0.9 17102100 4842024 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root       91376  0.0  0.9 17102100 4842020 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root       91378  0.0  0.9 17102100 4842024 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root       91380  0.0  0.9 17102100 4842024 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root       91382  0.0  0.9 17102100 4842024 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root       91384  0.0  0.9 17102100 4842028 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root       91386  0.0  0.9 17102100 4842028 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root       91388  0.0  0.9 17102100 4842028 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root       91390  0.0  0.9 17102100 4842028 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root       91392  0.0  0.9 17102100 4842036 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root       91394  0.0  0.9 17102100 4842036 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root       91396  0.0  0.9 17102100 4842036 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root       91398  0.0  0.9 17102100 4842036 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root       91400  0.0  0.9 17102100 4842036 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root       91402  0.0  0.9 17102100 4842036 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root       91404  0.0  0.9 17102100 4841448 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root       91406  0.0  0.9 17102100 4841396 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root       91408  0.0  0.9 17102100 4841400 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root       91410  0.0  0.9 17102100 4841400 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root       91412  0.0  0.9 17102100 4842044 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root       91414  0.0  0.9 17102100 4842044 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root       92043  3.9  0.0      0     0 ?        Zs   09:50   1:19 [python] <defunct>
+root       92955  5.4  0.0      0     0 ?        Z    10:00   1:19 [python] <defunct>
+root       93867 11.7  0.0      0     0 ?        Z    10:13   1:20 [python] <defunct>

+ 14 - 0
test.py

@@ -0,0 +1,14 @@
+"""Smoke test: send one chat prompt through the deployment proxy and print the reply."""
+import os
+
+from openai import OpenAI
+
+# Read the key from the environment so the secret is never committed (KeyError if unset).
+client = OpenAI(
+    base_url=os.environ.get("OPENAI_BASE_URL", "http://192.168.92.151:3000/api/v1/deployment/proxy/08b07765-f5cd-4421-ad2c-b8eb22e60399/v1"),
+    api_key=os.environ["OPENAI_API_KEY"],
+)
+response = client.chat.completions.create(
+    model="local-model", messages=[{"role": "user", "content": "你好"}], max_tokens=512, temperature=0.7
+)
+print(response.choices[0].message.content)