Ver Fonte

修复253服务器端口清理

lxylxy123321 há 2 dias atrás
pai
commit
d124f58ab6

+ 9 - 5
backend/app/core/job_queue.py

@@ -404,7 +404,10 @@ class JobQueue:
                 return
 
             # 检查进程是否还在运行(非阻塞)
-            process_alive = await asyncio.to_thread(is_process_running, pid)
+            # 返回三态:"running" / "stopped" / "unknown"(SSH 失败时)
+            proc_state = await asyncio.to_thread(is_process_running, pid)
+            process_alive = proc_state != "stopped"  # running 或 unknown 都视为"可能还活着"
+            process_confirmed_dead = proc_state == "stopped"
 
             # === 1. 读取 jsonl 进度日志 ===
             cat_cmd = f"docker exec {container} bash -c 'wc -c < {remote_log} 2>/dev/null || echo 0'"
@@ -507,10 +510,11 @@ class JobQueue:
                 consecutive_empty_polls += 1
 
             # 进程已退出但日志里没有 completed/error
-            if not process_alive:
+            if process_confirmed_dead:
                 # 多等几秒让日志写完
                 await asyncio.sleep(2)
-                if not await asyncio.to_thread(is_process_running, pid):
+                confirm_state = await asyncio.to_thread(is_process_running, pid)
+                if confirm_state == "stopped":
                     # 进程退出但没有写 completed/error 日志,读取 stderr 日志兜底
                     error_msg = f"Remote process exited unexpectedly (pid={pid})"
                     try:
@@ -525,8 +529,8 @@ class JobQueue:
                     await _mark_failed(error_msg)
                     return
 
-            # 长时间无日志且进程异常,也标记为失败
-            if consecutive_empty_polls >= max_consecutive_empty and not process_alive:
+            # 长时间无日志且进程确认已退出,也标记为失败
+            if consecutive_empty_polls >= max_consecutive_empty and process_confirmed_dead:
                 error_msg = f"Remote process exited unexpectedly (pid={pid}), no error log found"
                 logger.error(f"Remote job {job_id} failed: {error_msg}")
                 await _mark_failed(error_msg)

+ 12 - 4
backend/app/core/remote_executor.py

@@ -204,12 +204,17 @@ def run_training_remote(
     return pid
 
 
-def is_process_running(pid: str, retries: int = 3) -> bool:
+def is_process_running(pid: str, retries: int = 3) -> str:
     """检查远程训练/推理进程是否还在运行。
 
     通过 docker exec 进入容器,优先用 kill -0 检查指定 PID,
     兜底用 ps 检查是否存在匹配的 Python 进程。
     失败时重试,避免因单次 SSH 超时误判。
+
+    返回值:
+        "running"  - 进程确认存活
+        "stopped"  - 进程确认已退出
+        "unknown"  - SSH/docker exec 失败,无法确认(不应改变现有状态)
     """
     for attempt in range(retries):
         cmd = (
@@ -229,9 +234,12 @@ def is_process_running(pid: str, retries: int = 3) -> bool:
                 import time
                 time.sleep(2)
                 continue
-            return False
-        return "running" in stdout
-    return False
+            # 重试耗尽,无法确认进程状态
+            return "unknown"
+        if "running" in stdout:
+            return "running"
+        return "stopped"
+    return "unknown"
 
 
 def get_remote_stderr(job_id: str) -> str | None:

+ 44 - 19
backend/app/services/deploy_service.py

@@ -223,11 +223,12 @@ async def _launch_remote_worker(task_id: str, model_path: str, port: int) -> str
     只依赖 torch + transformers(不需要 fastapi/uvicorn)。
     """
     # 启动前先清理端口占用,确保不会有旧进程残留
+    # 253 容器内子进程多,docker exec 执行较慢,给足超时
     kill_cmd = (
         f"docker exec {settings.compute_node_docker_container} "
-        f"bash -c 'fuser -k {port}/tcp 2>/dev/null; sleep 1; true'"
+        f"bash -c 'fuser -k {port}/tcp 2>/dev/null; sleep 2; fuser -k {port}/tcp 2>/dev/null; sleep 1; true'"
     )
-    await asyncio.to_thread(ssh_exec, kill_cmd, timeout=15)
+    await asyncio.to_thread(ssh_exec, kill_cmd, timeout=60)
 
     # worker 脚本在容器内的路径
     worker_template = f"{settings.compute_node_workdir}/app/core/inference_worker.py"
@@ -256,7 +257,7 @@ async def _launch_remote_worker(task_id: str, model_path: str, port: int) -> str
         f" echo $!'"
     )
 
-    code, stdout, stderr = await asyncio.to_thread(ssh_exec, launch_cmd, timeout=30)
+    code, stdout, stderr = await asyncio.to_thread(ssh_exec, launch_cmd, timeout=60)
     if code != 0:
         raise RuntimeError(f"启动推理 worker 失败: {stderr}")
 
@@ -276,11 +277,16 @@ async def _launch_remote_worker(task_id: str, model_path: str, port: int) -> str
             f"  echo \"ALIVE\"; "
             f"'"
         )
-        code, stdout, stderr = await asyncio.to_thread(ssh_exec, check_cmd, timeout=30)
+        code, stdout, stderr = await asyncio.to_thread(ssh_exec, check_cmd, timeout=60)
         if code == 0:
             result = stdout.strip()
             if result.startswith("READY:"):
                 logger.info(f"Worker ready: task={task_id} (after ~{(attempt+1)*5}s)")
+                # 校验实际占用端口的 PID(防止 stop 没杀干净旧进程导致 PID 对不上)
+                actual_pid = await _get_port_pid(port)
+                if actual_pid and actual_pid != pid:
+                    logger.warning(f"Port {port} PID mismatch: launched={pid}, actual={actual_pid}")
+                    pid = actual_pid
                 return pid
             elif result == "DEAD":
                 # 读取日志看什么错了
@@ -288,7 +294,7 @@ async def _launch_remote_worker(task_id: str, model_path: str, port: int) -> str
                     f"docker exec {settings.compute_node_docker_container} "
                     f"bash -c 'tail -20 /tmp/serve_{task_id}.log 2>/dev/null'"
                 )
-                _, log_stdout, _ = await asyncio.to_thread(ssh_exec, log_cmd, timeout=30)
+                _, log_stdout, _ = await asyncio.to_thread(ssh_exec, log_cmd, timeout=60)
                 raise RuntimeError(f"Worker 进程已退出: {log_stdout}")
             # result == "ALIVE" → 继续等待
 
@@ -296,6 +302,22 @@ async def _launch_remote_worker(task_id: str, model_path: str, port: int) -> str
     return pid
 
 
+async def _get_port_pid(port: int) -> str | None:
+    """获取远程容器内占用指定端口的进程 PID。"""
+    cmd = (
+        f"docker exec {settings.compute_node_docker_container} "
+        f"bash -c 'fuser {port}/tcp 2>/dev/null'"
+    )
+    code, stdout, _ = await asyncio.to_thread(ssh_exec, cmd, timeout=60)
+    if code == 0 and stdout.strip():
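+        # fuser writes only PIDs to stdout (the "8100/tcp:" label goes to stderr, which is discarded above); several PIDs may be listed, and the last numeric token is used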
+        parts = stdout.strip().split()
+        for p in reversed(parts):
+            if p.isdigit():
+                return p
+    return None
+
+
 async def _launch_local_worker(task_id: str, model_path: str, port: int) -> str:
     """在本地启动推理 worker(开发用)。"""
     import subprocess
@@ -336,19 +358,19 @@ async def stop_serving(task_id: str, user_id: str = "") -> dict[str, Any]:
 
         if pid and settings.use_remote_compute:
             # 方式1: kill -9 主进程及其子进程
+            # 方式2: fuser 直接杀占用端口的进程(最可靠,防止 PID 对不上)
             kill_cmd = (
                 f"docker exec {settings.compute_node_docker_container} "
-                f"bash -c 'kill -9 {pid} 2>/dev/null; pkill -9 -P {pid} 2>/dev/null; true'"
+                f"bash -c '"
+                f"kill -9 {pid} 2>/dev/null; "
+                f"pkill -9 -P {pid} 2>/dev/null; "
+                f"fuser -k {port}/tcp 2>/dev/null; "
+                f"sleep 2; "
+                f"fuser -k {port}/tcp 2>/dev/null; "
+                f"true'"
             )
-            code, _, _ = await asyncio.to_thread(ssh_exec, kill_cmd, timeout=15)
-            # 方式2: fuser 兜底清理端口(防止进程 kill 失败仍占着端口)
-            if port:
-                fuser_cmd = (
-                    f"docker exec {settings.compute_node_docker_container} "
-                    f"bash -c 'fuser -k {port}/tcp 2>/dev/null; sleep 1; true'"
-                )
-                await asyncio.to_thread(ssh_exec, fuser_cmd, timeout=15)
-            logger.info(f"Stop serving: task={task_id} pid={pid} kill_code={code}")
+            code, _, _ = await asyncio.to_thread(ssh_exec, kill_cmd, timeout=60)
+            logger.info(f"Stop serving: task={task_id} pid={pid} port={port} kill_code={code}")
 
         record.status = "stopped"
         record.pid = None
@@ -432,12 +454,14 @@ async def list_deployed_services(user_id: str = "") -> list[dict[str, Any]]:
         # 对 running 状态,检查远程进程是否还活着
         if status == "running" and r.pid and settings.use_remote_compute:
             from app.core.remote_executor import is_process_running
-            if not await asyncio.to_thread(is_process_running, r.pid):
+            proc_state = await asyncio.to_thread(is_process_running, r.pid)
+            if proc_state == "stopped":
+                # 确认进程已退出,标记为 stopped
                 status = "stopped"
                 await _update_deploy_status(r.id, "stopped", error="进程已退出")
-                # 释放端口和 PID,确保下次分配时可用
                 r.port = None
                 r.pid = None
+            # proc_state == "unknown" 时不改状态(SSH 超时不代表进程死了)
 
         services.append({
             "task_id": r.id,
@@ -660,11 +684,12 @@ async def recover_stale_deploys() -> None:
             elif record.deploy_mode == "serve":
                 if record.pid and settings.use_remote_compute:
                     from app.core.remote_executor import is_process_running
-                    if not is_process_running(record.pid):
+                    proc_state = await asyncio.to_thread(is_process_running, record.pid)
+                    if proc_state == "stopped":
                         record.status = "stopped"
                         record.error = "Server restarted, process no longer running"
                     else:
-                        continue  # 进程还在,保持 running
+                        continue  # 进程还在或无法确认,保持 running
                 else:
                     record.status = "stopped"
                     record.error = "Server restarted, process state unknown"

+ 72 - 140
result.txt

@@ -1,141 +1,73 @@
 (base) [root@localhost ~]# docker exec finetune-trainer ps aux | grep python | grep -v grep
-root        8196  0.2  0.0      0     0 ?        Zs   May25   2:48 [python] <defunct>
-root       25181  0.2  0.0      0     0 ?        Zs   May25   2:35 [python] <defunct>
-root       36002  0.2  0.0      0     0 ?        Zs   May25   2:31 [python] <defunct>
-root       42795  0.2  0.0      0     0 ?        Zs   May25   3:10 [python] <defunct>
-root       63097  0.0  0.0      0     0 ?        Z    May25   0:12 [python] <defunct>
-root       63127  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63129  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63131  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63133  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63135  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63137  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63139  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63141  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63143  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63145  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63147  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63149  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63151  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63154  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63156  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63158  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63160  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63162  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63164  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63166  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63168  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63172  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63175  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63182  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63186  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63190  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63194  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63198  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63202  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63206  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63210  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63214  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63860  0.0  0.0      0     0 ?        Z    May25   0:12 [python] <defunct>
-root       63896  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63898  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63900  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63902  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63904  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63906  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63908  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63910  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63912  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63914  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63916  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63918  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63920  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63922  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63924  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63926  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63928  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63930  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63932  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63934  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63936  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63938  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63940  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63943  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63945  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63947  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63949  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63951  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63953  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63955  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63957  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       63959  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       76608  0.0  0.0      0     0 ?        Z    May25   0:11 [python] <defunct>
-root       76644  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       76646  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       76648  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       76650  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       76652  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       76654  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       76656  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       76658  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       76660  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       76662  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       76664  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       76666  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       76668  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       76670  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       76673  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       76675  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       76677  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       76679  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       76681  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       76683  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       76685  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       76687  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       76689  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       76691  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       76693  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       76695  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       76698  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       76702  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       76706  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       76710  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       76714  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       76720  0.0  0.0      0     0 ?        Z    May25   0:00 [python] <defunct>
-root       90890  0.4  1.8 42079812 9990072 ?    Ssl  May25   2:47 /opt/conda/bin/python inference_worker.py --model-path /root/Fine-tuning/backend/data/adapters/3819e7af-6c9b-4fde-88d0-35784e6afeda_merged --port 8100
-root       91022  0.0  1.6 17399152 8922772 ?    Sl   May25   0:19 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       91351  0.0  0.9 17102100 4843800 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       91353  0.0  0.9 17102100 4843600 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       91355  0.0  0.9 17102100 4843600 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       91357  0.0  0.9 17102100 4843604 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       91359  0.0  0.9 17102100 4843604 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       91361  0.0  0.9 17102100 4843608 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       91363  0.0  0.9 17102100 4843612 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       91365  0.0  0.9 17102100 4843612 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       91367  0.0  0.9 17102100 4842020 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       91369  0.0  0.9 17102100 4842020 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       91371  0.0  0.9 17102100 4842024 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       91374  0.0  0.9 17102100 4842024 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       91376  0.0  0.9 17102100 4842020 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       91378  0.0  0.9 17102100 4842024 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       91380  0.0  0.9 17102100 4842024 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       91382  0.0  0.9 17102100 4842024 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       91384  0.0  0.9 17102100 4842028 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       91386  0.0  0.9 17102100 4842028 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       91388  0.0  0.9 17102100 4842028 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       91390  0.0  0.9 17102100 4842028 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       91392  0.0  0.9 17102100 4842036 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       91394  0.0  0.9 17102100 4842036 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       91396  0.0  0.9 17102100 4842036 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       91398  0.0  0.9 17102100 4842036 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       91400  0.0  0.9 17102100 4842036 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       91402  0.0  0.9 17102100 4842036 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       91404  0.0  0.9 17102100 4841448 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       91406  0.0  0.9 17102100 4841396 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       91408  0.0  0.9 17102100 4841400 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       91410  0.0  0.9 17102100 4841400 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       91412  0.0  0.9 17102100 4842044 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       91414  0.0  0.9 17102100 4842044 ?    Sl   May25   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=90890 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       92043  3.9  0.0      0     0 ?        Zs   09:50   1:19 [python] <defunct>
-root       92955  5.4  0.0      0     0 ?        Z    10:00   1:19 [python] <defunct>
-root       93867 11.7  0.0      0     0 ?        Z    10:13   1:20 [python] <defunct>
+root         372  5.7  1.9 57650712 10328360 ?   Sl   10:33   2:34 /opt/conda/bin/python inference_worker.py --model-path /root/Fine-tuning/backend/data/adapters/3819e7af-6c9b-4fde-88d0-35784e6afeda_merged --port 8100
+root         504  0.6  1.6 17399148 8919172 ?    Sl   10:33   0:17 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         842  0.0  0.9 17102096 4842448 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         844  0.0  0.9 17102096 4842248 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         846  0.0  0.9 17102096 4842252 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         848  0.0  0.9 17102096 4842252 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         851  0.0  0.9 17102096 4842252 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         853  0.0  0.9 17102096 4842260 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         855  0.0  0.9 17102096 4842260 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         857  0.0  0.9 17102096 4842264 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         859  0.0  0.9 17102096 4842264 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         861  0.0  0.9 17102096 4842264 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         863  0.0  0.9 17102096 4842264 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         865  0.0  0.9 17102096 4842268 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         867  0.0  0.9 17102096 4842264 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         869  0.0  0.9 17102096 4842264 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         871  0.0  0.9 17102096 4842268 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         873  0.0  0.9 17102096 4842268 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         875  0.0  0.9 17102096 4842272 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         877  0.0  0.9 17102096 4842272 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         879  0.0  0.9 17102096 4842272 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         881  0.0  0.9 17102096 4842272 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         883  0.0  0.9 17102096 4842276 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         885  0.0  0.9 17102096 4842276 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         887  0.0  0.9 17102096 4842280 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         889  0.0  0.9 17102096 4842280 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         891  0.0  0.9 17102096 4842280 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         893  0.0  0.9 17102096 4842280 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         895  0.0  0.9 17102096 4841776 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         897  0.0  0.9 17102096 4841776 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         899  0.0  0.9 17102096 4841784 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         901  0.0  0.9 17102096 4841764 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         903  0.0  0.9 17102096 4842292 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         905  0.0  0.9 17102096 4842292 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root        4661 52.8  0.0      0     0 ?        Z    11:15   1:19 [python] <defunct>
+(base) [root@localhost ~]# docker exec finetune-trainer ps aux | grep python | grep -v grep
+root         372  5.3  1.9 57650712 10328360 ?   Sl   10:33   2:35 /opt/conda/bin/python inference_worker.py --model-path /root/Fine-tuning/backend/data/adapters/3819e7af-6c9b-4fde-88d0-35784e6afeda_merged --port 8100
+root         504  0.6  1.6 17399148 8919172 ?    Sl   10:33   0:17 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         842  0.0  0.9 17102096 4842448 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         844  0.0  0.9 17102096 4842248 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         846  0.0  0.9 17102096 4842252 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         848  0.0  0.9 17102096 4842252 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         851  0.0  0.9 17102096 4842252 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         853  0.0  0.9 17102096 4842260 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         855  0.0  0.9 17102096 4842260 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         857  0.0  0.9 17102096 4842264 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         859  0.0  0.9 17102096 4842264 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         861  0.0  0.9 17102096 4842264 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         863  0.0  0.9 17102096 4842264 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         865  0.0  0.9 17102096 4842268 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         867  0.0  0.9 17102096 4842264 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         869  0.0  0.9 17102096 4842264 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         871  0.0  0.9 17102096 4842268 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         873  0.0  0.9 17102096 4842268 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         875  0.0  0.9 17102096 4842272 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         877  0.0  0.9 17102096 4842272 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         879  0.0  0.9 17102096 4842272 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         881  0.0  0.9 17102096 4842272 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         883  0.0  0.9 17102096 4842276 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         885  0.0  0.9 17102096 4842276 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         887  0.0  0.9 17102096 4842280 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         889  0.0  0.9 17102096 4842280 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         891  0.0  0.9 17102096 4842280 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         893  0.0  0.9 17102096 4842280 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         895  0.0  0.9 17102096 4841776 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         897  0.0  0.9 17102096 4841776 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         899  0.0  0.9 17102096 4841784 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         901  0.0  0.9 17102096 4841764 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         903  0.0  0.9 17102096 4842292 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root         905  0.0  0.9 17102096 4842292 ?    Sl   10:33   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=372 --read-fd=7 --write-fd=10 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
+root        4661 21.5  0.0      0     0 ?        Z    11:15   1:19 [python] <defunct>
+root        5234 42.1  0.0      0     0 ?        Z    11:18   1:21 [python] <defunct>