lxylxy123321 22 saat önce
ebeveyn
işleme
8eade0ee3d

+ 25 - 1
backend/app/core/job_queue.py

@@ -337,7 +337,7 @@ class JobQueue:
         """
         from app.config import get_settings
         from app.core.websocket import send_progress, send_epoch_done, send_completed, send_error
-        from app.core.remote_executor import ssh_exec, is_process_running
+        from app.core.remote_executor import ssh_exec, is_process_running, scp_from_remote_dir
 
         settings = get_settings()
         remote_log = f"{settings.compute_node_remote_data_dir}/logs/{job_id}.jsonl"
@@ -463,6 +463,30 @@ class JobQueue:
 
                         elif entry_type == "completed":
                             adapter_path = entry.get("adapter_path", str(settings.adapters_dir / job_id))
+
+                            # 从 253 同步 adapter 回 151(训练完成后自动回传)
+                            local_adapter_dir = str(settings.adapters_dir / job_id)
+                            remote_adapter_path = adapter_path  # 253 上的路径
+                            logger.info(f"Syncing adapter from remote: {remote_adapter_path} -> {local_adapter_dir}")
+
+                            # 确保本地目录存在
+                            import os
+                            os.makedirs(str(settings.adapters_dir), exist_ok=True)
+
+                            # 异步执行 SCP,避免阻塞事件循环
+                            ret_code, stdout, stderr = await asyncio.to_thread(
+                                scp_from_remote_dir,
+                                remote_adapter_path,
+                                local_adapter_dir,
+                            )
+
+                            if ret_code == 0:
+                                logger.info(f"Adapter synced successfully: {job_id}")
+                                # 更新 adapter_path 为本地路径,便于 151 上的推理服务使用
+                                adapter_path = local_adapter_dir
+                            else:
+                                logger.error(f"Failed to sync adapter: {stderr}, using remote path")
+
                             self.update_job(job_id,
                                             status=JobStatus.COMPLETED,
                                             adapter_path=adapter_path,

+ 25 - 0
backend/app/core/remote_executor.py

@@ -36,6 +36,31 @@ def scp_to_remote(local_path: str, remote_path: str) -> tuple[int, str, str]:
         return -1, "", str(e)
 
 
+def scp_from_remote_dir(remote_path: str, local_path: str, timeout: int = 600) -> tuple[int, str, str]:
+    """Recursively copy a directory from the remote host to the local machine via SCP; return (exit_code, stdout, stderr).
+    timeout defaults to 600 s (10 minutes); per the author's estimate this is ample for a 20G adapter (about 3 minutes on gigabit Ethernet).
+    """
+    target = f"{settings.compute_node_ssh_user}@{settings.compute_node_host}"
+    scp_args = ["scp", "-r", *_get_ssh_prefix(), "-P", str(settings.compute_node_ssh_port)]  # -r: recursive copy; -P: SSH port
+    if settings.compute_node_ssh_key:
+        scp_args += ["-i", settings.compute_node_ssh_key]  # key configured: pass it as the identity file
+    elif settings.compute_node_ssh_password:
+        scp_args = ["sshpass", "-p", settings.compute_node_ssh_password] + scp_args  # no key: wrap the command in sshpass to supply the password
+    scp_args += [f"{target}:{remote_path}", local_path]  # source is user@host:remote_path, destination is local_path
+
+    try:
+        proc = subprocess.run(scp_args, capture_output=True, text=True, timeout=timeout)
+        clean_stderr = "\n".join(line for line in proc.stderr.split("\n")
+                                  if not line.startswith("Warning:"))  # drop stderr lines that start with "Warning:"
+        return proc.returncode, proc.stdout, clean_stderr
+    except subprocess.TimeoutExpired:  # transfer exceeded `timeout`: log it and report exit code -1
+        logger.error(f"SCP from remote timeout after {timeout}s: {remote_path}")
+        return -1, "", f"SCP timed out after {timeout}s"
+    except Exception as e:  # any other failure is logged and reported as exit code -1 with the error text as stderr
+        logger.error(f"SCP from remote failed: {e}")
+        return -1, "", str(e)
+
+
 def scp_to_remote_dir(local_path: str, remote_path: str) -> tuple[int, str, str]:
     """通过 SCP 把本地目录递归复制到远端主机。"""
     target = f"{settings.compute_node_ssh_user}@{settings.compute_node_host}"

+ 78 - 204
backend/app/engines/text_engine.py

@@ -318,39 +318,8 @@ class TextEngine(BaseEngine):
             reward_model_path = training_args.get("reward_model_path")
             reward_type = training_args.get("reward_type", "heuristic")
 
-            import inspect
-
-            # 检测 PPOTrainer 版本(新版无 step 方法,使用标准 Trainer API)
-            trainer_sig = inspect.signature(PPOTrainer.__init__)
-            trainer_params = set(trainer_sig.parameters.keys())
-            is_new_ppo = "step" not in dir(PPOTrainer)
-
-            # ---- 准备数据集 ----
-            if is_new_ppo:
-                # 新版 TRL (1.4.0+):PPOTrainer 自己处理 tokenization 和生成,
-                # 需要传入带 "prompt" 列的原始文本数据集
-                import json as _json
-                from datasets import Dataset as HFDataset
-
-                raw_data = []
-                with open(dataset_path, "r", encoding="utf-8") as f:
-                    for line in f:
-                        line = line.strip()
-                        if line:
-                            item = _json.loads(line)
-                            if "prompt" not in item:
-                                item["prompt"] = item.get("question", item.get("query", item.get("text", item.get("input", ""))))
-                            if isinstance(item["prompt"], (list, dict)):
-                                item["prompt"] = _json.dumps(item["prompt"], ensure_ascii=False)
-                            item["prompt"] = str(item["prompt"])
-                            raw_data.append(item)
-
-                ppo_dataset = HFDataset.from_list(raw_data)
-                logger.info(f"新版 PPOTrainer: 加载原始文本数据集,共 {len(ppo_dataset)} 条")
-            else:
-                # 旧版 TRL:需要预先 tokenize
-                ppo_dataset = self._tokenize_dataset_ppo(dataset_path, max_seq_length, response_length)
-                logger.info(f"旧版 PPOTrainer: 加载 tokenize 数据集,共 {len(ppo_dataset)} 条")
+            # PPO 专用:仅 tokenize prompt
+            ppo_dataset = self._tokenize_dataset_ppo(dataset_path, max_seq_length, response_length)
 
             # Reference 模型(冻结,用于 KL 惩罚)
             ref_model = deepcopy(self._model)
@@ -358,194 +327,99 @@ class TextEngine(BaseEngine):
             for param in ref_model.parameters():
                 param.requires_grad = False
 
-            # 兼容不同版本的 TRL PPOConfig 参数名变化
-            ppo_config_sig = inspect.signature(PPOConfig.__init__)
-            ppo_config_params = set(ppo_config_sig.parameters.keys())
-
-            ppo_config_kwargs = dict(
+            ppo_config = PPOConfig(
                 learning_rate=learning_rate,
                 batch_size=batch_size,
                 gradient_accumulation_steps=gradient_accumulation,
+                ppo_epochs=ppo_epochs,
+                vf_coef=vf_coef,
+                kl_ctl=kl_coef,
+                response_length=response_length,
                 output_dir=output_dir,
                 logging_steps=10,
                 save_strategy=save_strategy,
+                fp16=True,
                 report_to="none",
                 dataloader_num_workers=4,
                 dataloader_pin_memory=False,
-                torch_compile=False,
             )
 
-            # ppo_epochs: 新版叫 num_ppo_epochs,旧版叫 ppo_epochs
-            if "num_ppo_epochs" in ppo_config_params:
-                ppo_config_kwargs["num_ppo_epochs"] = ppo_epochs
-            elif "ppo_epochs" in ppo_config_params:
-                ppo_config_kwargs["ppo_epochs"] = ppo_epochs
-
-            # kl_ctl: 新版叫 init_kl_coef,旧版叫 kl_ctl
-            if "init_kl_coef" in ppo_config_params:
-                ppo_config_kwargs["init_kl_coef"] = kl_coef
-            elif "kl_ctl" in ppo_config_params:
-                ppo_config_kwargs["kl_ctl"] = kl_coef
-
-            # vf_coef: 新版可能已移除,仅在支持时传入
-            if "vf_coef" in ppo_config_params:
-                ppo_config_kwargs["vf_coef"] = vf_coef
-
-            # response_length: 部分版本可能不支持
-            if "response_length" in ppo_config_params:
-                ppo_config_kwargs["response_length"] = response_length
-
-            # fp16/bf16: 新版可能使用不同的混合精度参数名
-            if "fp16" in ppo_config_params:
-                ppo_config_kwargs["fp16"] = True
-
-            logger.info(f"PPOConfig 可用参数: {sorted(ppo_config_params)}")
-            logger.info(f"PPOConfig 实际传入参数: {ppo_config_kwargs}")
-
-            ppo_config = PPOConfig(**ppo_config_kwargs)
-
-            # 兼容不同版本的 PPOTrainer 参数名(config vs args)
-            trainer_sig = inspect.signature(PPOTrainer.__init__)
-            trainer_params = set(trainer_sig.parameters.keys())
-
-            # ---- 加载奖励模型 ----
-            reward_model = None
-            if reward_type == "model" and reward_model_path:
-                from transformers import AutoModelForSequenceClassification
-                reward_model = AutoModelForSequenceClassification.from_pretrained(
-                    reward_model_path, device_map={"": 0}
-                )
-            else:
-                # 启发式奖励:包装成 nn.Module 以兼容新版 PPOTrainer 的 reward_model 参数
-                class _HeuristicRewardModel(torch.nn.Module):
-                    """将启发式奖励函数包装为 reward model,供新版 PPOTrainer 使用。"""
-
-                    def __init__(self, tokenizer, reward_func):
-                        super().__init__()
-                        self.tokenizer = tokenizer
-                        self.reward_func = reward_func
-                        # 需要一个 dummy 参数让 Trainer 识别为有效的 Module
-                        self._dummy = torch.nn.Parameter(torch.zeros(1))
-
-                    def forward(self, input_ids=None, attention_mask=None, **kwargs):
-                        texts = [
-                            self.tokenizer.decode(ids, skip_special_tokens=True)
-                            for ids in input_ids
-                        ]
-                        rewards = self.reward_func(texts, texts)
-                        return type("RewardOutput", (), {
-                            "logits": torch.tensor(rewards, dtype=torch.float32, device=input_ids.device).unsqueeze(-1)
-                        })()
-
-                reward_model = _HeuristicRewardModel(self._tokenizer, _compute_heuristic_reward)
-
-            # ---- 构建 value_model(价值函数模型,新版 PPOTrainer 必需)----
-            value_model = None
-            if "value_model" in trainer_params:
-                from transformers import AutoModelForSequenceClassification
-                # PEFT 包装后 config._name_or_path 仍指向 base model
-                base_model_path = getattr(
-                    peft_config, "base_model_name_or_path", None
-                ) or self._model.config._name_or_path
-                value_model = AutoModelForSequenceClassification.from_pretrained(
-                    base_model_path,
-                    num_labels=1,
-                    torch_dtype=torch.float16,
-                )
-                value_model.to(self._model.device)
-                value_model.eval()
-                logger.info(f"已加载 value_model from {base_model_path}")
-
-            # ---- 构建 PPOTrainer ----
-            trainer_kwargs = dict(
+            trainer = PPOTrainer(
+                config=ppo_config,
                 model=self._model,
                 ref_model=ref_model,
                 processing_class=self._tokenizer,
                 train_dataset=ppo_dataset,
             )
 
-            # 新版叫 args,旧版叫 config
-            if "args" in trainer_params:
-                trainer_kwargs["args"] = ppo_config
-            elif "config" in trainer_params:
-                trainer_kwargs["config"] = ppo_config
-
-            # 新版 PPOTrainer 支持 reward_model 参数
-            if "reward_model" in trainer_params:
-                trainer_kwargs["reward_model"] = reward_model
-
-            # 新版 PPOTrainer 需要 value_model
-            if value_model is not None:
-                trainer_kwargs["value_model"] = value_model
-
-            logger.info(f"PPOTrainer 可用参数: {sorted(trainer_params)}")
-            trainer = PPOTrainer(**trainer_kwargs)
-
-            # ---- 训练 ----
-            if hasattr(trainer, "step"):
-                # 旧版 TRL:手动循环 + trainer.step()
-                dataloader = trainer.dataloader
-                total_steps = len(dataloader) * epochs
-                step_count = 0
-
-                for epoch in range(epochs):
-                    for batch in dataloader:
-                        step_count += 1
-                        query_tensors = batch["input_ids"]
-
-                        response_tensors = []
-                        for query in query_tensors:
-                            query_tensor = torch.tensor(query).unsqueeze(0).to(self._model.device)
-                            gen_output = self._model.generate(
-                                query_tensor,
-                                max_new_tokens=response_length,
-                                do_sample=True,
-                                top_p=0.9,
-                                temperature=0.7,
-                            )
-                            response_tensors.append(gen_output[0][query_tensor.shape[-1]:])
-
-                        responses_text = [
-                            self._tokenizer.decode(r, skip_special_tokens=True)
-                            for r in response_tensors
-                        ]
-                        prompts_text = [
-                            self._tokenizer.decode(q, skip_special_tokens=True)
-                            for q in query_tensors
-                        ]
-
-                        if reward_type == "model" and reward_model_path:
-                            reward_inputs = [p + r for p, r in zip(prompts_text, responses_text)]
-                            tokenized = self._tokenizer(
-                                reward_inputs, return_tensors="pt", padding=True, truncation=True
-                            ).to(self._model.device)
-                            with torch.no_grad():
-                                rewards = reward_model(**tokenized).logits.squeeze(-1).tolist()
-                        else:
-                            rewards = _compute_heuristic_reward(prompts_text, responses_text)
-
-                        reward_tensors = [torch.tensor(r, device=self._model.device) for r in rewards]
-                        stats = trainer.step(query_tensors, response_tensors, reward_tensors)
-
-                        if step_count % 10 == 0:
-                            for cb in (all_callbacks or []):
-                                if hasattr(cb, "on_log"):
-                                    cb.on_log(
-                                        SimpleNamespace(),
-                                        SimpleNamespace(
-                                            epoch=epoch, global_step=step_count, max_steps=total_steps
-                                        ),
-                                        None,
-                                        logs={
-                                            "loss": stats.get("ppo/loss/total", 0),
-                                            "learning_rate": stats.get("ppo/learning_rate", learning_rate),
-                                        },
-                                    )
-            else:
-                # 新版 TRL (>=1.0):标准 Trainer API,直接 train()
-                for cb in (all_callbacks or []):
-                    trainer.add_callback(cb)
-                trainer.train()
+            dataloader = trainer.dataloader
+            total_steps = len(dataloader) * epochs
+            step_count = 0
+
+            for epoch in range(epochs):
+                for batch in dataloader:
+                    step_count += 1
+                    query_tensors = batch["input_ids"]
+
+                    # 生成回答
+                    response_tensors = []
+                    for query in query_tensors:
+                        query_tensor = torch.tensor(query).unsqueeze(0).to(self._model.device)
+                        gen_output = self._model.generate(
+                            query_tensor,
+                            max_new_tokens=response_length,
+                            do_sample=True,
+                            top_p=0.9,
+                            temperature=0.7,
+                        )
+                        response_tensors.append(gen_output[0][query_tensor.shape[-1]:])
+
+                    # 解码文本用于奖励计算
+                    responses_text = [
+                        self._tokenizer.decode(r, skip_special_tokens=True)
+                        for r in response_tensors
+                    ]
+                    prompts_text = [
+                        self._tokenizer.decode(q, skip_special_tokens=True)
+                        for q in query_tensors
+                    ]
+
+                    # 计算奖励
+                    if reward_type == "model" and reward_model_path:
+                        from transformers import AutoModelForSequenceClassification
+
+                        reward_model = AutoModelForSequenceClassification.from_pretrained(
+                            reward_model_path, device_map={"": 0}
+                        )
+                        reward_inputs = [p + r for p, r in zip(prompts_text, responses_text)]
+                        tokenized = self._tokenizer(
+                            reward_inputs, return_tensors="pt", padding=True, truncation=True
+                        ).to(self._model.device)
+                        with torch.no_grad():
+                            rewards = reward_model(**tokenized).logits.squeeze(-1).tolist()
+                    else:
+                        rewards = _compute_heuristic_reward(prompts_text, responses_text)
+
+                    reward_tensors = [torch.tensor(r, device=self._model.device) for r in rewards]
+
+                    # PPO 更新
+                    stats = trainer.step(query_tensors, response_tensors, reward_tensors)
+
+                    # 报告进度
+                    if step_count % 10 == 0:
+                        for cb in (all_callbacks or []):
+                            if hasattr(cb, "on_log"):
+                                cb.on_log(
+                                    SimpleNamespace(),
+                                    SimpleNamespace(
+                                        epoch=epoch, global_step=step_count, max_steps=total_steps
+                                    ),
+                                    None,
+                                    logs={
+                                        "loss": stats.get("ppo/loss/total", 0),
+                                        "learning_rate": stats.get("ppo/learning_rate", learning_rate),
+                                    },
+                                )
 
             self._model.save_pretrained(output_dir)
             self._tokenizer.save_pretrained(output_dir)

+ 932 - 71
result.txt

@@ -1,72 +1,933 @@
-(base) [root@localhost ~]# docker exec finetune-trainer bash -c 'cat /proc/$(pgrep -f "remote_train" | head -1)/fd/1 2>/dev/null | tail -20'
-  File "/opt/conda/lib/python3.10/site-packages/bitsandbytes/cextension.py", line 320, in <module>
-    lib = get_native_library()
-  File "/opt/conda/lib/python3.10/site-packages/bitsandbytes/cextension.py", line 288, in get_native_library
-    raise RuntimeError(f"Configured {BNB_BACKEND} binary not found at {cuda_binary_path}")
-RuntimeError: Configured CUDA binary not found at /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda116.so
-[transformers] warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
-/root/Fine-tuning/backend/app/engines/text_engine.py:310: TRLExperimentalWarning: You are importing from 'trl.experimental'. APIs here are unstable and may change or be removed without notice. Silence this warning by setting environment variable TRL_EXPERIMENTAL_SILENCE=1.
-  from trl.experimental.ppo import PPOConfig, PPOTrainer
-[transformers] `torch_dtype` is deprecated! Use `dtype` instead!
-trainable params: 5,070,848 || all params: 757,463,872 || trainable%: 0.6695
-Loading weights: 100%|██████████| 473/473 [00:06<00:00, 72.87it/s] 
-[transformers] Qwen3_5ForSequenceClassification LOAD REPORT from: /root/Fine-tuning/backend/data/models/Qwen_Qwen3.5-0.8B
-Key          | Status  | 
--------------+---------+-
-score.weight | MISSING | 
+lq@lq:~/Fine-tuning$ sudo docker logs -f finetune-backend
+[sudo] password for lq: 
+=> Syncing backend code to compute node 192.168.91.253 ...
+Warning: Permanently added '192.168.91.253' (ED25519) to the list of known hosts.
+sending incremental file list
+.dockerignore
+.env.docker
+.env.example
+.python-version
+Dockerfile
+entrypoint.sh
+main.py
+pyproject.toml
+requirements.txt
+app/__init__.py
+app/config.py
+app/__pycache__/__init__.cpython-310.pyc
+app/__pycache__/config.cpython-310.pyc
+app/api/__init__.py
+app/api/annotation_platform.py
+app/api/api_keys.py
+app/api/auth.py
+app/api/datasets.py
+app/api/deployment.py
+app/api/evaluation.py
+app/api/inference.py
+app/api/models.py
+app/api/training.py
+app/api/__pycache__/__init__.cpython-310.pyc
+app/api/__pycache__/api_keys.cpython-310.pyc
+app/api/__pycache__/auth.cpython-310.pyc
+app/api/__pycache__/datasets.cpython-310.pyc
+app/api/__pycache__/deployment.cpython-310.pyc
+app/api/__pycache__/evaluation.cpython-310.pyc
+app/api/__pycache__/inference.cpython-310.pyc
+app/api/__pycache__/models.cpython-310.pyc
+app/api/__pycache__/sample_center.cpython-310.pyc
+app/api/__pycache__/training.cpython-310.pyc
+app/core/__init__.py
+app/core/auth.py
+app/core/background_tasks.py
+app/core/db.py
+app/core/deploy_server_template.py
+app/core/inference_worker.py
+app/core/job_queue.py
+app/core/logging.py
+app/core/remote_deploy.py
+app/core/remote_eval.py
+app/core/remote_executor.py
+app/core/security.py
+app/core/sso_client.py
+app/core/websocket.py
+app/core/__pycache__/__init__.cpython-310.pyc
+app/core/__pycache__/auth.cpython-310.pyc
+app/core/__pycache__/background_tasks.cpython-310.pyc
+app/core/__pycache__/db.cpython-310.pyc
+app/core/__pycache__/job_queue.cpython-310.pyc
+app/core/__pycache__/logging.cpython-310.pyc
+app/core/__pycache__/remote_deploy.cpython-310.pyc
+app/core/__pycache__/remote_eval.cpython-310.pyc
+app/core/__pycache__/remote_executor.cpython-310.pyc
+app/core/__pycache__/security.cpython-310.pyc
+app/core/__pycache__/sso_client.cpython-310.pyc
+app/core/__pycache__/websocket.cpython-310.pyc
+app/engines/
+app/engines/__init__.py
+app/engines/__main__.py
+app/engines/base.py
+app/engines/multimodal_engine.py
+app/engines/remote_train.py
+app/engines/text_engine.py
+app/engines/vision_engine.py
+app/engines/__pycache__/__init__.cpython-310.pyc
+app/engines/__pycache__/base.cpython-310.pyc
+app/engines/__pycache__/remote_train.cpython-310.pyc
+app/engines/__pycache__/text_engine.cpython-310.pyc
+app/peft/__init__.py
+app/peft/__pycache__/__init__.cpython-310.pyc
+app/preprocessors/
+app/preprocessors/__init__.py
+app/preprocessors/__pycache__/
+app/preprocessors/__pycache__/__init__.cpython-310.pyc
+app/schemas/__init__.py
+app/schemas/annotation_platform.py
+app/schemas/background_task.py
+app/schemas/common.py
+app/schemas/dataset.py
+app/schemas/deployment.py
+app/schemas/evaluation.py
+app/schemas/model.py
+app/schemas/model_test.py
+app/schemas/training.py
+app/schemas/__pycache__/__init__.cpython-310.pyc
+app/schemas/__pycache__/background_task.cpython-310.pyc
+app/schemas/__pycache__/common.cpython-310.pyc
+app/schemas/__pycache__/dataset.cpython-310.pyc
+app/schemas/__pycache__/deployment.cpython-310.pyc
+app/schemas/__pycache__/evaluation.cpython-310.pyc
+app/schemas/__pycache__/model.cpython-310.pyc
+app/schemas/__pycache__/model_test.cpython-310.pyc
+app/schemas/__pycache__/sample_center.cpython-310.pyc
+app/schemas/__pycache__/training.cpython-310.pyc
+app/services/annotation_platform_service.py
+app/services/api_key_service.py
+app/services/dataset_service.py
+app/services/deploy_service.py
+app/services/eval_service.py
+app/services/inference_service.py
+app/services/model_service.py
+app/services/model_test_service.py
+app/services/training_service.py
+app/services/__pycache__/api_key_service.cpython-310.pyc
+app/services/__pycache__/dataset_service.cpython-310.pyc
+app/services/__pycache__/deploy_service.cpython-310.pyc
+app/services/__pycache__/eval_service.cpython-310.pyc
+app/services/__pycache__/inference_service.cpython-310.pyc
+app/services/__pycache__/model_service.cpython-310.pyc
+app/services/__pycache__/model_test_service.cpython-310.pyc
+app/services/__pycache__/sample_center_service.cpython-310.pyc
+app/services/__pycache__/training_service.cpython-310.pyc
 
-Notes:
-- MISSING:	those params were newly initialized because missing from the checkpoint. Consider training on your downstream task.
-===training policy===
-  0%|          | 0/1 [00:00<?, ?it/s](base) [root@localhost ~]# docker exec finetune-trainer ps aux | grep python | grep -v defunct | grep -v grep
-root       31506  104  2.0 197646012 10808312 ?  Rsl  11:31  19:50 /opt/conda/bin/python -m app.engines.remote_train 0b822b5e-105d-4d1e-ad51-7217a2d63c29 Qwen/Qwen3.5-0.8B text /root/Fine-tuning/backend/data/datasets/ppo_sample.jsonl /root/Fine-tuning/backend/data/config_0b822b5e-105d-4d1e-ad51-7217a2d63c29.json
-root       31639  1.5  1.6 17399148 8921280 ?    Sl   11:31   0:17 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=31506 --read-fd=10 --write-fd=13 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       31725  0.0  0.9 17102096 4842292 ?    Sl   11:31   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=31506 --read-fd=10 --write-fd=13 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       31727  0.0  0.9 17102096 4842164 ?    Sl   11:31   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=31506 --read-fd=10 --write-fd=13 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       31729  0.0  0.9 17102096 4841516 ?    Sl   11:31   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=31506 --read-fd=10 --write-fd=13 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       31731  0.0  0.9 17102096 4841516 ?    Sl   11:31   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=31506 --read-fd=10 --write-fd=13 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       31733  0.0  0.9 17102096 4841516 ?    Sl   11:31   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=31506 --read-fd=10 --write-fd=13 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       31735  0.0  0.9 17102096 4841528 ?    Sl   11:31   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=31506 --read-fd=10 --write-fd=13 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       31737  0.0  0.9 17102096 4841528 ?    Sl   11:31   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=31506 --read-fd=10 --write-fd=13 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       31739  0.0  0.9 17102096 4841528 ?    Sl   11:31   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=31506 --read-fd=10 --write-fd=13 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       31741  0.0  0.9 17102096 4841528 ?    Sl   11:31   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=31506 --read-fd=10 --write-fd=13 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       31743  0.0  0.9 17102096 4841532 ?    Sl   11:31   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=31506 --read-fd=10 --write-fd=13 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       31745  0.0  0.9 17102096 4841532 ?    Sl   11:31   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=31506 --read-fd=10 --write-fd=13 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       31747  0.0  0.9 17102096 4841532 ?    Sl   11:31   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=31506 --read-fd=10 --write-fd=13 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       31749  0.0  0.9 17102096 4841532 ?    Sl   11:31   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=31506 --read-fd=10 --write-fd=13 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       31751  0.0  0.9 17102096 4841532 ?    Sl   11:31   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=31506 --read-fd=10 --write-fd=13 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       31753  0.0  0.9 17102096 4841532 ?    Sl   11:31   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=31506 --read-fd=10 --write-fd=13 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       31755  0.0  0.9 17102096 4841532 ?    Sl   11:31   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=31506 --read-fd=10 --write-fd=13 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       31757  0.0  0.9 17102096 4841536 ?    Sl   11:31   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=31506 --read-fd=10 --write-fd=13 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       31759  0.0  0.9 17102096 4841536 ?    Sl   11:31   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=31506 --read-fd=10 --write-fd=13 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       31761  0.0  0.9 17102096 4841540 ?    Sl   11:31   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=31506 --read-fd=10 --write-fd=13 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       31763  0.0  0.9 17102096 4841540 ?    Sl   11:31   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=31506 --read-fd=10 --write-fd=13 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       31765  0.0  0.9 17102096 4841544 ?    Sl   11:31   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=31506 --read-fd=10 --write-fd=13 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       31767  0.0  0.9 17102096 4841544 ?    Sl   11:31   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=31506 --read-fd=10 --write-fd=13 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       31769  0.0  0.9 17102096 4841544 ?    Sl   11:31   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=31506 --read-fd=10 --write-fd=13 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       31771  0.0  0.9 17102096 4841548 ?    Sl   11:31   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=31506 --read-fd=10 --write-fd=13 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       31773  0.0  0.9 17102096 4841548 ?    Sl   11:31   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=31506 --read-fd=10 --write-fd=13 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       31778  0.0  0.9 17102096 4841548 ?    Sl   11:31   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=31506 --read-fd=10 --write-fd=13 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       31780  0.0  0.9 17102096 4841884 ?    Sl   11:31   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=31506 --read-fd=10 --write-fd=13 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       31782  0.0  0.9 17102096 4841884 ?    Sl   11:31   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=31506 --read-fd=10 --write-fd=13 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       31784  0.0  0.9 17102096 4841888 ?    Sl   11:31   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=31506 --read-fd=10 --write-fd=13 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       31849  0.0  0.9 17102096 4841888 ?    Sl   11:31   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=31506 --read-fd=10 --write-fd=13 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       31851  0.0  0.9 17102096 4841496 ?    Sl   11:31   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=31506 --read-fd=10 --write-fd=13 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-root       31853  0.0  0.9 17102096 4841496 ?    Sl   11:31   0:00 /opt/conda/bin/python /opt/conda/lib/python3.10/site-packages/torch/_inductor/compile_worker/__main__.py --pickler=torch._inductor.compile_worker.subproc_pool.SubprocPickler --kind=fork --workers=32 --parent=31506 --read-fd=10 --write-fd=13 --torch-key=kdnYoFpyXJfmeFh07c0N00WVSuau0TZN11yUZqCrSHo=
-(base) [root@localhost ~]# docker exec finetune-trainer mx-smi | grep python
-|  3                    31506         python                       4580           |
-(base) [root@localhost ~]# docker exec finetune-trainer tail -f /tmp/train_*.log 2>/dev/null | head -50
-(base) [root@localhost ~]# docker exec finetune-trainer top -b -n 1 | grep python | head -5
-  31506 root      20   0  188.5g  10.3g   4.0g R 100.0   2.0  20:20.44 python
-     29 root      20   0       0      0      0 Z   0.0   0.0   2:36.28 python
-    161 root      20   0       0      0      0 Z   0.0   0.0   0:17.39 python
-    499 root      20   0       0      0      0 Z   0.0   0.0   0:00.22 python
-    501 root      20   0       0      0      0 Z   0.0   0.0   0:00.29 python
-(base) [root@localhost ~]# docker exec finetune-trainer mx-smi | grep -E "MiB|python"
-| 52W / 225W       | 42C          P9 | 60459/65536 MiB     | Available            |
-| 49W / 225W       | 41C          P9 | 60459/65536 MiB     | Available            |
-| 53W / 225W       | 44C          P9 | 29988/65536 MiB     | Available            |
-| 51W / 225W       | 42C          P9 | 5248/65536 MiB      | Available            |
-|                                                                  Usage(MiB)     |
-|  3                    31506         python                       4580           |
-(base) [root@localhost ~]# 
+sent 8,297 bytes  received 7,247 bytes  840.22 bytes/sec
+total size is 551,554  speedup is 35.48
+=> Sync done.
+INFO:     Started server process [1]
+INFO:     Waiting for application startup.
+2026-05-27 03:56:31 | INFO     | peft-platform | JobQueue started with 2 workers
+INFO:     Application startup complete.
+INFO:     Uvicorn running on http://0.0.0.0:8010 (Press CTRL+C to quit)
+INFO:     127.0.0.1:59336 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:39822 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:50448 - "GET /api/v1/datasets/ HTTP/1.0" 200 OK
+INFO:     172.20.0.4:50450 - "GET /api/v1/models/ HTTP/1.0" 200 OK
+INFO:     172.20.0.4:50454 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:50460 - "GET /api/v1/models/ HTTP/1.0" 200 OK
+INFO:     172.20.0.4:50484 - "GET /api/v1/datasets/ HTTP/1.0" 200 OK
+INFO:     172.20.0.4:50476 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:39340 - "GET /health HTTP/1.1" 200 OK
+2026-05-27 03:57:10 | INFO     | peft-platform | Training job 832c0256-2308-4b07-b8de-fcf9ce100c4c: num_gpus=1, batch_size=16
+2026-05-27 03:57:10 | INFO     | peft-platform | Job 832c0256-2308-4b07-b8de-fcf9ce100c4c enqueued
+2026-05-27 03:57:10 | INFO     | peft-platform | Training job created: 832c0256-2308-4b07-b8de-fcf9ce100c4c
+INFO:     172.20.0.4:50492 - "POST /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:50498 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:50508 - "GET /api/v1/models/ HTTP/1.0" 200 OK
+INFO:     172.20.0.4:50502 - "GET /api/v1/datasets/ HTTP/1.0" 200 OK
+2026-05-27 03:57:10 | INFO     | app.engines.text_engine | Preprocessed 8 samples for ppo/alpaca
+INFO:     172.20.0.4:50512 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:50526 - "WebSocket /ws/training/832c0256-2308-4b07-b8de-fcf9ce100c4c?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJhZjgyN2IxZC0wM2IxLTQwZGMtOTliMC1jOGRjYTEzNWEwNmUiLCJ1c2VybmFtZSI6InN1cGVyX2FkbWluIiwicm9sZXMiOlsic3VwZXJfYWRtaW4iXSwiZXhwIjoxNzc5ODU1MTMzLCJpYXQiOjE3Nzk4NTM5MzMsInR5cGUiOiJhY2Nlc3MifQ.DXc1xT1Oh5fiC6yaO53kZ2V9UOirKS2yZCTKnxrMBBA" [accepted]
+2026-05-27 03:57:10 | INFO     | peft-platform | 客户端已连接到训练 WebSocket (job 832c0256-2308-4b07-b8de-fcf9ce100c4c)
+INFO:     connection open
+INFO:     172.20.0.4:60796 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:60808 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:40164 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+2026-05-27 03:57:28 | INFO     | peft-platform | Remote cleanup result: true
+no python processes
+2026-05-27 03:58:21 | INFO     | peft-platform | Created remote dataset directory: /root/Fine-tuning/backend/data/datasets
+2026-05-27 03:58:21 | INFO     | peft-platform | Uploading dataset file: /root/Fine-tuning/backend/data/uploads/ppo_sample.jsonl -> /root/Fine-tuning/backend/data/datasets/ppo_sample.jsonl
+2026-05-27 03:58:39 | INFO     | peft-platform | Dataset uploaded successfully: /root/Fine-tuning/backend/data/datasets/ppo_sample.jsonl
+2026-05-27 03:59:14 | INFO     | peft-platform | Remote training launched in container: job=832c0256-2308-4b07-b8de-fcf9ce100c4c, container_pid=35
+INFO:     127.0.0.1:35058 - "GET /health HTTP/1.1" 200 OK
+INFO:     127.0.0.1:52056 - "GET /health HTTP/1.1" 200 OK
+INFO:     127.0.0.1:37292 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:40168 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:51214 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:38860 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:51202 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:36072 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:38866 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:44930 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:44938 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:44966 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:44954 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:53854 - "GET /health HTTP/1.1" 200 OK
+INFO:     127.0.0.1:51240 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:47368 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:42234 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:36516 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:39910 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:36532 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] [remote_train] fla package found at: /opt/conda/lib/python3.10/site-packages/fla
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] [remote_train] fla shared memory patch v2 already applied, skipping
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] [remote_train] [rank 0] === Training job started: 832c0256-2308-4b07-b8de-fcf9ce100c4c ===
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] [remote_train] model_id=Qwen/Qwen3.5-0.8B, model_type=text
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] [remote_train] dataset_path=/root/Fine-tuning/backend/data/datasets/ppo_sample.jsonl
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] [remote_train] config={"model_id": "Qwen/Qwen3.5-0.8B", "model_type": "text", "dataset_id": "26767f82-673c-4199-8c59-e9ed715f0ae0", "peft_method": "lora", "epochs": 3, "batch_size": 16, "gradient_accumulation": 4, "learnin
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] [remote_train] Step 1: Preprocessing dataset...
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] [remote_train]   task_type=ppo, template=auto
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] [remote_train]   Engine loaded: TextEngine
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] [remote_train]   Running preprocess_dataset...
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] [remote_train]   Preprocessing done, output: /root/Fine-tuning/backend/data/processed/832c0256-2308-4b07-b8de-fcf9ce100c4c_processed.jsonl
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] [remote_train] Step 2: Loading model: Qwen/Qwen3.5-0.8B...
+2026-05-27 04:00:43 | ERROR    | peft-platform | [253:832c0256] Current Triton version 3.0.0 is below the recommended 3.2.0 version. Errors may occur and these issues will not be fixed. Please consider upgrading Triton.
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Current Python version 3.10 is below the recommended 3.11 version. It is recommended to upgrade to Python 3.11 or higher for the best experience.
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] torch.compile is not available in Python 3.10, using identity decorator instead
+2026-05-27 04:00:43 | WARNING  | peft-platform | [253:832c0256] /opt/conda/lib/python3.10/site-packages/torchvision/datapoints/__init__.py:12: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] warnings.warn(_BETA_TRANSFORMS_WARNING)
+2026-05-27 04:00:43 | WARNING  | peft-platform | [253:832c0256] /opt/conda/lib/python3.10/site-packages/torchvision/transforms/v2/__init__.py:54: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] warnings.warn(_BETA_TRANSFORMS_WARNING)
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:   0%|          | 0/320 [00:00<?, ?it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:   0%|          | 1/320 [00:02<11:49,  2.22s/it]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:   3%|▎         | 11/320 [00:02<00:50,  6.10it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:   8%|▊         | 25/320 [00:02<00:19, 15.16it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  12%|█▏        | 39/320 [00:02<00:10, 26.00it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  15%|█▍        | 47/320 [00:02<00:09, 29.92it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  19%|█▉        | 60/320 [00:03<00:06, 40.35it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  21%|██▏       | 68/320 [00:03<00:05, 44.90it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  24%|██▍       | 76/320 [00:03<00:04, 48.89it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  27%|██▋       | 85/320 [00:03<00:04, 55.88it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  29%|██▉       | 94/320 [00:03<00:03, 59.30it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  32%|███▏      | 102/320 [00:03<00:03, 61.67it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  36%|███▌      | 114/320 [00:03<00:02, 71.31it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  40%|████      | 128/320 [00:03<00:02, 74.91it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  44%|████▍     | 140/320 [00:04<00:02, 74.80it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  46%|████▋     | 148/320 [00:04<00:02, 68.41it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  49%|████▉     | 156/320 [00:04<00:02, 66.74it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  53%|█████▎    | 170/320 [00:04<00:01, 78.63it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  57%|█████▋    | 181/320 [00:04<00:01, 77.34it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  61%|██████    | 195/320 [00:04<00:01, 76.82it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  64%|██████▍   | 204/320 [00:05<00:01, 66.62it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  69%|██████▉   | 220/320 [00:05<00:01, 73.99it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  72%|███████▏  | 231/320 [00:05<00:01, 70.69it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  75%|███████▍  | 239/320 [00:05<00:01, 68.94it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  78%|███████▊  | 250/320 [00:05<00:00, 73.80it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  81%|████████  | 258/320 [00:05<00:00, 66.56it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  85%|████████▌ | 273/320 [00:05<00:00, 75.72it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  90%|████████▉ | 287/320 [00:06<00:00, 77.58it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  92%|█████████▎| 296/320 [00:06<00:00, 71.51it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  95%|█████████▌| 304/320 [00:06<00:00, 72.41it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  98%|█████████▊| 312/320 [00:06<00:00, 73.52it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights: 100%|██████████| 320/320 [00:06<00:00, 48.81it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] [remote_train]   Model loaded successfully
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] [remote_train] Step 3: Building PEFT config...
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] [remote_train] Step 4: Starting training...
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] [remote_train] NOTE: First step may take 2-5 minutes due to Triton kernel compilation (autotuning). This is normal.
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] [remote_train] Total steps: 3 epochs, batch_size per GPU=16
+2026-05-27 04:00:43 | WARNING  | peft-platform | [253:832c0256] /opt/conda/lib/python3.10/site-packages/peft/tuners/tuners_utils.py:1348: UserWarning: Model has `tie_word_embeddings=True` and a tied layer is part of the adapter, but `ensure_weight_tying` is not set to True. This can lead to complications, for example when merging the adapter or converting your model to formats other than safetensors. Check the discussion here: https://github.com/huggingface/peft/issues/2777
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] warnings.warn(msg)
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] bitsandbytes library load error: Configured CUDA binary not found at /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda116.so
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Traceback (most recent call last):
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] File "/opt/conda/lib/python3.10/site-packages/bitsandbytes/cextension.py", line 320, in <module>
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] lib = get_native_library()
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] File "/opt/conda/lib/python3.10/site-packages/bitsandbytes/cextension.py", line 288, in get_native_library
+2026-05-27 04:00:43 | ERROR    | peft-platform | [253:832c0256] raise RuntimeError(f"Configured {BNB_BACKEND} binary not found at {cuda_binary_path}")
+2026-05-27 04:00:43 | ERROR    | peft-platform | [253:832c0256] RuntimeError: Configured CUDA binary not found at /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda116.so
+2026-05-27 04:00:43 | WARNING  | peft-platform | [253:832c0256] [transformers] warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
+2026-05-27 04:00:43 | WARNING  | peft-platform | [253:832c0256] /root/Fine-tuning/backend/app/engines/text_engine.py:310: TRLExperimentalWarning: You are importing from 'trl.experimental'. APIs here are unstable and may change or be removed without notice. Silence this warning by setting environment variable TRL_EXPERIMENTAL_SILENCE=1.
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] from trl.experimental.ppo import PPOConfig, PPOTrainer
+2026-05-27 04:00:43 | WARNING  | peft-platform | [253:832c0256] [transformers] `torch_dtype` is deprecated! Use `dtype` instead!
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] trainable params: 5,070,848 || all params: 757,463,872 || trainable%: 0.6695
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:   0%|          | 0/473 [00:00<?, ?it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:   0%|          | 1/473 [00:02<18:43,  2.38s/it]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  13%|█▎        | 61/473 [00:02<00:12, 33.88it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  19%|█▉        | 90/473 [00:02<00:08, 43.66it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  23%|██▎       | 109/473 [00:03<00:07, 50.60it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  26%|██▌       | 124/473 [00:03<00:06, 57.31it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  29%|██▉       | 138/473 [00:03<00:05, 61.75it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  32%|███▏      | 150/473 [00:03<00:04, 65.28it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  34%|███▍      | 161/473 [00:03<00:04, 65.78it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  37%|███▋      | 173/473 [00:03<00:04, 66.40it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  39%|███▉      | 185/473 [00:04<00:04, 65.54it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  42%|████▏     | 201/473 [00:04<00:03, 80.80it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  45%|████▍     | 211/473 [00:04<00:03, 83.19it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  47%|████▋     | 221/473 [00:04<00:03, 83.62it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  49%|████▉     | 231/473 [00:04<00:02, 82.36it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  51%|█████     | 240/473 [00:04<00:02, 79.58it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  53%|█████▎    | 249/473 [00:04<00:02, 75.17it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  54%|█████▍    | 257/473 [00:04<00:02, 72.68it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  58%|█████▊    | 273/473 [00:05<00:02, 88.06it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  60%|█████▉    | 282/473 [00:05<00:02, 78.78it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  62%|██████▏   | 291/473 [00:05<00:02, 72.26it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  65%|██████▍   | 306/473 [00:05<00:02, 76.36it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  67%|██████▋   | 318/473 [00:05<00:01, 84.04it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  72%|███████▏  | 340/473 [00:05<00:01, 114.61it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  77%|███████▋  | 364/473 [00:05<00:00, 138.25it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  81%|████████  | 384/473 [00:05<00:00, 153.55it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  85%|████████▍ | 401/473 [00:06<00:00, 157.16it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  90%|████████▉ | 424/473 [00:06<00:00, 167.82it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  94%|█████████▍| 445/473 [00:06<00:00, 172.18it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights:  98%|█████████▊| 463/473 [00:06<00:00, 160.24it/s]
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Loading weights: 100%|██████████| 473/473 [00:06<00:00, 72.56it/s]
+2026-05-27 04:00:43 | WARNING  | peft-platform | [253:832c0256] [transformers] Qwen3_5ForSequenceClassification LOAD REPORT from: /root/Fine-tuning/backend/data/models/Qwen_Qwen3.5-0.8B
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Key          | Status  |
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] -------------+---------+-
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] score.weight | MISSING |
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] Notes:
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] - MISSING:	those params were newly initialized because missing from the checkpoint. Consider training on your downstream task.
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] ===training policy===
+2026-05-27 04:00:43 | INFO     | peft-platform | [253:832c0256] 0%|          | 0/1 [00:00<?, ?it/s]
+INFO:     172.20.0.4:37512 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:37520 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:42038 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:42054 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:54404 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:41564 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:54420 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:51366 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:51374 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:51372 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:51384 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:42896 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:37624 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:42908 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:46132 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:46148 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:50088 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:50092 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:57084 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:50570 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:57086 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:47750 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:47756 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:36394 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:36402 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:57050 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:35558 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:57060 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:39816 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:39818 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:47380 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:47396 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:51768 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:55474 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:51778 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:44678 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:44686 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:58596 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:58598 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:43102 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:41496 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:43116 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:46762 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:46774 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:33864 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:33876 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:45352 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:34392 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:45362 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:56984 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:56988 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:33320 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:33322 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:47140 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:48598 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:47146 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:51682 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:51694 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:44054 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:44056 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:45734 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:45368 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:45738 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:52000 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:52014 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:32816 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:32830 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:48976 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:35024 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:48992 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:40444 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:40460 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:51448 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:51454 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:45958 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:48908 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:45970 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:52794 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:52806 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:44520 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:44526 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:34254 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:56500 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:34264 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:49998 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:50012 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:43788 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:43804 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:42614 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:45998 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:42622 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:32992 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:33008 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:34494 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:34496 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:36302 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:55948 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:36318 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:36992 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:37004 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:49380 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:49390 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:39108 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:49258 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:39124 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:46778 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:46788 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:51712 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:51728 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:41918 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:46566 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:41920 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:60542 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:60544 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:56672 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:56676 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:43858 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:56708 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:43862 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:44278 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:44286 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:35148 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:35150 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:45034 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:46310 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:45044 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:52406 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:52416 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:47640 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:47654 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:49252 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:49262 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:39458 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:39362 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:39364 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:54350 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:54358 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:49916 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:49928 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:40136 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:59388 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:59400 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:46028 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:46044 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:55242 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:55248 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:42708 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:40012 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:40020 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:41514 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:41520 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:57280 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:57294 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:36632 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:57230 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:57238 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:50354 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:50358 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:41850 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:41856 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:46602 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:47208 - "GET /api/v1/training/jobs HTTP/1.0" 401 Unauthorized
+INFO:     172.20.0.4:47218 - "POST /api/v1/auth/refresh HTTP/1.0" 200 OK
+INFO:     172.20.0.4:47224 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:47238 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:57082 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:57098 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:45620 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:45624 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:34198 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:57446 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:57458 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:49174 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:49190 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:52050 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:52066 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:52774 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:49346 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:49350 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:33160 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:33168 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:59190 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:59202 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:37008 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:37412 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:37422 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:39052 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:39054 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:49278 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:49290 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:52540 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:37202 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:37218 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:55074 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:55078 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:38416 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:38420 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:47526 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:45110 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:45112 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:51394 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:51400 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:56356 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:56358 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:41518 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:43898 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:43912 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:35290 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:35300 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:43752 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:43764 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:49308 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:34942 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:34944 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:58696 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:58710 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:42680 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:42690 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:59014 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:54416 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:54420 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:48850 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:48866 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:33402 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:33406 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:59302 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:44464 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:44474 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:43808 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:43822 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:47046 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:47050 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:48880 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:36028 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:36030 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:58314 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:58324 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:37080 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:37092 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:46852 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:60992 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:60998 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:39630 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:39642 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:36476 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:36488 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:53522 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:51504 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:51516 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:35868 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:35870 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:34374 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:34376 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:57202 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:44868 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:44872 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:42406 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:42422 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:60702 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:60710 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:60600 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:50484 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:50498 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:45690 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:45692 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:52716 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:52718 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:48724 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:51784 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:51786 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:38232 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:38234 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:34956 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:34966 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:52180 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:36488 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:36490 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:41376 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:41392 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:46826 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:46836 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:47626 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:54908 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:54912 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:35626 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:35634 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:50542 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:50550 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:36580 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:50738 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:50742 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:50508 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:50524 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:52314 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:52318 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:53346 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:33188 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:33196 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:45814 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:45830 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:57786 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:57794 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:47498 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:32778 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:32784 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:56482 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:56484 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:59438 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:59446 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:52028 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:56488 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:52044 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:33298 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:33306 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:51924 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:51930 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:52452 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:42738 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:52458 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:59086 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:59096 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:54470 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:54486 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:36706 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:52486 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:36716 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:51060 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:51068 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:34328 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:34334 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:54774 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:57076 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:54780 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:34402 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:34418 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:38228 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:38236 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:37076 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:45446 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:37086 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:48046 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:48058 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:38112 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:38120 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:45198 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:38814 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:45210 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:43402 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:43414 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:40246 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:40254 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:44564 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:58232 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:44576 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:53844 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:53846 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:34974 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:34978 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:60516 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:41410 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:60528 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:40258 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:40266 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:38798 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:38800 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:38078 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:45778 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:38086 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:51946 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:51952 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:35198 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:35204 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:56816 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:51960 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:56830 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:56830 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:56832 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:40294 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:40298 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:57358 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:60288 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:57370 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:48902 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:48910 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:37686 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:37702 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:40086 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:45518 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:40092 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:57062 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:57072 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:48960 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:48962 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:35056 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:33282 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:35064 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:40256 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:40258 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:41078 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:41080 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:48264 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:33056 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:48276 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:43240 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:43242 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:41054 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:41060 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:43068 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:35534 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:43070 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:55842 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:55848 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:55450 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:55458 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:40482 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:57392 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:40496 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:43374 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:43388 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:51658 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:51664 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:38436 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:55008 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:38446 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:54708 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:54720 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:58096 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:58104 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:41600 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:55794 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:41602 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:49594 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:49604 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:58604 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:58614 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:40642 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:54878 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:40646 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:43400 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:43416 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:35948 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:35960 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:33746 - "GET /api/v1/training/jobs HTTP/1.0" 401 Unauthorized
+INFO:     172.20.0.4:33756 - "POST /api/v1/auth/refresh HTTP/1.0" 200 OK
+INFO:     172.20.0.4:33760 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:35890 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:33776 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:45654 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:45666 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:56414 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:56422 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:45068 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:34722 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:45078 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:47542 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:47548 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:60286 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:60298 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:35570 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:36088 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:35586 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:38744 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:38756 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:38208 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:38214 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:41866 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:58152 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:41874 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:58270 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:58282 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:52058 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:52062 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:43178 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:43892 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:43182 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:36476 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:36488 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:44800 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:44816 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:60126 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:60128 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:50304 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:60974 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:60976 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:60204 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:60218 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:45836 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:45846 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:42984 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:42764 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:42768 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:45106 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:45114 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:47878 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:47886 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:60250 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:34066 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:34072 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:48678 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:48680 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:44510 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:44526 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:58120 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:51246 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:51260 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:56330 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:56340 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:38666 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:38680 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:52352 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:53626 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:53638 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:37922 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:37924 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:48974 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:48986 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:42640 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:34086 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:34100 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:45412 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:45420 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:35974 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:35990 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:58778 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:58494 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:58506 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:34012 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:34026 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:51558 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:51562 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:44164 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:56038 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:56052 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:52234 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:52242 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:39060 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:39062 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:55850 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:53702 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:53714 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:55232 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:55244 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:58022 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:58024 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:35092 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:33586 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:33600 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:52474 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:52484 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:33616 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:33628 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:46168 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:53610 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:53620 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:40012 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:40020 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:43308 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:43310 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:52462 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:44410 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:44420 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:54336 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:54338 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:34896 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:34908 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:44548 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:34280 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:34292 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:50498 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:50506 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:45522 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:45538 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:57902 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:47720 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:47734 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:34412 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:34422 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:47576 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:47580 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:33812 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:34674 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:34676 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:56512 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:56520 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:56118 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:56128 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:39556 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:46290 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:46302 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:37848 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:37864 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:32974 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:32976 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:51610 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:58296 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:58312 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:58882 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:58890 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:58712 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:58728 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:54736 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:50148 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:50164 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:47952 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:47954 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:49086 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:49090 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:38496 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:49240 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:49252 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:58204 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:58214 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:36860 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:36874 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:41468 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:57226 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:57236 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:54068 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:54076 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:51842 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:51850 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:49742 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:41866 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:41880 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:41118 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:41128 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:37406 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:37414 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:41030 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:49464 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:49480 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:35326 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:35330 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:41598 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:41606 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:56438 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:52102 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:52114 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:32814 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:32824 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:33056 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:33070 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:39812 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:59340 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:59352 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK