|
@@ -1,374 +1,261 @@
|
|
|
-INFO: 172.20.0.4:59236 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:59240 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:59252 - "GET /api/v1/datasets/ HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:59238 - "GET /api/v1/models/ HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:59262 - "GET /api/v1/models/ HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:59276 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:59278 - "GET /api/v1/models/ HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:59294 - "GET /api/v1/datasets/ HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:59308 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:38434 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:38440 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 127.0.0.1:48106 - "GET /health HTTP/1.1" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:56722 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:56736 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-2026-05-25 13:49:13 | INFO | peft-platform | Training job ddc610b6-d872-466c-b382-3c9bfd6df12a: num_gpus=2, batch_size=64
|
|
|
|
|
-2026-05-25 13:49:13 | INFO | peft-platform | Job ddc610b6-d872-466c-b382-3c9bfd6df12a enqueued
|
|
|
|
|
-2026-05-25 13:49:13 | INFO | peft-platform | Training job created: ddc610b6-d872-466c-b382-3c9bfd6df12a
|
|
|
|
|
-INFO: 172.20.0.4:56748 - "POST /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-2026-05-25 13:49:13 | INFO | app.engines.text_engine | Preprocessed 60 samples for sft/alpaca
|
|
|
|
|
-INFO: 172.20.0.4:56768 - "GET /api/v1/models/ HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:56784 - "GET /api/v1/datasets/ HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:56758 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:50036 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:50048 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:37870 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:37874 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 127.0.0.1:46502 - "GET /health HTTP/1.1" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:51788 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-2026-05-25 13:49:35 | INFO | peft-platform | Remote cleanup result: true
|
|
|
|
|
-cleaned 70 processes
|
|
|
|
|
-2026-05-25 13:50:28 | INFO | peft-platform | Created remote dataset directory: /root/Fine-tuning/backend/data/datasets
|
|
|
|
|
-2026-05-25 13:50:28 | INFO | peft-platform | Uploading dataset file: /root/Fine-tuning/backend/data/processed/ms_yanalong_yanalong/data.jsonl -> /root/Fine-tuning/backend/data/datasets/data.jsonl
|
|
|
|
|
-2026-05-25 13:50:46 | INFO | peft-platform | Dataset uploaded successfully: /root/Fine-tuning/backend/data/datasets/data.jsonl
|
|
|
|
|
-2026-05-25 13:51:03 | INFO | peft-platform | Multi-GPU training: num_gpus=2, CUDA_VISIBLE_DEVICES=2,3
|
|
|
|
|
-2026-05-25 13:51:21 | INFO | peft-platform | Remote training launched in container: job=ddc610b6-d872-466c-b382-3c9bfd6df12a, container_pid=76529
|
|
|
|
|
-INFO: 127.0.0.1:57534 - "GET /health HTTP/1.1" 200 OK
|
|
|
|
|
-INFO: 127.0.0.1:57616 - "GET /health HTTP/1.1" 200 OK
|
|
|
|
|
-INFO: 127.0.0.1:52350 - "GET /health HTTP/1.1" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:51796 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:38770 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:58504 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:58496 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:38780 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:41362 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:46036 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:46018 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:46016 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:46038 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:46050 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:46064 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:46072 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:46076 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:34810 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:34812 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:52798 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:52810 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:47732 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:47748 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 127.0.0.1:59998 - "GET /health HTTP/1.1" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:42814 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:42822 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:54916 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:54926 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:41970 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-INFO: 127.0.0.1:34236 - "GET /health HTTP/1.1" 200 OK
|
|
|
|
|
-INFO: 172.20.0.4:60076 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] *****************************************
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] *****************************************
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] DDP mode: rank=0, local_rank=0, world_size=2
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] [rank 1] === Training job started: ddc610b6-d872-466c-b382-3c9bfd6df12a ===
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] fla package found at: /opt/conda/lib/python3.10/site-packages/fla
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] fla shared memory patch v2 already applied, skipping
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] [rank 0] === Training job started: ddc610b6-d872-466c-b382-3c9bfd6df12a ===
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] model_id=Qwen/Qwen3.5-0.8B, model_type=text
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] dataset_path=/root/Fine-tuning/backend/data/datasets/data.jsonl
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] config={"model_id": "Qwen/Qwen3.5-0.8B", "model_type": "text", "dataset_id": "3d5f8808-e71a-449d-94e9-c61c4881b2cf", "peft_method": "adalora", "epochs": 3, "batch_size": 64, "gradient_accumulation": 4, "lear
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] DDP: world_size=2, batch_size per GPU=64
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] Step 1: Preprocessing dataset...
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] task_type=sft, template=auto
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] Engine loaded: TextEngine
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] Running preprocess_dataset...
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] Preprocessing done, output: /root/Fine-tuning/backend/data/processed/ddc610b6-d872-466c-b382-3c9bfd6df12a_processed.jsonl
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] Step 2: Loading model: Qwen/Qwen3.5-0.8B...
|
|
|
|
|
-2026-05-25 13:52:50 | ERROR | peft-platform | [253:ddc610b6] Current Triton version 3.0.0 is below the recommended 3.2.0 version. Errors may occur and these issues will not be fixed. Please consider upgrading Triton.
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Current Python version 3.10 is below the recommended 3.11 version. It is recommended to upgrade to Python 3.11 or higher for the best experience.
|
|
|
|
|
-2026-05-25 13:52:50 | ERROR | peft-platform | [253:ddc610b6] Current Triton version 3.0.0 is below the recommended 3.2.0 version. Errors may occur and these issues will not be fixed. Please consider upgrading Triton.
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Current Python version 3.10 is below the recommended 3.11 version. It is recommended to upgrade to Python 3.11 or higher for the best experience.
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] torch.compile is not available in Python 3.10, using identity decorator instead
|
|
|
|
|
-2026-05-25 13:52:50 | WARNING | peft-platform | [253:ddc610b6] /opt/conda/lib/python3.10/site-packages/torchvision/datapoints/__init__.py:12: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] warnings.warn(_BETA_TRANSFORMS_WARNING)
|
|
|
|
|
-2026-05-25 13:52:50 | WARNING | peft-platform | [253:ddc610b6] /opt/conda/lib/python3.10/site-packages/torchvision/transforms/v2/__init__.py:54: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] warnings.warn(_BETA_TRANSFORMS_WARNING)
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 0%| | 0/320 [00:00<?, ?it/s]torch.compile is not available in Python 3.10, using identity decorator instead
|
|
|
|
|
-2026-05-25 13:52:50 | WARNING | peft-platform | [253:ddc610b6] /opt/conda/lib/python3.10/site-packages/torchvision/datapoints/__init__.py:12: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] warnings.warn(_BETA_TRANSFORMS_WARNING)
|
|
|
|
|
-2026-05-25 13:52:50 | WARNING | peft-platform | [253:ddc610b6] /opt/conda/lib/python3.10/site-packages/torchvision/transforms/v2/__init__.py:54: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] warnings.warn(_BETA_TRANSFORMS_WARNING)
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 0%| | 0/320 [00:00<?, ?it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 0%| | 1/320 [00:03<18:22, 3.46s/it]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 0%| | 1/320 [00:02<14:44, 2.77s/it]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 2%|▎ | 8/320 [00:03<01:42, 3.05it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 2%|▎ | 8/320 [00:02<01:24, 3.71it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 4%|▍ | 12/320 [00:03<01:01, 5.00it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 6%|▌ | 18/320 [00:03<00:32, 9.35it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 7%|▋ | 21/320 [00:03<00:27, 10.90it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 8%|▊ | 24/320 [00:03<00:22, 13.08it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 8%|▊ | 27/320 [00:03<00:20, 14.49it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 11%|█▏ | 36/320 [00:04<00:12, 22.55it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 11%|█▏ | 36/320 [00:03<00:12, 22.72it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 13%|█▎ | 42/320 [00:03<00:10, 26.01it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 13%|█▎ | 42/320 [00:04<00:10, 26.05it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 15%|█▌ | 48/320 [00:03<00:09, 28.59it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 15%|█▌ | 48/320 [00:04<00:09, 28.81it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 19%|█▉ | 60/320 [00:03<00:06, 37.69it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 19%|█▉ | 60/320 [00:04<00:06, 38.28it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 21%|██ | 66/320 [00:04<00:06, 37.61it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 21%|██ | 66/320 [00:04<00:06, 38.34it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 23%|██▎ | 74/320 [00:04<00:05, 44.84it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 23%|██▎ | 75/320 [00:04<00:05, 45.16it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 25%|██▌ | 81/320 [00:04<00:05, 47.29it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 25%|██▌ | 80/320 [00:04<00:05, 44.48it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 28%|██▊ | 89/320 [00:05<00:04, 52.42it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 28%|██▊ | 88/320 [00:04<00:04, 49.24it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 30%|██▉ | 95/320 [00:05<00:04, 47.78it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 29%|██▉ | 94/320 [00:04<00:04, 46.23it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 31%|███▏ | 100/320 [00:04<00:04, 47.04it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 32%|███▏ | 101/320 [00:05<00:04, 47.55it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 36%|███▌ | 114/320 [00:04<00:04, 51.27it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 36%|███▌ | 114/320 [00:05<00:04, 50.08it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 38%|███▊ | 120/320 [00:05<00:04, 48.49it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 38%|███▊ | 120/320 [00:05<00:04, 47.46it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 40%|████ | 128/320 [00:05<00:03, 54.21it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 40%|████ | 128/320 [00:05<00:03, 50.73it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 42%|████▏ | 134/320 [00:05<00:03, 52.68it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 44%|████▍ | 141/320 [00:05<00:03, 55.06it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 44%|████▍ | 142/320 [00:06<00:03, 55.16it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 46%|████▌ | 147/320 [00:05<00:03, 47.98it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 46%|████▋ | 148/320 [00:06<00:03, 50.18it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 48%|████▊ | 153/320 [00:06<00:03, 45.32it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 48%|████▊ | 153/320 [00:05<00:03, 44.03it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 52%|█████▏ | 167/320 [00:05<00:03, 49.67it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 52%|█████▏ | 167/320 [00:06<00:03, 49.28it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 54%|█████▍ | 172/320 [00:06<00:03, 48.28it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 54%|█████▍ | 172/320 [00:06<00:03, 48.19it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 56%|█████▋ | 180/320 [00:06<00:02, 54.82it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 57%|█████▋ | 182/320 [00:06<00:02, 58.29it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 60%|██████ | 193/320 [00:07<00:02, 56.56it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 61%|██████ | 195/320 [00:06<00:02, 59.04it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 62%|██████▏ | 199/320 [00:07<00:02, 53.09it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 63%|██████▎ | 201/320 [00:06<00:02, 56.81it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 64%|██████▍ | 205/320 [00:07<00:02, 53.47it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 65%|██████▍ | 207/320 [00:06<00:01, 56.75it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 68%|██████▊ | 218/320 [00:07<00:01, 56.74it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 68%|██████▊ | 218/320 [00:06<00:01, 55.23it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 70%|███████ | 224/320 [00:06<00:01, 55.22it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 70%|███████ | 224/320 [00:07<00:01, 52.28it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 73%|███████▎ | 234/320 [00:07<00:01, 57.33it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 73%|███████▎ | 234/320 [00:07<00:01, 54.79it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 75%|███████▌ | 240/320 [00:07<00:01, 55.86it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 75%|███████▌ | 240/320 [00:07<00:01, 56.89it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 78%|███████▊ | 248/320 [00:08<00:01, 60.44it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 78%|███████▊ | 248/320 [00:07<00:01, 54.79it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 79%|███████▉ | 254/320 [00:07<00:01, 51.18it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 80%|███████▉ | 255/320 [00:08<00:01, 52.41it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 81%|████████▏ | 260/320 [00:07<00:01, 51.17it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 82%|████████▏ | 261/320 [00:08<00:01, 49.19it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 83%|████████▎ | 266/320 [00:07<00:01, 53.06it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 85%|████████▍ | 271/320 [00:08<00:00, 59.53it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 85%|████████▌ | 273/320 [00:07<00:00, 56.73it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 87%|████████▋ | 278/320 [00:08<00:00, 50.99it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 87%|████████▋ | 279/320 [00:08<00:00, 53.63it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 89%|████████▉ | 286/320 [00:08<00:00, 50.30it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 89%|████████▉ | 285/320 [00:08<00:00, 47.93it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 91%|█████████ | 290/320 [00:08<00:00, 43.37it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 91%|█████████▏| 292/320 [00:08<00:00, 47.45it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 94%|█████████▍| 301/320 [00:09<00:00, 54.23it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 93%|█████████▎| 298/320 [00:08<00:00, 49.52it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 95%|█████████▌| 304/320 [00:08<00:00, 50.90it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 96%|█████████▌| 307/320 [00:09<00:00, 48.82it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 97%|█████████▋| 310/320 [00:08<00:00, 47.50it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 98%|█████████▊| 313/320 [00:09<00:00, 51.02it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 100%|██████████| 320/320 [00:09<00:00, 33.79it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 100%|██████████| 320/320 [00:08<00:00, 36.43it/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] Model loaded successfully
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] Step 3: Building PEFT config...
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] Step 4: Starting training...
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] NOTE: First step may take 2-5 minutes due to Triton kernel compilation (autotuning). This is normal.
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] Total steps: 3 epochs, batch_size per GPU=64
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Map: 0%| | 0/60 [00:00<?, ? examples/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Map: 100%|██████████| 60/60 [00:00<00:00, 2242.42 examples/s]
|
|
|
|
|
-2026-05-25 13:52:50 | WARNING | peft-platform | [253:ddc610b6] /opt/conda/lib/python3.10/site-packages/peft/tuners/tuners_utils.py:1348: UserWarning: Model has `tie_word_embeddings=True` and a tied layer is part of the adapter, but `ensure_weight_tying` is not set to True. This can lead to complications, for example when merging the adapter or converting your model to formats other than safetensors. Check the discussion here: https://github.com/huggingface/peft/issues/2777
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] warnings.warn(msg)
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] bitsandbytes library load error: Configured CUDA binary not found at /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda116.so
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Traceback (most recent call last):
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] File "/opt/conda/lib/python3.10/site-packages/bitsandbytes/cextension.py", line 320, in <module>
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] lib = get_native_library()
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] File "/opt/conda/lib/python3.10/site-packages/bitsandbytes/cextension.py", line 288, in get_native_library
|
|
|
|
|
-2026-05-25 13:52:50 | ERROR | peft-platform | [253:ddc610b6] raise RuntimeError(f"Configured {BNB_BACKEND} binary not found at {cuda_binary_path}")
|
|
|
|
|
-2026-05-25 13:52:50 | ERROR | peft-platform | [253:ddc610b6] RuntimeError: Configured CUDA binary not found at /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda116.so
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] trainable params: 2,535,624 || all params: 754,928,673 || trainable%: 0.3359
|
|
|
|
|
-2026-05-25 13:52:50 | WARNING | peft-platform | [253:ddc610b6] [transformers] warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Map: 0%| | 0/60 [00:00<?, ? examples/s]
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Map: 100%|██████████| 60/60 [00:00<00:00, 1935.52 examples/s]
|
|
|
|
|
-2026-05-25 13:52:50 | WARNING | peft-platform | [253:ddc610b6] /opt/conda/lib/python3.10/site-packages/peft/tuners/tuners_utils.py:1348: UserWarning: Model has `tie_word_embeddings=True` and a tied layer is part of the adapter, but `ensure_weight_tying` is not set to True. This can lead to complications, for example when merging the adapter or converting your model to formats other than safetensors. Check the discussion here: https://github.com/huggingface/peft/issues/2777
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] warnings.warn(msg)
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] bitsandbytes library load error: Configured CUDA binary not found at /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda116.so
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Traceback (most recent call last):
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] File "/opt/conda/lib/python3.10/site-packages/bitsandbytes/cextension.py", line 320, in <module>
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] lib = get_native_library()
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] File "/opt/conda/lib/python3.10/site-packages/bitsandbytes/cextension.py", line 288, in get_native_library
|
|
|
|
|
-2026-05-25 13:52:50 | ERROR | peft-platform | [253:ddc610b6] raise RuntimeError(f"Configured {BNB_BACKEND} binary not found at {cuda_binary_path}")
|
|
|
|
|
-2026-05-25 13:52:50 | ERROR | peft-platform | [253:ddc610b6] RuntimeError: Configured CUDA binary not found at /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda116.so
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] trainable params: 2,535,624 || all params: 754,928,673 || trainable%: 0.3359
|
|
|
|
|
-2026-05-25 13:52:50 | WARNING | peft-platform | [253:ddc610b6] [transformers] warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 0%| | 0/1 [00:00<?, ?it/s]64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | WARNING | peft-platform | [253:ddc610b6] /opt/conda/lib/python3.10/site-packages/torch/autograd/graph.py:829: UserWarning: Attempting to run cuBLAS, but there was no current CUDA context! Attempting to set the primary context... (Triggered internally at /workspace/framework/mcPytorch/aten/src/ATen/cuda/CublasHandlePool.cpp:183.)
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Training failed for job ddc610b6-d872-466c-b382-3c9bfd6df12a: '_ProgressCallback' object has no attribute 'on_pre_optimizer_step'
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] [rank 1] ERROR: '_ProgressCallback' object has no attribute 'on_pre_optimizer_step'
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] Traceback (most recent call last):
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 236, in run_training
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] adapter_path = await engine.train(
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] File "/root/Fine-tuning/backend/app/engines/text_engine.py", line 394, in train
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] trainer.train()
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1427, in train
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] return inner_training_loop(
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1509, in _inner_training_loop
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] self._run_epoch(
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1762, in _run_epoch
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] self.control = self.callback_handler.on_pre_optimizer_step(self.args, self.state, self.control)
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] File "/opt/conda/lib/python3.10/site-packages/transformers/trainer_callback.py", line 511, in on_pre_optimizer_step
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] return self.call_event("on_pre_optimizer_step", args, state, control, **kwargs)
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] File "/opt/conda/lib/python3.10/site-packages/transformers/trainer_callback.py", line 545, in call_event
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] result = getattr(callback, event)(
|
|
|
|
|
-2026-05-25 13:52:50 | ERROR | peft-platform | [253:ddc610b6] AttributeError: '_ProgressCallback' object has no attribute 'on_pre_optimizer_step'
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] Step 1/1 done (epoch 1.00)
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: Traceback (most recent call last):
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: File "/opt/conda/lib/python3.10/runpy.py", line 196, in _run_module_as_main
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: return _run_code(code, main_globals, None,
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: File "/opt/conda/lib/python3.10/runpy.py", line 86, in _run_code
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: exec(code, run_globals)
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 466, in <module>
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: main()
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 461, in main
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: asyncio.run(run_training(job_id, model_id, model_type, dataset_id, config,
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: File "/opt/conda/lib/python3.10/asyncio/runners.py", line 44, in run
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: return loop.run_until_complete(main)
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: File "/opt/conda/lib/python3.10/asyncio/base_events.py", line 649, in run_until_complete
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: return future.result()
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 236, in run_training
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: adapter_path = await engine.train(
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: File "/root/Fine-tuning/backend/app/engines/text_engine.py", line 394, in train
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: trainer.train()
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1427, in train
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: return inner_training_loop(
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1509, in _inner_training_loop
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: self._run_epoch(
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1762, in _run_epoch
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: self.control = self.callback_handler.on_pre_optimizer_step(self.args, self.state, self.control)
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: File "/opt/conda/lib/python3.10/site-packages/transformers/trainer_callback.py", line 511, in on_pre_optimizer_step
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: return self.call_event("on_pre_optimizer_step", args, state, control, **kwargs)
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: File "/opt/conda/lib/python3.10/site-packages/transformers/trainer_callback.py", line 545, in call_event
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: result = getattr(callback, event)(
|
|
|
|
|
-2026-05-25 13:52:50 | ERROR | peft-platform | [253:ddc610b6] [rank1]: AttributeError: '_ProgressCallback' object has no attribute 'on_pre_optimizer_step'
|
|
|
|
|
-2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 100%|██████████| 1/1 [00:35<00:00, 35.50s/it]
|
|
|
|
|
-INFO: 127.0.0.1:52106 - "GET /health HTTP/1.1" 200 OK
|
|
|
|
|
|
|
+lq@lq:~/Fine-tuning$ curl -s http://localhost:8000/api/v1/deployment/c43bf8e3-d92d-4266-be19-0212c3e1b6 21/status | python3 -m json.tool
|
|
|
|
|
+b^HExpecting value: line 1 column 1 (char 0)
|
|
|
|
|
+lq@lq:~/Fine-tuning$ sudo docker logs -f finetune-backend
|
|
|
|
|
+[sudo] password for lq:
|
|
|
|
|
+=> Syncing backend code to compute node 192.168.91.253 ...
|
|
|
|
|
+Warning: Permanently added '192.168.91.253' (ED25519) to the list of known hosts.
|
|
|
|
|
+sending incremental file list
|
|
|
|
|
+./
|
|
|
|
|
+.dockerignore
|
|
|
|
|
+.env.docker
|
|
|
|
|
+.env.example
|
|
|
|
|
+.python-version
|
|
|
|
|
+Dockerfile
|
|
|
|
|
+entrypoint.sh
|
|
|
|
|
+main.py
|
|
|
|
|
+pyproject.toml
|
|
|
|
|
+requirements.txt
|
|
|
|
|
+app/__init__.py
|
|
|
|
|
+app/config.py
|
|
|
|
|
+app/__pycache__/__init__.cpython-310.pyc
|
|
|
|
|
+app/__pycache__/config.cpython-310.pyc
|
|
|
|
|
+app/api/
|
|
|
|
|
+app/api/__init__.py
|
|
|
|
|
+app/api/api_keys.py
|
|
|
|
|
+app/api/auth.py
|
|
|
|
|
+app/api/datasets.py
|
|
|
|
|
+app/api/deployment.py
|
|
|
|
|
+app/api/evaluation.py
|
|
|
|
|
+app/api/inference.py
|
|
|
|
|
+app/api/models.py
|
|
|
|
|
+app/api/sample_center.py
|
|
|
|
|
+app/api/training.py
|
|
|
|
|
+app/api/__pycache__/
|
|
|
|
|
+app/api/__pycache__/__init__.cpython-310.pyc
|
|
|
|
|
+app/api/__pycache__/api_keys.cpython-310.pyc
|
|
|
|
|
+app/api/__pycache__/auth.cpython-310.pyc
|
|
|
|
|
+app/api/__pycache__/datasets.cpython-310.pyc
|
|
|
|
|
+app/api/__pycache__/deployment.cpython-310.pyc
|
|
|
|
|
+app/api/__pycache__/evaluation.cpython-310.pyc
|
|
|
|
|
+app/api/__pycache__/inference.cpython-310.pyc
|
|
|
|
|
+app/api/__pycache__/models.cpython-310.pyc
|
|
|
|
|
+app/api/__pycache__/sample_center.cpython-310.pyc
|
|
|
|
|
+app/api/__pycache__/training.cpython-310.pyc
|
|
|
|
|
+app/core/
|
|
|
|
|
+app/core/__init__.py
|
|
|
|
|
+app/core/auth.py
|
|
|
|
|
+app/core/background_tasks.py
|
|
|
|
|
+app/core/db.py
|
|
|
|
|
+app/core/deploy_server_template.py
|
|
|
|
|
+app/core/inference_worker.py
|
|
|
|
|
+app/core/job_queue.py
|
|
|
|
|
+app/core/logging.py
|
|
|
|
|
+app/core/remote_deploy.py
|
|
|
|
|
+app/core/remote_eval.py
|
|
|
|
|
+app/core/remote_executor.py
|
|
|
|
|
+app/core/security.py
|
|
|
|
|
+app/core/sso_client.py
|
|
|
|
|
+app/core/websocket.py
|
|
|
|
|
+app/core/__pycache__/
|
|
|
|
|
+app/core/__pycache__/__init__.cpython-310.pyc
|
|
|
|
|
+app/core/__pycache__/auth.cpython-310.pyc
|
|
|
|
|
+app/core/__pycache__/background_tasks.cpython-310.pyc
|
|
|
|
|
+app/core/__pycache__/db.cpython-310.pyc
|
|
|
|
|
+app/core/__pycache__/job_queue.cpython-310.pyc
|
|
|
|
|
+app/core/__pycache__/logging.cpython-310.pyc
|
|
|
|
|
+app/core/__pycache__/remote_deploy.cpython-310.pyc
|
|
|
|
|
+app/core/__pycache__/remote_eval.cpython-310.pyc
|
|
|
|
|
+app/core/__pycache__/remote_executor.cpython-310.pyc
|
|
|
|
|
+app/core/__pycache__/security.cpython-310.pyc
|
|
|
|
|
+app/core/__pycache__/sso_client.cpython-310.pyc
|
|
|
|
|
+app/core/__pycache__/websocket.cpython-310.pyc
|
|
|
|
|
+app/engines/__init__.py
|
|
|
|
|
+app/engines/__main__.py
|
|
|
|
|
+app/engines/base.py
|
|
|
|
|
+app/engines/multimodal_engine.py
|
|
|
|
|
+app/engines/remote_train.py
|
|
|
|
|
+app/engines/text_engine.py
|
|
|
|
|
+app/engines/vision_engine.py
|
|
|
|
|
+app/engines/__pycache__/__init__.cpython-310.pyc
|
|
|
|
|
+app/engines/__pycache__/base.cpython-310.pyc
|
|
|
|
|
+app/engines/__pycache__/remote_train.cpython-310.pyc
|
|
|
|
|
+app/engines/__pycache__/text_engine.cpython-310.pyc
|
|
|
|
|
+app/peft/__init__.py
|
|
|
|
|
+app/peft/__pycache__/__init__.cpython-310.pyc
|
|
|
|
|
+app/preprocessors/__init__.py
|
|
|
|
|
+app/preprocessors/__pycache__/__init__.cpython-310.pyc
|
|
|
|
|
+app/schemas/
|
|
|
|
|
+app/schemas/__init__.py
|
|
|
|
|
+app/schemas/background_task.py
|
|
|
|
|
+app/schemas/common.py
|
|
|
|
|
+app/schemas/dataset.py
|
|
|
|
|
+app/schemas/deployment.py
|
|
|
|
|
+app/schemas/evaluation.py
|
|
|
|
|
+app/schemas/model.py
|
|
|
|
|
+app/schemas/model_test.py
|
|
|
|
|
+app/schemas/sample_center.py
|
|
|
|
|
+app/schemas/training.py
|
|
|
|
|
+app/schemas/__pycache__/
|
|
|
|
|
+app/schemas/__pycache__/__init__.cpython-310.pyc
|
|
|
|
|
+app/schemas/__pycache__/background_task.cpython-310.pyc
|
|
|
|
|
+app/schemas/__pycache__/common.cpython-310.pyc
|
|
|
|
|
+app/schemas/__pycache__/dataset.cpython-310.pyc
|
|
|
|
|
+app/schemas/__pycache__/deployment.cpython-310.pyc
|
|
|
|
|
+app/schemas/__pycache__/evaluation.cpython-310.pyc
|
|
|
|
|
+app/schemas/__pycache__/model.cpython-310.pyc
|
|
|
|
|
+app/schemas/__pycache__/model_test.cpython-310.pyc
|
|
|
|
|
+app/schemas/__pycache__/sample_center.cpython-310.pyc
|
|
|
|
|
+app/schemas/__pycache__/training.cpython-310.pyc
|
|
|
|
|
+app/services/
|
|
|
|
|
+app/services/api_key_service.py
|
|
|
|
|
+app/services/dataset_service.py
|
|
|
|
|
+app/services/deploy_service.py
|
|
|
|
|
+app/services/eval_service.py
|
|
|
|
|
+app/services/inference_service.py
|
|
|
|
|
+app/services/model_service.py
|
|
|
|
|
+app/services/model_test_service.py
|
|
|
|
|
+app/services/sample_center_service.py
|
|
|
|
|
+app/services/training_service.py
|
|
|
|
|
+app/services/__pycache__/
|
|
|
|
|
+app/services/__pycache__/api_key_service.cpython-310.pyc
|
|
|
|
|
+app/services/__pycache__/dataset_service.cpython-310.pyc
|
|
|
|
|
+app/services/__pycache__/deploy_service.cpython-310.pyc
|
|
|
|
|
+app/services/__pycache__/eval_service.cpython-310.pyc
|
|
|
|
|
+app/services/__pycache__/inference_service.cpython-310.pyc
|
|
|
|
|
+app/services/__pycache__/model_service.cpython-310.pyc
|
|
|
|
|
+app/services/__pycache__/model_test_service.cpython-310.pyc
|
|
|
|
|
+app/services/__pycache__/sample_center_service.cpython-310.pyc
|
|
|
|
|
+app/services/__pycache__/training_service.cpython-310.pyc
|
|
|
|
|
+
|
|
|
|
|
+sent 8,330 bytes received 6,959 bytes 826.43 bytes/sec
|
|
|
|
|
+total size is 517,664 speedup is 33.86
|
|
|
|
|
+=> Sync done.
|
|
|
|
|
+INFO: Started server process [1]
|
|
|
|
|
+INFO: Waiting for application startup.
|
|
|
|
|
+ERROR: Traceback (most recent call last):
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 526, in _prepare_and_execute
|
|
|
|
|
+ prepared_stmt, attributes = await adapt_connection._prepare(
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 773, in _prepare
|
|
|
|
|
+ prepared_stmt = await self._connection.prepare(
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/asyncpg/connection.py", line 638, in prepare
|
|
|
|
|
+ return await self._prepare(
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/asyncpg/connection.py", line 657, in _prepare
|
|
|
|
|
+ stmt = await self._get_statement(
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/asyncpg/connection.py", line 443, in _get_statement
|
|
|
|
|
+ statement = await self._protocol.prepare(
|
|
|
|
|
+ File "asyncpg/protocol/protocol.pyx", line 165, in prepare
|
|
|
|
|
+asyncpg.exceptions.InFailedSQLTransactionError: current transaction is aborted, commands ignored until end of transaction block
|
|
|
|
|
+
|
|
|
|
|
+The above exception was the direct cause of the following exception:
|
|
|
|
|
+
|
|
|
|
|
+Traceback (most recent call last):
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 1967, in _exec_single_context
|
|
|
|
|
+ self.dialect.do_execute(
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/sqlalchemy/engine/default.py", line 952, in do_execute
|
|
|
|
|
+ cursor.execute(statement, parameters)
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 585, in execute
|
|
|
|
|
+ self._adapt_connection.await_(
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/sqlalchemy/util/_concurrency_py3k.py", line 132, in await_only
|
|
|
|
|
+ return current.parent.switch(awaitable) # type: ignore[no-any-return,attr-defined] # noqa: E501
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/sqlalchemy/util/_concurrency_py3k.py", line 196, in greenlet_spawn
|
|
|
|
|
+ value = await result
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 563, in _prepare_and_execute
|
|
|
|
|
+ self._handle_exception(error)
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 513, in _handle_exception
|
|
|
|
|
+ self._adapt_connection._handle_exception(error)
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 797, in _handle_exception
|
|
|
|
|
+ raise translated_error from error
|
|
|
|
|
+sqlalchemy.dialects.postgresql.asyncpg.AsyncAdapt_asyncpg_dbapi.Error: <class 'asyncpg.exceptions.InFailedSQLTransactionError'>: current transaction is aborted, commands ignored until end of transaction block
|
|
|
|
|
+
|
|
|
|
|
+The above exception was the direct cause of the following exception:
|
|
|
|
|
+
|
|
|
|
|
+Traceback (most recent call last):
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/starlette/routing.py", line 638, in lifespan
|
|
|
|
|
+ async with self.lifespan_context(app) as maybe_state:
|
|
|
|
|
+ File "/usr/local/lib/python3.10/contextlib.py", line 199, in __aenter__
|
|
|
|
|
+ return await anext(self.gen)
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/fastapi/routing.py", line 216, in merged_lifespan
|
|
|
|
|
+ async with original_context(app) as maybe_original_state:
|
|
|
|
|
+ File "/usr/local/lib/python3.10/contextlib.py", line 199, in __aenter__
|
|
|
|
|
+ return await anext(self.gen)
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/fastapi/routing.py", line 216, in merged_lifespan
|
|
|
|
|
+ async with original_context(app) as maybe_original_state:
|
|
|
|
|
+ File "/usr/local/lib/python3.10/contextlib.py", line 199, in __aenter__
|
|
|
|
|
+ return await anext(self.gen)
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/fastapi/routing.py", line 216, in merged_lifespan
|
|
|
|
|
+ async with original_context(app) as maybe_original_state:
|
|
|
|
|
+ File "/usr/local/lib/python3.10/contextlib.py", line 199, in __aenter__
|
|
|
|
|
+ return await anext(self.gen)
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/fastapi/routing.py", line 216, in merged_lifespan
|
|
|
|
|
+ async with original_context(app) as maybe_original_state:
|
|
|
|
|
+ File "/usr/local/lib/python3.10/contextlib.py", line 199, in __aenter__
|
|
|
|
|
+ return await anext(self.gen)
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/fastapi/routing.py", line 216, in merged_lifespan
|
|
|
|
|
+ async with original_context(app) as maybe_original_state:
|
|
|
|
|
+ File "/usr/local/lib/python3.10/contextlib.py", line 199, in __aenter__
|
|
|
|
|
+ return await anext(self.gen)
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/fastapi/routing.py", line 216, in merged_lifespan
|
|
|
|
|
+ async with original_context(app) as maybe_original_state:
|
|
|
|
|
+ File "/usr/local/lib/python3.10/contextlib.py", line 199, in __aenter__
|
|
|
|
|
+ return await anext(self.gen)
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/fastapi/routing.py", line 216, in merged_lifespan
|
|
|
|
|
+ async with original_context(app) as maybe_original_state:
|
|
|
|
|
+ File "/usr/local/lib/python3.10/contextlib.py", line 199, in __aenter__
|
|
|
|
|
+ return await anext(self.gen)
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/fastapi/routing.py", line 216, in merged_lifespan
|
|
|
|
|
+ async with original_context(app) as maybe_original_state:
|
|
|
|
|
+ File "/usr/local/lib/python3.10/contextlib.py", line 199, in __aenter__
|
|
|
|
|
+ return await anext(self.gen)
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/fastapi/routing.py", line 216, in merged_lifespan
|
|
|
|
|
+ async with original_context(app) as maybe_original_state:
|
|
|
|
|
+ File "/usr/local/lib/python3.10/contextlib.py", line 199, in __aenter__
|
|
|
|
|
+ return await anext(self.gen)
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/fastapi/routing.py", line 216, in merged_lifespan
|
|
|
|
|
+ async with original_context(app) as maybe_original_state:
|
|
|
|
|
+ File "/usr/local/lib/python3.10/contextlib.py", line 199, in __aenter__
|
|
|
|
|
+ return await anext(self.gen)
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/fastapi/routing.py", line 216, in merged_lifespan
|
|
|
|
|
+ async with original_context(app) as maybe_original_state:
|
|
|
|
|
+ File "/usr/local/lib/python3.10/contextlib.py", line 199, in __aenter__
|
|
|
|
|
+ return await anext(self.gen)
|
|
|
|
|
+ File "/app/main.py", line 26, in lifespan
|
|
|
|
|
+ await init_db()
|
|
|
|
|
+ File "/app/app/core/db.py", line 46, in init_db
|
|
|
|
|
+ await _migrate_tables()
|
|
|
|
|
+ File "/app/app/core/db.py", line 64, in _migrate_tables
|
|
|
|
|
+ await conn.execute(text(stmt))
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/sqlalchemy/ext/asyncio/engine.py", line 659, in execute
|
|
|
|
|
+ result = await greenlet_spawn(
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/sqlalchemy/util/_concurrency_py3k.py", line 201, in greenlet_spawn
|
|
|
|
|
+ result = context.throw(*sys.exc_info())
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 1419, in execute
|
|
|
|
|
+ return meth(
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/sqlalchemy/sql/elements.py", line 527, in _execute_on_connection
|
|
|
|
|
+ return connection._execute_clauseelement(
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 1641, in _execute_clauseelement
|
|
|
|
|
+ ret = self._execute_context(
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 1846, in _execute_context
|
|
|
|
|
+ return self._exec_single_context(
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 1986, in _exec_single_context
|
|
|
|
|
+ self._handle_dbapi_exception(
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 2363, in _handle_dbapi_exception
|
|
|
|
|
+ raise sqlalchemy_exception.with_traceback(exc_info[2]) from e
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 1967, in _exec_single_context
|
|
|
|
|
+ self.dialect.do_execute(
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/sqlalchemy/engine/default.py", line 952, in do_execute
|
|
|
|
|
+ cursor.execute(statement, parameters)
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 585, in execute
|
|
|
|
|
+ self._adapt_connection.await_(
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/sqlalchemy/util/_concurrency_py3k.py", line 132, in await_only
|
|
|
|
|
+ return current.parent.switch(awaitable) # type: ignore[no-any-return,attr-defined] # noqa: E501
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/sqlalchemy/util/_concurrency_py3k.py", line 196, in greenlet_spawn
|
|
|
|
|
+ value = await result
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 563, in _prepare_and_execute
|
|
|
|
|
+ self._handle_exception(error)
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 513, in _handle_exception
|
|
|
|
|
+ self._adapt_connection._handle_exception(error)
|
|
|
|
|
+ File "/usr/local/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 797, in _handle_exception
|
|
|
|
|
+ raise translated_error from error
|
|
|
|
|
+sqlalchemy.exc.DBAPIError: (sqlalchemy.dialects.postgresql.asyncpg.Error) <class 'asyncpg.exceptions.InFailedSQLTransactionError'>: current transaction is aborted, commands ignored until end of transaction block
|
|
|
|
|
+[SQL: ALTER TABLE deploy_tasks ADD COLUMN endpoint_url VARCHAR(256)]
|
|
|
|
|
+(Background on this error at: https://sqlalche.me/e/20/dbapi)
|