result.txt 46 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374
  1. INFO: 172.20.0.4:59236 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  2. INFO: 172.20.0.4:59240 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  3. INFO: 172.20.0.4:59252 - "GET /api/v1/datasets/ HTTP/1.0" 200 OK
  4. INFO: 172.20.0.4:59238 - "GET /api/v1/models/ HTTP/1.0" 200 OK
  5. INFO: 172.20.0.4:59262 - "GET /api/v1/models/ HTTP/1.0" 200 OK
  6. INFO: 172.20.0.4:59276 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  7. INFO: 172.20.0.4:59278 - "GET /api/v1/models/ HTTP/1.0" 200 OK
  8. INFO: 172.20.0.4:59294 - "GET /api/v1/datasets/ HTTP/1.0" 200 OK
  9. INFO: 172.20.0.4:59308 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  10. INFO: 172.20.0.4:38434 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  11. INFO: 172.20.0.4:38440 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  12. INFO: 127.0.0.1:48106 - "GET /health HTTP/1.1" 200 OK
  13. INFO: 172.20.0.4:56722 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  14. INFO: 172.20.0.4:56736 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  15. 2026-05-25 13:49:13 | INFO | peft-platform | Training job ddc610b6-d872-466c-b382-3c9bfd6df12a: num_gpus=2, batch_size=64
  16. 2026-05-25 13:49:13 | INFO | peft-platform | Job ddc610b6-d872-466c-b382-3c9bfd6df12a enqueued
  17. 2026-05-25 13:49:13 | INFO | peft-platform | Training job created: ddc610b6-d872-466c-b382-3c9bfd6df12a
  18. INFO: 172.20.0.4:56748 - "POST /api/v1/training/jobs HTTP/1.0" 200 OK
  19. 2026-05-25 13:49:13 | INFO | app.engines.text_engine | Preprocessed 60 samples for sft/alpaca
  20. INFO: 172.20.0.4:56768 - "GET /api/v1/models/ HTTP/1.0" 200 OK
  21. INFO: 172.20.0.4:56784 - "GET /api/v1/datasets/ HTTP/1.0" 200 OK
  22. INFO: 172.20.0.4:56758 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  23. INFO: 172.20.0.4:50036 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  24. INFO: 172.20.0.4:50048 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  25. INFO: 172.20.0.4:37870 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  26. INFO: 172.20.0.4:37874 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  27. INFO: 127.0.0.1:46502 - "GET /health HTTP/1.1" 200 OK
  28. INFO: 172.20.0.4:51788 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  29. 2026-05-25 13:49:35 | INFO | peft-platform | Remote cleanup result: true
  30. cleaned 70 processes
  31. 2026-05-25 13:50:28 | INFO | peft-platform | Created remote dataset directory: /root/Fine-tuning/backend/data/datasets
  32. 2026-05-25 13:50:28 | INFO | peft-platform | Uploading dataset file: /root/Fine-tuning/backend/data/processed/ms_yanalong_yanalong/data.jsonl -> /root/Fine-tuning/backend/data/datasets/data.jsonl
  33. 2026-05-25 13:50:46 | INFO | peft-platform | Dataset uploaded successfully: /root/Fine-tuning/backend/data/datasets/data.jsonl
  34. 2026-05-25 13:51:03 | INFO | peft-platform | Multi-GPU training: num_gpus=2, CUDA_VISIBLE_DEVICES=2,3
  35. 2026-05-25 13:51:21 | INFO | peft-platform | Remote training launched in container: job=ddc610b6-d872-466c-b382-3c9bfd6df12a, container_pid=76529
  36. INFO: 127.0.0.1:57534 - "GET /health HTTP/1.1" 200 OK
  37. INFO: 127.0.0.1:57616 - "GET /health HTTP/1.1" 200 OK
  38. INFO: 127.0.0.1:52350 - "GET /health HTTP/1.1" 200 OK
  39. INFO: 172.20.0.4:51796 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  40. INFO: 172.20.0.4:38770 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  41. INFO: 172.20.0.4:58504 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  42. INFO: 172.20.0.4:58496 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  43. INFO: 172.20.0.4:38780 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  44. INFO: 172.20.0.4:41362 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  45. INFO: 172.20.0.4:46036 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  46. INFO: 172.20.0.4:46018 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  47. INFO: 172.20.0.4:46016 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  48. INFO: 172.20.0.4:46038 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  49. INFO: 172.20.0.4:46050 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  50. INFO: 172.20.0.4:46064 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  51. INFO: 172.20.0.4:46072 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  52. INFO: 172.20.0.4:46076 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  53. INFO: 172.20.0.4:34810 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  54. INFO: 172.20.0.4:34812 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  55. INFO: 172.20.0.4:52798 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  56. INFO: 172.20.0.4:52810 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  57. INFO: 172.20.0.4:47732 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  58. INFO: 172.20.0.4:47748 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  59. INFO: 127.0.0.1:59998 - "GET /health HTTP/1.1" 200 OK
  60. INFO: 172.20.0.4:42814 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  61. INFO: 172.20.0.4:42822 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  62. INFO: 172.20.0.4:54916 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  63. INFO: 172.20.0.4:54926 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  64. INFO: 172.20.0.4:41970 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  65. INFO: 127.0.0.1:34236 - "GET /health HTTP/1.1" 200 OK
  66. INFO: 172.20.0.4:60076 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  67. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] *****************************************
  68. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
  69. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] *****************************************
  70. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] DDP mode: rank=0, local_rank=0, world_size=2
  71. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] [rank 1] === Training job started: ddc610b6-d872-466c-b382-3c9bfd6df12a ===
  72. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] fla package found at: /opt/conda/lib/python3.10/site-packages/fla
  73. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] fla shared memory patch v2 already applied, skipping
  74. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] [rank 0] === Training job started: ddc610b6-d872-466c-b382-3c9bfd6df12a ===
  75. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] model_id=Qwen/Qwen3.5-0.8B, model_type=text
  76. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] dataset_path=/root/Fine-tuning/backend/data/datasets/data.jsonl
  77. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] config={"model_id": "Qwen/Qwen3.5-0.8B", "model_type": "text", "dataset_id": "3d5f8808-e71a-449d-94e9-c61c4881b2cf", "peft_method": "adalora", "epochs": 3, "batch_size": 64, "gradient_accumulation": 4, "lear
  78. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] DDP: world_size=2, batch_size per GPU=64
  79. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] Step 1: Preprocessing dataset...
  80. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] task_type=sft, template=auto
  81. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] Engine loaded: TextEngine
  82. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] Running preprocess_dataset...
  83. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] Preprocessing done, output: /root/Fine-tuning/backend/data/processed/ddc610b6-d872-466c-b382-3c9bfd6df12a_processed.jsonl
  84. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] Step 2: Loading model: Qwen/Qwen3.5-0.8B...
  85. 2026-05-25 13:52:50 | ERROR | peft-platform | [253:ddc610b6] Current Triton version 3.0.0 is below the recommended 3.2.0 version. Errors may occur and these issues will not be fixed. Please consider upgrading Triton.
  86. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Current Python version 3.10 is below the recommended 3.11 version. It is recommended to upgrade to Python 3.11 or higher for the best experience.
  87. 2026-05-25 13:52:50 | ERROR | peft-platform | [253:ddc610b6] Current Triton version 3.0.0 is below the recommended 3.2.0 version. Errors may occur and these issues will not be fixed. Please consider upgrading Triton.
  88. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Current Python version 3.10 is below the recommended 3.11 version. It is recommended to upgrade to Python 3.11 or higher for the best experience.
  89. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] torch.compile is not available in Python 3.10, using identity decorator instead
  90. 2026-05-25 13:52:50 | WARNING | peft-platform | [253:ddc610b6] /opt/conda/lib/python3.10/site-packages/torchvision/datapoints/__init__.py:12: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
  91. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] warnings.warn(_BETA_TRANSFORMS_WARNING)
  92. 2026-05-25 13:52:50 | WARNING | peft-platform | [253:ddc610b6] /opt/conda/lib/python3.10/site-packages/torchvision/transforms/v2/__init__.py:54: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
  93. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] warnings.warn(_BETA_TRANSFORMS_WARNING)
  94. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 0%| | 0/320 [00:00<?, ?it/s]torch.compile is not available in Python 3.10, using identity decorator instead
  95. 2026-05-25 13:52:50 | WARNING | peft-platform | [253:ddc610b6] /opt/conda/lib/python3.10/site-packages/torchvision/datapoints/__init__.py:12: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
  96. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] warnings.warn(_BETA_TRANSFORMS_WARNING)
  97. 2026-05-25 13:52:50 | WARNING | peft-platform | [253:ddc610b6] /opt/conda/lib/python3.10/site-packages/torchvision/transforms/v2/__init__.py:54: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
  98. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] warnings.warn(_BETA_TRANSFORMS_WARNING)
  99. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 0%| | 0/320 [00:00<?, ?it/s]
  100. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 0%| | 1/320 [00:03<18:22, 3.46s/it]
  101. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 0%| | 1/320 [00:02<14:44, 2.77s/it]
  102. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 2%|▎ | 8/320 [00:03<01:42, 3.05it/s]
  103. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 2%|▎ | 8/320 [00:02<01:24, 3.71it/s]
  104. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 4%|▍ | 12/320 [00:03<01:01, 5.00it/s]
  105. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 6%|▌ | 18/320 [00:03<00:32, 9.35it/s]
  106. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 7%|▋ | 21/320 [00:03<00:27, 10.90it/s]
  107. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 8%|▊ | 24/320 [00:03<00:22, 13.08it/s]
  108. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 8%|▊ | 27/320 [00:03<00:20, 14.49it/s]
  109. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 11%|█▏ | 36/320 [00:04<00:12, 22.55it/s]
  110. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 11%|█▏ | 36/320 [00:03<00:12, 22.72it/s]
  111. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 13%|█▎ | 42/320 [00:03<00:10, 26.01it/s]
  112. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 13%|█▎ | 42/320 [00:04<00:10, 26.05it/s]
  113. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 15%|█▌ | 48/320 [00:03<00:09, 28.59it/s]
  114. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 15%|█▌ | 48/320 [00:04<00:09, 28.81it/s]
  115. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 19%|█▉ | 60/320 [00:03<00:06, 37.69it/s]
  116. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 19%|█▉ | 60/320 [00:04<00:06, 38.28it/s]
  117. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 21%|██ | 66/320 [00:04<00:06, 37.61it/s]
  118. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 21%|██ | 66/320 [00:04<00:06, 38.34it/s]
  119. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 23%|██▎ | 74/320 [00:04<00:05, 44.84it/s]
  120. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 23%|██▎ | 75/320 [00:04<00:05, 45.16it/s]
  121. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 25%|██▌ | 81/320 [00:04<00:05, 47.29it/s]
  122. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 25%|██▌ | 80/320 [00:04<00:05, 44.48it/s]
  123. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 28%|██▊ | 89/320 [00:05<00:04, 52.42it/s]
  124. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 28%|██▊ | 88/320 [00:04<00:04, 49.24it/s]
  125. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 30%|██▉ | 95/320 [00:05<00:04, 47.78it/s]
  126. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 29%|██▉ | 94/320 [00:04<00:04, 46.23it/s]
  127. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 31%|███▏ | 100/320 [00:04<00:04, 47.04it/s]
  128. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 32%|███▏ | 101/320 [00:05<00:04, 47.55it/s]
  129. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 36%|███▌ | 114/320 [00:04<00:04, 51.27it/s]
  130. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 36%|███▌ | 114/320 [00:05<00:04, 50.08it/s]
  131. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 38%|███▊ | 120/320 [00:05<00:04, 48.49it/s]
  132. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 38%|███▊ | 120/320 [00:05<00:04, 47.46it/s]
  133. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 40%|████ | 128/320 [00:05<00:03, 54.21it/s]
  134. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 40%|████ | 128/320 [00:05<00:03, 50.73it/s]
  135. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 42%|████▏ | 134/320 [00:05<00:03, 52.68it/s]
  136. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 44%|████▍ | 141/320 [00:05<00:03, 55.06it/s]
  137. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 44%|████▍ | 142/320 [00:06<00:03, 55.16it/s]
  138. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 46%|████▌ | 147/320 [00:05<00:03, 47.98it/s]
  139. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 46%|████▋ | 148/320 [00:06<00:03, 50.18it/s]
  140. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 48%|████▊ | 153/320 [00:06<00:03, 45.32it/s]
  141. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 48%|████▊ | 153/320 [00:05<00:03, 44.03it/s]
  142. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 52%|█████▏ | 167/320 [00:05<00:03, 49.67it/s]
  143. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 52%|█████▏ | 167/320 [00:06<00:03, 49.28it/s]
  144. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 54%|█████▍ | 172/320 [00:06<00:03, 48.28it/s]
  145. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 54%|█████▍ | 172/320 [00:06<00:03, 48.19it/s]
  146. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 56%|█████▋ | 180/320 [00:06<00:02, 54.82it/s]
  147. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 57%|█████▋ | 182/320 [00:06<00:02, 58.29it/s]
  148. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 60%|██████ | 193/320 [00:07<00:02, 56.56it/s]
  149. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 61%|██████ | 195/320 [00:06<00:02, 59.04it/s]
  150. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 62%|██████▏ | 199/320 [00:07<00:02, 53.09it/s]
  151. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 63%|██████▎ | 201/320 [00:06<00:02, 56.81it/s]
  152. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 64%|██████▍ | 205/320 [00:07<00:02, 53.47it/s]
  153. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 65%|██████▍ | 207/320 [00:06<00:01, 56.75it/s]
  154. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 68%|██████▊ | 218/320 [00:07<00:01, 56.74it/s]
  155. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 68%|██████▊ | 218/320 [00:06<00:01, 55.23it/s]
  156. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 70%|███████ | 224/320 [00:06<00:01, 55.22it/s]
  157. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 70%|███████ | 224/320 [00:07<00:01, 52.28it/s]
  158. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 73%|███████▎ | 234/320 [00:07<00:01, 57.33it/s]
  159. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 73%|███████▎ | 234/320 [00:07<00:01, 54.79it/s]
  160. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 75%|███████▌ | 240/320 [00:07<00:01, 55.86it/s]
  161. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 75%|███████▌ | 240/320 [00:07<00:01, 56.89it/s]
  162. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 78%|███████▊ | 248/320 [00:08<00:01, 60.44it/s]
  163. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 78%|███████▊ | 248/320 [00:07<00:01, 54.79it/s]
  164. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 79%|███████▉ | 254/320 [00:07<00:01, 51.18it/s]
  165. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 80%|███████▉ | 255/320 [00:08<00:01, 52.41it/s]
  166. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 81%|████████▏ | 260/320 [00:07<00:01, 51.17it/s]
  167. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 82%|████████▏ | 261/320 [00:08<00:01, 49.19it/s]
  168. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 83%|████████▎ | 266/320 [00:07<00:01, 53.06it/s]
  169. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 85%|████████▍ | 271/320 [00:08<00:00, 59.53it/s]
  170. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 85%|████████▌ | 273/320 [00:07<00:00, 56.73it/s]
  171. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 87%|████████▋ | 278/320 [00:08<00:00, 50.99it/s]
  172. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 87%|████████▋ | 279/320 [00:08<00:00, 53.63it/s]
  173. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 89%|████████▉ | 286/320 [00:08<00:00, 50.30it/s]
  174. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 89%|████████▉ | 285/320 [00:08<00:00, 47.93it/s]
  175. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 91%|█████████ | 290/320 [00:08<00:00, 43.37it/s]
  176. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 91%|█████████▏| 292/320 [00:08<00:00, 47.45it/s]
  177. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 94%|█████████▍| 301/320 [00:09<00:00, 54.23it/s]
  178. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 93%|█████████▎| 298/320 [00:08<00:00, 49.52it/s]
  179. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 95%|█████████▌| 304/320 [00:08<00:00, 50.90it/s]
  180. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 96%|█████████▌| 307/320 [00:09<00:00, 48.82it/s]
  181. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 97%|█████████▋| 310/320 [00:08<00:00, 47.50it/s]
  182. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 98%|█████████▊| 313/320 [00:09<00:00, 51.02it/s]
  183. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 100%|██████████| 320/320 [00:09<00:00, 33.79it/s]
  184. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Loading weights: 100%|██████████| 320/320 [00:08<00:00, 36.43it/s]
  185. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] Model loaded successfully
  186. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] Step 3: Building PEFT config...
  187. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] Step 4: Starting training...
  188. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] NOTE: First step may take 2-5 minutes due to Triton kernel compilation (autotuning). This is normal.
  189. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] Total steps: 3 epochs, batch_size per GPU=64
  190. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Map: 0%| | 0/60 [00:00<?, ? examples/s]
  191. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Map: 100%|██████████| 60/60 [00:00<00:00, 2242.42 examples/s]
  192. 2026-05-25 13:52:50 | WARNING | peft-platform | [253:ddc610b6] /opt/conda/lib/python3.10/site-packages/peft/tuners/tuners_utils.py:1348: UserWarning: Model has `tie_word_embeddings=True` and a tied layer is part of the adapter, but `ensure_weight_tying` is not set to True. This can lead to complications, for example when merging the adapter or converting your model to formats other than safetensors. Check the discussion here: https://github.com/huggingface/peft/issues/2777
  193. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] warnings.warn(msg)
  194. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] bitsandbytes library load error: Configured CUDA binary not found at /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda116.so
  195. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Traceback (most recent call last):
  196. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] File "/opt/conda/lib/python3.10/site-packages/bitsandbytes/cextension.py", line 320, in <module>
  197. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] lib = get_native_library()
  198. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] File "/opt/conda/lib/python3.10/site-packages/bitsandbytes/cextension.py", line 288, in get_native_library
  199. 2026-05-25 13:52:50 | ERROR | peft-platform | [253:ddc610b6] raise RuntimeError(f"Configured {BNB_BACKEND} binary not found at {cuda_binary_path}")
  200. 2026-05-25 13:52:50 | ERROR | peft-platform | [253:ddc610b6] RuntimeError: Configured CUDA binary not found at /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda116.so
  201. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] trainable params: 2,535,624 || all params: 754,928,673 || trainable%: 0.3359
  202. 2026-05-25 13:52:50 | WARNING | peft-platform | [253:ddc610b6] [transformers] warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
  203. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Map: 0%| | 0/60 [00:00<?, ? examples/s]
  204. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Map: 100%|██████████| 60/60 [00:00<00:00, 1935.52 examples/s]
  205. 2026-05-25 13:52:50 | WARNING | peft-platform | [253:ddc610b6] /opt/conda/lib/python3.10/site-packages/peft/tuners/tuners_utils.py:1348: UserWarning: Model has `tie_word_embeddings=True` and a tied layer is part of the adapter, but `ensure_weight_tying` is not set to True. This can lead to complications, for example when merging the adapter or converting your model to formats other than safetensors. Check the discussion here: https://github.com/huggingface/peft/issues/2777
  206. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] warnings.warn(msg)
  207. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] bitsandbytes library load error: Configured CUDA binary not found at /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda116.so
  208. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Traceback (most recent call last):
  209. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] File "/opt/conda/lib/python3.10/site-packages/bitsandbytes/cextension.py", line 320, in <module>
  210. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] lib = get_native_library()
  211. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] File "/opt/conda/lib/python3.10/site-packages/bitsandbytes/cextension.py", line 288, in get_native_library
  212. 2026-05-25 13:52:50 | ERROR | peft-platform | [253:ddc610b6] raise RuntimeError(f"Configured {BNB_BACKEND} binary not found at {cuda_binary_path}")
  213. 2026-05-25 13:52:50 | ERROR | peft-platform | [253:ddc610b6] RuntimeError: Configured CUDA binary not found at /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda116.so
  214. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] trainable params: 2,535,624 || all params: 754,928,673 || trainable%: 0.3359
  215. 2026-05-25 13:52:50 | WARNING | peft-platform | [253:ddc610b6] [transformers] warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
  216. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 0%| | 0/1 [00:00<?, ?it/s]64,39,16,128,128,64,64,1,None
  217. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  218. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  219. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  220. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  221. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  222. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  223. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  224. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  225. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  226. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  227. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  228. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  229. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  230. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  231. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  232. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  233. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  234. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  235. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  236. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  237. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  238. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  239. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  240. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  241. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  242. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  243. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  244. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  245. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  246. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  247. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  248. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  249. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  250. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  251. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  252. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  253. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  254. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  255. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  256. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  257. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  258. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  259. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  260. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  261. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  262. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  263. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  264. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  265. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  266. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  267. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  268. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  269. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  270. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  271. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  272. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  273. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  274. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  275. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  276. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  277. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  278. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  279. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  280. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  281. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  282. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  283. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  284. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  285. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  286. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  287. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  288. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  289. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  290. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  291. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  292. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  293. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  294. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  295. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  296. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  297. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  298. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  299. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  300. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  301. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  302. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  303. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  304. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  305. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  306. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  307. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  308. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  309. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  310. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  311. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  312. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  313. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  314. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  315. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  316. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  317. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  318. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  319. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  320. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  321. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  322. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  323. 2026-05-25 13:52:50 | WARNING | peft-platform | [253:ddc610b6] /opt/conda/lib/python3.10/site-packages/torch/autograd/graph.py:829: UserWarning: Attempting to run cuBLAS, but there was no current CUDA context! Attempting to set the primary context... (Triggered internally at /workspace/framework/mcPytorch/aten/src/ATen/cuda/CublasHandlePool.cpp:183.)
  324. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
  325. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 64,39,16,128,128,64,64,1,None
  326. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] Training failed for job ddc610b6-d872-466c-b382-3c9bfd6df12a: '_ProgressCallback' object has no attribute 'on_pre_optimizer_step'
  327. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] [rank 1] ERROR: '_ProgressCallback' object has no attribute 'on_pre_optimizer_step'
  328. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] Traceback (most recent call last):
  329. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 236, in run_training
  330. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] adapter_path = await engine.train(
  331. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] File "/root/Fine-tuning/backend/app/engines/text_engine.py", line 394, in train
  332. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] trainer.train()
  333. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1427, in train
  334. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] return inner_training_loop(
  335. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1509, in _inner_training_loop
  336. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] self._run_epoch(
  337. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1762, in _run_epoch
  338. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] self.control = self.callback_handler.on_pre_optimizer_step(self.args, self.state, self.control)
  339. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] File "/opt/conda/lib/python3.10/site-packages/transformers/trainer_callback.py", line 511, in on_pre_optimizer_step
  340. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] return self.call_event("on_pre_optimizer_step", args, state, control, **kwargs)
  341. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] File "/opt/conda/lib/python3.10/site-packages/transformers/trainer_callback.py", line 545, in call_event
  342. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] result = getattr(callback, event)(
  343. 2026-05-25 13:52:50 | ERROR | peft-platform | [253:ddc610b6] AttributeError: '_ProgressCallback' object has no attribute 'on_pre_optimizer_step'
  344. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [remote_train] Step 1/1 done (epoch 1.00)
  345. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: Traceback (most recent call last):
  346. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: File "/opt/conda/lib/python3.10/runpy.py", line 196, in _run_module_as_main
  347. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: return _run_code(code, main_globals, None,
  348. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: File "/opt/conda/lib/python3.10/runpy.py", line 86, in _run_code
  349. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: exec(code, run_globals)
  350. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 466, in <module>
  351. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: main()
  352. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 461, in main
  353. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: asyncio.run(run_training(job_id, model_id, model_type, dataset_id, config,
  354. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: File "/opt/conda/lib/python3.10/asyncio/runners.py", line 44, in run
  355. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: return loop.run_until_complete(main)
  356. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: File "/opt/conda/lib/python3.10/asyncio/base_events.py", line 649, in run_until_complete
  357. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: return future.result()
  358. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 236, in run_training
  359. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: adapter_path = await engine.train(
  360. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: File "/root/Fine-tuning/backend/app/engines/text_engine.py", line 394, in train
  361. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: trainer.train()
  362. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1427, in train
  363. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: return inner_training_loop(
  364. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1509, in _inner_training_loop
  365. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: self._run_epoch(
  366. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1762, in _run_epoch
  367. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: self.control = self.callback_handler.on_pre_optimizer_step(self.args, self.state, self.control)
  368. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: File "/opt/conda/lib/python3.10/site-packages/transformers/trainer_callback.py", line 511, in on_pre_optimizer_step
  369. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: return self.call_event("on_pre_optimizer_step", args, state, control, **kwargs)
  370. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: File "/opt/conda/lib/python3.10/site-packages/transformers/trainer_callback.py", line 545, in call_event
  371. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] [rank1]: result = getattr(callback, event)(
  372. 2026-05-25 13:52:50 | ERROR | peft-platform | [253:ddc610b6] [rank1]: AttributeError: '_ProgressCallback' object has no attribute 'on_pre_optimizer_step'
  373. 2026-05-25 13:52:50 | INFO | peft-platform | [253:ddc610b6] 100%|██████████| 1/1 [00:35<00:00, 35.50s/it]
  374. INFO: 127.0.0.1:52106 - "GET /health HTTP/1.1" 200 OK