|
|
@@ -1,18 +1,16 @@
|
|
|
-(base) [root@localhost ~]# docker exec finetune-trainer /opt/conda/bin/python -c "from transformers import AutoModelForCausalLM, AutoConfig; cfg = AutoConfig.from_pretrained('/root/Fine-tuning/backend/data/models/Qwen_Qwen3.5-0.8B'); print('model_type:', cfg.model_type); print('architectures:', cfg.architectures)"
|
|
|
-model_type: qwen3_5
|
|
|
-architectures: ['Qwen3_5ForConditionalGeneration']
|
|
|
-(base) [root@localhost ~]# docker exec finetune-trainer /opt/conda/bin/python -c "import torch; print('torch:', torch.__version__); print('cuda:', torch.cuda.is_available()); print('devices:', torch.cuda.device_count())"
|
|
|
-torch: 2.8.0+metax3.5.3.9
|
|
|
-cuda: True
|
|
|
-devices: 4
|
|
|
-(base) [root@localhost ~]# docker exec finetune-trainer /opt/conda/bin/python -c "import torch; from transformers import AutoModelForCausalLM; m = AutoModelForCausalLM.from_pretrained('/root/Fine-tuning/backend/data/models/Qwen_Qwen3.5-0.8B', torch_dtype=torch.float16, device_map='auto'); print('Loaded OK')"
|
|
|
-[transformers] `torch_dtype` is deprecated! Use `dtype` instead!
|
|
|
-Current Triton version 3.0.0 is below the recommended 3.2.0 version. Errors may occur and these issues will not be fixed. Please consider upgrading Triton.
|
|
|
-Current Python version 3.10 is below the recommended 3.11 version. It is recommended to upgrade to Python 3.11 or higher for the best experience.
|
|
|
-torch.compile is not available in Python 3.10, using identity decorator instead
|
|
|
-/opt/conda/lib/python3.10/site-packages/torchvision/datapoints/__init__.py:12: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
|
|
|
- warnings.warn(_BETA_TRANSFORMS_WARNING)
|
|
|
-/opt/conda/lib/python3.10/site-packages/torchvision/transforms/v2/__init__.py:54: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
|
|
|
- warnings.warn(_BETA_TRANSFORMS_WARNING)
|
|
|
-Loading weights: 100%|██████████| 320/320 [00:06<00:00, 48.32it/s]
|
|
|
-Loaded OK
|
|
|
+(base) [root@localhost ~]# docker exec finetune-trainer ps aux
|
|
|
+USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
|
|
|
+root 1 0.0 0.0 2824 1060 ? Ss May21 0:00 tail -f /dev/null
|
|
|
+root 300 0.1 0.0 0 0 ? Z May21 0:30 [python] <defunct>
|
|
|
+root 405 0.2 0.0 0 0 ? Z May21 0:59 [python] <defunct>
|
|
|
+root 1139 0.6 0.0 0 0 ? Z May21 2:57 [python] <defunct>
|
|
|
+root 1496 0.1 0.0 0 0 ? Z May21 0:37 [python] <defunct>
|
|
|
+root 1655 0.2 0.0 0 0 ? Z May21 1:21 [python] <defunct>
|
|
|
+root 13911 2.3 0.0 0 0 ? Z May21 0:38 [python] <defunct>
|
|
|
+root 14070 4.2 0.0 0 0 ? Z May21 0:59 [python] <defunct>
|
|
|
+root 14488 7.3 0.0 0 0 ? Z May21 1:00 [python] <defunct>
|
|
|
+root 14906 147 2.1 56294212 11559636 ? Sl May21 7:06 /opt/conda/bin/python -m app.engines.remote_train 3485b881-b7be-4a0d-83bd-e8330d9b0fad Qwen/Qwen1.5-0.5B text /root/Fine-tuning/backend/data/datasets/data.jsonl /root/Fine-tuning/backend/data/config_3485b881-b7be-4a0d-83bd-e8330d9b0fad.json
|
|
|
+root 15565 0.0 0.0 7064 1592 ? Rs 00:03 0:00 ps aux
|
|
|
+(base) [root@localhost ~]# docker exec finetune-trainer bash -c 'maca-smi || nvidia-smi'
|
|
|
+bash: line 1: maca-smi: command not found
|
|
|
+bash: line 1: nvidia-smi: command not found
|