|
@@ -1,113 +1,11 @@
|
|
|
-(base) [root@localhost ~]# docker inspect qwen3-reranker-vllm | grep -A 30 '"Env"'
|
|
|
|
|
- "Env": [
|
|
|
|
|
- "VLLM_TORCH_COMPILE=0",
|
|
|
|
|
- "VLLM_DISABLE_TORCH_COMPILE=1",
|
|
|
|
|
- "TORCH_EXTENSIONS_DIR=/tmp/torch_ext_$",
|
|
|
|
|
- "MAX_JOBS=1",
|
|
|
|
|
- "CUDA_VISIBLE_DEVICES=3",
|
|
|
|
|
- "PYTHONUNBUFFERED=1",
|
|
|
|
|
- "PATH=/opt/maca/bin:/opt/maca/mxgpu_llvm/bin:/opt/maca/ompi/bin:/opt/maca/ucx/bin:/opt/mxdriver/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
|
|
|
|
|
- "LIBRARY_PATH=/opt/mxdriver/lib:",
|
|
|
|
|
- "LD_LIBRARY_PATH=/opt/maca/lib:/opt/maca/ompi/lib:/opt/maca/ucx/lib:/opt/mxdriver/lib:",
|
|
|
|
|
- "MACA_PATH=/opt/maca",
|
|
|
|
|
- "MACA_CLANG_PATH=/opt/maca/mxgpu_llvm/bin",
|
|
|
|
|
- "DEBIAN_FRONTEND=noninteractive",
|
|
|
|
|
- "TORCH_ALLOW_TF32_CUBLAS_OVERRIDE=1",
|
|
|
|
|
- "TZ=Asia/Shanghai",
|
|
|
|
|
- "CUCC_PATH=/opt/maca/tools/cu-bridge",
|
|
|
|
|
- "CUDA_PATH=/opt/maca/tools/cu-bridge"
|
|
|
|
|
- ],
|
|
|
|
|
- "Cmd": [
|
|
|
|
|
- "sh",
|
|
|
|
|
- "-c",
|
|
|
|
|
- "/opt/conda/bin/vllm serve /model/Qwen3-Reranker-8B --served-model-name Qwen3-Reranker-8B --task score --host 0.0.0.0 --port 30000 --tensor-parallel-size 1 --max-num-batched-tokens 4096 --max-model-len 16384 --gpu-memory-utilization 0.45 --hf_overrides '{\"architectures\": [\"Qwen3ForSequenceClassification\"],\"classifier_from_token\": [\"no\", \"yes\"],\"is_original_qwen3_reranker\": true}' --api-key sk-123456 2>&1 | tee /var/log/vllm/qwen3-reranker-server.log"
|
|
|
|
|
- ],
|
|
|
|
|
- "Image": "vllm-metax:lq",
|
|
|
|
|
- "Volumes": null,
|
|
|
|
|
- "WorkingDir": "/workspace",
|
|
|
|
|
- "Entrypoint": null,
|
|
|
|
|
- "OnBuild": null,
|
|
|
|
|
- "Labels": {
|
|
|
|
|
- "cn.kylinos.kylin-server-platform.base_image": "",
|
|
|
|
|
- "cn.kylinos.kylin-server-platform.build_id": "2503-build20",
|
|
|
|
|
-(base) [root@localhost ~]# docker inspect qwen3-embedding-vllm | grep -A 30 '"Env"'
|
|
|
|
|
- "Env": [
|
|
|
|
|
- "VLLM_DISABLE_TORCH_COMPILE=1",
|
|
|
|
|
- "TORCH_EXTENSIONS_DIR=/tmp/torch_ext_$",
|
|
|
|
|
- "MAX_JOBS=1",
|
|
|
|
|
- "CUDA_VISIBLE_DEVICES=2",
|
|
|
|
|
- "PYTHONUNBUFFERED=1",
|
|
|
|
|
- "VLLM_TORCH_COMPILE=0",
|
|
|
|
|
- "PATH=/opt/maca/bin:/opt/maca/mxgpu_llvm/bin:/opt/maca/ompi/bin:/opt/maca/ucx/bin:/opt/mxdriver/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
|
|
|
|
|
- "LIBRARY_PATH=/opt/mxdriver/lib:",
|
|
|
|
|
- "LD_LIBRARY_PATH=/opt/maca/lib:/opt/maca/ompi/lib:/opt/maca/ucx/lib:/opt/mxdriver/lib:",
|
|
|
|
|
- "MACA_PATH=/opt/maca",
|
|
|
|
|
- "MACA_CLANG_PATH=/opt/maca/mxgpu_llvm/bin",
|
|
|
|
|
- "DEBIAN_FRONTEND=noninteractive",
|
|
|
|
|
- "TORCH_ALLOW_TF32_CUBLAS_OVERRIDE=1",
|
|
|
|
|
- "TZ=Asia/Shanghai",
|
|
|
|
|
- "CUCC_PATH=/opt/maca/tools/cu-bridge",
|
|
|
|
|
- "CUDA_PATH=/opt/maca/tools/cu-bridge"
|
|
|
|
|
- ],
|
|
|
|
|
- "Cmd": [
|
|
|
|
|
- "sh",
|
|
|
|
|
- "-c",
|
|
|
|
|
- "/opt/conda/bin/vllm serve /model/Qwen3-Embedding-8B --served-model-name Qwen3-Embedding-8B --task embedding --host 0.0.0.0 --port 30000 --tensor-parallel-size 1 --max-num-batched-tokens 4096 --max-model-len 16384 --gpu-memory-utilization 0.45 --api-key sk-123456 2>&1 | tee /var/log/vllm/qwen3-embedding-server.log"
|
|
|
|
|
- ],
|
|
|
|
|
- "Image": "vllm-metax:lq",
|
|
|
|
|
- "Volumes": null,
|
|
|
|
|
- "WorkingDir": "/workspace",
|
|
|
|
|
- "Entrypoint": null,
|
|
|
|
|
- "OnBuild": null,
|
|
|
|
|
- "Labels": {
|
|
|
|
|
- "cn.kylinos.kylin-server-platform.base_image": "",
|
|
|
|
|
- "cn.kylinos.kylin-server-platform.build_id": "2503-build20",
|
|
|
|
|
-(base) [root@localhost ~]# docker inspect finetune-trainer | grep -A 5 '"Image"'
|
|
|
|
|
- "Image": "sha256:5334348e7a9b0340366d2813c876312bbedf662a49308070fabfd2bb2fccc0f5",
|
|
|
|
|
- "ResolvConfPath": "/var/lib/docker/containers/df66d0d470b87306937f6a4aa67a4e3bd130ba923cba676e2ebde211b6d1b1f4/resolv.conf",
|
|
|
|
|
- "HostnamePath": "/var/lib/docker/containers/df66d0d470b87306937f6a4aa67a4e3bd130ba923cba676e2ebde211b6d1b1f4/hostname",
|
|
|
|
|
- "HostsPath": "/var/lib/docker/containers/df66d0d470b87306937f6a4aa67a4e3bd130ba923cba676e2ebde211b6d1b1f4/hosts",
|
|
|
|
|
- "LogPath": "/var/lib/docker/containers/df66d0d470b87306937f6a4aa67a4e3bd130ba923cba676e2ebde211b6d1b1f4/df66d0d470b87306937f6a4aa67a4e3bd130ba923cba676e2ebde211b6d1b1f4-json.log",
|
|
|
|
|
- "Name": "/finetune-trainer",
|
|
|
|
|
---
|
|
|
|
|
- "Image": "5334348e7a9b",
|
|
|
|
|
- "Volumes": null,
|
|
|
|
|
- "WorkingDir": "/workspace",
|
|
|
|
|
- "Entrypoint": null,
|
|
|
|
|
- "OnBuild": null,
|
|
|
|
|
- "Labels": {
|
|
|
|
|
-(base) [root@localhost ~]# docker inspect finetune-trainer | grep -A 30 '"Env"'
|
|
|
|
|
- "Env": [
|
|
|
|
|
- "PATH=/opt/maca/bin:/opt/maca/mxgpu_llvm/bin:/opt/maca/ompi/bin:/opt/maca/ucx/bin:/opt/mxdriver/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
|
|
|
|
|
- "LIBRARY_PATH=/opt/mxdriver/lib:",
|
|
|
|
|
- "LD_LIBRARY_PATH=/opt/maca/mxshmem/lib:/opt/maca/mxshmem/lib:/opt/maca/mxshmem/lib:/opt/maca/lib:/opt/maca/ompi/lib:/opt/maca/ucx/lib:/opt/mxdriver/lib:",
|
|
|
|
|
- "MACA_PATH=/opt/maca",
|
|
|
|
|
- "MACA_CLANG_PATH=/opt/maca/mxgpu_llvm/bin",
|
|
|
|
|
- "DEBIAN_FRONTEND=noninteractive",
|
|
|
|
|
- "TORCH_ALLOW_TF32_CUBLAS_OVERRIDE=1",
|
|
|
|
|
- "TZ=Asia/Shanghai",
|
|
|
|
|
- "CUCC_PATH=/opt/maca/tools/cu-bridge",
|
|
|
|
|
- "CUDA_PATH=/opt/maca/tools/cu-bridge"
|
|
|
|
|
- ],
|
|
|
|
|
- "Cmd": [
|
|
|
|
|
- "tail",
|
|
|
|
|
- "-f",
|
|
|
|
|
- "/dev/null"
|
|
|
|
|
- ],
|
|
|
|
|
- "Image": "5334348e7a9b",
|
|
|
|
|
- "Volumes": null,
|
|
|
|
|
- "WorkingDir": "/workspace",
|
|
|
|
|
- "Entrypoint": null,
|
|
|
|
|
- "OnBuild": null,
|
|
|
|
|
- "Labels": {
|
|
|
|
|
- "com.metax.driver.version": "3.5.3.11",
|
|
|
|
|
- "com.metax.sdk.version": "3.5.3.20",
|
|
|
|
|
- "com.metax.torch.version": "2.8+3.5.3.9",
|
|
|
|
|
- "org.opencontainers.image.ref.name": "ubuntu",
|
|
|
|
|
- "org.opencontainers.image.version": "22.04"
|
|
|
|
|
- }
|
|
|
|
|
- },
|
|
|
|
|
- "NetworkSettings": {
|
|
|
|
|
-(base) [root@localhost ~]# docker history finetune-trainer --no-trunc | head -10
|
|
|
|
|
-Error response from daemon: No such image: finetune-trainer:latest
|
|
|
|
|
-(base) [root@localhost ~]#
|
|
|
|
|
|
|
+(base) [root@localhost ~]# docker exec -e MACA_VISIBLE_DEVICES=2,3 -e CUDA_VISIBLE_DEVICES=2,3 finetune-trainer bash -c '/opt/conda/bin/python -c "from transformers import AutoModelForCausalLM; model = AutoModelForCausalLM.from_pretrained(\"/root/Fine-tuning/backend/data/models/Qwen/Qwen1.5-0.5B\", torch_dtype=\"auto\", device_map=\"auto\"); print(\"Model loaded successfully!\")"'
|
|
|
|
|
+`torch_dtype` is deprecated! Use `dtype` instead!
|
|
|
|
|
+Traceback (most recent call last):
|
|
|
|
|
+ File "<string>", line 1, in <module>
|
|
|
|
|
+ File "/opt/conda/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 604, in from_pretrained
|
|
|
|
|
+ return model_class.from_pretrained(
|
|
|
|
|
+ File "/opt/conda/lib/python3.10/site-packages/transformers/modeling_utils.py", line 277, in _wrapper
|
|
|
|
|
+ return func(*args, **kwargs)
|
|
|
|
|
+ File "/opt/conda/lib/python3.10/site-packages/transformers/modeling_utils.py", line 4806, in from_pretrained
|
|
|
|
|
+ raise ValueError(
|
|
|
|
|
+ValueError: Using a `device_map`, `tp_plan`, `torch.device` context manager or setting `torch.set_default_device(device)` requires `accelerate`. You can install it with `pip install accelerate`
|