services:
  qwen3.5-122b:
    image: vllm/vllm-openai:latest
    container_name: qwen3.5-122b-vllm
    runtime: nvidia
    shm_size: '10gb'
    ports:
      - "25423:30000"
    volumes:
      # host path : container path
      - /data/app_workspace/models:/model:ro
      - ~/.cache/huggingface:/root/.cache/huggingface
      - /data/app_workspace/deploy_models/vllm/logs:/var/log/vllm                     # log directory mapping
      - /data/app_workspace/deploy_models/vllm/vllm_start_shell:/vllm_start_shell:ro  # startup scripts (read-only)
    environment:
      - CUDA_VISIBLE_DEVICES          # passed through from the host environment
      - PYTHONUNBUFFERED=1            # ensure unbuffered, real-time output
      - VLLM_LOGGING_LEVEL=INFO       # control the log level via an environment variable
    # run the start script directly to avoid complex nested shell invocations
    entrypoint: ["/bin/bash", "/vllm_start_shell/start-vllm-qwen3.5-122b.sh"]
    ipc: host
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["0", "1"]  # modify for multiple GPUs, e.g. ["0", "1"]
              #count: all
              capabilities: [gpu]
  qwen3-8b:
    image: vllm/vllm-openai:latest
    container_name: qwen3-8b-vllm
    runtime: nvidia
    shm_size: '10gb'
    ports:
      - "25424:30000"
    volumes:
      # host path : container path
      - /data/app_workspace/models:/model:ro
      - ~/.cache/huggingface:/root/.cache/huggingface
      - /data/app_workspace/deploy_models/vllm/logs:/var/log/vllm                     # log directory mapping
      - /data/app_workspace/deploy_models/vllm/vllm_start_shell:/vllm_start_shell:ro  # startup scripts (read-only)
    environment:
      - CUDA_VISIBLE_DEVICES
      - PYTHONUNBUFFERED=1            # ensure unbuffered, real-time output
      - VLLM_LOGGING_LEVEL=INFO       # control the log level via an environment variable
    # run the start script directly to avoid complex nested shell invocations
    entrypoint: ["/bin/bash", "/vllm_start_shell/start-vllm-qwen3-8b.sh"]
    ipc: host
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["2"]       # modify for multiple GPUs, e.g. ["0", "1"]
              #count: all
              capabilities: [gpu]
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:30000/v1/models", "-H", "Authorization: Bearer lq123456"]
      interval: 10s
      timeout: 5s
      retries: 30
      start_period: 60s
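  # Note: the health check above runs inside the container against port 30000.
  # A minimal sketch of the equivalent check from the host, assuming the
  # 25424:30000 port mapping and the same bearer token:
  #   curl -f -H "Authorization: Bearer lq123456" http://localhost:25424/v1/models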
  qwen3-embedding-8b:
    image: vllm/vllm-openai:latest
    #image: vllm/vllm-openai:v0.15.0
    container_name: qwen3-embedding-8b-vllm
    runtime: nvidia
    shm_size: '5gb'
    ports:
      - "25425:30000"
    volumes:
      # host path : container path
      - /data/app_workspace/models:/model:ro
      - ~/.cache/huggingface:/root/.cache/huggingface
      - /data/app_workspace/deploy_models/vllm/logs:/var/log/vllm                     # log directory mapping
      - /data/app_workspace/deploy_models/vllm/vllm_start_shell:/vllm_start_shell:ro  # startup scripts (read-only)
    environment:
      - CUDA_VISIBLE_DEVICES
      - PYTHONUNBUFFERED=1            # ensure unbuffered, real-time output
      - VLLM_LOGGING_LEVEL=INFO       # control the log level via an environment variable
    # run the start script directly to avoid complex nested shell invocations
    entrypoint: ["/bin/bash", "/vllm_start_shell/start-vllm-qwen3-embedding-8b.sh"]
    ipc: host
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["2"]       # modify for multiple GPUs, e.g. ["0", "1"]
              #count: all
              capabilities: [gpu]
    depends_on:
      qwen3-8b:
        condition: service_healthy    # wait until qwen3-8b passes its health check
  qwen3-reranker-8b:
    #image: vllm/vllm-openai:latest  # the v0.18 image does not support reranker deployment
    image: vllm/vllm-openai:v0.15.0
    container_name: qwen3-reranker-8b-vllm
    runtime: nvidia
    shm_size: '5gb'
    ports:
      - "25426:30000"
    volumes:
      # host path : container path
      - /data/app_workspace/models:/model:ro
      - ~/.cache/huggingface:/root/.cache/huggingface
      - /data/app_workspace/deploy_models/vllm/logs:/var/log/vllm                     # log directory mapping
      - /data/app_workspace/deploy_models/vllm/vllm_start_shell:/vllm_start_shell:ro  # startup scripts (read-only)
      - /data/app_workspace/deploy_models/vllm/sglang-main:/vllm/sglang-main:ro
    environment:
      - CUDA_VISIBLE_DEVICES
      - PYTHONUNBUFFERED=1            # ensure unbuffered, real-time output
      - VLLM_LOGGING_LEVEL=INFO       # control the log level via an environment variable
    # run the start script directly to avoid complex nested shell invocations
    entrypoint: ["/bin/bash", "/vllm_start_shell/start-vllm-qwen3-reranker-8b-15.sh"]
    ipc: host
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["3"]       # modify for multiple GPUs, e.g. ["0", "1"]
              #count: all
              capabilities: [gpu]
  qwen3.5-35b:
    image: vllm/vllm-openai:latest
    container_name: qwen3.5-35b-vllm
    runtime: nvidia
    shm_size: '5gb'
    ports:
      - "25427:30000"
    volumes:
      # host path : container path
      - /data/app_workspace/models:/model:ro
      - ~/.cache/huggingface:/root/.cache/huggingface
      - /data/app_workspace/deploy_models/vllm/logs:/var/log/vllm                     # log directory mapping
      - /data/app_workspace/deploy_models/vllm/vllm_start_shell:/vllm_start_shell:ro  # startup scripts (read-only)
    environment:
      - CUDA_VISIBLE_DEVICES
      - PYTHONUNBUFFERED=1            # ensure unbuffered, real-time output
      - VLLM_LOGGING_LEVEL=INFO       # control the log level via an environment variable
    # run the start script directly to avoid complex nested shell invocations
    entrypoint: ["/bin/bash", "/vllm_start_shell/start-vllm-qwen3.5-35b.sh"]
    ipc: host
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["4"]       # modify for multiple GPUs, e.g. ["0", "1"]
              #count: all
              capabilities: [gpu]
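
# ------------------------------------------------------------------------
# The entrypoints above call start scripts from /vllm_start_shell that are
# not included in this file. Below is a minimal, hypothetical sketch of what
# such a script (e.g. start-vllm-qwen3-8b.sh) might look like; the model
# path, served model name, memory fraction, and log file are assumptions,
# while the port and API key follow the compose file above:
#
#   #!/bin/bash
#   # start-vllm-qwen3-8b.sh (hypothetical sketch)
#   exec vllm serve /model/Qwen3-8B \
#     --host 0.0.0.0 \
#     --port 30000 \
#     --served-model-name qwen3-8b \
#     --api-key lq123456 \
#     --gpu-memory-utilization 0.90 \
#     >> /var/log/vllm/qwen3-8b.log 2>&1
#
# Example usage from the directory containing this compose file:
#   docker compose up -d qwen3-8b qwen3-embedding-8b
# ------------------------------------------------------------------------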