---
# docker-compose.yaml.nvidia
#
# Runs vLLM's OpenAI-compatible API server for the Qwen3.6-27B model.
# The server listens on port 30000 inside the container and is published
# on host port 8004.
services:
  qwen3.6-27b:
    image: cr.metax-tech.com/public-ai-release/maca/vllm-metax:0.19.0-maca.ai3.5.3.502-torch2.8-py312-ubuntu22.04-amd64
    container_name: qwen3.6-27b-vllm
    # NOTE(review): this service also sets `ipc: host` (see the end of the
    # service); presumably the host's /dev/shm is used in that mode and this
    # size has no effect - confirm and drop one of the two settings.
    shm_size: '10gb'
    ports:
      - "8004:30000"
    volumes:
      # host path:container path
      - /opt/lq/models:/model:ro
      - ~/.cache/huggingface:/root/.cache/huggingface
      # log directory mapping
      - /opt/lq/deploy_models/logs/logs:/var/log/vllm
      # benchmark script directory mapping
      - /opt/lq/deploy_models/bench_suite:/bench_suite
    environment:
      # NOTE(review): no GPU device reservation or runtime is declared in this
      # service; presumably the accelerator is exposed by the host's default
      # container runtime - confirm.
      - CUDA_VISIBLE_DEVICES=1
      # ensure real-time (unbuffered) output
      - PYTHONUNBUFFERED=1
    # The folded scalar (`>`) joins the lines below into one shell command.
    # - `--model` is the vLLM flag for the model location (`--model-path` is
    #   not accepted by vllm.entrypoints.openai.api_server).
    # - `--uvicorn-log-level` sets the HTTP server's log level; vLLM's own
    #   logger level is set via the VLLM_LOGGING_LEVEL environment variable.
    # - The API key is taken from VLLM_API_KEY in the environment of the
    #   `docker compose` invocation (or a .env file); the previous literal
    #   value is kept as the default. Override it for real deployments.
    # - Server output (stdout + stderr) is copied to the mounted log directory.
    #   NOTE(review): the pipeline's exit status is that of `tee`, so a
    #   crashed server does not produce a non-zero container exit code.
    command: >
      sh -c "mkdir -p /var/log/vllm &&
      python3 -m vllm.entrypoints.openai.api_server
      --model /model/Qwen3.6-27B
      --served-model-name Qwen3.6-27B
      --host 0.0.0.0
      --port 30000
      --tensor-parallel-size 1
      --max-num-batched-tokens 4096
      --max-model-len 8192
      --api-key ${VLLM_API_KEY:-sk-12345}
      --uvicorn-log-level info 2>&1 | tee /var/log/vllm/qwen3.6-27b-server.log"
    ipc: host