# docker-compose.yaml.qwen3.6-27b-vllm
#
# Runs vLLM's OpenAI-compatible API server for the model stored at
# /opt/lq/models/Qwen3.6-27B (mounted read-only at /model) using the MetaX
# MACA vLLM image. Server output is mirrored to
# /var/log/vllm/qwen3.6-27b-server.log inside the container.
#
# The API key can be overridden from the host environment:
#   VLLM_API_KEY=<secret> docker compose -f <this file> up -d
# When VLLM_API_KEY is unset, the previous hard-coded value is used.
version: '3.8'

services:
  qwen3.6-27b:
    image: cr.metax-tech.com/public-ai-release/maca/vllm-metax:0.19.0-maca.ai3.5.3.502-torch2.8-py312-ubuntu22.04-amd64
    container_name: qwen3.6-27b-vllm
    stdin_open: true
    tty: true
    restart: unless-stopped

    # Host networking: the server binds directly on the host, so the API is
    # reachable at <host>:30000 (the --port value in `command` below).
    # A `ports:` mapping must not be combined with `network_mode: host`; the
    # former "8004:30000" entry was never applied and has been removed.
    # To serve on 8004 instead, change --port in `command`.
    network_mode: host

    # NOTE(review): `privileged: true` already exposes host devices and lifts
    # the default confinement, so `devices` and `security_opt` below are
    # presumably redundant. Kept as-is; confirm before tightening.
    devices:
      - "/dev/dri:/dev/dri"
      - "/dev/mxcd:/dev/mxcd"  # presumably the MetaX accelerator node; confirm
      - "/dev/mem:/dev/mem"
    group_add:
      - "video"
    privileged: true
    security_opt:
      - "apparmor=unconfined"
      - "seccomp=unconfined"

    shm_size: '100gb'
    ulimits:
      # -1 means unlimited locked memory.
      memlock:
        soft: -1
        hard: -1

    environment:
      # Restricts the process to accelerator index 1.
      - CUDA_VISIBLE_DEVICES=1
      # Ensure real-time (unbuffered) Python output.
      - PYTHONUNBUFFERED=1

    volumes:
      # NOTE(review): this replaces the image's /usr/local with the host's;
      # verify it does not shadow packages shipped in the image.
      - "/usr/local/:/usr/local/"
      - "/pde_ai:/pde_ai"
      - "/opt/lq/models:/model:ro"
      - "~/.cache/huggingface:/root/.cache/huggingface"
      # Log directory mapping.
      # NOTE(review): host path ends in "logs/logs"; confirm this is intended.
      - "/opt/lq/deploy_models/logs/logs:/var/log/vllm"
      # Script directory mapping.
      - "/opt/lq/deploy_models/bench_suite:/bench_suite"

    # Folded scalar: the lines below are joined with spaces into one
    # `sh -c "..."` invocation. vLLM takes the model location via --model
    # (not --model-path) and the HTTP server log level via
    # --uvicorn-log-level (not --log-level).
    command: >
      sh -c "mkdir -p /var/log/vllm &&
      python3 -m vllm.entrypoints.openai.api_server
      --model /model/Qwen3.6-27B
      --served-model-name Qwen3.6-27B
      --host 0.0.0.0
      --port 30000
      --tensor-parallel-size 1
      --max-num-batched-tokens 4096
      --max-model-len 8192
      --api-key ${VLLM_API_KEY:-sk-123456}
      --uvicorn-log-level info 2>&1 | tee /var/log/vllm/qwen3.6-27b-server.log"