---
# Docker Compose deployment that starts the vLLM OpenAI-compatible API server
# for the Qwen3.6-27B model from the vllm-metax image and tees its output into
# a log file on the host.
version: '3.8'

services:
  qwen3.6-27b:
    image: cr.metax-tech.com/public-ai-release/maca/vllm-metax:0.19.0-maca.ai3.5.3.502-torch2.8-py312-ubuntu22.04-amd64
    container_name: qwen3.6-27b-vllm
    stdin_open: true
    tty: true
    restart: unless-stopped

    # Host networking: the server binds directly to the host's interfaces, so
    # the API is reachable on host port 30000 (the --port value below).
    # A `ports:` mapping has no effect in this mode (Compose discards or
    # rejects it), so the former "8004:30000" entry was removed. To expose
    # the API on 8004 instead, change --port in the command.
    network_mode: host

    # Accelerator device nodes passed through to the container.
    devices:
      - "/dev/dri:/dev/dri"
      - "/dev/mxcd:/dev/mxcd"
      - "/dev/mem:/dev/mem"
    group_add:
      - "video"

    # NOTE(review): privileged mode plus unconfined AppArmor/seccomp removes
    # most container isolation; confirm the device runtime really needs all
    # three.
    privileged: true
    security_opt:
      - "apparmor=unconfined"
      - "seccomp=unconfined"

    shm_size: '100gb'
    ulimits:
      # -1 means unlimited locked memory.
      memlock:
        soft: -1
        hard: -1

    environment:
      - CUDA_VISIBLE_DEVICES=1
      - PYTHONUNBUFFERED=1  # Ensure output is emitted in real time

    volumes:
      # NOTE(review): this replaces the image's /usr/local with the host's;
      # presumably needed for host-installed driver/runtime files - confirm.
      - "/usr/local/:/usr/local/"
      - "/pde_ai:/pde_ai"
      # Model weights, mounted read-only.
      - "/opt/lq/models:/model:ro"
      - "~/.cache/huggingface:/root/.cache/huggingface"
      # Log directory mapping.
      # NOTE(review): the host path contains "logs/logs" - confirm this
      # doubled segment is intentional.
      - "/opt/lq/deploy_models/logs/logs:/var/log/vllm"
      # Script directory mapping.
      - "/opt/lq/deploy_models/bench_suite:/bench_suite"

    # The folded scalar (>) joins the lines below with single spaces, so the
    # whole thing is one `sh -c "..."` invocation. Keep every line at the same
    # indentation: more-indented lines would keep their newlines and break
    # the shell command.
    #
    # vLLM's api_server takes the model via --model and its HTTP log level via
    # --uvicorn-log-level; --model-path and --log-level are not vLLM flags.
    #
    # NOTE(review): the API key is hard-coded here; prefer injecting it from
    # an env var or secret rather than committing it.
    command: >
      sh -c "mkdir -p /var/log/vllm &&
      python3 -m vllm.entrypoints.openai.api_server
      --model /model/Qwen3.6-27B
      --served-model-name Qwen3.6-27B
      --host 0.0.0.0
      --port 30000
      --tensor-parallel-size 1
      --max-num-batched-tokens 4096
      --max-model-len 8192
      --api-key sk-123456
      --uvicorn-log-level info
      2>&1 | tee /var/log/vllm/qwen3.6-27b-server.log"