# Docker Compose stack that serves Qwen3.6-27B through vLLM's
# OpenAI-compatible API server, using the MetaX (MACA) vLLM image.
services:
  qwen3.6-27b:
    image: cr.metax-tech.com/public-ai-release/maca/vllm-metax:0.19.0-maca.ai3.5.3.502-torch2.8-py312-ubuntu22.04-amd64
    container_name: qwen3.6-27b-vllm
    shm_size: '10gb'
    ports:
      # Host port 8004 -> container port 30000 (must match --port below).
      - "8004:30000"
    volumes:
      # Format is host path:container path.
      # Model weights, mounted read-only.
      - /opt/lq/models:/model:ro
      - ~/.cache/huggingface:/root/.cache/huggingface
      # Log directory mapping (the server log is tee'd into it).
      - /opt/lq/deploy_models/logs/logs:/var/log/vllm
      # Script directory mapping.
      - /opt/lq/deploy_models/bench_suite:/bench_suite
    environment:
      - CUDA_VISIBLE_DEVICES=1
      # Ensure real-time (unbuffered) Python output.
      - PYTHONUNBUFFERED=1
    # Creates the log directory, starts the server, and mirrors stdout/stderr
    # into /var/log/vllm/qwen3.6-27b-server.log.
    #
    # * vLLM's api_server takes --model and --uvicorn-log-level; the former
    #   --model-path / --log-level spellings are not vLLM options.
    # * The API key is read from VLLM_API_KEY in the Compose environment (shell
    #   or .env file); the previous literal is kept only as a fallback so
    #   existing deployments keep working. Override it outside of development.
    # * Every line of the folded scalar must stay at the same indentation:
    #   more-indented lines would keep their newlines and split the quoted
    #   sh -c string.
    # * NOTE(review): the pipeline's exit status is tee's, so a crashed server
    #   still makes the container exit with 0 - confirm this is acceptable for
    #   any restart policy.
    command: >-
      sh -c "mkdir -p /var/log/vllm &&
      python3 -m vllm.entrypoints.openai.api_server
      --model /model/Qwen3.6-27B
      --served-model-name Qwen3.6-27B
      --host 0.0.0.0
      --port 30000
      --tensor-parallel-size 1
      --max-num-batched-tokens 4096
      --max-model-len 8192
      --api-key ${VLLM_API_KEY:-sk-12345}
      --uvicorn-log-level info
      2>&1 | tee /var/log/vllm/qwen3.6-27b-server.log"
    # NOTE(review): with the host IPC namespace, shm_size above presumably has
    # no effect - confirm which of the two is intended.
    ipc: host