---
# Docker Compose definition for one SGLang inference server that serves the
# Qwen3-8B model from a read-only host directory and tees its output to a
# log file on the host.
version: '3.8'

services:
  qwen3-8b:
    # The tag was previously spelled "lates", which looks like a typo for
    # "latest". Consider pinning an explicit version tag for reproducibility.
    image: lmsysorg/sglang:latest
    runtime: nvidia
    # NOTE(review): with `ipc: host` below, the container presumably uses the
    # host's /dev/shm, which would make this value ineffective - confirm.
    shm_size: '10gb'
    ports:
      # host port:container port (container side matches --port in `command`)
      - "25424:30001"
    volumes:
      # host path:container path
      - /data/app_workspace/models:/model:ro
      - ~/.cache/huggingface:/root/.cache/huggingface
      # Log directory mapping
      - /data/app_workspace/deploy_models/sglang/logs:/var/log/sglang
    environment:
      # No value given, so this is passed through from the host environment.
      # NOTE(review): only the GPU(s) in `device_ids` below are reserved for
      # the container; a host value such as "2" may not match the indices
      # visible inside the container - verify before relying on it.
      - CUDA_VISIBLE_DEVICES
      # Ensure real-time (unbuffered) output
      - PYTHONUNBUFFERED=1
    # Folded scalar: the lines below are joined with single spaces into one
    # `sh -c "..."` invocation. The API key is substituted by Compose from
    # SGLANG_API_KEY when set (shell environment or .env file); the previous
    # hard-coded key is kept only as a backward-compatible default and should
    # be overridden outside version control.
    command: >-
      sh -c "mkdir -p /var/log/sglang &&
      python3 -m sglang.launch_server
      --model-path /model/Qwen3-8B
      --tp 1
      --host 0.0.0.0
      --port 30001
      --api-key ${SGLANG_API_KEY:-lq123456}
      --mem-fraction-static 0.33
      --log-level info
      2>&1 | tee /var/log/sglang/qwen3-8b-server.log"
    ipc: host
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              # Modify for multiple GPUs: ["0", "1"]
              device_ids: ["2"]
              # Alternative to device_ids (set one or the other, not both):
              # count: all
              capabilities: [gpu]