---
# Docker Compose deployment that runs SGLang's HTTP server
# (python3 -m sglang.launch_server) for the Qwen3-8B model on one NVIDIA GPU.
# `version` is kept for older docker-compose releases; newer Compose treats it
# as obsolete.
version: '3.8'

services:
  qwen3-8b:
    # Tag corrected from the misspelled "lates", which does not name a
    # published image tag. NOTE(review): consider pinning an explicit release
    # tag so deployments are reproducible.
    image: lmsysorg/sglang:latest
    runtime: nvidia
    # NOTE(review): with `ipc: host` (below) the container presumably shares
    # the host's /dev/shm, which would make shm_size redundant -- confirm.
    shm_size: '10gb'
    ports:
      # host port 25424 -> container port 30001 (must match --port below)
      - "25424:30001"
    volumes:
      # Format is host path:container path
      # Model weights, mounted read-only; --model-path below points inside it.
      - /data/app_workspace/models:/model:ro
      - ~/.cache/huggingface:/root/.cache/huggingface
      # Log directory mapping: the server log written by `tee` lands here.
      - /data/app_workspace/deploy_models/sglang/logs:/var/log/sglang
    environment:
      # Passed through from the environment that invokes compose (no value set
      # here). NOTE(review): if the host value is a physical index such as "2",
      # it may not match the GPU numbering inside the container once
      # device_ids restricts the visible devices -- confirm.
      - CUDA_VISIBLE_DEVICES
      # Unbuffered Python output so log lines appear in real time.
      - PYTHONUNBUFFERED=1
    # Folded scalar: every continuation line must stay at the same indent so
    # the whole thing folds into a single `sh -c "..."` command line.
    # NOTE(review): --api-key is a hard-coded secret committed with this file;
    # consider supplying it via an environment variable or secret store.
    # NOTE(review): because the server is piped into `tee`, the shell reports
    # tee's exit status, so a server crash may not surface as a non-zero
    # container exit code -- confirm whether that matters for restart policy.
    command: >
      sh -c "mkdir -p /var/log/sglang &&
      python3 -m sglang.launch_server
      --model-path /model/Qwen3-8B
      --tp 1
      --host 0.0.0.0
      --port 30001
      --api-key lq123456
      --mem-fraction-static 0.33
      --log-level info 2>&1 | tee /var/log/sglang/qwen3-8b-server.log"
    ipc: host
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              # Modify for multiple GPUs: ["0", "1"]
              device_ids: ["2"]
              # count: all
              capabilities: [gpu]