# docker-compose_qwen3-8b.yaml.bak
#
# Docker Compose definition for one SGLang server container that serves the
# model found at /model/Qwen3-8B and publishes it on host port 25424.

# Ignored (with a warning) by Compose v2; kept for older docker-compose.
version: '3.8'

services:
  qwen3-8b:
    image: lmsysorg/sglang:latest
    runtime: nvidia
    shm_size: '10gb'
    ports:
      # host port:container port (container port must match --port below)
      - "25424:30001"
    volumes:
      # host path:container path
      - /data/app_workspace/models:/model:ro
      - ~/.cache/huggingface:/root/.cache/huggingface
      # log directory mapping
      - /data/app_workspace/deploy_models/sglang/logs:/var/log/sglang
    environment:
      # No value given, so Compose passes the host's value through unchanged.
      # NOTE(review): combined with the single-GPU device_ids reservation
      # below, a host value other than the container-local index may hide the
      # GPU inside the container; confirm this pass-through is intended.
      - CUDA_VISIBLE_DEVICES
      # ensure real-time (unbuffered) output
      - PYTHONUNBUFFERED=1
    # The API key is read from SGLANG_API_KEY (shell environment or .env) and
    # falls back to the previous hard-coded value so existing deployments keep
    # working. NOTE(review): that fallback is a committed secret; rotate it
    # and drop the default once every deployment sets SGLANG_API_KEY.
    #
    # All lines of the folded scalar share one indent on purpose: a
    # more-indented line would keep its newline and break the shell command.
    # The server's stdout and stderr are both copied to the mounted log file.
    command: >-
      sh -c "mkdir -p /var/log/sglang &&
      python3 -m sglang.launch_server
      --model-path /model/Qwen3-8B
      --tp 1
      --host 0.0.0.0
      --port 30001
      --api-key ${SGLANG_API_KEY:-lq123456}
      --mem-fraction-static 0.33
      --log-level info 2>&1 | tee /var/log/sglang/qwen3-8b-server.log"
    ipc: host
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              # Modify for multiple GPUs: ["0", "1"]
              device_ids: ["2"]
              # Alternative to device_ids (do not set both):
              # count: all
              capabilities: [gpu]