# docker-compose.yml
  # Compose stack that serves the GLM-OCR model through vLLM's
  # OpenAI-compatible API server on host port 25429.
  #
  # Required variable (shell environment or a .env file next to this file):
  #   VLLM_API_KEY  bearer token that API clients must present.
  #
  # NOTE(review): an API key used to be hard-coded in this file. Treat that
  # key as leaked and rotate it.
  services:
    glm-ocr-vllm-docker:
      image: vllm/vllm-openai:nightly
      container_name: glm-ocr-vllm
      runtime: nvidia
      restart: unless-stopped
      environment:
        - PYTHONUNBUFFERED=1
        - PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple
        - PIP_TRUSTED_HOST=pypi.tuna.tsinghua.edu.cn
        # /opt/pip-packages comes first so packages installed by the startup
        # script below take precedence over the ones shipped in the image.
        - PYTHONPATH=/opt/pip-packages:/usr/local/lib/python3.12/dist-packages
        # Fails fast at `docker compose up` when the key is not provided.
        - "VLLM_API_KEY=${VLLM_API_KEY:?VLLM_API_KEY must be set}"
      ports:
        - "25429:30000"
      volumes:
        - /data/app_workspace/models/GLM-OCR:/models/GLM-OCR:ro
        - /data/app_workspace/glm-ocr/logs-docker:/var/log/vllm
        # Persisted so the transformers install survives container re-creation.
        - /data/app_workspace/glm-ocr/pip-packages:/opt/pip-packages:rw
        - /data/app_workspace/glm-ocr/pip-cache:/root/.cache/pip:rw
      entrypoint: ["/bin/bash", "-c"]
      command:
        - |
          # Check the persisted install first and only run pip when needed.
          # `$$` is Compose's escape for a literal `$`, so the expansions
          # below are performed by bash inside the container, not by Compose.
          set -o pipefail

          echo "=== 检查 transformers 版本 ==="
          # Compare parsed versions. A plain string comparison ranks '5.10.0'
          # and '10.0.0' below '5.3.0' and would reinstall on every start.
          if python3 -c "
          import sys
          import transformers
          from packaging.version import Version
          ok = Version(transformers.__version__) >= Version('5.3.0')
          sys.exit(0 if ok else 1)
          " 2>/dev/null; then
            echo "✅ 使用已安装的 transformers $$(python3 -c 'import transformers; print(transformers.__version__)')"
          else
            echo "⚠️ 安装或更新 transformers>=5.3.0..."
            # --upgrade is required: with --target pip does not replace a
            # package that already exists in the target directory.
            # pipefail makes the pipeline report pip's status, not tail's.
            if ! pip3 install "transformers>=5.3.0" \
                --upgrade \
                --target /opt/pip-packages \
                --root-user-action=ignore \
                -q 2>&1 | tail -5; then
              echo "❌ transformers 安装失败, 继续启动 vLLM" >&2
            fi
          fi

          echo "=== 启动 vLLM ==="
          # exec replaces bash so vLLM receives the container's stop signal
          # directly.
          # NOTE(review): --allowed-local-media-path / lets requests reference
          # any path inside the container; narrow it if clients only need a
          # specific directory.
          exec python3 -m vllm.entrypoints.openai.api_server \
            --model /models/GLM-OCR \
            --served-model-name GLM-OCR \
            --host 0.0.0.0 \
            --port 30000 \
            --api-key "$$VLLM_API_KEY" \
            --gpu-memory-utilization 0.60 \
            --max-model-len 4096 \
            --max-num-seqs 96 \
            --enable-prefix-caching \
            --trust-remote-code \
            --allowed-local-media-path / \
            --dtype bfloat16
      ulimits:
        # 67108864 bytes = 64 MiB
        memlock: 67108864
        stack: 67108864
      ipc: host
      deploy:
        resources:
          reservations:
            devices:
              # Reserve the host GPU with device id 6 for this service.
              - driver: nvidia
                device_ids: ["6"]
                capabilities: [gpu]
      healthcheck:
        # Runs inside the container, where VLLM_API_KEY is set by the
        # environment section above.
        test:
          - CMD-SHELL
          - >-
            curl -f http://localhost:30000/health
            -H "Authorization: Bearer $$VLLM_API_KEY"
            || exit 1
        interval: 30s
        timeout: 10s
        retries: 5
        # Grace period during which failed probes do not count as retries.
        start_period: 180s
      logging:
        driver: json-file
        options:
          max-size: "500m"
          max-file: "3"