| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869 |
# Docker Compose stack that serves the GLM-OCR model through vLLM's
# OpenAI-compatible API server on host port 25429.
#
# The API key is read from the VLLM_API_KEY variable (shell environment or the
# `.env` file next to this compose file); `docker compose up` aborts when it is
# missing.
# NOTE(review): the key that used to be hard-coded in this file should be
# treated as leaked and rotated.
services:
  glm-ocr-vllm-docker:
    image: vllm/vllm-openai:nightly
    container_name: glm-ocr-vllm
    runtime: nvidia
    restart: unless-stopped
    environment:
      - PYTHONUNBUFFERED=1
      # pip is pointed at the Tsinghua (TUNA) PyPI mirror.
      - PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple
      - PIP_TRUSTED_HOST=pypi.tuna.tsinghua.edu.cn
      # /opt/pip-packages is listed first so packages installed there at
      # start-up take precedence over the copies shipped in the image.
      - PYTHONPATH=/opt/pip-packages:/usr/local/lib/python3.12/dist-packages
      # Interpolated by Compose from the host; fails fast when unset or empty.
      - "VLLM_API_KEY=${VLLM_API_KEY:?VLLM_API_KEY is required}"
    ports:
      - "25429:30000"
    volumes:
      # Model weights, mounted read-only.
      - /data/app_workspace/models/GLM-OCR:/models/GLM-OCR:ro
      - /data/app_workspace/glm-ocr/logs-docker:/var/log/vllm
      # Persist start-up pip installs and the pip cache across restarts.
      - /data/app_workspace/glm-ocr/pip-packages:/opt/pip-packages:rw
      - /data/app_workspace/glm-ocr/pip-cache:/root/.cache/pip:rw
    entrypoint: ["/bin/bash", "-c"]
    command:
      # Compose interpolates `$` inside this script as well: every `$` that
      # must reach bash has to be written as `$$`.
      - |
        # Let a pip failure propagate through the `| tail` pipeline below.
        set -o pipefail

        # Reuse an already-installed transformers when it is new enough and
        # only hit the network otherwise. Versions are compared with
        # packaging.version (a plain string comparison would rank "5.10.0"
        # below "5.3.0"). Any import error also falls through to the install.
        echo "=== 检查 transformers 版本 ==="
        if ! python3 -c '
        import sys
        import transformers
        from packaging.version import Version

        installed = transformers.__version__
        if Version(installed) < Version("5.3.0"):
            sys.exit(1)
        print(f"✅ 使用已安装的 transformers {installed}")
        ' 2>/dev/null; then
          echo "⚠️ 安装或更新 transformers>=5.3.0..."
          # --upgrade is required: with --target pip does not replace packages
          # that already exist in the target directory.
          if ! pip3 install "transformers>=5.3.0" \
            --upgrade \
            --target /opt/pip-packages \
            --root-user-action=ignore \
            -q 2>&1 | tail -5; then
            # Best effort: report the failure but still try to start vLLM.
            echo "⚠️ transformers 安装失败,将使用现有版本继续启动" >&2
          fi
        fi

        echo "=== 启动 vLLM ==="
        # NOTE(review): `--allowed-local-media-path /` lets API clients
        # reference any file inside the container; consider narrowing it to a
        # dedicated media directory.
        # `exec` replaces bash with the server so it receives the container's
        # stop signal directly.
        exec python3 -m vllm.entrypoints.openai.api_server \
          --model /models/GLM-OCR \
          --served-model-name GLM-OCR \
          --host 0.0.0.0 \
          --port 30000 \
          --api-key "$$VLLM_API_KEY" \
          --gpu-memory-utilization 0.60 \
          --max-model-len 4096 \
          --max-num-seqs 96 \
          --enable-prefix-caching \
          --trust-remote-code \
          --allowed-local-media-path / \
          --dtype bfloat16
    ulimits:
      # 64 MiB limits for locked memory and stack size.
      memlock: 67108864
      stack: 67108864
    ipc: host
    deploy:
      resources:
        reservations:
          devices:
            # Pin the service to the GPU with device id 6.
            - driver: nvidia
              device_ids: ["6"]
              capabilities: [gpu]
    healthcheck:
      # `$$` defers expansion to the container shell, which reads the key from
      # the container environment instead of embedding it in this file.
      test:
        - CMD-SHELL
        - >-
          curl -f http://localhost:30000/health
          -H "Authorization: Bearer $$VLLM_API_KEY"
          || exit 1
      interval: 30s
      timeout: 10s
      retries: 5
      # Failures during the first 180s do not count against `retries`.
      start_period: 180s
    logging:
      driver: json-file
      options:
        # Rotate at 500 MB per file, keeping at most three files.
        max-size: "500m"
        max-file: "3"
|