---
# Docker Compose stack that serves the GLM-OCR model through vLLM's
# OpenAI-compatible API server, pinned to one NVIDIA GPU.
#
# Required before `docker compose up`:
#   VLLM_API_KEY must be supplied through the shell environment or a `.env`
#   file next to this compose file. It is deliberately not stored here.
#   NOTE(review): the key that used to be hard-coded in this file must be
#   treated as leaked and rotated.
services:
  glm-ocr-vllm-docker:
    # NOTE(review): `nightly` is a moving tag, so two deployments of this file
    # can run different vLLM builds. Pin a dated tag or digest once a known
    # good build has been identified.
    image: vllm/vllm-openai:nightly
    container_name: glm-ocr-vllm
    runtime: nvidia
    restart: unless-stopped

    environment:
      - PYTHONUNBUFFERED=1
      - PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple
      - PIP_TRUSTED_HOST=pypi.tuna.tsinghua.edu.cn
      # /opt/pip-packages is listed first so that packages installed by the
      # startup script take precedence over the copies shipped in the image.
      - PYTHONPATH=/opt/pip-packages:/usr/local/lib/python3.12/dist-packages
      # Compose aborts with this message when the variable is unset or empty.
      - "VLLM_API_KEY=${VLLM_API_KEY:?VLLM_API_KEY must be set}"

    ports:
      # host:container; the container port matches `--port` below.
      - "25429:30000"

    volumes:
      # Model weights, read-only.
      - /data/app_workspace/models/GLM-OCR:/models/GLM-OCR:ro
      - /data/app_workspace/glm-ocr/logs-docker:/var/log/vllm
      # Persisted across restarts so transformers is only downloaded once.
      - /data/app_workspace/glm-ocr/pip-packages:/opt/pip-packages:rw
      - /data/app_workspace/glm-ocr/pip-cache:/root/.cache/pip:rw

    entrypoint: ["/bin/bash", "-c"]
    # The script below is handed to `bash -c` as a single argument.
    # Every dollar sign meant for the container shell is doubled so that
    # Compose passes it through instead of interpolating it on the host.
    command:
      - |
        # Abort on the first failing command, including a failure on the
        # left-hand side of a pipeline (pip piped into tail).
        set -euo pipefail

        # Check what is already installed first; install only when needed.
        echo "=== 检查 transformers 版本 ==="
        # Versions are compared as parsed versions, not as strings, so that
        # e.g. 5.10.0 is correctly treated as newer than 5.3.0. An import
        # error also yields a non-zero exit and falls through to the install.
        if python3 -c "
        import sys
        import transformers
        from packaging.version import Version
        ok = Version(transformers.__version__) >= Version('5.3.0')
        sys.exit(0 if ok else 1)
        " 2>/dev/null; then
          echo "✅ 使用已安装的 transformers $$(python3 -c 'import transformers; print(transformers.__version__)')"
        else
          echo "⚠️ 安装或更新 transformers>=5.3.0..."
          # --upgrade is required: without it pip leaves an existing, older
          # copy inside the --target directory untouched.
          if ! pip3 install "transformers>=5.3.0" \
              --upgrade \
              --target /opt/pip-packages \
              --root-user-action=ignore \
              -q 2>&1 | tail -5; then
            echo "❌ pip install transformers>=5.3.0 failed" >&2
            exit 1
          fi
        fi

        echo "=== 启动 vLLM ==="
        # exec replaces bash with the server process so that signals sent by
        # `docker stop` reach vLLM directly.
        # NOTE(review): --allowed-local-media-path / lets API clients refer to
        # any file visible inside the container; narrow it to the directory
        # that actually holds input media if clients allow.
        exec python3 -m vllm.entrypoints.openai.api_server \
          --model /models/GLM-OCR \
          --served-model-name GLM-OCR \
          --host 0.0.0.0 \
          --port 30000 \
          --api-key "$${VLLM_API_KEY}" \
          --gpu-memory-utilization 0.60 \
          --max-model-len 4096 \
          --max-num-seqs 96 \
          --enable-prefix-caching \
          --trust-remote-code \
          --allowed-local-media-path / \
          --dtype bfloat16

    ulimits:
      memlock: 67108864
      stack: 67108864
    ipc: host

    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              # Host GPU index reserved for this service.
              device_ids: ["6"]
              capabilities: [gpu]

    healthcheck:
      # Runs inside the container, so the key is read from the container
      # environment rather than being written into this file.
      test:
        - CMD-SHELL
        - >-
          curl -f http://localhost:30000/health
          -H "Authorization: Bearer $${VLLM_API_KEY}"
          || exit 1
      interval: 30s
      timeout: 10s
      retries: 5
      # Grace period for the optional pip install and model load.
      start_period: 180s

    logging:
      driver: "json-file"
      options:
        max-size: "500m"
        max-file: "3"