# docker-compose.yaml — vLLM model serving stack (Qwen chat / embedding / reranker services)
  1. services:
  2. qwen3.5-122b:
  3. image: vllm/vllm-openai:latest
  4. container_name: qwen3.5-122b-vllm
  5. runtime: nvidia
  6. shm_size: '10gb'
  7. ports:
  8. - "25423:30000"
  9. volumes:
  10. # # 宿主机路径:容器内路径
  11. - /data/app_workspace/models:/model:ro
  12. - ~/.cache/huggingface:/root/.cache/huggingface
  13. - /data/app_workspace/deploy_models/vllm/logs:/var/log/vllm # 日志目录映射
  14. - /data/app_workspace/deploy_models/vllm/vllm_start_shell:/vllm_start_shell:ro #
  15. environment:
  16. - CUDA_VISIBLE_DEVICES
  17. - PYTHONUNBUFFERED=1 # 确保实时输出
  18. - VLLM_LOGGING_LEVEL=INFO # 使用环境变量控制日志级别
  19. # 直接执行脚本,避免复杂的 shell 嵌套
  20. entrypoint: ["/bin/bash", "/vllm_start_shell/start-vllm-qwen3.5-122b.sh"]
  21. ipc: host
  22. deploy:
  23. resources:
  24. reservations:
  25. devices:
  26. - driver: nvidia
  27. device_ids: ["0","1"] # Modify for multiple GPUs: ["0", "1"]
  28. #count: all
  29. capabilities: [gpu]
  # Small chat model on GPU 2; exposes a healthcheck that downstream
  # services (qwen3-embedding-8b) wait on via depends_on.
  qwen3-8b:
    image: vllm/vllm-openai:latest
    container_name: qwen3-8b-vllm
    runtime: nvidia
    shm_size: '10gb'
    ports:
      - "25424:30000"
    volumes:
      # host path : container path
      - /data/app_workspace/models:/model:ro
      - ~/.cache/huggingface:/root/.cache/huggingface
      - /data/app_workspace/deploy_models/vllm/logs:/var/log/vllm  # log directory mapping
      - /data/app_workspace/deploy_models/vllm/vllm_start_shell:/vllm_start_shell:ro
    environment:
      - CUDA_VISIBLE_DEVICES
      - PYTHONUNBUFFERED=1  # ensure real-time (unbuffered) output
      - VLLM_LOGGING_LEVEL=INFO  # control log verbosity via environment variable
    # Run the startup script directly; avoids fragile nested shell quoting.
    entrypoint: ["/bin/bash", "/vllm_start_shell/start-vllm-qwen3-8b.sh"]
    ipc: host
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["2"]  # modify for multiple GPUs, e.g. ["0", "1"]
              # count: all
              capabilities: [gpu]
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:30000/v1/models", "-H", "Authorization: Bearer lq123456"]
      interval: 10s
      timeout: 5s
      retries: 30
      start_period: 60s
  # Embedding model; shares GPU 2 with qwen3-8b, so it waits for that
  # service's healthcheck before starting.
  qwen3-embedding-8b:
    image: vllm/vllm-openai:latest
    # image: vllm/vllm-openai:v0.15.0
    container_name: qwen3-embedding-8b-vllm
    runtime: nvidia
    shm_size: '5gb'
    ports:
      - "25425:30000"
    volumes:
      # host path : container path
      - /data/app_workspace/models:/model:ro
      - ~/.cache/huggingface:/root/.cache/huggingface
      - /data/app_workspace/deploy_models/vllm/logs:/var/log/vllm  # log directory mapping
      - /data/app_workspace/deploy_models/vllm/vllm_start_shell:/vllm_start_shell:ro
    environment:
      - CUDA_VISIBLE_DEVICES
      - PYTHONUNBUFFERED=1  # ensure real-time (unbuffered) output
      - VLLM_LOGGING_LEVEL=INFO  # control log verbosity via environment variable
    # Run the startup script directly; avoids fragile nested shell quoting.
    entrypoint: ["/bin/bash", "/vllm_start_shell/start-vllm-qwen3-embedding-8b.sh"]
    ipc: host
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["2"]  # modify for multiple GPUs, e.g. ["0", "1"]
              # count: all
              capabilities: [gpu]
    depends_on:
      qwen3-8b:
        condition: service_healthy  # wait until qwen3-8b passes its healthcheck
  # Reranker model on GPU 3; pinned to v0.15.0 because the newer image
  # does not support rerank deployment (per the original note below).
  qwen3-reranker-8b:
    # image: vllm/vllm-openai:latest  # v0.18 does not support rerank deployment
    image: vllm/vllm-openai:v0.15.0
    container_name: qwen3-reranker-8b-vllm
    runtime: nvidia
    shm_size: '5gb'
    ports:
      - "25426:30000"
    volumes:
      # host path : container path
      - /data/app_workspace/models:/model:ro
      - ~/.cache/huggingface:/root/.cache/huggingface
      - /data/app_workspace/deploy_models/vllm/logs:/var/log/vllm  # log directory mapping
      - /data/app_workspace/deploy_models/vllm/vllm_start_shell:/vllm_start_shell:ro
      - /data/app_workspace/deploy_models/vllm/sglang-main:/vllm/sglang-main:ro
    environment:
      - CUDA_VISIBLE_DEVICES
      - PYTHONUNBUFFERED=1  # ensure real-time (unbuffered) output
      - VLLM_LOGGING_LEVEL=INFO  # control log verbosity via environment variable
    # Run the startup script directly; avoids fragile nested shell quoting.
    entrypoint: ["/bin/bash", "/vllm_start_shell/start-vllm-qwen3-reranker-8b-15.sh"]
    ipc: host
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["3"]  # modify for multiple GPUs, e.g. ["0", "1"]
              # count: all
              capabilities: [gpu]
  # Mid-size chat model on GPU 4.
  qwen3.5-35b:
    image: vllm/vllm-openai:latest
    container_name: qwen3.5-35b-vllm
    runtime: nvidia
    shm_size: '5gb'
    ports:
      - "25427:30000"
    volumes:
      # host path : container path
      - /data/app_workspace/models:/model:ro
      - ~/.cache/huggingface:/root/.cache/huggingface
      - /data/app_workspace/deploy_models/vllm/logs:/var/log/vllm  # log directory mapping
      - /data/app_workspace/deploy_models/vllm/vllm_start_shell:/vllm_start_shell:ro
    environment:
      - CUDA_VISIBLE_DEVICES
      - PYTHONUNBUFFERED=1  # ensure real-time (unbuffered) output
      - VLLM_LOGGING_LEVEL=INFO  # control log verbosity via environment variable
    # Run the startup script directly; avoids fragile nested shell quoting.
    entrypoint: ["/bin/bash", "/vllm_start_shell/start-vllm-qwen3.5-35b.sh"]
    ipc: host
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["4"]  # modify for multiple GPUs, e.g. ["0", "1"]
              # count: all
              capabilities: [gpu]