docker-compose.yaml 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193
  1. services:
  2. qwen3.5-122b:
  3. image: lmsysorg/sglang:latest
  4. container_name: qwen3.5-122b-sglang
  5. runtime: nvidia
  6. shm_size: '200gb'
  7. ports:
  8. - "25423:30000"
  9. volumes:
  10. # # 宿主机路径:容器内路径
  11. - /data/app_workspace/models:/model:ro
  12. - ~/.cache/huggingface:/root/.cache/huggingface
  13. - /data/app_workspace/deploy_models/sglang/logs:/var/log/sglang # 日志目录映射
  14. - /data/app_workspace/deploy_models/sglang/bench_suite:/bench_suite #脚本目录映射
  15. environment:
  16. - CUDA_VISIBLE_DEVICES
  17. - PYTHONUNBUFFERED=1 # 确保实时输出
  18. command: >
  19. sh -c "mkdir -p /var/log/sglang &&
  20. python3 -m sglang.launch_server
  21. --model-path /model/Qwen3.5-122B-A10B
  22. --tp 4
  23. --host 0.0.0.0
  24. --port 30000
  25. --api-key sk-prod_ojkjwcO4TTd9TL3vK6uo8a2Dvcdoz64u_9a89845f
  26. --mem-fraction-static 0.95
  27. --log-level info 2>&1 | tee /var/log/sglang/qwen3_5-122b-server.log"
  28. ipc: host
  29. deploy:
  30. resources:
  31. reservations:
  32. devices:
  33. - driver: nvidia
  34. device_ids: ["0","1","2","3"] # Modify for multiple GPUs: ["0", "1"]
  35. #count: all
  36. capabilities: [gpu]
  # SGLang embedding server for the Qwen3-Embedding-8B model.
  # Container port 30000 is published on host port 25425. It reserves GPU 5,
  # the same device id that qwen3-reranker-8b reserves, and takes a 0.45
  # static memory fraction.
  qwen3-embedding-8b:
    image: lmsysorg/sglang:latest
    container_name: qwen3-embedding-8b-sglang
    runtime: nvidia
    shm_size: '100gb'
    ports:
      - "25425:30000"
    volumes:
      # host path:container path
      - /data/app_workspace/models:/model:ro
      - ~/.cache/huggingface:/root/.cache/huggingface
      - /data/app_workspace/deploy_models/sglang/logs:/var/log/sglang  # log directory mapping
      - /data/app_workspace/deploy_models/sglang/bench_suite:/bench_suite  # script directory mapping
    environment:
      # No value given: passed through from the environment Compose runs in.
      # NOTE(review): if this is set on the host, confirm it agrees with the
      # GPU numbering seen inside the container.
      - CUDA_VISIBLE_DEVICES
      - PYTHONUNBUFFERED=1  # ensure real-time output
    # The API key is no longer stored in this file. Provide
    # QWEN3_EMBEDDING_8B_API_KEY through the shell environment or an untracked
    # .env file; Compose aborts with the message below when it is missing.
    # Server output (stdout and stderr) is mirrored to the log file via tee.
    command: >
      sh -c "mkdir -p /var/log/sglang &&
      python3 -m sglang.launch_server
      --model-path /model/Qwen3-Embedding-8B
      --is-embedding
      --tp 1
      --host 0.0.0.0
      --port 30000
      --api-key ${QWEN3_EMBEDDING_8B_API_KEY:?set QWEN3_EMBEDDING_8B_API_KEY in the environment or .env}
      --mem-fraction-static 0.45
      --log-level info 2>&1 | tee /var/log/sglang/qwen3-embedding-8b-server.log"
    ipc: host
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["5"]  # Modify for multiple GPUs: ["0", "1"]
              # count: all
              capabilities: [gpu]
    # Probe the embeddings endpoint with a tiny request. The bearer token must
    # be the key the server was started with, so both read the same variable;
    # a different token makes every probe fail and the service never becomes
    # healthy.
    # NOTE(review): some SGLang versions may also require a "model" field in
    # the request body - confirm against the deployed image.
    healthcheck:
      test:
        - "CMD"
        - "curl"
        - "-f"
        - "http://localhost:30000/v1/embeddings"
        - "-H"
        - "Authorization: Bearer ${QWEN3_EMBEDDING_8B_API_KEY}"
        - "-H"
        - "Content-Type: application/json"
        - "-d"
        - '{"input": "health"}'
      interval: 10s
      timeout: 5s
      retries: 30
      start_period: 60s
  # SGLang server for the Qwen3-Reranker-8B model.
  # Container port 30000 is published on host port 25426. It reserves GPU 5,
  # the same device id that qwen3-embedding-8b reserves, takes a 0.50 static
  # memory fraction, and starts only after that service reports healthy.
  qwen3-reranker-8b:
    image: lmsysorg/sglang:latest
    container_name: qwen3-reranker-8b-sglang
    runtime: nvidia
    shm_size: '100gb'
    ports:
      - "25426:30000"
    volumes:
      # host path:container path
      - /data/app_workspace/models:/model:ro
      - ~/.cache/huggingface:/root/.cache/huggingface
      - /data/app_workspace/deploy_models/sglang/logs:/var/log/sglang  # log directory mapping
      # Read-only SGLang checkout; supplies the chat template used below.
      - /data/app_workspace/deploy_models/sglang/sglang-main:/sglang/sglang-main:ro
      - /data/app_workspace/deploy_models/sglang/bench_suite:/bench_suite  # script directory mapping
    environment:
      # No value given: passed through from the environment Compose runs in.
      # NOTE(review): if this is set on the host, confirm it agrees with the
      # GPU numbering seen inside the container.
      - CUDA_VISIBLE_DEVICES
      - PYTHONUNBUFFERED=1  # ensure real-time output
    # The API key is no longer stored in this file. Provide
    # QWEN3_RERANKER_8B_API_KEY through the shell environment or an untracked
    # .env file; Compose aborts with the message below when it is missing.
    # Server output (stdout and stderr) is mirrored to the log file via tee.
    command: >
      sh -c "mkdir -p /var/log/sglang &&
      python3 -m sglang.launch_server
      --model-path /model/Qwen3-Reranker-8B
      --tp 1
      --host 0.0.0.0
      --port 30000
      --api-key ${QWEN3_RERANKER_8B_API_KEY:?set QWEN3_RERANKER_8B_API_KEY in the environment or .env}
      --mem-fraction-static 0.50
      --disable-radix-cache
      --chat-template /sglang/sglang-main/examples/chat_template/qwen3_reranker.jinja
      --log-level info 2>&1 | tee /var/log/sglang/qwen3-reranker-8b-server.log"
    ipc: host
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["5"]  # Modify for multiple GPUs: ["0", "1"]
              # count: all
              capabilities: [gpu]
    depends_on:
      qwen3-embedding-8b:
        condition: service_healthy  # wait for the qwen3-embedding-8b health check to pass
  # SGLang server for the Qwen3.5-35B-A3B model.
  # Container port 30000 is published on host port 25427; it reserves GPU 7.
  qwen3.5-35b:
    image: lmsysorg/sglang:latest
    container_name: qwen3.5-35b-sglang
    runtime: nvidia
    shm_size: '100gb'
    ports:
      - "25427:30000"
    volumes:
      # host path:container path
      - /data/app_workspace/models:/model:ro
      - ~/.cache/huggingface:/root/.cache/huggingface
      - /data/app_workspace/deploy_models/sglang/logs:/var/log/sglang  # log directory mapping
      - /data/app_workspace/deploy_models/sglang/bench_suite:/bench_suite  # script directory mapping
    environment:
      # No value given: passed through from the environment Compose runs in.
      # NOTE(review): if this is set on the host, confirm it agrees with the
      # GPU numbering seen inside the container.
      - CUDA_VISIBLE_DEVICES
      - PYTHONUNBUFFERED=1  # ensure real-time output
    # The API key is no longer stored in this file. Provide
    # QWEN35_35B_API_KEY through the shell environment or an untracked .env
    # file; Compose aborts with the message below when it is missing.
    # Server output (stdout and stderr) is mirrored to the log file via tee.
    command: >
      sh -c "mkdir -p /var/log/sglang &&
      python3 -m sglang.launch_server
      --model-path /model/Qwen3.5-35B-A3B
      --tp 1
      --host 0.0.0.0
      --port 30000
      --api-key ${QWEN35_35B_API_KEY:?set QWEN35_35B_API_KEY in the environment or .env}
      --log-level info 2>&1 | tee /var/log/sglang/qwen3-35b-server.log"
    ipc: host
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["7"]  # Modify for multiple GPUs: ["0", "1"]
              # count: all
              capabilities: [gpu]
  # SGLang server for the Qwen3.6-27B model.
  # Container port 30000 is published on host port 25424; it reserves GPU 4.
  qwen3.6-27b:
    image: lmsysorg/sglang:latest
    container_name: qwen3.6-27b-sglang
    runtime: nvidia
    shm_size: '100gb'
    ports:
      - "25424:30000"
    volumes:
      # host path:container path
      - /data/app_workspace/models:/model:ro
      - ~/.cache/huggingface:/root/.cache/huggingface
      - /data/app_workspace/deploy_models/sglang/logs:/var/log/sglang  # log directory mapping
      - /data/app_workspace/deploy_models/sglang/bench_suite:/bench_suite  # script directory mapping
    environment:
      # No value given: passed through from the environment Compose runs in.
      # NOTE(review): if this is set on the host, confirm it agrees with the
      # GPU numbering seen inside the container.
      - CUDA_VISIBLE_DEVICES
      - PYTHONUNBUFFERED=1  # ensure real-time output
    # The API key is no longer stored in this file. Provide
    # QWEN36_27B_API_KEY through the shell environment or an untracked .env
    # file; Compose aborts with the message below when it is missing.
    # Server output (stdout and stderr) is mirrored to the log file via tee.
    command: >
      sh -c "mkdir -p /var/log/sglang &&
      python3 -m sglang.launch_server
      --model-path /model/Qwen3.6-27B
      --tp 1
      --host 0.0.0.0
      --port 30000
      --api-key ${QWEN36_27B_API_KEY:?set QWEN36_27B_API_KEY in the environment or .env}
      --log-level info 2>&1 | tee /var/log/sglang/qwen3.6-27b-server.log"
    ipc: host
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["4"]  # Modify for multiple GPUs: ["0", "1"]
              # count: all
              capabilities: [gpu]