#!/bin/bash
# test_models.sh - curl-based smoke test for several SGLang-served models.
#
# `set -e` is deliberately NOT enabled: a failing check must be counted and
# reported, not abort the whole run.

# ANSI colour escapes. They are stored as literal backslash sequences and are
# meant to be printed with `echo -e`.
readonly GREEN='\033[0;32m'
readonly RED='\033[0;31m'
readonly YELLOW='\033[1;33m'
readonly NC='\033[0m' # reset ("no colour")
  9. # 模型配置(按顺序定义) "qwen3-8b"
  10. MODEL_NAMES=("qwen3.6-27b" "qwen3.5-35b" "qwen3.5-122b" "qwen3-embedding-8b" "qwen3-reranker-8b")
  11. declare -A MODEL_PORTS=(
  12. ["qwen3-8b"]="25428"
  13. ["qwen3.6-27b"]="25424"
  14. ["qwen3.5-35b"]="25427"
  15. ["qwen3.5-122b"]="25423"
  16. ["qwen3-embedding-8b"]="25425"
  17. ["qwen3-reranker-8b"]="25426"
  18. )
  19. declare -A MODEL_PATHS=(
  20. ["qwen3-8b"]="Qwen/Qwen3-8B"
  21. ["qwen3.6-27b"]="Qwen/Qwen3.6-27B"
  22. ["qwen3.5-35b"]="Qwen/Qwen3.5-35B-A3B"
  23. ["qwen3.5-122b"]="Qwen/Qwen3.5-122B-A10B"
  24. ["qwen3-embedding-8b"]="Qwen/Qwen3-Embedding-8B"
  25. ["qwen3-reranker-8b"]="Qwen/Qwen3-Reranker-8B"
  26. )
  27. declare -A MODEL_TYPES=(
  28. ["qwen3-8b"]="chat"
  29. ["qwen3.6-27b"]="chat"
  30. ["qwen3.5-35b"]="chat"
  31. ["qwen3.5-122b"]="chat"
  32. ["qwen3-embedding-8b"]="embedding"
  33. ["qwen3-reranker-8b"]="rerank"
  34. )
  35. declare -A API_KEYS=(
  36. ["qwen3-8b"]="sk_prod_SELVoIV1d3gku28koH_ONg8L_B2cQis__71f55615"
  37. ["qwen3.6-27b"]="sk_prod_HH21x5WB9Pm7IM9Bf808BoJPEn_4bPX5_f2c5f3f6"
  38. ["qwen3.5-35b"]="sk_prod_0NuLZt1a2UrD80F9iB-GTxOIuAkJSZxH_5522d7ae"
  39. ["qwen3.5-122b"]="sk-prod_ojkjwcO4TTd9TL3vK6uo8a2Dvcdoz64u_9a89845f"
  40. ["qwen3-embedding-8b"]="sk_prod_3HDoVka8mU8Jqj9Xnmfkn8bxk5kmzKrz_700c186f"
  41. ["qwen3-reranker-8b"]="sk_prod_dvgYHKWFoQlYAKmkIvBSyuguNSQGeNh0_23c65608"
  42. )
  43. TIMEOUT=30
  44. echo "========================================"
  45. echo "SGLang 多模型健康检查 (curl)"
  46. echo "时间: $(date '+%Y-%m-%d %H:%M:%S')"
  47. echo "========================================"
  48. TOTAL=0
  49. SUCCESS=0
  50. # 测试对话模型
  51. test_chat_model() {
  52. local name=$1
  53. local port=$2
  54. local model_path=$3
  55. local api_key=${API_KEYS[$name]}
  56. echo ""
  57. echo "----------------------------------------"
  58. echo "测试模型: $name (对话模型)"
  59. echo "端口: $port"
  60. echo "----------------------------------------"
  61. local response
  62. local body
  63. local code
  64. response=$(curl -s -w "\n%{http_code}" \
  65. --max-time $TIMEOUT \
  66. -H "Content-Type: application/json" \
  67. -H "Authorization: Bearer $api_key" \
  68. -d "{
  69. \"model\": \"$model_path\",
  70. \"messages\": [{\"role\": \"user\", \"content\": \"你好,请用一句话介绍自己\"}],
  71. \"temperature\": 0.7,
  72. \"max_tokens\": 50
  73. }" \
  74. "http://localhost:$port/v1/chat/completions" 2>/dev/null || echo -e "\n000")
  75. body=$(echo "$response" | head -n -1)
  76. code=$(echo "$response" | tail -n 1)
  77. if [ "$code" = "200" ]; then
  78. local content=$(echo "$body" | grep -o '"content":"[^"]*"' | head -1 | cut -d'"' -f4)
  79. echo -e "${GREEN}✅ 成功${NC} HTTP $code"
  80. echo "回复: ${content:0:100}..."
  81. ((SUCCESS++))
  82. else
  83. echo -e "${RED}❌ 失败${NC} HTTP $code"
  84. echo "响应: ${body:0:200}"
  85. fi
  86. ((TOTAL++))
  87. }
  88. # 测试嵌入模型
  89. test_embedding_model() {
  90. local name=$1
  91. local port=$2
  92. local model_path=$3
  93. local api_key=${API_KEYS[$name]}
  94. echo ""
  95. echo "----------------------------------------"
  96. echo "测试模型: $name (嵌入模型)"
  97. echo "端口: $port"
  98. echo "----------------------------------------"
  99. local response
  100. local body
  101. local code
  102. response=$(curl -s -w "\n%{http_code}" \
  103. --max-time $TIMEOUT \
  104. -H "Content-Type: application/json" \
  105. -H "Authorization: Bearer $api_key" \
  106. -d "{
  107. \"model\": \"$model_path\",
  108. \"input\": [\"你好,这是一个测试句子\", \"Hello world\"]
  109. }" \
  110. "http://localhost:$port/v1/embeddings" 2>/dev/null || echo -e "\n000")
  111. body=$(echo "$response" | head -n -1)
  112. code=$(echo "$response" | tail -n 1)
  113. if [ "$code" = "200" ]; then
  114. local dims=$(echo "$body" | grep -o '"embedding":\[[^]]*\]' | head -1 | grep -o ',' | wc -l)
  115. dims=$((dims + 1))
  116. echo -e "${GREEN}✅ 成功${NC} HTTP $code"
  117. echo "向量维度: $dims"
  118. ((SUCCESS++))
  119. else
  120. echo -e "${YELLOW}⚠️ Embedding 接口失败,尝试 Rerank 接口...${NC}"
  121. response=$(curl -s -w "\n%{http_code}" \
  122. --max-time $TIMEOUT \
  123. -H "Content-Type: application/json" \
  124. -H "Authorization: Bearer $api_key" \
  125. -d "{
  126. \"model\": \"$model_path\",
  127. \"query\": \"测试查询\",
  128. \"documents\": [\"文档1\", \"文档2\"]
  129. }" \
  130. "http://localhost:$port/v1/rerank" 2>/dev/null || echo -e "\n000")
  131. code=$(echo "$response" | tail -n 1)
  132. if [ "$code" = "200" ]; then
  133. echo -e "${GREEN}✅ 成功${NC} (Rerank 接口可用)"
  134. ((SUCCESS++))
  135. else
  136. echo -e "${RED}❌ 失败${NC} HTTP $code"
  137. fi
  138. fi
  139. ((TOTAL++))
  140. }
  141. # 测试重排序模型
  142. test_rerank_model() {
  143. local name=$1
  144. local port=$2
  145. local model_path=$3
  146. local api_key=${API_KEYS[$name]}
  147. echo ""
  148. echo "----------------------------------------"
  149. echo "测试模型: $name (重排序模型)"
  150. echo "端口: $port"
  151. echo "----------------------------------------"
  152. local response
  153. local body
  154. local code
  155. response=$(curl -s -w "\n%{http_code}" \
  156. --max-time $TIMEOUT \
  157. -H "Content-Type: application/json" \
  158. -H "Authorization: Bearer $api_key" \
  159. -d "{
  160. \"model\": \"$model_path\",
  161. \"query\": \"什么是机器学习\",
  162. \"documents\": [\"机器学习是AI的分支\", \"Python是编程语言\", \"深度学习使用神经网络\"],
  163. \"top_n\": 2
  164. }" \
  165. "http://localhost:$port/v1/rerank" 2>/dev/null || echo -e "\n000")
  166. body=$(echo "$response" | head -n -1)
  167. code=$(echo "$response" | tail -n 1)
  168. if [ "$code" = "200" ]; then
  169. local top_doc=$(echo "$body" | grep -o '"text":"[^"]*"' | head -1 | cut -d'"' -f4)
  170. local score=$(echo "$body" | grep -o '"score":[0-9.]*' | head -1 | cut -d':' -f2)
  171. echo -e "${GREEN}✅ 成功${NC} HTTP $code"
  172. echo "Top1: ${top_doc:0:50}... (得分: $score)"
  173. ((SUCCESS++))
  174. else
  175. echo -e "${YELLOW}⚠️ Rerank 接口失败,尝试 Chat 接口...${NC}"
  176. response=$(curl -s -w "\n%{http_code}" \
  177. --max-time $TIMEOUT \
  178. -H "Content-Type: application/json" \
  179. -H "Authorization: Bearer $api_key" \
  180. -d "{
  181. \"model\": \"$model_path\",
  182. \"messages\": [{\"role\": \"user\", \"content\": \"你好\"}],
  183. \"max_tokens\": 20
  184. }" \
  185. "http://localhost:$port/v1/chat/completions" 2>/dev/null || echo -e "\n000")
  186. code=$(echo "$response" | tail -n 1)
  187. if [ "$code" = "200" ]; then
  188. echo -e "${GREEN}✅ 成功${NC} (Chat 接口可用)"
  189. ((SUCCESS++))
  190. else
  191. echo -e "${RED}❌ 失败${NC} HTTP $code"
  192. fi
  193. fi
  194. ((TOTAL++))
  195. }
  196. # 快速检查
  197. quick_check() {
  198. echo ""
  199. echo "========================================"
  200. echo "快速检查模式"
  201. echo "========================================"
  202. for key in "${MODEL_NAMES[@]}"; do
  203. local port=${MODEL_PORTS[$key]}
  204. local api_key=${API_KEYS[$key]}
  205. local code
  206. code=$(curl -s -o /dev/null -w "%{http_code}" \
  207. --max-time 5 \
  208. -H "Authorization: Bearer $api_key" \
  209. "http://localhost:$port/v1/models" 2>/dev/null || echo "000")
  210. if [ "$code" = "200" ]; then
  211. echo -e "${GREEN}✅${NC} $key (端口 $port)"
  212. ((SUCCESS++))
  213. else
  214. echo -e "${RED}❌${NC} $key (端口 $port) HTTP $code"
  215. fi
  216. ((TOTAL++))
  217. done
  218. }
  219. # 主函数
  220. main() {
  221. if [ "$1" = "--quick" ]; then
  222. quick_check
  223. elif [ "$1" = "--model" ] && [ -n "$2" ]; then
  224. local key=$2
  225. local port=${MODEL_PORTS[$key]}
  226. local path=${MODEL_PATHS[$key]}
  227. local mtype=${MODEL_TYPES[$key]}
  228. case $mtype in
  229. chat) test_chat_model "$key" "$port" "$path" ;;
  230. embedding) test_embedding_model "$key" "$port" "$path" ;;
  231. rerank) test_rerank_model "$key" "$port" "$path" ;;
  232. esac
  233. else
  234. # 按顺序测试所有模型
  235. for key in "${MODEL_NAMES[@]}"; do
  236. local port=${MODEL_PORTS[$key]}
  237. local path=${MODEL_PATHS[$key]}
  238. local mtype=${MODEL_TYPES[$key]}
  239. case $mtype in
  240. chat) test_chat_model "$key" "$port" "$path" ;;
  241. embedding) test_embedding_model "$key" "$port" "$path" ;;
  242. rerank) test_rerank_model "$key" "$port" "$path" ;;
  243. esac
  244. done
  245. fi
  246. echo ""
  247. echo "========================================"
  248. echo "测试结果摘要"
  249. echo "========================================"
  250. echo "总计: $SUCCESS / $TOTAL 个模型正常"
  251. if [ $SUCCESS -eq $TOTAL ]; then
  252. echo -e "${GREEN}所有模型运行正常!${NC}"
  253. exit 0
  254. else
  255. echo -e "${RED}部分模型异常,请检查日志${NC}"
  256. exit 1
  257. fi
  258. }
  259. main "$@"