# test_ascend_mindie.py
import argparse
import re

import pytest

from gpustack.worker.backends.ascend_mindie import (
    AscendMindIEParameters,
    AscendMindIEServer,
)
  7. @pytest.mark.parametrize(
  8. "world_size, local_world_size, args, expected",
  9. [
  10. # The following cases are a forward derivation, which means that
  11. # the world size is not provided,
  12. # and is determined by input parameters.
  13. [
  14. -1,
  15. -1,
  16. ["--pipeline-parallel-size=2", "--tensor-parallel-size=8"],
  17. AscendMindIEParameters(
  18. world_size=16,
  19. local_world_size=8,
  20. pipeline_parallel_size=2,
  21. tensor_parallel_size=8,
  22. max_prefill_tokens=8192,
  23. max_input_token_len=8192,
  24. max_iter_times=8192,
  25. ),
  26. ],
  27. [
  28. -1,
  29. -1,
  30. ["--tensor-parallel-size=8"],
  31. AscendMindIEParameters(
  32. world_size=8,
  33. tensor_parallel_size=8,
  34. moe_tensor_parallel_size=8,
  35. max_prefill_tokens=8192,
  36. max_input_token_len=8192,
  37. max_iter_times=8192,
  38. ),
  39. ],
  40. [
  41. -1,
  42. -1,
  43. ["--data-parallel-size=2", "--tensor-parallel-size=8"],
  44. AscendMindIEParameters(
  45. world_size=16,
  46. local_world_size=8,
  47. data_parallel_size=2,
  48. tensor_parallel_size=8,
  49. moe_tensor_parallel_size=16,
  50. max_prefill_tokens=8192,
  51. max_input_token_len=8192,
  52. max_iter_times=8192,
  53. ),
  54. ],
  55. [
  56. -1,
  57. -1,
  58. ["--context-parallel-size=2", "--tensor-parallel-size=8"],
  59. AscendMindIEParameters(
  60. world_size=16,
  61. local_world_size=8,
  62. context_parallel_size=2,
  63. tensor_parallel_size=8,
  64. moe_tensor_parallel_size=16,
  65. data_parallel_size=1,
  66. max_prefill_tokens=8192,
  67. max_input_token_len=8192,
  68. max_iter_times=8192,
  69. ),
  70. ],
  71. [
  72. -1,
  73. -1,
  74. ["--moe-expert-parallel-size=2", "--moe-tensor-parallel-size=8"],
  75. AscendMindIEParameters(
  76. world_size=16,
  77. local_world_size=8,
  78. tensor_parallel_size=8,
  79. moe_expert_parallel_size=2,
  80. moe_tensor_parallel_size=8,
  81. max_prefill_tokens=8192,
  82. max_input_token_len=8192,
  83. max_iter_times=8192,
  84. ),
  85. ],
  86. # The following cases are a backward derivation, which means that
  87. # the world size is provided,
  88. # and provided partial parameters.
  89. [
  90. 16,
  91. 8,
  92. ["--pipeline-parallel-size=2"],
  93. AscendMindIEParameters(
  94. world_size=16,
  95. local_world_size=8,
  96. pipeline_parallel_size=2,
  97. tensor_parallel_size=8,
  98. max_prefill_tokens=8192,
  99. max_input_token_len=8192,
  100. max_iter_times=8192,
  101. ),
  102. ],
  103. [
  104. 16,
  105. 8,
  106. ["--tensor-parallel-size=8"],
  107. AscendMindIEParameters(
  108. world_size=16,
  109. local_world_size=8,
  110. tensor_parallel_size=8,
  111. moe_tensor_parallel_size=16,
  112. max_prefill_tokens=8192,
  113. max_input_token_len=8192,
  114. max_iter_times=8192,
  115. ),
  116. ],
  117. [
  118. 16,
  119. 8,
  120. ["--data-parallel-size=2"],
  121. AscendMindIEParameters(
  122. world_size=16,
  123. local_world_size=8,
  124. data_parallel_size=2,
  125. tensor_parallel_size=8,
  126. moe_tensor_parallel_size=16,
  127. max_prefill_tokens=8192,
  128. max_input_token_len=8192,
  129. max_iter_times=8192,
  130. ),
  131. ],
  132. [
  133. 16,
  134. 8,
  135. ["--context-parallel-size=2"],
  136. AscendMindIEParameters(
  137. world_size=16,
  138. local_world_size=8,
  139. context_parallel_size=2,
  140. tensor_parallel_size=8,
  141. moe_tensor_parallel_size=16,
  142. data_parallel_size=1,
  143. max_prefill_tokens=8192,
  144. max_input_token_len=8192,
  145. max_iter_times=8192,
  146. ),
  147. ],
  148. [
  149. 16,
  150. 8,
  151. ["--moe-expert-parallel-size=2"],
  152. AscendMindIEParameters(
  153. world_size=16,
  154. local_world_size=8,
  155. moe_expert_parallel_size=2,
  156. tensor_parallel_size=16,
  157. moe_tensor_parallel_size=8,
  158. max_prefill_tokens=8192,
  159. max_input_token_len=8192,
  160. max_iter_times=8192,
  161. ),
  162. ],
  163. ],
  164. )
  165. @pytest.mark.asyncio
  166. async def test_ascend_mindie_parameters_parallelism_default(
  167. world_size, local_world_size, args, expected: AscendMindIEParameters
  168. ):
  169. actual = AscendMindIEParameters(
  170. world_size=world_size,
  171. local_world_size=local_world_size,
  172. )
  173. actual.from_args_and_envs(args)
  174. assert actual == expected
  175. @pytest.mark.parametrize(
  176. "world_size, local_world_size, args, exception_msg",
  177. [
  178. # The following cases are a forward derivation, which means that
  179. # the world size is not provided,
  180. # and is determined by input parameters.
  181. [
  182. -1,
  183. -1,
  184. ["--pipeline-parallel-size=-1"],
  185. "--pipeline-parallel-size must be greater than 0",
  186. ],
  187. [
  188. -1,
  189. -1,
  190. ["--tensor-parallel-size=3"],
  191. "--tensor-parallel-size must be the power of 2",
  192. ],
  193. [
  194. -1,
  195. -1,
  196. ["--data-parallel-size=3"],
  197. "--data-parallel-size must be the power of 2",
  198. ],
  199. [
  200. -1,
  201. -1,
  202. ["--context-parallel-size=3"],
  203. "--context-parallel-size must be the power of 2",
  204. ],
  205. [
  206. -1,
  207. -1,
  208. ["--sequence-parallel-size=3"],
  209. "--sequence-parallel-size must be the power of 2",
  210. ],
  211. [
  212. -1,
  213. -1,
  214. ["--moe-tensor-parallel-size=3"],
  215. "--moe-tensor-parallel-size must be the power of 2",
  216. ],
  217. [
  218. -1,
  219. -1,
  220. ["--moe-expert-parallel-size=3"],
  221. "--moe-expert-parallel-size must be the power of 2",
  222. ],
  223. [
  224. -1,
  225. -1,
  226. ["--pipeline-parallel-size=2", "--data-parallel-size=4"],
  227. "--pipeline-parallel-size 2 and --data-parallel-size 4 are incompatible, set --pipeline-parallel-size to 1 or disable data parallelism",
  228. ],
  229. [
  230. -1,
  231. -1,
  232. ["--data-parallel-size=4", "--context-parallel-size=2"],
  233. "--data-parallel-size 4 and --context-parallel-size 2 are incompatible, set --data-parallel-size to 1 or disable context parallelism",
  234. ],
  235. [
  236. -1,
  237. -1,
  238. ["--sequence-parallel-size=4", "--tensor-parallel-size=2"],
  239. "--sequence-parallel-size 4 must be equal to --tensor-parallel-size 2",
  240. ],
  241. [
  242. -1,
  243. -1,
  244. [
  245. "--data-parallel-size=4",
  246. "--tensor-parallel-size=2",
  247. ], # DP and TP are compatible
  248. "", # No exception expected
  249. ],
  250. [
  251. -1,
  252. -1,
  253. [
  254. "--context-parallel-size=2",
  255. "--tensor-parallel-size=4",
  256. ], # CP and TP are compatible
  257. "", # No exception expected
  258. ],
  259. [
  260. -1,
  261. -1,
  262. [
  263. "--sequence-parallel-size=4",
  264. "--tensor-parallel-size=4",
  265. ], # SP and TP are compatible
  266. "", # No exception expected
  267. ],
  268. # The following cases are a backward derivation, which means that
  269. # the world size is provided,
  270. # and provided partial parameters.
  271. # These situations should not normally occur,
  272. # if they do, it means we have made the wrong choice in resource selection.
  273. [
  274. 4,
  275. 4,
  276. ["--pipeline-parallel-size=2", "--tensor-parallel-size=4"],
  277. "--pipeline-parallel-size 2 and --tensor-parallel-size 4 must be multiples of world size: 4",
  278. ],
  279. [
  280. 16,
  281. 4,
  282. ["--tensor-parallel-size=8"],
  283. "--tensor-parallel-size 8 must be less or equal to local world size: 4 or equal to world size: 16",
  284. ],
  285. [
  286. 32,
  287. 8,
  288. ["--data-parallel-size=2", "--tensor-parallel-size=8"],
  289. "--data-parallel-size 2 and --tensor-parallel-size 8 must be multiples of world size: 32",
  290. ],
  291. [
  292. 32,
  293. 8,
  294. ["--context-parallel-size=2", "--tensor-parallel-size=8"],
  295. "--context-parallel-size 2 and --tensor-parallel-size 8 must be multiples of world size: 32",
  296. ],
  297. [
  298. 16,
  299. 4,
  300. ["--moe-expert-parallel-size=4", "--moe-tensor-parallel-size=8"],
  301. "--moe-tensor-parallel-size 8 must be less or equal to local world size: 4 or equal to world size: 16",
  302. ],
  303. [
  304. 16,
  305. 8,
  306. ["--moe-expert-parallel-size=4", "--moe-tensor-parallel-size=8"],
  307. "--moe-expert-parallel-size 4and --moe-tensor-parallel-size 8 must be multiples of world size: 16",
  308. ],
  309. [
  310. 32,
  311. 8,
  312. ["--moe-tensor-parallel-size=8"],
  313. "--moe-tensor-parallel-size 8 must be equal to world size: 32",
  314. ],
  315. ],
  316. )
  317. @pytest.mark.asyncio
  318. async def test_ascend_mindie_parameters_parallelism_violation(
  319. world_size,
  320. local_world_size,
  321. args,
  322. exception_msg: str,
  323. ):
  324. """
  325. Test AscendMindIEParameters.from_args for various parallelism violations.
  326. """
  327. if not exception_msg:
  328. # No exception expected
  329. params = AscendMindIEParameters(
  330. world_size=world_size,
  331. local_world_size=local_world_size,
  332. )
  333. params.from_args_and_envs(args)
  334. return
  335. with pytest.raises(argparse.ArgumentTypeError, match=exception_msg):
  336. params = AscendMindIEParameters(
  337. world_size=world_size,
  338. local_world_size=local_world_size,
  339. )
  340. params.from_args_and_envs(args)
  341. def test_ascend_mindie_parameters_changed_backend_parameters():
  342. baseline = AscendMindIEParameters(max_seq_len=32768)
  343. baseline.from_args_and_envs([])
  344. params = AscendMindIEParameters(max_seq_len=32768)
  345. params.from_args_and_envs(["--max-seq-len", "8192", "--dtype", "float16"])
  346. assert params.changed_backend_parameters(baseline) == [
  347. "--max-seq-len",
  348. "8192",
  349. "--max-input-token-len",
  350. "8192",
  351. "--max-prefill-tokens",
  352. "8192",
  353. "--max-iter-times",
  354. "8192",
  355. "--dtype",
  356. "float16",
  357. ]
  358. def test_filter_user_defined_parameters():
  359. parameters = [
  360. "--max-seq-len",
  361. "8192",
  362. "--max-input-token-len",
  363. "8192",
  364. "--dtype",
  365. "float16",
  366. ]
  367. user_backend_parameters = ["--max-input-token-len", "4096", "--dtype=bfloat16"]
  368. assert AscendMindIEServer._filter_user_defined_parameters(
  369. parameters,
  370. user_backend_parameters,
  371. ) == ["--max-seq-len", "8192"]
  372. def test_backend_parameter_name_keeps_store_true_no_prefix():
  373. assert AscendMindIEServer._backend_parameter_name("--no-metrics") == "no-metrics"
  374. assert (
  375. AscendMindIEServer._backend_parameter_name("--no-enable-split")
  376. == "enable-split"
  377. )