test_scheduler.py 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222
  1. import pytest
  2. from gpustack.scheduler.evaluator import evaluate_model_metadata
  3. from tests.utils.model import new_model
  4. from gpustack.scheduler.scheduler import evaluate_pretrained_config
  5. from gpustack.schemas.models import CategoryEnum, BackendEnum
  6. @pytest.mark.parametrize(
  7. "case_name, model, expect_error, expect_error_match, expect_categories",
  8. [
  9. (
  10. # Checkpoint:
  11. # The model contains custom code but `--trust-remote-code` is not provided.
  12. # This should raise a ValueError with a specific message.
  13. "custom_code_without_trust_remote_code",
  14. new_model(
  15. 1,
  16. "test_name",
  17. 1,
  18. huggingface_repo_id="microsoft/Phi-4-multimodal-instruct",
  19. backend=BackendEnum.VLLM,
  20. backend_parameters=[],
  21. ),
  22. ValueError,
  23. "The model contains custom code that must be executed to load correctly. If you trust the source, please pass the backend parameter `--trust-remote-code` to allow custom code to be run.",
  24. None,
  25. ),
  26. (
  27. # Checkpoint:
  28. # The model contains custom code and `--trust-remote-code` is provided.
  29. # This should pass without errors and set the model category to LLM.
  30. "custom_code_with_trust_remote_code",
  31. new_model(
  32. 1,
  33. "test_name",
  34. 1,
  35. huggingface_repo_id="microsoft/Phi-4-multimodal-instruct",
  36. backend=BackendEnum.VLLM,
  37. backend_parameters=["--trust-remote-code"],
  38. ),
  39. None,
  40. None,
  41. ["LLM"],
  42. ),
  43. (
  44. # Checkpoint:
  45. # The model is of an unsupported architecture.
  46. # This should raise a ValueError with a specific message.
  47. "unsupported_architecture",
  48. new_model(
  49. 1,
  50. "test_name",
  51. 1,
  52. huggingface_repo_id="google-t5/t5-base",
  53. backend=BackendEnum.VLLM,
  54. backend_parameters=[],
  55. ),
  56. ValueError,
  57. "Unsupported architecture:",
  58. None,
  59. ),
  60. (
  61. # Checkpoint:
  62. # The model is of an unsupported architecture using custom backend.
  63. # This should pass without errors.
  64. "pass_unsupported_architecture_custom_backend",
  65. new_model(
  66. 1,
  67. "test_name",
  68. 1,
  69. huggingface_repo_id="google-t5/t5-base",
  70. backend=BackendEnum.CUSTOM,
  71. backend_parameters=[],
  72. ),
  73. None,
  74. None,
  75. None,
  76. ),
  77. (
  78. # Checkpoint:
  79. # The model is of an unsupported architecture using custom backend version.
  80. # This should pass without errors.
  81. "pass_unsupported_architecture_custom_backend_version",
  82. new_model(
  83. 1,
  84. "test_name",
  85. 1,
  86. huggingface_repo_id="google-t5/t5-base",
  87. backend=BackendEnum.VLLM,
  88. backend_version="custom_version",
  89. backend_parameters=[],
  90. ),
  91. None,
  92. None,
  93. None,
  94. ),
  95. (
  96. # Checkpoint:
  97. # The model is of a supported architecture.
  98. # This should pass without errors.
  99. "supported_architecture",
  100. new_model(
  101. 1,
  102. "test_name",
  103. 1,
  104. huggingface_repo_id="Qwen/Qwen2.5-0.5B-Instruct",
  105. backend=BackendEnum.VLLM,
  106. backend_parameters=[],
  107. ),
  108. None,
  109. None,
  110. ["LLM"],
  111. ),
  112. (
  113. # Checkpoint:
  114. # The model could run with vllm backend but get import error while get pretrained config.
  115. # This should pass without errors.
  116. "pass_import_error_in_pretrained_config",
  117. new_model(
  118. 1,
  119. "test_name",
  120. 1,
  121. huggingface_repo_id="deepseek-ai/DeepSeek-OCR",
  122. backend=BackendEnum.VLLM,
  123. backend_parameters=["--trust-remote-code"],
  124. ),
  125. None,
  126. None,
  127. ["LLM"],
  128. ),
  129. (
  130. # Checkpoint:
  131. # Image model.
  132. # This should pass without errors.
  133. "pass_image_model",
  134. new_model(
  135. 1,
  136. "test_name",
  137. 1,
  138. huggingface_repo_id="Tongyi-MAI/Z-Image-Turbo",
  139. backend=BackendEnum.SGLANG,
  140. backend_parameters=[],
  141. categories=[CategoryEnum.IMAGE],
  142. ),
  143. None,
  144. None,
  145. ["IMAGE"],
  146. ),
  147. ],
  148. )
  149. @pytest.mark.asyncio
  150. async def test_evaluate_pretrained_config(
  151. config, case_name, model, expect_error, expect_error_match, expect_categories
  152. ):
  153. try:
  154. if expect_error:
  155. with pytest.raises(expect_error, match=expect_error_match):
  156. await evaluate_pretrained_config(model)
  157. else:
  158. await evaluate_pretrained_config(model)
  159. if expect_categories:
  160. assert model.categories == [CategoryEnum[c] for c in expect_categories]
  161. except AssertionError as e:
  162. raise AssertionError(f"Test case '{case_name}' failed: {e}") from e
  163. @pytest.mark.parametrize(
  164. "case_name, model, expect_compatible, expect_error_match",
  165. [
  166. (
  167. # Checkpoint:
  168. # The model is of an unsupported architecture.
  169. # This should raise a ValueError with a specific message.
  170. "unsupported_architecture",
  171. new_model(
  172. 1,
  173. "test_name",
  174. 1,
  175. huggingface_repo_id="google-t5/t5-base",
  176. backend=BackendEnum.VLLM,
  177. backend_parameters=[],
  178. ),
  179. False,
  180. [
  181. "Unsupported architecture: ['T5ForConditionalGeneration']. To proceed with deployment, ensure the model is supported by backend, or deploy it using a custom backend version or custom backend."
  182. ],
  183. ),
  184. (
  185. # Checkpoint:
  186. # The model is of an unsupported architecture but config environment variable set to skip evaluation.
  187. # This should return compatible.
  188. "pass_evaluation_skip",
  189. new_model(
  190. 1,
  191. "test_name",
  192. 1,
  193. huggingface_repo_id="google-t5/t5-base",
  194. backend=BackendEnum.VLLM,
  195. backend_parameters=[],
  196. env={"GPUSTACK_SKIP_MODEL_EVALUATION": "1"},
  197. ),
  198. True,
  199. [],
  200. ),
  201. ],
  202. )
  203. @pytest.mark.asyncio
  204. async def test_evaluate_model_metadata(
  205. config, case_name, model, expect_compatible, expect_error_match
  206. ):
  207. try:
  208. actual_compatible, actual_error = await evaluate_model_metadata(
  209. config, model, []
  210. )
  211. assert (
  212. actual_compatible == expect_compatible
  213. ), f"Expected compatibility: {expect_compatible}, but got: {actual_compatible}. Error: {actual_error}"
  214. assert (
  215. expect_error_match == actual_error
  216. ), f"Expected error message: {expect_error_match}, but got: {actual_error}"
  217. except AssertionError as e:
  218. raise AssertionError(f"Test case '{case_name}' failed: {e}") from e