test_hub.py 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
  1. import pytest
  2. from tenacity import retry, stop_after_attempt, wait_fixed
  3. from gpustack.utils.hub import (
  4. get_hugging_face_model_min_gguf_path,
  5. get_model_scope_model_min_gguf_path,
  6. get_model_weight_size,
  7. read_repo_file_content,
  8. )
  9. from gpustack.schemas.models import (
  10. Model,
  11. SourceEnum,
  12. )
  13. from tests.utils.model import new_model
  14. def test_get_hub_model_weight_size():
  15. model_to_weight_sizes = [
  16. (
  17. Model(
  18. source=SourceEnum.HUGGING_FACE,
  19. huggingface_repo_id="Qwen/Qwen2-0.5B-Instruct",
  20. ),
  21. 988_097_824,
  22. ),
  23. (
  24. Model(
  25. source=SourceEnum.HUGGING_FACE,
  26. huggingface_repo_id="Qwen/Qwen2-VL-7B-Instruct",
  27. ),
  28. 16_582_831_200,
  29. ),
  30. (
  31. Model(
  32. source=SourceEnum.HUGGING_FACE,
  33. huggingface_repo_id="Qwen/Qwen2.5-72B-Instruct-GPTQ-Int4",
  34. ),
  35. 41_621_048_632,
  36. ),
  37. (
  38. Model(
  39. source=SourceEnum.HUGGING_FACE,
  40. huggingface_repo_id="unsloth/Llama-3.3-70B-Instruct-bnb-4bit",
  41. ),
  42. 39_518_238_055,
  43. ),
  44. (
  45. Model(
  46. source=SourceEnum.HUGGING_FACE,
  47. huggingface_repo_id="deepseek-ai/DeepSeek-R1",
  48. ),
  49. 688_586_727_753,
  50. ),
  51. (
  52. Model(
  53. source=SourceEnum.HUGGING_FACE,
  54. huggingface_repo_id="Systran/faster-whisper-large-v3",
  55. ),
  56. 3_087_284_237,
  57. ),
  58. (
  59. Model(
  60. source=SourceEnum.MODEL_SCOPE,
  61. model_scope_model_id="Qwen/Qwen2-0.5B-Instruct",
  62. ),
  63. 988_097_824,
  64. ),
  65. (
  66. Model(
  67. source=SourceEnum.MODEL_SCOPE,
  68. model_scope_model_id="Qwen/Qwen2-VL-7B-Instruct",
  69. ),
  70. 16_582_831_200,
  71. ),
  72. (
  73. Model(
  74. source=SourceEnum.MODEL_SCOPE,
  75. model_scope_model_id="Qwen/Qwen2.5-72B-Instruct-GPTQ-Int4",
  76. ),
  77. 41_621_048_632,
  78. ),
  79. (
  80. Model(
  81. source=SourceEnum.MODEL_SCOPE,
  82. model_scope_model_id="unsloth/Llama-3.3-70B-Instruct-bnb-4bit",
  83. ),
  84. 39_518_238_055,
  85. ),
  86. (
  87. Model(
  88. source=SourceEnum.MODEL_SCOPE,
  89. model_scope_model_id="deepseek-ai/DeepSeek-R1",
  90. ),
  91. 688_586_727_753,
  92. ),
  93. (
  94. Model(
  95. source=SourceEnum.MODEL_SCOPE,
  96. model_scope_model_id="gpustack/faster-whisper-large-v3",
  97. ),
  98. 3_087_284_237,
  99. ),
  100. (
  101. Model(
  102. source=SourceEnum.MODEL_SCOPE,
  103. model_scope_model_id="gpustack/CosyVoice2-0.5B",
  104. ),
  105. 2_557_256_546,
  106. # The CosyVoice2-0.5B repository contains a subdirectory named CosyVoice-BlankEN,
  107. # which is optional and should be excluded from weight calculations.
  108. ),
  109. ]
  110. for model, expected_weight_size in model_to_weight_sizes:
  111. computed = get_hub_model_weight_size_with_retry(model)
  112. assert (
  113. computed == expected_weight_size
  114. ), f"weight_size mismatch for {model}, computed: {computed}, expected: {expected_weight_size}"
  115. @retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
  116. def get_hub_model_weight_size_with_retry(model: Model) -> int:
  117. return get_model_weight_size(model)
  118. def test_get_hf_min_gguf_file():
  119. model_to_gguf_file_path = [
  120. (
  121. "Qwen/Qwen2-0.5B-Instruct-GGUF",
  122. "qwen2-0_5b-instruct-q2_k.gguf",
  123. ),
  124. (
  125. "bartowski/Qwen2-VL-7B-Instruct-GGUF",
  126. "Qwen2-VL-7B-Instruct-IQ2_M.gguf",
  127. ),
  128. (
  129. "Qwen/Qwen2.5-72B-Instruct-GGUF",
  130. "qwen2.5-72b-instruct-q2_k-00001-of-00007.gguf",
  131. ),
  132. (
  133. "unsloth/Llama-3.3-70B-Instruct-GGUF",
  134. "Llama-3.3-70B-Instruct-UD-IQ1_M.gguf",
  135. ),
  136. (
  137. "unsloth/DeepSeek-R1-GGUF",
  138. "DeepSeek-R1-UD-IQ1_M/DeepSeek-R1-UD-IQ1_M-00001-of-00004.gguf",
  139. ),
  140. ]
  141. for model, expected_file_path in model_to_gguf_file_path:
  142. got = get_hugging_face_model_min_gguf_path(model)
  143. assert (
  144. got == expected_file_path
  145. ), f"min GGUF file path mismatch for huggingface model {model}, got: {got}, expected: {expected_file_path}"
  146. def test_get_ms_min_gguf_file():
  147. model_to_gguf_file_path = [
  148. (
  149. "Qwen/Qwen2-0.5B-Instruct-GGUF",
  150. "qwen2-0_5b-instruct-q2_k.gguf",
  151. ),
  152. (
  153. "bartowski/Qwen2-VL-7B-Instruct-GGUF",
  154. "Qwen2-VL-7B-Instruct-IQ2_M.gguf",
  155. ),
  156. (
  157. "Qwen/Qwen2.5-72B-Instruct-GGUF",
  158. "qwen2.5-72b-instruct-q2_k-00001-of-00007.gguf",
  159. ),
  160. (
  161. "unsloth/Llama-3.3-70B-Instruct-GGUF",
  162. "Llama-3.3-70B-Instruct-UD-IQ1_M.gguf",
  163. ),
  164. (
  165. "unsloth/DeepSeek-R1-GGUF",
  166. "DeepSeek-R1-UD-IQ1_M/DeepSeek-R1-UD-IQ1_M-00001-of-00004.gguf",
  167. ),
  168. ]
  169. for model, expected_file_path in model_to_gguf_file_path:
  170. got = get_model_scope_model_min_gguf_path(model)
  171. assert (
  172. got == expected_file_path
  173. ), f"min GGUF file path mismatch for modelscope model {model}, got: {got}, expected: {expected_file_path}"
  174. @pytest.mark.parametrize(
  175. "m, file, token, predicate",
  176. [
  177. (
  178. new_model(
  179. id=1,
  180. name="test_name",
  181. huggingface_repo_id="Qwen/Qwen3-0.6B",
  182. ),
  183. "config.json",
  184. None,
  185. lambda content: "Qwen3ForCausalLM" in content.get("architectures", []),
  186. ),
  187. (
  188. new_model(id=2, name="test_name2", model_scope_model_id="Qwen/Qwen3-0.6B"),
  189. "config.json",
  190. None,
  191. lambda content: "Qwen3ForCausalLM" in content.get("architectures", []),
  192. ),
  193. ],
  194. )
  195. def test_read_repo_file_content(m, file, token, predicate):
  196. config_dict = read_repo_file_content(m, file, token)
  197. assert predicate(config_dict)