model_registry.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411
# Synced with https://github.com/vllm-project/vllm/blob/v0.17.1/vllm/model_executor/models/registry.py
# Update these lists whenever the built-in vLLM version is updated.
# They enumerate the model architectures supported by the default version of the vLLM backend.
# TODO: make the support list version-aware.
  5. from typing import List
  6. from gpustack.schemas.models import CategoryEnum
  7. _TEXT_GENERATION_MODELS = [
  8. # [Decoder-only]
  9. "ApertusForCausalLM",
  10. "AquilaModel",
  11. "AquilaForCausalLM",
  12. "ArceeForCausalLM",
  13. "ArcticForCausalLM",
  14. "AXK1ForCausalLM",
  15. "MiniMaxForCausalLM",
  16. "MiniMaxText01ForCausalLM",
  17. "MiniMaxM1ForCausalLM",
  18. "BaiChuanForCausalLM",
  19. "BaichuanForCausalLM",
  20. "BailingMoeForCausalLM",
  21. "BailingMoeV2ForCausalLM",
  22. "BailingMoeV2_5ForCausalLM",
  23. "BambaForCausalLM",
  24. "BloomForCausalLM",
  25. "ChatGLMModel",
  26. "ChatGLMForConditionalGeneration",
  27. "CohereForCausalLM",
  28. "Cohere2ForCausalLM",
  29. "CwmForCausalLM",
  30. "DbrxForCausalLM",
  31. "DeepseekForCausalLM",
  32. "DeepseekV2ForCausalLM",
  33. "DeepseekV3ForCausalLM",
  34. "DeepseekV32ForCausalLM",
  35. "Dots1ForCausalLM",
  36. "Ernie4_5_ForCausalLM",
  37. "Ernie4_5ForCausalLM", # Note: New class for "Ernie4_5_ForCausalLM"
  38. "Ernie4_5_MoeForCausalLM",
  39. "ExaoneForCausalLM",
  40. "Exaone4ForCausalLM",
  41. "ExaoneMoEForCausalLM",
  42. "Fairseq2LlamaForCausalLM",
  43. "FalconForCausalLM",
  44. "FalconMambaForCausalLM",
  45. "FalconH1ForCausalLM",
  46. "FlexOlmoForCausalLM",
  47. "GemmaForCausalLM",
  48. "Gemma2ForCausalLM",
  49. "Gemma3ForCausalLM",
  50. "Gemma3nForCausalLM",
  51. "Qwen3NextForCausalLM",
  52. "GlmForCausalLM",
  53. "Glm4ForCausalLM",
  54. "Glm4MoeForCausalLM",
  55. "Glm4MoeLiteForCausalLM",
  56. "GlmMoeDsaForCausalLM",
  57. "GptOssForCausalLM",
  58. "GPT2LMHeadModel",
  59. "GPTBigCodeForCausalLM",
  60. "GPTJForCausalLM",
  61. "GPTNeoXForCausalLM",
  62. "GraniteForCausalLM",
  63. "GraniteMoeForCausalLM",
  64. "GraniteMoeHybridForCausalLM",
  65. "GraniteMoeSharedForCausalLM",
  66. "GritLM",
  67. "Grok1ModelForCausalLM",
  68. "Grok1ForCausalLM",
  69. "HunYuanMoEV1ForCausalLM",
  70. "HunYuanDenseV1ForCausalLM",
  71. "HCXVisionForCausalLM",
  72. "InternLMForCausalLM",
  73. "InternLM2ForCausalLM",
  74. "InternLM2VEForCausalLM",
  75. "InternLM3ForCausalLM",
  76. "IQuestCoderForCausalLM",
  77. "IQuestLoopCoderForCausalLM",
  78. "JAISLMHeadModel",
  79. "JambaForCausalLM",
  80. "KimiLinearForCausalLM",
  81. "Lfm2ForCausalLM",
  82. "Lfm2MoeForCausalLM",
  83. "LlamaForCausalLM",
  84. "LLaMAForCausalLM",
  85. "Llama4ForCausalLM",
  86. "LongcatFlashForCausalLM",
  87. "MambaForCausalLM",
  88. "Mamba2ForCausalLM",
  89. "MiniCPMForCausalLM",
  90. "MiniCPM3ForCausalLM",
  91. "MiniMaxForCausalLM",
  92. "MiniMaxText01ForCausalLM",
  93. "MiniMaxM1ForCausalLM",
  94. "MiniMaxM2ForCausalLM",
  95. "MistralForCausalLM",
  96. "MixtralForCausalLM",
  97. "MotifForCausalLM",
  98. "QuantMixtralForCausalLM",
  99. "MptForCausalLM",
  100. "MPTForCausalLM",
  101. "MiMoForCausalLM",
  102. "MiMoV2FlashForCausalLM",
  103. "NemotronForCausalLM",
  104. "NemotronHForCausalLM",
  105. "NemotronHPuzzleForCausalLM",
  106. "OlmoForCausalLM",
  107. "Olmo2ForCausalLM",
  108. "Olmo3ForCausalLM",
  109. "OlmoeForCausalLM",
  110. "OPTForCausalLM",
  111. "OrionForCausalLM",
  112. "OuroForCausalLM",
  113. "PanguEmbeddedForCausalLM",
  114. "PanguProMoEV2ForCausalLM",
  115. "PanguUltraMoEForCausalLM",
  116. "PersimmonForCausalLM",
  117. "PhiForCausalLM",
  118. "Phi3ForCausalLM",
  119. "Phi3SmallForCausalLM",
  120. "PhiMoEForCausalLM",
  121. "Phi4FlashForCausalLM",
  122. "Plamo2ForCausalLM",
  123. "QWenLMHeadModel",
  124. "Qwen2ForCausalLM",
  125. "Qwen2MoeForCausalLM",
  126. "Qwen3ForCausalLM",
  127. "Qwen3MoeForCausalLM",
  128. "RWForCausalLM",
  129. "SeedOssForCausalLM",
  130. "Step1ForCausalLM",
  131. "Step3TextForCausalLM",
  132. "Step3p5ForCausalLM",
  133. "StableLMEpochForCausalLM",
  134. "StableLmForCausalLM",
  135. "Starcoder2ForCausalLM",
  136. "SolarForCausalLM",
  137. "TeleChat2ForCausalLM",
  138. "TeleFLMForCausalLM",
  139. "XverseForCausalLM",
  140. "Zamba2ForCausalLM",
  141. # [Encoder-decoder]
  142. "BartModel",
  143. "BartForConditionalGeneration",
  144. "MBartForConditionalGeneration",
  145. ]
  146. _EMBEDDING_MODELS = [
  147. # [Text-only]
  148. "BertModel",
  149. "BertSpladeSparseEmbeddingModel",
  150. "HF_ColBERT",
  151. "ColBERTModernBertModel",
  152. "ColBERTJinaRobertaModel",
  153. "DeciLMForCausalLM",
  154. "Gemma2Model",
  155. "Gemma3TextModel",
  156. # "GlmForCausalLM", # Registered in _TEXT_GENERATION_MODELS
  157. "GPT2ForSequenceClassification",
  158. # "GritLM",
  159. "GteModel",
  160. "GteNewModel",
  161. "InternLM2ForRewardModel",
  162. "JambaForSequenceClassification",
  163. "LlamaBidirectionalModel",
  164. "LlamaModel",
  165. # "AquilaModel", # Registered in _TEXT_GENERATION_MODELS
  166. # "AquilaForCausalLM", # Registered in _TEXT_GENERATION_MODELS
  167. # "InternLMForCausalLM", # Registered in _TEXT_GENERATION_MODELS
  168. # "InternLM3ForCausalLM", # Registered in _TEXT_GENERATION_MODELS
  169. # "LlamaForCausalLM", # Registered in _TEXT_GENERATION_MODELS
  170. # "LLaMAForCausalLM", # Registered in _TEXT_GENERATION_MODELS
  171. # "MistralForCausalLM", # Registered in _TEXT_GENERATION_MODELS
  172. # "XverseForCausalLM", # Registered in _TEXT_GENERATION_MODELS
  173. "MistralModel",
  174. "ModernBertModel",
  175. "NomicBertModel",
  176. # "Phi3ForCausalLM", # Registered in _TEXT_GENERATION_MODELS
  177. "Qwen2Model",
  178. # "Qwen2ForCausalLM", # Registered in _TEXT_GENERATION_MODELS
  179. "Qwen2ForRewardModel",
  180. "Qwen2ForProcessRewardModel",
  181. "RobertaForMaskedLM",
  182. "RobertaModel",
  183. # "TeleChat2ForCausalLM", # Registered in _TEXT_GENERATION_MODELS
  184. "VoyageQwen3BidirectionalEmbedModel",
  185. "XLMRobertaModel",
  186. "BgeM3EmbeddingModel",
  187. # [Multimodal]
  188. # "LlavaNextForConditionalGeneration", # Registered in _TEXT_GENERATION_MODELS
  189. # "Phi3VForCausalLM",
  190. # "Qwen2VLForConditionalGeneration", # Registered in _TEXT_GENERATION_MODELS
  191. "CLIPModel",
  192. "ColModernVBertForRetrieval",
  193. "ColQwen3",
  194. "OpsColQwen3Model",
  195. "Qwen3VLNemotronEmbedModel",
  196. "SiglipModel",
  197. "LlamaNemotronVLModel",
  198. "PrithviGeoSpatialMAE",
  199. "Terratorch",
  200. ]
  201. _CROSS_ENCODER_MODELS = [
  202. "BertForSequenceClassification",
  203. "BertForTokenClassification",
  204. "GteNewForSequenceClassification",
  205. "RobertaForSequenceClassification",
  206. "XLMRobertaForSequenceClassification",
  207. "ModernBertForSequenceClassification",
  208. "ModernBertForTokenClassification",
  209. "JinaVLForRanking",
  210. "LlamaBidirectionalForSequenceClassification",
  211. "LlamaNemotronVLForSequenceClassification",
  212. ]
  213. _MULTIMODAL_MODELS = [
  214. # [Decoder-only]
  215. "AriaForConditionalGeneration",
  216. "AudioFlamingo3ForConditionalGeneration",
  217. "MusicFlamingoForConditionalGeneration",
  218. "AyaVisionForConditionalGeneration",
  219. "BeeForConditionalGeneration",
  220. "Blip2ForConditionalGeneration",
  221. "ChameleonForConditionalGeneration",
  222. "Cohere2VisionForConditionalGeneration",
  223. "DeepseekVLV2ForCausalLM",
  224. "DeepseekOCRForCausalLM",
  225. "DeepseekOCR2ForCausalLM",
  226. "DotsOCRForCausalLM",
  227. "Eagle2_5_VLForConditionalGeneration",
  228. "Ernie4_5_VLMoeForConditionalGeneration",
  229. "FuyuForCausalLM",
  230. "Gemma3ForConditionalGeneration",
  231. "Gemma3nForConditionalGeneration",
  232. "GLM4VForCausalLM",
  233. "Glm4vForConditionalGeneration",
  234. "Glm4v_moeForConditionalGeneration",
  235. "Glm4vMoeForConditionalGeneration", # Note: New class for "Glm4v_moeForConditionalGeneration"
  236. "GlmOcrForConditionalGeneration",
  237. "H2OVLChatModel",
  238. "HunYuanVLForConditionalGeneration",
  239. "StepVLForConditionalGeneration",
  240. "InternVLChatModel",
  241. "NemotronH_Nano_VL_V2",
  242. "InternS1ForConditionalGeneration",
  243. "InternVLForConditionalGeneration",
  244. "InternS1ProForConditionalGeneration",
  245. "Idefics3ForConditionalGeneration",
  246. "IsaacForConditionalGeneration",
  247. "SmolVLMForConditionalGeneration",
  248. "KananaVForConditionalGeneration",
  249. "KeyeForConditionalGeneration",
  250. "KeyeVL1_5ForConditionalGeneration",
  251. "RForConditionalGeneration",
  252. "KimiVLForConditionalGeneration",
  253. "KimiK25ForConditionalGeneration",
  254. "LightOnOCRForConditionalGeneration",
  255. "Lfm2VlForConditionalGeneration",
  256. "Llama_Nemotron_Nano_VL",
  257. "Llama4ForConditionalGeneration",
  258. "LlavaForConditionalGeneration",
  259. "LlavaNextForConditionalGeneration",
  260. "LlavaNextVideoForConditionalGeneration",
  261. "LlavaOnevisionForConditionalGeneration",
  262. "MantisForConditionalGeneration",
  263. "MiDashengLMModel",
  264. "MiniMaxVL01ForConditionalGeneration",
  265. "MiniCPMO",
  266. "MiniCPMV",
  267. "Mistral3ForConditionalGeneration",
  268. "MolmoForCausalLM",
  269. "Molmo2ForConditionalGeneration",
  270. "NVLM_D",
  271. "OpenPanguVLForConditionalGeneration",
  272. "Ovis",
  273. "Ovis2_5",
  274. "Ovis2_6ForCausalLM",
  275. "Ovis2_6_MoeForCausalLM",
  276. "PaddleOCRVLForConditionalGeneration",
  277. "PaliGemmaForConditionalGeneration",
  278. "Phi3VForCausalLM",
  279. "Phi4MMForCausalLM",
  280. "Phi4MultimodalForCausalLM",
  281. "PixtralForConditionalGeneration",
  282. "QwenVLForConditionalGeneration",
  283. "Qwen2VLForConditionalGeneration",
  284. "Qwen2_5_VLForConditionalGeneration",
  285. "Qwen2AudioForConditionalGeneration",
  286. "Qwen2_5OmniModel",
  287. "Qwen2_5OmniForConditionalGeneration",
  288. "Qwen3OmniMoeForConditionalGeneration",
  289. "Qwen3VLForConditionalGeneration",
  290. "Qwen3VLMoeForConditionalGeneration",
  291. "Qwen3_5ForConditionalGeneration",
  292. "Qwen3_5MoeForConditionalGeneration",
  293. "UltravoxModel",
  294. "SkyworkR1VChatModel",
  295. "Step3VLForConditionalGeneration",
  296. "TarsierForConditionalGeneration",
  297. "Tarsier2ForConditionalGeneration",
  298. "VoxtralStreamingGeneration",
  299. # [Encoder-decoder]
  300. "Florence2ForConditionalGeneration",
  301. "MllamaForConditionalGeneration",
  302. "Llama4ForConditionalGeneration",
  303. "SkyworkR1VChatModel",
  304. "NemotronParseForConditionalGeneration",
  305. ]
  306. _SPEECH_TO_TEXT_MODELS = [
  307. "FireRedASR2ForConditionalGeneration",
  308. "FunASRForConditionalGeneration",
  309. "FunAudioChatForConditionalGeneration",
  310. "GlmAsrForConditionalGeneration",
  311. "GraniteSpeechForConditionalGeneration",
  312. "Qwen3ASRForConditionalGeneration",
  313. "Qwen3ASRRealtimeGeneration",
  314. "VoxtralForConditionalGeneration",
  315. "VoxtralRealtimeGeneration",
  316. "WhisperForConditionalGeneration",
  317. ]
  318. _TEXT_TO_SPEECH_MODELS = [
  319. "Qwen3TTSForConditionalGeneration",
  320. ]
  321. _TRANSFORMERS_SUPPORTED_MODELS = [
  322. # Text generation models
  323. "SmolLM3ForCausalLM",
  324. # Multimodal models
  325. "Emu3ForConditionalGeneration",
  326. ]
  327. _TRANSFORMERS_BACKEND_TEXT_GENERATION_MODELS = [
  328. "TransformersModel",
  329. "TransformersForCausalLM",
  330. "TransformersMoEForCausalLM",
  331. ]
  332. _TRANSFORMERS_BACKEND_MULTIMODAL_MODELS = [
  333. "TransformersForMultimodalLM",
  334. "TransformersMultiModalForCausalLM",
  335. "TransformersMultiModalMoEForCausalLM",
  336. ]
  337. _TRANSFORMERS_BACKEND_EMBEDDING_MODELS = [
  338. "TransformersEmbeddingModel",
  339. "TransformersMoEEmbeddingModel",
  340. "TransformersMultiModalEmbeddingModel",
  341. ]
  342. _TRANSFORMERS_BACKEND_CROSS_ENCODER_MODELS = [
  343. "TransformersForSequenceClassification",
  344. "TransformersMoEForSequenceClassification",
  345. "TransformersMultiModalForSequenceClassification",
  346. ]
  347. _LLM_MODELS = (
  348. _TEXT_GENERATION_MODELS
  349. + _MULTIMODAL_MODELS
  350. + _TRANSFORMERS_SUPPORTED_MODELS
  351. + _TRANSFORMERS_BACKEND_TEXT_GENERATION_MODELS
  352. + _TRANSFORMERS_BACKEND_MULTIMODAL_MODELS
  353. )
  354. _EMBEDDING_MODELS = _EMBEDDING_MODELS + _TRANSFORMERS_BACKEND_EMBEDDING_MODELS
  355. _RERANKER_MODELS = _CROSS_ENCODER_MODELS + _TRANSFORMERS_BACKEND_CROSS_ENCODER_MODELS
  356. def detect_model_type(architectures: List[str]) -> CategoryEnum:
  357. """
  358. Detect the model type based on the architectures.
  359. Args:
  360. architectures: List of model architecture names.
  361. Returns:
  362. The detected model category.
  363. """
  364. for architecture in architectures or []:
  365. if architecture in _EMBEDDING_MODELS:
  366. return CategoryEnum.EMBEDDING
  367. if architecture in _RERANKER_MODELS:
  368. return CategoryEnum.RERANKER
  369. if architecture in _SPEECH_TO_TEXT_MODELS:
  370. return CategoryEnum.SPEECH_TO_TEXT
  371. if architecture in _TEXT_TO_SPEECH_MODELS:
  372. return CategoryEnum.TEXT_TO_SPEECH
  373. if architecture in _LLM_MODELS:
  374. return CategoryEnum.LLM
  375. return CategoryEnum.UNKNOWN
  376. def is_multimodal_model(architectures: List[str]) -> bool:
  377. """
  378. Check if the model is a multimodal model based on the architectures.
  379. Args:
  380. architectures: List of model architecture names.
  381. Returns:
  382. True if the model is multimodal, False otherwise.
  383. """
  384. for architecture in architectures or []:
  385. if architecture in _MULTIMODAL_MODELS:
  386. return True
  387. return False