| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411 |
- # Synced with https://github.com/vllm-project/vllm/blob/v0.17.1/vllm/model_executor/models/registry.py
- # Update these when the builtin vLLM is updated
- # List of supported model architectures for the default version of the vLLM backend
- # TODO version-aware support list
- from typing import List
- from gpustack.schemas.models import CategoryEnum
# Architecture names classified as text-generation models.
# Every name is listed exactly once; consumers only run membership tests
# against this list, so repeated entries add nothing.
_TEXT_GENERATION_MODELS = [
    # [Decoder-only]
    "ApertusForCausalLM",
    "AquilaModel",
    "AquilaForCausalLM",
    "ArceeForCausalLM",
    "ArcticForCausalLM",
    "AXK1ForCausalLM",
    "MiniMaxForCausalLM",
    "MiniMaxText01ForCausalLM",
    "MiniMaxM1ForCausalLM",
    "BaiChuanForCausalLM",
    "BaichuanForCausalLM",
    "BailingMoeForCausalLM",
    "BailingMoeV2ForCausalLM",
    "BailingMoeV2_5ForCausalLM",
    "BambaForCausalLM",
    "BloomForCausalLM",
    "ChatGLMModel",
    "ChatGLMForConditionalGeneration",
    "CohereForCausalLM",
    "Cohere2ForCausalLM",
    "CwmForCausalLM",
    "DbrxForCausalLM",
    "DeepseekForCausalLM",
    "DeepseekV2ForCausalLM",
    "DeepseekV3ForCausalLM",
    "DeepseekV32ForCausalLM",
    "Dots1ForCausalLM",
    "Ernie4_5_ForCausalLM",
    "Ernie4_5ForCausalLM",  # Note: New class for "Ernie4_5_ForCausalLM"
    "Ernie4_5_MoeForCausalLM",
    "ExaoneForCausalLM",
    "Exaone4ForCausalLM",
    "ExaoneMoEForCausalLM",
    "Fairseq2LlamaForCausalLM",
    "FalconForCausalLM",
    "FalconMambaForCausalLM",
    "FalconH1ForCausalLM",
    "FlexOlmoForCausalLM",
    "GemmaForCausalLM",
    "Gemma2ForCausalLM",
    "Gemma3ForCausalLM",
    "Gemma3nForCausalLM",
    "Qwen3NextForCausalLM",
    "GlmForCausalLM",
    "Glm4ForCausalLM",
    "Glm4MoeForCausalLM",
    "Glm4MoeLiteForCausalLM",
    "GlmMoeDsaForCausalLM",
    "GptOssForCausalLM",
    "GPT2LMHeadModel",
    "GPTBigCodeForCausalLM",
    "GPTJForCausalLM",
    "GPTNeoXForCausalLM",
    "GraniteForCausalLM",
    "GraniteMoeForCausalLM",
    "GraniteMoeHybridForCausalLM",
    "GraniteMoeSharedForCausalLM",
    "GritLM",
    "Grok1ModelForCausalLM",
    "Grok1ForCausalLM",
    "HunYuanMoEV1ForCausalLM",
    "HunYuanDenseV1ForCausalLM",
    "HCXVisionForCausalLM",
    "InternLMForCausalLM",
    "InternLM2ForCausalLM",
    "InternLM2VEForCausalLM",
    "InternLM3ForCausalLM",
    "IQuestCoderForCausalLM",
    "IQuestLoopCoderForCausalLM",
    "JAISLMHeadModel",
    "JambaForCausalLM",
    "KimiLinearForCausalLM",
    "Lfm2ForCausalLM",
    "Lfm2MoeForCausalLM",
    "LlamaForCausalLM",
    "LLaMAForCausalLM",
    "Llama4ForCausalLM",
    "LongcatFlashForCausalLM",
    "MambaForCausalLM",
    "Mamba2ForCausalLM",
    "MiniCPMForCausalLM",
    "MiniCPM3ForCausalLM",
    # The other MiniMax text architectures are already listed near the top.
    "MiniMaxM2ForCausalLM",
    "MistralForCausalLM",
    "MixtralForCausalLM",
    "MotifForCausalLM",
    "QuantMixtralForCausalLM",
    "MptForCausalLM",
    "MPTForCausalLM",
    "MiMoForCausalLM",
    "MiMoV2FlashForCausalLM",
    "NemotronForCausalLM",
    "NemotronHForCausalLM",
    "NemotronHPuzzleForCausalLM",
    "OlmoForCausalLM",
    "Olmo2ForCausalLM",
    "Olmo3ForCausalLM",
    "OlmoeForCausalLM",
    "OPTForCausalLM",
    "OrionForCausalLM",
    "OuroForCausalLM",
    "PanguEmbeddedForCausalLM",
    "PanguProMoEV2ForCausalLM",
    "PanguUltraMoEForCausalLM",
    "PersimmonForCausalLM",
    "PhiForCausalLM",
    "Phi3ForCausalLM",
    "Phi3SmallForCausalLM",
    "PhiMoEForCausalLM",
    "Phi4FlashForCausalLM",
    "Plamo2ForCausalLM",
    "QWenLMHeadModel",
    "Qwen2ForCausalLM",
    "Qwen2MoeForCausalLM",
    "Qwen3ForCausalLM",
    "Qwen3MoeForCausalLM",
    "RWForCausalLM",
    "SeedOssForCausalLM",
    "Step1ForCausalLM",
    "Step3TextForCausalLM",
    "Step3p5ForCausalLM",
    "StableLMEpochForCausalLM",
    "StableLmForCausalLM",
    "Starcoder2ForCausalLM",
    "SolarForCausalLM",
    "TeleChat2ForCausalLM",
    "TeleFLMForCausalLM",
    "XverseForCausalLM",
    "Zamba2ForCausalLM",
    # [Encoder-decoder]
    "BartModel",
    "BartForConditionalGeneration",
    "MBartForConditionalGeneration",
]
# Architecture names classified as embedding models.
#
# The following architectures are intentionally NOT listed here because they
# are already registered in another list of this module:
#   - in _TEXT_GENERATION_MODELS: GlmForCausalLM, GritLM, AquilaModel,
#     AquilaForCausalLM, InternLMForCausalLM, InternLM3ForCausalLM,
#     LlamaForCausalLM, LLaMAForCausalLM, MistralForCausalLM,
#     XverseForCausalLM, Phi3ForCausalLM, Qwen2ForCausalLM,
#     TeleChat2ForCausalLM
#   - in _MULTIMODAL_MODELS: LlavaNextForConditionalGeneration,
#     Phi3VForCausalLM, Qwen2VLForConditionalGeneration
_EMBEDDING_MODELS = [
    # [Text-only]
    "BertModel",
    "BertSpladeSparseEmbeddingModel",
    "HF_ColBERT",
    "ColBERTModernBertModel",
    "ColBERTJinaRobertaModel",
    "DeciLMForCausalLM",
    "Gemma2Model",
    "Gemma3TextModel",
    "GPT2ForSequenceClassification",
    "GteModel",
    "GteNewModel",
    "InternLM2ForRewardModel",
    "JambaForSequenceClassification",
    "LlamaBidirectionalModel",
    "LlamaModel",
    "MistralModel",
    "ModernBertModel",
    "NomicBertModel",
    "Qwen2Model",
    "Qwen2ForRewardModel",
    "Qwen2ForProcessRewardModel",
    "RobertaForMaskedLM",
    "RobertaModel",
    "VoyageQwen3BidirectionalEmbedModel",
    "XLMRobertaModel",
    "BgeM3EmbeddingModel",
    # [Multimodal]
    "CLIPModel",
    "ColModernVBertForRetrieval",
    "ColQwen3",
    "OpsColQwen3Model",
    "Qwen3VLNemotronEmbedModel",
    "SiglipModel",
    "LlamaNemotronVLModel",
    "PrithviGeoSpatialMAE",
    "Terratorch",
]
# Architecture names classified as cross-encoder models
# (sequence/token classification and ranking heads).
_CROSS_ENCODER_MODELS = [
    "BertForSequenceClassification",
    "BertForTokenClassification",
    "GteNewForSequenceClassification",
    "RobertaForSequenceClassification",
    "XLMRobertaForSequenceClassification",
    "ModernBertForSequenceClassification",
    "ModernBertForTokenClassification",
    "JinaVLForRanking",
    "LlamaBidirectionalForSequenceClassification",
    "LlamaNemotronVLForSequenceClassification",
]
# Architecture names classified as multimodal models.
# Every name is listed exactly once; consumers only run membership tests
# against this list, so repeated entries add nothing.
_MULTIMODAL_MODELS = [
    # [Decoder-only]
    "AriaForConditionalGeneration",
    "AudioFlamingo3ForConditionalGeneration",
    "MusicFlamingoForConditionalGeneration",
    "AyaVisionForConditionalGeneration",
    "BeeForConditionalGeneration",
    "Blip2ForConditionalGeneration",
    "ChameleonForConditionalGeneration",
    "Cohere2VisionForConditionalGeneration",
    "DeepseekVLV2ForCausalLM",
    "DeepseekOCRForCausalLM",
    "DeepseekOCR2ForCausalLM",
    "DotsOCRForCausalLM",
    "Eagle2_5_VLForConditionalGeneration",
    "Ernie4_5_VLMoeForConditionalGeneration",
    "FuyuForCausalLM",
    "Gemma3ForConditionalGeneration",
    "Gemma3nForConditionalGeneration",
    "GLM4VForCausalLM",
    "Glm4vForConditionalGeneration",
    "Glm4v_moeForConditionalGeneration",
    "Glm4vMoeForConditionalGeneration",  # Note: New class for "Glm4v_moeForConditionalGeneration"
    "GlmOcrForConditionalGeneration",
    "H2OVLChatModel",
    "HunYuanVLForConditionalGeneration",
    "StepVLForConditionalGeneration",
    "InternVLChatModel",
    "NemotronH_Nano_VL_V2",
    "InternS1ForConditionalGeneration",
    "InternVLForConditionalGeneration",
    "InternS1ProForConditionalGeneration",
    "Idefics3ForConditionalGeneration",
    "IsaacForConditionalGeneration",
    "SmolVLMForConditionalGeneration",
    "KananaVForConditionalGeneration",
    "KeyeForConditionalGeneration",
    "KeyeVL1_5ForConditionalGeneration",
    "RForConditionalGeneration",
    "KimiVLForConditionalGeneration",
    "KimiK25ForConditionalGeneration",
    "LightOnOCRForConditionalGeneration",
    "Lfm2VlForConditionalGeneration",
    "Llama_Nemotron_Nano_VL",
    "Llama4ForConditionalGeneration",
    "LlavaForConditionalGeneration",
    "LlavaNextForConditionalGeneration",
    "LlavaNextVideoForConditionalGeneration",
    "LlavaOnevisionForConditionalGeneration",
    "MantisForConditionalGeneration",
    "MiDashengLMModel",
    "MiniMaxVL01ForConditionalGeneration",
    "MiniCPMO",
    "MiniCPMV",
    "Mistral3ForConditionalGeneration",
    "MolmoForCausalLM",
    "Molmo2ForConditionalGeneration",
    "NVLM_D",
    "OpenPanguVLForConditionalGeneration",
    "Ovis",
    "Ovis2_5",
    "Ovis2_6ForCausalLM",
    "Ovis2_6_MoeForCausalLM",
    "PaddleOCRVLForConditionalGeneration",
    "PaliGemmaForConditionalGeneration",
    "Phi3VForCausalLM",
    "Phi4MMForCausalLM",
    "Phi4MultimodalForCausalLM",
    "PixtralForConditionalGeneration",
    "QwenVLForConditionalGeneration",
    "Qwen2VLForConditionalGeneration",
    "Qwen2_5_VLForConditionalGeneration",
    "Qwen2AudioForConditionalGeneration",
    "Qwen2_5OmniModel",
    "Qwen2_5OmniForConditionalGeneration",
    "Qwen3OmniMoeForConditionalGeneration",
    "Qwen3VLForConditionalGeneration",
    "Qwen3VLMoeForConditionalGeneration",
    "Qwen3_5ForConditionalGeneration",
    "Qwen3_5MoeForConditionalGeneration",
    "UltravoxModel",
    "SkyworkR1VChatModel",
    "Step3VLForConditionalGeneration",
    "TarsierForConditionalGeneration",
    "Tarsier2ForConditionalGeneration",
    "VoxtralStreamingGeneration",
    # [Encoder-decoder]
    # "Llama4ForConditionalGeneration" and "SkyworkR1VChatModel" are already
    # listed in the decoder-only section above and are not repeated here.
    "Florence2ForConditionalGeneration",
    "MllamaForConditionalGeneration",
    "NemotronParseForConditionalGeneration",
]
# Architecture names classified as speech-to-text models.
_SPEECH_TO_TEXT_MODELS = [
    "FireRedASR2ForConditionalGeneration",
    "FunASRForConditionalGeneration",
    "FunAudioChatForConditionalGeneration",
    "GlmAsrForConditionalGeneration",
    "GraniteSpeechForConditionalGeneration",
    "Qwen3ASRForConditionalGeneration",
    "Qwen3ASRRealtimeGeneration",
    "VoxtralForConditionalGeneration",
    "VoxtralRealtimeGeneration",
    "WhisperForConditionalGeneration",
]

# Architecture names classified as text-to-speech models.
_TEXT_TO_SPEECH_MODELS = ["Qwen3TTSForConditionalGeneration"]
# Additional architectures that are folded into the LLM category.
_TRANSFORMERS_SUPPORTED_MODELS = [
    # Text generation models
    "SmolLM3ForCausalLM",
    # Multimodal models
    "Emu3ForConditionalGeneration",
]

# Generic "Transformers*" architecture names, grouped by the category
# they are merged into.

# -> LLM (text generation)
_TRANSFORMERS_BACKEND_TEXT_GENERATION_MODELS = [
    "TransformersModel",
    "TransformersForCausalLM",
    "TransformersMoEForCausalLM",
]

# -> LLM (multimodal)
_TRANSFORMERS_BACKEND_MULTIMODAL_MODELS = [
    "TransformersForMultimodalLM",
    "TransformersMultiModalForCausalLM",
    "TransformersMultiModalMoEForCausalLM",
]

# -> embedding
_TRANSFORMERS_BACKEND_EMBEDDING_MODELS = [
    "TransformersEmbeddingModel",
    "TransformersMoEEmbeddingModel",
    "TransformersMultiModalEmbeddingModel",
]

# -> reranker
_TRANSFORMERS_BACKEND_CROSS_ENCODER_MODELS = [
    "TransformersForSequenceClassification",
    "TransformersMoEForSequenceClassification",
    "TransformersMultiModalForSequenceClassification",
]
# Everything reported as an LLM: text-generation and multimodal architectures
# plus their Transformers counterparts.
_LLM_MODELS = [
    *_TEXT_GENERATION_MODELS,
    *_MULTIMODAL_MODELS,
    *_TRANSFORMERS_SUPPORTED_MODELS,
    *_TRANSFORMERS_BACKEND_TEXT_GENERATION_MODELS,
    *_TRANSFORMERS_BACKEND_MULTIMODAL_MODELS,
]

# Rebinds the name to a new list that also includes the Transformers
# embedding architectures.
_EMBEDDING_MODELS = [
    *_EMBEDDING_MODELS,
    *_TRANSFORMERS_BACKEND_EMBEDDING_MODELS,
]

# Cross-encoder architectures are reported as rerankers.
_RERANKER_MODELS = [
    *_CROSS_ENCODER_MODELS,
    *_TRANSFORMERS_BACKEND_CROSS_ENCODER_MODELS,
]
def detect_model_type(architectures: List[str]) -> CategoryEnum:
    """
    Classify a model by its architecture names.

    Architectures are examined in the given order, and the first one found
    in any known list decides the result. For a single architecture the
    lists are consulted in this order: embedding, reranker, speech-to-text,
    text-to-speech, LLM.

    Args:
        architectures: List of model architecture names. A falsy value
            (``None`` or an empty list) is treated as "no architectures".

    Returns:
        The category of the first recognized architecture, or
        ``CategoryEnum.UNKNOWN`` when none is recognized.
    """
    # Order matters: earlier pairs take precedence for a given architecture.
    lookup_order = (
        (_EMBEDDING_MODELS, CategoryEnum.EMBEDDING),
        (_RERANKER_MODELS, CategoryEnum.RERANKER),
        (_SPEECH_TO_TEXT_MODELS, CategoryEnum.SPEECH_TO_TEXT),
        (_TEXT_TO_SPEECH_MODELS, CategoryEnum.TEXT_TO_SPEECH),
        (_LLM_MODELS, CategoryEnum.LLM),
    )

    for name in architectures or []:
        for known_names, category in lookup_order:
            if name in known_names:
                return category

    return CategoryEnum.UNKNOWN
def is_multimodal_model(architectures: List[str]) -> bool:
    """
    Tell whether any of the given architectures is a multimodal one.

    Args:
        architectures: List of model architecture names. A falsy value
            (``None`` or an empty list) yields ``False``.

    Returns:
        True if at least one architecture is listed in
        ``_MULTIMODAL_MODELS``, False otherwise.
    """
    candidates = architectures or []
    return any(name in _MULTIMODAL_MODELS for name in candidates)
|