# Maas2-group / maas-base
							from gpustack.schemas.models import (
    CategoryEnum,
    GPUSelector,
    Model,
    ModelInstance,
    ModelInstanceStateEnum,
    PlacementStrategyEnum,
    SourceEnum,
)


def new_model_instance(
    id,
    name,
    model_id,
    worker_id=None,
    state=ModelInstanceStateEnum.PENDING,
    gpu_indexes=None,
    computed_resource_claim=None,
) -> ModelInstance:
    """Construct a ``ModelInstance`` from the given field values.

    Every argument is forwarded unchanged to the ``ModelInstance``
    constructor. ``model_name`` is not configurable and is always set to
    ``"test"``.

    Args:
        id: Value for the instance's ``id`` field.
        name: Value for the instance's ``name`` field.
        model_id: Value for the instance's ``model_id`` field.
        worker_id: Value for ``worker_id``; defaults to ``None``.
        state: Value for ``state``; defaults to
            ``ModelInstanceStateEnum.PENDING``.
        gpu_indexes: Value for ``gpu_indexes``; defaults to ``None``.
        computed_resource_claim: Value for ``computed_resource_claim``;
            defaults to ``None``.

    Returns:
        The newly constructed ``ModelInstance``.
    """
    # Collect the constructor arguments first so the fixed ``model_name``
    # stands out from the pass-through fields.
    fields = {
        "id": id,
        "name": name,
        "worker_id": worker_id,
        "model_id": model_id,
        "model_name": "test",
        "state": state,
        "gpu_indexes": gpu_indexes,
        "computed_resource_claim": computed_resource_claim,
    }
    return ModelInstance(**fields)


def new_model(
    id,
    name,
    replicas=1,
    huggingface_repo_id=None,
    model_scope_model_id=None,
    placement_strategy=PlacementStrategyEnum.BINPACK,
    distributable=True,
    extended_kv_cache=None,
    categories=None,
    cpu_offloading=True,
    distributed_inference_across_workers=True,
    **kargs,
) -> Model:
    """Construct a ``Model`` whose ``source`` is derived from the repo ids.

    The ``source`` field is ``SourceEnum.MODEL_SCOPE`` when
    ``model_scope_model_id`` is given, otherwise ``SourceEnum.HUGGING_FACE``
    when ``huggingface_repo_id`` is given. When both are given, ModelScope
    takes precedence. ``ready_replicas`` is always set to ``0``.

    Args:
        id: Value for the model's ``id`` field.
        name: Value for the model's ``name`` field.
        replicas: Value for ``replicas``; defaults to ``1``.
        huggingface_repo_id: Hugging Face repo id, or ``None``.
        model_scope_model_id: ModelScope model id, or ``None``.
        placement_strategy: Value for ``placement_strategy``; defaults to
            ``PlacementStrategyEnum.BINPACK``.
        distributable: Value for ``distributable``; defaults to ``True``.
        extended_kv_cache: Value for ``extended_kv_cache``; defaults to
            ``None``.
        categories: Value for ``categories``. ``None`` (the default) selects
            a fresh ``[CategoryEnum.LLM]`` list for this call.
        cpu_offloading: Value for ``cpu_offloading``; defaults to ``True``.
        distributed_inference_across_workers: Value for
            ``distributed_inference_across_workers``; defaults to ``True``.
        **kargs: Extra keyword arguments forwarded to ``Model`` as-is.

    Returns:
        The newly constructed ``Model``.

    Raises:
        ValueError: If neither ``huggingface_repo_id`` nor
            ``model_scope_model_id`` is provided, since no source can be
            derived.
    """
    # ModelScope is checked first so it wins when both ids are supplied,
    # matching the historical "last assignment wins" behavior.
    if model_scope_model_id is not None:
        source = SourceEnum.MODEL_SCOPE
    elif huggingface_repo_id is not None:
        source = SourceEnum.HUGGING_FACE
    else:
        # Previously this path fell through to an UnboundLocalError on
        # ``source``; fail with an explicit message instead.
        raise ValueError(
            "new_model requires huggingface_repo_id or model_scope_model_id "
            "to determine the model source"
        )

    # Build the default list per call so separate models never share (and
    # accidentally mutate) one list object.
    if categories is None:
        categories = [CategoryEnum.LLM]

    return Model(
        id=id,
        name=name,
        replicas=replicas,
        ready_replicas=0,
        source=source,
        huggingface_repo_id=huggingface_repo_id,
        model_scope_model_id=model_scope_model_id,
        distributable=distributable,
        placement_strategy=placement_strategy,
        cpu_offloading=cpu_offloading,
        distributed_inference_across_workers=distributed_inference_across_workers,
        categories=categories,
        extended_kv_cache=extended_kv_cache,
        **kargs,
    )


def make_model(
    gpus_per_replica=2, gpu_ids=None, repo_id="Qwen/Qwen2.5-7B-Instruct", **kwargs
):
    """Build a model through ``new_model`` with an optional GPU selector.

    The model is always created with id ``1``, name ``"test_name"`` and one
    replica, using ``repo_id`` as its ``huggingface_repo_id``.

    Args:
        gpus_per_replica: Passed to ``GPUSelector``; only used when
            ``gpu_ids`` is not ``None``.
        gpu_ids: GPU ids for the ``GPUSelector``. When ``None``, no selector
            is built and ``gpu_selector=None`` is passed on.
        repo_id: Forwarded as ``huggingface_repo_id``.
        **kwargs: Extra keyword arguments forwarded to ``new_model``.

    Returns:
        Whatever ``new_model`` returns for these arguments.
    """
    selector = (
        None
        if gpu_ids is None
        else GPUSelector(gpu_ids=gpu_ids, gpus_per_replica=gpus_per_replica)
    )
    return new_model(
        1, "test_name", 1, huggingface_repo_id=repo_id, gpu_selector=selector, **kwargs
    )