# Maas2-group / maas-base
							import logging
import requests
from typing import Any, Dict
from gpustack.schemas.models import (
    BackendEnum,
    ModelInstance,
    Model,
    CategoryEnum,
)

logger = logging.getLogger(__name__)


def get_meta_from_running_instance(
    mi: ModelInstance, backend: str, model: Model
) -> Dict[str, Any]:
    """
    Fetch metadata from a running model instance over HTTP (blocking call).

    The instance is queried at ``http://<worker_ip>:<port>`` on ``/info`` for
    the Ascend MindIE backend and on ``/v1/models`` for every other backend.
    The JSON reply is handed to the matching parser. Any exception raised
    while building the URL, requesting, decoding, or parsing is logged as a
    warning and results in an empty dict.

    Args:
        mi: Instance to query; ``worker_ip``, ``port`` and ``name`` are read.
        backend: Backend identifier, compared against ``BackendEnum`` members.
        model: Model whose ``categories`` decide whether to skip the query.

    Returns:
        The parsed metadata, or ``{}`` when the query is skipped or fails.
    """
    # SGLang Diffusion does not provide metadata endpoints at the moment.
    serves_sglang_images = (
        backend == BackendEnum.SGLANG and CategoryEnum.IMAGE in model.categories
    )
    if serves_sglang_images:
        return {}

    # MindIE exposes a TGI-like /info endpoint instead of /v1/models.
    # Ref: https://www.hiascend.com/document/detail/zh/mindie/21RC2/mindieservice/servicedev/mindie_service0066.html
    endpoint = "/info" if backend == BackendEnum.ASCEND_MINDIE else "/v1/models"

    try:
        reply = requests.get(
            f"http://{mi.worker_ip}:{mi.port}{endpoint}", timeout=1
        )
        reply.raise_for_status()
        payload = reply.json()

        # Pick the parser that matches the endpoint queried above.
        parse = (
            parse_tgi_info_meta
            if backend == BackendEnum.ASCEND_MINDIE
            else parse_v1_models_meta
        )
        return parse(payload)
    except Exception as e:
        # Best effort: metadata is optional, so never propagate the failure.
        logger.warning(f"Failed to get meta from running instance {mi.name}: {e}")
        return {}


def parse_v1_models_meta(response_json: Dict[str, Any]) -> Dict[str, Any]:
    """
    Parse the meta information from the /v1/models response.

    Only the first entry of ``data`` is inspected. Its ``meta`` mapping (if
    any) forms the base of the result, and the optional top-level keys
    ``voices`` and ``max_model_len`` of that entry are merged on top.

    Args:
        response_json: Decoded JSON body of the /v1/models response.

    Returns:
        A newly created dict with the collected metadata; ``{}`` when
        ``data`` is missing or empty. The input payload is never modified.
    """
    if "data" not in response_json or not response_json["data"]:
        return {}

    first_model = response_json["data"][0]

    # Copy the nested mapping so merging the optional keys below does not
    # mutate the caller's payload. A null or non-dict "meta" is treated as
    # absent instead of raising when the optional keys are assigned.
    raw_meta = first_model.get("meta")
    meta_info: Dict[str, Any] = dict(raw_meta) if isinstance(raw_meta, dict) else {}

    # Optional keys from different backends
    optional_keys = ("voices", "max_model_len")
    for key in optional_keys:
        if key in first_model:
            meta_info[key] = first_model[key]

    return meta_info


def parse_tgi_info_meta(response_json: Dict[str, Any]) -> Dict[str, Any]:
    """
    Build a flat metadata dict from a TGI-like /info response.

    The fields of the first entry in ``models`` (when present and non-empty)
    are taken as the base. A fixed set of top-level server limits is then
    copied on top whenever the key exists in the response, including keys
    whose value is null.

    Example response:
    {
        "docker_label": null,
        "max_batch_total_tokens": 8192,
        "max_best_of": 1,
        "max_concurrent_requests": 200,
        "max_stop_sequences": null,
        "max_waiting_tokens": null,
        "sha": null,
        "validation_workers": null,
        "version": "1.0.0",
        "waiting_served_ratio": null,
        "models": [
            {
                "model_device_type": "npu",
                "model_dtype": "float16",
                "model_id": "deepseek",
                "model_pipeline_tag": "text-generation",
                "model_sha": null,
                "max_total_tokens": 2560
            }
        ],
        "max_input_length": 2048
    }
    """
    # Top-level keys that TGI-like backends may report.
    server_level_keys = (
        "max_batch_total_tokens",
        "max_best_of",
        "max_concurrent_requests",
        "max_stop_sequences",
        "max_waiting_tokens",
        "max_input_length",
    )

    served = response_json["models"] if "models" in response_json else None
    # Start from a copy of the first model's fields, if there is one.
    collected = dict(served[0]) if served else {}

    collected.update(
        {
            name: response_json[name]
            for name in server_level_keys
            if name in response_json
        }
    )
    return collected