model_meta.py 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118
  1. import logging
  2. import requests
  3. from typing import Any, Dict
  4. from gpustack.schemas.models import (
  5. BackendEnum,
  6. ModelInstance,
  7. Model,
  8. CategoryEnum,
  9. )
  10. logger = logging.getLogger(__name__)
  11. def get_meta_from_running_instance(
  12. mi: ModelInstance, backend: str, model: Model
  13. ) -> Dict[str, Any]:
  14. """
  15. Get the meta information from the running instance (synchronous version).
  16. """
  17. if backend == BackendEnum.SGLANG and CategoryEnum.IMAGE in model.categories:
  18. # SGLang Diffusion does not provide metadata endpoints at the moment.
  19. return {}
  20. meta_path = "/v1/models"
  21. if backend == BackendEnum.ASCEND_MINDIE:
  22. # Ref: https://www.hiascend.com/document/detail/zh/mindie/21RC2/mindieservice/servicedev/mindie_service0066.html
  23. meta_path = "/info"
  24. try:
  25. url = f"http://{mi.worker_ip}:{mi.port}{meta_path}"
  26. response = requests.get(url, timeout=1)
  27. response.raise_for_status()
  28. response_json = response.json()
  29. if backend == BackendEnum.ASCEND_MINDIE:
  30. model_meta = parse_tgi_info_meta(response_json)
  31. else:
  32. model_meta = parse_v1_models_meta(response_json)
  33. return model_meta
  34. except Exception as e:
  35. logger.warning(f"Failed to get meta from running instance {mi.name}: {e}")
  36. return {}
  37. def parse_v1_models_meta(response_json: Dict[str, Any]) -> Dict[str, Any]:
  38. """
  39. Parse the meta information from the /v1/models response.
  40. """
  41. if "data" not in response_json or not response_json["data"]:
  42. return {}
  43. first_model = response_json["data"][0]
  44. meta_info = first_model.get("meta", {})
  45. # Optional keys from different backends
  46. optional_keys = [
  47. "voices",
  48. "max_model_len",
  49. ]
  50. for key in optional_keys:
  51. if key in first_model:
  52. meta_info[key] = first_model[key]
  53. return meta_info
  54. def parse_tgi_info_meta(response_json: Dict[str, Any]) -> Dict[str, Any]:
  55. """
  56. Parse the meta information from the TGI-like /info response.
  57. Example:
  58. {
  59. "docker_label": null,
  60. "max_batch_total_tokens": 8192,
  61. "max_best_of": 1,
  62. "max_concurrent_requests": 200,
  63. "max_stop_sequences": null,
  64. "max_waiting_tokens": null,
  65. "sha": null,
  66. "validation_workers": null,
  67. "version": "1.0.0",
  68. "waiting_served_ratio": null,
  69. "models": [
  70. {
  71. "model_device_type": "npu",
  72. "model_dtype": "float16",
  73. "model_id": "deepseek",
  74. "model_pipeline_tag": "text-generation",
  75. "model_sha": null,
  76. "max_total_tokens": 2560
  77. }
  78. ],
  79. "max_input_length": 2048
  80. }
  81. """
  82. meta_info = {}
  83. if "models" in response_json and response_json["models"]:
  84. first_model = response_json["models"][0]
  85. meta_info.update(first_model)
  86. # Optional keys from TGI-like backends
  87. optional_keys = [
  88. "max_batch_total_tokens",
  89. "max_best_of",
  90. "max_concurrent_requests",
  91. "max_stop_sequences",
  92. "max_waiting_tokens",
  93. "max_input_length",
  94. ]
  95. for key in optional_keys:
  96. if key in response_json:
  97. meta_info[key] = response_json[key]
  98. return meta_info