snapshot.py 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118
  1. from typing import List, Optional, Tuple
  2. from gpustack.schemas.benchmark import (
  3. GPUSnapshot,
  4. GPUSnapshots,
  5. ModelInstanceRuntimeInfo,
  6. ModelInstanceSnapshot,
  7. WorkerSnapshot,
  8. )
  9. from gpustack.schemas.models import Model, ModelInstance
  10. from gpustack.schemas.workers import Worker
  11. from gpustack.utils.gpu import make_gpu_id
  12. def create_model_instance_snapshot(
  13. model_instance: ModelInstance, model: Model
  14. ) -> ModelInstanceSnapshot:
  15. """Create a snapshot of the model instance."""
  16. subordinate_workers_snapshots: Optional[List[ModelInstanceRuntimeInfo]] = None
  17. if (
  18. model_instance.distributed_servers
  19. and model_instance.distributed_servers.subordinate_workers
  20. ):
  21. subordinate_workers_snapshots = []
  22. for subworker in model_instance.distributed_servers.subordinate_workers:
  23. subordinate_workers_snapshots.append(
  24. ModelInstanceRuntimeInfo(
  25. worker_id=subworker.worker_id,
  26. worker_name=subworker.worker_name,
  27. worker_ip=subworker.worker_ip,
  28. ports=subworker.ports,
  29. gpu_type=subworker.gpu_type,
  30. gpu_indexes=subworker.gpu_indexes,
  31. gpu_ids=[
  32. make_gpu_id(subworker.worker_name, subworker.gpu_type, idx)
  33. for idx in (subworker.gpu_indexes or [])
  34. ],
  35. computed_resource_claim=subworker.computed_resource_claim,
  36. )
  37. )
  38. return ModelInstanceSnapshot(
  39. id=model_instance.id,
  40. name=model_instance.name,
  41. state=model_instance.state,
  42. state_message=model_instance.state_message,
  43. worker_id=model_instance.worker_id,
  44. worker_name=model_instance.worker_name,
  45. worker_ip=model_instance.worker_ip,
  46. ports=model_instance.ports,
  47. gpu_type=model_instance.gpu_type,
  48. gpu_indexes=model_instance.gpu_indexes,
  49. gpu_ids=[
  50. make_gpu_id(model_instance.worker_name, model_instance.gpu_type, idx)
  51. for idx in (model_instance.gpu_indexes or [])
  52. ],
  53. computed_resource_claim=model_instance.computed_resource_claim,
  54. resolved_path=model_instance.resolved_path,
  55. subordinate_workers=subordinate_workers_snapshots,
  56. backend=model_instance.backend,
  57. backend_version=model_instance.backend_version,
  58. api_detected_backend_version=model_instance.api_detected_backend_version,
  59. backend_parameters=model.backend_parameters,
  60. injected_backend_parameters=model_instance.injected_backend_parameters,
  61. env=model.env,
  62. image_name=model.image_name,
  63. run_command=model.run_command,
  64. extended_kv_cache=model.extended_kv_cache,
  65. speculative_config=model.speculative_config,
  66. )
  67. def create_worker_snapshot(
  68. worker: Worker, gpu_type: str, gpu_indexes: List[int]
  69. ) -> Tuple[Optional[WorkerSnapshot], Optional[GPUSnapshots]]:
  70. worker_snapshot = WorkerSnapshot(
  71. id=worker.id,
  72. name=worker.name,
  73. os=worker.status.os if worker.status and worker.status.os else None,
  74. cpu_total=(
  75. worker.status.cpu.total if worker.status and worker.status.cpu else None
  76. ),
  77. memory_total=(
  78. worker.status.memory.total
  79. if worker.status and worker.status.memory
  80. else None
  81. ),
  82. )
  83. gpu_snapshots = None
  84. if worker.status and worker.status.gpu_devices:
  85. gpu_snapshots = {}
  86. for gpu_device in worker.status.gpu_devices:
  87. if gpu_device.type != gpu_type or gpu_device.index not in gpu_indexes:
  88. continue
  89. gpu_id = make_gpu_id(worker.name, gpu_device.type, gpu_device.index)
  90. gpu_snapshot = GPUSnapshot(
  91. id=gpu_id,
  92. worker_id=worker.id,
  93. worker_name=worker.name,
  94. vendor=gpu_device.vendor,
  95. type=gpu_device.type,
  96. index=gpu_device.index,
  97. device_index=gpu_device.device_index,
  98. device_chip_index=gpu_device.device_chip_index,
  99. arch_family=gpu_device.arch_family,
  100. name=gpu_device.name,
  101. uuid=gpu_device.uuid,
  102. driver_version=gpu_device.driver_version,
  103. runtime_version=gpu_device.runtime_version,
  104. compute_capability=gpu_device.compute_capability,
  105. core_total=gpu_device.core.total if gpu_device.core else None,
  106. memory_total=gpu_device.memory.total if gpu_device.memory else None,
  107. )
  108. gpu_snapshots[gpu_id] = gpu_snapshot
  109. return worker_snapshot, gpu_snapshots