| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118 |
- from typing import List, Optional, Tuple
- from gpustack.schemas.benchmark import (
- GPUSnapshot,
- GPUSnapshots,
- ModelInstanceRuntimeInfo,
- ModelInstanceSnapshot,
- WorkerSnapshot,
- )
- from gpustack.schemas.models import Model, ModelInstance
- from gpustack.schemas.workers import Worker
- from gpustack.utils.gpu import make_gpu_id
def create_model_instance_snapshot(
    model_instance: ModelInstance, model: Model
) -> ModelInstanceSnapshot:
    """Build a ``ModelInstanceSnapshot`` from a model instance and its model.

    Placement and runtime fields (state, worker, ports, GPU selection,
    resource claim, resolved path, backend and its versions, injected
    backend parameters) are copied from ``model_instance``. The
    user-facing configuration (``backend_parameters``, ``env``,
    ``image_name``, ``run_command``, ``extended_kv_cache``,
    ``speculative_config``) is copied from ``model``.

    Args:
        model_instance: The instance whose state is captured.
        model: The model the instance belongs to.

    Returns:
        The populated snapshot. ``subordinate_workers`` is ``None`` unless
        the instance has distributed servers with at least one subordinate
        worker, in which case it holds one runtime-info entry per
        subordinate worker.
    """
    distributed = model_instance.distributed_servers

    subordinate_infos: Optional[List[ModelInstanceRuntimeInfo]] = None
    if distributed and distributed.subordinate_workers:
        subordinate_infos = [
            ModelInstanceRuntimeInfo(
                worker_id=sub.worker_id,
                worker_name=sub.worker_name,
                worker_ip=sub.worker_ip,
                ports=sub.ports,
                gpu_type=sub.gpu_type,
                gpu_indexes=sub.gpu_indexes,
                # A missing index list yields an empty id list.
                gpu_ids=[
                    make_gpu_id(sub.worker_name, sub.gpu_type, gpu_index)
                    for gpu_index in (sub.gpu_indexes or [])
                ],
                computed_resource_claim=sub.computed_resource_claim,
            )
            for sub in distributed.subordinate_workers
        ]

    # GPU ids of the main worker, derived the same way as for subordinates.
    instance_gpu_ids = [
        make_gpu_id(model_instance.worker_name, model_instance.gpu_type, gpu_index)
        for gpu_index in (model_instance.gpu_indexes or [])
    ]

    return ModelInstanceSnapshot(
        id=model_instance.id,
        name=model_instance.name,
        state=model_instance.state,
        state_message=model_instance.state_message,
        worker_id=model_instance.worker_id,
        worker_name=model_instance.worker_name,
        worker_ip=model_instance.worker_ip,
        ports=model_instance.ports,
        gpu_type=model_instance.gpu_type,
        gpu_indexes=model_instance.gpu_indexes,
        gpu_ids=instance_gpu_ids,
        computed_resource_claim=model_instance.computed_resource_claim,
        resolved_path=model_instance.resolved_path,
        subordinate_workers=subordinate_infos,
        backend=model_instance.backend,
        backend_version=model_instance.backend_version,
        api_detected_backend_version=model_instance.api_detected_backend_version,
        backend_parameters=model.backend_parameters,
        injected_backend_parameters=model_instance.injected_backend_parameters,
        env=model.env,
        image_name=model.image_name,
        run_command=model.run_command,
        extended_kv_cache=model.extended_kv_cache,
        speculative_config=model.speculative_config,
    )
def create_worker_snapshot(
    worker: Worker, gpu_type: str, gpu_indexes: List[int]
) -> Tuple[Optional[WorkerSnapshot], Optional[GPUSnapshots]]:
    """Create snapshots of a worker and of the selected GPUs on it.

    Args:
        worker: The worker to capture. Its ``status`` may be unset, in
            which case the OS, CPU and memory fields are ``None``.
        gpu_type: Only GPU devices whose ``type`` equals this value are
            captured.
        gpu_indexes: Only GPU devices whose ``index`` is in this collection
            are captured. ``None`` is treated as an empty selection.

    Returns:
        A ``(worker_snapshot, gpu_snapshots)`` tuple. ``gpu_snapshots`` is
        ``None`` when the worker reports no GPU devices; otherwise it is a
        mapping from GPU id to ``GPUSnapshot`` (possibly empty when no
        device matches the selection).
    """
    status = worker.status

    worker_snapshot = WorkerSnapshot(
        id=worker.id,
        name=worker.name,
        os=status.os if status and status.os else None,
        cpu_total=status.cpu.total if status and status.cpu else None,
        memory_total=status.memory.total if status and status.memory else None,
    )

    if not (status and status.gpu_devices):
        return worker_snapshot, None

    # Build the lookup once. Tolerating ``None`` avoids a TypeError on the
    # membership test for instances without an assigned GPU index list.
    wanted_indexes = set(gpu_indexes or ())

    gpu_snapshots = {}
    for gpu_device in status.gpu_devices:
        if gpu_device.type != gpu_type or gpu_device.index not in wanted_indexes:
            continue
        gpu_id = make_gpu_id(worker.name, gpu_device.type, gpu_device.index)
        gpu_snapshots[gpu_id] = GPUSnapshot(
            id=gpu_id,
            worker_id=worker.id,
            worker_name=worker.name,
            vendor=gpu_device.vendor,
            type=gpu_device.type,
            index=gpu_device.index,
            device_index=gpu_device.device_index,
            device_chip_index=gpu_device.device_chip_index,
            arch_family=gpu_device.arch_family,
            name=gpu_device.name,
            uuid=gpu_device.uuid,
            driver_version=gpu_device.driver_version,
            runtime_version=gpu_device.runtime_version,
            compute_capability=gpu_device.compute_capability,
            core_total=gpu_device.core.total if gpu_device.core else None,
            memory_total=gpu_device.memory.total if gpu_device.memory else None,
        )
    return worker_snapshot, gpu_snapshots
|