offload_layer_scorer.py 1.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657
  1. import logging
  2. from typing import List
  3. from gpustack.policies.base import ModelInstanceScore, ModelInstanceScorer
  4. from gpustack.schemas.models import Model, ModelInstance
  5. logger = logging.getLogger(__name__)
  6. class OffloadLayerScorer(ModelInstanceScorer):
  7. def __init__(self, model: Model, max_score: float = 100.0):
  8. self._model = model
  9. self._max_score = max_score
  10. async def score_instances(
  11. self, instances: List[ModelInstance]
  12. ) -> List[ModelInstanceScore]:
  13. """
  14. Score the instances with offload layers.
  15. """
  16. logger.debug(
  17. f"model {self._model.name}, score instances with offload layer policy"
  18. )
  19. scored_instances = []
  20. for instance in instances:
  21. if instance.computed_resource_claim is None:
  22. scored_instances.append(
  23. ModelInstanceScore(model_instance=instance, score=0)
  24. )
  25. continue
  26. if (
  27. instance.computed_resource_claim.total_layers is None
  28. or instance.computed_resource_claim.offload_layers is None
  29. ):
  30. scored_instances.append(
  31. ModelInstanceScore(model_instance=instance, score=0)
  32. )
  33. continue
  34. score = 0
  35. total_layers = instance.computed_resource_claim.total_layers
  36. offload_layers = instance.computed_resource_claim.offload_layers
  37. if total_layers == offload_layers:
  38. score = self._max_score
  39. else:
  40. score = offload_layers / total_layers * self._max_score
  41. scored_instances.append(
  42. ModelInstanceScore(model_instance=instance, score=score)
  43. )
  44. return scored_instances