test_controller.py 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. from typing import List
  2. import pytest
  3. from gpustack.policies.base import ModelInstanceScore
  4. from gpustack.schemas.models import (
  5. ComputedResourceClaim,
  6. ModelInstanceStateEnum,
  7. )
  8. from gpustack.schemas.workers import WorkerStateEnum
  9. from gpustack.server.controllers import find_scale_down_candidates
  10. from tests.fixtures.workers.fixtures import (
  11. linux_nvidia_19_4090_24gx2,
  12. linux_nvidia_2_4080_16gx2,
  13. linux_cpu_1,
  14. )
  15. from unittest.mock import patch
  16. from tests.utils.mock import mock_async_session
  17. from tests.utils.model import new_model, new_model_instance
  18. @pytest.mark.asyncio
  19. async def test_find_scale_down_candidates():
  20. w1 = linux_nvidia_19_4090_24gx2()
  21. w1.state = WorkerStateEnum.NOT_READY
  22. workers = [
  23. w1,
  24. linux_nvidia_2_4080_16gx2(),
  25. linux_cpu_1(),
  26. ]
  27. m = new_model(1, "test", 3, "Meta-Llama-3-70B-Instruct-GGUF")
  28. mis = [
  29. new_model_instance(
  30. 1,
  31. "test-1",
  32. 1,
  33. 4,
  34. ModelInstanceStateEnum.RUNNING,
  35. [0, 1],
  36. ComputedResourceClaim(
  37. is_unified_memory=False,
  38. offload_layers=81,
  39. total_layers=81,
  40. ram=455165112,
  41. vram={0: 22912443392, 1: 22911897600},
  42. ),
  43. ),
  44. new_model_instance(
  45. 2,
  46. "test-2",
  47. 1,
  48. 3,
  49. ModelInstanceStateEnum.RUNNING,
  50. [0, 1],
  51. ComputedResourceClaim(
  52. is_unified_memory=False,
  53. offload_layers=60,
  54. total_layers=81,
  55. ram=1093245112,
  56. vram={0: 16900820992, 1: 16900820992},
  57. ),
  58. ),
  59. new_model_instance(
  60. 3,
  61. "test-3",
  62. 1,
  63. 6,
  64. ModelInstanceStateEnum.RUNNING,
  65. None,
  66. ComputedResourceClaim(
  67. is_unified_memory=False,
  68. offload_layers=0,
  69. total_layers=81,
  70. ram=3106511032,
  71. ),
  72. ),
  73. ]
  74. with (
  75. patch(
  76. 'gpustack.schemas.models.ModelInstance.all_by_field',
  77. return_value=mis,
  78. ),
  79. patch(
  80. 'gpustack.schemas.models.ModelInstance.all',
  81. return_value=mis,
  82. ),
  83. patch(
  84. 'gpustack.schemas.workers.Worker.all',
  85. return_value=workers,
  86. ),
  87. patch(
  88. 'gpustack.policies.scorers.placement_scorer.async_session',
  89. return_value=mock_async_session(),
  90. ),
  91. patch(
  92. 'gpustack.policies.scorers.status_scorer.async_session',
  93. return_value=mock_async_session(),
  94. ),
  95. ):
  96. candidates = await find_scale_down_candidates(mis, m, total_max_score=100)
  97. expected_candidates = [
  98. {
  99. "worker_id": 4,
  100. "instacnce_id": 1,
  101. "gpu_indexes": [0, 1],
  102. "score": 9.538995598356342,
  103. },
  104. {
  105. "worker_id": 6,
  106. "instacnce_id": 3,
  107. "score": 90.1308159326069,
  108. },
  109. {
  110. "worker_id": 3,
  111. "instacnce_id": 2,
  112. "score": 97.3594505895714,
  113. },
  114. ]
  115. compare_candidates(candidates, expected_candidates)
  116. def compare_candidates(candidates: List[ModelInstanceScore], expected_candidates):
  117. for i, expected in enumerate(expected_candidates):
  118. candidate = candidates[i]
  119. instance = candidate.model_instance
  120. if "worker_id" in expected:
  121. assert instance.worker_id == expected["worker_id"]
  122. if "instance_id" in expected:
  123. assert instance.id == expected["instance_id"]
  124. if "score" in expected:
  125. assert str(candidate.score)[:5] == str(expected["score"])[:5]