model.py 2.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687
  1. from gpustack.schemas.models import (
  2. CategoryEnum,
  3. GPUSelector,
  4. Model,
  5. ModelInstance,
  6. ModelInstanceStateEnum,
  7. PlacementStrategyEnum,
  8. SourceEnum,
  9. )
  10. def new_model_instance(
  11. id,
  12. name,
  13. model_id,
  14. worker_id=None,
  15. state=ModelInstanceStateEnum.PENDING,
  16. gpu_indexes=None,
  17. computed_resource_claim=None,
  18. ) -> ModelInstance:
  19. return ModelInstance(
  20. id=id,
  21. name=name,
  22. worker_id=worker_id,
  23. model_id=model_id,
  24. model_name="test",
  25. state=state,
  26. gpu_indexes=gpu_indexes,
  27. computed_resource_claim=computed_resource_claim,
  28. )
  29. def new_model(
  30. id,
  31. name,
  32. replicas=1,
  33. huggingface_repo_id=None,
  34. model_scope_model_id=None,
  35. placement_strategy=PlacementStrategyEnum.BINPACK,
  36. distributable=True,
  37. extended_kv_cache=None,
  38. categories=[CategoryEnum.LLM],
  39. cpu_offloading=True,
  40. distributed_inference_across_workers=True,
  41. **kargs,
  42. ) -> Model:
  43. if huggingface_repo_id is not None:
  44. source = SourceEnum.HUGGING_FACE
  45. if model_scope_model_id is not None:
  46. source = SourceEnum.MODEL_SCOPE
  47. return Model(
  48. id=id,
  49. name=name,
  50. replicas=replicas,
  51. ready_replicas=0,
  52. source=source,
  53. huggingface_repo_id=huggingface_repo_id,
  54. model_scope_model_id=model_scope_model_id,
  55. distributable=distributable,
  56. placement_strategy=placement_strategy,
  57. cpu_offloading=cpu_offloading,
  58. distributed_inference_across_workers=distributed_inference_across_workers,
  59. categories=categories,
  60. extended_kv_cache=extended_kv_cache,
  61. **kargs,
  62. )
  63. def make_model(
  64. gpus_per_replica=2, gpu_ids=None, repo_id="Qwen/Qwen2.5-7B-Instruct", **kwargs
  65. ):
  66. gpu_selector = None
  67. if gpu_ids is not None:
  68. gpu_selector = GPUSelector(
  69. gpu_ids=gpu_ids,
  70. gpus_per_replica=gpus_per_replica,
  71. )
  72. return new_model(
  73. 1,
  74. "test_name",
  75. 1,
  76. huggingface_repo_id=repo_id,
  77. gpu_selector=gpu_selector,
  78. **kwargs,
  79. )