model_evaluations.py 1.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748
  1. from fastapi import APIRouter, Request
  2. from gpustack.api.tenant import assert_cluster_visible
  3. from gpustack.config.config import Config
  4. from gpustack.scheduler.evaluator import evaluate_models
  5. from gpustack.schemas.clusters import Cluster
  6. from gpustack.schemas.model_evaluations import (
  7. ModelEvaluationRequest,
  8. ModelEvaluationResponse,
  9. )
  10. from gpustack.api.exceptions import (
  11. InternalServerErrorException,
  12. )
  13. from gpustack.server.deps import SessionDep, TenantContextDep
  14. router = APIRouter()
  15. @router.post("", response_model=ModelEvaluationResponse)
  16. async def create_model_evaluation(
  17. request: Request,
  18. session: SessionDep,
  19. ctx: TenantContextDep,
  20. model_evaluation_in: ModelEvaluationRequest,
  21. ):
  22. config: Config = request.app.state.server_config
  23. model_specs = model_evaluation_in.model_specs
  24. # If a specific cluster was named, gate access through cluster visibility.
  25. if model_evaluation_in.cluster_id is not None:
  26. cluster = await Cluster.one_by_id(session, model_evaluation_in.cluster_id)
  27. assert_cluster_visible(ctx, cluster, not_found_message="Cluster not found")
  28. try:
  29. results = await evaluate_models(
  30. cluster_id=model_evaluation_in.cluster_id,
  31. config=config,
  32. session=session,
  33. model_specs=model_specs,
  34. )
  35. except Exception as e:
  36. raise InternalServerErrorException(
  37. message=f"Failed to evaluate model compatibility: {e}"
  38. )
  39. return ModelEvaluationResponse(results=results)