| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350 |
- from dataclasses import dataclass
- from datetime import datetime
- from enum import Enum
- from typing import Any, ClassVar, Dict, List, Optional
- from pydantic import BaseModel
- from sqlalchemy import JSON, Column
- from sqlmodel import Field, ForeignKey, Integer, SQLModel, Text
- from gpustack.schemas.common import (
- ListParams,
- PaginatedList,
- pydantic_column_type,
- )
- from gpustack.mixins import BaseModelMixin
- from gpustack.schemas.models import (
- ComputedResourceClaim,
- ExtendedKVCacheConfig,
- SpeculativeConfig,
- )
- from gpustack.schemas.workers import GPUDeviceInfo, OperatingSystemInfo
- DATASET_RANDOM = "Random"  # dataset name constant; presumably matched against `dataset_name` values - confirm with callers
- DATASET_SHAREGPT = "ShareGPT"  # dataset name constant for the ShareGPT dataset; usage is not visible in this file
class BenchmarkStateEnum(str, Enum):
    r"""
    Lifecycle states of a benchmark.

    Transitions:

       |- - Server - -|- - - - - - - Worker - - - - - - -|
       |              |                                  |
    PENDING ---> ---> ---> QUEUED ---> RUNNING ---> COMPLETED/STOPPED/ERROR
                             ^            ^
                             |            |
                             |------------|
                                   |
                                   |(Worker unreachable)
                                   v
                              UNREACHABLE

    Members are also ``str`` instances, so each one compares equal to its
    raw value, and ``str(member)`` yields that raw value.
    """

    PENDING = "pending"
    QUEUED = "queued"
    RUNNING = "running"
    COMPLETED = "completed"
    STOPPED = "stopped"
    ERROR = "error"
    UNREACHABLE = "unreachable"

    def __str__(self) -> str:
        # Render as the raw value instead of "BenchmarkStateEnum.<NAME>".
        return str(self.value)
class ModelInstanceRuntimeInfo(BaseModel):
    # Runtime placement details of a model instance: resource claim, ports,
    # the worker it is associated with and the GPUs it references.
    # (Kept as comments: a class docstring would leak into the JSON schema.)

    # These two are declared without a default value.
    computed_resource_claim: Optional[ComputedResourceClaim]
    ports: Optional[List[int]]

    # Worker identification.
    worker_id: Optional[int] = None
    worker_name: Optional[str] = None
    worker_ip: Optional[str] = None

    # GPU identification.
    gpu_type: Optional[str] = None
    gpu_indexes: Optional[List[int]] = None
    gpu_ids: Optional[List[str]] = None
class ModelInstanceSnapshot(ModelInstanceRuntimeInfo):
    # Snapshot of a model instance: identity, state, backend settings and
    # the runtime info of its subordinate workers, on top of the inherited
    # runtime info fields.

    id: int
    name: str
    resolved_path: Optional[str] = None

    # Instance state.
    state: Optional[str] = None
    state_message: Optional[str] = None

    # Backend details.
    backend: Optional[str] = None
    backend_version: Optional[str] = None
    api_detected_backend_version: Optional[str] = None
    backend_parameters: Optional[List[str]] = Field(default=None, sa_type=JSON)
    injected_backend_parameters: Optional[List[str]] = Field(
        default=None,
        sa_type=JSON,
    )
    image_name: Optional[str] = None
    run_command: Optional[str] = Field(default=None, sa_type=Text)
    env: Optional[Dict[str, str]] = Field(default=None, sa_type=JSON)

    # Extended KV Cache configuration.
    # Currently maps to LMCache config in vLLM and SGLang.
    extended_kv_cache: Optional[ExtendedKVCacheConfig] = Field(
        default=None,
        sa_type=pydantic_column_type(ExtendedKVCacheConfig),
    )
    speculative_config: Optional[SpeculativeConfig] = Field(
        default=None,
        sa_type=pydantic_column_type(SpeculativeConfig),
    )

    # Runtime info of the subordinate workers.
    subordinate_workers: Optional[List[ModelInstanceRuntimeInfo]] = None
class WorkerSnapshot(BaseModel):
    # Snapshot of a worker: identity, CPU/memory totals and OS info.
    # Units of the totals are not specified in this file.

    id: int
    name: str

    cpu_total: Optional[int] = None
    memory_total: Optional[int] = None

    os: Optional[OperatingSystemInfo] = None
class GPUSnapshot(GPUDeviceInfo):
    # Snapshot of a GPU device: the inherited GPUDeviceInfo fields plus the
    # GPU id, the owning worker and memory/core totals.

    id: str

    # Worker this GPU is associated with.
    worker_id: int
    worker_name: str

    # Capacity totals (units are not specified in this file).
    memory_total: Optional[int] = None
    core_total: Optional[int] = None
@dataclass()
class BenchmarkDeploymentMetadata:
    """Name and string labels that describe a benchmark deployment."""

    name: str
    labels: dict[str, str]
class BenchmarkBase(SQLModel):
    # Fields shared by the benchmark models in this file: identity, dataset
    # and target-model settings, load parameters and execution state.
    # (Kept as comments: a class docstring would leak into the JSON schema.)

    name: str = Field(unique=True, index=True)
    description: Optional[str] = Field(default=None, sa_type=Text, nullable=True)
    profile: Optional[str] = Field(default="Custom")

    # Dataset settings.
    # dataset_name is a denormalized field for easier query.
    dataset_name: Optional[str] = Field(default=None)
    dataset_input_tokens: Optional[int] = Field(default=None)
    dataset_output_tokens: Optional[int] = Field(default=None)
    dataset_seed: Optional[int] = Field(default=None)

    # Benchmark target.
    cluster_id: int = Field(default=None)
    model_id: Optional[int] = Field(default=None)
    # model_name is a denormalized field for easier query.
    model_name: Optional[str] = Field(default=None)
    model_instance_name: str

    # Load settings.
    request_rate: int = Field(default=10)  # requests per second
    total_requests: Optional[int] = Field(default=None)  # total number of requests to send

    # Benchmark state fields.
    state: BenchmarkStateEnum = Field(
        index=True,
        default=BenchmarkStateEnum.PENDING,
    )
    state_message: Optional[str] = Field(
        sa_column=Column(Text, nullable=True),
        default=None,
    )
    progress: Optional[float] = Field(default=None)
    worker_id: Optional[int] = Field(default=None)
    pid: Optional[int] = Field(default=None)

    def get_deployment_metadata(
        self,
    ) -> Optional[BenchmarkDeploymentMetadata]:
        """
        Build the deployment metadata for this benchmark.

        Returns:
            A BenchmarkDeploymentMetadata named after the benchmark, labelled
            with the benchmark name, the model instance name (empty string
            when falsy) and the fixed type "benchmark".
        """
        deployment_labels = {
            "benchmark-name": self.name,
            "model-instance-name": self.model_instance_name or "",
            "type": "benchmark",
        }
        return BenchmarkDeploymentMetadata(name=self.name, labels=deployment_labels)
- ModelInstanceSnapshots = Dict[str, ModelInstanceSnapshot]  # str-keyed map of model instance snapshots; key meaning is not shown here
- WorkerSnapshots = Dict[str, WorkerSnapshot]  # str-keyed map of worker snapshots; key meaning is not shown here
- GPUSnapshots = Dict[str, GPUSnapshot]  # str-keyed map of GPU snapshots; presumably keyed by GPU id - confirm with the producer
class BenchmarkSnapshot(BaseModel):
    # Bundle of the snapshot maps for model instances, workers and GPUs.
    # Every part is optional and defaults to None.

    instances: Optional[ModelInstanceSnapshots] = None

    workers: Optional[WorkerSnapshots] = None

    gpus: Optional[GPUSnapshots] = None
class BenchmarkMetricsLite(SQLModel):
    # Summary metrics of a benchmark run. Every metric is optional and
    # defaults to None; units are given in each field description.

    # Throughput and latency means.
    requests_per_second_mean: Optional[float] = Field(
        description="Mean requests per second (unit: req/s)",
        default=None,
    )
    request_latency_mean: Optional[float] = Field(
        description="Mean request latency (unit: seconds)",
        default=None,
    )
    time_per_output_token_mean: Optional[float] = Field(
        description="Mean time per output token (unit: ms)",
        default=None,
    )
    inter_token_latency_mean: Optional[float] = Field(
        description="Mean inter-token latency (unit: ms)",
        default=None,
    )
    time_to_first_token_mean: Optional[float] = Field(
        description="Mean time to first token (unit: ms)",
        default=None,
    )
    tokens_per_second_mean: Optional[float] = Field(
        description="Mean tokens per second (unit: tok/s)",
        default=None,
    )
    output_tokens_per_second_mean: Optional[float] = Field(
        description="Mean output tokens per second (unit: tok/s)",
        default=None,
    )
    input_tokens_per_second_mean: Optional[float] = Field(
        description="Mean prompt tokens per second (unit: tok/s)",
        default=None,
    )

    # Concurrency.
    request_concurrency_mean: Optional[float] = Field(
        description="Mean request concurrency (unit: number of concurrent requests)",
        default=None,
    )
    request_concurrency_max: Optional[float] = Field(
        description="Max request concurrency (unit: number of concurrent requests)",
        default=None,
    )

    # Request counters.
    request_total: Optional[int] = Field(
        description="Total number of requests made",
        default=None,
    )
    request_successful: Optional[int] = Field(
        description="Total number of successful requests",
        default=None,
    )
    request_errored: Optional[int] = Field(
        description="Total number of errored requests",
        default=None,
    )
    request_incomplete: Optional[int] = Field(
        description="Total number of incomplete requests",
        default=None,
    )
class BenchmarkMetrics(BenchmarkMetricsLite):
    # Summary metrics plus the raw metrics payload stored as JSON.

    # Potentially large field.
    # NOTE(review): intended to be loaded lazily, but no deferral is
    # configured on the column here - confirm where that happens.
    raw_metrics: Optional[Dict[str, Any]] = Field(
        default=None,
        sa_column=Column(JSON),
    )
class BenchmarkWithSnapshots(BenchmarkBase):
    # Benchmark fields extended with the captured snapshot and two text
    # summaries about GPUs.

    snapshot: Optional[BenchmarkSnapshot] = Field(
        sa_column=Column(pydantic_column_type(BenchmarkSnapshot)),
        default=None,
    )

    # Nullable text columns; the summary format is not defined in this file.
    gpu_summary: Optional[str] = Field(
        sa_column=Column(Text, nullable=True),
        default=None,
    )
    gpu_vendor_summary: Optional[str] = Field(
        sa_column=Column(Text, nullable=True),
        default=None,
    )
class Benchmark(
    BenchmarkWithSnapshots,
    BenchmarkMetrics,
    BaseModelMixin,
    table=True,
):
    # Table model stored in the "benchmarks" table. It combines the
    # benchmark fields, snapshots, full metrics and the BaseModelMixin.
    __tablename__ = "benchmarks"

    id: Optional[int] = Field(primary_key=True, default=None)

    # Tenant scope. Server-derived from cluster on creation.
    owner_principal_id: Optional[int] = Field(
        sa_column=Column(Integer, ForeignKey("principals.id"), nullable=True),
        default=None,
    )
class BenchmarkListParams(ListParams):
    # List parameters for benchmarks; declares which field names may be
    # used for sorting.

    sortable_fields: ClassVar[List[str]] = [
        # descriptive fields
        "name",
        "dataset_name",
        "model_name",
        "state",
        # timestamps
        "created_at",
        "updated_at",
        # metrics fields: rates and latencies
        "requests_per_second_mean",
        "request_latency_mean",
        "time_per_output_token_mean",
        "inter_token_latency_mean",
        "time_to_first_token_mean",
        "tokens_per_second_mean",
        "output_tokens_per_second_mean",
        "input_tokens_per_second_mean",
        # metrics fields: concurrency
        "request_concurrency_mean",
        "request_concurrency_max",
        # metrics fields: request counters
        "request_total",
        "request_successful",
        "request_errored",
        "request_incomplete",
    ]
class BenchmarkCreate(BenchmarkBase):
    # Creation model: exactly the BenchmarkBase fields, nothing added.
    pass
class BenchmarkUpdate(SQLModel):
    # Update model limited to the name (no default declared) and the
    # optional description.

    name: str = Field(unique=True, index=True)

    description: Optional[str] = Field(default=None, sa_type=Text, nullable=True)
class BenchmarkStateUpdate(SQLModel):
    # State update model: state, state message, pid and progress.
    # Every field is optional and defaults to None.

    state: Optional[BenchmarkStateEnum] = None
    state_message: Optional[str] = Field(
        sa_column=Column(Text, nullable=True),
        default=None,
    )

    pid: Optional[int] = Field(default=None)
    progress: Optional[float] = None
class BenchmarkFullPublic(BenchmarkWithSnapshots, BenchmarkMetrics):
    # Public model with snapshots and full metrics (BenchmarkMetrics adds
    # raw_metrics), plus id and timestamps.

    id: int
    created_at: datetime
    updated_at: datetime

    # NOTE(review): both fields below are already declared with the same
    # definition on BenchmarkWithSnapshots; the redeclaration looks
    # redundant - confirm before removing.
    gpu_summary: Optional[str] = Field(
        sa_column=Column(Text, nullable=True),
        default=None,
    )
    gpu_vendor_summary: Optional[str] = Field(
        sa_column=Column(Text, nullable=True),
        default=None,
    )
class BenchmarkPublic(BenchmarkWithSnapshots, BenchmarkMetricsLite):
    # Public model with snapshots and the summary metrics only
    # (BenchmarkMetricsLite has no raw_metrics), plus id and timestamps.

    id: int
    created_at: datetime
    updated_at: datetime

    # NOTE(review): both fields below are already declared with the same
    # definition on BenchmarkWithSnapshots; the redeclaration looks
    # redundant - confirm before removing.
    gpu_summary: Optional[str] = Field(
        sa_column=Column(Text, nullable=True),
        default=None,
    )
    gpu_vendor_summary: Optional[str] = Field(
        sa_column=Column(Text, nullable=True),
        default=None,
    )
- BenchmarksPublic = PaginatedList[BenchmarkPublic]  # PaginatedList specialised to BenchmarkPublic items
|