| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887 |
- import argparse
- import asyncio
- import json
- import logging
- import multiprocessing
- import os
- import sys
- from typing import Any, Dict
- import yaml
- from gpustack import __version__, __git_commit__
- from gpustack.config.config import set_global_config
- from gpustack.extension import Plugin, iter_plugin_classes
- from gpustack.logging import setup_logging
- from gpustack.utils.envs import get_gpustack_env, get_gpustack_env_bool
- from gpustack.worker.worker import Worker
- from gpustack.config import Config
- from gpustack.server.server import Server
- from gpustack.gateway import initialize_gateway
- logger = logging.getLogger(__name__)
class OptionalBoolAction(argparse.Action):
    """Value-less flag action that stores ``True`` when the flag is given.

    When the flag is absent, argparse leaves the configured ``default`` in
    the namespace, so an unset flag can be told apart from one that was set.
    """

    def __init__(self, option_strings, dest, nargs=None, **kwargs):
        """Create the action; passing any explicit ``nargs`` raises ValueError."""
        if nargs is not None:
            raise ValueError("nargs not allowed")
        # nargs=0: the option consumes no command-line value.
        super().__init__(option_strings, dest, nargs=0, **kwargs)
        self.default = kwargs.get("default")

    def __call__(self, parser, namespace, values, option_string=None):
        """Record that the flag was present by storing ``True`` under ``dest``."""
        setattr(namespace, self.dest, True)
def setup_start_cmd(subparsers: argparse._SubParsersAction):
    """Register the ``start`` sub-command and attach all of its options."""
    summary = "Run GPUStack server or worker."
    start_parser: argparse.ArgumentParser = subparsers.add_parser(
        "start", help=summary, description=summary
    )
    start_cmd_options(start_parser)
def start_cmd_options(parser_server: argparse.ArgumentParser):
    """Attach every ``start`` command option to *parser_server*.

    Options are registered in three argument groups (common, server, worker),
    then plugins get a chance to add their own arguments, and finally ``run``
    is installed as the parser's ``func`` default.

    Args:
        parser_server: The parser (or sub-parser) receiving the options.
    """
    _add_common_arguments(parser_server)
    _add_server_arguments(parser_server)
    _add_worker_arguments(parser_server)
    # Allow plugins to set up start command arguments
    _setup_plugin_start_args(parser_server)
    parser_server.set_defaults(func=run)


def _add_common_arguments(parser_server: argparse.ArgumentParser):
    """Register the "Common settings" group; defaults come from env lookups."""
    common_group = parser_server.add_argument_group("Common settings")
    common_group.add_argument(
        "--advertise-address",
        type=str,
        help="The IP address to expose for external access. If not set, the system will auto-detect a suitable local IP address.",
        default=get_gpustack_env("ADVERTISE_ADDRESS"),
    )
    common_group.add_argument(
        "--port",
        type=int,
        help="Port to bind the server to.",
        default=get_gpustack_env("PORT"),
    )
    common_group.add_argument(
        "--tls-port",
        type=int,
        help="Port to bind the TLS server to.",
        default=get_gpustack_env("TLS_PORT"),
    )
    common_group.add_argument(
        "--api-port",
        type=int,
        help="Port to bind the GPUStack API server to.",
        default=get_gpustack_env("API_PORT"),
    )
    common_group.add_argument(
        "--proxy-port",
        type=int,
        help="Port for the HTTP/HTTPS proxy server used in tunnel proxy mode to forward inference requests to workers behind firewalls or NAT via persistent WebSocket tunnels.",
        default=get_gpustack_env("PROXY_PORT"),
    )
    common_group.add_argument(
        "--config-file",
        type=str,
        help="Path to the YAML config file.",
        default=get_gpustack_env("CONFIG_FILE"),
    )
    common_group.add_argument(
        "-d",
        "--debug",
        action=OptionalBoolAction,
        help="Enable debug mode.",
        default=get_gpustack_env_bool("DEBUG"),
    )
    common_group.add_argument(
        "--data-dir",
        type=str,
        help="Directory to store data. The default is OS specific.",
        default=get_gpustack_env("DATA_DIR"),
    )
    common_group.add_argument(
        "--cache-dir",
        type=str,
        help="Directory to store cache (e.g., model files). The default is <data-dir>/cache.",
        default=get_gpustack_env("CACHE_DIR"),
    )
    common_group.add_argument(
        "--bin-dir",
        type=str,
        help="Directory to store additional binaries, e.g., versioned backend executables.",
        default=get_gpustack_env("BIN_DIR"),
    )
    common_group.add_argument(
        "--pipx-path",
        type=str,
        help="Path to the pipx executable, used to install versioned backends.",
        default=get_gpustack_env("PIPX_PATH"),
    )
    common_group.add_argument(
        "--huggingface-token",
        type=str,
        help="User Access Token to authenticate to the Hugging Face Hub.",
        default=os.getenv("HF_TOKEN"),
    )
    common_group.add_argument(
        "--system-default-container-registry",
        type=str,
        help="Default container registry for GPUStack to pull system and inference images. The default is 'docker.io'.",
        default=get_gpustack_env("SYSTEM_DEFAULT_CONTAINER_REGISTRY"),
    )
    common_group.add_argument(
        "--image-name-override",
        type=str,
        help="Override the default image name for the GPUStack container.",
        default=get_gpustack_env("IMAGE_NAME_OVERRIDE"),
    )
    common_group.add_argument(
        "--image-repo",
        type=str,
        help="Override the default image repository gpustack/gpustack for the GPUStack container.",
        default=get_gpustack_env("IMAGE_REPO"),
    )
    common_group.add_argument(
        "--benchmark-image-repo",
        type=str,
        help="Override the default benchmark image repository gpustack/benchmark-runner for the GPUStack benchmark container.",
        default=get_gpustack_env("BENCHMARK_IMAGE_REPO"),
    )
    common_group.add_argument(
        "--gateway-mode",
        type=str,
        help="Gateway running mode. Options: embedded, in-cluster, external, disabled, or auto (default).",
        default=get_gpustack_env("GATEWAY_MODE"),
    )
    common_group.add_argument(
        "--gateway-kubeconfig",
        type=str,
        help="Path to the kubeconfig file for gateway. Only useful for external gateway-mode.",
        default=get_gpustack_env("GATEWAY_KUBECONFIG"),
    )
    common_group.add_argument(
        "--gateway-namespace",
        type=str,
        help="The namespace where the gateway component is deployed.",
        default=get_gpustack_env("GATEWAY_NAMESPACE"),
    )
    common_group.add_argument(
        "--service-discovery-name",
        type=str,
        help="The name of the service discovery service in DNS. Only useful when deployed in Kubernetes with service discovery.",
        default=get_gpustack_env("SERVICE_DISCOVERY_NAME"),
    )
    common_group.add_argument(
        "--namespace",
        type=str,
        help="Kubernetes namespace for GPUStack to deploy gateway routing rules and model instances.",
        default=os.getenv("POD_NAMESPACE"),
    )


def _add_server_arguments(parser_server: argparse.ArgumentParser):
    """Register the "Server settings" group (database, auth, CORS, observability)."""
    server_group = parser_server.add_argument_group("Server settings")
    # Database settings
    server_group.add_argument(
        "--database-port",
        type=int,
        help="Port of the database. Example: 5432 for PostgreSQL.",
        default=get_gpustack_env("DATABASE_PORT"),
    )
    server_group.add_argument(
        "--metrics-port",
        type=int,
        help="Port to expose server metrics.",
        default=get_gpustack_env("METRICS_PORT"),
    )
    server_group.add_argument(
        "--database-url",
        type=str,
        help="URL of the database. Example: postgresql://user:password@hostname:port/db_name.",
        default=get_gpustack_env("DATABASE_URL"),
    )
    # Embedded worker settings
    server_group.add_argument(
        "--disable-worker",
        action=OptionalBoolAction,
        help="(DEPRECATED) Disable the embedded worker for the GPUStack server. New installations will not have the embedded worker by default. Use '--enable-worker' to enable the embedded worker if needed. If neither flag is set, for backward compatibility, the embedded worker will be enabled by default for legacy installations prior to v2.0.1.",
        default=get_gpustack_env_bool("DISABLE_WORKER"),
    )
    server_group.add_argument(
        "--enable-worker",
        action=OptionalBoolAction,
        help="Enable the embedded worker for the GPUStack server.",
        default=get_gpustack_env_bool("ENABLE_WORKER"),
    )
    # Server settings
    server_group.add_argument(
        "--disable-metrics",
        action=OptionalBoolAction,
        help="Disable server metrics.",
        default=get_gpustack_env_bool(
            "DISABLE_METRICS",
        ),
    )
    server_group.add_argument(
        "--bootstrap-password",
        type=str,
        help="Initial password for the default admin user. Random by default.",
        default=get_gpustack_env("BOOTSTRAP_PASSWORD"),
    )
    server_group.add_argument(
        "--ssl-keyfile",
        type=str,
        help="Path to the SSL key file.",
        default=get_gpustack_env("SSL_KEYFILE"),
    )
    server_group.add_argument(
        "--ssl-certfile",
        type=str,
        help="Path to the SSL certificate file.",
        default=get_gpustack_env("SSL_CERTFILE"),
    )
    server_group.add_argument(
        "--force-auth-localhost",
        action=OptionalBoolAction,
        # The trailing space keeps the two concatenated sentences apart.
        help="Force authentication for requests originating from localhost (127.0.0.1). "
        "When set to True, all requests from localhost will require authentication.",
        default=get_gpustack_env_bool("FORCE_AUTH_LOCALHOST"),
    )
    server_group.add_argument(
        "--disable-update-check",
        action=OptionalBoolAction,
        help="Disable update check.",
        default=get_gpustack_env_bool("DISABLE_UPDATE_CHECK"),
    )
    server_group.add_argument(
        "--disable-openapi-docs",
        action=OptionalBoolAction,
        help="Disable autogenerated OpenAPI documentation endpoints (Swagger UI at /docs, ReDoc at /redoc, and the raw spec at /openapi.json).",
        default=get_gpustack_env_bool("DISABLE_OPENAPI_DOCS"),
    )
    server_group.add_argument(
        "--update-check-url",
        type=str,
        help=argparse.SUPPRESS,
        default=get_gpustack_env("UPDATE_CHECK_URL"),
    )
    server_group.add_argument(
        "--model-catalog-file",
        type=str,
        help="Path or URL to the model catalog file.",
        default=get_gpustack_env("MODEL_CATALOG_FILE"),
    )
    server_group.add_argument(
        "--server-external-url",
        type=str,
        help="External URL of the server. Should be set if the server is behind a reverse proxy.",
        default=get_gpustack_env("SERVER_EXTERNAL_URL"),
    )
    server_group.add_argument(
        "--gateway-concurrency",
        type=int,
        help="Number of concurrent connections for the embedded gateway. The default is 16.",
        default=get_gpustack_env("GATEWAY_CONCURRENCY"),
    )
    server_group.add_argument(
        "--gateway-plugin-server-url",
        type=str,
        help=argparse.SUPPRESS,
        default=get_gpustack_env("GATEWAY_PLUGIN_SERVER_URL"),
    )
    server_group.add_argument(
        "--gateway-ingress-class",
        type=str,
        help=argparse.SUPPRESS,
        default=get_gpustack_env("GATEWAY_INGRESS_CLASS"),
    )
    # Observability settings
    server_group.add_argument(
        "--disable-builtin-observability",
        action=OptionalBoolAction,
        help="Disable embedded Grafana and Prometheus services.",
        default=get_gpustack_env_bool("DISABLE_BUILTIN_OBSERVABILITY"),
    )
    server_group.add_argument(
        "--builtin-prometheus-port",
        type=int,
        help="Port for the embedded Prometheus service. Default is 19090.",
        default=get_gpustack_env("BUILTIN_PROMETHEUS_PORT"),
    )
    server_group.add_argument(
        "--builtin-grafana-port",
        type=int,
        help="Port for the embedded Grafana service. Default is 13000.",
        default=get_gpustack_env("BUILTIN_GRAFANA_PORT"),
    )
    server_group.add_argument(
        "--grafana-url",
        type=str,
        help="Grafana base URL for dashboard redirects and proxying. Must be browser-reachable (not a container-only hostname). If set, embedded Grafana and Prometheus will be disabled. Only required for external Grafana.",
        default=get_gpustack_env("GRAFANA_URL"),
    )
    server_group.add_argument(
        "--grafana-worker-dashboard-uid",
        type=str,
        help="Grafana dashboard UID for worker dashboard redirects.",
        default=get_gpustack_env("GRAFANA_WORKER_DASHBOARD_UID"),
    )
    server_group.add_argument(
        "--grafana-model-dashboard-uid",
        type=str,
        help="Grafana dashboard UID for model dashboard redirects.",
        default=get_gpustack_env("GRAFANA_MODEL_DASHBOARD_UID"),
    )
    # CORS settings
    server_group.add_argument(
        "--enable-cors",
        action=OptionalBoolAction,
        help="Enable Cross-Origin Resource Sharing (CORS) on the server.",
        default=get_gpustack_env_bool("ENABLE_CORS"),
    )
    server_group.add_argument(
        "--allow-credentials",
        action=OptionalBoolAction,
        help="Allow cookies and credentials in cross-origin requests.",
        default=get_gpustack_env_bool("ALLOW_CREDENTIALS"),
    )
    server_group.add_argument(
        "--allow-origins",
        action='append',
        help='Origins allowed for cross-origin requests. Specify the flag multiple times for multiple origins. Example: --allow-origins https://example.com --allow-origins https://api.example.com. Default: ["*"] (all origins allowed).',
    )
    server_group.add_argument(
        "--allow-methods",
        action='append',
        help='HTTP methods allowed in cross-origin requests. Specify the flag multiple times for multiple methods. Example: --allow-methods GET --allow-methods POST. Default: ["GET", "POST"].',
    )
    server_group.add_argument(
        "--allow-headers",
        action='append',
        help='HTTP request headers allowed in cross-origin requests. Specify the flag multiple times for multiple headers. Example: --allow-headers Authorization --allow-headers X-API-Key --allow-headers Content-Type. Default: ["Authorization", "Content-Type", "X-API-Key"].',
    )
    # OIDC settings
    server_group.add_argument(
        "--oidc-issuer",
        type=str,
        help="The issuer URL of the OIDC provider. OIDC discovery under `<issuer>/.well-known/openid-configuration` will be used to discover the OIDC configuration.",
        default=get_gpustack_env("OIDC_ISSUER"),
    )
    server_group.add_argument(
        "--oidc-client-id",
        type=str,
        help="OIDC client ID.",
        default=get_gpustack_env("OIDC_CLIENT_ID"),
    )
    server_group.add_argument(
        "--oidc-client-secret",
        type=str,
        help="OIDC client secret.",
        default=get_gpustack_env("OIDC_CLIENT_SECRET"),
    )
    server_group.add_argument(
        "--oidc-redirect-uri",
        type=str,
        help="The redirect URI configured in your OIDC application. This must be set to `<server-url>/auth/oidc/callback`.",
        default=get_gpustack_env("OIDC_REDIRECT_URI"),
    )
    server_group.add_argument(
        "--oidc-skip-userinfo",
        action=OptionalBoolAction,
        help="Skip using the UserInfo endpoint and retrieve user details from the ID token.",
        default=get_gpustack_env_bool("OIDC_SKIP_USERINFO"),
    )
    server_group.add_argument(
        "--oidc-use-userinfo",
        action=OptionalBoolAction,
        help="[Deprecated] Use the UserInfo endpoint to fetch user details after authentication.",
        default=get_gpustack_env_bool("OIDC_USE_USERINFO"),
    )
    # SAML settings
    server_group.add_argument(
        "--saml-idp-server-url",
        type=str,
        help="SAML IdP server URL.",
        default=get_gpustack_env("SAML_IDP_SERVER_URL"),
    )
    server_group.add_argument(
        "--saml-idp-logout-url",
        type=str,
        help="SAML IdP Single Logout endpoint URL.",
        default=get_gpustack_env("SAML_IDP_LOGOUT_URL"),
    )
    server_group.add_argument(
        "--saml-idp-entity-id",
        type=str,
        help="SAML IdP entity ID.",
        default=get_gpustack_env("SAML_IDP_ENTITY_ID"),
    )
    server_group.add_argument(
        "--saml-idp-x509-cert",
        type=str,
        help="SAML IdP X.509 certificate.",
        default=get_gpustack_env("SAML_IDP_X509_CERT"),
    )
    server_group.add_argument(
        "--saml-sp-entity-id",
        type=str,
        help="SAML SP entity ID.",
        default=get_gpustack_env("SAML_SP_ENTITY_ID"),
    )
    server_group.add_argument(
        "--saml-sp-acs-url",
        type=str,
        help="SAML SP Assertion Consumer Service(ACS) URL. It should be set to `<server-url>/auth/saml/callback`.",
        default=get_gpustack_env("SAML_SP_ACS_URL"),
    )
    server_group.add_argument(
        "--saml-sp-slo-url",
        type=str,
        help="SAML SP Single Logout Service URL. It can be set to `<server-url>/auth/saml/logout/callback` if you need to receive LogoutResponse.",
        default=get_gpustack_env("SAML_SP_SLO_URL"),
    )
    server_group.add_argument(
        "--saml-sp-x509-cert",
        type=str,
        help="SAML SP X.509 certificate.",
        default=get_gpustack_env("SAML_SP_X509_CERT"),
    )
    server_group.add_argument(
        "--saml-sp-private-key",
        type=str,
        help="SAML SP private key.",
        default=get_gpustack_env("SAML_SP_PRIVATE_KEY"),
    )
    server_group.add_argument(
        "--saml-sp-attribute-prefix",
        type=str,
        help="SAML Service Provider attribute prefix, which is used for fetching the attributes that are specified by --external-auth-*. e.g., 'http://schemas.auth0.com/'.",
        default=get_gpustack_env("SAML_SP_ATTRIBUTE_PREFIX"),
    )
    server_group.add_argument(
        "--saml-security",
        type=str,
        help="SAML security settings in JSON.",
        default=get_gpustack_env("SAML_SECURITY"),
    )
    # External Authentication settings
    server_group.add_argument(
        "--external-auth-name",
        type=str,
        help="Mapping of external authentication user information to username, e.g., 'preferred_username'. For SAML, you must configure the full attribute name like 'http://schemas.xmlsoap.org/ws/2005/05/identity/claims/emailaddress' or simplify with 'emailaddress' by '--saml-sp-attribute-prefix'.",
        default=get_gpustack_env("EXTERNAL_AUTH_NAME"),
    )
    server_group.add_argument(
        "--external-auth-full-name",
        type=str,
        help="Mapping of external authentication user information to user's full name. Multiple elements can be combined, e.g., 'name' or 'firstName+lastName'. For SAML, you must configure the full attribute name like 'http://schemas.xmlsoap.org/ws/2005/05/identity/claims/name' or simplify with 'name' by '--saml-sp-attribute-prefix'.",
        default=get_gpustack_env("EXTERNAL_AUTH_FULL_NAME"),
    )
    server_group.add_argument(
        "--external-auth-avatar-url",
        type=str,
        help="Mapping of external authentication user information to user's avatar URL. e.g.,'picture'. For SAML, you must configure the full attribute name like 'http://schemas.auth0.com/picture' or simplify with 'picture' by '--saml-sp-attribute-prefix'.",
        default=get_gpustack_env("EXTERNAL_AUTH_AVATAR_URL"),
    )
    server_group.add_argument(
        "--external-auth-default-inactive",
        action=OptionalBoolAction,
        help="Set newly created externally authenticated users inactive by default.",
        default=get_gpustack_env_bool("EXTERNAL_AUTH_DEFAULT_INACTIVE"),
    )
    server_group.add_argument(
        "--external-auth-post-logout-redirect-key",
        type=str,
        help="Generic key for post-logout redirection across IdPs.",
        default=get_gpustack_env("EXTERNAL_AUTH_POST_LOGOUT_REDIRECT_KEY"),
    )


def _add_worker_arguments(parser_server: argparse.ArgumentParser):
    """Register the "Worker settings" group."""
    worker_group = parser_server.add_argument_group("Worker settings")
    worker_group.add_argument(
        "-t",
        "--token",
        type=str,
        help="Shared secret used to add a worker.",
        default=get_gpustack_env("TOKEN"),
    )
    worker_group.add_argument(
        "-s",
        "--server-url",
        type=str,
        help="Server to connect to.",
        default=get_gpustack_env("SERVER_URL"),
    )
    worker_group.add_argument(
        "--worker-ip",
        type=str,
        help="IP address of the worker node. Auto-detected by default.",
        default=get_gpustack_env("WORKER_IP"),
    )
    worker_group.add_argument(
        "--worker-ifname",
        type=str,
        help="Network interface name of the worker node. Auto-detected by default.",
        default=get_gpustack_env("WORKER_IFNAME"),
    )
    worker_group.add_argument(
        "--worker-name",
        type=str,
        help="Name of the worker node. Use the hostname by default.",
        default=get_gpustack_env("WORKER_NAME"),
    )
    worker_group.add_argument(
        "--worker-port",
        type=int,
        help="Port to bind the worker to.",
        default=get_gpustack_env("WORKER_PORT"),
    )
    worker_group.add_argument(
        "--service-port-range",
        type=str,
        help="Port range for inference services, specified as a string in the form 'N1-N2'. Both ends of the range are inclusive. The default is '40000-40063'.",
        default=get_gpustack_env("SERVICE_PORT_RANGE"),
    )
    worker_group.add_argument(
        "--ray-port-range",
        type=str,
        help="Port range for Ray services(vLLM distributed deployment using), specified as a string in the form 'N1-N2'. Both ends of the range are inclusive. The default is '41000-41999'.",
        default=get_gpustack_env("RAY_PORT_RANGE"),
    )
    worker_group.add_argument(
        "--benchmark-max-duration-seconds",
        type=int,
        help="Max duration for a benchmark before timeout. Disabled when unset.",
        default=get_gpustack_env("BENCHMARK_MAX_DURATION_SECONDS"),
    )
    worker_group.add_argument(
        "--disable-worker-metrics",
        action=OptionalBoolAction,
        help="Disable worker metrics.",
        default=get_gpustack_env_bool(
            "DISABLE_WORKER_METRICS",
        ),
    )
    worker_group.add_argument(
        "--worker-metrics-port",
        type=int,
        help="Port to expose worker metrics.",
        default=get_gpustack_env("WORKER_METRICS_PORT"),
    )
    worker_group.add_argument(
        "--log-dir",
        type=str,
        help="Directory to store logs.",
        default=get_gpustack_env("LOG_DIR"),
    )
    worker_group.add_argument(
        "--benchmark-dir",
        type=str,
        help="Directory to store benchmark results.",
        default=get_gpustack_env("BENCHMARK_DIR"),
    )
    worker_group.add_argument(
        "--system-reserved",
        # The value (CLI or string default) is parsed as a JSON document.
        type=json.loads,
        # Implicit concatenation keeps source indentation out of the help text.
        help="The system reserves resources during scheduling, measured in GiB. "
        "Where RAM is reserved per worker, and VRAM is reserved per GPU device. "
        "By default, no resources are reserved. "
        "Example: '{\"ram\": 2, \"vram\": 1}' or '{\"memory\": 2, \"gpu_memory\": 1}', "
        "Note: The 'memory' and 'gpu_memory' keys are deprecated and will be removed in future releases.",
        default=get_gpustack_env("SYSTEM_RESERVED"),
    )
    worker_group.add_argument(
        "--tools-download-base-url",
        type=str,
        help=argparse.SUPPRESS,
        default=get_gpustack_env("TOOLS_DOWNLOAD_BASE_URL"),
    )
    worker_group.add_argument(
        "--enable-hf-transfer",
        action=OptionalBoolAction,
        help="[Deprecated] hf_transfer support was removed in huggingface_hub v1.0; this flag is a no-op.",
        default=os.getenv("HF_HUB_ENABLE_HF_TRANSFER"),
    )
    worker_group.add_argument(
        "--enable-hf-xet",
        action=OptionalBoolAction,
        help="[Deprecated] Enable downloading model files using Hugging Face Xet.",
    )
    worker_group.add_argument(
        "--proxy-mode",
        type=str,
        help="Proxy mode for server accessing model instances: "
        "direct (server connects directly) or worker (via worker proxy). "
        "Default value is direct for embedded worker, and worker for standalone worker.",
    )
def _setup_plugin_start_args(parser: argparse.ArgumentParser):
    """Let every registered plugin class add its own ``start`` arguments.

    This runs at CLI-parse time, so ``setup_start_cmd`` is called on the
    plugin *class* itself rather than on an instance. Entries that are not
    ``Plugin`` subclasses are skipped.

    Raises:
        RuntimeError: If a plugin's ``setup_start_cmd`` raises; the original
            exception is chained as the cause.
    """
    for plugin_name, candidate in iter_plugin_classes():
        is_plugin_class = isinstance(candidate, type) and issubclass(
            candidate, Plugin
        )
        if not is_plugin_class:
            continue
        try:
            candidate.setup_start_cmd(parser)
            logger.debug(f"Set up start args from plugin: {plugin_name}")
        except Exception as exc:
            raise RuntimeError(
                f"Failed to set up CLI args from plugin '{plugin_name}': {exc}"
            ) from exc
def _contribute_plugin_config(args: argparse.Namespace, config_data: dict):
    """Let every registered plugin class copy its CLI args into *config_data*.

    The core ``set_*_options`` helpers only forward a fixed list of option
    names, so plugin-defined arguments on *args* would otherwise never reach
    the ``Config`` kwargs. Each ``Plugin`` subclass gets ``contribute_config``
    called with the parsed args and the dict being built.

    Raises:
        RuntimeError: If a plugin's ``contribute_config`` raises; the original
            exception is chained as the cause.
    """
    for plugin_name, candidate in iter_plugin_classes():
        is_plugin_class = isinstance(candidate, type) and issubclass(
            candidate, Plugin
        )
        if not is_plugin_class:
            continue
        try:
            candidate.contribute_config(args, config_data)
        except Exception as exc:
            raise RuntimeError(
                f"Failed to contribute config from plugin '{plugin_name}': {exc}"
            ) from exc
def run(args: argparse.Namespace):
    """Handle the ``start`` command: build the config, then run a worker or server.

    A config with ``server_url`` set runs the worker; otherwise the database
    is checked and the server is run. Any ``Exception`` raised along the way
    is logged with its traceback and the process exits with status 1.
    """
    try:
        cfg = parse_args(args)

        # Process-wide preparation, in the order the later steps rely on.
        setup_logging(cfg.debug)
        debug_env_info()
        set_third_party_env(cfg=cfg)
        start_tracemalloc_if_debug(cfg)
        initialize_gateway(cfg)
        multiprocessing.set_start_method('spawn')

        logger.info(f"GPUStack version: {__version__} ({__git_commit__})")

        if not cfg.server_url:
            check_database_available(cfg)
            run_server(cfg)
        else:
            run_worker(cfg)
    except Exception as exc:
        logger.exception(exc)
        sys.exit(1)
def check_database_available(cfg):
    """Check if the database is reachable before starting the server.

    When the connection test fails, an error with a Docker quick-start hint
    is logged and the process exits with status 1. The URL is logged with its
    password masked so credentials do not end up in log files.
    """
    from gpustack.utils.db import test_db_connection

    db_url = cfg.get_database_url()
    if not test_db_connection(db_url):
        logger.error(
            f"Cannot connect to database at {_redact_db_url(db_url)}. "
            "GPUStack requires a PostgreSQL database. "
            "You can start one quickly with Docker:\n"
            "  docker run -d --name gpustack-db "
            "-e POSTGRES_USER=root -e POSTGRES_HOST_AUTH_METHOD=trust "
            "-e POSTGRES_DB=gpustack -p 5432:5432 postgres:16"
        )
        sys.exit(1)


def _redact_db_url(db_url):
    """Return *db_url* with any embedded password replaced by ``***``."""
    from urllib.parse import urlsplit

    if not isinstance(db_url, str):
        return db_url
    try:
        parts = urlsplit(db_url)
    except ValueError:
        # Unparseable URL: do not risk echoing credentials verbatim.
        return "<redacted database URL>"
    if parts.password is None:
        return db_url
    # netloc is "user:password@host[:port]"; keep everything but the password.
    userinfo, _, host_port = parts.netloc.rpartition("@")
    username = userinfo.partition(":")[0]
    return parts._replace(netloc=f"{username}:***@{host_port}").geturl()
def run_server(cfg: Config):
    """Run the GPUStack server until it stops or is interrupted.

    A ``multiprocessing.Process`` targeting ``run_worker`` is built (not
    started here) and handed to ``Server``. ``KeyboardInterrupt`` and
    ``asyncio.CancelledError`` are swallowed; any other exception propagates.
    The shutdown message is logged in every case.
    """
    worker_proc = multiprocessing.Process(target=run_worker, args=(cfg,))
    server = Server(config=cfg, worker_process=worker_proc)
    try:
        asyncio.run(server.start())
    except (KeyboardInterrupt, asyncio.CancelledError):
        pass
    finally:
        logger.info("Server has shut down.")
def run_worker(cfg: Config):
    """Install *cfg* as the global config, then create and start a ``Worker``."""
    set_global_config(cfg)
    Worker(cfg).start()
def load_config_from_yaml(yaml_file: str) -> Dict[str, Any]:
    """Load the YAML config file at *yaml_file* into a dict.

    Args:
        yaml_file: Path of the YAML file to read.

    Returns:
        The parsed top-level mapping, or an empty dict when the file holds
        no document (empty or comment-only file).

    Raises:
        ValueError: If the top-level YAML node is not a mapping.
    """
    with open(yaml_file, "r") as file:
        data = yaml.safe_load(file)
    # safe_load yields None for an empty document; callers expect a dict.
    if data is None:
        return {}
    if not isinstance(data, dict):
        raise ValueError(
            f"Config file '{yaml_file}' must contain a YAML mapping at the top "
            f"level, got {type(data).__name__}."
        )
    return data
def parse_args(args: argparse.Namespace) -> Config:
    """Build the ``Config`` from the optional YAML file and the parsed CLI args.

    Values from the config file are loaded first; any CLI/env-derived option
    that is not ``None`` then overrides them, and plugins may add their own
    entries. The resulting config is also installed as the global config.

    Raises:
        Exception: With a ``Config error:`` prefix when ``Config`` rejects the
            collected values; the original error is chained as the cause.
    """
    config_data = {}
    if args.config_file:
        loaded = load_config_from_yaml(args.config_file)
        # An empty YAML file parses to None; treat it as "no settings".
        if loaded:
            config_data.update(loaded)

    # CLI args have higher priority than config file
    set_common_options(args, config_data)
    set_server_options(args, config_data)
    set_worker_options(args, config_data)
    _contribute_plugin_config(args, config_data)

    try:
        cfg = Config(**config_data)
    except Exception as e:
        # Chain the cause so the validation traceback is not lost.
        raise Exception(f"Config error: {e}") from e

    set_global_config(cfg)
    return cfg
def set_config_option(args, config_data: dict, option_name: str):
    """Copy ``args.<option_name>`` into *config_data* unless it is missing or None.

    A missing attribute is treated like ``None``; falsy values such as
    ``False`` or ``0`` are still copied.
    """
    value = getattr(args, option_name, None)
    if value is None:
        return
    config_data[option_name] = value
def set_common_options(args, config_data: dict):
    """Forward the common-group options from *args* into *config_data*.

    Each listed name is copied via ``set_config_option``, so options whose
    value is ``None`` leave any existing entry untouched.
    """
    common_option_names = (
        "debug",
        "data_dir",
        "cache_dir",
        "bin_dir",
        "pipx_path",
        "huggingface_token",
        "system_default_container_registry",
        "image_name_override",
        "image_repo",
        "benchmark_image_repo",
        "advertise_address",
        "port",
        "tls_port",
        "api_port",
        "proxy_port",
        "gateway_mode",
        "gateway_kubeconfig",
        "gateway_namespace",
        "service_discovery_name",
        "namespace",
    )
    for name in common_option_names:
        set_config_option(args, config_data, name)
def set_server_options(args, config_data: dict):
    """Forward the server-group options from *args* into *config_data*.

    Each listed name is copied via ``set_config_option``, so options whose
    value is ``None`` leave any existing entry untouched.
    """
    server_option_names = (
        "metrics_port",
        "database_port",
        "disable_metrics",
        "database_url",
        "disable_worker",
        "enable_worker",
        "bootstrap_password",
        "ssl_keyfile",
        "ssl_certfile",
        "force_auth_localhost",
        "disable_update_check",
        "disable_openapi_docs",
        "update_check_url",
        "model_catalog_file",
        "enable_cors",
        "allow_origins",
        "allow_credentials",
        "allow_methods",
        "allow_headers",
        "external_auth_name",
        "external_auth_full_name",
        "external_auth_avatar_url",
        "external_auth_default_inactive",
        "oidc_issuer",
        "oidc_client_id",
        "oidc_client_secret",
        "oidc_redirect_uri",
        "external_auth_post_logout_redirect_key",
        "oidc_skip_userinfo",
        "oidc_use_userinfo",
        "saml_idp_server_url",
        "saml_idp_logout_url",
        "saml_idp_entity_id",
        "saml_idp_x509_cert",
        "saml_sp_entity_id",
        "saml_sp_acs_url",
        "saml_sp_slo_url",
        "saml_sp_x509_cert",
        "saml_sp_private_key",
        "saml_sp_attribute_prefix",
        "saml_security",
        "server_external_url",
        "gateway_concurrency",
        "gateway_plugin_server_url",
        "gateway_ingress_class",
        "disable_builtin_observability",
        "builtin_prometheus_port",
        "builtin_grafana_port",
        "grafana_url",
        "grafana_worker_dashboard_uid",
        "grafana_model_dashboard_uid",
    )
    for name in server_option_names:
        set_config_option(args, config_data, name)
def set_worker_options(args, config_data: dict):
    """Forward the worker-group options from *args* into *config_data*.

    Each listed name is copied via ``set_config_option``, so options whose
    value is ``None`` leave any existing entry untouched.
    """
    worker_option_names = (
        "token",
        "server_url",
        "worker_ip",
        "worker_ifname",
        "worker_name",
        "worker_port",
        "disable_worker_metrics",
        "worker_metrics_port",
        "service_port_range",
        "ray_port_range",
        "benchmark_max_duration_seconds",
        "log_dir",
        "benchmark_dir",
        "system_reserved",
        "tools_download_base_url",
        "enable_hf_transfer",
        "proxy_mode",
    )
    for name in worker_option_names:
        set_config_option(args, config_data, name)
def debug_env_info():
    """Log the ``HF_ENDPOINT`` environment variable at debug level when it is set."""
    endpoint = os.environ.get("HF_ENDPOINT")
    if not endpoint:
        return
    logger.debug(f"Using HF_ENDPOINT: {endpoint}")
def start_tracemalloc_if_debug(cfg: Config):
    """Start ``tracemalloc`` when ``cfg.debug`` is truthy; otherwise do nothing."""
    if not cfg.debug:
        return

    # Imported lazily so the module is only loaded in debug mode.
    import tracemalloc

    tracemalloc.start()
    logger.debug("tracemalloc started for memory profiling")
def set_third_party_env(cfg: Config):
    """Warn that ``enable_hf_transfer`` is deprecated when it is set on *cfg*.

    No environment variable is changed here; the option is only reported as
    ignored.
    """
    if not cfg.enable_hf_transfer:
        return
    logger.warning(
        "enable_hf_transfer is deprecated and ignored: hf_transfer support was "
        "removed in huggingface_hub v1.0. hf_xet is now the default downloader."
    )
|