start.py 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929
  1. import argparse
  2. import asyncio
  3. import json
  4. import logging
  5. import multiprocessing
  6. import os
  7. import sys
  8. from typing import Any, Dict
  9. import yaml
  10. from gpustack import __version__, __git_commit__
  11. from gpustack.config.config import set_global_config
  12. from gpustack.extension import Plugin, iter_plugin_classes
  13. from gpustack.logging import setup_logging
  14. from gpustack.utils.envs import get_gpustack_env, get_gpustack_env_bool
  15. from gpustack.worker.worker import Worker
  16. from gpustack.config import Config
  17. from gpustack.server.server import Server
  18. from gpustack.gateway import initialize_gateway
# Module-level logger shared by every function in this file.
logger = logging.getLogger(__name__)
  20. class OptionalBoolAction(argparse.Action):
  21. def __init__(self, option_strings, dest, nargs=None, **kwargs):
  22. if nargs is not None:
  23. raise ValueError("nargs not allowed")
  24. super(OptionalBoolAction, self).__init__(
  25. option_strings, dest, nargs=0, **kwargs
  26. )
  27. self.default = kwargs.get("default")
  28. def __call__(self, parser, namespace, values, option_string=None):
  29. setattr(namespace, self.dest, True)
  30. def setup_start_cmd(subparsers: argparse._SubParsersAction):
  31. parser_server: argparse.ArgumentParser = subparsers.add_parser(
  32. "start",
  33. help="Run GPUStack server or worker.",
  34. description="Run GPUStack server or worker.",
  35. )
  36. start_cmd_options(parser_server)
  37. def start_cmd_options(parser_server: argparse.ArgumentParser):
  38. common_group = parser_server.add_argument_group("Common settings")
  39. common_group.add_argument(
  40. "--advertise-address",
  41. type=str,
  42. help="The IP address to expose for external access. If not set, the system will auto-detect a suitable local IP address.",
  43. default=get_gpustack_env("ADVERTISE_ADDRESS"),
  44. )
  45. common_group.add_argument(
  46. "--port",
  47. type=int,
  48. help="Port to bind the server to.",
  49. default=get_gpustack_env("PORT"),
  50. )
  51. common_group.add_argument(
  52. "--tls-port",
  53. type=int,
  54. help="Port to bind the TLS server to.",
  55. default=get_gpustack_env("TLS_PORT"),
  56. )
  57. common_group.add_argument(
  58. "--api-port",
  59. type=int,
  60. help="Port to bind the GPUStack API server to.",
  61. default=get_gpustack_env("API_PORT"),
  62. )
  63. common_group.add_argument(
  64. "--proxy-port",
  65. type=int,
  66. help="Port for the HTTP/HTTPS proxy server used in tunnel proxy mode to forward inference requests to workers behind firewalls or NAT via persistent WebSocket tunnels.",
  67. default=get_gpustack_env("PROXY_PORT"),
  68. )
  69. common_group.add_argument(
  70. "--config-file",
  71. type=str,
  72. help="Path to the YAML config file.",
  73. default=get_gpustack_env("CONFIG_FILE"),
  74. )
  75. common_group.add_argument(
  76. "-d",
  77. "--debug",
  78. action=OptionalBoolAction,
  79. help="Enable debug mode.",
  80. default=get_gpustack_env_bool("DEBUG"),
  81. )
  82. common_group.add_argument(
  83. "--data-dir",
  84. type=str,
  85. help="Directory to store data. The default is OS specific.",
  86. default=get_gpustack_env("DATA_DIR"),
  87. )
  88. common_group.add_argument(
  89. "--cache-dir",
  90. type=str,
  91. help="Directory to store cache (e.g., model files). The default is <data-dir>/cache.",
  92. default=get_gpustack_env("CACHE_DIR"),
  93. )
  94. common_group.add_argument(
  95. "--bin-dir",
  96. type=str,
  97. help="Directory to store additional binaries, e.g., versioned backend executables.",
  98. default=get_gpustack_env("BIN_DIR"),
  99. )
  100. common_group.add_argument(
  101. "--pipx-path",
  102. type=str,
  103. help="Path to the pipx executable, used to install versioned backends.",
  104. default=get_gpustack_env("PIPX_PATH"),
  105. )
  106. common_group.add_argument(
  107. "--huggingface-token",
  108. type=str,
  109. help="User Access Token to authenticate to the Hugging Face Hub.",
  110. default=os.getenv("HF_TOKEN"),
  111. )
  112. common_group.add_argument(
  113. "--system-default-container-registry",
  114. type=str,
  115. help="Default container registry for GPUStack to pull system and inference images. The default is 'docker.io'.",
  116. default=get_gpustack_env("SYSTEM_DEFAULT_CONTAINER_REGISTRY"),
  117. )
  118. common_group.add_argument(
  119. "--image-name-override",
  120. type=str,
  121. help="Override the default image name for the GPUStack container.",
  122. default=get_gpustack_env("IMAGE_NAME_OVERRIDE"),
  123. )
  124. common_group.add_argument(
  125. "--image-repo",
  126. type=str,
  127. help="Override the default image repository gpustack/gpustack for the GPUStack container.",
  128. default=get_gpustack_env("IMAGE_REPO"),
  129. )
  130. common_group.add_argument(
  131. "--benchmark-image-repo",
  132. type=str,
  133. help="Override the default benchmark image repository gpustack/benchmark-runner for the GPUStack benchmark container.",
  134. default=get_gpustack_env("BENCHMARK_IMAGE_REPO"),
  135. )
  136. common_group.add_argument(
  137. "--gateway-mode",
  138. type=str,
  139. help="Gateway running mode. Options: embedded, in-cluster, external, disabled, or auto (default).",
  140. default=get_gpustack_env("GATEWAY_MODE"),
  141. )
  142. common_group.add_argument(
  143. "--gateway-kubeconfig",
  144. type=str,
  145. help="Path to the kubeconfig file for gatway. Only useful for external gateway-mode.",
  146. default=get_gpustack_env("GATEWAY_KUBECONFIG"),
  147. )
  148. common_group.add_argument(
  149. "--gateway-namespace",
  150. type=str,
  151. help="The namespace where the gateway component is deployed.",
  152. default=get_gpustack_env("GATEWAY_NAMESPACE"),
  153. )
  154. common_group.add_argument(
  155. "--service-discovery-name",
  156. type=str,
  157. help="the name of the service discovery service in DNS. Only useful when deployed in Kubernetes with service discovery.",
  158. default=get_gpustack_env("SERVICE_DISCOVERY_NAME"),
  159. )
  160. common_group.add_argument(
  161. "--namespace",
  162. type=str,
  163. help="Kubernetes namespace for GPUStack to deploy gateway routing rules and model instances.",
  164. default=os.getenv("POD_NAMESPACE"),
  165. )
  166. server_group = parser_server.add_argument_group("Server settings")
  167. # Database settings
  168. server_group.add_argument(
  169. "--database-port",
  170. type=int,
  171. help="Port of the database. Example: 5432 for PostgreSQL.",
  172. default=get_gpustack_env("DATABASE_PORT"),
  173. )
  174. server_group.add_argument(
  175. "--metrics-port",
  176. type=int,
  177. help="Port to expose server metrics.",
  178. default=get_gpustack_env("METRICS_PORT"),
  179. )
  180. server_group.add_argument(
  181. "--database-url",
  182. type=str,
  183. help="URL of the database. Example: postgresql://user:password@hostname:port/db_name.",
  184. default=get_gpustack_env("DATABASE_URL"),
  185. )
  186. # Embedded worker settings
  187. server_group.add_argument(
  188. "--disable-worker",
  189. action=OptionalBoolAction,
  190. help="(DEPRECATED) Disable the embedded worker for the GPUStack server. New installations will not have the embedded worker by default. Use '--enable-worker' to enable the embedded worker if needed. If neither flag is set, for backward compatibility, the embedded worker will be enabled by default for legacy installations prior to v2.0.1.",
  191. default=get_gpustack_env_bool("DISABLE_WORKER"),
  192. )
  193. server_group.add_argument(
  194. "--enable-worker",
  195. action=OptionalBoolAction,
  196. help="Enable the embedded worker for the GPUStack server.",
  197. default=get_gpustack_env_bool("ENABLE_WORKER"),
  198. )
  199. # Server settings
  200. server_group.add_argument(
  201. "--disable-metrics",
  202. action=OptionalBoolAction,
  203. help="Disable server metrics.",
  204. default=get_gpustack_env_bool(
  205. "DISABLE_METRICS",
  206. ),
  207. )
  208. server_group.add_argument(
  209. "--bootstrap-password",
  210. type=str,
  211. help="Initial password for the default admin user. Random by default.",
  212. default=get_gpustack_env("BOOTSTRAP_PASSWORD"),
  213. )
  214. server_group.add_argument(
  215. "--ssl-keyfile",
  216. type=str,
  217. help="Path to the SSL key file.",
  218. default=get_gpustack_env("SSL_KEYFILE"),
  219. )
  220. server_group.add_argument(
  221. "--ssl-certfile",
  222. type=str,
  223. help="Path to the SSL certificate file.",
  224. default=get_gpustack_env("SSL_CERTFILE"),
  225. )
  226. server_group.add_argument(
  227. "--force-auth-localhost",
  228. action=OptionalBoolAction,
  229. help="Force authentication for requests originating from localhost (127.0.0.1)."
  230. "When set to True, all requests from localhost will require authentication.",
  231. default=get_gpustack_env_bool("FORCE_AUTH_LOCALHOST"),
  232. )
  233. server_group.add_argument(
  234. "--disable-update-check",
  235. action=OptionalBoolAction,
  236. help="Disable update check.",
  237. default=get_gpustack_env_bool("DISABLE_UPDATE_CHECK"),
  238. )
  239. server_group.add_argument(
  240. "--disable-openapi-docs",
  241. action=OptionalBoolAction,
  242. help="Disable autogenerated OpenAPI documentation endpoints (Swagger UI at /docs, ReDoc at /redoc, and the raw spec at /openapi.json).",
  243. default=get_gpustack_env_bool("DISABLE_OPENAPI_DOCS"),
  244. )
  245. server_group.add_argument(
  246. "--update-check-url",
  247. type=str,
  248. help=argparse.SUPPRESS,
  249. default=get_gpustack_env("UPDATE_CHECK_URL"),
  250. )
  251. server_group.add_argument(
  252. "--model-catalog-file",
  253. type=str,
  254. help="Path or URL to the model catalog file.",
  255. default=get_gpustack_env("MODEL_CATALOG_FILE"),
  256. )
  257. server_group.add_argument(
  258. "--server-external-url",
  259. type=str,
  260. help="External URL of the server. Should be set if the server is behind a reverse proxy.",
  261. default=get_gpustack_env("SERVER_EXTERNAL_URL"),
  262. )
  263. server_group.add_argument(
  264. "--gateway-concurrency",
  265. type=int,
  266. help="Number of concurrent connections for the embedded gateway. The default is 16.",
  267. default=get_gpustack_env("GATEWAY_CONCURRENCY"),
  268. )
  269. server_group.add_argument(
  270. "--gateway-plugin-server-url",
  271. type=str,
  272. help=argparse.SUPPRESS,
  273. default=get_gpustack_env("GATEWAY_PLUGIN_SERVER_URL"),
  274. )
  275. server_group.add_argument(
  276. "--gateway-ingress-class",
  277. type=str,
  278. help=argparse.SUPPRESS,
  279. default=get_gpustack_env("GATEWAY_INGRESS_CLASS"),
  280. )
  281. # Observability settings
  282. server_group.add_argument(
  283. "--disable-builtin-observability",
  284. action=OptionalBoolAction,
  285. help="Disable embedded Grafana and Prometheus services.",
  286. default=get_gpustack_env_bool("DISABLE_BUILTIN_OBSERVABILITY"),
  287. )
  288. server_group.add_argument(
  289. "--builtin-prometheus-port",
  290. type=int,
  291. help="Port for the embedded Prometheus service. Default is 19090.",
  292. default=get_gpustack_env("BUILTIN_PROMETHEUS_PORT"),
  293. )
  294. server_group.add_argument(
  295. "--builtin-grafana-port",
  296. type=int,
  297. help="Port for the embedded Grafana service. Default is 13000.",
  298. default=get_gpustack_env("BUILTIN_GRAFANA_PORT"),
  299. )
  300. server_group.add_argument(
  301. "--grafana-url",
  302. type=str,
  303. help="Grafana base URL for dashboard redirects and proxying. Must be browser-reachable (not a container-only hostname). If set, embedded Grafana and Prometheus will be disabled. Only required for external Grafana.",
  304. default=get_gpustack_env("GRAFANA_URL"),
  305. )
  306. server_group.add_argument(
  307. "--grafana-worker-dashboard-uid",
  308. type=str,
  309. help="Grafana dashboard UID for worker dashboard redirects.",
  310. default=get_gpustack_env("GRAFANA_WORKER_DASHBOARD_UID"),
  311. )
  312. server_group.add_argument(
  313. "--grafana-model-dashboard-uid",
  314. type=str,
  315. help="Grafana dashboard UID for model dashboard redirects.",
  316. default=get_gpustack_env("GRAFANA_MODEL_DASHBOARD_UID"),
  317. )
  318. # CORS settings
  319. server_group.add_argument(
  320. "--enable-cors",
  321. action=OptionalBoolAction,
  322. help="Enable Cross-Origin Resource Sharing (CORS) on the server.",
  323. default=get_gpustack_env_bool("ENABLE_CORS"),
  324. )
  325. server_group.add_argument(
  326. "--allow-credentials",
  327. action=OptionalBoolAction,
  328. help="Allow cookies and credentials in cross-origin requests.",
  329. default=get_gpustack_env_bool("ALLOW_CREDENTIALS"),
  330. )
  331. server_group.add_argument(
  332. "--allow-origins",
  333. action='append',
  334. help='Origins allowed for cross-origin requests. Specify the flag multiple times for multiple origins. Example: --allow-origins https://example.com --allow-origins https://api.example.com. Default: ["*"] (all origins allowed).',
  335. )
  336. server_group.add_argument(
  337. "--allow-methods",
  338. action='append',
  339. help='HTTP methods allowed in cross-origin requests. Specify the flag multiple times for multiple methods. Example: --allow-methods GET --allow-methods POST. Default: ["GET", "POST"].',
  340. )
  341. server_group.add_argument(
  342. "--allow-headers",
  343. action='append',
  344. help='HTTP request headers allowed in cross-origin requests. Specify the flag multiple times for multiple headers. Example: --allow-headers Authorization --allow-headers X-API-Key --allow-headers Content-Type. Default: ["Authorization", "Content-Type", "X-API-Key"].',
  345. )
  346. # OIDC settings
  347. server_group.add_argument(
  348. "--oidc-issuer",
  349. type=str,
  350. help="The issuer URL of the OIDC provider. OIDC discovery under `<issuer>/.well-known/openid-configuration` will be used to discover the OIDC configuration.",
  351. default=get_gpustack_env("OIDC_ISSUER"),
  352. )
  353. server_group.add_argument(
  354. "--oidc-client-id",
  355. type=str,
  356. help="OIDC client ID.",
  357. default=get_gpustack_env("OIDC_CLIENT_ID"),
  358. )
  359. server_group.add_argument(
  360. "--oidc-client-secret",
  361. type=str,
  362. help="OIDC client secret.",
  363. default=get_gpustack_env("OIDC_CLIENT_SECRET"),
  364. )
  365. server_group.add_argument(
  366. "--oidc-redirect-uri",
  367. type=str,
  368. help="The redirect URI configured in your OIDC application. This must be set to `<server-url>/auth/oidc/callback`.",
  369. default=get_gpustack_env("OIDC_REDIRECT_URI"),
  370. )
  371. server_group.add_argument(
  372. "--oidc-skip-userinfo",
  373. action=OptionalBoolAction,
  374. help="Skip using the UserInfo endpoint and retrieve user details from the ID token.",
  375. default=get_gpustack_env_bool("OIDC_SKIP_USERINFO"),
  376. )
  377. server_group.add_argument(
  378. "--oidc-use-userinfo",
  379. action=OptionalBoolAction,
  380. help="[Deprecated] Use the UserInfo endpoint to fetch user details after authentication.",
  381. default=get_gpustack_env_bool("OIDC_USE_USERINFO"),
  382. )
  383. # SAML settings
  384. server_group.add_argument(
  385. "--saml-idp-server-url",
  386. type=str,
  387. help="SAML IdP server URL.",
  388. default=get_gpustack_env("SAML_IDP_SERVER_URL"),
  389. )
  390. server_group.add_argument(
  391. "--saml-idp-logout-url",
  392. type=str,
  393. help="SAML IdP Single Logout endpoint URL.",
  394. default=get_gpustack_env("SAML_IDP_LOGOUT_URL"),
  395. )
  396. server_group.add_argument(
  397. "--saml-idp-entity-id",
  398. type=str,
  399. help="SAML IdP entity ID.",
  400. default=get_gpustack_env("SAML_IDP_ENTITY_ID"),
  401. )
  402. server_group.add_argument(
  403. "--saml-idp-x509-cert",
  404. type=str,
  405. help="SAML IdP X.509 certificate.",
  406. default=get_gpustack_env("SAML_IDP_X509_CERT"),
  407. )
  408. server_group.add_argument(
  409. "--saml-sp-entity-id",
  410. type=str,
  411. help="SAML SP entity ID.",
  412. default=get_gpustack_env("SAML_SP_ENTITY_ID"),
  413. )
  414. server_group.add_argument(
  415. "--saml-sp-acs-url",
  416. type=str,
  417. help="SAML SP Assertion Consumer Service(ACS) URL. It should be set to `<server-url>/auth/saml/callback`.",
  418. default=get_gpustack_env("SAML_SP_ACS_URL"),
  419. )
  420. server_group.add_argument(
  421. "--saml-sp-slo-url",
  422. type=str,
  423. help="SAML SP Single Logout Service URL. It can be set to `<server-url>/auth/saml/logout/callback` if you need to receive LogoutResponse.",
  424. default=get_gpustack_env("SAML_SP_SLO_URL"),
  425. )
  426. server_group.add_argument(
  427. "--saml-sp-x509-cert",
  428. type=str,
  429. help="SAML SP X.509 certificate.",
  430. default=get_gpustack_env("SAML_SP_X509_CERT"),
  431. )
  432. server_group.add_argument(
  433. "--saml-sp-private-key",
  434. type=str,
  435. help="SAML SP private key.",
  436. default=get_gpustack_env("SAML_SP_PRIVATE_KEY"),
  437. )
  438. server_group.add_argument(
  439. "--saml-sp-attribute-prefix",
  440. type=str,
  441. help="SAML Service Provider attribute prefix, which is used for fetching the attributes that are specified by --external-auth-*. e.g., 'http://schemas.auth0.com/'.",
  442. default=get_gpustack_env("SAML_SP_ATTRIBUTE_PREFIX"),
  443. )
  444. server_group.add_argument(
  445. "--saml-security",
  446. type=str,
  447. help="SAML security settings in JSON.",
  448. default=get_gpustack_env("SAML_SECURITY"),
  449. )
  450. # External Authentication settings
  451. server_group.add_argument(
  452. "--external-auth-name",
  453. type=str,
  454. help="Mapping of external authentication user information to username, e.g., 'preferred_username'. For SAML, you must configure the full attribute name like 'http://schemas.xmlsoap.org/ws/2005/05/identity/claims/emailaddress' or simplify with 'emailaddress' by '--saml-sp-attribute-prefix'.",
  455. default=get_gpustack_env("EXTERNAL_AUTH_NAME"),
  456. )
  457. server_group.add_argument(
  458. "--external-auth-full-name",
  459. type=str,
  460. help="Mapping of external authentication user information to user's full name. Multiple elements can be combined, e.g., 'name' or 'firstName+lastName'. For SAML, you must configure the full attribute name like 'http://schemas.xmlsoap.org/ws/2005/05/identity/claims/name' or simplify with 'name' by '--saml-sp-attribute-prefix'.",
  461. default=get_gpustack_env("EXTERNAL_AUTH_FULL_NAME"),
  462. )
  463. server_group.add_argument(
  464. "--external-auth-avatar-url",
  465. type=str,
  466. help="Mapping of external authentication user information to user's avatar URL. e.g.,'picture'. For SAML, you must configure the full attribute name like 'http://schemas.auth0.com/picture' or simplify with 'picture' by '--saml-sp-attribute-prefix'.",
  467. default=get_gpustack_env("EXTERNAL_AUTH_AVATAR_URL"),
  468. )
  469. server_group.add_argument(
  470. "--external-auth-default-inactive",
  471. action=OptionalBoolAction,
  472. help="Set newly created externally authenticated users inactive by default.",
  473. default=get_gpustack_env_bool("EXTERNAL_AUTH_DEFAULT_INACTIVE"),
  474. )
  475. server_group.add_argument(
  476. "--external-auth-post-logout-redirect-key",
  477. type=str,
  478. help="Generic key for post-logout redirection across IdPs.",
  479. default=get_gpustack_env("EXTERNAL_AUTH_POST_LOGOUT_REDIRECT_KEY"),
  480. )
  481. server_group.add_argument(
  482. "--sso-base-url",
  483. type=str,
  484. help="SSO platform base URL (e.g. http://192.168.92.61:8200).",
  485. default=get_gpustack_env("SSO_BASE_URL"),
  486. )
  487. server_group.add_argument(
  488. "--sso-client-id",
  489. type=str,
  490. help="SSO client ID (app_key from t_sys_app).",
  491. default=get_gpustack_env("SSO_CLIENT_ID"),
  492. )
  493. server_group.add_argument(
  494. "--sso-client-secret",
  495. type=str,
  496. help="SSO client secret (app_secret from t_sys_app).",
  497. default=get_gpustack_env("SSO_CLIENT_SECRET"),
  498. )
  499. server_group.add_argument(
  500. "--sso-redirect-uri",
  501. type=str,
  502. help="SSO redirect URI (must be registered in t_sys_app).",
  503. default=get_gpustack_env("SSO_REDIRECT_URI"),
  504. )
  505. server_group.add_argument(
  506. "--sso-frontend-url",
  507. type=str,
  508. help="Frontend base URL for SSO redirect.",
  509. default=get_gpustack_env("SSO_FRONTEND_URL"),
  510. )
  511. server_group.add_argument(
  512. "--sso-scope",
  513. type=str,
  514. help="OAuth2 scope for SSO (default: email).",
  515. default=get_gpustack_env("SSO_SCOPE") or "email",
  516. )
  517. server_group.add_argument(
  518. "--sso-logout-redirect-url",
  519. type=str,
  520. help="SSO logout redirect URL.",
  521. default=get_gpustack_env("SSO_LOGOUT_REDIRECT_URL"),
  522. )
  523. worker_group = parser_server.add_argument_group("Worker settings")
  524. worker_group.add_argument(
  525. "-t",
  526. "--token",
  527. type=str,
  528. help="Shared secret used to add a worker.",
  529. default=get_gpustack_env("TOKEN"),
  530. )
  531. worker_group.add_argument(
  532. "-s",
  533. "--server-url",
  534. type=str,
  535. help="Server to connect to.",
  536. default=get_gpustack_env("SERVER_URL"),
  537. )
  538. worker_group.add_argument(
  539. "--worker-ip",
  540. type=str,
  541. help="IP address of the worker node. Auto-detected by default.",
  542. default=get_gpustack_env("WORKER_IP"),
  543. )
  544. worker_group.add_argument(
  545. "--worker-ifname",
  546. type=str,
  547. help="Network interface name of the worker node. Auto-detected by default.",
  548. default=get_gpustack_env("WORKER_IFNAME"),
  549. )
  550. worker_group.add_argument(
  551. "--worker-name",
  552. type=str,
  553. help="Name of the worker node. Use the hostname by default.",
  554. default=get_gpustack_env("WORKER_NAME"),
  555. )
  556. worker_group.add_argument(
  557. "--worker-port",
  558. type=int,
  559. help="Port to bind the worker to.",
  560. default=get_gpustack_env("WORKER_PORT"),
  561. )
  562. worker_group.add_argument(
  563. "--service-port-range",
  564. type=str,
  565. help="Port range for inference services, specified as a string in the form 'N1-N2'. Both ends of the range are inclusive. The default is '40000-40063'.",
  566. default=get_gpustack_env("SERVICE_PORT_RANGE"),
  567. )
  568. worker_group.add_argument(
  569. "--ray-port-range",
  570. type=str,
  571. help="Port range for Ray services(vLLM distributed deployment using), specified as a string in the form 'N1-N2'. Both ends of the range are inclusive. The default is '41000-41999'.",
  572. default=get_gpustack_env("RAY_PORT_RANGE"),
  573. )
  574. worker_group.add_argument(
  575. "--benchmark-max-duration-seconds",
  576. type=int,
  577. help="Max duration for a benchmark before timeout. Disabled when unset.",
  578. default=get_gpustack_env("BENCHMARK_MAX_DURATION_SECONDS"),
  579. )
  580. worker_group.add_argument(
  581. "--disable-worker-metrics",
  582. action=OptionalBoolAction,
  583. help="Disable worker metrics.",
  584. default=get_gpustack_env_bool(
  585. "DISABLE_WORKER_METRICS",
  586. ),
  587. )
  588. worker_group.add_argument(
  589. "--worker-metrics-port",
  590. type=int,
  591. help="Port to expose worker metrics.",
  592. default=get_gpustack_env("WORKER_METRICS_PORT"),
  593. )
  594. worker_group.add_argument(
  595. "--log-dir",
  596. type=str,
  597. help="Directory to store logs.",
  598. default=get_gpustack_env("LOG_DIR"),
  599. )
  600. worker_group.add_argument(
  601. "--benchmark-dir",
  602. type=str,
  603. help="Directory to store benchmark results.",
  604. default=get_gpustack_env("BENCHMARK_DIR"),
  605. )
  606. worker_group.add_argument(
  607. "--system-reserved",
  608. type=json.loads,
  609. help="The system reserves resources during scheduling, measured in GiB. \
  610. Where RAM is reserved per worker, and VRAM is reserved per GPU device. \
  611. By default, no resources are reserved. \
  612. Example: '{\"ram\": 2, \"vram\": 1}' or '{\"memory\": 2, \"gpu_memory\": 1}', \
  613. Note: The 'memory' and 'gpu_memory' keys are deprecated and will be removed in future releases.",
  614. default=get_gpustack_env("SYSTEM_RESERVED"),
  615. )
  616. worker_group.add_argument(
  617. "--tools-download-base-url",
  618. type=str,
  619. help=argparse.SUPPRESS,
  620. default=get_gpustack_env("TOOLS_DOWNLOAD_BASE_URL"),
  621. )
  622. worker_group.add_argument(
  623. "--enable-hf-transfer",
  624. action=OptionalBoolAction,
  625. help="[Deprecated] hf_transfer support was removed in huggingface_hub v1.0; this flag is a no-op.",
  626. default=os.getenv("HF_HUB_ENABLE_HF_TRANSFER"),
  627. )
  628. worker_group.add_argument(
  629. "--enable-hf-xet",
  630. action=OptionalBoolAction,
  631. help="[Deprecated] Enable downloading model files using Hugging Face Xet.",
  632. )
  633. worker_group.add_argument(
  634. "--proxy-mode",
  635. type=str,
  636. help="Proxy mode for server accessing model instances: "
  637. "direct (server connects directly) or worker (via worker proxy). "
  638. "Default value is direct for embedded worker, and worker for standalone worker.",
  639. )
  640. # Allow plugins to set up start command arguments
  641. _setup_plugin_start_args(parser_server)
  642. parser_server.set_defaults(func=run)
  643. def _setup_plugin_start_args(parser: argparse.ArgumentParser):
  644. """Allow plugins to set up start command arguments.
  645. Called at CLI-parse time — no FastAPI app or ``Config`` exists yet —
  646. so we invoke the classmethod ``Plugin.setup_start_cmd`` on the plugin
  647. *class* directly, without instantiating it.
  648. """
  649. for name, plugin_class in iter_plugin_classes():
  650. if not (isinstance(plugin_class, type) and issubclass(plugin_class, Plugin)):
  651. continue
  652. try:
  653. plugin_class.setup_start_cmd(parser)
  654. logger.debug(f"Set up start args from plugin: {name}")
  655. except Exception as e:
  656. raise RuntimeError(
  657. f"Failed to set up CLI args from plugin '{name}': {e}"
  658. ) from e
  659. def _contribute_plugin_config(args: argparse.Namespace, config_data: dict):
  660. """Allow plugins to forward their CLI args into the ``Config`` kwargs.
  661. Plugin-contributed argparse fields land on ``args`` but are not picked
  662. up by core's ``set_*_options`` whitelists; this hook bridges the gap so
  663. plugins can expose their settings on the resulting ``cfg`` (which uses
  664. Pydantic ``extra="allow"``).
  665. """
  666. for name, plugin_class in iter_plugin_classes():
  667. if not (isinstance(plugin_class, type) and issubclass(plugin_class, Plugin)):
  668. continue
  669. try:
  670. plugin_class.contribute_config(args, config_data)
  671. except Exception as e:
  672. raise RuntimeError(
  673. f"Failed to contribute config from plugin '{name}': {e}"
  674. ) from e
  675. def run(args: argparse.Namespace):
  676. try:
  677. cfg = parse_args(args)
  678. setup_logging(cfg.debug)
  679. debug_env_info()
  680. set_third_party_env(cfg=cfg)
  681. start_tracemalloc_if_debug(cfg)
  682. initialize_gateway(cfg)
  683. multiprocessing.set_start_method('spawn')
  684. logger.info(f"GPUStack version: {__version__} ({__git_commit__})")
  685. if cfg.server_url:
  686. run_worker(cfg)
  687. else:
  688. check_database_available(cfg)
  689. run_server(cfg)
  690. except Exception as e:
  691. logger.exception(e)
  692. sys.exit(1)
  693. def check_database_available(cfg):
  694. """Check if the database is reachable before starting the server."""
  695. from gpustack.utils.db import test_db_connection
  696. db_url = cfg.get_database_url()
  697. if not test_db_connection(db_url):
  698. logger.error(
  699. f"Cannot connect to database at {db_url}. "
  700. "GPUStack requires a PostgreSQL database. "
  701. "You can start one quickly with Docker:\n"
  702. " docker run -d --name gpustack-db "
  703. "-e POSTGRES_USER=root -e POSTGRES_HOST_AUTH_METHOD=trust "
  704. "-e POSTGRES_DB=gpustack -p 5432:5432 postgres:16"
  705. )
  706. sys.exit(1)
  707. def run_server(cfg: Config):
  708. server = Server(
  709. config=cfg,
  710. worker_process=multiprocessing.Process(target=run_worker, args=(cfg,)),
  711. )
  712. try:
  713. asyncio.run(server.start())
  714. except (KeyboardInterrupt, asyncio.CancelledError):
  715. pass
  716. except Exception:
  717. raise
  718. finally:
  719. logger.info("Server has shut down.")
  720. def run_worker(cfg: Config):
  721. set_global_config(cfg)
  722. worker = Worker(cfg)
  723. worker.start()
  724. def load_config_from_yaml(yaml_file: str) -> Dict[str, Any]:
  725. with open(yaml_file, "r") as file:
  726. return yaml.safe_load(file)
  727. def parse_args(args: argparse.Namespace) -> Config:
  728. config_data = {}
  729. if args.config_file:
  730. config_data.update(load_config_from_yaml(args.config_file))
  731. # CLI args have higher priority than config file
  732. set_common_options(args, config_data)
  733. set_server_options(args, config_data)
  734. set_worker_options(args, config_data)
  735. _contribute_plugin_config(args, config_data)
  736. try:
  737. cfg = Config(**config_data)
  738. except Exception as e:
  739. raise Exception(f"Config error: {e}")
  740. set_global_config(cfg)
  741. return cfg
  742. def set_config_option(args, config_data: dict, option_name: str):
  743. option_value = getattr(args, option_name, None)
  744. if option_value is not None:
  745. config_data[option_name] = option_value
  746. def set_common_options(args, config_data: dict):
  747. options = [
  748. "debug",
  749. "data_dir",
  750. "cache_dir",
  751. "bin_dir",
  752. "pipx_path",
  753. "huggingface_token",
  754. "system_default_container_registry",
  755. "image_name_override",
  756. "image_repo",
  757. "benchmark_image_repo",
  758. "advertise_address",
  759. "port",
  760. "tls_port",
  761. "api_port",
  762. "proxy_port",
  763. "gateway_mode",
  764. "gateway_kubeconfig",
  765. "gateway_namespace",
  766. "service_discovery_name",
  767. "namespace",
  768. ]
  769. for option in options:
  770. set_config_option(args, config_data, option)
  771. def set_server_options(args, config_data: dict):
  772. options = [
  773. "metrics_port",
  774. "database_port",
  775. "disable_metrics",
  776. "database_url",
  777. "disable_worker",
  778. "enable_worker",
  779. "bootstrap_password",
  780. "ssl_keyfile",
  781. "ssl_certfile",
  782. "force_auth_localhost",
  783. "disable_update_check",
  784. "disable_openapi_docs",
  785. "update_check_url",
  786. "model_catalog_file",
  787. "enable_cors",
  788. "allow_origins",
  789. "allow_credentials",
  790. "allow_methods",
  791. "allow_headers",
  792. "external_auth_name",
  793. "external_auth_full_name",
  794. "external_auth_avatar_url",
  795. "external_auth_default_inactive",
  796. "oidc_issuer",
  797. "oidc_client_id",
  798. "oidc_client_secret",
  799. "oidc_redirect_uri",
  800. "external_auth_post_logout_redirect_key",
  801. "oidc_skip_userinfo",
  802. "oidc_use_userinfo",
  803. "saml_idp_server_url",
  804. "saml_idp_logout_url",
  805. "saml_idp_entity_id",
  806. "saml_idp_x509_cert",
  807. "saml_sp_entity_id",
  808. "saml_sp_acs_url",
  809. "saml_sp_slo_url",
  810. "saml_sp_x509_cert",
  811. "saml_sp_private_key",
  812. "saml_sp_attribute_prefix",
  813. "saml_security",
  814. "server_external_url",
  815. "gateway_concurrency",
  816. "gateway_plugin_server_url",
  817. "gateway_ingress_class",
  818. "disable_builtin_observability",
  819. "builtin_prometheus_port",
  820. "builtin_grafana_port",
  821. "grafana_url",
  822. "grafana_worker_dashboard_uid",
  823. "grafana_model_dashboard_uid",
  824. ]
  825. for option in options:
  826. set_config_option(args, config_data, option)
  827. def set_worker_options(args, config_data: dict):
  828. options = [
  829. "token",
  830. "server_url",
  831. "worker_ip",
  832. "worker_ifname",
  833. "worker_name",
  834. "worker_port",
  835. "disable_worker_metrics",
  836. "worker_metrics_port",
  837. "service_port_range",
  838. "ray_port_range",
  839. "benchmark_max_duration_seconds",
  840. "log_dir",
  841. "benchmark_dir",
  842. "system_reserved",
  843. "tools_download_base_url",
  844. "enable_hf_transfer",
  845. "proxy_mode",
  846. ]
  847. for option in options:
  848. set_config_option(args, config_data, option)
  849. def debug_env_info():
  850. hf_endpoint = os.getenv("HF_ENDPOINT")
  851. if hf_endpoint:
  852. logger.debug(f"Using HF_ENDPOINT: {hf_endpoint}")
  853. def start_tracemalloc_if_debug(cfg: Config):
  854. if cfg.debug:
  855. import tracemalloc
  856. tracemalloc.start()
  857. logger.debug("tracemalloc started for memory profiling")
def set_third_party_env(cfg: Config):
    """Warn when the deprecated ``enable_hf_transfer`` setting is turned on.

    In the code visible here only a warning is logged; no environment
    variable is modified.
    """
    if cfg.enable_hf_transfer:
        # The setting is still accepted, but this function does nothing with
        # it beyond telling the user that it is ignored.
        logger.warning(
            "enable_hf_transfer is deprecated and ignored: hf_transfer support was "
            "removed in huggingface_hub v1.0. hf_xet is now the default downloader."
        )