routes.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363
  1. import os
  2. from fastapi import APIRouter, Depends
  3. from gpustack.routes import (
  4. api_keys,
  5. auth,
  6. cluster_access,
  7. config,
  8. dashboard,
  9. debug,
  10. draft_models,
  11. gpu_devices,
  12. inference_backend,
  13. me_orgs,
  14. metrics,
  15. model_evaluations,
  16. model_files,
  17. model_instances,
  18. model_route_principals,
  19. model_sets,
  20. organization_members,
  21. organizations,
  22. probes,
  23. proxy,
  24. update,
  25. user_groups,
  26. users,
  27. models,
  28. openai,
  29. workers,
  30. usage,
  31. cloud_credentials,
  32. worker_pools,
  33. clusters,
  34. token,
  35. benchmarks,
  36. benchmark_profiles,
  37. model_provider,
  38. rerank,
  39. model_routes,
  40. grafana,
  41. prometheus,
  42. )
  43. from gpustack.api.exceptions import error_responses, openai_api_error_responses
  44. from gpustack.api.auth import (
  45. get_admin_user,
  46. get_current_user,
  47. get_cluster_user,
  48. get_worker_user,
  49. management_scope,
  50. inference_scope,
  51. )
  52. from gpustack.websocket_proxy.message_server import router as message_server_router
  53. from gpustack.routes.gateway_metrics import router as gateway_metrics_router
  54. from gpustack_higress_plugins.server import router as higress_plugins_router
  55. versioned_prefix = "/v2"
  56. # Toggle for surfacing extended API endpoints in the OpenAPI schema
  57. # and ``/docs``. Endpoints stay mounted regardless — only the public
  58. # docs surface is gated. Off by default; set the env var to a truthy
  59. # value to expose the full surface.
  60. _EXTENDED_API_IN_SCHEMA = os.getenv("GPUSTACK_EXTENDED_API_DOCS", "").lower() in (
  61. "1",
  62. "true",
  63. "yes",
  64. )
  65. api_router = APIRouter(responses=error_responses)
  66. management_router = APIRouter(dependencies=[Depends(management_scope)])
  67. management_router.include_router(
  68. grafana.router,
  69. prefix="/grafana",
  70. dependencies=[Depends(get_admin_user)],
  71. include_in_schema=False,
  72. )
  73. management_router.include_router(
  74. prometheus.router,
  75. prefix="/prometheus",
  76. dependencies=[Depends(get_admin_user)],
  77. include_in_schema=False,
  78. )
  79. # authed routes
  80. v1_base_router = APIRouter(dependencies=[Depends(get_current_user)])
  81. v1_base_router.include_router(users.me_router, prefix="/users", tags=["Users"])
  82. v1_base_router.include_router(users.directory_router, tags=["Users"])
  83. v1_base_router.include_router(api_keys.router, prefix="/api-keys", tags=["API Keys"])
  84. v1_base_router.include_router(usage.router, prefix="/usage", tags=["Usage"])
  85. v1_base_router.include_router(
  86. me_orgs.router,
  87. prefix="/users/me",
  88. tags=["My Organizations"],
  89. include_in_schema=_EXTENDED_API_IN_SCHEMA,
  90. )
  91. v1_base_router.include_router(
  92. organization_members.router,
  93. tags=["Organization Members"],
  94. include_in_schema=_EXTENDED_API_IN_SCHEMA,
  95. )
  96. v1_base_router.include_router(
  97. user_groups.router,
  98. tags=["User Groups"],
  99. include_in_schema=_EXTENDED_API_IN_SCHEMA,
  100. )
  101. v1_base_router.include_router(
  102. metrics.router, prefix="/metrics", include_in_schema=False
  103. )
  104. v1_base_router.include_router(
  105. model_routes.my_models_router,
  106. dependencies=[Depends(get_current_user)],
  107. prefix="/my-models",
  108. tags=["My Models"],
  109. )
  110. # BYO cluster: clusters / cloud-credentials / worker-pools live on the
  111. # user-level router so Org owner / admin can CRUD their own infra. The
  112. # routes themselves enforce per-row ownership via assert_cluster_writable
  113. # and friends, so platform-only operations (e.g. set-default) still
  114. # require is_admin inside the handler.
  115. v1_base_router.include_router(clusters.router, prefix="/clusters", tags=["Clusters"])
  116. v1_base_router.include_router(
  117. cloud_credentials.router,
  118. prefix="/cloud-credentials",
  119. tags=["Cloud Credentials"],
  120. )
  121. v1_base_router.include_router(
  122. worker_pools.router, prefix="/worker-pools", tags=["Worker Pools"]
  123. )
  124. # Workers are visible to anyone who can see their cluster; mutations gated
  125. # by an explicit is_admin check inside each handler.
  126. v1_base_router.include_router(workers.router, prefix="/workers", tags=["Workers"])
  127. cluster_client_router = APIRouter()
  128. cluster_client_router.add_api_route(
  129. path="/clusters/{id}/manifests",
  130. endpoint=clusters.get_cluster_manifests,
  131. methods=["GET"],
  132. )
  133. cluster_client_router.add_api_route(
  134. path="/workers",
  135. endpoint=workers.create_worker,
  136. methods=["POST"],
  137. )
  138. model_routers = [
  139. {"router": models.router, "prefix": "/models", "tags": ["Models"]},
  140. {
  141. "router": model_instances.router,
  142. "prefix": "/model-instances",
  143. "tags": ["Model Instances"],
  144. },
  145. {"router": model_files.router, "prefix": "/model-files", "tags": ["Model Files"]},
  146. {"router": benchmarks.router, "prefix": "/benchmarks", "tags": ["Benchmarks"]},
  147. {
  148. "router": benchmark_profiles.router,
  149. "prefix": "/benchmark-profiles",
  150. "tags": ["Benchmark Profiles"],
  151. },
  152. {
  153. "router": model_routes.target_router,
  154. "prefix": "/model-route-targets",
  155. "tags": ["Model Route Targets"],
  156. },
  157. ]
  158. # worker client have full access to model and model instances
  159. worker_client_router = APIRouter()
  160. for model_router in model_routers:
  161. worker_client_router.include_router(**model_router)
  162. # ready only access to workers
  163. worker_client_router.add_api_route(
  164. path="/workers",
  165. endpoint=workers.get_workers,
  166. methods=["GET"],
  167. response_model=workers.WorkersPublic,
  168. )
  169. worker_client_router.add_api_route(
  170. path="/workers/{id}",
  171. endpoint=workers.get_worker,
  172. methods=["GET"],
  173. response_model=workers.WorkerPublic,
  174. )
  175. worker_client_router.add_api_route(
  176. path="/worker-status",
  177. endpoint=workers.create_worker_status,
  178. methods=["POST"],
  179. include_in_schema=False,
  180. )
  181. worker_client_router.add_api_route(
  182. path="/worker-heartbeat",
  183. endpoint=workers.heartbeat,
  184. methods=["POST"],
  185. include_in_schema=False,
  186. )
  187. worker_client_router.include_router(
  188. inference_backend.router, prefix="/inference-backends", tags=["Inference Backend"]
  189. )
  190. # Tenant-aware routers: any logged-in user can hit them; the handlers
  191. # filter by TenantContext (owner_principal_id / cluster visibility).
  192. tenant_routers = model_routers + [
  193. {"router": gpu_devices.router, "prefix": "/gpu-devices", "tags": ["GPU Devices"]},
  194. {
  195. "router": model_provider.router,
  196. "prefix": "/model-providers",
  197. "tags": ["Model Providers"],
  198. },
  199. {
  200. "router": model_routes.router,
  201. "prefix": "/model-routes",
  202. "tags": ["Model Routes"],
  203. },
  204. {
  205. "router": model_route_principals.router,
  206. "prefix": "/model-routes",
  207. "tags": ["Model Route Principals"],
  208. "include_in_schema": _EXTENDED_API_IN_SCHEMA,
  209. },
  210. {
  211. "router": model_evaluations.router,
  212. "prefix": "/model-evaluations",
  213. "tags": ["Model Evaluations"],
  214. },
  215. # Read-only platform catalogs (no tenant data) — every logged-in user
  216. # needs them to deploy models, including Org owners/managers.
  217. {"router": model_sets.router, "prefix": "/model-sets", "tags": ["Model Sets"]},
  218. {
  219. "router": draft_models.router,
  220. "prefix": "/draft-models",
  221. "tags": ["Draft Models"],
  222. },
  223. # Inference backends are platform-wide (admin curates) but every Org
  224. # owner/manager needs to read them to pick a backend at deploy time.
  225. # Worker / cluster system users also reach this through v1_base_router
  226. # since `get_current_user` accepts ``is_system=True`` callers.
  227. {
  228. "router": inference_backend.router,
  229. "prefix": "/inference-backends",
  230. "tags": ["Inference Backend"],
  231. },
  232. ]
  233. # Platform-only routers — admin can manage globally; non-admin gets 403.
  234. admin_routers = [
  235. {"router": dashboard.router, "prefix": "/dashboard", "tags": ["Dashboard"]},
  236. {"router": users.router, "prefix": "/users", "tags": ["Users"]},
  237. {
  238. "router": organizations.router,
  239. "prefix": "/organizations",
  240. "tags": ["Organizations"],
  241. "include_in_schema": _EXTENDED_API_IN_SCHEMA,
  242. },
  243. {
  244. "router": cluster_access.router,
  245. "tags": ["Cluster Access"],
  246. "include_in_schema": _EXTENDED_API_IN_SCHEMA,
  247. },
  248. ]
  249. for tr in tenant_routers:
  250. v1_base_router.include_router(**tr)
  251. v1_admin_router = APIRouter()
  252. for admin_router in admin_routers:
  253. v1_admin_router.include_router(**admin_router)
  254. # Order matters: FastAPI dispatches the FIRST router whose path matches.
  255. # v1_base_router and worker_client_router register overlapping endpoints
  256. # (e.g. /v2/models, /v2/workers) — putting v1_base_router first means
  257. # regular user requests resolve through ``get_current_user`` (which also
  258. # accepts worker / cluster system users), and only routes that are unique
  259. # to the worker / cluster client paths fall through to those routers.
  260. management_router.include_router(
  261. v1_base_router, dependencies=[Depends(get_current_user)], prefix=versioned_prefix
  262. )
  263. management_router.include_router(
  264. worker_client_router,
  265. dependencies=[Depends(get_worker_user)],
  266. prefix=versioned_prefix,
  267. )
  268. management_router.include_router(
  269. cluster_client_router,
  270. dependencies=[Depends(get_cluster_user)],
  271. prefix=versioned_prefix,
  272. )
  273. management_router.include_router(
  274. v1_admin_router, dependencies=[Depends(get_admin_user)], prefix=versioned_prefix
  275. )
  276. management_router.include_router(
  277. config.router,
  278. dependencies=[Depends(get_admin_user)],
  279. prefix=versioned_prefix,
  280. include_in_schema=False,
  281. )
  282. management_router.include_router(
  283. debug.router,
  284. dependencies=[Depends(get_admin_user)],
  285. prefix="/debug",
  286. include_in_schema=False,
  287. )
  288. management_router.include_router(
  289. update.router,
  290. dependencies=[Depends(get_admin_user)],
  291. prefix="/update",
  292. include_in_schema=False,
  293. )
  294. management_router.include_router(
  295. proxy.router,
  296. dependencies=[Depends(get_current_user)],
  297. prefix="/proxy",
  298. tags=["Server-Side Proxy"],
  299. include_in_schema=False,
  300. )
  301. inference_router = APIRouter(
  302. dependencies=[Depends(get_current_user), Depends(inference_scope)]
  303. )
  304. inference_router.include_router(
  305. openai.get_legacy_api_router(),
  306. prefix="/v1-openai",
  307. responses=openai_api_error_responses,
  308. tags=["OpenAI-Compatible APIs (Legacy alias)"],
  309. )
  310. inference_router.include_router(
  311. openai.get_api_router(),
  312. prefix="/v1",
  313. responses=openai_api_error_responses,
  314. tags=["OpenAI-Compatible APIs"],
  315. )
  316. inference_router.include_router(
  317. rerank.router,
  318. prefix="/v1",
  319. tags=["Rerank"],
  320. )
  321. # Following routes should not check api scope as it is publicly accessible and used for authentication by external services.
  322. api_router.include_router(probes.router, tags=["Probes"])
  323. api_router.include_router(auth.router, prefix="/auth", tags=["Auth"])
  324. api_router.include_router(
  325. router=token.router,
  326. prefix="/token-auth",
  327. include_in_schema=False,
  328. )
  329. api_router.include_router(management_router)
  330. api_router.include_router(inference_router)
  331. api_router.include_router(higress_plugins_router, include_in_schema=False)
  332. api_router.include_router(
  333. gateway_metrics_router,
  334. prefix=f"{versioned_prefix}/usage",
  335. include_in_schema=False,
  336. )
  337. api_router.include_router(
  338. message_server_router,
  339. tags=["WebSocket Proxy"],
  340. include_in_schema=True,
  341. )