tenant.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582
"""Tenant context resolution for multi-tenant request handling.

Each authenticated request resolves to a TenantContext that captures:

- the user identity
- whether they are a platform-level super-admin
- which principal the request is operating as (current_principal_id) —
  either an Org-principal the user is a member of, or the user's own
  USER-principal for personal scope
- which Org-level role they hold there (admin / user) — None for
  personal scope
- which clusters are accessible in that context

Read resolution order for current_principal_id:

1. If authenticated via an API key that carries an owner_principal_id,
   use api_key.owner_principal_id (the header is ignored)
2. Else, the X-Organization-Id request header if provided
3. Else, for non-admin: user.principal_id (NOT NULL by schema —
   provisioned at signup, so non-admin requests are structurally never
   context-less and can't bypass tenant filters with a NULL
   current_principal_id)
4. Else, for platform admin: None — "act across all principals"; read
   paths skip tenant filters via bypass_tenant_filter
"""
  22. from dataclasses import dataclass, field
  23. from typing import Annotated, Any, List, Optional, Set
  24. from fastapi import Depends, Header, Request
  25. from sqlmodel import select
  26. from sqlmodel.ext.asyncio.session import AsyncSession
  27. from gpustack.api.auth import get_current_user
  28. from gpustack.api.exceptions import (
  29. ForbiddenException,
  30. InvalidException,
  31. NotFoundException,
  32. )
  33. from gpustack.schemas.api_keys import ApiKey
  34. from gpustack.schemas.cluster_access import ClusterAccess
  35. from gpustack.schemas.principals import (
  36. OrgRole,
  37. Principal,
  38. PrincipalMembership,
  39. PrincipalType,
  40. )
  41. from gpustack.schemas.users import User
  42. from gpustack.server.db import get_session
# Both permission failures are raised as ForbiddenException; the two aliases
# only name the reason at the raise site (platform-admin gate vs. Org-role
# gate).
PlatformAdminError = ForbiddenException
OrgRoleError = ForbiddenException
  45. @dataclass
  46. class TenantContext:
  47. """Per-request tenant resolution result."""
  48. user: User
  49. is_platform_admin: bool
  50. # The principal the request is operating as. ORG-principal for an
  51. # Org context; the user's own USER-principal for personal scope;
  52. # None for platform-admin "All" mode.
  53. current_principal_id: Optional[int]
  54. org_role: Optional[OrgRole]
  55. accessible_cluster_ids: Set[int] = field(default_factory=set)
  56. # True when ``current_principal_id`` is the user's own
  57. # USER-principal (personal scope) rather than an ORG-principal.
  58. # Lets endpoints treat the "owner of a one-member namespace" case
  59. # differently from a real multi-user Org admin — e.g.
  60. # ``scope='all'`` on the Usage page should behave like ``self``
  61. # here (no other tenants share this scope).
  62. current_is_personal_scope: bool = False
  63. @property
  64. def has_org_context(self) -> bool:
  65. return self.current_principal_id is not None
  66. def assert_org_role(self, *allowed: OrgRole) -> None:
  67. """Raise if the caller doesn't hold one of the ``allowed`` roles
  68. in the current Org. Platform admins bypass.
  69. """
  70. if self.is_platform_admin:
  71. return
  72. if self.org_role is None or self.org_role not in allowed:
  73. raise OrgRoleError(message="Insufficient organization role")
  74. def target_principal_id_for_write(self) -> Optional[int]:
  75. """Resolve the principal a CREATE / write request should land in.
  76. Reads happily honor the platform-admin "no current_principal_id
  77. = all-orgs" mode, but writes need an actual principal to stamp
  78. on the new row. When the request didn't pin a context (no
  79. header on an admin request, or a built-in client like the OSS
  80. host that never sends ``X-Organization-Id``), fall back to the
  81. user's own USER-principal — guaranteed non-null by schema —
  82. instead of failing.
  83. """
  84. if self.current_principal_id is not None:
  85. return self.current_principal_id
  86. return getattr(self.user, "principal_id", None)
  87. async def _resolve_membership(
  88. session: AsyncSession, member_principal_id: int, parent_principal_id: int
  89. ) -> Optional[PrincipalMembership]:
  90. """Active membership lookup. Soft-deleted rows are ignored — once
  91. a user is removed from an Org their permissions go with them, even
  92. though the audit row stays around.
  93. """
  94. stmt = select(PrincipalMembership).where(
  95. PrincipalMembership.member_principal_id == member_principal_id,
  96. PrincipalMembership.parent_principal_id == parent_principal_id,
  97. PrincipalMembership.deleted_at.is_(None),
  98. )
  99. return (await session.exec(stmt)).first()
  100. async def _user_group_principal_ids(
  101. session: AsyncSession,
  102. user_principal_id: int,
  103. org_principal_id: int,
  104. ) -> List[int]:
  105. """GROUP-principal ids inside ``org_principal_id`` that the user
  106. is a member of.
  107. Two-hop join: first find groups whose parent is the org, then find
  108. memberships where the user joins those groups.
  109. """
  110. parents = select(Principal.id).where(
  111. Principal.parent_principal_id == org_principal_id,
  112. Principal.kind == PrincipalType.GROUP,
  113. Principal.deleted_at.is_(None),
  114. )
  115. stmt = select(PrincipalMembership.parent_principal_id).where(
  116. PrincipalMembership.member_principal_id == user_principal_id,
  117. PrincipalMembership.parent_principal_id.in_(parents),
  118. PrincipalMembership.deleted_at.is_(None),
  119. )
  120. return list((await session.exec(stmt)).all())
  121. async def _accessible_clusters(
  122. session: AsyncSession,
  123. user_principal_id: int,
  124. org_principal_id: Optional[int],
  125. group_principal_ids: List[int],
  126. ) -> Set[int]:
  127. """Cluster ids reachable from any of: org, user, or any joined group."""
  128. principal_ids = [user_principal_id, *group_principal_ids]
  129. if org_principal_id is not None:
  130. principal_ids.append(org_principal_id)
  131. if not principal_ids:
  132. return set()
  133. stmt = select(ClusterAccess.cluster_id).where(
  134. ClusterAccess.principal_id.in_(principal_ids),
  135. )
  136. return set((await session.exec(stmt)).all())
  137. def _resolve_requested_principal_id(
  138. request: Request,
  139. user: User,
  140. header_value: Optional[str],
  141. ) -> Optional[int]:
  142. api_key: Optional[ApiKey] = getattr(request.state, "api_key", None)
  143. if api_key is not None and api_key.owner_principal_id is not None:
  144. return api_key.owner_principal_id
  145. if header_value:
  146. try:
  147. return int(header_value)
  148. except ValueError as exc:
  149. # 400 — the header is structurally bad, not a permission issue.
  150. raise InvalidException(message="Invalid X-Organization-Id") from exc
  151. # Platform admins default to "no context" (cross-principal platform
  152. # view) when nothing is supplied. They opt into act-as mode by
  153. # sending X-Organization-Id explicitly. Non-admins fall back to
  154. # their own USER-principal (NOT NULL by schema), which guarantees
  155. # ``current_principal_id`` is never None for non-admin callers —
  156. # closing the bypass that would otherwise let an empty filter run
  157. # against tenant-scoped lists.
  158. if user.is_admin:
  159. return None
  160. return user.principal_id
  161. async def get_tenant_context(
  162. request: Request,
  163. session: Annotated[AsyncSession, Depends(get_session)],
  164. user: Annotated[User, Depends(get_current_user)],
  165. x_organization_id: Annotated[Optional[str], Header()] = None,
  166. ) -> TenantContext:
  167. """Resolve the per-request TenantContext.
  168. Result is cached on `request.state.tenant_context` so multiple downstream
  169. dependencies in the same request share one resolution.
  170. """
  171. if hasattr(request.state, "tenant_context"):
  172. return request.state.tenant_context
  173. is_platform_admin = bool(user.is_admin)
  174. current_principal_id = _resolve_requested_principal_id(
  175. request, user, x_organization_id
  176. )
  177. org_role: Optional[OrgRole] = None
  178. accessible_cluster_ids: Set[int] = set()
  179. current_is_personal_scope = False
  180. if current_principal_id is not None and not user.is_system:
  181. # Personal scope short-circuit: when the request points at the
  182. # caller's own USER-principal there's no org membership to
  183. # resolve and no cluster_access grants other than the caller's
  184. # own. Skip the joins.
  185. if current_principal_id == user.principal_id:
  186. current_is_personal_scope = True
  187. accessible_cluster_ids = await _accessible_clusters(
  188. session,
  189. user.principal_id,
  190. None,
  191. [],
  192. )
  193. else:
  194. membership = await _resolve_membership(
  195. session, user.principal_id, current_principal_id
  196. )
  197. if membership is not None:
  198. org_role = membership.role
  199. elif not is_platform_admin:
  200. # Non-admin users cannot operate as a principal they are
  201. # not a member of.
  202. raise ForbiddenException(
  203. message=(f"Not a member of organization " f"{current_principal_id}")
  204. )
  205. group_ids = await _user_group_principal_ids(
  206. session, user.principal_id, current_principal_id
  207. )
  208. accessible_cluster_ids = await _accessible_clusters(
  209. session,
  210. user.principal_id,
  211. current_principal_id,
  212. group_ids,
  213. )
  214. # Validate the org-principal exists and isn't soft-deleted
  215. # before letting the request continue. Org soft-delete is
  216. # "removed for users" — block context resolution against it
  217. # so the membership row (still present, since CASCADE
  218. # doesn't fire on soft delete) can't be used to keep
  219. # operating in a logically-removed Org.
  220. org_row = await Principal.first_by_field(
  221. session, "id", current_principal_id
  222. )
  223. if (
  224. org_row is None
  225. or org_row.deleted_at is not None
  226. or org_row.kind != PrincipalType.ORG
  227. ):
  228. raise NotFoundException(
  229. message=(f"Organization {current_principal_id} not found")
  230. )
  231. ctx = TenantContext(
  232. user=user,
  233. is_platform_admin=is_platform_admin,
  234. current_principal_id=current_principal_id,
  235. org_role=org_role,
  236. accessible_cluster_ids=accessible_cluster_ids,
  237. current_is_personal_scope=current_is_personal_scope,
  238. )
  239. request.state.tenant_context = ctx
  240. return ctx
  241. async def require_platform_admin(
  242. ctx: Annotated[TenantContext, Depends(get_tenant_context)],
  243. ) -> TenantContext:
  244. """Allow only platform-level super-admin (`users.is_admin = True`)."""
  245. if not ctx.is_platform_admin:
  246. raise PlatformAdminError(message="Platform admin permission required")
  247. return ctx
  248. def bypass_tenant_filter(ctx: TenantContext) -> bool:
  249. """Identify request contexts that should not be tenant-scoped.
  250. Two categories bypass:
  251. - Platform admin with no principal context (cross-principal platform
  252. view).
  253. - System users (worker / cluster service accounts that the server
  254. itself spawns). They authenticate as ``is_system=True`` and need
  255. to read every tenant's resources to do their job — e.g. a worker
  256. fetching the Model row for an instance assigned to it.
  257. """
  258. if ctx.user is not None and getattr(ctx.user, "is_system", False):
  259. return True
  260. if ctx.is_platform_admin and ctx.current_principal_id is None:
  261. return True
  262. return False
  263. def tenant_list_conditions(
  264. ctx: TenantContext,
  265. model: Any,
  266. ) -> List[Any]:
  267. """Build SQLAlchemy WHERE clauses to scope a list query to the caller.
  268. Visibility model:
  269. - System users (workers / cluster service accounts) and platform
  270. admin without org context see everything — returns no conditions.
  271. - Everyone else with a principal context filters by
  272. ``model.owner_principal_id == ctx.current_principal_id``.
  273. Membership in the org is already enforced by
  274. ``get_tenant_context``.
  275. """
  276. conditions: List[Any] = []
  277. if bypass_tenant_filter(ctx):
  278. return conditions
  279. if ctx.current_principal_id is not None and hasattr(model, "owner_principal_id"):
  280. conditions.append(model.owner_principal_id == ctx.current_principal_id)
  281. return conditions
  282. def cluster_visibility_conditions(
  283. ctx: TenantContext,
  284. model: Any,
  285. ) -> List[Any]:
  286. """Visibility filter specific to Cluster-like infrastructure rows.
  287. Clusters can be visible to a non-admin caller through TWO independent
  288. paths, so the regular ``owner_principal_id`` equality filter would
  289. be too narrow:
  290. - **Own-principal cluster**
  291. (``cluster.owner_principal_id == current_principal_id``):
  292. the caller's BYO cluster.
  293. - **Granted via cluster_access** (``cluster.id`` ∈
  294. ``ctx.accessible_cluster_ids``): global clusters the admin
  295. authorised, or another principal's cluster sublet to us.
  296. Either path makes the cluster visible. System users and platform
  297. admins (no-context) bypass entirely.
  298. """
  299. from sqlalchemy import or_
  300. if bypass_tenant_filter(ctx):
  301. return []
  302. or_clauses = []
  303. if ctx.current_principal_id is not None:
  304. or_clauses.append(model.owner_principal_id == ctx.current_principal_id)
  305. if ctx.accessible_cluster_ids:
  306. or_clauses.append(model.id.in_(ctx.accessible_cluster_ids))
  307. if not or_clauses:
  308. # No avenue to see anything; force an empty result rather than
  309. # leak the full table when accessible_cluster_ids is empty.
  310. return [model.id == -1]
  311. return [or_(*or_clauses)]
  312. def cluster_resource_visibility_conditions(
  313. ctx: TenantContext,
  314. model: Any,
  315. ) -> List[Any]:
  316. """Visibility filter for resources that carry BOTH ``owner_principal_id``
  317. (denormalized from cluster) AND ``cluster_id`` — Worker, ModelFile,
  318. Benchmark, ModelEvaluation, etc.
  319. A row is visible if:
  320. - it's owned by the caller's current principal
  321. (``owner_principal_id`` match), OR
  322. - its cluster is granted via ``cluster_access`` (``cluster_id`` ∈
  323. ``accessible_cluster_ids``).
  324. NULL ``owner_principal_id`` rows live on global clusters; they're
  325. only visible through the second branch (cluster_access) for
  326. non-admin.
  327. """
  328. from sqlalchemy import or_
  329. if bypass_tenant_filter(ctx):
  330. return []
  331. or_clauses = []
  332. if ctx.current_principal_id is not None and hasattr(model, "owner_principal_id"):
  333. or_clauses.append(model.owner_principal_id == ctx.current_principal_id)
  334. if ctx.accessible_cluster_ids and hasattr(model, "cluster_id"):
  335. or_clauses.append(model.cluster_id.in_(ctx.accessible_cluster_ids))
  336. if not or_clauses:
  337. # No access path; force empty result rather than leak.
  338. anchor = getattr(model, "cluster_id", None) or getattr(model, "id", None)
  339. return [anchor == -1]
  340. return [or_(*or_clauses)]
  341. def assert_cluster_resource_visible(
  342. ctx: TenantContext,
  343. resource: Any,
  344. *,
  345. not_found_message: str = "Resource not found",
  346. ) -> None:
  347. """Single-row mirror of ``cluster_resource_visibility_conditions``."""
  348. if resource is None:
  349. raise NotFoundException(message=not_found_message)
  350. if bypass_tenant_filter(ctx):
  351. return
  352. owner = getattr(resource, "owner_principal_id", None)
  353. cluster_id = getattr(resource, "cluster_id", None)
  354. if (
  355. ctx.current_principal_id is not None
  356. and owner is not None
  357. and owner == ctx.current_principal_id
  358. ):
  359. return
  360. if cluster_id is not None and cluster_id in ctx.accessible_cluster_ids:
  361. return
  362. raise NotFoundException(message=not_found_message)
  363. def assert_cluster_visible(
  364. ctx: TenantContext,
  365. cluster: Any,
  366. *,
  367. not_found_message: str = "Cluster not found",
  368. ) -> None:
  369. """404 if the caller can't see this cluster (own-principal OR cluster_access)."""
  370. if cluster is None:
  371. raise NotFoundException(message=not_found_message)
  372. if bypass_tenant_filter(ctx):
  373. return
  374. cluster_owner = getattr(cluster, "owner_principal_id", None)
  375. if (
  376. ctx.current_principal_id is not None
  377. and cluster_owner is not None
  378. and cluster_owner == ctx.current_principal_id
  379. ):
  380. return
  381. if cluster.id in ctx.accessible_cluster_ids:
  382. return
  383. raise NotFoundException(message=not_found_message)
  384. def assert_org_owned_writable(
  385. ctx: TenantContext,
  386. resource: Any,
  387. *,
  388. resource_label: str = "resource",
  389. ) -> None:
  390. """403 if the caller can't mutate an org-owned infrastructure row.
  391. Used for clusters / cloud_credentials / worker_pools / inference
  392. backends — anything with a nullable ``owner_principal_id`` and these
  393. write rules:
  394. - Platform admin / system user → allowed (bypass via
  395. ``bypass_tenant_filter`` for "All" mode admin and system users;
  396. admin in act-as falls through to row-owner check, where they're
  397. treated like an Org admin).
  398. - **Owned by current principal**: an Org admin can write; platform
  399. admin in act-as bypasses the role check (admin is admin
  400. everywhere, even when scoped to one Org).
  401. - **Global** (owner IS NULL): only "All"-mode admin — Org admins
  402. and admin-in-act-as cannot mutate Global rows directly. Resource
  403. handlers redirect such writes to the caller's own row instead.
  404. - **Other principal's row**: never writable for non-admin.
  405. """
  406. if bypass_tenant_filter(ctx):
  407. return
  408. res_owner = getattr(resource, "owner_principal_id", None)
  409. if res_owner is None:
  410. raise PlatformAdminError(
  411. message=f"Only platform admin can modify global {resource_label}"
  412. )
  413. if res_owner != ctx.current_principal_id:
  414. raise OrgRoleError(
  415. message=(
  416. f"{resource_label.capitalize()} does not belong to "
  417. "current organization"
  418. )
  419. )
  420. # Platform admin acting-as the Org passes the role check unconditionally;
  421. # for non-admin we require Org admin.
  422. if not ctx.is_platform_admin and ctx.org_role != OrgRole.ADMIN:
  423. raise OrgRoleError(
  424. message=(
  425. f"Insufficient organization role to modify this " f"{resource_label}"
  426. )
  427. )
  428. def assert_cluster_writable(
  429. ctx: TenantContext,
  430. cluster: Any,
  431. ) -> None:
  432. assert_org_owned_writable(ctx, cluster, resource_label="cluster")
  433. def validate_owner_principal(
  434. input_owner_principal_id: Optional[int],
  435. ctx: TenantContext,
  436. *,
  437. resource_label: str = "resource",
  438. ) -> None:
  439. """Decide whether the caller can create a row owned by
  440. ``input_owner_principal_id``.
  441. - Platform admin: any value (including NULL = global)
  442. - Org admin: must equal ``current_principal_id``; can't create global
  443. """
  444. if ctx.is_platform_admin:
  445. return
  446. if input_owner_principal_id is None:
  447. raise InvalidException(
  448. message=(f"Only platform admin can create global {resource_label}s")
  449. )
  450. if (
  451. ctx.current_principal_id is None
  452. or input_owner_principal_id != ctx.current_principal_id
  453. ):
  454. raise InvalidException(
  455. message="owner_principal_id must match the current organization"
  456. )
  457. if ctx.org_role != OrgRole.ADMIN:
  458. raise InvalidException(
  459. message=(f"Insufficient organization role to create a " f"{resource_label}")
  460. )
  461. def assert_resource_visible(
  462. ctx: TenantContext,
  463. resource: Any,
  464. *,
  465. not_found_message: str = "Resource not found",
  466. ) -> None:
  467. """Raise 404 if the caller is not allowed to see ``resource``.
  468. Mirrors the semantics of ``tenant_list_conditions`` for single-item
  469. GET / PUT / DELETE handlers: same visibility rules, raised as 404
  470. rather than 403 to avoid leaking the existence of cross-tenant rows.
  471. """
  472. if resource is None:
  473. raise NotFoundException(message=not_found_message)
  474. if bypass_tenant_filter(ctx):
  475. return
  476. owner = getattr(resource, "owner_principal_id", None)
  477. if (
  478. ctx.current_principal_id is not None
  479. and owner is not None
  480. and owner != ctx.current_principal_id
  481. ):
  482. raise NotFoundException(message=not_found_message)
  483. def require_org_role(*allowed: OrgRole):
  484. """Build a dependency that requires the requesting user to hold one of the
  485. given roles in `current_principal_id`. Platform admins always pass.
  486. """
  487. async def _dep(
  488. ctx: Annotated[TenantContext, Depends(get_tenant_context)],
  489. ) -> TenantContext:
  490. if ctx.is_platform_admin:
  491. return ctx
  492. if ctx.current_principal_id is None:
  493. raise OrgRoleError(message="Organization context required")
  494. ctx.assert_org_role(*allowed)
  495. return ctx
  496. return _dep