mkdocs.yml 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264
  # Project information
  site_name: MASS-Base
  site_url: https://docs.gpustack.ai
  site_author: MASS-Base
  # Folded scalar (>-): the three lines below are joined with single spaces
  # and the trailing newline is stripped. The body must be indented deeper
  # than the key to be parsed as its value.
  site_description: >-
    MASS-Base is an open-source GPU cluster manager designed for efficient AI model deployment.
    It lets you run models efficiently on your own GPU hardware by choosing the best inference engines,
    scheduling GPU resources, analyzing model architectures, and automatically configuring deployment parameters.

  # Repository
  repo_name: gpustack/gpustack
  repo_url: https://github.com/gpustack/gpustack

  # Copyright
  copyright: Copyright © 2026 GPUStack.ai
  # Theme: Material for MkDocs, with template overrides read from custom_dir.
  theme:
    name: material
    logo: assets/logo-white.png
    favicon: assets/logo.png
    custom_dir: docs/overrides

    # Three palette entries; each toggle's name describes the mode it
    # switches to, so the cycle is system preference -> light -> dark.
    palette:
      # Follow the operating-system colour preference.
      - media: "(prefers-color-scheme)"
        toggle:
          icon: material/brightness-auto
          name: Switch to light mode

      # Light mode.
      - media: "(prefers-color-scheme: light)"
        scheme: default
        primary: custom
        accent: orange
        toggle:
          icon: material/brightness-7
          name: Switch to dark mode

      # Dark mode.
      - media: "(prefers-color-scheme: dark)"
        scheme: slate
        primary: light blue
        accent: orange
        toggle:
          icon: material/brightness-4
          name: Switch to system preference

    # Feature flags enabled for the theme.
    features:
      - search.suggest
      - search.highlight
      - content.tabs.link
      - navigation.indexes
      - content.tooltips
      - navigation.path
      - navigation.tabs
      - content.code.annotate
      - content.code.copy
      - content.code.select
      - content.action.view
      - content.action.edit
      - navigation.top
      - navigation.footer
      - navigation.tracking
      - search.share
      - toc.follow

    icon:
      repo: fontawesome/brands/github

    language: en
  # Plugins. Entries with options are single-key mappings whose value (the
  # option map) must be indented beneath the plugin name.
  plugins:
    - glightbox
    - search

    # Every retired per-vendor installation page redirects to the shared
    # requirements page.
    - redirects:
        redirect_maps:
          'installation/nvidia/installation.md': 'installation/requirements.md'
          'installation/amd/installation.md': 'installation/requirements.md'
          'installation/ascend/installation.md': 'installation/requirements.md'
          'installation/hygon/installation.md': 'installation/requirements.md'
          'installation/mthreads/installation.md': 'installation/requirements.md'
          'installation/iluvatar/installation.md': 'installation/requirements.md'
          'installation/cambricon/installation.md': 'installation/requirements.md'
          'installation/metax/installation.md': 'installation/requirements.md'

    # Minification of the generated site output.
    - minify:
        minify_html: true
        minify_css: true
        minify_js: true
        cache_safe: true
        htmlmin_opts:
          remove_comments: true
  # Extra values passed through to the theme templates.
  extra:
    generator: false

    # Documentation versioning is provided by mike.
    version:
      provider: mike

    # Social links; each entry pairs an icon with its target URL.
    social:
      - icon: fontawesome/brands/github
        link: https://github.com/gpustack
      - icon: fontawesome/brands/docker
        link: https://hub.docker.com/r/gpustack
      - icon: fontawesome/brands/python
        link: https://pypi.org/project/gpustack/
      - icon: fontawesome/brands/discord
        link: https://discord.gg/VXYJzuaqwD
      - icon: fontawesome/brands/weixin
        link: https://github.com/gpustack/gpustack/blob/main/docs/assets/wechat-group-qrcode.jpg
  # Additional scripts, listed in the order they are declared: the image
  # selector script, KaTeX with its auto-render add-on and local init script,
  # then tablesort with its local init script.
  extra_javascript:
    - image-selector/assets/extra.js
    - https://unpkg.com/katex@0/dist/katex.min.js
    - javascripts/katex.js
    - https://unpkg.com/katex@0/dist/contrib/auto-render.min.js
    - https://unpkg.com/tablesort@5.3.0/dist/tablesort.min.js
    - javascripts/tablesort.js

  # Additional stylesheets: site overrides plus the KaTeX stylesheet.
  extra_css:
    - stylesheets/extra.css
    - https://unpkg.com/katex@0/dist/katex.min.css
  # Markdown extensions. Entries with options are single-key mappings whose
  # option map must be indented beneath the extension name.
  markdown_extensions:
    - abbr
    - def_list
    - attr_list
    - md_in_html

    # Code blocks.
    - pymdownx.highlight:
        linenums: true
        line_spans: __span
        anchor_linenums: true
        pygments_lang_class: true
    - pymdownx.inlinehilite
    - pymdownx.superfences:
        custom_fences:
          # The !!python/name tag resolves to a Python callable, so this file
          # cannot be read with a plain safe YAML loader.
          - name: mermaid
            class: mermaid
            format: !!python/name:pymdownx.superfences.fence_code_format
    - pymdownx.tabbed:
        alternate_style: true

    - admonition
    - sane_lists
    - pymdownx.details
    - pymdownx.tilde
    - pymdownx.betterem
    - pymdownx.caret
    - pymdownx.tasklist:
        custom_checkbox: true

    # generic: true pairs with the KaTeX assets for math rendering.
    # NOTE(review): assumes KaTeX is loaded via extra_javascript - confirm.
    - pymdownx.arithmatex:
        generic: true

    - pymdownx.emoji:
        emoji_generator: !!python/name:material.extensions.emoji.to_svg
        emoji_index: !!python/name:material.extensions.emoji.twemoji
    - pymdownx.critic
    - pymdownx.keys

    # Automatically append the listed file(s) to every page.
    - pymdownx.snippets:
        auto_append:
          - docs/includes/abbreviations.md
  # Path appended to the repository URL to build page edit links.
  edit_uri: edit/main/docs/

  # Site navigation.
  # NOTE(review): the nesting below is reconstructed from section headers and
  # page paths (e.g. user-guide/..., performance-lab/...). Confirm that
  # "Home", "Inference Performance Lab" and "Container Image Selector" are
  # the intended top-level entries.
  nav:
    - Home:
        - Overview: overview.md
        - Quickstart: quickstart.md
        - Installation:
            - Requirements: installation/requirements.md
            - Installation: installation/installation.md
            - Air-Gapped Installation: installation/air-gapped.md
            - Uninstallation: installation/uninstallation.md
        - Upgrade: upgrade.md
        - Migration: migration.md
        - User Guide:
            - Playground:
                - Playground: user-guide/playground/index.md
                - Chat: user-guide/playground/chat.md
                - Image: user-guide/playground/image.md
                - Audio: user-guide/playground/audio.md
                - Embedding: user-guide/playground/embedding.md
                - Rerank: user-guide/playground/rerank.md
            - Model Catalog: user-guide/model-catalog.md
            - Model Deployment Management: user-guide/model-deployment-management.md
            - Model Route Management: user-guide/model-route-management.md
            - Model Provider Management: user-guide/model-provider-management.md
            - Inference Backend Management: user-guide/inference-backend-management.md
            - Built-in Inference Backends: user-guide/built-in-inference-backends.md
            - Compatibility Check: user-guide/compatibility-check.md
            - Model File Management: user-guide/model-file-management.md
            - Cluster Management: user-guide/cluster-management.md
            - Cloud Credential Management: user-guide/cloud-credential-management.md
            - API Key Management: user-guide/api-key-management.md
            - User Management: user-guide/user-management.md
            - Single Sign-On (SSO) Authentication: user-guide/sso.md
            - Observability: user-guide/observability.md
            - Benchmarking: user-guide/benchmarking.md
        - Using Models:
            - Using Large Language Models: using-models/using-large-language-models.md
            - Using Vision Language Models: using-models/using-vision-language-models.md
            - Using Embedding Models: using-models/using-embedding-models.md
            - Using Reranker Models: using-models/using-reranker-models.md
            - Using Image Generation Models: using-models/using-image-generation-models.md
            - Using Audio Models: using-models/using-audio-models.md
        - Tutorials:
            - Running DeepSeek R1 671B with Distributed vLLM: tutorials/running-deepseek-r1-671b-with-distributed-vllm.md
            - Running DeepSeek R1 671B with Distributed Ascend Mindie: tutorials/running-deepseek-r1-671b-with-distributed-ascend-mindie.md
            - Inference On CPUs: tutorials/inference-on-cpus.md
            - Inference with Tool Calling: tutorials/inference-with-tool-calling.md
            - Using Custom Inference Backend: tutorials/using-custom-backends.md
            - Adding a GPU Cluster Using DigitalOcean: tutorials/adding-gpucluster-using-digitalocean.md
            - Adding a GPU Cluster Using Kubernetes: tutorials/adding-gpucluster-using-kubernetes.md
            - Managing Model Routes: tutorials/managing-model-routes.md
        - Integrations:
            - Inference APIs: integrations/inference-apis.md
            - Integrate with Claude Code: integrations/integrate-with-claude-code.md
            - Integrate with Dify: integrations/integrate-with-dify.md
            - Integrate with RAGFlow: integrations/integrate-with-ragflow.md
            - Integrate with CherryStudio: integrations/integrate-with-cherrystudio.md
            - Integrate with OpenClaw: integrations/integrate-with-openclaw.md
            - Integrate with n8n: integrations/integrate-with-n8n.md
            - Integrate with MaxKB: integrations/integrate-with-maxkb.md
        - Architecture: architecture.md
        - Scheduler: scheduler.md
        - Troubleshooting: troubleshooting.md
        - FAQ: faq.md
        - API Reference: api-reference.md
        - CLI Reference:
            - Start: cli-reference/start.md
            - Download Tools: cli-reference/download-tools.md
            - Reload Config: cli-reference/reload-config.md
            - List Images: cli-reference/list-images.md
            - Save Images: cli-reference/save-images.md
            - Load Images: cli-reference/load-images.md
            - Copy Images: cli-reference/copy-images.md
        - Environment Variables: environment-variables.md

    - Inference Performance Lab:
        - Overview: performance-lab/overview.md
        # Grouped by model, then by accelerator.
        - Optimizing Throughput:
            - Qwen3.5-35B-A3B:
                - H200: performance-lab/qwen3.5-35b-a3b/h200.md
            - Qwen3.5-9B:
                - H100: performance-lab/qwen3.5-9b/h100.md
            - GLM-4.5-Air:
                - A100: performance-lab/glm-4.5-air/a100.md
                - H100: performance-lab/glm-4.5-air/h100.md
            - GLM-4.x:
                - A100: performance-lab/glm-4.x/a100.md
                - H100: performance-lab/glm-4.x/h100.md
                - H200: performance-lab/glm-4.x/h200.md
            - GPT-OSS-20B:
                - A100: performance-lab/gpt-oss-20b/a100.md
                - H100: performance-lab/gpt-oss-20b/h100.md
            - GPT-OSS-120B:
                - A100: performance-lab/gpt-oss-120b/a100.md
                - H100: performance-lab/gpt-oss-120b/h100.md
            - DeepSeek-R1:
                - H200: performance-lab/deepseek-r1/h200.md
            - DeepSeek-V3.2:
                - H200: performance-lab/deepseek-v3.2/h200.md
            - Qwen3-8B:
                - 910B: performance-lab/qwen3-8b/910b.md
            - Qwen3-14B:
                - A100: performance-lab/qwen3-14b/a100.md
                - H100: performance-lab/qwen3-14b/h100.md
            - Qwen3-32B:
                - A100: performance-lab/qwen3-32b/a100.md
                - H100: performance-lab/qwen3-32b/h100.md
            - Qwen3-30B-A3B:
                - 910B: performance-lab/qwen3-30b-a3b/910b.md
            - Qwen3-235B-A22B:
                - A100: performance-lab/qwen3-235b-a22b/a100.md
                - H100: performance-lab/qwen3-235b-a22b/h100.md
        - Optimizing Latency:
            - Qwen3.5-35B-A3B:
                - H200: performance-lab/qwen3.5-35b-a3b/h200-latency.md
            - Qwen3.5-9B:
                - H100: performance-lab/qwen3.5-9b/h100-latency.md
            - Qwen3-8B:
                - H100: performance-lab/qwen3-8b/h100-latency.md
        - References:
            - The Impact of Quantization on vLLM Inference Performance: performance-lab/references/the-impact-of-quantization-on-vllm-inference-performance.md
            - Evaluating LMCache Prefill Acceleration in vLLM: performance-lab/references/evaluating-lmcache-prefill-acceleration-in-vllm.md

    - Container Image Selector: image-selector/index.html