# daemonset.jinja
  1. ---
  2. # DaemonSet
  3. apiVersion: apps/v1
  4. kind: DaemonSet
  5. metadata:
  6. labels:
  7. app: gpustack-worker
  8. name: gpustack-worker
  9. namespace: {{ config.namespace }}
  10. spec:
  11. revisionHistoryLimit: 10
  12. selector:
  13. matchLabels:
  14. app: gpustack-worker
  15. template:
  16. metadata:
  17. labels:
  18. app: gpustack-worker
  19. spec:
  20. containers:
  21. - env:
  22. - name: GPUSTACK_SERVER_URL
  23. value: {{ config.server_url }}
  24. - name: GPUSTACK_WORKER_NAME
  25. valueFrom:
  26. fieldRef:
  27. fieldPath: spec.nodeName
  28. - name: GPUSTACK_WORKER_IP
  29. valueFrom:
  30. fieldRef:
  31. apiVersion: v1
  32. fieldPath: status.hostIP
  33. - name: GPUSTACK_RUNTIME_DEPLOY
  34. value: "Kubernetes"
  35. - name: GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT
  36. value: "true"
  37. - name: GPUSTACK_RUNTIME_DEPLOY_MIRRORED_NAME
  38. valueFrom:
  39. fieldRef:
  40. fieldPath: metadata.name
  41. - name: GPUSTACK_RUNTIME_KUBERNETES_NAMESPACE
  42. valueFrom:
  43. fieldRef:
  44. fieldPath: metadata.namespace
  45. - name: GPUSTACK_RUNTIME_KUBERNETES_NODE_NAME
  46. valueFrom:
  47. fieldRef:
  48. fieldPath: spec.nodeName
  49. {%- if config.runtime == 'hygon' %}
  50. - name: ROCM_PATH
  51. value: /opt/dtk
  52. - name: ROCM_SMI_LIB_PATH
  53. value: /opt/hyhal/lib
  54. {%- endif %}
  55. envFrom:
  56. - secretRef:
  57. name: registration-token
  58. optional: false
  59. {%- if config.args and config.args|length > 0 %}
  60. args:
  61. {%- for arg in config.args %}
  62. - {{ arg }}
  63. {%- endfor %}
  64. {%- endif %}
  65. image: {{ config.image }}
  66. imagePullPolicy: IfNotPresent
  67. name: gpustack-worker
  68. resources: {}
  69. securityContext:
  70. allowPrivilegeEscalation: true
  71. capabilities: {}
  72. privileged: true
  73. readOnlyRootFilesystem: false
  74. runAsNonRoot: false
  75. terminationMessagePath: /dev/termination-log
  76. terminationMessagePolicy: File
  77. volumeMounts:
  78. - name: cdi
  79. mountPath: /var/run/cdi
  80. - name: kubelet-device-plugins
  81. mountPath: /var/lib/kubelet/device-plugins
  82. {%- if config.runtime == 'amd' %}
  83. - name: gpustack-amd-driver
  84. mountPath: /opt/rocm
  85. readOnly: true
  86. {%- endif %}
  87. {%- if config.runtime == 'ascend' %}
  88. - name: gpustack-ascend-driver
  89. mountPath: /usr/local/Ascend/driver
  90. readOnly: true
  91. - name: gpustack-ascend-toolkit
  92. mountPath: /usr/local/Ascend/ascend-toolkit
  93. readOnly: true
  94. {%- endif %}
  95. {%- if config.runtime == 'hygon' %}
  96. - name: gpustack-hygon-driver
  97. mountPath: /opt/hyhal
  98. readOnly: true
  99. - name: gpustack-hygon-toolkit
  100. mountPath: /opt/dtk
  101. readOnly: true
  102. {%- endif %}
  103. {%- if config.runtime == 'metax' %}
  104. - name: gpustack-metax-driver
  105. mountPath: /opt/mxdriver
  106. readOnly: true
  107. - name: gpustack-metax-toolkit
  108. mountPath: /opt/maca
  109. readOnly: true
  110. {%- endif %}
  111. {%- if config.runtime == 'iluvatar' %}
  112. - name: gpustack-iluvatar-toolkit
  113. mountPath: /usr/local/corex
  114. readOnly: true
  115. {%- endif %}
  116. {%- if config.runtime == 'cambricon' %}
  117. - name: gpustack-cambricon-bin
  118. mountPath: /usr/bin/cnmon
  119. - name: gpustack-cambricon-toolkit
  120. mountPath: /usr/local/neuware
  121. readOnly: true
  122. {%- endif %}
  123. {%- if config.runtime == 'thead' %}
  124. - name: gpustack-thead-toolkit
  125. mountPath: /usr/local/PPU_SDK
  126. readOnly: true
  127. {%- endif %}
  128. {%- if config.k8s_volume_mounts %}
  129. {%- for vm in config.k8s_volume_mounts %}
  130. - name: {{ vm.name | to_yaml }}
  131. mountPath: {{ vm.mount_path | to_yaml }}
  132. readOnly: {{ vm.read_only | lower }}
  133. {%- endfor %}
  134. {%- endif %}
  135. ports:
  136. - name: api
  137. containerPort: 10150
  138. protocol: TCP
  139. - name: metrics
  140. containerPort: 10151
  141. protocol: TCP
  142. readinessProbe:
  143. httpGet:
  144. path: /readyz
  145. port: api
  146. initialDelaySeconds: 5
  147. periodSeconds: 10
  148. timeoutSeconds: 2
  149. failureThreshold: 3
  150. successThreshold: 1
  151. livenessProbe:
  152. httpGet:
  153. path: /healthz
  154. port: api
  155. initialDelaySeconds: 10
  156. periodSeconds: 10
  157. timeoutSeconds: 2
  158. failureThreshold: 5
  159. successThreshold: 1
  160. volumes:
  161. - name: cdi
  162. hostPath:
  163. path: /var/run/cdi
  164. type: DirectoryOrCreate
  165. - name: kubelet-device-plugins
  166. hostPath:
  167. path: /var/lib/kubelet/device-plugins
  168. type: DirectoryOrCreate
  169. {%- if config.runtime == 'ascend' %}
  170. - name: gpustack-ascend-driver
  171. hostPath:
  172. path: /usr/local/Ascend/driver
  173. type: DirectoryOrCreate
  174. - name: gpustack-ascend-toolkit
  175. hostPath:
  176. path: /usr/local/Ascend/ascend-toolkit
  177. type: DirectoryOrCreate
  178. {%- endif %}
  179. {%- if config.runtime == 'amd' %}
  180. - name: gpustack-amd-driver
  181. hostPath:
  182. path: /opt/rocm
  183. type: DirectoryOrCreate
  184. {%- endif %}
  185. {%- if config.runtime == 'hygon' %}
  186. - name: gpustack-hygon-driver
  187. hostPath:
  188. path: /opt/hyhal
  189. type: DirectoryOrCreate
  190. - name: gpustack-hygon-toolkit
  191. hostPath:
  192. path: /opt/dtk
  193. type: DirectoryOrCreate
  194. {%- endif %}
  195. {%- if config.runtime == 'metax' %}
  196. - name: gpustack-metax-driver
  197. hostPath:
  198. path: /opt/mxdriver
  199. type: DirectoryOrCreate
  200. - name: gpustack-metax-toolkit
  201. hostPath:
  202. path: /opt/maca
  203. type: DirectoryOrCreate
  204. {%- endif %}
  205. {%- if config.runtime == 'iluvatar' %}
  206. - name: gpustack-iluvatar-toolkit
  207. hostPath:
  208. path: /usr/local/corex
  209. type: DirectoryOrCreate
  210. {%- endif %}
  211. {%- if config.runtime == 'cambricon' %}
  212. - name: gpustack-cambricon-bin
  213. hostPath:
  214. path: /usr/bin/cnmon
  215. type: FileOrCreate
  216. - name: gpustack-cambricon-toolkit
  217. hostPath:
  218. path: /usr/local/neuware
  219. type: DirectoryOrCreate
  220. {%- endif %}
  221. {%- if config.runtime == 'thead' %}
  222. - name: gpustack-thead-toolkit
  223. hostPath:
  224. path: /usr/local/PPU_SDK
  225. type: DirectoryOrCreate
  226. {%- endif %}
  227. {%- if config.k8s_volume_mounts %}
  228. {%- for vm in config.k8s_volume_mounts %}
  229. - name: {{ vm.name | to_yaml }}
  230. {{ vm.volume_source | to_yaml | indent(10) }}
  231. {%- endfor %}
  232. {%- endif %}
  233. hostNetwork: true
  234. hostIPC: true
  235. dnsPolicy: ClusterFirstWithHostNet
  236. serviceAccountName: gpustack-worker
  237. {%- if config.runtime in ['nvidia', 'mthreads'] %}
  238. runtimeClassName: {{ config.runtime }}
  239. {%- endif %}
  240. updateStrategy:
  241. rollingUpdate:
  242. maxSurge: 0
  243. maxUnavailable: 1
  244. type: RollingUpdate