Dockerfile 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755
# Package logic:
# 1. base target:
#    - Install tools.
#    - Upgrade GCC if needed.
#    - Install C buildkit.
#    - Upgrade Python if needed.
#    - Install Python buildkit.
#    - Install Platform toolkit.
#    - Install S6-overlay.
# 2. gpustack target:
#    - Install PostgreSQL.
#    - Install Higress standalone components.
#    - Install Skopeo.
#    - Install Prometheus and Grafana.
#    - Install gpustack package from the mounted source code.
#    - Setup entrypoint to gpustack command.
# Argument usage:
# - PYTHON_VERSION: Version of Python to use.
# - GPUSTACK_BASE_IMAGE: Base image for the gpustack stage.
# - GPUSTACK_RUNTIME_ROCM_VERSION: Version of the ROCm detection library for gpustack-runtime; update this when the project dependencies change.
# - GPUSTACK_RUNTIME_DOCKER_MIRRORED_NAME_FILTER_LABELS: Semicolon-separated list of labels used to filter mirrored images when deploying a mirrored deployment.
# - HIGRESS_VERSION: Version of Higress to use.
# - HIGRESS_APISERVER_VERSION: Version of the Higress API server to use.
# - PROMETHEUS_VERSION: Version of Prometheus to use.
# - GRAFANA_VERSION: Version of Grafana to use.
  22. ARG PYTHON_VERSION=3.11
  23. ARG GPUSTACK_BASE_IMAGE=base
  24. ARG GPUSTACK_RUNTIME_ROCM_VERSION=7.0.2
  25. ARG GPUSTACK_RUNTIME_DOCKER_MIRRORED_NAME_FILTER_LABELS
  26. ARG HIGRESS_VERSION=2.1.9
  27. ARG HIGRESS_APISERVER_VERSION=0.0.26
  28. ARG PROMETHEUS_VERSION=3.5.1
  29. ARG GRAFANA_VERSION=12.2.4
  30. # Stage Base
  31. #
  32. # Example build command:
  33. # docker build --tag=gpustack/gpustack:base --file=pack/Dockerfile --target=base --progress=plain .
  34. #
  35. FROM gpustack/mirrored-higress-api-server:${HIGRESS_APISERVER_VERSION} AS apiserver
  36. FROM gpustack/mirrored-higress-higress:${HIGRESS_VERSION} AS controller
  37. FROM gpustack/mirrored-higress-pilot:${HIGRESS_VERSION} AS pilot
  38. FROM gpustack/mirrored-higress-gateway:${HIGRESS_VERSION} AS gateway
  39. FROM ubuntu:24.04@sha256:d1e2e92c075e5ca139d51a140fff46f84315c0fdce203eab2807c7e495eff4f9 AS base
  40. SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
  41. ARG TARGETPLATFORM
  42. ARG TARGETOS
  43. ARG TARGETARCH
  44. ## Install Tools
  45. ENV DEBIAN_FRONTEND=noninteractive \
  46. LANG='en_US.UTF-8' \
  47. LANGUAGE='en_US:en' \
  48. LC_ALL='en_US.UTF-8'
  49. RUN <<EOF
  50. # Tools
  51. # Use Tsinghua mirrors for Ubuntu
  52. sed -i 's|//archive.ubuntu.com|//mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
  53. sed -i 's|//security.ubuntu.com|//mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
  54. # Refresh
  55. apt-get update -y && apt-get install -y --no-install-recommends \
  56. software-properties-common apt-transport-https \
  57. ca-certificates gnupg2 lsb-release gnupg-agent \
  58. && apt-get update -y \
  59. && add-apt-repository -y ppa:ubuntu-toolchain-r/test \
  60. && sed -i 's|ppa.launchpadcontent.net|launchpad.proxy.ustclug.org|g' /etc/apt/sources.list.d/*.list \
  61. && apt-get update -y
  62. # Install
  63. apt-get install -y --no-install-recommends \
  64. ca-certificates build-essential binutils bash openssl \
  65. curl wget aria2 \
  66. git git-lfs \
  67. unzip xz-utils \
  68. tzdata locales \
  69. iproute2 iputils-ping ifstat net-tools dnsutils pciutils ipmitool \
  70. procps sysstat htop \
  71. vim jq bc tree \
  72. logrotate cron netcat-openbsd
  73. # Update locale
  74. localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8
  75. # Update timezone
  76. rm -f /etc/localtime \
  77. && ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \
  78. && echo "Asia/Shanghai" > /etc/timezone \
  79. && dpkg-reconfigure --frontend noninteractive tzdata
  80. # Cleanup
  81. rm -rf /var/tmp/* \
  82. && rm -rf /tmp/* \
  83. && rm -rf /var/cache/apt
  84. EOF
  85. ## Upgrade GCC if needed
  86. RUN <<EOF
  87. # GCC
  88. # Upgrade GCC if the Ubuntu version is lower than 21.04.
  89. source /etc/os-release
  90. if (( $(echo "${VERSION_ID} >= 21.04" | bc -l) )); then
  91. echo "Skipping GCC upgrade for ${VERSION_ID}..."
  92. exit 0
  93. fi
  94. # Install
  95. apt-get install -y --no-install-recommends \
  96. gcc-11 g++-11 gfortran-11 gfortran
  97. # Update alternatives
  98. if [[ -f /etc/alternatives/gcov-dump ]]; then update-alternatives --remove-all gcov-dump; fi; update-alternatives --install /usr/bin/gcov-dump gcov-dump /usr/bin/gcov-dump-11 10
  99. if [[ -f /etc/alternatives/lto-dump ]]; then update-alternatives --remove-all lto-dump; fi; update-alternatives --install /usr/bin/lto-dump lto-dump /usr/bin/lto-dump-11 10
  100. if [[ -f /etc/alternatives/gcov ]]; then update-alternatives --remove-all gcov; fi; update-alternatives --install /usr/bin/gcov gcov /usr/bin/gcov-11 10
  101. if [[ -f /etc/alternatives/gcc ]]; then update-alternatives --remove-all gcc; fi; update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 10
  102. if [[ -f /etc/alternatives/gcc-nm ]]; then update-alternatives --remove-all gcc-nm; fi; update-alternatives --install /usr/bin/gcc-nm gcc-nm /usr/bin/gcc-nm-11 10
  103. if [[ -f /etc/alternatives/cpp ]]; then update-alternatives --remove-all cpp; fi; update-alternatives --install /usr/bin/cpp cpp /usr/bin/cpp-11 10
  104. if [[ -f /etc/alternatives/g++ ]]; then update-alternatives --remove-all g++; fi; update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 10
  105. if [[ -f /etc/alternatives/gcc-ar ]]; then update-alternatives --remove-all gcc-ar; fi; update-alternatives --install /usr/bin/gcc-ar gcc-ar /usr/bin/gcc-ar-11 10
  106. if [[ -f /etc/alternatives/gcov-tool ]]; then update-alternatives --remove-all gcov-tool; fi; update-alternatives --install /usr/bin/gcov-tool gcov-tool /usr/bin/gcov-tool-11 10
  107. if [[ -f /etc/alternatives/gcc-ranlib ]]; then update-alternatives --remove-all gcc-ranlib; fi; update-alternatives --install /usr/bin/gcc-ranlib gcc-ranlib /usr/bin/gcc-ranlib-11 10
  108. if [[ -f /etc/alternatives/gfortran ]]; then update-alternatives --remove-all gfortran; fi; update-alternatives --install /usr/bin/gfortran gfortran /usr/bin/gfortran-11 10
  109. # Cleanup
  110. rm -rf /var/tmp/* \
  111. && rm -rf /tmp/* \
  112. && rm -rf /var/cache/apt
  113. EOF
  114. ## Install C buildkit
  115. RUN <<EOF
  116. # C buildkit
  117. # Install
  118. apt-get install -y --no-install-recommends \
  119. make ninja-build pkg-config ccache
  120. curl --retry 3 --retry-connrefused -fL "https://gh-proxy.com/https://github.com/Kitware/CMake/releases/download/v3.31.7/cmake-3.31.7-linux-$(uname -m).tar.gz" | tar -zx -C /usr --strip-components 1
  121. # Install dependencies
  122. apt-get install -y --no-install-recommends \
  123. openssl libssl-dev \
  124. zlib1g zlib1g-dev libbz2-dev libffi-dev \
  125. lzma lzma-dev uuid-dev liblzma-dev \
  126. ffmpeg libjpeg-dev libpng-dev libtiff-dev libwebp-dev \
  127. libsqlite3-dev \
  128. libxml2 libxslt1-dev \
  129. libnuma1 libnuma-dev \
  130. libgpgme-dev libassuan-dev libbtrfs-dev libdevmapper-dev \
  131. libjemalloc-dev
  132. # Cleanup
  133. rm -rf /var/tmp/* \
  134. && rm -rf /tmp/* \
  135. && rm -rf /var/cache/apt
  136. EOF
  137. ## Upgrade Python if needed
  138. ARG PYTHON_VERSION
  139. ENV PYTHON_VERSION=${PYTHON_VERSION}
  140. RUN <<EOF
  141. # Python
  142. if (( $(echo "$(python3 --version | cut -d' ' -f2 | cut -d'.' -f1,2) == ${PYTHON_VERSION}" | bc -l) )); then
  143. echo "Skipping Python upgrade for ${PYTHON_VERSION}..."
  144. if [[ -z "$(ldconfig -v 2>/dev/null | grep libpython${PYTHON_VERSION})" ]]; then
  145. PYTHON_LIB_PREFIX=$(python3 -c "import sys; print(sys.base_prefix);")
  146. echo "${PYTHON_LIB_PREFIX}/lib" >> /etc/ld.so.conf.d/python3.conf
  147. echo "${PYTHON_LIB_PREFIX}/lib64" >> /etc/ld.so.conf.d/python3.conf
  148. ldconfig -v
  149. fi
  150. exit 0
  151. fi
  152. # Add deadsnakes PPA for Python versions
  153. for i in 1 2 3; do
  154. add-apt-repository -y ppa:deadsnakes/ppa && break || { echo "Attempt $i failed, retrying in 5s..."; sleep 5; }
  155. done
  156. sed -i 's|ppa.launchpadcontent.net|launchpad.proxy.ustclug.org|g' /etc/apt/sources.list.d/*.list
  157. apt-get update -y
  158. # Install
  159. apt-get install -y --no-install-recommends \
  160. python${PYTHON_VERSION} \
  161. python${PYTHON_VERSION}-dev \
  162. python${PYTHON_VERSION}-venv \
  163. python${PYTHON_VERSION}-lib2to3 \
  164. python${PYTHON_VERSION}-gdbm \
  165. python${PYTHON_VERSION}-tk
  166. if (( $(echo "${PYTHON_VERSION} <= 3.11" | bc -l) )); then
  167. apt-get install -y --no-install-recommends \
  168. python${PYTHON_VERSION}-distutils
  169. fi
  170. # Update alternatives
  171. if [[ -f /etc/alternatives/python3 ]]; then update-alternatives --remove-all python3; fi; update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1
  172. if [[ -f /etc/alternatives/python ]]; then update-alternatives --remove-all python; fi; update-alternatives --install /usr/bin/python python /usr/bin/python${PYTHON_VERSION} 1
  173. curl -sS "https://gh-proxy.com/https://raw.githubusercontent.com/pypa/get-pip/main/get-pip.py" | python${PYTHON_VERSION}
  174. if [[ -f /etc/alternatives/2to3 ]]; then update-alternatives --remove-all 2to3; fi; update-alternatives --install /usr/bin/2to3 2to3 /usr/bin/2to3${PYTHON_VERSION} 1 || true
  175. if [[ -f /etc/alternatives/pydoc3 ]]; then update-alternatives --remove-all pydoc3; fi; update-alternatives --install /usr/bin/pydoc3 pydoc3 /usr/bin/pydoc${PYTHON_VERSION} 1 || true
  176. if [[ -f /etc/alternatives/idle3 ]]; then update-alternatives --remove-all idle3; fi; update-alternatives --install /usr/bin/idle3 idle3 /usr/bin/idle${PYTHON_VERSION} 1 || true
  177. if [[ -f /etc/alternatives/python3-config ]]; then update-alternatives --remove-all python3-config; fi; update-alternatives --install /usr/bin/python3-config python3-config /usr/bin/python${PYTHON_VERSION}-config 1 || true
  178. # Cleanup
  179. rm -rf /var/tmp/* \
  180. && rm -rf /tmp/* \
  181. && rm -rf /var/cache/apt
  182. EOF
  183. ## Install Python buildkit
  184. ENV PIP_NO_CACHE_DIR=1 \
  185. PIP_DISABLE_PIP_VERSION_CHECK=1 \
  186. PIP_ROOT_USER_ACTION=ignore \
  187. PIPX_HOME=/root/.local/share/pipx \
  188. PIPX_LOCAL_VENVS=/root/.local/share/pipx/venvs \
  189. POETRY_NO_CACHE=1 \
  190. UV_NO_CACHE=1 \
  191. UV_HTTP_TIMEOUT=500 \
  192. UV_INDEX_STRATEGY="unsafe-best-match"
  193. RUN <<EOF
  194. # Buildkit
  195. cat <<EOT >/tmp/requirements.txt
  196. build
  197. cmake<4
  198. ninja<1.11
  199. setuptools<80
  200. setuptools-scm
  201. packaging<25
  202. wheel
  203. pybind11<3
  204. Cython
  205. psutil
  206. pipx
  207. uv
  208. yq
  209. hatchling
  210. py-spy
  211. poetry
  212. EOT
  213. pip install -i https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple -r /tmp/requirements.txt
  214. # Cleanup
  215. rm -rf /var/tmp/* \
  216. && rm -rf /tmp/*
  217. EOF
  218. ## Install s6-overlay
  219. ARG S6_OVERLAY_VERSION=3.2.1.0
  220. RUN set -eux; \
  221. case "${TARGETARCH}" in \
  222. amd64) S6_ARCH="x86_64" ;; \
  223. arm64) S6_ARCH="aarch64" ;; \
  224. arm/v7) S6_ARCH="armhf" ;; \
  225. arm/v6) S6_ARCH="arm" ;; \
  226. *) \
  227. echo >&2 "⚠️ Warning: Unknown TARGETARCH='${TARGETARCH}', defaulting to x86_64"; \
  228. S6_ARCH="x86_64"; \
  229. ;; \
  230. esac; \
  231. echo "Installing s6-overlay ${S6_OVERLAY_VERSION} for arch: ${S6_ARCH} (from TARGETARCH=${TARGETARCH})"; \
  232. base_url="https://gh-proxy.com/https://github.com/just-containers/s6-overlay/releases/download/v${S6_OVERLAY_VERSION}"; \
  233. for pkg in noarch ${S6_ARCH}; do \
  234. wget -q -O "/tmp/s6-overlay-${pkg}.tar.xz" "${base_url}/s6-overlay-${pkg}.tar.xz"; \
  235. done \
  236. && echo "📦 Extracting s6-overlay ..." \
  237. && tar -C / -Jxpf /tmp/s6-overlay-noarch.tar.xz \
  238. && tar -C / -Jxpf /tmp/s6-overlay-${S6_ARCH}.tar.xz \
  239. && rm -f /tmp/s6-overlay-*.tar.xz \
  240. && echo "Installed s6-overlay ${S6_OVERLAY_VERSION} successfully."
  241. ENV S6_KEEP_ENV=1 \
  242. S6_BEHAVIOUR_IF_STAGE2_FAILS=2 \
  243. S6_SERVICES_GRACETIME=3000 \
  244. S6_KILL_GRACETIME=3000 \
  245. S6_VERBOSITY=1 \
  246. S6_CMD_WAIT_FOR_SERVICES=1
  247. #
  248. # Stage GPUStack
  249. #
  250. # Example build command:
  251. # docker build --tag=gpustack/gpustack:main --file=pack/Dockerfile --progress=plain .
  252. #
  253. # Vendor ROCm libraries from ROCm base image,
  254. # now only linux/amd64 is supported.
  255. # Must build on linux/amd64 platform.
  256. FROM --platform=${BUILDPLATFORM} rocm/dev-ubuntu-22.04:${GPUSTACK_RUNTIME_ROCM_VERSION} AS rocm-base
  257. FROM ${GPUSTACK_BASE_IMAGE} AS gpustack
  258. SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
  259. ARG TARGETPLATFORM
  260. ARG TARGETOS
  261. ARG TARGETARCH
  262. ## Configure data volume
  263. VOLUME /var/lib/gpustack
  264. ## Install PostgreSQL
  265. ENV PGCONFIG_FILE=/etc/postgresql/main/postgresql.conf \
  266. POSTGRES_DB=gpustack
  267. RUN set -eux; \
  268. groupadd -r postgres --gid=9999; \
  269. useradd -r -g postgres --uid=9999 --home-dir=/var/lib/postgresql --shell=/bin/bash postgres; \
  270. mkdir -p /var/lib/postgresql; \
  271. chown -R postgres:postgres /var/lib/postgresql
  272. RUN <<EOF
  273. set -eux
  274. # Use Tsinghua mirror for PostgreSQL APT repository
  275. wget -O /tmp/ACCC4CF8.asc https://mirrors.tuna.tsinghua.edu.cn/postgresql/repos/apt/ACCC4CF8.asc \
  276. && gpg --dearmor /tmp/ACCC4CF8.asc \
  277. && mv /tmp/ACCC4CF8.asc.gpg /usr/share/keyrings/postgresql-archive-keyring.gpg \
  278. && echo "deb [signed-by=/usr/share/keyrings/postgresql-archive-keyring.gpg] https://mirrors.tuna.tsinghua.edu.cn/postgresql/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list
  279. # Install
  280. apt-get update -y && apt-get install -y --no-install-recommends \
  281. postgresql-17 \
  282. gosu
  283. # Create symlinks for PostgreSQL 17 to simplify usage
  284. ln -s /usr/lib/postgresql/17/bin /usr/lib/postgresql/bin \
  285. && ln -s /etc/postgresql/17/main /etc/postgresql/main \
  286. && ln -s /var/lib/postgresql/17/main /var/lib/postgresql/main \
  287. && ls -1 /usr/lib/postgresql/bin/ | xargs -I @ ln -sf /usr/lib/postgresql/bin/@ /usr/bin/@
  288. gosu postgres echo "listen_addresses='*'" >> "$PGCONFIG_FILE" \
  289. && gosu postgres echo "local all postgres peer" > /etc/postgresql/main/pg_hba.conf \
  290. && gosu postgres echo "host all root 127.0.0.1/32 trust" >> /etc/postgresql/main/pg_hba.conf \
  291. && gosu postgres echo "host all root ::1/128 trust" >> /etc/postgresql/main/pg_hba.conf \
  292. && gosu postgres echo "host all all 0.0.0.0/0 scram-sha-256" >> /etc/postgresql/main/pg_hba.conf
  293. gosu postgres sed -i "s/^data_directory/#data_directory/" "$PGCONFIG_FILE" \
  294. && gosu postgres sed -i "s/^hba_file/#hba_file/" "$PGCONFIG_FILE" \
  295. && gosu postgres sed -i "s/^#log_destination/log_destination/" "$PGCONFIG_FILE" \
  296. && gosu postgres sed -i "s/^#log_min_messages = warning/log_min_messages = info/" "$PGCONFIG_FILE" \
  297. && gosu postgres sed -i "s/^#logging_collector = off/logging_collector = on/" "$PGCONFIG_FILE" \
  298. && gosu postgres sed -i "s/^#log_filename/log_filename/" "$PGCONFIG_FILE" \
  299. && gosu postgres sed -i "s/^#log_rotation_size/log_rotation_size/" "$PGCONFIG_FILE"
  300. # Cleanup
  301. rm -rf /var/tmp/* \
  302. && rm -rf /tmp/* \
  303. && rm -rf /var/cache/apt
  304. EOF
  305. ## Install Higress standalone components
  306. RUN --mount=type=bind,from=apiserver,source=/apiserver,dst=/mnt/apiserver,rw \
  307. --mount=type=bind,from=controller,source=/usr/local/bin/higress,dst=/mnt/higress,rw \
  308. --mount=type=bind,from=pilot,source=/usr/local/bin,dst=/mnt/pilot,rw \
  309. --mount=type=bind,from=gateway,source=/,dst=/mnt/gateway,rw <<EOF
  310. # Prepare Higress standalone components
  311. set -eux;
  312. # Install API server
  313. cp /mnt/apiserver /usr/local/bin/apiserver;
  314. # Install controller
  315. cp /mnt/higress /usr/local/bin/higress;
  316. # Install pilot
  317. cp /mnt/pilot/pilot-discovery /usr/local/bin/pilot-discovery;
  318. cp /mnt/pilot/higress-pilot-start.sh /usr/local/bin/higress-pilot-start.sh;
  319. # Install gateway
  320. mkdir -p /var/lib/istio/envoy/
  321. cp /mnt/gateway/var/lib/istio/envoy/*.json /var/lib/istio/envoy/;
  322. cp /mnt/gateway/var/lib/istio/envoy/*.so /var/lib/istio/envoy/;
  323. cp /mnt/gateway/usr/local/bin/pilot-agent /usr/local/bin/pilot-agent;
  324. cp /mnt/gateway/usr/local/bin/envoy /usr/local/bin/envoy;
  325. cp /mnt/gateway/usr/local/bin/supercronic-linux-${TARGETARCH} /usr/local/bin/;
  326. ln -s supercronic-linux-${TARGETARCH} supercronic && mv supercronic /usr/local/bin/;
  327. EOF
  328. # Initialize configurations
  329. COPY pack/rootfs/ /
  330. COPY docker-compose/grafana/grafana_dashboards/ /etc/dashboards/
  331. ## END Install Higress standalone components
  332. ## Install Skopeo
  333. ARG GOPROXY="https://goproxy.cn,direct"
  334. RUN <<EOF
  335. # Skopeo
  336. # Install Go
  337. curl --retry 3 --retry-connrefused -fL "https://golang.google.cn/dl/go1.23.3.${TARGETOS}-${TARGETARCH}.tar.gz" | tar -zx -C /usr/local
  338. export PATH="/usr/local/go/bin:${PATH}"
  339. export GOPROXY="${GOPROXY}"
  340. export
  341. # Download
  342. git -C /tmp clone --recursive --shallow-submodules \
  343. --depth 1 --branch v1.20.0 --single-branch \
  344. https://gh-proxy.com/https://github.com/containers/skopeo.git skopeo
  345. # Build and install
  346. pushd /tmp/skopeo \
  347. && sed -i "/export GOPROXY=.*/d" Makefile \
  348. && make vendor \
  349. && DISABLE_DOCS=1 PREFIX=/usr make install-binary
  350. # Configure Skopeo to allow insecure registries.
  351. mkdir -p /etc/containers
  352. cat<<EOT > /etc/containers/policy.json
  353. {
  354. "default": [
  355. {
  356. "type": "insecureAcceptAnything"
  357. }
  358. ]
  359. }
  360. EOT
  361. # Review
  362. skopeo --version
  363. # Cleanup go
  364. go clean -cache -modcache -testcache \
  365. && rm -rf /usr/local/go \
  366. && rm -rf /root/.cache/go-build
  367. # Cleanup
  368. rm -rf /var/tmp/* \
  369. && rm -rf /tmp/* \
  370. && rm -rf /var/cache/apt
  371. EOF
  372. ## Install Prometheus and Grafana
  373. ARG PROMETHEUS_VERSION
  374. ARG GRAFANA_VERSION
  375. RUN <<EOF
  376. set -eux
  377. apt-get update -y && apt-get install -y --no-install-recommends \
  378. fontconfig
  379. case "${TARGETARCH}" in
  380. amd64)
  381. PROM_ARCH="amd64"
  382. GRAFANA_ARCH="amd64"
  383. ;;
  384. arm64)
  385. PROM_ARCH="arm64"
  386. GRAFANA_ARCH="arm64"
  387. ;;
  388. *)
  389. echo "Unsupported TARGETARCH: ${TARGETARCH}"
  390. exit 1
  391. ;;
  392. esac
  393. curl --retry 3 --retry-connrefused -fL \
  394. "https://gh-proxy.com/https://github.com/prometheus/prometheus/releases/download/v${PROMETHEUS_VERSION}/prometheus-${PROMETHEUS_VERSION}.linux-${PROM_ARCH}.tar.gz" \
  395. | tar -zx -C /opt
  396. mv "/opt/prometheus-${PROMETHEUS_VERSION}.linux-${PROM_ARCH}" /opt/prometheus
  397. ln -s /opt/prometheus/prometheus /usr/local/bin/prometheus
  398. ln -s /opt/prometheus/promtool /usr/local/bin/promtool
  399. curl --retry 3 --retry-connrefused -fL \
  400. "https://mirrors.tuna.tsinghua.edu.cn/grafana/oss/grafana-${GRAFANA_VERSION}.linux-${GRAFANA_ARCH}.tar.gz" \
  401. | tar -zx -C /opt
  402. mv "/opt/grafana-${GRAFANA_VERSION}" /opt/grafana
  403. ln -s /opt/grafana/bin/grafana-server /usr/local/bin/grafana-server
  404. ln -s /opt/grafana/bin/grafana-cli /usr/local/bin/grafana-cli
  405. mkdir -p /etc/grafana
  406. cp /opt/grafana/conf/sample.ini /etc/grafana/grafana.ini.sample
  407. rm -rf /var/tmp/* \
  408. && rm -rf /tmp/* \
  409. && rm -rf /var/cache/apt
  410. EOF
  411. ## Install GPUStack
  412. RUN --mount=type=cache,target=/root/.cache \
  413. --mount=type=bind,target=/workspace/gpustack,rw <<EOF
  414. # Install GPUStack
  415. export POETRY_NO_CACHE=0
  416. export UV_NO_CACHE=0
  417. export UV_SYSTEM_PYTHON=1
  418. export UV_LINK_MODE=copy
  419. # Build GPUStack
  420. cd /workspace/gpustack \
  421. && git config --global --add safe.directory /workspace/gpustack \
  422. && make build
  423. # Install GPUStack.
  424. # FIXME: There is no linux/arm64 vLLM prebuilt wheel,
  425. # so we only install the all wheel for linux/amd64.
  426. if [ "${TARGETARCH}" == "amd64" ]; then
  427. WHEEL_PACKAGE="$(ls /workspace/gpustack/dist/*.whl)[all]";
  428. else
  429. WHEEL_PACKAGE="$(ls /workspace/gpustack/dist/*.whl)[audio]";
  430. fi
  431. uv pip install --no-build-isolation --extra-index-url https://download.pytorch.org/whl/cpu/ --index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple \
  432. ${WHEEL_PACKAGE}
  433. # Download tools
  434. gpustack download-tools
  435. tree -hs "$(pip show gpustack | grep Location: | head -n 1 | cut -d" " -f 2)/gpustack/third_party"
  436. # Set up environment
  437. mkdir -p /var/lib/gpustack \
  438. && chmod -R 0755 /var/lib/gpustack
  439. # Review
  440. uv pip tree \
  441. --package gpustack
  442. gpustack version
  443. # Try to update PCI IDs
  444. if ! update-pciids; then
  445. curl -o /usr/share/misc/pci.ids https://mirrors.tuna.tsinghua.edu.cn/misc/pci.ids || true
  446. fi
  447. # Cleanup
  448. rm -rf /var/tmp/* \
  449. && rm -rf /tmp/* \
  450. && rm -rf /workspace/gpustack/dist
  451. EOF
  452. ## Entrypoint
## Activate detection of all AMD devices,
  454. ## works with (default) ROCm container runtime and privileged mode.
  455. ## See https://rocm.docs.amd.com/projects/amdsmi/en/latest/reference/amdsmi-py-api.html.
  456. ## Runs:
  457. ## - With container runtime installed:
## + If the AMD container runtime is installed as the default runtime, try with:
  459. ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --privileged ...
## + If there are multiple container runtimes installed, try with:
  461. ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --privileged --runtime amd ...
  462. ## + If failed to detect devices' name, try with:
  463. ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --privileged -v /usr/share:/usr/share:ro ...
  464. ## + If want to detect the correct host ROCm version, try with:
  465. ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --privileged -v /opt/rocm:/opt/rocm:ro ...
  466. ## + Disallowing privileged, try with:
  467. ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock ...
  468. ## - Without container runtime installed:
  469. ## + Allowing privileged, try with:
  470. ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --privileged -v /opt/rocm:/opt/rocm:ro ...
  471. ## + Disallowing privileged, try with:
  472. ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --security-opt seccomp=unconfined -v /dev:/dev:ro --group-add video -v /opt/rocm:/opt/rocm:ro ...
  473. RUN --mount=type=bind,from=rocm-base,source=/opt/rocm/share,target=/opt/rocm/share,rw <<EOF
  474. # Reinstall amd-smi
  475. export UV_SYSTEM_PYTHON=1
  476. export UV_PRERELEASE=allow
  477. uv pip install --no-build-isolation \
  478. /opt/rocm/share/amd_smi
  479. uv pip tree
  480. # Hack to avoid: Fail to open libdrm_amdgpu.so: libdrm_amdgpu.so: cannot open shared object file: No such file or directory
  481. TARGET_DIR="/usr/lib/$(uname -m)-linux-gnu"
  482. TARGET_LIB="libdrm_amdgpu.so.1"
  483. TARGET_LINK="libdrm_amdgpu.so"
  484. TARGET_LIB_EXISTED="true"
  485. if [[ ! -e "${TARGET_DIR}/${TARGET_LIB}" ]]; then
  486. TARGET_LIB_EXISTED="false"
  487. touch "${TARGET_DIR}/${TARGET_LIB}"
  488. fi
  489. pushd "${TARGET_DIR}" \
  490. && ln -sf "${TARGET_LIB}" "${TARGET_LINK}"
  491. if [[ "${TARGET_LIB_EXISTED}" == "false" ]]; then
  492. rm -f "${TARGET_DIR}/${TARGET_LIB}"
  493. fi
  494. EOF
  495. ENV AMD_VISIBLE_DEVICES="all" \
  496. GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES="/opt/rocm"
## Activate detection of all Ascend devices,
  498. ## works with (default) Ascend container runtime and privileged mode.
  499. ## See https://gitcode.com/Ascend/mind-cluster/blob/master/component/ascend-common/devmanager/dcmi/dcmi_interface_api.h.
  500. ## Runs:
  501. ## - With container runtime installed:
  502. ## + If installed Ascend container runtime as default runtime, try with:
  503. ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --privileged -e "ASCEND_VISIBLE_DEVICES=$(npu-smi info -m | tail -n 1 | awk '{print $1}')" ...
## + If there are multiple container runtimes installed, try with:
  505. ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --privileged -e "ASCEND_VISIBLE_DEVICES=$(npu-smi info -m | tail -n 1 | awk '{print $1}')" --runtime ascend ...
  506. ## + If want to detect the correct host CANN version and SoC name, try with:
  507. ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --privileged -e "ASCEND_VISIBLE_DEVICES=$(npu-smi info -m | tail -n 1 | awk '{print $1}')" -v /usr/local/Ascend/ascend-toolkit:/usr/local/Ascend/ascend-toolkit:ro ...
  508. ## + Disallowing privileged, try with:
  509. ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock -e "ASCEND_VISIBLE_DEVICES=$(npu-smi info -m | grep -v mcu | awk '{if(NR>1){print $1}}' | uniq | paste -sd ',')" ...
  510. ## - Without container runtime installed:
  511. ## + Allowing privileged, try with:
  512. ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --privileged -v /usr/local/dcmi:/usr/local/dcmi:ro -v /usr/local/Ascend/driver:/usr/local/Ascend/driver:ro -v /etc/hccn.conf:/etc/hccn.conf:ro -v /etc/ascend_install.info:/etc/ascend_install.info:ro ...
  513. ## + Disallowing privileged, try with:
  514. ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --security-opt seccomp=unconfined -v /dev:/dev:ro -v /usr/local/dcmi:/usr/local/dcmi:ro -v /usr/local/Ascend/driver:/usr/local/Ascend/driver:ro -v /etc/hccn.conf:/etc/hccn.conf:ro -v /etc/ascend_install.info:/etc/ascend_install.info:ro ...
  515. ENV ASCEND_HOME_PATH="/usr/local/Ascend/ascend-toolkit/latest" \
  516. LD_LIBRARY_PATH="/usr/local/Ascend/driver/lib64/common:/usr/local/Ascend/driver/lib64/driver:/usr/local/Ascend/ascend-toolkit/latest/runtime/lib64:${LD_LIBRARY_PATH}" \
  517. GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES="/usr/local/Ascend/ascend-toolkit;${GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES}"
## Activate detection of all Cambricon devices,
  519. ## works with (default) Cambricon container runtime and privileged mode.
  520. ## See https://github.com/Cambricon/cambricon-k8s-device-plugin/blob/master/device-plugin/pkg/cndev/include/cndev.h,
  521. ## https://github.com/Cambricon/cambricon-k8s-device-plugin/blob/master/device-plugin/pkg/cntopo/include/cntopo.h.
  522. ## Runs:
  523. ## - With container runtime installed:
  524. ## [TODO, TBD]
  525. ## - Without container runtime installed:
  526. ## + Allowing privileged, try with:
  527. ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --privileged -v /usr/local/neuware:/usr/local/neuware:ro -v /usr/bin/cnmon:/usr/bin/cnmon ...
  528. ## + Disallowing privileged, try with:
  529. ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --security-opt seccomp=unconfined -v /dev:/dev:ro -v /usr/local/neuware:/usr/local/neuware:ro -v /usr/bin/cnmon:/usr/bin/cnmon ...
  530. ENV CAMBRICON_VISIBLE_DEVICES="all" \
  531. NEUWARE_HOME="/usr/local/neuware" \
  532. LD_LIBRARY_PATH="/usr/local/neuware/lib64:${LD_LIBRARY_PATH}" \
  533. GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES="/usr/local/neuware;${GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES}"
## Activate detection of all Hygon devices,
  535. ## works with (default) Hygon container runtime and privileged mode.
  536. ## See https://github.com/Project-HAMi/dcu-dcgm/blob/master/pkg/dcgm/include/rocm_smi.h.
  537. ## Runs:
  538. ## - With container runtime installed:
  539. ## [TODO, TBD]
  540. ## - Without container runtime installed:
  541. ## + Allowing privileged, try with:
  542. ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --privileged -v /opt/hyhal:/opt/hyhal:ro -v /opt/dtk:/opt/dtk:ro -e ROCM_SMI_LIB_PATH=/opt/hyhal/lib -e ROCM_PATH=/opt/dtk ...
  543. ## + Disallowing privileged, try with:
  544. ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --security-opt seccomp=unconfined -v /dev:/dev:ro --group-add video -v /opt/hyhal:/opt/hyhal:ro -v /opt/dtk:/opt/dtk:ro -e ROCM_SMI_LIB_PATH=/opt/hyhal/lib -e ROCM_PATH=/opt/dtk ...
  545. ENV HYGON_VISIBLE_DEVICES="all" \
  546. GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES="/opt/dtk;${GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES}"
## Activate detection of all Iluvatar devices,
  548. ## works with (default) Iluvatar container runtime and privileged mode.
  549. ## See https://github.com/Deep-Spark/ix-container-toolkit.
  550. ## Runs:
  551. ## - With container runtime installed:
  552. ## + If installed Iluvatar container runtime as default runtime, try with:
  553. ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --privileged -v /usr/local/corex:/usr/local/corex:ro ...
## + If there are multiple container runtimes installed, try with:
  555. ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --privileged --runtime iluvatar -v /usr/local/corex:/usr/local/corex:ro ...
  556. ## + Disallowing privileged, try with:
  557. ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock -v /usr/local/corex:/usr/local/corex:ro ...
  558. ## - Without container runtime installed:
  559. ## + Allowing privileged, try with:
  560. ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --privileged -v /usr/local/corex:/usr/local/corex:ro ...
  561. ## + Disallowing privileged, try with:
  562. ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --security-opt seccomp=unconfined -v /dev:/dev:ro -v /usr/local/corex:/usr/local/corex:ro ...
  563. ENV IX_VISIBLE_DEVICES="all" \
  564. COREX_HOME="/usr/local/corex" \
  565. LD_LIBRARY_PATH="/usr/local/corex/lib64:${LD_LIBRARY_PATH}" \
  566. GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES="/usr/local/corex;${GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES}"
  567. ## Activate detection of all MetaX devices,
  568. ## works with the (default) MetaX container runtime and privileged mode.
  569. ## See https://developer.metax-tech.com/api/client/document/preview/626/k8s/03_component.html#container-runtime.
  570. ## Runs:
  571. ## - With container runtime installed:
  572. ## [TODO, TBD]
  573. ## - Without container runtime installed:
  574. ## + Allowing privileged, try with:
  575. ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --privileged -v /opt/mxdriver:/opt/mxdriver:ro -v /opt/maca:/opt/maca:ro ...
  576. ## + Disallowing privileged, try with:
  577. ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --security-opt seccomp=unconfined -v /dev:/dev:ro -v /opt/mxdriver:/opt/mxdriver:ro -v /opt/maca:/opt/maca:ro ...
  578. ENV LD_LIBRARY_PATH="/opt/maca/lib:/opt/mxdriver/lib:${LD_LIBRARY_PATH}" \
  579. GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES="/opt/maca;${GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES}"
  580. ## Activate detection of all MThreads devices,
  581. ## works with the (default) MThreads container runtime and privileged mode.
  582. ## See https://docs.mthreads.com/cloud-native/cloud-native-doc-online/install_guide.
  583. ## Runs:
  584. ## - With container runtime installed:
  585. ## + If the MThreads container runtime is installed as the default runtime, try with:
  586. ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --privileged ...
  587. ## + If there are multiple container runtimes installed, try with:
  588. ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --privileged --runtime mthreads ...
  589. ## + Disallowing privileged, try with:
  590. ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock ...
  591. ## - Without container runtime installed:
  592. ## [TODO, TBD]
  593. ENV MTHREADS_VISIBLE_DEVICES="all" \
  594. MTHREADS_DRIVER_CAPABILITIES="compute,utility"
  595. ## Activate detection of all NVIDIA devices,
  596. ## works with the (default) NVIDIA container runtime and privileged mode.
  597. ## See https://docs.nvidia.com/deploy/nvml-api/nvml-api-reference.html#nvml-api-reference.
  598. ## Runs:
  599. ## - With container runtime installed:
  600. ## + If the NVIDIA container runtime is installed as the default runtime, try with:
  601. ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --privileged ...
  602. ## + If there are multiple container runtimes installed, try with:
  603. ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --privileged --runtime nvidia ...
  604. ## + Disallowing privileged, try with:
  605. ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock ...
  606. ## - Without container runtime installed:
  607. ## [TODO, TBD]
  608. ENV NVIDIA_DISABLE_REQUIRE="true" \
  609. NVIDIA_VISIBLE_DEVICES="all" \
  610. NVIDIA_DRIVER_CAPABILITIES="compute,utility"
  611. ## Activate detection of all T-Head devices,
  612. ## works with the (default) T-Head container runtime and privileged mode.
  613. ## See https://help.aliyun.com/document_detail/2996754.html.
  614. ## Runs:
  615. ## - With container runtime installed:
  616. ## [TODO, TBD]
  617. ## - Without container runtime installed:
  618. ## + Allowing privileged, try with:
  619. ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock -e GPUSTACK_RUNTIME_DOCKER_RESOURCE_INJECTION_POLICY=CDI -v /var/run/cdi:/var/run/cdi --privileged -v /usr/local/PPU_SDK:/usr/local/PPU_SDK:ro ...
  620. ## + Disallowing privileged, try with:
  621. ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock -e GPUSTACK_RUNTIME_DOCKER_RESOURCE_INJECTION_POLICY=CDI -v /var/run/cdi:/var/run/cdi --security-opt seccomp=unconfined -v /dev:/dev:ro -v /usr/local/PPU_SDK:/usr/local/PPU_SDK:ro ...
  622. ENV PPU_HOME="/usr/local/PPU_SDK" \
  623. LD_LIBRARY_PATH="/usr/local/PPU_SDK/CUDA_SDK/lib64:/usr/local/PPU_SDK/lib:${LD_LIBRARY_PATH}" \
  624. GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES="/usr/local/PPU_SDK;${GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES}"
  625. ## Activate GPUStack runtime mirrored deployment mode,
  626. ## if you get an error like, "Found multiple Containers with the same hostname ...",
  627. ## please use `--env GPUSTACK_RUNTIME_DEPLOY_MIRRORED_NAME=...` to specify the exact container name.
  628. ##
  629. ARG GPUSTACK_RUNTIME_DOCKER_MIRRORED_NAME_FILTER_LABELS
  630. ## GPUSTACK_RUNTIME_LOG_EXCEPTION=false: Disable logging exceptions from gpustack-runtime.
  631. ## GPUSTACK_RUNTIME_DEPLOY_CDI_SPECS_DIRECTORY=/var/run/cdi: Set CDI specs directory.
  632. ## GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT=true: Enable mirrored deployment mode.
  633. ## GPUSTACK_RUNTIME_DEPLOY_CORRECT_RUNNER_IMAGE=false: Disable auto correction of runner images.
  634. ## GPUSTACK_RUNTIME_DOCKER_IMAGE_NO_PULL_VISUALIZATION=true: Disable visualizing image pull progress, using simple logs instead.
  635. ## GPUSTACK_RUNTIME_DOCKER_MIRRORED_NAME_FILTER_LABELS: Configure filter labels for mirrored deployment (value taken from the build ARG of the same name).
  636. ## GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES: Declare volumes to be ignored during mirrored deployment ("/var/run/cdi" is prepended to the existing ";"-separated value).
  637. ENV GPUSTACK_RUNTIME_LOG_EXCEPTION="false" \
  638. GPUSTACK_RUNTIME_DEPLOY_CDI_SPECS_DIRECTORY="/var/run/cdi" \
  639. GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT="true" \
  640. GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES="/var/run/cdi;${GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES}" \
  641. GPUSTACK_RUNTIME_DEPLOY_CORRECT_RUNNER_IMAGE="false" \
  642. GPUSTACK_RUNTIME_DOCKER_IMAGE_NO_PULL_VISUALIZATION="true" \
  643. GPUSTACK_RUNTIME_DOCKER_MIRRORED_NAME_FILTER_LABELS="${GPUSTACK_RUNTIME_DOCKER_MIRRORED_NAME_FILTER_LABELS}"
  644. COPY --chmod=755 pack/entrypoint.sh /usr/bin/entrypoint.sh
  645. WORKDIR /
  646. ENTRYPOINT [ "/usr/bin/entrypoint.sh" ]