| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771 |
- # Package logic:
- # 1. base target:
- # - Install tools.
- # - Upgrade GCC if needed.
- # - Install C buildkit.
- # - Upgrade Python if needed.
- # - Install Python buildkit.
- # - Install Platform toolkit.
- # - Install S6-overlay.
- # 2. gpustack target.
- # - Install PostgreSQL.
- # - Install Higress standalone components.
- # - Install gpustack package from the mounted source code.
- # - Setup entrypoint to gpustack command.
- # Argument usage:
- # - PYTHON_VERSION: Version of Python to use.
- # - GPUSTACK_BASE_IMAGE: Base image for the gpustack stage.
- # - GPUSTACK_RUNTIME_ROCM_VERSION: Version of the ROCm detection library for gpustack-runtime; update this if the project dependencies have changed.
- # - GPUSTACK_RUNTIME_DOCKER_MIRRORED_NAME_FILTER_LABELS: Semicolon-separated list of labels to filter mirrored images when deploying mirrored deployment.
- # - HIGRESS_VERSION: Version of Higress to use.
- # - HIGRESS_APISERVER_VERSION: Version of Higress API server to use.
- # Global build arguments; each must be re-declared inside a stage before a RUN step can read it.
- ARG PYTHON_VERSION=3.12
- # Registry host the Ubuntu image of the `base` stage is pulled from.
- ARG REGISTRY_MIRROR=docker.1panel.live
- # Registry/namespace hosting the mirrored Higress images used as source stages.
- ARG HIGRESS_REGISTRY=registry.cn-hangzhou.aliyuncs.com/gpustack
- # Image (or stage name) the `gpustack` stage is built FROM; defaults to the local `base` stage.
- ARG GPUSTACK_BASE_IMAGE=base
- # Registry host the ROCm dev image of the `rocm-base` stage is pulled from.
- ARG ROCM_REGISTRY=docker.1panel.live
- ARG GPUSTACK_RUNTIME_ROCM_VERSION=7.0.2
- ARG GPUSTACK_RUNTIME_DOCKER_MIRRORED_NAME_FILTER_LABELS
- ARG HIGRESS_VERSION=2.1.9
- ARG HIGRESS_APISERVER_VERSION=0.0.26
- # Versions used to locate the unpacked Prometheus / Grafana directories in the `gpustack` stage.
- ARG PROMETHEUS_VERSION=3.5.1
- ARG GRAFANA_VERSION=12.2.4
- # Stage Base
- #
- # Example build command:
- # docker build --tag=gpustack/gpustack:base --file=pack/Dockerfile --target=base --progress=plain .
- #
- # Source-only stages: the gpustack stage bind-mounts files out of these Higress images.
- FROM ${HIGRESS_REGISTRY}/mirrored-higress-api-server:${HIGRESS_APISERVER_VERSION} AS apiserver
- FROM ${HIGRESS_REGISTRY}/mirrored-higress-higress:${HIGRESS_VERSION} AS controller
- FROM ${HIGRESS_REGISTRY}/mirrored-higress-pilot:${HIGRESS_VERSION} AS pilot
- FROM ${HIGRESS_REGISTRY}/mirrored-higress-gateway:${HIGRESS_VERSION} AS gateway
- # Base stage: Ubuntu 24.04, pinned by digest.
- FROM ${REGISTRY_MIRROR}/library/ubuntu:24.04@sha256:d1e2e92c075e5ca139d51a140fff46f84315c0fdce203eab2807c7e495eff4f9 AS base
- # Every RUN step of this stage executes under bash with errexit and pipefail enabled.
- SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
- # Platform build arguments made visible to the RUN steps of this stage.
- ARG TARGETPLATFORM
- ARG TARGETOS
- ARG TARGETARCH
- ## Install Tools
- # Non-interactive apt and an en_US.UTF-8 locale for every later step.
- ENV DEBIAN_FRONTEND=noninteractive \
- LANG='en_US.UTF-8' \
- LANGUAGE='en_US:en' \
- LC_ALL='en_US.UTF-8'
- RUN <<EOF
- # Tools
- # Use Tsinghua mirrors for Ubuntu.
- # Ubuntu 24.04 keeps its default sources in the deb822 file
- # /etc/apt/sources.list.d/ubuntu.sources and leaves /etc/apt/sources.list as a
- # stub, so rewrite whichever of the two files exist; editing only the legacy
- # file would leave the default archive hosts in place.
- for apt_sources in /etc/apt/sources.list /etc/apt/sources.list.d/ubuntu.sources; do
- [[ -f "${apt_sources}" ]] || continue
- sed -i \
- -e 's|//archive.ubuntu.com|//mirrors.tuna.tsinghua.edu.cn|g' \
- -e 's|//security.ubuntu.com|//mirrors.tuna.tsinghua.edu.cn|g' \
- -e 's|//ppa.launchpadcontent.net|//launchpad.proxy.ustclug.org|g' \
- "${apt_sources}"
- done
- # Refresh
- apt-get update -y && apt-get install -y --no-install-recommends \
- software-properties-common apt-transport-https \
- ca-certificates gnupg2 lsb-release gnupg-agent \
- && apt-get update -y
- # Install
- apt-get install -y --no-install-recommends \
- ca-certificates build-essential binutils bash openssl \
- curl wget aria2 \
- git git-lfs \
- unzip xz-utils \
- tzdata locales \
- iproute2 iputils-ping ifstat net-tools dnsutils pciutils ipmitool \
- procps sysstat htop \
- vim jq bc tree \
- logrotate cron netcat-openbsd \
- python3-pip python3-venv
- # Update locale
- localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8
- # Update timezone
- rm -f /etc/localtime \
- && ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \
- && echo "Asia/Shanghai" > /etc/timezone \
- && dpkg-reconfigure --frontend noninteractive tzdata
- # Cleanup
- rm -rf /var/tmp/* \
- && rm -rf /tmp/* \
- && rm -rf /var/cache/apt
- EOF
- ## Upgrade GCC if needed
- RUN <<EOF
- # GCC
- # Only Ubuntu releases older than 21.04 get the GCC 11 toolchain installed.
- source /etc/os-release
- if (( $(echo "${VERSION_ID} >= 21.04" | bc -l) )); then
- echo "Skipping GCC upgrade for ${VERSION_ID}..."
- exit 0
- fi
- # Install
- apt-get install -y --no-install-recommends \
- gcc-11 g++-11 gfortran-11 gfortran
- # Update alternatives: for each tool, drop any existing alternative group,
- # then register the "-11" binary with priority 10.
- for gcc_tool in gcov-dump lto-dump gcov gcc gcc-nm cpp g++ gcc-ar gcov-tool gcc-ranlib gfortran; do
- if [[ -f "/etc/alternatives/${gcc_tool}" ]]; then
- update-alternatives --remove-all "${gcc_tool}"
- fi
- update-alternatives --install "/usr/bin/${gcc_tool}" "${gcc_tool}" "/usr/bin/${gcc_tool}-11" 10
- done
- # Cleanup
- rm -rf /var/tmp/*
- rm -rf /tmp/*
- rm -rf /var/cache/apt
- EOF
- ## Install C buildkit
- RUN <<EOF
- # C buildkit
- # Install
- apt-get install -y --no-install-recommends \
- make ninja-build pkg-config ccache
- # Unpack the CMake 3.31.7 release for this machine's architecture straight into /usr
- # (downloaded through the gh-proxy.com proxy in front of GitHub).
- curl --retry 3 --retry-connrefused -fL "https://gh-proxy.com/https://github.com/Kitware/CMake/releases/download/v3.31.7/cmake-3.31.7-linux-$(uname -m).tar.gz" | tar -zx -C /usr --strip-components 1
- # Install dependencies
- # Development headers and runtime libraries for later native builds.
- apt-get install -y --no-install-recommends \
- openssl libssl-dev \
- zlib1g zlib1g-dev libbz2-dev libffi-dev \
- lzma lzma-dev uuid-dev liblzma-dev \
- ffmpeg libjpeg-dev libpng-dev libtiff-dev libwebp-dev \
- libsqlite3-dev \
- libxml2 libxslt1-dev \
- libnuma1 libnuma-dev \
- libgpgme-dev libassuan-dev libbtrfs-dev libdevmapper-dev \
- libjemalloc-dev
- # Cleanup
- rm -rf /var/tmp/* \
- && rm -rf /tmp/* \
- && rm -rf /var/cache/apt
- EOF
- ## Upgrade Python if needed
- ARG PYTHON_VERSION
- ENV PYTHON_VERSION=${PYTHON_VERSION}
- RUN <<EOF
- # Python
- # Versions are compared as strings or with `sort -V`, never as decimals:
- # bc would read 3.10 as 3.1 and rank 3.9 above 3.11.
- CURRENT_PYTHON_VERSION="$(python3 --version | cut -d' ' -f2 | cut -d'.' -f1,2)"
- if [[ "${CURRENT_PYTHON_VERSION}" == "${PYTHON_VERSION}" ]]; then
- echo "Skipping Python upgrade for ${PYTHON_VERSION}..."
- # Register the interpreter's lib directories with the dynamic loader when
- # libpython is not already in the loader cache.
- if [[ -z "$(ldconfig -v 2>/dev/null | grep libpython${PYTHON_VERSION})" ]]; then
- PYTHON_LIB_PREFIX=$(python3 -c "import sys; print(sys.base_prefix);")
- echo "${PYTHON_LIB_PREFIX}/lib" >> /etc/ld.so.conf.d/python3.conf
- echo "${PYTHON_LIB_PREFIX}/lib64" >> /etc/ld.so.conf.d/python3.conf
- ldconfig -v
- fi
- exit 0
- fi
- # Add deadsnakes PPA via USTC mirror (bypass add-apt-repository which connects to launchpad)
- CODENAME=$(. /etc/os-release && echo "$VERSION_CODENAME")
- # The URL must be quoted: an unquoted "&" would background wget with a truncated
- # URL and let gpg run before the key has been downloaded.
- wget -q -O /tmp/deadsnakes.asc "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x6c7c47c706f57f28c70139e07965b5db433a593c"
- gpg --dearmor /tmp/deadsnakes.asc
- mv /tmp/deadsnakes.asc.gpg /usr/share/keyrings/deadsnakes.gpg
- cat > /etc/apt/sources.list.d/deadsnakes-ubuntu-ppa-${CODENAME}.sources <<SRCEOF
- Types: deb
- URIs: https://launchpad.proxy.ustclug.org/deadsnakes/ppa/ubuntu
- Suites: ${CODENAME}
- Components: main
- Signed-By: /usr/share/keyrings/deadsnakes.gpg
- SRCEOF
- apt-get update -y
- # Install
- apt-get install -y --no-install-recommends \
- python${PYTHON_VERSION} \
- python${PYTHON_VERSION}-dev \
- python${PYTHON_VERSION}-venv \
- python${PYTHON_VERSION}-lib2to3 \
- python${PYTHON_VERSION}-gdbm \
- python${PYTHON_VERSION}-tk
- # Install distutils only for Python 3.11 and older;
- # `sort -V -C` succeeds exactly when PYTHON_VERSION <= 3.11 in version order.
- if printf '%s\n' "${PYTHON_VERSION}" "3.11" | sort -V -C; then
- apt-get install -y --no-install-recommends \
- python${PYTHON_VERSION}-distutils
- fi
- # Update alternatives
- if [[ -f /etc/alternatives/python3 ]]; then update-alternatives --remove-all python3; fi; update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1
- if [[ -f /etc/alternatives/python ]]; then update-alternatives --remove-all python; fi; update-alternatives --install /usr/bin/python python /usr/bin/python${PYTHON_VERSION} 1
- # Bootstrap pip for the newly installed interpreter.
- curl -sS "https://gh-proxy.com/https://raw.githubusercontent.com/pypa/get-pip/main/get-pip.py" | python${PYTHON_VERSION}
- # The remaining alternatives are best-effort: a missing helper binary must not fail the build.
- if [[ -f /etc/alternatives/2to3 ]]; then update-alternatives --remove-all 2to3; fi; update-alternatives --install /usr/bin/2to3 2to3 /usr/bin/2to3${PYTHON_VERSION} 1 || true
- if [[ -f /etc/alternatives/pydoc3 ]]; then update-alternatives --remove-all pydoc3; fi; update-alternatives --install /usr/bin/pydoc3 pydoc3 /usr/bin/pydoc${PYTHON_VERSION} 1 || true
- if [[ -f /etc/alternatives/idle3 ]]; then update-alternatives --remove-all idle3; fi; update-alternatives --install /usr/bin/idle3 idle3 /usr/bin/idle${PYTHON_VERSION} 1 || true
- if [[ -f /etc/alternatives/python3-config ]]; then update-alternatives --remove-all python3-config; fi; update-alternatives --install /usr/bin/python3-config python3-config /usr/bin/python${PYTHON_VERSION}-config 1 || true
- # Cleanup
- rm -rf /var/tmp/* \
- && rm -rf /tmp/* \
- && rm -rf /var/cache/apt
- EOF
- ## Install Python buildkit
- # Cache-disabling defaults for pip, Poetry and uv; the GPUStack install step
- # later overrides POETRY_NO_CACHE and UV_NO_CACHE for its own RUN.
- ENV PIP_NO_CACHE_DIR=1 \
- PIP_DISABLE_PIP_VERSION_CHECK=1 \
- PIP_ROOT_USER_ACTION=ignore \
- PIPX_HOME=/root/.local/share/pipx \
- PIPX_LOCAL_VENVS=/root/.local/share/pipx/venvs \
- POETRY_NO_CACHE=1 \
- UV_NO_CACHE=1 \
- UV_HTTP_TIMEOUT=500 \
- UV_INDEX_STRATEGY="unsafe-best-match"
- RUN <<EOF
- # Buildkit
- # Write the pinned build-tool requirements to a temporary file, then install
- # them into the system interpreter from the Tsinghua PyPI mirror.
- cat <<EOT >/tmp/requirements.txt
- build
- cmake<4
- ninja<1.11
- setuptools<80
- setuptools-scm
- packaging<25
- wheel
- pybind11<3
- Cython
- psutil
- pipx
- uv
- yq
- hatchling
- py-spy
- poetry
- EOT
- python3 -m pip install -i https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple --break-system-packages -r /tmp/requirements.txt
- # Cleanup
- rm -rf /var/tmp/* \
- && rm -rf /tmp/*
- EOF
- ## Install s6-overlay
- ARG S6_OVERLAY_VERSION=3.2.1.0
- # TARGETARCH never carries the variant: linux/arm/v7 arrives as TARGETARCH=arm
- # with TARGETVARIANT=v7, so the variant is declared and matched explicitly.
- ARG TARGETVARIANT
- # Map the target platform to the s6-overlay archive name, download the noarch
- # and arch-specific tarballs, and unpack both over the root filesystem.
- RUN set -eux; \
- case "${TARGETARCH}${TARGETVARIANT:+/${TARGETVARIANT}}" in \
- amd64|amd64/*) S6_ARCH="x86_64" ;; \
- arm64|arm64/*) S6_ARCH="aarch64" ;; \
- arm/v7) S6_ARCH="armhf" ;; \
- arm/v6) S6_ARCH="arm" ;; \
- *) \
- echo >&2 "⚠️ Warning: Unknown TARGETARCH='${TARGETARCH}', defaulting to x86_64"; \
- S6_ARCH="x86_64"; \
- ;; \
- esac; \
- echo "Installing s6-overlay ${S6_OVERLAY_VERSION} for arch: ${S6_ARCH} (from TARGETARCH=${TARGETARCH})"; \
- base_url="https://gh-proxy.com/https://github.com/just-containers/s6-overlay/releases/download/v${S6_OVERLAY_VERSION}"; \
- for pkg in noarch "${S6_ARCH}"; do \
- wget -q -O "/tmp/s6-overlay-${pkg}.tar.xz" "${base_url}/s6-overlay-${pkg}.tar.xz"; \
- done \
- && echo "📦 Extracting s6-overlay ..." \
- && tar -C / -Jxpf /tmp/s6-overlay-noarch.tar.xz \
- && tar -C / -Jxpf "/tmp/s6-overlay-${S6_ARCH}.tar.xz" \
- && rm -f /tmp/s6-overlay-*.tar.xz \
- && echo "Installed s6-overlay ${S6_OVERLAY_VERSION} successfully."
- # Runtime settings read by s6-overlay (semantics per the s6-overlay documentation).
- ENV S6_KEEP_ENV=1 \
- S6_BEHAVIOUR_IF_STAGE2_FAILS=2 \
- S6_SERVICES_GRACETIME=3000 \
- S6_KILL_GRACETIME=3000 \
- S6_VERBOSITY=1 \
- S6_CMD_WAIT_FOR_SERVICES=1
- #
- # Stage GPUStack
- #
- # Example build command:
- # docker build --tag=gpustack/gpustack:main --file=pack/Dockerfile --progress=plain .
- #
- # Vendor ROCm libraries from ROCm base image,
- # now only linux/amd64 is supported.
- # Must build on linux/amd64 platform.
- # Pulled for the build host's platform; a later step bind-mounts /opt/rocm/share from this stage.
- FROM --platform=${BUILDPLATFORM} ${ROCM_REGISTRY}/rocm/dev-ubuntu-22.04:${GPUSTACK_RUNTIME_ROCM_VERSION} AS rocm-base
- # Final stage, built on GPUSTACK_BASE_IMAGE (the local `base` stage by default).
- FROM ${GPUSTACK_BASE_IMAGE} AS gpustack
- # Every RUN step of this stage executes under bash with errexit and pipefail enabled.
- SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
- # Platform build arguments made visible to the RUN steps of this stage.
- ARG TARGETPLATFORM
- ARG TARGETOS
- ARG TARGETARCH
- ## Configure data volume
- VOLUME /var/lib/gpustack
- ## Install PostgreSQL
- ENV PGCONFIG_FILE=/etc/postgresql/main/postgresql.conf \
- POSTGRES_DB=gpustack
- # Create the postgres system user/group with fixed uid/gid 9999 and its home
- # directory before the package is installed.
- RUN set -eux; \
- groupadd -r postgres --gid=9999; \
- useradd -r -g postgres --uid=9999 --home-dir=/var/lib/postgresql --shell=/bin/bash postgres; \
- mkdir -p /var/lib/postgresql; \
- chown -R postgres:postgres /var/lib/postgresql
- RUN <<EOF
- set -eux
- # Register the PostgreSQL APT repository (apt.postgresql.org) and its signing key.
- # NOTE(review): no mirror is configured here; the key and packages come from the upstream hosts.
- wget -O /tmp/ACCC4CF8.asc https://www.postgresql.org/media/keys/ACCC4CF8.asc \
- && gpg --dearmor /tmp/ACCC4CF8.asc \
- && mv /tmp/ACCC4CF8.asc.gpg /usr/share/keyrings/postgresql-archive-keyring.gpg \
- && echo "deb [signed-by=/usr/share/keyrings/postgresql-archive-keyring.gpg] http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list
- # Install
- apt-get update -y && apt-get install -y --no-install-recommends \
- postgresql-17 \
- gosu
- # Create symlinks for PostgreSQL 17 to simplify usage
- # (version-less bin/config/data paths, plus every server binary linked into /usr/bin).
- ln -s /usr/lib/postgresql/17/bin /usr/lib/postgresql/bin \
- && ln -s /etc/postgresql/17/main /etc/postgresql/main \
- && ln -s /var/lib/postgresql/17/main /var/lib/postgresql/main \
- && ls -1 /usr/lib/postgresql/bin/ | xargs -I @ ln -sf /usr/lib/postgresql/bin/@ /usr/bin/@
- # Listen on all addresses and replace pg_hba.conf (the first write uses ">"):
- # peer auth for the local postgres user, trust for root over loopback,
- # scram-sha-256 for every other IPv4 client.
- # The redirections are performed by the invoking shell, not by the gosu'd echo.
- gosu postgres echo "listen_addresses='*'" >> "$PGCONFIG_FILE" \
- && gosu postgres echo "local all postgres peer" > /etc/postgresql/main/pg_hba.conf \
- && gosu postgres echo "host all root 127.0.0.1/32 trust" >> /etc/postgresql/main/pg_hba.conf \
- && gosu postgres echo "host all root ::1/128 trust" >> /etc/postgresql/main/pg_hba.conf \
- && gosu postgres echo "host all all 0.0.0.0/0 scram-sha-256" >> /etc/postgresql/main/pg_hba.conf
- # Comment out the packaged data_directory / hba_file settings and enable the
- # logging collector by uncommenting or rewriting the stock settings.
- gosu postgres sed -i "s/^data_directory/#data_directory/" "$PGCONFIG_FILE" \
- && gosu postgres sed -i "s/^hba_file/#hba_file/" "$PGCONFIG_FILE" \
- && gosu postgres sed -i "s/^#log_destination/log_destination/" "$PGCONFIG_FILE" \
- && gosu postgres sed -i "s/^#log_min_messages = warning/log_min_messages = info/" "$PGCONFIG_FILE" \
- && gosu postgres sed -i "s/^#logging_collector = off/logging_collector = on/" "$PGCONFIG_FILE" \
- && gosu postgres sed -i "s/^#log_filename/log_filename/" "$PGCONFIG_FILE" \
- && gosu postgres sed -i "s/^#log_rotation_size/log_rotation_size/" "$PGCONFIG_FILE"
- # Cleanup
- rm -rf /var/tmp/* \
- && rm -rf /tmp/* \
- && rm -rf /var/cache/apt
- EOF
- ## Install Higress standalone components
- # The four Higress source stages are bind-mounted under /mnt for this step only;
- # the files are copied out so they persist in the image layer.
- RUN --mount=type=bind,from=apiserver,source=/apiserver,dst=/mnt/apiserver,rw \
- --mount=type=bind,from=controller,source=/usr/local/bin/higress,dst=/mnt/higress,rw \
- --mount=type=bind,from=pilot,source=/usr/local/bin,dst=/mnt/pilot,rw \
- --mount=type=bind,from=gateway,source=/,dst=/mnt/gateway,rw <<EOF
- # Prepare Higress standalone components
- set -eux;
- # Install API server
- cp /mnt/apiserver /usr/local/bin/apiserver;
- # Install controller
- cp /mnt/higress /usr/local/bin/higress;
- # Install pilot
- cp /mnt/pilot/pilot-discovery /usr/local/bin/pilot-discovery;
- cp /mnt/pilot/higress-pilot-start.sh /usr/local/bin/higress-pilot-start.sh;
- # Install gateway
- # Envoy JSON and shared-object files, plus the pilot-agent, envoy and supercronic binaries.
- mkdir -p /var/lib/istio/envoy/
- cp /mnt/gateway/var/lib/istio/envoy/*.json /var/lib/istio/envoy/;
- cp /mnt/gateway/var/lib/istio/envoy/*.so /var/lib/istio/envoy/;
- cp /mnt/gateway/usr/local/bin/pilot-agent /usr/local/bin/pilot-agent;
- cp /mnt/gateway/usr/local/bin/envoy /usr/local/bin/envoy;
- cp /mnt/gateway/usr/local/bin/supercronic-linux-${TARGETARCH} /usr/local/bin/;
- # Create a relative "supercronic" link in the current directory, then move it
- # next to the binary so it resolves inside /usr/local/bin.
- ln -s supercronic-linux-${TARGETARCH} supercronic && mv supercronic /usr/local/bin/;
- EOF
- # Initialize configurations
- # Overlay the repository's pack/rootfs tree onto the image root and ship the Grafana dashboards.
- COPY pack/rootfs/ /
- COPY docker-compose/grafana/grafana_dashboards/ /etc/dashboards/
- # Fix execute permissions for scripts (lost on Windows bind mount)
- # Covers: every *.sh under /etc/s6-overlay, every other regular file under
- # /etc/s6-overlay/scripts, and files named "check" below a data/ directory in s6-rc.d.
- RUN find /etc/s6-overlay -name '*.sh' -exec chmod +x {} + \
- && find /etc/s6-overlay/scripts -type f ! -name '*.sh' -exec chmod +x {} + \
- && find /etc/s6-overlay/s6-rc.d -name 'check' -path '*/data/*' -exec chmod +x {} +
- ## END Install Higress standalone components
- ## Install Skopeo
- # Go module proxy used while building Skopeo.
- ARG GOPROXY="https://goproxy.cn,direct"
- RUN <<EOF
- # Skopeo
- # Builds Skopeo v1.20.0 from source with a temporary Go 1.23.3 toolchain that
- # is removed again at the end of this step.
- # Install Go
- curl --retry 3 --retry-connrefused -fL "https://golang.google.cn/dl/go1.23.3.${TARGETOS}-${TARGETARCH}.tar.gz" | tar -zx -C /usr/local
- export PATH="/usr/local/go/bin:${PATH}"
- export GOPROXY="${GOPROXY}"
- # NOTE(review): a bare `export` prints every exported variable into the build
- # log; looks like a debugging aid - confirm it is still wanted.
- export
- # Download
- git -C /tmp clone --recursive --shallow-submodules \
- --depth 1 --branch v1.20.0 --single-branch \
- https://gh-proxy.com/https://github.com/containers/skopeo.git skopeo
- # Build and install
- # The Makefile's own GOPROXY export is deleted so the value exported above is used.
- pushd /tmp/skopeo \
- && sed -i "/export GOPROXY=.*/d" Makefile \
- && make vendor \
- && DISABLE_DOCS=1 PREFIX=/usr make install-binary
- # Configure Skopeo to allow insecure registries.
- mkdir -p /etc/containers
- cat<<EOT > /etc/containers/policy.json
- {
- "default": [
- {
- "type": "insecureAcceptAnything"
- }
- ]
- }
- EOT
- # Review
- skopeo --version
- # Cleanup go
- go clean -cache -modcache -testcache \
- && rm -rf /usr/local/go \
- && rm -rf /root/.cache/go-build
- # Cleanup
- rm -rf /var/tmp/* \
- && rm -rf /tmp/* \
- && rm -rf /var/cache/apt
- EOF
- ## Install Prometheus and Grafana
- ARG PROMETHEUS_VERSION
- ARG GRAFANA_VERSION
- # Both release tarballs come from the build context (pack/third_party) and are
- # bind-mounted into /tmp for this step only.
- RUN --mount=type=bind,source=pack/third_party/prometheus-3.5.1.linux-amd64.tar.gz,target=/tmp/prometheus.tar.gz \
- --mount=type=bind,source=pack/third_party/grafana-12.2.4.linux-amd64.tar.gz,target=/tmp/grafana.tar.gz <<EOF
- set -eux
- # NOTE(review): the mount sources above hard-code versions 3.5.1 / 12.2.4 and
- # linux-amd64, while the directory names below are derived from
- # PROMETHEUS_VERSION, GRAFANA_VERSION and TARGETARCH. Overriding a version, or
- # building for arm64, makes the Prometheus `mv` miss its directory and installs
- # amd64 binaries. GRAFANA_ARCH is assigned but never read.
- apt-get update -y && apt-get install -y --no-install-recommends \
- fontconfig
- case "${TARGETARCH}" in
- amd64)
- PROM_ARCH="amd64"
- GRAFANA_ARCH="amd64"
- ;;
- arm64)
- PROM_ARCH="arm64"
- GRAFANA_ARCH="arm64"
- ;;
- *)
- echo "Unsupported TARGETARCH: ${TARGETARCH}"
- exit 1
- ;;
- esac
- # Unpack under /opt, rename to a version-less directory and link the binaries into /usr/local/bin.
- tar -zx -f /tmp/prometheus.tar.gz -C /opt
- mv "/opt/prometheus-${PROMETHEUS_VERSION}.linux-${PROM_ARCH}" /opt/prometheus
- ln -s /opt/prometheus/prometheus /usr/local/bin/prometheus
- ln -s /opt/prometheus/promtool /usr/local/bin/promtool
- tar -zx -f /tmp/grafana.tar.gz -C /opt
- mv "/opt/grafana-${GRAFANA_VERSION}" /opt/grafana
- ln -s /opt/grafana/bin/grafana-server /usr/local/bin/grafana-server
- ln -s /opt/grafana/bin/grafana-cli /usr/local/bin/grafana-cli
- mkdir -p /etc/grafana
- cp /opt/grafana/conf/sample.ini /etc/grafana/grafana.ini.sample
- # Cleanup; the two bind-mounted tarballs in /tmp are excluded from deletion.
- rm -rf /var/tmp/* \
- && find /tmp -mindepth 1 -not -name 'prometheus.tar.gz' -not -name 'grafana.tar.gz' -exec rm -rf {} + \
- && rm -rf /var/cache/apt
- EOF
- ## Install GPUStack
- ARG GPUSTACK_VERSION=latest
- # /root/.cache is a BuildKit cache mount and the build context is bind-mounted
- # read-write at /workspace/gpustack for this step.
- RUN --mount=type=cache,target=/root/.cache \
- --mount=type=bind,target=/workspace/gpustack,rw <<EOF
- # Install GPUStack
- # Override the image-wide no-cache defaults for Poetry and uv for this step,
- # and let uv install into the system interpreter.
- export POETRY_NO_CACHE=0
- export UV_NO_CACHE=0
- export UV_SYSTEM_PYTHON=1
- export UV_LINK_MODE=copy
- # Remove PEP 668 EXTERNALLY-MANAGED marker to allow system-wide pip installs
- rm -f /usr/lib/python*/EXTERNALLY-MANAGED
- # Build GPUStack
- cd /workspace/gpustack \
- && git config --global --add safe.directory /workspace/gpustack \
- && chmod +x hack/*.sh hack/lib/*.sh \
- && make build
- # Install GPUStack.
- # FIXME: There is no linux/arm64 vLLM prebuilt wheel,
- # so we only install the all wheel for linux/amd64.
- # NOTE(review): this assumes dist/ holds exactly one wheel; with several, the
- # extras suffix would attach to the last path only - confirm.
- if [ "${TARGETARCH}" == "amd64" ]; then
- WHEEL_PACKAGE="$(ls /workspace/gpustack/dist/*.whl)[all]";
- else
- WHEEL_PACKAGE="$(ls /workspace/gpustack/dist/*.whl)[audio]";
- fi
- uv pip install --no-build-isolation --extra-index-url https://download.pytorch.org/whl/cpu/ --index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple \
- ${WHEEL_PACKAGE}
- # Download tools
- gpustack download-tools
- # List the third_party directory inside the installed gpustack package.
- tree -hs "$(pip show gpustack | grep Location: | head -n 1 | cut -d" " -f 2)/gpustack/third_party"
- # Set up environment
- mkdir -p /var/lib/gpustack \
- && chmod -R 0755 /var/lib/gpustack
- # Review
- uv pip tree \
- --package gpustack
- gpustack version
- # Try to update PCI IDs
- # Fall back to the Tsinghua mirror copy; a failed fallback download is tolerated.
- if ! update-pciids; then
- curl -o /usr/share/misc/pci.ids https://mirrors.tuna.tsinghua.edu.cn/misc/pci.ids || true
- fi
- # Cleanup
- rm -rf /var/tmp/* \
- && rm -rf /tmp/* \
- && rm -rf /workspace/gpustack/dist
- EOF
- ## Entrypoint
- ## Active all AMD devices detection,
- ## works with (default) ROCm container runtime and privileged mode.
- ## See https://rocm.docs.amd.com/projects/amdsmi/en/latest/reference/amdsmi-py-api.html.
- ## Runs:
- ## - With container runtime installed:
- ## + If installed AMD container runtime as default runtime, try with:
- ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --privileged ...
- ## + If there are multiple container runtimes installed, try with:
- ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --privileged --runtime amd ...
- ## + If failed to detect devices' name, try with:
- ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --privileged -v /usr/share:/usr/share:ro ...
- ## + If want to detect the correct host ROCm version, try with:
- ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --privileged -v /opt/rocm:/opt/rocm:ro ...
- ## + Disallowing privileged, try with:
- ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock ...
- ## - Without container runtime installed:
- ## + Allowing privileged, try with:
- ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --privileged -v /opt/rocm:/opt/rocm:ro ...
- ## + Disallowing privileged, try with:
- ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --security-opt seccomp=unconfined -v /dev:/dev:ro --group-add video -v /opt/rocm:/opt/rocm:ro ...
- # /opt/rocm/share from the rocm-base stage is bind-mounted for this step only,
- # so amd_smi can be installed from it without keeping the ROCm tree in the image.
- RUN --mount=type=bind,from=rocm-base,source=/opt/rocm/share,target=/opt/rocm/share,rw <<EOF
- # Reinstall amd-smi
- export UV_SYSTEM_PYTHON=1
- export UV_PRERELEASE=allow
- uv pip install --no-build-isolation \
- /opt/rocm/share/amd_smi
- uv pip tree
- # Hack to avoid: Fail to open libdrm_amdgpu.so: libdrm_amdgpu.so: cannot open shared object file: No such file or directory
- # Creates the unversioned libdrm_amdgpu.so link pointing at libdrm_amdgpu.so.1.
- # When the versioned library is absent, a placeholder file is created for the
- # duration of the `ln` and removed afterwards, which leaves the link dangling
- # (presumably resolved once the real library is provided at runtime - confirm).
- TARGET_DIR="/usr/lib/$(uname -m)-linux-gnu"
- TARGET_LIB="libdrm_amdgpu.so.1"
- TARGET_LINK="libdrm_amdgpu.so"
- TARGET_LIB_EXISTED="true"
- if [[ ! -e "${TARGET_DIR}/${TARGET_LIB}" ]]; then
- TARGET_LIB_EXISTED="false"
- touch "${TARGET_DIR}/${TARGET_LIB}"
- fi
- pushd "${TARGET_DIR}" \
- && ln -sf "${TARGET_LIB}" "${TARGET_LINK}"
- if [[ "${TARGET_LIB_EXISTED}" == "false" ]]; then
- rm -f "${TARGET_DIR}/${TARGET_LIB}"
- fi
- EOF
- # First entry of the semicolon-separated ignore-volume list; the vendor
- # sections below prepend their own paths to it.
- ENV AMD_VISIBLE_DEVICES="all" \
- GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES="/opt/rocm"
- ## Active all Ascend devices detection,
- ## works with (default) Ascend container runtime and privileged mode.
- ## See https://gitcode.com/Ascend/mind-cluster/blob/master/component/ascend-common/devmanager/dcmi/dcmi_interface_api.h.
- ## Runs:
- ## - With container runtime installed:
- ## + If installed Ascend container runtime as default runtime, try with:
- ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --privileged -e "ASCEND_VISIBLE_DEVICES=$(npu-smi info -m | tail -n 1 | awk '{print $1}')" ...
- ## + If there are multiple container runtimes installed, try with:
- ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --privileged -e "ASCEND_VISIBLE_DEVICES=$(npu-smi info -m | tail -n 1 | awk '{print $1}')" --runtime ascend ...
- ## + If want to detect the correct host CANN version and SoC name, try with:
- ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --privileged -e "ASCEND_VISIBLE_DEVICES=$(npu-smi info -m | tail -n 1 | awk '{print $1}')" -v /usr/local/Ascend/ascend-toolkit:/usr/local/Ascend/ascend-toolkit:ro ...
- ## + Disallowing privileged, try with:
- ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock -e "ASCEND_VISIBLE_DEVICES=$(npu-smi info -m | grep -v mcu | awk '{if(NR>1){print $1}}' | uniq | paste -sd ',')" ...
- ## - Without container runtime installed:
- ## + Allowing privileged, try with:
- ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --privileged -v /usr/local/dcmi:/usr/local/dcmi:ro -v /usr/local/Ascend/driver:/usr/local/Ascend/driver:ro -v /etc/hccn.conf:/etc/hccn.conf:ro -v /etc/ascend_install.info:/etc/ascend_install.info:ro ...
- ## + Disallowing privileged, try with:
- ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --security-opt seccomp=unconfined -v /dev:/dev:ro -v /usr/local/dcmi:/usr/local/dcmi:ro -v /usr/local/Ascend/driver:/usr/local/Ascend/driver:ro -v /etc/hccn.conf:/etc/hccn.conf:ro -v /etc/ascend_install.info:/etc/ascend_install.info:ro ...
- # Append the inherited LD_LIBRARY_PATH only when it is non-empty: with an unset
- # value, a plain ":${LD_LIBRARY_PATH}" suffix leaves a trailing ":" - an empty
- # entry that the dynamic loader treats as the current working directory.
- # The Ascend toolkit path is prepended to the semicolon-separated ignore-volume list.
- ENV ASCEND_HOME_PATH="/usr/local/Ascend/ascend-toolkit/latest" \
- LD_LIBRARY_PATH="/usr/local/Ascend/driver/lib64/common:/usr/local/Ascend/driver/lib64/driver:/usr/local/Ascend/ascend-toolkit/latest/runtime/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" \
- GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES="/usr/local/Ascend/ascend-toolkit;${GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES}"
- ## Active all Cambricon devices detection,
- ## works with (default) Cambricon container runtime and privileged mode.
- ## See https://github.com/Cambricon/cambricon-k8s-device-plugin/blob/master/device-plugin/pkg/cndev/include/cndev.h,
- ## https://github.com/Cambricon/cambricon-k8s-device-plugin/blob/master/device-plugin/pkg/cntopo/include/cntopo.h.
- ## Runs:
- ## - With container runtime installed:
- ## [TODO, TBD]
- ## - Without container runtime installed:
- ## + Allowing privileged, try with:
- ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --privileged -v /usr/local/neuware:/usr/local/neuware:ro -v /usr/bin/cnmon:/usr/bin/cnmon ...
- ## + Disallowing privileged, try with:
- ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --security-opt seccomp=unconfined -v /dev:/dev:ro -v /usr/local/neuware:/usr/local/neuware:ro -v /usr/bin/cnmon:/usr/bin/cnmon ...
- # Prepends the Neuware lib64 directory to LD_LIBRARY_PATH and /usr/local/neuware
- # to the semicolon-separated ignore-volume list built up by the earlier ENV steps.
- ENV CAMBRICON_VISIBLE_DEVICES="all" \
- NEUWARE_HOME="/usr/local/neuware" \
- LD_LIBRARY_PATH="/usr/local/neuware/lib64:${LD_LIBRARY_PATH}" \
- GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES="/usr/local/neuware;${GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES}"
- ## Active all Hygon devices detection,
- ## works with (default) Hygon container runtime and privileged mode.
- ## See https://github.com/Project-HAMi/dcu-dcgm/blob/master/pkg/dcgm/include/rocm_smi.h.
- ## Runs:
- ## - With container runtime installed:
- ## [TODO, TBD]
- ## - Without container runtime installed:
- ## + Allowing privileged, try with:
- ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --privileged -v /opt/hyhal:/opt/hyhal:ro -v /opt/dtk:/opt/dtk:ro -e ROCM_SMI_LIB_PATH=/opt/hyhal/lib -e ROCM_PATH=/opt/dtk ...
- ## + Disallowing privileged, try with:
- ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --security-opt seccomp=unconfined -v /dev:/dev:ro --group-add video -v /opt/hyhal:/opt/hyhal:ro -v /opt/dtk:/opt/dtk:ro -e ROCM_SMI_LIB_PATH=/opt/hyhal/lib -e ROCM_PATH=/opt/dtk ...
- # Prepends /opt/dtk to the semicolon-separated ignore-volume list built up by the earlier ENV steps.
- ENV HYGON_VISIBLE_DEVICES="all" \
- GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES="/opt/dtk;${GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES}"
- ## Active all Iluvatar devices detection,
- ## works with (default) Iluvatar container runtime and privileged mode.
- ## See https://github.com/Deep-Spark/ix-container-toolkit.
- ## Runs:
- ## - With container runtime installed:
- ## + If installed Iluvatar container runtime as default runtime, try with:
- ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --privileged -v /usr/local/corex:/usr/local/corex:ro ...
- ## + If there are multiple container runtimes installed, try with:
- ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --privileged --runtime iluvatar -v /usr/local/corex:/usr/local/corex:ro ...
- ## + Disallowing privileged, try with:
- ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock -v /usr/local/corex:/usr/local/corex:ro ...
- ## - Without container runtime installed:
- ## + Allowing privileged, try with:
- ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --privileged -v /usr/local/corex:/usr/local/corex:ro ...
- ## + Disallowing privileged, try with:
- ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --security-opt seccomp=unconfined -v /dev:/dev:ro -v /usr/local/corex:/usr/local/corex:ro ...
- # Prepends the CoreX lib64 directory to LD_LIBRARY_PATH and /usr/local/corex
- # to the semicolon-separated ignore-volume list built up by the earlier ENV steps.
- ENV IX_VISIBLE_DEVICES="all" \
- COREX_HOME="/usr/local/corex" \
- LD_LIBRARY_PATH="/usr/local/corex/lib64:${LD_LIBRARY_PATH}" \
- GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES="/usr/local/corex;${GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES}"
- ## Active all MetaX devices detection,
- ## works with (default) MetaX container runtime and privileged mode.
- ## See https://developer.metax-tech.com/api/client/document/preview/626/k8s/03_component.html#container-runtime.
- ## Runs:
- ## - With container runtime installed:
- ## [TODO, TBD]
- ## - Without container runtime installed:
- ## + Allowing privileged, try with:
- ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --privileged -v /opt/mxdriver:/opt/mxdriver:ro -v /opt/maca:/opt/maca:ro ...
- ## + Disallowing privileged, try with:
- ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --security-opt seccomp=unconfined -v /dev:/dev:ro -v /opt/mxdriver:/opt/mxdriver:ro -v /opt/maca:/opt/maca:ro ...
- # Prepends the MACA and mxdriver lib directories to LD_LIBRARY_PATH and /opt/maca
- # to the semicolon-separated ignore-volume list built up by the earlier ENV steps.
- ENV LD_LIBRARY_PATH="/opt/maca/lib:/opt/mxdriver/lib:${LD_LIBRARY_PATH}" \
- GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES="/opt/maca;${GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES}"
- ## Activate detection of all MThreads devices,
- ## works with the (default) MThreads container runtime and privileged mode.
- ## See https://docs.mthreads.com/cloud-native/cloud-native-doc-online/install_guide.
- ## Runs:
- ## - With container runtime installed:
- ## + If installed MThreads container runtime as default runtime, try with:
- ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --privileged ...
- ## + If there are multiple container runtimes installed, try with:
- ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --privileged --runtime mthreads ...
- ## + Disallowing privileged, try with:
- ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock ...
- ## - Without container runtime installed:
- ## [TODO, TBD]
- ENV MTHREADS_VISIBLE_DEVICES="all" \
- MTHREADS_DRIVER_CAPABILITIES="compute,utility"
- ## Activate detection of all NVIDIA devices,
- ## works with the (default) NVIDIA container runtime and privileged mode.
- ## See https://docs.nvidia.com/deploy/nvml-api/nvml-api-reference.html#nvml-api-reference.
- ## Runs:
- ## - With container runtime installed:
- ## + If installed NVIDIA container runtime as default runtime, try with:
- ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --privileged ...
- ## + If there are multiple container runtimes installed, try with:
- ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock --privileged --runtime nvidia ...
- ## + Disallowing privileged, try with:
- ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock ...
- ## - Without container runtime installed:
- ## [TODO, TBD]
- ENV NVIDIA_DISABLE_REQUIRE="true" \
- NVIDIA_VISIBLE_DEVICES="all" \
- NVIDIA_DRIVER_CAPABILITIES="compute,utility"
- ## Activate detection of all T-Head devices,
- ## works with the (default) T-Head container runtime and privileged mode.
- ## See https://help.aliyun.com/document_detail/2996754.html.
- ## Runs:
- ## - With container runtime installed:
- ## [TODO, TBD]
- ## - Without container runtime installed:
- ## + Allowing privileged, try with:
- ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock -e GPUSTACK_RUNTIME_DOCKER_RESOURCE_INJECTION_POLICY=CDI -v /var/run/cdi:/var/run/cdi --privileged -v /usr/local/PPU_SDK:/usr/local/PPU_SDK:ro ...
- ## + Disallowing privileged, try with:
- ## docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock -e GPUSTACK_RUNTIME_DOCKER_RESOURCE_INJECTION_POLICY=CDI -v /var/run/cdi:/var/run/cdi --security-opt seccomp=unconfined -v /dev:/dev:ro -v /usr/local/PPU_SDK:/usr/local/PPU_SDK:ro ...
- ## Sets PPU_HOME, prepends the PPU SDK library directories to LD_LIBRARY_PATH,
- ## and prepends /usr/local/PPU_SDK to the volumes ignored during mirrored deployment.
- ENV PPU_HOME="/usr/local/PPU_SDK" \
- LD_LIBRARY_PATH="/usr/local/PPU_SDK/CUDA_SDK/lib64:/usr/local/PPU_SDK/lib:${LD_LIBRARY_PATH}" \
- GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES="/usr/local/PPU_SDK;${GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES}"
- ## Activate GPUStack runtime mirrored deployment mode,
- ## if getting an error like, "Found multiple Containers with the same hostname ...",
- ## please use `--env GPUSTACK_RUNTIME_DEPLOY_MIRRORED_NAME=...` to specify the exact container name.
- ##
- ## Build-time argument; its value is forwarded into the environment variable of the same name below.
- ARG GPUSTACK_RUNTIME_DOCKER_MIRRORED_NAME_FILTER_LABELS
- ## GPUSTACK_RUNTIME_LOG_EXCEPTION=false: Disable logging exceptions from gpustack-runtime.
- ## GPUSTACK_RUNTIME_DEPLOY_CDI_SPECS_DIRECTORY=/var/run/cdi: Set CDI specs directory.
- ## GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT=true: Enable mirrored deployment mode.
- ## GPUSTACK_RUNTIME_DEPLOY_CORRECT_RUNNER_IMAGE=false: Disable auto correction of runner images.
- ## GPUSTACK_RUNTIME_DOCKER_IMAGE_NO_PULL_VISUALIZATION=true: Disable visualizing image pull progress; use simple logs instead.
- ## GPUSTACK_RUNTIME_DOCKER_MIRRORED_NAME_FILTER_LABELS: Configure filter labels for mirrored deployment.
- ## GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES: Declare volumes to be ignored during mirrored deployment.
- ENV GPUSTACK_RUNTIME_LOG_EXCEPTION="false" \
- GPUSTACK_RUNTIME_DEPLOY_CDI_SPECS_DIRECTORY="/var/run/cdi" \
- GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT="true" \
- GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES="/var/run/cdi;${GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES}" \
- GPUSTACK_RUNTIME_DEPLOY_CORRECT_RUNNER_IMAGE="false" \
- GPUSTACK_RUNTIME_DOCKER_IMAGE_NO_PULL_VISUALIZATION="true" \
- GPUSTACK_RUNTIME_DOCKER_MIRRORED_NAME_FILTER_LABELS="${GPUSTACK_RUNTIME_DOCKER_MIRRORED_NAME_FILTER_LABELS}"
- ## Copy the entrypoint script into the image with mode 755,
- ## set the working directory to /, and use the script as the container entrypoint.
- COPY --chmod=755 pack/entrypoint.sh /usr/bin/entrypoint.sh
- WORKDIR /
- ENTRYPOINT [ "/usr/bin/entrypoint.sh" ]
|