feat(ml): update ONNX Runtime, OpenVINO and ROCm stack (#23458)

2026-01-15 12:15:42 +00:00 · 2026-01-01 18:17:55 +01:00
parent 72a898d89d
commit 3321c1a9df
8 changed files with 676 additions and 1089 deletions
--- a/.editorconfig
+++ b/.editorconfig
@@ -8,6 +8,9 @@ indent_size = 2
 insert_final_newline = true
 trim_trailing_whitespace = true

+[*.py]
+indent_size = 4
+
 [*.{ts,js}]
 quote_type = single

--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -574,9 +574,9 @@ jobs:
        uses: astral-sh/setup-uv@681c641aba71e4a1c380be3ab5e12ad51f415867 # v7.1.6
      - uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
        # TODO: add caching when supported (https://github.com/actions/setup-python/pull/818)
-        # with:
-        #   python-version: 3.11
-        #   cache: 'uv'
+        with:
+          python-version: 3.11
+          #cache: 'uv'
      - name: Install dependencies
        run: |
          uv sync --extra cpu
--- a/machine-learning/Dockerfile
+++ b/machine-learning/Dockerfile
@@ -2,7 +2,7 @@ ARG DEVICE=cpu

 FROM python:3.11-bookworm@sha256:667cf70698924920f29ebdb8d749ab665811503b87093d4f11826d114fd7255e AS builder-cpu

-FROM builder-cpu AS builder-openvino
+FROM python:3.13-slim-trixie@sha256:0222b795db95bf7412cede36ab46a266cfb31f632e64051aac9806dabf840a61 AS builder-openvino

 FROM builder-cpu AS builder-cuda

@@ -22,20 +22,18 @@ FROM builder-cpu AS builder-rknn

 # Warning: 25GiB+ disk space required to pull this image
 # TODO: find a way to reduce the image size
-FROM rocm/dev-ubuntu-22.04:6.4.3-complete@sha256:6cda50e312f3aac068cea9ec06c560ca1f522ad546bc8b3d2cf06da0fe8e8a76 AS builder-rocm
+FROM rocm/dev-ubuntu-24.04:6.4.4-complete@sha256:31418ac10a3769a71eaef330c07280d1d999d7074621339b8f93c484c35f6078 AS builder-rocm

 # renovate: datasource=github-releases depName=Microsoft/onnxruntime
 ARG ONNXRUNTIME_VERSION="v1.22.1"
 WORKDIR /code

-RUN apt-get update && apt-get install -y --no-install-recommends wget git python3.10-venv
-RUN wget -nv https://github.com/Kitware/CMake/releases/download/v3.30.1/cmake-3.30.1-linux-x86_64.sh && \
-    chmod +x cmake-3.30.1-linux-x86_64.sh && \
-    mkdir -p /code/cmake-3.30.1-linux-x86_64 && \
-    ./cmake-3.30.1-linux-x86_64.sh --skip-license --prefix=/code/cmake-3.30.1-linux-x86_64 && \
-    rm cmake-3.30.1-linux-x86_64.sh
-
-ENV PATH=/code/cmake-3.30.1-linux-x86_64/bin:${PATH}
+RUN apt-get update && apt-get install -y --no-install-recommends wget git
+RUN wget -nv https://github.com/Kitware/CMake/releases/download/v3.31.9/cmake-3.31.9-linux-x86_64.sh && \
+    chmod +x cmake-3.31.9-linux-x86_64.sh && \
+    mkdir -p /code/cmake-3.31.9-linux-x86_64 && \
+    ./cmake-3.31.9-linux-x86_64.sh --skip-license --prefix=/code/cmake-3.31.9-linux-x86_64 && \
+    rm cmake-3.31.9-linux-x86_64.sh

 RUN git clone --single-branch --branch "${ONNXRUNTIME_VERSION}" --recursive "https://github.com/Microsoft/onnxruntime" onnxruntime
 WORKDIR /code/onnxruntime
@@ -45,9 +43,26 @@ COPY ./patches/* /tmp/
 RUN git apply /tmp/*.patch

 RUN /bin/sh ./dockerfiles/scripts/install_common_deps.sh
+
+ENV PATH=/opt/rocm-venv/bin:/code/cmake-3.31.9-linux-x86_64/bin:${PATH}
+ENV CCACHE_DIR="/ccache"
 # Note: the `parallel` setting uses a substantial amount of RAM
-RUN ./build.sh --allow_running_as_root --config Release --build_wheel --update --build --parallel 17 --cmake_extra_defines\
-    ONNXRUNTIME_VERSION="${ONNXRUNTIME_VERSION}" --skip_tests --use_rocm --rocm_home=/opt/rocm
+RUN --mount=type=cache,target=/ccache \
+    ./build.sh \
+    --allow_running_as_root \
+    --config Release \
+    --build_wheel \
+    --update \
+    --build \
+    --parallel 17 \
+    --cmake_extra_defines \
+    ONNXRUNTIME_VERSION="${ONNXRUNTIME_VERSION}" \
+    CMAKE_HIP_ARCHITECTURES="gfx900;gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201" \
+    --skip_tests \
+    --use_rocm \
+    --rocm_home=/opt/rocm \
+    --use_cache \
+    --compile_no_warning_as_error
 RUN mv /code/onnxruntime/build/Linux/Release/dist/*.whl /opt/

 FROM builder-${DEVICE} AS builder
@@ -73,15 +88,18 @@ FROM python:3.11-slim-bookworm@sha256:917ec0e42cd6af87657a768449c2f604a6b67c7ab8
 ENV LD_PRELOAD=/usr/lib/libmimalloc.so.2 \
    MACHINE_LEARNING_MODEL_ARENA=false

-FROM python:3.11-slim-bookworm@sha256:917ec0e42cd6af87657a768449c2f604a6b67c7ab8e10ff917b8724799f816d3 AS prod-openvino
+FROM python:3.13-slim-trixie@sha256:0222b795db95bf7412cede36ab46a266cfb31f632e64051aac9806dabf840a61 AS prod-openvino

 RUN apt-get update && \
    apt-get install --no-install-recommends -yqq ocl-icd-libopencl1 wget && \
-    wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17384.11/intel-igc-core_1.0.17384.11_amd64.deb && \
-    wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17384.11/intel-igc-opencl_1.0.17384.11_amd64.deb && \
-    wget -nv https://github.com/intel/compute-runtime/releases/download/24.31.30508.7/intel-opencl-icd_24.31.30508.7_amd64.deb && \
+    wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/v2.24.8/intel-igc-core-2_2.24.8+20344_amd64.deb && \
+    wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/v2.24.8/intel-igc-opencl-2_2.24.8+20344_amd64.deb && \
+    wget -nv https://github.com/intel/compute-runtime/releases/download/25.48.36300.8/intel-opencl-icd_25.48.36300.8-0_amd64.deb &&  \
+    wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17537.24/intel-igc-core_1.0.17537.24_amd64.deb && \
+    wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17537.24/intel-igc-opencl_1.0.17537.24_amd64.deb && \
+    wget -nv https://github.com/intel/compute-runtime/releases/download/24.35.30872.36/intel-opencl-icd-legacy1_24.35.30872.36_amd64.deb && \
    # TODO: Figure out how to get renovate to manage this differently versioned libigdgmm file
-    wget -nv https://github.com/intel/compute-runtime/releases/download/24.31.30508.7/libigdgmm12_22.4.1_amd64.deb && \
+    wget -nv https://github.com/intel/compute-runtime/releases/download/25.48.36300.8/libigdgmm12_22.8.2_amd64.deb && \
    dpkg -i *.deb && \
    rm *.deb && \
    apt-get remove wget -yqq && \
@@ -102,7 +120,7 @@ COPY --from=builder-cuda /usr/local/bin/python3 /usr/local/bin/python3
 COPY --from=builder-cuda /usr/local/lib/python3.11 /usr/local/lib/python3.11
 COPY --from=builder-cuda /usr/local/lib/libpython3.11.so /usr/local/lib/libpython3.11.so

-FROM rocm/dev-ubuntu-22.04:6.4.3-complete@sha256:6cda50e312f3aac068cea9ec06c560ca1f522ad546bc8b3d2cf06da0fe8e8a76 AS prod-rocm
+FROM rocm/dev-ubuntu-24.04:6.4.4-complete@sha256:31418ac10a3769a71eaef330c07280d1d999d7074621339b8f93c484c35f6078 AS prod-rocm

 FROM prod-cpu AS prod-armnn

--- a/machine-learning/immich_ml/main.py
+++ b/machine-learning/immich_ml/main.py
@@ -36,7 +36,7 @@ from .schemas import (
    T,
 )

-MultiPartParser.max_file_size = 2**26  # spools to disk if payload is 64 MiB or larger
+MultiPartParser.spool_max_size = 2**26  # spools to disk if payload is 64 MiB or larger

 model_cache = ModelCache(revalidate=settings.model_ttl > 0)
 thread_pool: ThreadPoolExecutor | None = None
--- a/machine-learning/patches/0002-install-system-deps.patch
+++ b/machine-learning/patches/0002-install-system-deps.patch
@@ -0,0 +1,33 @@
+diff --git a/dockerfiles/scripts/install_common_deps.sh b/dockerfiles/scripts/install_common_deps.sh
+index bbb672a99e..0dc652fbda 100644
+--- a/dockerfiles/scripts/install_common_deps.sh
+++ b/dockerfiles/scripts/install_common_deps.sh
+@@ -8,16 +8,23 @@ apt-get update && apt-get install -y --no-install-recommends \
+         curl \
+         libcurl4-openssl-dev \
+         libssl-dev \
+-        python3-dev
++        python3-dev \
++        ccache
+ 
+ # Dependencies: conda
+-wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-4.5.11-Linux-x86_64.sh -O ~/miniconda.sh --no-check-certificate && /bin/bash ~/miniconda.sh -b -p /opt/miniconda
++wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py312_25.9.1-1-Linux-x86_64.sh -O ~/miniconda.sh && /bin/bash ~/miniconda.sh -b -p /opt/miniconda
+ rm ~/miniconda.sh
+ /opt/miniconda/bin/conda clean -ya
+ 
+-pip install numpy
+-pip install packaging
+-pip install "wheel>=0.35.1"
++# Dependencies: venv and packages
++/opt/miniconda/bin/python3 -m venv /opt/rocm-venv
++/opt/rocm-venv/bin/pip install --no-cache-dir --upgrade pip
++/opt/rocm-venv/bin/pip install --no-cache-dir \
++  "numpy==2.3.4" \
++  "packaging==25.0" \
++  "wheel==0.45.1" \
++  "setuptools==80.9.0"
++
+ rm -rf /opt/miniconda/pkgs
+ 
+ # Dependencies: cmake
--- a/machine-learning/patches/0002-target-gfx900-gfx1102.patch
+++ b/machine-learning/patches/0002-target-gfx900-gfx1102.patch
@@ -1,13 +0,0 @@
-diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
-index 2714e6f59..a69da76b4 100644
---- a/cmake/CMakeLists.txt
-+++ b/cmake/CMakeLists.txt
-@@ -338,7 +338,7 @@ if (onnxruntime_USE_ROCM)
-     if (ROCM_VERSION_DEV VERSION_LESS "6.2")
-       message(FATAL_ERROR "CMAKE_HIP_ARCHITECTURES is not set when ROCm version < 6.2")
-     else()
--      set(CMAKE_HIP_ARCHITECTURES "gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx940;gfx941;gfx942;gfx1200;gfx1201")
-+      set(CMAKE_HIP_ARCHITECTURES "gfx900;gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx1102;gfx940;gfx941;gfx942;gfx1200;gfx1201")
-     endif()
-   endif()
- 
--- a/machine-learning/pyproject.toml
+++ b/machine-learning/pyproject.toml
@@ -3,7 +3,7 @@ name = "immich-ml"
 version = "2.4.1"
 description = ""
 authors = [{ name = "Hau Tran", email = "alex.tran1502@gmail.com" }]
-requires-python = ">=3.10,<4.0"
+requires-python = ">=3.11,<4.0"
 readme = "README.md"
 dependencies = [
    "aiocache>=0.12.1,<1.0",
@@ -12,7 +12,7 @@ dependencies = [
    "gunicorn>=21.1.0",
    "huggingface-hub>=0.20.1,<1.0",
    "insightface>=0.7.3,<1.0",
-    "numpy<2",
+    "numpy>=2.3.4",
    "opencv-python-headless>=4.7.0.72,<5.0",
    "orjson>=3.9.5",
    "pillow>=9.5.0,<11.0",
@@ -49,24 +49,16 @@ lint = [
 dev = ["locust>=2.15.1", { include-group = "test" }, { include-group = "lint" }]

 [project.optional-dependencies]
-cpu = ["onnxruntime>=1.15.0,<2"]
-cuda = ["onnxruntime-gpu>=1.17.0,<2"]
-openvino = ["onnxruntime-openvino>=1.17.1,<1.19.0"]
-armnn = ["onnxruntime>=1.15.0,<2"]
-rknn = ["onnxruntime>=1.15.0,<2", "rknn-toolkit-lite2>=2.3.0,<3"]
+cpu = ["onnxruntime>=1.23.2,<2"]
+cuda = ["onnxruntime-gpu>=1.23.2,<2"]
+openvino = ["onnxruntime-openvino>=1.23.0,<2"]
+armnn = ["onnxruntime>=1.23.2,<2"]
+rknn = ["onnxruntime>=1.23.2,<2", "rknn-toolkit-lite2>=2.3.0,<3"]
 rocm = []

 [tool.uv]
 compile-bytecode = true

-[[tool.uv.index]]
-name = "cuda12"
-url = "https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/"
-explicit = true
-
-[tool.uv.sources]
-onnxruntime-gpu = { index = "cuda12" }
-
 [tool.hatch.build.targets.sdist]
 include = ["immich_ml"]

--- a/machine-learning/uv.lock
+++ b/machine-learning/uv.lock