This is an automated email from the ASF dual-hosted git repository.

tqchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/main by this push:
     new 5cbf50628b [CI] Add cibw-based wheel publishing to PyPI (#19656)
5cbf50628b is described below

commit 5cbf50628b093d0b9c03a4f85b7af3bd9f545f38
Author: Shushi Hong <[email protected]>
AuthorDate: Tue Jun 2 17:36:31 2026 -0400

    [CI] Add cibw-based wheel publishing to PyPI (#19656)
    
    Add a workflow_dispatch pipeline that builds, tests, and publishes
    manylinux/macOS/Windows wheels via cibuildwheel with OIDC trusted
    publishing.
---
 .github/actions/build-wheel-for-publish/action.yml | 144 ++++++++++++
 .github/actions/setup/action.yml                   |  14 +-
 .github/workflows/publish_wheel.yml                | 255 +++++++++++++++++++++
 .gitignore                                         |   2 +
 CMakeLists.txt                                     |  65 ++++--
 ci/scripts/package/README.md                       |  29 +++
 .../scripts/package}/build-environment.yaml        |   2 +
 .../package/manylinux_build_libtvm_runtime_cuda.sh |  70 ++++++
 .../package/windows_build_libtvm_runtime_cuda.bat  |  98 ++++++++
 cmake/modules/CUDA.cmake                           |  16 +-
 cmake/modules/Hexagon.cmake                        |  10 +-
 cmake/modules/Metal.cmake                          |  10 +-
 cmake/modules/OpenCL.cmake                         |  10 +-
 cmake/modules/ROCM.cmake                           |  10 +-
 cmake/modules/Vulkan.cmake                         |  10 +-
 cmake/utils/FindLLVM.cmake                         |   9 +-
 cmake/utils/Library.cmake                          |  65 ++++++
 pyproject.toml                                     | 138 +++--------
 tests/lint/check_file_type.py                      |   1 +
 .../python/wheel/test_validate_runtime_library.py  |  50 ++++
 20 files changed, 823 insertions(+), 185 deletions(-)

diff --git a/.github/actions/build-wheel-for-publish/action.yml 
b/.github/actions/build-wheel-for-publish/action.yml
new file mode 100644
index 0000000000..1471b2e71c
--- /dev/null
+++ b/.github/actions/build-wheel-for-publish/action.yml
@@ -0,0 +1,144 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: Build TVM Wheel
+description: >
+  Build and test the LLVM-enabled TVM wheel for a given OS/architecture
+  combination using cibuildwheel.
+
+inputs:
+  arch:
+    description: "Target architecture for cibuildwheel (e.g., x86_64, aarch64, 
arm64, AMD64)"
+    required: true
+  build:
+    description: "cibuildwheel build selector (e.g., cp310-manylinux_x86_64)"
+    required: true
+  cmake_defines:
+    description: "Feature CMake defines that vary by wheel (e.g. 
-DUSE_METAL=ON on macOS)"
+    required: false
+    default: ""
+  include_cuda_runtime:
+    description: "Set to 1 to bundle the prebuilt CUDA runtime sidecar into the wheel (Linux and Windows)"
+    required: false
+    default: "0"
+
+runs:
+  using: "composite"
+  steps:
+    # Single source of truth for the LLVM toolchain version, shared by the 
cache
+    # key and the conda install steps below.
+    - name: Set LLVM version
+      shell: bash
+      run: echo "LLVM_VERSION=22.1.0" >> "$GITHUB_ENV"
+
+    - name: Prepare LLVM cache path (Unix)
+      if: runner.os != 'Windows'
+      shell: bash
+      run: |
+        set -eux
+        sudo mkdir -p /opt/llvm
+        sudo chown -R "$(whoami)" /opt/llvm
+
+    # ---- Cache LLVM prefix ----
+    - name: Cache LLVM
+      uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
+      id: llvm-cache
+      with:
+        path: ${{ runner.os == 'Windows' && 'C:/opt/llvm' || '/opt/llvm' }}
+        key: tvm-wheel-llvm-${{ env.LLVM_VERSION }}-${{ runner.os }}-${{ 
inputs.arch }}-v6
+
+    # ---- Install LLVM via conda (cache miss only) ----
+    - name: Setup conda
+      if: steps.llvm-cache.outputs.cache-hit != 'true'
+      uses: 
conda-incubator/setup-miniconda@8ee1f361103df19b6f8c8655fd3967a8ecb162d5 # 
v4.0.1
+      continue-on-error: true
+      id: conda1
+      with:
+        miniforge-version: latest
+        conda-remove-defaults: true
+
+    - name: Setup conda (retry with tar.bz2)
+      if: steps.llvm-cache.outputs.cache-hit != 'true' && steps.conda1.outcome 
== 'failure'
+      uses: 
conda-incubator/setup-miniconda@8ee1f361103df19b6f8c8655fd3967a8ecb162d5 # 
v4.0.1
+      with:
+        miniforge-version: latest
+        use-only-tar-bz2: true
+        conda-remove-defaults: true
+
+    - name: Install LLVM (Unix)
+      if: steps.llvm-cache.outputs.cache-hit != 'true' && runner.os != 
'Windows'
+      shell: bash -l {0}
+      run: |
+        set -eux
+        if [[ "${RUNNER_OS}" == "Linux" ]]; then
+          sudo mkdir -p /opt/llvm
+          sudo chown -R "$(whoami)" /opt/llvm
+        fi
+        conda create -q -p /opt/llvm -c conda-forge \
+          "llvmdev=${LLVM_VERSION}" "clangdev=${LLVM_VERSION}" 
"compiler-rt=${LLVM_VERSION}" zlib zstd-static libxml2-devel \
+          -y
+
+    - name: Install LLVM (Windows)
+      if: steps.llvm-cache.outputs.cache-hit != 'true' && runner.os == 
'Windows'
+      shell: cmd /C call {0}
+      run: |
+        call conda create -q -p C:\opt\llvm -c conda-forge 
llvmdev=%LLVM_VERSION% zlib zstd-static libxml2-devel -y
+
+    # The {project} placeholder is not expanded inside CIBW_ENVIRONMENT 
values, so
+    # compute the forward-slash host path here (the Windows analog of the Linux
+    # /project hardcode).
+    - name: Compute CUDA sidecar path (Windows)
+      if: runner.os == 'Windows' && inputs.include_cuda_runtime == '1'
+      shell: bash
+      run: echo "TVM_CUDA_EXTRA_LIB=$(cygpath -m 
"$(pwd)")/build-wheel-cuda/lib/tvm_runtime_cuda.dll" >> "$GITHUB_ENV"
+
+    # ---- Build and test wheels ----
+    - name: Build and test wheels
+      uses: pypa/cibuildwheel@298ed2fb2c105540f5ed055e8a6ad78d82dd3a7e # v3.3.1
+      with:
+        package-dir: .
+        output-dir: wheelhouse
+      env:
+        CIBW_BUILD: ${{ inputs.build }}
+        CIBW_ARCHS_LINUX: ${{ inputs.arch }}
+        CIBW_ARCHS_MACOS: ${{ inputs.arch }}
+        CIBW_ARCHS_WINDOWS: ${{ inputs.arch }}
+        # Linux builds run in cibuildwheel's default manylinux_2_28 container;
+        # bind-mount the cached LLVM prefix into it. Ignored on macOS/Windows,
+        # which build without a container.
+        CIBW_CONTAINER_ENGINE: "docker; create_args: --volume 
/opt/llvm:/opt/llvm:ro"
+        # Each per-OS block carries the toolchain location (conda LLVM prefix +
+        # llvm-config path), the universal static-link policy (USE_LLVM 
--link-static
+        # and ZLIB_USE_STATIC_LIBS), and the CUDA sidecar. Feature defines 
that vary by
+        # wheel (e.g. USE_METAL on macOS) come from the matrix via 
inputs.cmake_defines.
+        # One explicit block per build platform (macOS / Linux / Windows); 
cibuildwheel
+        # env overrides replace rather than merge, so there is no shared base 
block.
+        CIBW_ENVIRONMENT_MACOS: >-
+          CMAKE_PREFIX_PATH="/opt/llvm"
+          CMAKE_ARGS="-DUSE_LLVM='/opt/llvm/bin/llvm-config --link-static' 
-DZLIB_USE_STATIC_LIBS=ON -DCMAKE_PREFIX_PATH=/opt/llvm ${{ 
inputs.cmake_defines }} ${{ inputs.include_cuda_runtime == '1' && 
'-DTVM_PACKAGE_EXTRA_LIBS=/project/build-wheel-cuda/lib/libtvm_runtime_cuda.so' 
|| '' }}"
+        CIBW_ENVIRONMENT_LINUX: >-
+          CMAKE_PREFIX_PATH="/opt/llvm"
+          LIBRARY_PATH="/opt/llvm/lib"
+          CMAKE_ARGS="-DUSE_LLVM='/opt/llvm/bin/llvm-config --link-static' 
-DZLIB_USE_STATIC_LIBS=ON -DCMAKE_PREFIX_PATH=/opt/llvm ${{ 
inputs.cmake_defines }} ${{ inputs.include_cuda_runtime == '1' && 
'-DTVM_PACKAGE_EXTRA_LIBS=/project/build-wheel-cuda/lib/libtvm_runtime_cuda.so' 
|| '' }}"
+        CIBW_ENVIRONMENT_WINDOWS: >-
+          CMAKE_PREFIX_PATH="C:/opt/llvm/Library"
+          PATH="C:/opt/llvm/Library/bin;$PATH"
+          CMAKE_ARGS="-DUSE_LLVM='C:/opt/llvm/Library/bin/llvm-config.exe 
--link-static' -DZLIB_USE_STATIC_LIBS=ON 
-DCMAKE_PREFIX_PATH=C:/opt/llvm/Library ${{ inputs.cmake_defines }} ${{ 
inputs.include_cuda_runtime == '1' && format('-DTVM_PACKAGE_EXTRA_LIBS={0}', 
env.TVM_CUDA_EXTRA_LIB) || '' }}"
+        # Tells tests/python/wheel to assert the CUDA runtime is bundled
+        # (only on the CUDA wheels; the value is "0" for CPU wheels).
+        CIBW_TEST_ENVIRONMENT: >-
+          TVM_WHEEL_EXPECT_CUDA_RUNTIME="${{ inputs.include_cuda_runtime }}"
diff --git a/.github/actions/setup/action.yml b/.github/actions/setup/action.yml
index e78ce2f66d..842dbd03b0 100644
--- a/.github/actions/setup/action.yml
+++ b/.github/actions/setup/action.yml
@@ -1,34 +1,36 @@
 runs:
  using: "composite"
  steps:
-  - uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3
+  - uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
     env:
       CACHE_NUMBER: 2
     with:
       path: ~/conda_pkgs_dir
-      key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{ 
hashFiles('tests/conda/build-environment.yaml') }}
-  - uses: 
conda-incubator/setup-miniconda@fc2d68f6413eb2d87b895e92f8584b5b94a10167 # 
v3.3.0
+      key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{ 
hashFiles('ci/scripts/package/build-environment.yaml') }}
+  - uses: 
conda-incubator/setup-miniconda@8ee1f361103df19b6f8c8655fd3967a8ecb162d5 # 
v4.0.1
     continue-on-error: true
     id: conda1
     with:
       activate-environment: tvm-build
       channel-priority: strict
-      environment-file: tests/conda/build-environment.yaml
+      environment-file: ci/scripts/package/build-environment.yaml
       auto-activate-base: false
       miniforge-version: latest
       python-version: "3.10"
       condarc-file: tests/conda/condarc
-  - uses: 
conda-incubator/setup-miniconda@fc2d68f6413eb2d87b895e92f8584b5b94a10167 # 
v3.3.0
+      conda-remove-defaults: true
+  - uses: 
conda-incubator/setup-miniconda@8ee1f361103df19b6f8c8655fd3967a8ecb162d5 # 
v4.0.1
     if: steps.conda1.outcome == 'failure'
     with:
       activate-environment: tvm-build
       channel-priority: strict
-      environment-file: tests/conda/build-environment.yaml
+      environment-file: ci/scripts/package/build-environment.yaml
       auto-activate-base: false
       miniforge-version: latest
       use-only-tar-bz2: true
       python-version: "3.10"
       condarc-file: tests/conda/condarc
+      conda-remove-defaults: true
   - name: Conda info
     shell: pwsh
     run: |
diff --git a/.github/workflows/publish_wheel.yml 
b/.github/workflows/publish_wheel.yml
new file mode 100644
index 0000000000..7e252ed7de
--- /dev/null
+++ b/.github/workflows/publish_wheel.yml
@@ -0,0 +1,255 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: Publish TVM wheels
+
+on:
+  workflow_dispatch:
+    inputs:
+      tag:
+        description: "Tag, branch, or SHA to build; PyPI publishes require 
refs/tags/<tag>"
+        required: true
+        type: string
+      publish_repository:
+        description: "Where to publish after the wheel build succeeds"
+        required: true
+        default: "none"
+        type: choice
+        options:
+          - none
+          - testpypi
+          - pypi
+
+permissions:
+  contents: read
+
+# CI runners are ephemeral, so pip's HTTP cache buys nothing and a stale
+# preinstalled cache (e.g. on the macOS image) only produces noisy
+# "Cache entry deserialization failed" warnings. Disable it everywhere.
+env:
+  PIP_NO_CACHE_DIR: "1"
+
+jobs:
+  # Build the CUDA runtime sidecar once per arch and upload it as an artifact 
that
+  # build_wheels bundles. The Linux legs build inside a manylinux_2_28 image 
-- the
+  # same glibc baseline cibuildwheel targets for the wheel -- so the sidecar 
stays
+  # ABI-compatible with the wheel's libtvm_runtime.so regardless of the exact 
tag.
+  build_cuda_runtime:
+    name: ${{ matrix.name }}
+    runs-on: ${{ matrix.os }}
+    container: ${{ matrix.container }}
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - name: "CUDA runtime sidecar (Linux x86_64, manylinux_2_28)"
+            os: ubuntu-latest
+            container: quay.io/pypa/manylinux_2_28_x86_64:latest
+            arch: x86_64
+            script: ci/scripts/package/manylinux_build_libtvm_runtime_cuda.sh
+            lib: build-wheel-cuda/lib/libtvm_runtime_cuda.so
+          - name: "CUDA runtime sidecar (Linux aarch64, manylinux_2_28)"
+            os: ubuntu-24.04-arm
+            container: quay.io/pypa/manylinux_2_28_aarch64:latest
+            arch: aarch64
+            script: ci/scripts/package/manylinux_build_libtvm_runtime_cuda.sh
+            lib: build-wheel-cuda/lib/libtvm_runtime_cuda.so
+          - name: "CUDA runtime sidecar (Windows AMD64)"
+            os: windows-2022
+            container: ""
+            arch: AMD64
+            script: ci/scripts/package/windows_build_libtvm_runtime_cuda.bat
+            lib: build-wheel-cuda/lib/tvm_runtime_cuda.dll
+    steps:
+      # The containerized Linux legs check out as root; mark the tree safe so 
git
+      # (submodule init in checkout) does not bail on "dubious ownership".
+      - name: Mark workspace safe for git
+        if: runner.os == 'Linux'
+        shell: bash
+        run: git config --global --add safe.directory '*'
+
+      - name: Checkout source
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 
v6.0.2
+        with:
+          ref: ${{ inputs.tag }}
+          submodules: recursive
+          fetch-depth: 1
+          fetch-tags: true
+
+      # Windows has no manylinux interpreter; the script's pip install needs 
one.
+      - name: Set up Python (Windows host)
+        if: runner.os == 'Windows'
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # 
v6.2.0
+        with:
+          python-version: "3.10"
+
+      - name: Build CUDA runtime sidecar (Unix)
+        if: runner.os != 'Windows'
+        shell: bash
+        run: bash ${{ matrix.script }}
+
+      - name: Build CUDA runtime sidecar (Windows)
+        if: runner.os == 'Windows'
+        shell: cmd
+        run: call ${{ matrix.script }}
+
+      - name: Upload CUDA runtime sidecar
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a 
# v7.0.1
+        with:
+          name: tvm-cuda-runtime-${{ matrix.arch }}
+          path: ${{ matrix.lib }}
+          if-no-files-found: error
+
+  build_wheels:
+    name: ${{ matrix.name }}
+    # All-or-nothing: a failed sidecar leg blocks the whole wheel matrix (a 
publish
+    # needs the complete 4-wheel set anyway).
+    needs: [build_cuda_runtime]
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - name: "Linux x86_64 wheel with CUDA runtime (manylinux_2_28)"
+            os: ubuntu-latest
+            arch: x86_64
+            build: cp310-manylinux_x86_64
+            include_cuda_runtime: "1"
+            artifact_suffix: linux-x86_64-manylinux_2_28
+          - name: "Linux aarch64 wheel with CUDA runtime (manylinux_2_28)"
+            os: ubuntu-24.04-arm
+            arch: aarch64
+            build: cp310-manylinux_aarch64
+            include_cuda_runtime: "1"
+            artifact_suffix: linux-aarch64-manylinux_2_28
+          - name: "macOS arm64 wheel with Metal"
+            os: macos-14
+            arch: arm64
+            build: cp310-macosx_arm64
+            include_cuda_runtime: "0"
+            artifact_suffix: macos-arm64
+            cmake_defines: "-DUSE_METAL=ON"
+          - name: "Windows AMD64 wheel with CUDA runtime"
+            os: windows-2022
+            arch: AMD64
+            build: cp310-win_amd64
+            include_cuda_runtime: "1"
+            artifact_suffix: windows-amd64
+    steps:
+      - name: Validate publish inputs
+        shell: bash
+        env:
+          TVM_PUBLISH_REPOSITORY: ${{ inputs.publish_repository }}
+          TVM_PUBLISH_REF: ${{ inputs.tag }}
+        run: |
+          set -eux
+          if [[ "${TVM_PUBLISH_REPOSITORY}" == "pypi" && "${TVM_PUBLISH_REF}" 
!= refs/tags/* ]]; then
+            echo "PyPI publishes must use an immutable refs/tags/<tag> ref" >&2
+            exit 1
+          fi
+
+      - name: Checkout source
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 
v6.0.2
+        with:
+          ref: ${{ inputs.tag }}
+          submodules: recursive
+          fetch-depth: 1
+          fetch-tags: true
+
+      # Land the sidecar where -DTVM_PACKAGE_EXTRA_LIBS / cibuildwheel's 
/project
+      # mount expects it. Skipped on CPU-only rows (macOS).
+      - name: Download CUDA runtime sidecar
+        if: ${{ matrix.include_cuda_runtime == '1' }}
+        uses: 
actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
+        with:
+          name: tvm-cuda-runtime-${{ matrix.arch }}
+          path: build-wheel-cuda/lib
+
+      # Provide a known host Python (3.10) for the version-stamp step below; 
the wheel
+      # builds themselves use cibuildwheel's own interpreters.
+      - name: Set up Python (host)
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # 
v6.2.0
+        with:
+          python-version: "3.10"
+
+      # Stamp the version from the git tag (git describe) so the wheel version 
comes
+      # from the ref being built, not the hardcoded value in pyproject.toml. 
version.py
+      # rewrites pyproject.toml (and libinfo.py etc.) in place; on a non-tag 
ref it
+      # falls back to the in-repo __version__. Runs on the host before 
cibuildwheel
+      # reads pyproject.
+      - name: Stamp wheel version from git
+        shell: bash
+        run: python version.py --git-describe
+
+      - name: Build TVM wheel
+        uses: ./.github/actions/build-wheel-for-publish
+        with:
+          arch: ${{ matrix.arch }}
+          build: ${{ matrix.build }}
+          cmake_defines: ${{ matrix.cmake_defines }}
+          include_cuda_runtime: ${{ matrix.include_cuda_runtime }}
+
+      - name: Upload wheel artifact
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a 
# v7.0.1
+        with:
+          name: tvm-wheel-${{ matrix.artifact_suffix }}
+          path: wheelhouse/*.whl
+          if-no-files-found: error
+
+  upload_pypi:
+    name: Upload package distributions
+    needs: [build_wheels]
+    if: ${{ inputs.publish_repository != 'none' }}
+    runs-on: ubuntu-latest
+    environment: ${{ inputs.publish_repository }}
+    permissions:
+      actions: read
+      contents: read
+      id-token: write
+      attestations: write
+    steps:
+      - uses: 
actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
+        with:
+          pattern: tvm-wheel-*
+          path: dist
+          merge-multiple: true
+
+      # Print wheel sizes for visibility only. We do not fail on the PyPI 100 
MB
+      # limit here -- the publish step's upload would surface that error 
directly.
+      - name: Print wheel sizes
+        shell: bash
+        run: ls -alh dist/*.whl
+
+      - name: Generate artifact attestation for wheels
+        uses: 
actions/attest-build-provenance@a2bbfa25375fe432b6a289bc6b6cd05ecd0c4c32 # 
v4.1.0
+        with:
+          subject-path: dist/*
+
+      - name: Publish package distributions to TestPyPI
+        if: ${{ inputs.publish_repository == 'testpypi' }}
+        uses: 
pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0
+        with:
+          attestations: true
+          verbose: true
+          repository-url: https://test.pypi.org/legacy/
+
+      - name: Publish package distributions to PyPI
+        if: ${{ inputs.publish_repository == 'pypi' }}
+        uses: 
pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0
+        with:
+          attestations: true
+          verbose: true
diff --git a/.gitignore b/.gitignore
index 9e734b0be0..0ee1eb2418 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,6 +14,8 @@ __pycache__/
 env/
 build/
 build-*/
+!.github/actions/build-*/
+!.github/actions/build-*/action.yml
 develop-eggs/
 dist/
 downloads/
diff --git a/CMakeLists.txt b/CMakeLists.txt
index a11a872970..9591352e4d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -5,6 +5,7 @@ project(tvm C CXX)
 include(cmake/utils/Utils.cmake)
 include(cmake/utils/Summary.cmake)
 include(cmake/utils/Linker.cmake)
+include(cmake/utils/Library.cmake)
 include(cmake/utils/FindCUDA.cmake)
 include(cmake/utils/FindNCCL.cmake)
 include(cmake/utils/FindOpenCL.cmake)
@@ -526,16 +527,7 @@ if(TVM_VISIBILITY_FLAG)
   set_property(TARGET tvm_runtime_extra APPEND PROPERTY LINK_OPTIONS 
"${TVM_VISIBILITY_FLAG}")
 endif()
 
-set_target_properties(tvm_runtime_extra PROPERTIES
-  LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
-  RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
-  ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
-)
-
-install(TARGETS tvm_runtime_extra DESTINATION lib${LIB_SUFFIX})
-if(TVM_BUILD_PYTHON_MODULE)
-  install(TARGETS tvm_runtime_extra DESTINATION "lib")
-endif()
+tvm_configure_target_library(tvm_runtime_extra RUNTIME_MODULE)
 
 add_library(tvm_objs OBJECT ${COMPILER_SRCS})
 add_library(tvm_runtime_objs OBJECT ${RUNTIME_SRCS})
@@ -592,6 +584,25 @@ target_include_directories(tvm_compiler PUBLIC 
"$<INSTALL_INTERFACE:${CMAKE_INST
 set_property(TARGET tvm_compiler APPEND PROPERTY LINK_OPTIONS 
"${TVM_NO_UNDEFINED_SYMBOLS}")
 set_property(TARGET tvm_compiler APPEND PROPERTY LINK_OPTIONS 
"${TVM_VISIBILITY_FLAG}")
 
+# Work around a GNU ld (binutils) relaxation bug that miscompiles
+# R_X86_64_GOTPCRELX relocations inside very large statically-linked archives.
+# When the full LLVM static libraries are linked into libtvm_compiler.so, the
+# library is large enough that ld can relax an indirect GOT call (LLVM built
+# with -fno-plt emits these) into a direct call with an incorrect displacement.
+# The call then targets read-only data instead of the intended function and
+# crashes at runtime with a SIGSEGV inside llvm::X86Subtarget during code
+# generation. Disabling linker relaxation keeps the GOT-indirect sequences and
+# avoids the miscompilation; it is harmless when LLVM is linked dynamically.
+# See binutils bug ld/25754.
+if(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND NOT "${USE_LLVM}" MATCHES 
${IS_FALSE_PATTERN})
+  set_property(TARGET tvm_compiler APPEND PROPERTY LINK_OPTIONS 
"-Wl,--no-relax")
+  # LLVM's --system-libs reports -lxml2, but TVM calls no libxml2 symbols. The
+  # manylinux toolchain does not default to --as-needed, so set it explicitly:
+  # the unused libxml2 is then not recorded as a NEEDED dependency, so 
auditwheel
+  # does not vendor it (and its deps) into the wheel.
+  set_property(TARGET tvm_compiler APPEND PROPERTY LINK_OPTIONS 
"-Wl,--as-needed")
+endif()
+
 # Place runtime/compiler/allvisible artifacts under build/lib/ to mirror the
 # tvm-ffi layout and make tvm_ffi.libinfo.load_lib_ctypes(package="tvm") able
 # to discover them in dev / editable builds.
@@ -840,23 +851,19 @@ endif()
 # Note: NCCL, NVSHMEM, RCCL target_link_libraries are handled in the inline
 # libtvm_runtime_extra assembly block above.
 
+# Keep the core shared libraries relocatable. A relative rpath
+# ($ORIGIN / @loader_path) is correct in any build because the sibling runtime
+# DSOs are installed next to each other, so apply it unconditionally rather 
than
+# only when building the Python wheel. (tvm_runtime_extra already gets its 
rpath
+# where it is defined above.)
+tvm_configure_target_library(tvm_compiler)
+tvm_configure_target_library(tvm_runtime)
+
 # Python package installation configuration
 # This section ensures that all necessary files are installed for the Python 
wheel
 if(TVM_BUILD_PYTHON_MODULE)
   message(STATUS "Configuring Python package installation")
 
-  # Set RPATH for tvm_compiler and tvm_runtime to find each other relatively
-  # (libtvm_compiler.so links against libtvm_runtime.so).
-  if(APPLE)
-    # macOS uses @loader_path
-    set_target_properties(tvm_compiler PROPERTIES INSTALL_RPATH "@loader_path")
-    set_target_properties(tvm_runtime PROPERTIES INSTALL_RPATH "@loader_path")
-  elseif(LINUX)
-    # Linux uses $ORIGIN
-    set_target_properties(tvm_compiler PROPERTIES INSTALL_RPATH "\$ORIGIN")
-    set_target_properties(tvm_runtime PROPERTIES INSTALL_RPATH "\$ORIGIN")
-  endif()
-
   # Install compiled shared libraries into <project>/lib so that
   # tvm_ffi.libinfo.load_lib_ctypes(package="tvm", target_name=...) can find
   # them via the package RECORD or the project's lib/ fallback dir.
@@ -865,12 +872,26 @@ if(TVM_BUILD_PYTHON_MODULE)
 
   # Install third-party compiled dependencies into the same lib/ dir.
   if(TARGET fpA_intB_gemm)
+    tvm_configure_target_library(fpA_intB_gemm)
     install(TARGETS fpA_intB_gemm DESTINATION "lib")
   endif()
   if(TARGET flash_attn)
+    tvm_configure_target_library(flash_attn)
     install(TARGETS flash_attn DESTINATION "lib")
   endif()
 
+  # Install prebuilt extra runtime libraries into the same lib/ dir. This is 
how
+  # the separately-built CUDA runtime (libtvm_runtime_cuda.so) is bundled: the
+  # publishing flow builds it in a CUDA-enabled environment and passes its path
+  # via TVM_PACKAGE_EXTRA_LIBS, so it ships through the normal CMake install
+  # rather than a post-build wheel rewrite.
+  foreach(_extra_lib IN LISTS TVM_PACKAGE_EXTRA_LIBS)
+    if(NOT EXISTS "${_extra_lib}")
+      message(FATAL_ERROR "TVM_PACKAGE_EXTRA_LIBS entry does not exist: 
${_extra_lib}")
+    endif()
+    install(FILES "${_extra_lib}" DESTINATION "lib")
+  endforeach()
+
   # Install minimal header files needed by Python extensions
   install(
     DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/include/tvm/runtime/"
diff --git a/ci/scripts/package/README.md b/ci/scripts/package/README.md
new file mode 100644
index 0000000000..f123fd1383
--- /dev/null
+++ b/ci/scripts/package/README.md
@@ -0,0 +1,29 @@
+<!--- Licensed to the Apache Software Foundation (ASF) under one -->
+<!--- or more contributor license agreements.  See the NOTICE file -->
+<!--- distributed with this work for additional information -->
+<!--- regarding copyright ownership.  The ASF licenses this file -->
+<!--- to you under the Apache License, Version 2.0 (the -->
+<!--- "License"); you may not use this file except in compliance -->
+<!--- with the License.  You may obtain a copy of the License at -->
+
+<!---   http://www.apache.org/licenses/LICENSE-2.0 -->
+
+<!--- Unless required by applicable law or agreed to in writing, -->
+<!--- software distributed under the License is distributed on an -->
+<!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -->
+<!--- KIND, either express or implied.  See the License for the -->
+<!--- specific language governing permissions and limitations -->
+<!--- under the License. -->
+
+# TVM wheel packaging
+
+The wheels are built by a standard `cibuildwheel` flow, configured in
+`.github/workflows/publish_wheel.yml` and `pyproject.toml` 
(`[tool.cibuildwheel]`
+and `[tool.scikit-build]`). This directory holds the few helper scripts that 
flow
+invokes:
+
+- `manylinux_build_libtvm_runtime_cuda.sh` — run by the `build_cuda_runtime` CI
+  stage; builds the `libtvm_runtime_cuda.so` sidecar inside the manylinux 
container.
+- `windows_build_libtvm_runtime_cuda.bat` — the Windows equivalent (run with
+  `shell: cmd`), building `tvm_runtime_cuda.dll`.
+- `build-environment.yaml` — conda environment for building the wheel.
diff --git a/tests/conda/build-environment.yaml 
b/ci/scripts/package/build-environment.yaml
similarity index 98%
rename from tests/conda/build-environment.yaml
rename to ci/scripts/package/build-environment.yaml
index ebd45ff4c4..3b2c4dd167 100644
--- a/tests/conda/build-environment.yaml
+++ b/ci/scripts/package/build-environment.yaml
@@ -33,6 +33,8 @@ dependencies:
   - pip
   - git
   - bzip2
+  - zlib
+  - zstd-static
   - pytest
   - numpy
   - scipy
diff --git a/ci/scripts/package/manylinux_build_libtvm_runtime_cuda.sh 
b/ci/scripts/package/manylinux_build_libtvm_runtime_cuda.sh
new file mode 100755
index 0000000000..fe721a18f4
--- /dev/null
+++ b/ci/scripts/package/manylinux_build_libtvm_runtime_cuda.sh
@@ -0,0 +1,70 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Build libtvm_runtime_cuda.so inside a manylinux container, run by the
+# build_cuda_runtime CI job. Installs the pinned CUDA toolkit and builds the
+# sidecar into build-wheel-cuda/lib/ for the wheel build to bundle.
+#
+# Usage: manylinux_build_libtvm_runtime_cuda.sh
+set -euxo pipefail
+
+repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd)"
+build_dir="${repo_root}/build-wheel-cuda"
+python_bin="/opt/python/cp310-cp310/bin/python"
+parallel="$(getconf _NPROCESSORS_ONLN 2>/dev/null || echo 4)"
+
+# Install the pinned CUDA toolkit into the manylinux_2_28 container. The RHEL8
+# local-repo RPM is compatible with manylinux_2_28 for both x86_64 and aarch64.
+arch="$(uname -m)"
+cuda_rpm="cuda-repo-rhel8-13-0-local-13.0.2_580.95.05-1.${arch}.rpm"
+curl -fsSLo "/tmp/${cuda_rpm}" \
+  
"https://developer.download.nvidia.com/compute/cuda/13.0.2/local_installers/${cuda_rpm}";
+rpm -i "/tmp/${cuda_rpm}"
+dnf clean all
+dnf -y --disablerepo=epel install cuda-toolkit-13-0
+rm -f "/tmp/${cuda_rpm}"
+dnf clean all
+
+# Build the CUDA runtime sidecar with CUDA on and LLVM off, so it does not need
+# the LLVM prefix; the main CPU wheel links LLVM statically. The manylinux 
image
+# ships no cmake/ninja, so install the build tools here.
+export PATH="/opt/python/cp310-cp310/bin:/usr/local/cuda/bin:${PATH}"
+"${python_bin}" -m pip install -U pip cmake ninja
+nvcc --version
+
+rm -rf "${build_dir}"
+# CMAKE_CUDA_COMPILER only tells CMake which nvcc to use; it does not affect 
the
+# resulting libtvm_runtime_cuda.so, which is built only from .cc host sources 
(no
+# .cu device code, so nvcc is never invoked for it). CMAKE_CUDA_ARCHITECTURES 
is
+# intentionally not set: it would be a no-op here for the same reason 
(verified --
+# the .so is byte-identical across arch values and carries no device code), and
+# modern CMake fills in a default so configure does not fail without it.
+cmake -S "${repo_root}" -B "${build_dir}" \
+  -DCMAKE_BUILD_TYPE=Release \
+  -DBUILD_TESTING=OFF \
+  -DTVM_BUILD_PYTHON_MODULE=ON \
+  -DUSE_CUDA=/usr/local/cuda \
+  -DUSE_LLVM=OFF \
+  -DUSE_CUBLAS=OFF -DUSE_CUDNN=OFF -DUSE_CUTLASS=OFF -DUSE_NCCL=OFF 
-DUSE_NVTX=OFF \
+  -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc
+cmake --build "${build_dir}" --target tvm_runtime tvm_runtime_cuda --parallel 
"${parallel}"
+
+cuda_lib="${build_dir}/lib/libtvm_runtime_cuda.so"
+test -f "${cuda_lib}"
+patchelf --set-rpath '$ORIGIN' "${cuda_lib}"
+echo "CUDA runtime: ${cuda_lib}"
diff --git a/ci/scripts/package/windows_build_libtvm_runtime_cuda.bat 
b/ci/scripts/package/windows_build_libtvm_runtime_cuda.bat
new file mode 100644
index 0000000000..20523394ee
--- /dev/null
+++ b/ci/scripts/package/windows_build_libtvm_runtime_cuda.bat
@@ -0,0 +1,98 @@
+@echo off
+rem Licensed to the Apache Software Foundation (ASF) under one
+rem or more contributor license agreements.  See the NOTICE file
+rem distributed with this work for additional information
+rem regarding copyright ownership.  The ASF licenses this file
+rem to you under the Apache License, Version 2.0 (the
+rem "License"); you may not use this file except in compliance
+rem with the License.  You may obtain a copy of the License at
+rem
+rem   http://www.apache.org/licenses/LICENSE-2.0
+rem
+rem Unless required by applicable law or agreed to in writing,
+rem software distributed under the License is distributed on an
+rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+rem KIND, either express or implied.  See the License for the
+rem specific language governing permissions and limitations
+rem under the License.
+rem
+rem Build tvm_runtime_cuda.dll on a Windows runner, run by the build_cuda_runtime
+rem CI job (on the host; unlike Linux there is no build container on Windows).
+rem Installs the pinned CUDA toolkit via conda and builds the sidecar into
+rem build-wheel-cuda\lib\ for the wheel build to bundle. Windows mirror of
+rem manylinux_build_libtvm_runtime_cuda.sh. Run with: shell: cmd.
+setlocal enableextensions
+
+rem repo root = this script's directory / ..\..\.. (native Windows paths; no cygpath).
+rem pushd + %CD% turns the relative path into an absolute one.
+pushd "%~dp0..\..\.." || exit /b 1
+set "repo_root=%CD%"
+popd
+set "build_dir=%repo_root%\build-wheel-cuda"
+set "cuda_prefix=C:\opt\cuda"
+
+rem Locate conda: the runner ships Miniconda (exposed via %CONDA%) but it may not be
+rem on PATH in this shell.
+set "conda_exe=conda"
+where conda >nul 2>nul || set "conda_exe=%CONDA%\Scripts\conda.exe"
+
+rem Install the pinned CUDA toolkit via conda from the nvidia channel, mirroring the
+rem LLVM-via-conda install used elsewhere. The win-64 channel caps at 13.0.x, matching
+rem the Linux hook's CUDA 13.0.2. The nvidia CDN occasionally returns a transient
+rem HTTP 5xx, so retry once; a half-finished first attempt can leave the prefix
+rem partially populated, so wipe it before retrying.
+if not exist "%cuda_prefix%\Library\bin\nvcc.exe" (
+  call "%conda_exe%" create -q -p "%cuda_prefix%" -c nvidia/label/cuda-13.0.2 cuda-toolkit -y
+  if errorlevel 1 (
+    if exist "%cuda_prefix%" rmdir /s /q "%cuda_prefix%"
+    call "%conda_exe%" create -q -p "%cuda_prefix%" -c nvidia/label/cuda-13.0.2 cuda-toolkit -y || exit /b 1
+  )
+)
+
+rem conda lays the Windows toolkit out under <prefix>\Library (bin\nvcc.exe,
+rem lib\x64\cudart.lib, include\...). Discover the root from nvcc.exe so TVM's
+rem FindCUDA MSVC branch resolves against the real layout instead of a hardcode.
+rem Only the first nvcc.exe reported by dir is kept.
+set "nvcc_exe="
+for /f "delims=" %%i in ('dir /s /b "%cuda_prefix%\nvcc.exe" 2^>nul') do if not defined nvcc_exe set "nvcc_exe=%%i"
+if not defined nvcc_exe ( echo nvcc.exe not found under %cuda_prefix% & exit /b 1 )
+rem cuda_root = dirname(dirname(nvcc)) = <prefix>\Library
+for %%i in ("%nvcc_exe%") do set "nvcc_bin=%%~dpi"
+pushd "%nvcc_bin%.." || exit /b 1
+set "cuda_root=%CD%"
+popd
+set "CUDA_PATH=%cuda_root%"
+
+python -m pip install -U pip cmake ninja || exit /b 1
+"%nvcc_exe%" --version || exit /b 1
+
+rem nvcc needs the MSVC host compiler (cl.exe), so locate VS via vswhere and run the
+rem cmake configure+build inside vcvars64 (this shell is not a VS Developer prompt).
+set "vswhere=C:\Program Files (x86)\Microsoft Visual Studio\Installer\vswhere.exe"
+set "vs_path="
+for /f "usebackq delims=" %%i in (`"%vswhere%" -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath`) do set "vs_path=%%i"
+if not defined vs_path ( echo Visual Studio with VC tools not found & exit /b 1 )
+set "vcvars=%vs_path%\VC\Auxiliary\Build\vcvars64.bat"
+
+rem Always configure from a clean build tree.
+if exist "%build_dir%" rmdir /s /q "%build_dir%"
+
+rem CMAKE_CUDA_COMPILER only tells CMake which nvcc to use (load-bearing: the conda
+rem nvcc is not on PATH); it does not affect the resulting tvm_runtime_cuda.dll, which
+rem is built only from .cc host sources (no .cu device code). CMAKE_CUDA_ARCHITECTURES
+rem is intentionally not set -- a no-op for the same reason, and modern CMake fills a
+rem default. -allow-unsupported-compiler guards against the runner's MSVC being newer
+rem than the CUDA toolkit officially supports. The cmake command is kept on one line:
+rem `^` continuations in a batch file break on any trailing whitespace.
+rem CMake parses backslashes in string values as escapes (e.g. C:\opt -> invalid \o),
+rem so hand cmake forward-slash paths. cmd builtins (rmdir / if exist) keep backslashes.
+set "repo_root_fwd=%repo_root:\=/%"
+set "build_dir_fwd=%build_dir:\=/%"
+set "cuda_root_fwd=%cuda_root:\=/%"
+set "nvcc_fwd=%nvcc_exe:\=/%"
+
+call "%vcvars%" || exit /b 1
+cmake -S "%repo_root_fwd%" -B "%build_dir_fwd%" -G Ninja -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=OFF -DTVM_BUILD_PYTHON_MODULE=ON -DUSE_CUDA="%cuda_root_fwd%" -DUSE_LLVM=OFF -DUSE_CUBLAS=OFF -DUSE_CUDNN=OFF -DUSE_CUTLASS=OFF -DUSE_NCCL=OFF -DUSE_NVTX=OFF -DCMAKE_CUDA_COMPILER="%nvcc_fwd%" -DCMAKE_CUDA_FLAGS="-allow-unsupported-compiler" || exit /b 1
+cmake --build "%build_dir_fwd%" --target tvm_runtime tvm_runtime_cuda --config Release || exit /b 1
+
+if not exist "%build_dir%\lib\tvm_runtime_cuda.dll" ( echo tvm_runtime_cuda.dll was not produced & exit /b 1 )
+rem No patchelf/rpath step on Windows; delvewheel vendors dependencies at repair time.
+echo CUDA runtime: %build_dir%\lib\tvm_runtime_cuda.dll
+endlocal
diff --git a/cmake/modules/CUDA.cmake b/cmake/modules/CUDA.cmake
index ec6160e7af..0028e04fcc 100644
--- a/cmake/modules/CUDA.cmake
+++ b/cmake/modules/CUDA.cmake
@@ -67,6 +67,10 @@ if(USE_CUDA)
 
   add_library(tvm_runtime_cuda_objs OBJECT ${RUNTIME_CUDA_SRCS} 
${VM_CUDA_BUILTIN_SRC_CC})
   target_link_libraries(tvm_runtime_cuda_objs PUBLIC tvm_ffi_header)
+  # These sources compile into tvm_runtime_cuda.dll, so their TVM_RUNTIME_DLL /
+  # TVM_FFI_DLL symbols must be dllexport on MSVC (e.g. GetCudaDeviceCount in
+  # cuda_device_api.cc). Mirror tvm_runtime_objs; a no-op on non-MSVC 
platforms.
+  target_compile_definitions(tvm_runtime_cuda_objs PRIVATE TVM_RUNTIME_EXPORTS 
TVM_FFI_EXPORTS)
   set_target_properties(tvm_runtime_cuda_objs PROPERTIES 
POSITION_INDEPENDENT_CODE ON)
   if(TVM_VISIBILITY_FLAG)
     target_compile_options(tvm_runtime_cuda_objs PRIVATE 
"${TVM_VISIBILITY_FLAG}")
@@ -74,15 +78,7 @@ if(USE_CUDA)
   add_library(tvm_runtime_cuda SHARED $<TARGET_OBJECTS:tvm_runtime_cuda_objs>)
   list(APPEND TVM_RUNTIME_BACKEND_LIBS tvm_runtime_cuda)
   target_link_libraries(tvm_runtime_cuda PUBLIC tvm_runtime 
${CUDA_CUDART_LIBRARY} ${CUDA_CUDA_LIBRARY})
-  set_target_properties(tvm_runtime_cuda PROPERTIES
-    LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
-    RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
-    ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
-  )
-  install(TARGETS tvm_runtime_cuda DESTINATION lib${LIB_SUFFIX})
-  if(TVM_BUILD_PYTHON_MODULE)
-    install(TARGETS tvm_runtime_cuda DESTINATION "lib")
-  endif()
+  tvm_configure_target_library(tvm_runtime_cuda RUNTIME_MODULE)
 
   if(USE_NVTX)
     message(STATUS "Build with NVTX support")
@@ -102,7 +98,7 @@ if(USE_CUDA AND USE_CUDNN)
   add_library(tvm_cudnn_objs OBJECT ${CONTRIB_CUDNN_SRCS})
   target_link_libraries(tvm_cudnn_objs PRIVATE tvm_runtime_extra_defs)
   target_link_libraries(tvm_runtime_extra PRIVATE tvm_cudnn_objs 
${CUDA_CUDNN_LIBRARY})
-endif(USE_CUDNN)
+endif(USE_CUDA AND USE_CUDNN)
 
 if(USE_CUDA AND USE_CUDNN_FRONTEND)
   message(STATUS "Build with cuDNN Frontend support")
diff --git a/cmake/modules/Hexagon.cmake b/cmake/modules/Hexagon.cmake
index 431b15b13a..c92fc70799 100644
--- a/cmake/modules/Hexagon.cmake
+++ b/cmake/modules/Hexagon.cmake
@@ -346,13 +346,5 @@ elseif(USE_HEXAGON)
   add_library(tvm_runtime_hexagon SHARED 
$<TARGET_OBJECTS:tvm_runtime_hexagon_objs>)
   list(APPEND TVM_RUNTIME_BACKEND_LIBS tvm_runtime_hexagon)
   target_link_libraries(tvm_runtime_hexagon PUBLIC tvm_runtime)
-  set_target_properties(tvm_runtime_hexagon PROPERTIES
-    LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
-    RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
-    ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
-  )
-  install(TARGETS tvm_runtime_hexagon DESTINATION lib${LIB_SUFFIX})
-  if(TVM_BUILD_PYTHON_MODULE)
-    install(TARGETS tvm_runtime_hexagon DESTINATION "lib")
-  endif()
+  tvm_configure_target_library(tvm_runtime_hexagon RUNTIME_MODULE)
 endif()
diff --git a/cmake/modules/Metal.cmake b/cmake/modules/Metal.cmake
index 72e7585534..c593d0d420 100644
--- a/cmake/modules/Metal.cmake
+++ b/cmake/modules/Metal.cmake
@@ -30,15 +30,7 @@ if(USE_METAL)
   add_library(tvm_runtime_metal SHARED 
$<TARGET_OBJECTS:tvm_runtime_metal_objs>)
   list(APPEND TVM_RUNTIME_BACKEND_LIBS tvm_runtime_metal)
   target_link_libraries(tvm_runtime_metal PUBLIC tvm_runtime ${METAL_LIB} 
${FOUNDATION_LIB})
-  set_target_properties(tvm_runtime_metal PROPERTIES
-    LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
-    RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
-    ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
-  )
-  install(TARGETS tvm_runtime_metal DESTINATION lib${LIB_SUFFIX})
-  if(TVM_BUILD_PYTHON_MODULE)
-    install(TARGETS tvm_runtime_metal DESTINATION "lib")
-  endif()
+  tvm_configure_target_library(tvm_runtime_metal RUNTIME_MODULE)
 endif(USE_METAL)
 # When USE_METAL=OFF the codegen-side fallback in
 # src/target/metal/metal_fallback_module.cc handles construction; no opt
diff --git a/cmake/modules/OpenCL.cmake b/cmake/modules/OpenCL.cmake
index 9a1c20a5a5..f833832d4c 100644
--- a/cmake/modules/OpenCL.cmake
+++ b/cmake/modules/OpenCL.cmake
@@ -46,15 +46,7 @@ if(USE_OPENCL)
   add_library(tvm_runtime_opencl SHARED 
$<TARGET_OBJECTS:tvm_runtime_opencl_objs>)
   list(APPEND TVM_RUNTIME_BACKEND_LIBS tvm_runtime_opencl)
   target_link_libraries(tvm_runtime_opencl PUBLIC tvm_runtime ${_opencl_libs})
-  set_target_properties(tvm_runtime_opencl PROPERTIES
-    LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
-    RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
-    ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
-  )
-  install(TARGETS tvm_runtime_opencl DESTINATION lib${LIB_SUFFIX})
-  if(TVM_BUILD_PYTHON_MODULE)
-    install(TARGETS tvm_runtime_opencl DESTINATION "lib")
-  endif()
+  tvm_configure_target_library(tvm_runtime_opencl RUNTIME_MODULE)
 
   if(USE_OPENCL_ENABLE_HOST_PTR)
     add_definitions(-DOPENCL_ENABLE_HOST_PTR)
diff --git a/cmake/modules/ROCM.cmake b/cmake/modules/ROCM.cmake
index b974aa4129..a2d1516558 100644
--- a/cmake/modules/ROCM.cmake
+++ b/cmake/modules/ROCM.cmake
@@ -48,15 +48,7 @@ if(USE_ROCM)
   add_library(tvm_runtime_rocm SHARED $<TARGET_OBJECTS:tvm_runtime_rocm_objs>)
   list(APPEND TVM_RUNTIME_BACKEND_LIBS tvm_runtime_rocm)
   target_link_libraries(tvm_runtime_rocm PUBLIC tvm_runtime ${_rocm_libs})
-  set_target_properties(tvm_runtime_rocm PROPERTIES
-    LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
-    RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
-    ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
-  )
-  install(TARGETS tvm_runtime_rocm DESTINATION lib${LIB_SUFFIX})
-  if(TVM_BUILD_PYTHON_MODULE)
-    install(TARGETS tvm_runtime_rocm DESTINATION "lib")
-  endif()
+  tvm_configure_target_library(tvm_runtime_rocm RUNTIME_MODULE)
 endif(USE_ROCM)
 
 # HIPBLAS contrib goes into libtvm_runtime_extra.
diff --git a/cmake/modules/Vulkan.cmake b/cmake/modules/Vulkan.cmake
index 6821b4419b..ba51e4b842 100644
--- a/cmake/modules/Vulkan.cmake
+++ b/cmake/modules/Vulkan.cmake
@@ -55,13 +55,5 @@ if(USE_VULKAN)
   add_library(tvm_runtime_vulkan SHARED 
$<TARGET_OBJECTS:tvm_runtime_vulkan_objs>)
   list(APPEND TVM_RUNTIME_BACKEND_LIBS tvm_runtime_vulkan)
   target_link_libraries(tvm_runtime_vulkan PUBLIC tvm_runtime 
${Vulkan_LIBRARY})
-  set_target_properties(tvm_runtime_vulkan PROPERTIES
-    LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
-    RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
-    ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
-  )
-  install(TARGETS tvm_runtime_vulkan DESTINATION lib${LIB_SUFFIX})
-  if(TVM_BUILD_PYTHON_MODULE)
-    install(TARGETS tvm_runtime_vulkan DESTINATION "lib")
-  endif()
+  tvm_configure_target_library(tvm_runtime_vulkan RUNTIME_MODULE)
 endif(USE_VULKAN)
diff --git a/cmake/utils/FindLLVM.cmake b/cmake/utils/FindLLVM.cmake
index 8aa9c8b1b9..2bf229eca7 100644
--- a/cmake/utils/FindLLVM.cmake
+++ b/cmake/utils/FindLLVM.cmake
@@ -210,8 +210,13 @@ macro(find_llvm use_llvm)
         message(STATUS "LLVM links against xml2")
         list(APPEND LLVM_LIBS "-lxml2")
       elseif("${__flag}" STREQUAL "zstd.dll.lib")
-        message(STATUS "LLVM linker flag under LLVM libdir: 
${__llvm_libdir}/zstd.lib")
-        list(APPEND LLVM_LIBS "${__llvm_libdir}/zstd.lib")
+        if (EXISTS "${__llvm_libdir}/zstd_static.lib")
+          message(STATUS "LLVM links against static zstd")
+          list(APPEND LLVM_LIBS "${__llvm_libdir}/zstd_static.lib")
+        else()
+          message(STATUS "LLVM linker flag under LLVM libdir: 
${__llvm_libdir}/zstd.lib")
+          list(APPEND LLVM_LIBS "${__llvm_libdir}/zstd.lib")
+        endif()
       elseif((__flag MATCHES ".lib$") AND (EXISTS 
"${__llvm_libdir}/${__flag}"))
         # If the library file ends in .lib try to also search the llvm_libdir
         message(STATUS "LLVM linker flag under LLVM libdir: 
${__llvm_libdir}/${__flag}")
diff --git a/cmake/utils/Library.cmake b/cmake/utils/Library.cmake
new file mode 100644
index 0000000000..93cc748ae4
--- /dev/null
+++ b/cmake/utils/Library.cmake
@@ -0,0 +1,65 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Helpers for configuring library targets.
+
+#######################################################
+# tvm_configure_target_library(target_name [RUNTIME_MODULE])
+#
+# Apply TVM's common settings to a library target.
+#
+# Every target receives a loader-relative rpath (@loader_path on Apple
+# platforms, $ORIGIN on other UNIX platforms) for both the build tree and the
+# install tree, so that shared libraries sitting side by side in one directory
+# (e.g. inside a Python wheel) resolve each other wherever that directory ends
+# up. No rpath property is touched on non-UNIX platforms.
+#
+# RUNTIME_MODULE is meant for the optional runtime backend libraries
+# (tvm_runtime_cuda, tvm_runtime_vulkan, ...). With it the target additionally
+#   - has its library / runtime / archive outputs routed to
+#     ${CMAKE_BINARY_DIR}/lib,
+#   - is installed into lib${LIB_SUFFIX}, and
+#   - is installed into "lib" as well when TVM_BUILD_PYTHON_MODULE is set.
+# Targets that manage their own output directory / install rules (tvm_compiler,
+# tvm_runtime, ...) leave the option out and only get the rpath.
+#
+# Does nothing when target_name is not an existing target.
+function(tvm_configure_target_library target_name)
+  if(NOT TARGET ${target_name})
+    return()
+  endif()
+  cmake_parse_arguments(ARG "RUNTIME_MODULE" "" "" ${ARGN})
+
+  # Platform spelling of "the directory that contains this library"; stays
+  # empty where no rpath should be set.
+  set(_tvm_sibling_rpath "")
+  if(APPLE)
+    set(_tvm_sibling_rpath "@loader_path")
+  elseif(UNIX)
+    set(_tvm_sibling_rpath "\$ORIGIN")
+  endif()
+  if(NOT _tvm_sibling_rpath STREQUAL "")
+    set_property(TARGET ${target_name} PROPERTY BUILD_RPATH "${_tvm_sibling_rpath}")
+    set_property(TARGET ${target_name} PROPERTY INSTALL_RPATH "${_tvm_sibling_rpath}")
+  endif()
+
+  # Everything below applies to runtime backend modules only.
+  if(NOT ARG_RUNTIME_MODULE)
+    return()
+  endif()
+
+  # Emit every artifact kind into the build tree's lib directory.
+  foreach(_tvm_artifact_kind LIBRARY RUNTIME ARCHIVE)
+    set_property(TARGET ${target_name} PROPERTY
+      ${_tvm_artifact_kind}_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
+  endforeach()
+
+  install(TARGETS ${target_name} DESTINATION lib${LIB_SUFFIX})
+  if(TVM_BUILD_PYTHON_MODULE)
+    # Second install rule: the Python package's own "lib" directory.
+    install(TARGETS ${target_name} DESTINATION "lib")
+  endif()
+endfunction()
diff --git a/pyproject.toml b/pyproject.toml
index 2e400f0609..c5a16defbb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,7 +16,7 @@
 # under the License.
 
 [build-system]
-requires = ["scikit-build-core>=0.10.0"]
+requires = ["scikit-build-core>=0.11"]
 build-backend = "scikit_build_core.build"
 
 [project]
@@ -25,7 +25,8 @@ name = "tvm"
 version = "0.25.dev0"
 description = "Apache TVM: An End-to-End Deep Learning Compiler Stack"
 readme = "README.md"
-license = { text = "Apache-2.0" }
+license = "Apache-2.0"
+license-files = ["LICENSE"]
 requires-python = ">=3.10"
 authors = [{ name = "Apache TVM Community", email = "[email protected]" }]
 keywords = ["machine learning", "compiler", "deep learning", "inference"]
@@ -34,55 +35,38 @@ classifiers = [
   "Intended Audience :: Developers",
   "Intended Audience :: Education",
   "Intended Audience :: Science/Research",
-  "License :: OSI Approved :: Apache Software License",
   "Programming Language :: Python :: 3",
-  "Programming Language :: Python :: 3.9",
   "Programming Language :: Python :: 3.10",
   "Programming Language :: Python :: 3.11",
   "Programming Language :: Python :: 3.12",
+  "Programming Language :: Python :: 3.13",
   "Topic :: Scientific/Engineering :: Artificial Intelligence",
   "Topic :: Software Development :: Libraries :: Python Modules",
 ]
-# Core dependencies - these are the minimum required for basic TVM 
functionality
 dependencies = [
-  "apache-tvm-ffi",
-  "cloudpickle",
+  "apache-tvm-ffi>=0.1.11",
   "ml_dtypes",
   "numpy",
-  "packaging",
   "psutil",
-  "scipy",
-  "tornado",
   "typing_extensions",
 ]
 
-# Optional dependencies for different features
 [project.optional-dependencies]
-# Model importers
-importer-coreml = ["coremltools"]
-importer-keras = ["tensorflow", "tensorflow-estimator"]
-importer-onnx = [
-  "future",
-  "onnx",
-  "onnxoptimizer",
-  "onnxruntime",
-  "torch",
-  "torchvision",
-]
+importer-onnx = ["onnx", "onnxoptimizer", "onnxruntime"]
 importer-pytorch = ["torch", "torchvision"]
-importer-tensorflow = ["tensorflow", "tensorflow-estimator"]
 importer-tflite = ["tflite"]
-importer-paddle = ["paddlepaddle"]
+coreml = ["coremltools"]
+meta-schedule = ["xgboost"]
+all = ["xgboost"]
 
-# AutoTVM and autoscheduler
-autotvm = ["xgboost"]
-autoscheduler = ["xgboost"]
+[project.urls]
+Homepage = "https://tvm.apache.org/"
+Documentation = "https://tvm.apache.org/docs/"
+Repository = "https://github.com/apache/tvm"
+"Bug Tracker" = "https://github.com/apache/tvm/issues"
 
-# Development and testing
-dev = [
-  "ruff",
-  "mypy",
-  "pre-commit",
+[dependency-groups]
+test = [
   "pytest",
   "pytest-xdist",
   "pytest-cov",
@@ -92,42 +76,13 @@ dev = [
   "pytest-rerunfailures",
   "pytest-repeat",
 ]
-
-# All optional dependencies (excluding dev)
-all = [
-  "coremltools",
-  "tensorflow",
-  "tensorflow-estimator",
-  "future",
-  "onnx",
-  "onnxoptimizer",
-  "onnxruntime",
-  "torch",
-  "torchvision",
-  "tflite",
-  "paddlepaddle",
-  "xgboost",
-]
-
-[project.urls]
-Homepage = "https://tvm.apache.org/"
-Documentation = "https://tvm.apache.org/docs/"
-Repository = "https://github.com/apache/tvm"
-"Bug Tracker" = "https://github.com/apache/tvm/issues"
+lint = ["ruff", "pre-commit"]
+dev = [{ include-group = "test" }, { include-group = "lint" }]
 
 [tool.scikit-build]
-# Point to the root CMakeLists.txt
-cmake.source-dir = "."
 cmake.build-type = "Release"
-
-# Configure the wheel to be Python version-agnostic
 wheel.py-api = "py3"
-
-# Build configuration
-build-dir = "build"
-
-# CMake configuration - ensure proper installation paths
-cmake.args = ["-DTVM_BUILD_PYTHON_MODULE=ON"]
+build-dir = "build/{wheel_tag}"
 
 # Wheel configuration
 wheel.packages = ["python/tvm"]
@@ -139,7 +94,6 @@ sdist.include = [
   "/CMakeLists.txt",
   "/pyproject.toml",
   "/cmake/**/*",
-  "/  */*",
 
   # Source code
   "/src/**/*.cc",
@@ -176,6 +130,11 @@ sdist.exclude = [
 # Logging
 logging.level = "INFO"
 
+[tool.scikit-build.cmake.define]
+TVM_BUILD_PYTHON_MODULE = "ON"
+USE_CUDA = "OFF"
+BUILD_TESTING = "OFF"
+
 [tool.pytest.ini_options]
 testpaths = ["tests"]
 addopts = "-v --tb=short"
@@ -205,15 +164,7 @@ include = [
 line-length = 100
 indent-width = 4
 target-version = "py310"
-exclude = [
-  "3rdparty",
-  "build",
-  "dist",
-  ".venv",
-  ".mypy_cache",
-  ".ruff_cache",
-  "node_modules",
-]
+exclude = ["3rdparty", "build", "dist", ".venv", ".ruff_cache", "node_modules"]
 
 [tool.ruff.lint]
 select = [
@@ -257,32 +208,19 @@ line-ending = "auto"
 docstring-code-format = false
 docstring-code-line-length = "dynamic"
 
-[tool.mypy]
-python_version = "3.9"
-show_error_codes = true
-mypy_path = ["python"]
-files = ["python/tvm"]
-namespace_packages = true
-explicit_package_bases = true
-allow_redefinition = true
-ignore_missing_imports = true
-follow_imports = "skip"
-strict_optional = false
-exclude = '''(?x)(
-    ^\.venv/|
-    ^build/|
-    ^dist/|
-    ^\.mypy_cache/|
-    ^3rdparty/
-)'''
+[tool.cibuildwheel]
+# Skip win32, i686 (32-bit), and musllinux wheels.
+skip = "*-win32 *-manylinux_i686 *-musllinux*"
+build-verbosity = 1
+test-requires = ["pytest", "numpy"]
+test-command = "pytest -p no:tvm.testing.plugin -vvs {project}/tests/python/wheel && pytest -vvs {project}/tests/python/all-platform-minimal-test"
 
-[[tool.mypy.overrides]]
-module = ["python.tvm.auto_scheduler.*"]
-ignore_errors = true
+[tool.cibuildwheel.linux]
+repair-wheel-command = "auditwheel repair --exclude libtvm_ffi.so --exclude libtvm_runtime_cuda.so --exclude 'libcuda.so.*' --exclude 'libcudart.so.*' --exclude 'libnvrtc.so.*' --exclude 'libnvrtc-builtins.so.*' -w {dest_dir} {wheel}"
 
-[[tool.mypy.overrides]]
-module = ["python.tvm.runtime.*"]
-ignore_errors = true
+[tool.cibuildwheel.macos]
+repair-wheel-command = 'delocate-wheel --ignore-missing-dependencies --exclude libtvm_ffi.dylib --require-archs {delocate_archs} -w {dest_dir} -v {wheel}'
 
-[dependency-groups]
-lint = ["pre-commit"]
+[tool.cibuildwheel.windows]
+before-build = 'python -m pip install delvewheel'
+repair-wheel-command = "delvewheel repair --analyze-existing --ignore-existing --exclude tvm_ffi.dll --exclude libtvm_ffi.dll --exclude tvm_runtime_cuda.dll --exclude nvcuda.dll --exclude cudart64_13.dll --exclude nvrtc64_130_0.dll -w {dest_dir} {wheel}"
diff --git a/tests/lint/check_file_type.py b/tests/lint/check_file_type.py
index b561f638c4..6c17a83de6 100644
--- a/tests/lint/check_file_type.py
+++ b/tests/lint/check_file_type.py
@@ -39,6 +39,7 @@ ALLOW_EXTENSION = {
     "mjs",
     "ts",
     "sh",
+    "bat",
     "py",
     # configurations
     "cfg",
diff --git a/tests/python/wheel/test_validate_runtime_library.py 
b/tests/python/wheel/test_validate_runtime_library.py
new file mode 100644
index 0000000000..10a455f2a9
--- /dev/null
+++ b/tests/python/wheel/test_validate_runtime_library.py
@@ -0,0 +1,50 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Post-install checks for a built TVM wheel.
+
+Run by cibuildwheel against the installed wheel (``test-command`` in
+``[tool.cibuildwheel]``). These assert the two wheel-specific things the standard
+``tests/python/all-platform-minimal-test`` suite cannot: that LLVM is enabled (its
+LLVM test merely *skips* when LLVM is absent), and that the CUDA runtime library
+got bundled (when ``TVM_WHEEL_EXPECT_CUDA_RUNTIME=1``). The functional LLVM
+compile / ndarray ops are covered by that all-platform suite.
+"""
+
+import glob
+import os
+from pathlib import Path
+
+import pytest
+
+import tvm
+
+
+def test_llvm_enabled():
+    """Every TVM wheel ships with LLVM enabled. The all-platform suite only 
skips
+    (does not fail) when LLVM is absent, so assert presence here."""
+    assert tvm.runtime.enabled("llvm"), "wheel was not built with LLVM enabled"
+
+
+def test_cuda_runtime_present():
+    """The bundled CUDA runtime library must be present in tvm/lib."""
+    if os.environ.get("TVM_WHEEL_EXPECT_CUDA_RUNTIME") != "1":
+        pytest.skip("CUDA runtime not expected in this wheel")
+    libdir = Path(tvm.__file__).resolve().parent / "lib"
+    present = glob.glob(str(libdir / "libtvm_runtime_cuda.*")) or glob.glob(
+        str(libdir / "tvm_runtime_cuda.*")
+    )
+    assert present, "CUDA runtime expected but not bundled in tvm/lib"

Reply via email to