This is an automated email from the ASF dual-hosted git repository.
github-actions[bot] pushed a change to branch nightly
in repository https://gitbox.apache.org/repos/asf/tvm.git
from 8039963c23 [Web][COS] Persist URL→hash mapping across page loads (#19569)
add dda158ca10 [Fix][Relax] Support ND batched matmul chains in AdjustMatmulOrder pass (#19650)
add 4a688ddcbc [Relax][Frontend][TFLite] Add EMBEDDING_LOOKUP_SPARSE converter (#19652)
add 5cbf50628b [CI] Add cibw-based wheel publishing to PyPI (#19656)
add 57c638fc7c [TIRx] Post-bringup op-dispatch / codegen / TVMScript follow-ups (#19657)
add a979b2f98c [RPC] Import tvm.testing lazily in rpc.testing (#19658)
No new revisions were added by this update.
Summary of changes:
.github/actions/build-wheel-for-publish/action.yml | 144 ++
.github/actions/setup/action.yml | 14 +-
.github/workflows/publish_wheel.yml | 255 ++++
.gitignore | 7 +
CMakeLists.txt | 65 +-
.../scripts/package}/README.md | 20 +-
.../scripts/package}/build-environment.yaml | 2 +
.../package/manylinux_build_libtvm_runtime_cuda.sh | 70 +
.../package/windows_build_libtvm_runtime_cuda.bat | 98 ++
cmake/modules/CUDA.cmake | 16 +-
cmake/modules/Hexagon.cmake | 10 +-
cmake/modules/Metal.cmake | 10 +-
cmake/modules/OpenCL.cmake | 10 +-
cmake/modules/ROCM.cmake | 10 +-
cmake/modules/Vulkan.cmake | 10 +-
cmake/utils/FindLLVM.cmake | 9 +-
cmake/utils/Library.cmake | 65 +
include/tvm/tirx/builtin.h | 9 +
include/tvm/tirx/exec_context.h | 4 +-
include/tvm/tirx/exec_scope.h | 26 +-
include/tvm/tirx/layout.h | 10 +-
include/tvm/tirx/script/builder/ir.h | 11 +
include/tvm/tirx/stmt.h | 38 +
include/tvm/tirx/stmt_functor.h | 4 +
include/tvm/tirx/tirx_op.h | 80 --
pyproject.toml | 138 +-
.../tvm/relax/frontend/tflite/tflite_frontend.py | 118 ++
python/tvm/rpc/testing.py | 8 +-
python/tvm/tirx/__init__.py | 5 +-
python/tvm/tirx/buffer.py | 10 +-
python/tvm/tirx/exec_context.py | 2 +-
python/tvm/tirx/exec_scope.py | 25 +-
python/tvm/tirx/lang/alloc_pool.py | 104 +-
python/tvm/tirx/lang/pipeline.py | 169 +--
python/tvm/tirx/lang/warp_role.py | 4 +-
python/tvm/tirx/layout.py | 349 ++++-
python/tvm/tirx/op.py | 178 ++-
python/tvm/tirx/operator/intrinsics/cuda/mma.py | 150 +-
.../tvm/tirx/operator/tile_primitive/__init__.py | 3 +-
.../operator/tile_primitive/cuda/copy/__init__.py | 8 +-
.../operator/tile_primitive/cuda/copy/_common.py | 540 +++++++
.../tile_primitive/cuda/copy/_swizzle_iter.py | 404 ++++++
.../tile_primitive/cuda/copy/collective.py | 162 ---
.../operator/tile_primitive/cuda/copy/fallback.py | 116 ++
.../operator/tile_primitive/cuda/copy/gmem_smem.py | 303 ++++
.../tile_primitive/cuda/copy/ld_stmatrix.py | 454 ++++++
.../tirx/operator/tile_primitive/cuda/copy/reg.py | 595 ++++++++
.../operator/tile_primitive/cuda/copy/scalar.py | 53 -
.../operator/tile_primitive/cuda/copy/utils.py | 92 +-
.../tile_primitive/cuda/copy/vectorized.py | 63 -
.../tile_primitive/cuda/copy_async/__init__.py | 2 +-
.../tile_primitive/cuda/copy_async/cp_async.py | 56 -
.../tile_primitive/cuda/copy_async/ldgsts.py | 275 ++++
.../tile_primitive/cuda/copy_async/tcgen05_ldst.py | 311 +++-
.../tile_primitive/cuda/elementwise/__init__.py | 24 +-
.../tile_primitive/cuda/elementwise/_common.py | 445 ++++--
.../cuda/elementwise/ops/__init__.py | 121 ++
.../tile_primitive/cuda/elementwise/ops/binary.py | 127 ++
.../tile_primitive/cuda/elementwise/ops/cast.py | 45 +
.../tile_primitive/cuda/elementwise/ops/fma.py | 50 +
.../tile_primitive/cuda/elementwise/ops/unary.py | 117 ++
.../tile_primitive/cuda/elementwise/reg.py | 361 +++++
.../tile_primitive/cuda/elementwise/register.py | 55 +-
.../cuda/elementwise/schedule_collective_reg.py | 410 ------
.../cuda/elementwise/schedule_collective_smem.py | 132 --
.../cuda/elementwise/schedule_thread.py | 121 --
.../tile_primitive/cuda/elementwise/schema.py | 1165 ---------------
.../tile_primitive/cuda/elementwise/smem.py | 264 ++++
.../cuda/elementwise/vec_emit/__init__.py | 40 +
.../cuda/elementwise/vec_emit/binary_f32x2.py | 96 ++
.../cuda/elementwise/vec_emit/cast_vec2.py | 89 ++
.../cuda/elementwise/vec_emit/fma_f32x2.py | 78 +
.../tile_primitive/cuda/exec_scope_utils.py | 4 +-
.../operator/tile_primitive/cuda/gemm}/__init__.py | 12 +-
.../tile_primitive/cuda/gemm/mma_m16n8k_.py | 595 ++++++++
.../tile_primitive/cuda/gemm_async/tcgen05.py | 44 +-
.../tile_primitive/cuda/permute_dims/__init__.py | 18 -
.../cuda/permute_dims/vectorized_last_2d.py | 151 --
.../{trn/copy => cuda/permute_layout}/__init__.py | 2 +-
.../cuda/permute_layout/warp_xor_swizzle.py | 388 +++++
.../tile_primitive/cuda/reduction/shared.py | 2 +-
python/tvm/tirx/operator/tile_primitive/ops.py | 53 +-
python/tvm/tirx/script/builder/frame.py | 4 +-
python/tvm/tirx/script/builder/ir.py | 153 +-
python/tvm/tirx/script/builder/tirx.py | 50 +-
python/tvm/tirx/script/builder/tmem_pool.py | 2 +-
python/tvm/tirx/script/parser/__init__.py | 16 +-
python/tvm/tirx/script/parser/entry.py | 175 +++
python/tvm/tirx/script/parser/parser.py | 12 +-
python/tvm/tirx/stmt.py | 35 +-
python/tvm/tirx/stmt_functor.py | 55 +
.../tvm/tirx/transform/trn/private_buffer_alloc.py | 45 +-
src/relax/op/op_common.cc | 48 +-
src/relax/op/op_common.h | 30 +
src/relax/transform/adjust_matmul_order.cc | 132 +-
src/target/cuda/codegen_cuda.cc | 18 -
src/target/cuda/codegen_cuda.h | 1 -
src/target/source/codegen_c.cc | 33 +-
src/target/source/codegen_c.h | 10 +
src/tirx/analysis/exec_context.cc | 10 +-
src/tirx/analysis/filter_canonical.cc | 226 +++
src/tirx/analysis/filter_canonical.h | 160 +++
src/tirx/analysis/verify_tirx_well_formed.cc | 113 +-
src/tirx/ir/exec_scope.cc | 22 +-
src/tirx/ir/layout/axis_registry.cc | 7 +-
src/tirx/ir/layout/tile_core.cc | 38 +
src/tirx/ir/layout/tile_internal.h | 5 +
src/tirx/ir/layout/tile_tile_ops.cc | 53 +-
src/tirx/ir/stmt.cc | 12 +
src/tirx/ir/stmt_functor.cc | 90 +-
src/tirx/ir/tir_visitor_with_path.cc | 17 +
src/tirx/ir/tir_visitor_with_path.h | 1 +
src/tirx/op/builtin.cc | 15 +-
src/tirx/op/op.cc | 9 +
src/tirx/op/tirx.cc | 49 +-
src/tirx/script/builder/ir.cc | 72 +-
src/tirx/script/printer/block.cc | 30 +
src/tirx/script/printer/utils.h | 28 +-
src/tirx/transform/split_host_device.cc | 10 -
src/tirx/transform/tile_primitive_dispatch.cc | 578 ++++++--
tests/lint/check_file_type.py | 1 +
tests/python/relax/test_frontend_tflite.py | 213 +++
.../relax/test_transform_adjust_matmul_order.py | 408 +++++-
tests/python/tirx-base/test_tir_stmt_functor.py | 2 +-
tests/python/tirx/codegen/test_codegen_ampere.py | 216 +++
.../python/tirx/codegen/test_codegen_blackwell.py | 386 ++---
tests/python/tirx/codegen/test_codegen_cuda.py | 732 ++++------
tests/python/tirx/codegen/test_codegen_dsmem.py | 62 +-
tests/python/tirx/codegen/test_codegen_hopper.py | 689 +++++----
tests/python/tirx/codegen/test_codegen_nki.py | 68 +-
tests/python/tirx/codegen/test_codegen_nvshmem.py | 157 +-
tests/python/tirx/codegen/test_cuda_copy.py | 214 +--
tests/python/tirx/codegen/test_cuda_cta_reduce.py | 144 +-
tests/python/tirx/codegen/test_cuda_warp_reduce.py | 102 +-
.../tile_primitive/cuda/copy/test_fallback.py | 242 ++++
.../tile_primitive/cuda/copy/test_gmem_smem.py | 575 ++++++++
.../tile_primitive/cuda/copy/test_ld_stmatrix.py | 499 +++++++
.../operator/tile_primitive/cuda/copy/test_reg.py | 423 ++++++
.../tile_primitive/cuda/copy/test_swizzle_iter.py | 443 ++++++
.../test_dsmem.py} | 88 +-
.../test_ldgsts.py} | 33 +-
.../test_smem_tmem.py} | 372 +++--
.../test_tma.py} | 345 ++---
.../tile_primitive/cuda/copy_async/test_tmem.py | 351 +++++
.../cuda/copy_async/test_tmem_16xnb.py | 885 ++++++++++++
.../cuda/{ => elementwise}/test_binary.py | 652 +++++----
.../cuda/{ => elementwise}/test_fma.py | 190 ++-
.../cuda/{ => elementwise}/test_unary.py | 945 ++++++------
.../cuda/gemm/test_gemm_mma_m16n8k_.py | 697 +++++++++
.../cuda/{ => gemm_async}/test_gemm_async.py | 1509 +++++++++++---------
.../cuda/permute_layout/test_permute_layout.py | 425 ++++++
.../cuda/{ => reduction}/test_reduction.py | 703 ++++-----
.../tile_primitive/cuda/test_copy_async_tmem.py | 137 --
.../operator/tile_primitive/cuda/test_copy_sync.py | 440 ------
.../tile_primitive/cuda/test_permute_dims.py | 152 --
.../operator/tile_primitive/trn/test_binary_trn.py | 133 +-
.../tile_primitive/trn/test_compose_op_trn.py | 305 ++--
.../operator/tile_primitive/trn/test_copy_trn.py | 265 ++--
.../operator/tile_primitive/trn/test_gemm_trn.py | 343 ++---
.../tile_primitive/trn/test_private_alloc_trn.py | 330 ++---
.../tile_primitive/trn/test_reduction_trn.py | 91 +-
.../operator/tile_primitive/trn/test_select_trn.py | 65 +-
.../operator/tile_primitive/trn/test_unary_trn.py | 107 +-
tests/python/tirx/test_control_flow.py | 82 +-
tests/python/tirx/test_exec_scope.py | 4 -
tests/python/tirx/test_hint.py | 18 +-
tests/python/tirx/test_inline.py | 30 +-
tests/python/tirx/test_jit.py | 225 +++
tests/python/tirx/test_layout.py | 18 +-
tests/python/tirx/test_op.py | 18 +-
tests/python/tirx/test_parser_printer.py | 1206 ++++++++--------
tests/python/tirx/test_printer_tir_namespaces.py | 10 +-
tests/python/tirx/test_verifier.py | 365 +++--
tests/python/tirx/transform/test_stmt_functor.py | 15 +-
.../tirx/transform/test_transform_lower_tirx.py | 828 ++++++-----
.../transform/test_transform_naive_allocator.py | 116 +-
.../python/wheel/test_validate_runtime_library.py | 50 +
177 files changed, 20930 insertions(+), 10261 deletions(-)
create mode 100644 .github/actions/build-wheel-for-publish/action.yml
create mode 100644 .github/workflows/publish_wheel.yml
copy {tests/python/all-platform-minimal-test => ci/scripts/package}/README.md (58%)
rename {tests/conda => ci/scripts/package}/build-environment.yaml (98%)
create mode 100755 ci/scripts/package/manylinux_build_libtvm_runtime_cuda.sh
create mode 100644 ci/scripts/package/windows_build_libtvm_runtime_cuda.bat
create mode 100644 cmake/utils/Library.cmake
create mode 100644 python/tvm/tirx/operator/tile_primitive/cuda/copy/_common.py
create mode 100644
python/tvm/tirx/operator/tile_primitive/cuda/copy/_swizzle_iter.py
delete mode 100644
python/tvm/tirx/operator/tile_primitive/cuda/copy/collective.py
create mode 100644
python/tvm/tirx/operator/tile_primitive/cuda/copy/fallback.py
create mode 100644
python/tvm/tirx/operator/tile_primitive/cuda/copy/gmem_smem.py
create mode 100644
python/tvm/tirx/operator/tile_primitive/cuda/copy/ld_stmatrix.py
create mode 100644 python/tvm/tirx/operator/tile_primitive/cuda/copy/reg.py
delete mode 100644 python/tvm/tirx/operator/tile_primitive/cuda/copy/scalar.py
delete mode 100644
python/tvm/tirx/operator/tile_primitive/cuda/copy/vectorized.py
delete mode 100644
python/tvm/tirx/operator/tile_primitive/cuda/copy_async/cp_async.py
create mode 100644
python/tvm/tirx/operator/tile_primitive/cuda/copy_async/ldgsts.py
create mode 100644
python/tvm/tirx/operator/tile_primitive/cuda/elementwise/ops/__init__.py
create mode 100644
python/tvm/tirx/operator/tile_primitive/cuda/elementwise/ops/binary.py
create mode 100644
python/tvm/tirx/operator/tile_primitive/cuda/elementwise/ops/cast.py
create mode 100644
python/tvm/tirx/operator/tile_primitive/cuda/elementwise/ops/fma.py
create mode 100644
python/tvm/tirx/operator/tile_primitive/cuda/elementwise/ops/unary.py
create mode 100644
python/tvm/tirx/operator/tile_primitive/cuda/elementwise/reg.py
delete mode 100644
python/tvm/tirx/operator/tile_primitive/cuda/elementwise/schedule_collective_reg.py
delete mode 100644
python/tvm/tirx/operator/tile_primitive/cuda/elementwise/schedule_collective_smem.py
delete mode 100644
python/tvm/tirx/operator/tile_primitive/cuda/elementwise/schedule_thread.py
delete mode 100644
python/tvm/tirx/operator/tile_primitive/cuda/elementwise/schema.py
create mode 100644
python/tvm/tirx/operator/tile_primitive/cuda/elementwise/smem.py
create mode 100644
python/tvm/tirx/operator/tile_primitive/cuda/elementwise/vec_emit/__init__.py
create mode 100644
python/tvm/tirx/operator/tile_primitive/cuda/elementwise/vec_emit/binary_f32x2.py
create mode 100644
python/tvm/tirx/operator/tile_primitive/cuda/elementwise/vec_emit/cast_vec2.py
create mode 100644
python/tvm/tirx/operator/tile_primitive/cuda/elementwise/vec_emit/fma_f32x2.py
copy python/tvm/{relax/script/builder => tirx/operator/tile_primitive/cuda/gemm}/__init__.py (71%)
create mode 100644
python/tvm/tirx/operator/tile_primitive/cuda/gemm/mma_m16n8k_.py
delete mode 100644
python/tvm/tirx/operator/tile_primitive/cuda/permute_dims/__init__.py
delete mode 100644
python/tvm/tirx/operator/tile_primitive/cuda/permute_dims/vectorized_last_2d.py
copy python/tvm/tirx/operator/tile_primitive/{trn/copy => cuda/permute_layout}/__init__.py (96%)
create mode 100644
python/tvm/tirx/operator/tile_primitive/cuda/permute_layout/warp_xor_swizzle.py
create mode 100644 src/tirx/analysis/filter_canonical.cc
create mode 100644 src/tirx/analysis/filter_canonical.h
create mode 100644 tests/python/tirx/codegen/test_codegen_ampere.py
create mode 100644
tests/python/tirx/operator/tile_primitive/cuda/copy/test_fallback.py
create mode 100644
tests/python/tirx/operator/tile_primitive/cuda/copy/test_gmem_smem.py
create mode 100644
tests/python/tirx/operator/tile_primitive/cuda/copy/test_ld_stmatrix.py
create mode 100644
tests/python/tirx/operator/tile_primitive/cuda/copy/test_reg.py
create mode 100644
tests/python/tirx/operator/tile_primitive/cuda/copy/test_swizzle_iter.py
rename tests/python/tirx/operator/tile_primitive/cuda/{test_copy_dsmem.py => copy_async/test_dsmem.py} (81%)
rename tests/python/tirx/operator/tile_primitive/cuda/{test_copy_async_cta.py => copy_async/test_ldgsts.py} (80%)
rename tests/python/tirx/operator/tile_primitive/cuda/{test_smem_tmem_dispatch.py => copy_async/test_smem_tmem.py} (52%)
rename tests/python/tirx/operator/tile_primitive/cuda/{test_copy_async_tma.py => copy_async/test_tma.py} (89%)
create mode 100644
tests/python/tirx/operator/tile_primitive/cuda/copy_async/test_tmem.py
create mode 100644
tests/python/tirx/operator/tile_primitive/cuda/copy_async/test_tmem_16xnb.py
rename tests/python/tirx/operator/tile_primitive/cuda/{ => elementwise}/test_binary.py (58%)
rename tests/python/tirx/operator/tile_primitive/cuda/{ =>
elementwise}/test_fma.py (64%)
rename tests/python/tirx/operator/tile_primitive/cuda/{ =>
elementwise}/test_unary.py (59%)
create mode 100644
tests/python/tirx/operator/tile_primitive/cuda/gemm/test_gemm_mma_m16n8k_.py
rename tests/python/tirx/operator/tile_primitive/cuda/{ => gemm_async}/test_gemm_async.py (53%)
create mode 100644
tests/python/tirx/operator/tile_primitive/cuda/permute_layout/test_permute_layout.py
rename tests/python/tirx/operator/tile_primitive/cuda/{ =>
reduction}/test_reduction.py (60%)
delete mode 100644
tests/python/tirx/operator/tile_primitive/cuda/test_copy_async_tmem.py
delete mode 100644
tests/python/tirx/operator/tile_primitive/cuda/test_copy_sync.py
delete mode 100644
tests/python/tirx/operator/tile_primitive/cuda/test_permute_dims.py
create mode 100644 tests/python/tirx/test_jit.py
create mode 100644 tests/python/wheel/test_validate_runtime_library.py