This is an automated email from the ASF dual-hosted git repository.

github-actions[bot] pushed a change to branch nightly
in repository https://gitbox.apache.org/repos/asf/tvm.git


    from 8039963c23 [Web][COS] Persist URL→hash mapping across page loads (#19569)
     add dda158ca10 [Fix][Relax] Support ND batched matmul chains in AdjustMatmulOrder pass (#19650)
     add 4a688ddcbc [Relax][Frontend][TFLite] Add EMBEDDING_LOOKUP_SPARSE converter (#19652)
     add 5cbf50628b [CI] Add cibw-based wheel publishing to PyPI (#19656)
     add 57c638fc7c [TIRx] Post-bringup op-dispatch / codegen / TVMScript follow-ups (#19657)
     add a979b2f98c [RPC] Import tvm.testing lazily in rpc.testing (#19658)

No new revisions were added by this update.

Summary of changes:
 .github/actions/build-wheel-for-publish/action.yml |  144 ++
 .github/actions/setup/action.yml                   |   14 +-
 .github/workflows/publish_wheel.yml                |  255 ++++
 .gitignore                                         |    7 +
 CMakeLists.txt                                     |   65 +-
 .../scripts/package}/README.md                     |   20 +-
 .../scripts/package}/build-environment.yaml        |    2 +
 .../package/manylinux_build_libtvm_runtime_cuda.sh |   70 +
 .../package/windows_build_libtvm_runtime_cuda.bat  |   98 ++
 cmake/modules/CUDA.cmake                           |   16 +-
 cmake/modules/Hexagon.cmake                        |   10 +-
 cmake/modules/Metal.cmake                          |   10 +-
 cmake/modules/OpenCL.cmake                         |   10 +-
 cmake/modules/ROCM.cmake                           |   10 +-
 cmake/modules/Vulkan.cmake                         |   10 +-
 cmake/utils/FindLLVM.cmake                         |    9 +-
 cmake/utils/Library.cmake                          |   65 +
 include/tvm/tirx/builtin.h                         |    9 +
 include/tvm/tirx/exec_context.h                    |    4 +-
 include/tvm/tirx/exec_scope.h                      |   26 +-
 include/tvm/tirx/layout.h                          |   10 +-
 include/tvm/tirx/script/builder/ir.h               |   11 +
 include/tvm/tirx/stmt.h                            |   38 +
 include/tvm/tirx/stmt_functor.h                    |    4 +
 include/tvm/tirx/tirx_op.h                         |   80 --
 pyproject.toml                                     |  138 +-
 .../tvm/relax/frontend/tflite/tflite_frontend.py   |  118 ++
 python/tvm/rpc/testing.py                          |    8 +-
 python/tvm/tirx/__init__.py                        |    5 +-
 python/tvm/tirx/buffer.py                          |   10 +-
 python/tvm/tirx/exec_context.py                    |    2 +-
 python/tvm/tirx/exec_scope.py                      |   25 +-
 python/tvm/tirx/lang/alloc_pool.py                 |  104 +-
 python/tvm/tirx/lang/pipeline.py                   |  169 +--
 python/tvm/tirx/lang/warp_role.py                  |    4 +-
 python/tvm/tirx/layout.py                          |  349 ++++-
 python/tvm/tirx/op.py                              |  178 ++-
 python/tvm/tirx/operator/intrinsics/cuda/mma.py    |  150 +-
 .../tvm/tirx/operator/tile_primitive/__init__.py   |    3 +-
 .../operator/tile_primitive/cuda/copy/__init__.py  |    8 +-
 .../operator/tile_primitive/cuda/copy/_common.py   |  540 +++++++
 .../tile_primitive/cuda/copy/_swizzle_iter.py      |  404 ++++++
 .../tile_primitive/cuda/copy/collective.py         |  162 ---
 .../operator/tile_primitive/cuda/copy/fallback.py  |  116 ++
 .../operator/tile_primitive/cuda/copy/gmem_smem.py |  303 ++++
 .../tile_primitive/cuda/copy/ld_stmatrix.py        |  454 ++++++
 .../tirx/operator/tile_primitive/cuda/copy/reg.py  |  595 ++++++++
 .../operator/tile_primitive/cuda/copy/scalar.py    |   53 -
 .../operator/tile_primitive/cuda/copy/utils.py     |   92 +-
 .../tile_primitive/cuda/copy/vectorized.py         |   63 -
 .../tile_primitive/cuda/copy_async/__init__.py     |    2 +-
 .../tile_primitive/cuda/copy_async/cp_async.py     |   56 -
 .../tile_primitive/cuda/copy_async/ldgsts.py       |  275 ++++
 .../tile_primitive/cuda/copy_async/tcgen05_ldst.py |  311 +++-
 .../tile_primitive/cuda/elementwise/__init__.py    |   24 +-
 .../tile_primitive/cuda/elementwise/_common.py     |  445 ++++--
 .../cuda/elementwise/ops/__init__.py               |  121 ++
 .../tile_primitive/cuda/elementwise/ops/binary.py  |  127 ++
 .../tile_primitive/cuda/elementwise/ops/cast.py    |   45 +
 .../tile_primitive/cuda/elementwise/ops/fma.py     |   50 +
 .../tile_primitive/cuda/elementwise/ops/unary.py   |  117 ++
 .../tile_primitive/cuda/elementwise/reg.py         |  361 +++++
 .../tile_primitive/cuda/elementwise/register.py    |   55 +-
 .../cuda/elementwise/schedule_collective_reg.py    |  410 ------
 .../cuda/elementwise/schedule_collective_smem.py   |  132 --
 .../cuda/elementwise/schedule_thread.py            |  121 --
 .../tile_primitive/cuda/elementwise/schema.py      | 1165 ---------------
 .../tile_primitive/cuda/elementwise/smem.py        |  264 ++++
 .../cuda/elementwise/vec_emit/__init__.py          |   40 +
 .../cuda/elementwise/vec_emit/binary_f32x2.py      |   96 ++
 .../cuda/elementwise/vec_emit/cast_vec2.py         |   89 ++
 .../cuda/elementwise/vec_emit/fma_f32x2.py         |   78 +
 .../tile_primitive/cuda/exec_scope_utils.py        |    4 +-
 .../operator/tile_primitive/cuda/gemm}/__init__.py |   12 +-
 .../tile_primitive/cuda/gemm/mma_m16n8k_.py        |  595 ++++++++
 .../tile_primitive/cuda/gemm_async/tcgen05.py      |   44 +-
 .../tile_primitive/cuda/permute_dims/__init__.py   |   18 -
 .../cuda/permute_dims/vectorized_last_2d.py        |  151 --
 .../{trn/copy => cuda/permute_layout}/__init__.py  |    2 +-
 .../cuda/permute_layout/warp_xor_swizzle.py        |  388 +++++
 .../tile_primitive/cuda/reduction/shared.py        |    2 +-
 python/tvm/tirx/operator/tile_primitive/ops.py     |   53 +-
 python/tvm/tirx/script/builder/frame.py            |    4 +-
 python/tvm/tirx/script/builder/ir.py               |  153 +-
 python/tvm/tirx/script/builder/tirx.py             |   50 +-
 python/tvm/tirx/script/builder/tmem_pool.py        |    2 +-
 python/tvm/tirx/script/parser/__init__.py          |   16 +-
 python/tvm/tirx/script/parser/entry.py             |  175 +++
 python/tvm/tirx/script/parser/parser.py            |   12 +-
 python/tvm/tirx/stmt.py                            |   35 +-
 python/tvm/tirx/stmt_functor.py                    |   55 +
 .../tvm/tirx/transform/trn/private_buffer_alloc.py |   45 +-
 src/relax/op/op_common.cc                          |   48 +-
 src/relax/op/op_common.h                           |   30 +
 src/relax/transform/adjust_matmul_order.cc         |  132 +-
 src/target/cuda/codegen_cuda.cc                    |   18 -
 src/target/cuda/codegen_cuda.h                     |    1 -
 src/target/source/codegen_c.cc                     |   33 +-
 src/target/source/codegen_c.h                      |   10 +
 src/tirx/analysis/exec_context.cc                  |   10 +-
 src/tirx/analysis/filter_canonical.cc              |  226 +++
 src/tirx/analysis/filter_canonical.h               |  160 +++
 src/tirx/analysis/verify_tirx_well_formed.cc       |  113 +-
 src/tirx/ir/exec_scope.cc                          |   22 +-
 src/tirx/ir/layout/axis_registry.cc                |    7 +-
 src/tirx/ir/layout/tile_core.cc                    |   38 +
 src/tirx/ir/layout/tile_internal.h                 |    5 +
 src/tirx/ir/layout/tile_tile_ops.cc                |   53 +-
 src/tirx/ir/stmt.cc                                |   12 +
 src/tirx/ir/stmt_functor.cc                        |   90 +-
 src/tirx/ir/tir_visitor_with_path.cc               |   17 +
 src/tirx/ir/tir_visitor_with_path.h                |    1 +
 src/tirx/op/builtin.cc                             |   15 +-
 src/tirx/op/op.cc                                  |    9 +
 src/tirx/op/tirx.cc                                |   49 +-
 src/tirx/script/builder/ir.cc                      |   72 +-
 src/tirx/script/printer/block.cc                   |   30 +
 src/tirx/script/printer/utils.h                    |   28 +-
 src/tirx/transform/split_host_device.cc            |   10 -
 src/tirx/transform/tile_primitive_dispatch.cc      |  578 ++++++--
 tests/lint/check_file_type.py                      |    1 +
 tests/python/relax/test_frontend_tflite.py         |  213 +++
 .../relax/test_transform_adjust_matmul_order.py    |  408 +++++-
 tests/python/tirx-base/test_tir_stmt_functor.py    |    2 +-
 tests/python/tirx/codegen/test_codegen_ampere.py   |  216 +++
 .../python/tirx/codegen/test_codegen_blackwell.py  |  386 ++---
 tests/python/tirx/codegen/test_codegen_cuda.py     |  732 ++++------
 tests/python/tirx/codegen/test_codegen_dsmem.py    |   62 +-
 tests/python/tirx/codegen/test_codegen_hopper.py   |  689 +++++----
 tests/python/tirx/codegen/test_codegen_nki.py      |   68 +-
 tests/python/tirx/codegen/test_codegen_nvshmem.py  |  157 +-
 tests/python/tirx/codegen/test_cuda_copy.py        |  214 +--
 tests/python/tirx/codegen/test_cuda_cta_reduce.py  |  144 +-
 tests/python/tirx/codegen/test_cuda_warp_reduce.py |  102 +-
 .../tile_primitive/cuda/copy/test_fallback.py      |  242 ++++
 .../tile_primitive/cuda/copy/test_gmem_smem.py     |  575 ++++++++
 .../tile_primitive/cuda/copy/test_ld_stmatrix.py   |  499 +++++++
 .../operator/tile_primitive/cuda/copy/test_reg.py  |  423 ++++++
 .../tile_primitive/cuda/copy/test_swizzle_iter.py  |  443 ++++++
 .../test_dsmem.py}                                 |   88 +-
 .../test_ldgsts.py}                                |   33 +-
 .../test_smem_tmem.py}                             |  372 +++--
 .../test_tma.py}                                   |  345 ++---
 .../tile_primitive/cuda/copy_async/test_tmem.py    |  351 +++++
 .../cuda/copy_async/test_tmem_16xnb.py             |  885 ++++++++++++
 .../cuda/{ => elementwise}/test_binary.py          |  652 +++++----
 .../cuda/{ => elementwise}/test_fma.py             |  190 ++-
 .../cuda/{ => elementwise}/test_unary.py           |  945 ++++++------
 .../cuda/gemm/test_gemm_mma_m16n8k_.py             |  697 +++++++++
 .../cuda/{ => gemm_async}/test_gemm_async.py       | 1509 +++++++++++---------
 .../cuda/permute_layout/test_permute_layout.py     |  425 ++++++
 .../cuda/{ => reduction}/test_reduction.py         |  703 ++++-----
 .../tile_primitive/cuda/test_copy_async_tmem.py    |  137 --
 .../operator/tile_primitive/cuda/test_copy_sync.py |  440 ------
 .../tile_primitive/cuda/test_permute_dims.py       |  152 --
 .../operator/tile_primitive/trn/test_binary_trn.py |  133 +-
 .../tile_primitive/trn/test_compose_op_trn.py      |  305 ++--
 .../operator/tile_primitive/trn/test_copy_trn.py   |  265 ++--
 .../operator/tile_primitive/trn/test_gemm_trn.py   |  343 ++---
 .../tile_primitive/trn/test_private_alloc_trn.py   |  330 ++---
 .../tile_primitive/trn/test_reduction_trn.py       |   91 +-
 .../operator/tile_primitive/trn/test_select_trn.py |   65 +-
 .../operator/tile_primitive/trn/test_unary_trn.py  |  107 +-
 tests/python/tirx/test_control_flow.py             |   82 +-
 tests/python/tirx/test_exec_scope.py               |    4 -
 tests/python/tirx/test_hint.py                     |   18 +-
 tests/python/tirx/test_inline.py                   |   30 +-
 tests/python/tirx/test_jit.py                      |  225 +++
 tests/python/tirx/test_layout.py                   |   18 +-
 tests/python/tirx/test_op.py                       |   18 +-
 tests/python/tirx/test_parser_printer.py           | 1206 ++++++++--------
 tests/python/tirx/test_printer_tir_namespaces.py   |   10 +-
 tests/python/tirx/test_verifier.py                 |  365 +++--
 tests/python/tirx/transform/test_stmt_functor.py   |   15 +-
 .../tirx/transform/test_transform_lower_tirx.py    |  828 ++++++-----
 .../transform/test_transform_naive_allocator.py    |  116 +-
 .../python/wheel/test_validate_runtime_library.py  |   50 +
 177 files changed, 20930 insertions(+), 10261 deletions(-)
 create mode 100644 .github/actions/build-wheel-for-publish/action.yml
 create mode 100644 .github/workflows/publish_wheel.yml
 copy {tests/python/all-platform-minimal-test => ci/scripts/package}/README.md (58%)
 rename {tests/conda => ci/scripts/package}/build-environment.yaml (98%)
 create mode 100755 ci/scripts/package/manylinux_build_libtvm_runtime_cuda.sh
 create mode 100644 ci/scripts/package/windows_build_libtvm_runtime_cuda.bat
 create mode 100644 cmake/utils/Library.cmake
 create mode 100644 python/tvm/tirx/operator/tile_primitive/cuda/copy/_common.py
 create mode 100644 
python/tvm/tirx/operator/tile_primitive/cuda/copy/_swizzle_iter.py
 delete mode 100644 
python/tvm/tirx/operator/tile_primitive/cuda/copy/collective.py
 create mode 100644 
python/tvm/tirx/operator/tile_primitive/cuda/copy/fallback.py
 create mode 100644 
python/tvm/tirx/operator/tile_primitive/cuda/copy/gmem_smem.py
 create mode 100644 
python/tvm/tirx/operator/tile_primitive/cuda/copy/ld_stmatrix.py
 create mode 100644 python/tvm/tirx/operator/tile_primitive/cuda/copy/reg.py
 delete mode 100644 python/tvm/tirx/operator/tile_primitive/cuda/copy/scalar.py
 delete mode 100644 
python/tvm/tirx/operator/tile_primitive/cuda/copy/vectorized.py
 delete mode 100644 
python/tvm/tirx/operator/tile_primitive/cuda/copy_async/cp_async.py
 create mode 100644 
python/tvm/tirx/operator/tile_primitive/cuda/copy_async/ldgsts.py
 create mode 100644 
python/tvm/tirx/operator/tile_primitive/cuda/elementwise/ops/__init__.py
 create mode 100644 
python/tvm/tirx/operator/tile_primitive/cuda/elementwise/ops/binary.py
 create mode 100644 
python/tvm/tirx/operator/tile_primitive/cuda/elementwise/ops/cast.py
 create mode 100644 
python/tvm/tirx/operator/tile_primitive/cuda/elementwise/ops/fma.py
 create mode 100644 
python/tvm/tirx/operator/tile_primitive/cuda/elementwise/ops/unary.py
 create mode 100644 
python/tvm/tirx/operator/tile_primitive/cuda/elementwise/reg.py
 delete mode 100644 
python/tvm/tirx/operator/tile_primitive/cuda/elementwise/schedule_collective_reg.py
 delete mode 100644 
python/tvm/tirx/operator/tile_primitive/cuda/elementwise/schedule_collective_smem.py
 delete mode 100644 
python/tvm/tirx/operator/tile_primitive/cuda/elementwise/schedule_thread.py
 delete mode 100644 
python/tvm/tirx/operator/tile_primitive/cuda/elementwise/schema.py
 create mode 100644 
python/tvm/tirx/operator/tile_primitive/cuda/elementwise/smem.py
 create mode 100644 
python/tvm/tirx/operator/tile_primitive/cuda/elementwise/vec_emit/__init__.py
 create mode 100644 
python/tvm/tirx/operator/tile_primitive/cuda/elementwise/vec_emit/binary_f32x2.py
 create mode 100644 
python/tvm/tirx/operator/tile_primitive/cuda/elementwise/vec_emit/cast_vec2.py
 create mode 100644 
python/tvm/tirx/operator/tile_primitive/cuda/elementwise/vec_emit/fma_f32x2.py
 copy python/tvm/{relax/script/builder => tirx/operator/tile_primitive/cuda/gemm}/__init__.py (71%)
 create mode 100644 
python/tvm/tirx/operator/tile_primitive/cuda/gemm/mma_m16n8k_.py
 delete mode 100644 
python/tvm/tirx/operator/tile_primitive/cuda/permute_dims/__init__.py
 delete mode 100644 
python/tvm/tirx/operator/tile_primitive/cuda/permute_dims/vectorized_last_2d.py
 copy python/tvm/tirx/operator/tile_primitive/{trn/copy => cuda/permute_layout}/__init__.py (96%)
 create mode 100644 
python/tvm/tirx/operator/tile_primitive/cuda/permute_layout/warp_xor_swizzle.py
 create mode 100644 src/tirx/analysis/filter_canonical.cc
 create mode 100644 src/tirx/analysis/filter_canonical.h
 create mode 100644 tests/python/tirx/codegen/test_codegen_ampere.py
 create mode 100644 
tests/python/tirx/operator/tile_primitive/cuda/copy/test_fallback.py
 create mode 100644 
tests/python/tirx/operator/tile_primitive/cuda/copy/test_gmem_smem.py
 create mode 100644 
tests/python/tirx/operator/tile_primitive/cuda/copy/test_ld_stmatrix.py
 create mode 100644 
tests/python/tirx/operator/tile_primitive/cuda/copy/test_reg.py
 create mode 100644 
tests/python/tirx/operator/tile_primitive/cuda/copy/test_swizzle_iter.py
 rename tests/python/tirx/operator/tile_primitive/cuda/{test_copy_dsmem.py => copy_async/test_dsmem.py} (81%)
 rename tests/python/tirx/operator/tile_primitive/cuda/{test_copy_async_cta.py => copy_async/test_ldgsts.py} (80%)
 rename tests/python/tirx/operator/tile_primitive/cuda/{test_smem_tmem_dispatch.py => copy_async/test_smem_tmem.py} (52%)
 rename tests/python/tirx/operator/tile_primitive/cuda/{test_copy_async_tma.py => copy_async/test_tma.py} (89%)
 create mode 100644 
tests/python/tirx/operator/tile_primitive/cuda/copy_async/test_tmem.py
 create mode 100644 
tests/python/tirx/operator/tile_primitive/cuda/copy_async/test_tmem_16xnb.py
 rename tests/python/tirx/operator/tile_primitive/cuda/{ => 
elementwise}/test_binary.py (58%)
 rename tests/python/tirx/operator/tile_primitive/cuda/{ => 
elementwise}/test_fma.py (64%)
 rename tests/python/tirx/operator/tile_primitive/cuda/{ => 
elementwise}/test_unary.py (59%)
 create mode 100644 
tests/python/tirx/operator/tile_primitive/cuda/gemm/test_gemm_mma_m16n8k_.py
 rename tests/python/tirx/operator/tile_primitive/cuda/{ => 
gemm_async}/test_gemm_async.py (53%)
 create mode 100644 
tests/python/tirx/operator/tile_primitive/cuda/permute_layout/test_permute_layout.py
 rename tests/python/tirx/operator/tile_primitive/cuda/{ => 
reduction}/test_reduction.py (60%)
 delete mode 100644 
tests/python/tirx/operator/tile_primitive/cuda/test_copy_async_tmem.py
 delete mode 100644 
tests/python/tirx/operator/tile_primitive/cuda/test_copy_sync.py
 delete mode 100644 
tests/python/tirx/operator/tile_primitive/cuda/test_permute_dims.py
 create mode 100644 tests/python/tirx/test_jit.py
 create mode 100644 tests/python/wheel/test_validate_runtime_library.py

Reply via email to