This is an automated email from the ASF dual-hosted git repository.
github-actions[bot] pushed a change to branch nightly
in repository https://gitbox.apache.org/repos/asf/tvm.git
from 21298c77ed [Relax][ONNX] Support exclusive option in CumSum (#19773)
add 9831966c8e [CPP_RPC] Bugfix race conditions and enhance print infos (#19778)
add c9a3c2f442 [CMAKE] Upgrade TVM build baseline to C++20 (#19734)
add 5550fb33cd [REFACTOR][CUDA] Phase out cuda_common.h (#19770)
add a8a94184b5 [REFACTOR][PYTHON] Consolidate backend autoload infra (#19769)
add 2410c50cec [Fix] nn.attention support dynamic batch_size (#19779)
add b44988ba9f [Relax][ONNX] Make ReduceMax/ReduceMin NaN propagation order-independent(numpy semantics) (#19755)
add 7655c99abe [Docs][CI] Bump tlcpack-sphinx-addon to restore search result summaries (#19782)
add 74f401fc1a [REFACTOR][IR] Cleanup IR naming utilities (#19781)
add 4899fa192d [CUDA] Narrow the cuda extra from cuda-python to cuda-bindings (#19784)
add 4efd660fdd [AGENT] Migrate agent instructions to vendor-neutral layout (#19783)
add e4da848e57 [Tests] Modernize test gating (#19777)
No new revisions were added by this update.
Summary of changes:
{.claude => .agents}/scripts/monitor_gpu.sh | 13 +-
.../skills/tir-bench/SKILL.md | 0
.../skills/tir-build/SKILL.md | 6 +-
.../skills/tir-test/SKILL.md | 4 +-
AGENTS.md | 99 +++
CMakeLists.txt | 4 +-
apps/cpp_rpc/rpc_env.cc | 53 +-
apps/cpp_rpc/rpc_env.h | 33 +
apps/cpp_rpc/rpc_server.cc | 24 +-
apps/cpp_rpc/rpc_tracker_client.h | 2 +-
cmake/utils/FindLLVM.cmake | 4 +-
docker/install/ubuntu_install_sphinx.sh | 2 +-
docs/install/from_source.rst | 14 +-
include/tvm/ir/global_var_supply.h | 128 ----
include/tvm/ir/name_supply.h | 169 -----
include/tvm/ir/unique_name_supply.h | 143 ++++
include/tvm/relax/binding_rewrite.h | 4 +-
include/tvm/relax/block_builder.h | 8 +-
include/tvm/tirx/op.h | 3 +-
pyproject.toml | 10 +-
python/tvm/__init__.py | 13 +-
python/tvm/_autoload_backends.py | 50 --
python/tvm/backend/__init__.py | 186 +-----
python/tvm/backend/_autoload_backends.py | 88 +++
python/tvm/backend/cuda/__init__.py | 32 +
python/tvm/backend/hexagon/__init__.py | 16 +
python/tvm/backend/{__init__.py => loader.py} | 26 +-
python/tvm/backend/metal/__init__.py | 31 +
python/tvm/backend/opencl/__init__.py | 33 +
python/tvm/backend/rocm/__init__.py | 34 +
python/tvm/backend/vulkan/__init__.py | 44 ++
python/tvm/base.py | 25 -
python/tvm/contrib/hexagon/_ci_env_check.py | 6 +-
python/tvm/contrib/hexagon/pytest_plugin.py | 6 +-
python/tvm/ir/supply.py | 97 +--
python/tvm/relax/frontend/onnx/onnx_frontend.py | 38 +-
python/tvm/relax/transform/legalize_ops/nn.py | 13 +-
python/tvm/runtime/__init__.py | 2 +-
python/tvm/runtime/_ffi_node_api.py | 5 +-
.../meta_schedule/space_generator/__init__.py | 2 -
python/tvm/support/nvcc.py | 12 +-
python/tvm/target/detect_target.py | 100 +--
python/tvm/target/x86.py | 39 --
python/tvm/testing/__init__.py | 1 +
python/tvm/testing/env.py | 518 +++++++++++++++
python/tvm/testing/plugin.py | 75 ++-
python/tvm/testing/utils.py | 721 +--------------------
src/backend/cuda/runtime/cuda_common.h | 69 --
src/backend/cuda/runtime/cuda_device_api.cc | 140 ++--
src/backend/cuda/runtime/cuda_module.cc | 16 +-
src/ir/access_path_repr.cc | 49 --
src/ir/global_var_supply.cc | 111 ----
src/ir/module.cc | 12 +-
src/ir/name_supply.cc | 108 ---
src/ir/unique_name_supply.cc | 114 ++++
src/relax/backend/contrib/cutlass/codegen.cc | 6 +-
src/relax/ir/binding_rewrite.cc | 4 +-
src/relax/ir/block_builder.cc | 6 +-
src/relax/ir/dataflow_expr_rewriter.cc | 2 +-
src/relax/transform/allocate_workspace.cc | 4 +-
src/relax/transform/normalize.cc | 2 +-
src/relax/transform/run_codegen.cc | 2 +-
.../extra/contrib/cublas/cublas_json_runtime.cc | 4 +-
src/runtime/extra/contrib/cublas/cublas_utils.cc | 5 +-
src/runtime/extra/contrib/cudnn/conv_backward.cc | 4 +-
src/runtime/extra/contrib/cudnn/conv_forward.cc | 2 +-
.../contrib/cudnn/cudnn_frontend/attention.cc | 4 +-
.../extra/contrib/cudnn/cudnn_json_runtime.cc | 8 +-
src/runtime/extra/contrib/cudnn/cudnn_utils.cc | 2 +-
src/runtime/extra/contrib/cudnn/cudnn_utils.h | 3 +-
src/runtime/extra/contrib/cudnn/softmax.cc | 2 +-
src/runtime/extra/contrib/curand/curand.cc | 2 +-
.../cutlass/fp16_group_gemm_runner_sm100.cuh | 3 +-
.../cutlass/fp16_group_gemm_runner_sm90.cuh | 3 +-
.../fp8_groupwise_scaled_gemm_runner_sm100.cuh | 4 +-
.../fp8_groupwise_scaled_gemm_runner_sm90.cuh | 4 +-
...p8_groupwise_scaled_group_gemm_runner_sm100.cuh | 4 +-
src/runtime/extra/contrib/cutlass/gemm_runner.cuh | 3 +-
src/runtime/extra/contrib/nvshmem/dist_gemm.cu | 7 +-
src/runtime/extra/contrib/nvshmem/init.cc | 7 +-
.../extra/contrib/nvshmem/memory_allocator.cc | 2 +-
.../extra/contrib/tensorrt/tensorrt_calibrator.h | 19 +-
src/runtime/extra/contrib/thrust/thrust.cu | 5 +-
.../extra/disco/cuda_ipc/cuda_ipc_memory.cc | 39 +-
src/runtime/extra/disco/nccl/nccl_context.h | 17 +-
src/runtime/extra/disco/protocol.h | 4 +-
src/runtime/rpc/rpc_endpoint.cc | 4 +-
src/runtime/vm/cuda/cuda_graph_builtin.cc | 18 +-
src/s_tir/transform/compact_buffer_region.cc | 2 +
src/s_tir/transform/inject_software_pipeline.cc | 2 +-
src/target/llvm/codegen_params.cc | 3 +-
src/target/source/codegen_c.cc | 2 +-
src/target/source/codegen_c.h | 4 +-
src/target/source/codegen_source_base.cc | 2 +-
src/target/source/codegen_source_base.h | 6 +-
src/te/operation/create_primfunc.cc | 6 +-
src/tirx/ir/index_map.cc | 4 +-
src/tirx/transform/bind_target.cc | 7 +-
src/tirx/transform/split_host_device.cc | 9 +-
tests/lint/check_asf_header.py | 2 +-
.../test_minimal_target_codegen_llvm.py | 4 +-
.../python/codegen/test_codegen_error_handling.py | 4 +-
tests/python/codegen/test_gpu_codegen_allreduce.py | 4 +-
tests/python/codegen/test_inject_ptx_ldg32.py | 5 +-
tests/python/codegen/test_target_codegen_blob.py | 2 +-
tests/python/codegen/test_target_codegen_bool.py | 3 +-
.../codegen/test_target_codegen_cross_llvm.py | 4 +-
tests/python/codegen/test_target_codegen_cuda.py | 96 +--
.../codegen/test_target_codegen_cuda_fastmath.py | 5 +-
.../python/codegen/test_target_codegen_cuda_fp4.py | 10 +-
.../python/codegen/test_target_codegen_cuda_fp8.py | 34 +-
tests/python/codegen/test_target_codegen_device.py | 8 +-
tests/python/codegen/test_target_codegen_extern.py | 3 +-
.../codegen/test_target_codegen_gpu_common.py | 4 +-
.../python/codegen/test_target_codegen_hexagon.py | 7 +-
tests/python/codegen/test_target_codegen_llvm.py | 69 +-
tests/python/codegen/test_target_codegen_metal.py | 32 +-
tests/python/codegen/test_target_codegen_opencl.py | 27 +-
tests/python/codegen/test_target_codegen_riscv.py | 7 +-
tests/python/codegen/test_target_codegen_rocm.py | 20 +-
tests/python/codegen/test_target_codegen_vulkan.py | 14 +-
tests/python/codegen/test_target_codegen_x86.py | 3 +-
tests/python/contrib/test_cutlass_gemm.py | 22 +-
.../test_hexagon/test_async_dma_pipeline.py | 5 +-
.../test_hexagon/test_benchmark_elemwise_add.py | 3 +-
.../test_hexagon/test_benchmark_maxpool2d.py | 3 +-
.../contrib/test_hexagon/test_dma_builtin.py | 4 +-
.../contrib/test_hexagon/test_meta_schedule.py | 7 +-
.../contrib/test_hexagon/test_parallel_hvx.py | 4 +-
.../test_hexagon/test_parallel_hvx_load_vtcm.py | 4 +-
.../contrib/test_hexagon/test_parallel_scalar.py | 4 +-
.../contrib/test_hexagon/test_relax_integration.py | 5 +-
.../contrib/test_hexagon/test_run_unit_tests.py | 5 +-
tests/python/contrib/test_hexagon/test_sigmoid.py | 4 +-
.../test_hexagon/test_software_pipeline_async.py | 4 +-
.../contrib/test_hexagon/test_thread_pool.py | 6 +-
tests/python/contrib/test_hexagon/test_vtcm.py | 5 +-
.../contrib/test_hexagon/test_vtcm_bandwidth.py | 3 +-
tests/python/contrib/test_hipblas.py | 8 +-
tests/python/contrib/test_random.py | 3 +-
.../python/contrib/test_tir_triton_integration.py | 4 +-
tests/python/disco/test_callback.py | 5 +-
tests/python/disco/test_loader.py | 5 +-
tests/python/disco/test_nvshmem.py | 6 +-
...t_name_supply.py => test_unique_name_supply.py} | 27 +-
tests/python/nightly/test_nnapi/test_network.py | 3 +-
tests/python/relax/backend/adreno/utils.py | 62 +-
tests/python/relax/test_codegen_cublas.py | 15 +-
tests/python/relax/test_codegen_cudnn.py | 6 +-
tests/python/relax/test_codegen_cutlass.py | 5 +-
tests/python/relax/test_codegen_hipblas.py | 6 +-
tests/python/relax/test_codegen_tensorrt.py | 6 +-
tests/python/relax/test_contrib_vllm.py | 8 +-
tests/python/relax/test_frontend_dynamo.py | 19 +-
.../relax/test_frontend_from_exported_program.py | 3 +-
tests/python/relax/test_frontend_from_fx.py | 7 +-
...test_frontend_nn_llm_sequence_prefill_masked.py | 29 +-
tests/python/relax/test_frontend_nn_op.py | 11 +-
tests/python/relax/test_frontend_onnx.py | 40 ++
tests/python/relax/test_frontend_stablehlo.py | 31 +-
tests/python/relax/test_op_vision.py | 31 +-
...ime_builtin_paged_attention_kv_cache_mla_tir.py | 17 +-
...runtime_builtin_paged_attention_kv_cache_tir.py | 33 +-
.../python/relax/test_runtime_builtin_rnn_state.py | 13 +-
tests/python/relax/test_tir_call_source_kernel.py | 5 +-
tests/python/relax/test_transform_codegen_pass.py | 15 +-
.../python/relax/test_transform_legalize_ops_nn.py | 35 +
tests/python/relax/test_vm_build.py | 10 +-
tests/python/relax/test_vm_builtin.py | 2 +-
tests/python/relax/test_vm_cuda_graph.py | 7 +-
tests/python/relax/test_vm_multi_device.py | 6 +-
tests/python/relax/texture/test_texture_nd.py | 6 +-
tests/python/runtime/test_runtime_module_export.py | 5 +-
tests/python/runtime/test_runtime_module_load.py | 8 +-
tests/python/runtime/test_runtime_rpc.py | 37 +-
tests/python/s_tir/dlight/test_primitives.py | 6 +-
.../test_meta_schedule_mma_tensorize.py | 13 +-
.../test_meta_schedule_space_post_opt.py | 6 +-
.../meta_schedule/test_meta_schedule_tune_tir.py | 6 +-
..._tir_schedule_tensorize_ldmatrix_mma_numeric.py | 17 +-
.../test_tir_schedule_tensorize_mfma_numeric.py | 11 +-
.../test_s_tir_transform_inject_ptx_async_copy.py | 13 +-
...est_s_tir_transform_inject_software_pipeline.py | 7 +-
.../transform/test_s_tir_transform_thread_sync.py | 9 +-
tests/python/target/test_arm_target.py | 9 +-
tests/python/target/test_target_target.py | 13 +-
tests/python/testing/test_env.py | 205 ++++++
tests/python/tirx-base/test_tir_imm_values.py | 13 +-
tests/python/tirx-base/test_tir_ptx_cp_async.py | 5 +-
.../tirx-base/test_tir_ptx_griddepcontrol.py | 5 +-
tests/python/tirx-base/test_tir_ptx_ldmatrix.py | 5 +-
tests/python/tirx-base/test_tir_ptx_mma.py | 61 +-
tests/python/tirx-base/test_tir_ptx_mma_sp.py | 8 +-
.../tirx-base/test_tir_ptx_scalar_f32_math.py | 5 +-
.../test_tir_transform_lower_intrin.py | 8 +-
.../test_tir_transform_lower_tvm_builtin.py | 3 +-
tests/python/tirx/codegen/test_codegen_ampere.py | 7 +-
.../python/tirx/codegen/test_codegen_blackwell.py | 19 +-
tests/python/tirx/codegen/test_codegen_hopper.py | 55 +-
.../tile_primitive/cuda/copy/test_gmem_smem.py | 4 +-
.../tile_primitive/cuda/copy/test_ld_stmatrix.py | 13 +-
.../tile_primitive/cuda/copy_async/test_dsmem.py | 4 +-
.../cuda/copy_async/test_smem_tmem.py | 13 +-
.../tile_primitive/cuda/copy_async/test_tma.py | 16 +-
.../cuda/gemm/test_gemm_mma_m16n8k_.py | 16 +-
tests/python/tirx/test_bench_utils.py | 17 +-
tests/python/tirx/test_op_namespace_cleanup.py | 4 +-
tests/python/tvmscript/test_tvmscript_ops.py | 3 +-
tests/scripts/task_python_integration_gpuonly.sh | 1 +
tests/scripts/task_python_unittest_gpuonly.sh | 1 +
210 files changed, 2771 insertions(+), 2631 deletions(-)
rename {.claude => .agents}/scripts/monitor_gpu.sh (86%)
rename .claude/commands/tir-bench.md => .agents/skills/tir-bench/SKILL.md (100%)
rename .claude/commands/tir-build.md => .agents/skills/tir-build/SKILL.md (69%)
rename .claude/commands/tir-test.md => .agents/skills/tir-test/SKILL.md (95%)
create mode 100644 AGENTS.md
delete mode 100644 include/tvm/ir/global_var_supply.h
delete mode 100644 include/tvm/ir/name_supply.h
create mode 100644 include/tvm/ir/unique_name_supply.h
delete mode 100644 python/tvm/_autoload_backends.py
create mode 100644 python/tvm/backend/_autoload_backends.py
copy python/tvm/backend/{__init__.py => loader.py} (92%)
delete mode 100644 python/tvm/target/x86.py
create mode 100644 python/tvm/testing/env.py
delete mode 100644 src/backend/cuda/runtime/cuda_common.h
delete mode 100644 src/ir/access_path_repr.cc
delete mode 100644 src/ir/global_var_supply.cc
delete mode 100644 src/ir/name_supply.cc
create mode 100644 src/ir/unique_name_supply.cc
rename tests/python/ir/{test_name_supply.py => test_unique_name_supply.py} (62%)
create mode 100644 tests/python/testing/test_env.py