This is an automated email from the ASF dual-hosted git repository.
github-actions[bot] pushed a change to branch nightly in repository https://gitbox.apache.org/repos/asf/tvm.git
from 4dad956cba Fix PytestUnknownMarkWarning: Unknown pytest.mark.adreno_clml (#19602)
add b1e1566f82 [REFACTOR][IR] Cleanup attrs.h: drop NullValue,
AttrsNodeReflAdapter, legacy BaseAttrsNode methods (#19607)
add f8bc1f44cb [Docs] Reorganize development guide content (#19606)
add 58d6a5a8f9 [REFACTOR] Move src/ir/script_printer.cc to
src/script/printer/ (#19611)
add d37b6abd56 [REFACTOR][IR] Phase out src/ir/structural_{hash,equal}.cc
to tvm-ffi (#19613)
add 3918e14389 [REFACTOR][IR] Inline ApplyPassToFunction into relax
decompose_ops, delete the util (#19612)
add 02b130249c [REFACTOR][TIR][ARITH] Phase out ControlFlowGraph,
NarrowPredicateExpression, and rename Simplify to StmtSimplify (#19604)
add 0388fd0ce0 [REFACTOR][IR] Phase out class Integer and class Bool in
Attrs and PassConfig (#19614)
add 0b0a2fda04 [CMAKE][RUNTIME] Link tvm_rpc with all backend runtime
libraries (#19617)
add e159487b0e [REFACTOR][IR] attrs.h follow-up cleanup: drop legacy
vtable / rename / phase out AttrFieldInfo (#19615)
add ec3171ab7a [REFACTOR][TIR] Tie AnnotateDeviceRegions/SplitHostDevice/LowerDeviceKernelLaunch together (#19605)
add fa66213249 [Relax][Frontend][TFLite] Support control-flow
multi-subgraph operators (#19616)
add dcbebe7bfd [Relax][Frontend][TFLite] Add UNIDIRECTIONAL_SEQUENCE_RNN
converter (#19601)
No new revisions were added by this update.
Summary of changes:
CMakeLists.txt | 3 +
apps/cpp_rpc/CMakeLists.txt | 16 +-
cmake/modules/CUDA.cmake | 1 +
cmake/modules/Hexagon.cmake | 1 +
cmake/modules/Metal.cmake | 1 +
cmake/modules/OpenCL.cmake | 1 +
cmake/modules/ROCM.cmake | 1 +
cmake/modules/Vulkan.cmake | 1 +
docs/contribute/code_guide.rst | 2 +
docs/contribute/index.rst | 1 +
.../testing.rst} | 49 +-
docs/errors.rst | 108 +-
docs/how_to/dev/index.rst | 28 -
docs/how_to/dev/setup_rpc_system.rst | 243 ---
docs/how_to/tutorials/cross_compilation_and_rpc.py | 189 +-
docs/index.rst | 2 +-
include/tvm/ir/attrs.h | 282 +--
include/tvm/ir/function.h | 2 +-
include/tvm/ir/module.h | 2 +-
include/tvm/ir/op.h | 41 +-
include/tvm/ir/transform.h | 46 +-
include/tvm/relax/attrs/ccl.h | 12 +-
include/tvm/relax/attrs/create.h | 8 +-
include/tvm/relax/attrs/datatype.h | 8 +-
include/tvm/relax/attrs/distributed.h | 5 +-
include/tvm/relax/attrs/image.h | 12 +-
include/tvm/relax/attrs/index.h | 9 +-
include/tvm/relax/attrs/linear_algebra.h | 8 +-
include/tvm/relax/attrs/manipulate.h | 95 +-
include/tvm/relax/attrs/nn.h | 116 +-
include/tvm/relax/attrs/op.h | 25 +-
include/tvm/relax/attrs/qdq.h | 4 +-
include/tvm/relax/attrs/sampling.h | 4 +-
include/tvm/relax/attrs/search.h | 9 +-
include/tvm/relax/attrs/sorting.h | 16 +-
include/tvm/relax/attrs/statistical.h | 15 +-
include/tvm/relax/attrs/vision.h | 25 +-
include/tvm/relax/distributed/global_info.h | 4 +-
include/tvm/relax/expr.h | 2 +-
include/tvm/relax/script/builder/frame.h | 4 +-
include/tvm/relax/script/builder/ir.h | 2 +-
include/tvm/relax/transform.h | 2 +-
include/tvm/s_tir/analysis.h | 8 +-
.../meta_schedule/schedule/cuda/thread_bind.h | 2 +-
include/tvm/s_tir/meta_schedule/schedule_rule.h | 22 +-
include/tvm/s_tir/schedule/schedule.h | 14 +-
include/tvm/target/virtual_device.h | 8 +-
include/tvm/tirx/op_attr_types.h | 4 +-
include/tvm/tirx/transform.h | 4 +-
python/tvm/ir/base.py | 31 +-
python/tvm/ir/transform.py | 43 -
python/tvm/relax/expr.py | 4 +
.../tvm/relax/frontend/tflite/tflite_frontend.py | 535 +++++-
python/tvm/relax/op/manipulate.py | 2 +-
.../tvm/relax/transform/legalize_ops/manipulate.py | 2 +-
.../relax/transform/legalize_ops/statistical.py | 16 +-
python/tvm/s_tir/backend/adreno/pipeline.py | 9 +-
python/tvm/s_tir/pipeline.py | 17 +-
python/tvm/s_tir/transform/transform.py | 5 +-
python/tvm/testing/utils.py | 2 +-
python/tvm/tirx/compilation_pipeline.py | 22 +-
.../tile_primitive/trn/compose_op/unary_reduce.py | 2 +-
python/tvm/tirx/transform/transform.py | 21 +-
src/arith/narrow_predicate_expression.cc | 224 ---
src/arith/narrow_predicate_expression.h | 57 -
src/arith/scalable_expression.cc | 2 +-
src/ir/apply_pass_to_function.cc | 139 --
src/ir/attrs.cc | 43 +-
src/ir/module.cc | 15 +
src/ir/op.cc | 5 +-
src/ir/structural_equal.cc | 83 -
src/ir/structural_hash.cc | 89 -
src/ir/transform.cc | 4 +-
src/relax/analysis/computable_at_compile_time.cc | 4 +-
.../backend/adreno/annotate_custom_storage.cc | 16 +-
src/relax/backend/contrib/clml/codegen.cc | 15 +-
src/relax/backend/contrib/nnapi/codegen.cc | 4 +-
src/relax/backend/contrib/tensorrt/codegen.cc | 24 +-
src/relax/backend/vm/codegen_vm_tir.cc | 2 +-
src/relax/backend/vm/vm_shape_lower.cc | 6 +-
src/relax/distributed/axis_group_graph.cc | 2 +-
src/relax/distributed/global_info.cc | 6 +-
src/relax/ir/dataflow_matcher.cc | 2 +-
src/relax/ir/expr.cc | 6 +-
src/relax/op/ccl/ccl.cc | 8 +-
src/relax/op/distributed/distributed.cc | 8 +-
src/relax/op/image/resize.cc | 8 +-
src/relax/op/memory/view.cc | 8 +-
src/relax/op/nn/attention.cc | 6 +-
src/relax/op/nn/convolution.cc | 12 +-
src/relax/op/nn/nn.cc | 80 +-
src/relax/op/nn/nn.h | 8 +-
src/relax/op/nn/pooling.cc | 18 +-
src/relax/op/op.cc | 94 +-
src/relax/op/op_common.cc | 6 +-
src/relax/op/op_common.h | 4 +-
src/relax/op/tensor/binary.h | 2 +-
src/relax/op/tensor/create.cc | 28 +-
src/relax/op/tensor/datatype.cc | 4 +-
src/relax/op/tensor/grad.cc | 14 +-
src/relax/op/tensor/index.cc | 23 +-
src/relax/op/tensor/inspect.cc | 32 +-
src/relax/op/tensor/linear_algebra.cc | 6 +-
src/relax/op/tensor/manipulate.cc | 130 +-
src/relax/op/tensor/manipulate.h | 12 +-
src/relax/op/tensor/qdq.cc | 4 +-
src/relax/op/tensor/sampling.cc | 2 +-
src/relax/op/tensor/search.cc | 6 +-
src/relax/op/tensor/set.cc | 4 +-
src/relax/op/tensor/sorting.cc | 6 +-
src/relax/op/tensor/statistical.cc | 28 +-
src/relax/op/tensor/statistical.h | 24 +-
src/relax/op/tensor/ternary.cc | 2 +-
src/relax/op/tensor/unary.cc | 2 +-
src/relax/op/vision/multibox_transform_loc.cc | 2 +-
src/relax/op/vision/nms.cc | 6 +-
src/relax/op/vision/roi_align.cc | 2 +-
src/relax/op/vision/roi_pool.cc | 2 +-
src/relax/script/builder/frame.cc | 5 +-
src/relax/script/builder/ir.cc | 4 +-
src/relax/script/printer/call.cc | 4 +-
src/relax/script/printer/function.cc | 5 +-
src/relax/transform/allocate_workspace.cc | 8 +-
.../transform/attach_attr_layout_free_buffers.cc | 4 +-
src/relax/transform/bundle_model_params.cc | 4 +-
src/relax/transform/call_tir_rewrite.cc | 13 +-
src/relax/transform/compute_prim_value.cc | 5 +-
src/relax/transform/convert_layout.cc | 6 +-
src/relax/transform/dataflow_inplace.cc | 46 +-
src/relax/transform/decompose_ops.cc | 96 +-
src/relax/transform/eliminate_common_subexpr.cc | 4 +-
src/relax/transform/fold_constant.cc | 4 +-
src/relax/transform/fuse_ops.cc | 16 +-
src/relax/transform/fuse_tir.cc | 35 +-
src/relax/transform/gradient_simplifier.cc | 2 +-
src/relax/transform/lambda_lift.cc | 4 +-
src/relax/transform/legalize_ops.cc | 18 +-
src/relax/transform/lift_transform_params.cc | 40 +-
src/relax/transform/meta_schedule.cc | 14 +-
.../transform/reorder_permute_dims_after_concat.cc | 12 +-
src/relax/transform/reorder_take_after_matmul.cc | 2 +-
src/relax/transform/rewrite_cuda_graph.cc | 11 +-
.../specialize_primfunc_based_on_callsite.cc | 2 +-
src/relax/transform/split_call_tir_by_pattern.cc | 14 +-
src/relax/transform/static_plan_block_memory.cc | 5 +-
src/relax/transform/utils.h | 11 +-
src/relax/utils.cc | 4 +-
src/runtime/tensor.cc | 21 +
src/s_tir/analysis/calculate_allocated_memory.cc | 32 +-
src/s_tir/analysis/estimate_flops.cc | 4 +-
src/s_tir/analysis/is_pure_function.cc | 2 +-
src/s_tir/meta_schedule/arg_info.cc | 4 +-
.../feature_extractor/per_store_feature.cc | 7 +-
.../mutator/mutate_compute_location.cc | 6 +-
.../meta_schedule/mutator/mutate_thread_binding.cc | 6 +-
.../meta_schedule/mutator/mutate_tile_size.cc | 10 +-
src/s_tir/meta_schedule/mutator/mutate_unroll.cc | 7 +-
.../postproc/disallow_async_strided_mem_copy.cc | 2 +-
.../postproc/rewrite_cooperative_fetch.cc | 16 +-
src/s_tir/meta_schedule/postproc/rewrite_layout.cc | 4 +-
.../postproc/rewrite_unbound_block.cc | 8 +-
.../meta_schedule/postproc/verify_gpu_code.cc | 18 +-
.../meta_schedule/postproc/verify_vtcm_limit.cc | 4 +-
.../meta_schedule/schedule/cuda/thread_bind.cc | 14 +-
.../meta_schedule/schedule_rule/add_rfactor.cc | 4 +-
src/s_tir/meta_schedule/schedule_rule/auto_bind.cc | 12 +-
.../schedule_rule/cross_thread_reduction.cc | 23 +-
.../schedule_rule/multi_level_tiling.cc | 32 +-
.../schedule_rule/multi_level_tiling.h | 25 +-
.../multi_level_tiling_tensor_core.cc | 18 +-
.../multi_level_tiling_wide_vector.cc | 2 +-
.../multi_level_tiling_with_intrin.cc | 4 +-
.../schedule_rule/parallel_vectorize_unroll.cc | 4 +-
.../meta_schedule/schedule_rule/schedule_rule.cc | 92 +-
src/s_tir/meta_schedule/trace_apply.cc | 8 +-
src/s_tir/meta_schedule/utils.h | 8 +-
src/s_tir/schedule/analysis.h | 4 +-
src/s_tir/schedule/analysis/analysis.cc | 9 +-
src/s_tir/schedule/concrete_schedule.cc | 18 +-
src/s_tir/schedule/concrete_schedule.h | 14 +-
src/s_tir/schedule/instruction_traits.h | 4 +-
src/s_tir/schedule/primitive.h | 14 +-
.../schedule/primitive/annotate_buffer_access.cc | 12 +-
src/s_tir/schedule/primitive/blockize_tensorize.cc | 4 +-
src/s_tir/schedule/primitive/pad_einsum.cc | 19 +-
.../schedule/primitive/reorder_block_iter_var.cc | 10 +-
src/s_tir/schedule/primitive/sampling.cc | 62 +-
src/s_tir/schedule/trace.cc | 68 +-
src/s_tir/schedule/traced_schedule.cc | 18 +-
src/s_tir/schedule/traced_schedule.h | 14 +-
src/s_tir/schedule/transform.cc | 3 +-
src/s_tir/schedule/utils.h | 6 +-
src/s_tir/support/array_utils.h | 6 +-
src/s_tir/transform/compact_buffer_region.cc | 6 +-
src/s_tir/transform/default_gpu_schedule.cc | 8 +-
src/s_tir/transform/hoist_expression.cc | 26 +-
src/s_tir/transform/inject_double_buffer.cc | 10 +-
src/s_tir/transform/inject_software_pipeline.cc | 13 +-
src/s_tir/transform/loop_partition.cc | 10 +-
src/s_tir/transform/lower_async_dma.cc | 2 +-
.../transform/lower_cross_thread_reduction.cc | 2 +-
src/s_tir/transform/lower_thread_allreduce.cc | 8 +-
src/s_tir/transform/memhammer_coalesce.cc | 12 +-
src/s_tir/transform/memhammer_lower_auto_copy.cc | 54 +-
src/s_tir/transform/memhammer_rewrite_rule.h | 6 +-
.../transform/merge_shared_memory_allocations.cc | 451 +++--
src/s_tir/transform/profile_instrumentation.cc | 25 +-
src/s_tir/transform/rewrite_unsafe_select.cc | 2 +-
src/s_tir/transform/storage_access.h | 2 +-
src/s_tir/transform/unify_thread_binding.cc | 3 +-
.../transform/using_assume_to_reduce_branches.cc | 8 +-
src/script/printer/ir/ir.cc | 2 +-
src/{ir => script/printer}/script_printer.cc | 0
src/target/codegen.cc | 4 +-
src/target/cuda/codegen_cuda.cc | 21 +-
src/target/cuda/intrin_rule_cuda.cc | 10 +-
src/target/metal/codegen_metal.cc | 7 +-
src/target/metal/intrin_rule_metal.cc | 6 +-
src/target/opencl/codegen_opencl.cc | 5 +-
src/target/source/codegen_c_host.cc | 4 +-
src/target/source/codegen_trn.cc | 4 +-
src/target/target.cc | 2 +-
src/target/vulkan/spirv_support.cc | 89 +-
src/target/vulkan/spirv_utils.cc | 10 +-
src/target/webgpu/codegen_webgpu.cc | 7 +-
src/target/webgpu/intrin_rule_webgpu.cc | 6 +-
src/te/operation/create_primfunc.cc | 26 +-
src/tirx/analysis/control_flow_graph.cc | 1692 ------------------
src/tirx/analysis/control_flow_graph.h | 667 -------
src/tirx/analysis/side_effect.cc | 2 +-
src/tirx/analysis/stmt_finding.cc | 2 +-
src/tirx/analysis/verify_memory.cc | 4 +-
src/tirx/analysis/verify_tirx_well_formed.cc | 5 +-
src/tirx/ir/function.cc | 4 -
src/tirx/ir/stmt.cc | 4 +-
src/tirx/ir/tirx_stmt.cc | 2 +-
src/tirx/ir/transform.cc | 32 +-
src/tirx/op/builtin.cc | 321 ++--
src/tirx/op/op.cc | 8 +-
src/tirx/op/runtime.cc | 4 +-
src/tirx/op/target_builtin/cuda.cc | 202 +--
src/tirx/op/target_builtin/trn.cc | 36 +-
src/tirx/op/tirx.cc | 11 +-
src/tirx/script/builder/frame.cc | 6 +-
src/tirx/script/printer/buffer.cc | 2 +-
src/tirx/script/printer/expr.cc | 3 +-
src/tirx/script/printer/function.cc | 10 +-
src/tirx/script/printer/stmt.cc | 15 +-
src/tirx/transform/ir_utils.cc | 4 -
src/tirx/transform/lower_device_kernel_launch.cc | 91 +-
src/tirx/transform/lower_intrin.cc | 7 +-
src/tirx/transform/lower_warp_memory.cc | 2 +-
src/tirx/transform/make_packed_api.cc | 4 +-
src/tirx/transform/remove_no_op.cc | 71 +-
src/tirx/transform/remove_no_op.h | 14 +-
src/tirx/transform/split_host_device.cc | 16 +-
.../transform/{simplify.cc => stmt_simplify.cc} | 119 +-
src/tirx/transform/{simplify.h => stmt_simplify.h} | 16 +-
src/tirx/transform/storage_rewrite.cc | 10 +-
src/tirx/transform/unroll_loop.cc | 11 +-
src/tirx/transform/vectorize_loop.cc | 6 +-
tests/cpp/ir_functor_test.cc | 2 +-
.../test_arith_narrow_predicate_expression.py | 87 -
tests/python/relax/test_frontend_tflite.py | 1845 ++++++++++++++++++--
.../relax/test_transform_dead_code_elimination.py | 149 --
...e_postproc_rewrite_parallel_vectorize_unroll.py | 4 +-
.../test_tir_schedule_annotate_buffer_access.py | 14 +-
.../s_tir/schedule/test_tir_schedule_sampling.py | 2 +-
.../test_s_tir_transform_compact_buffer_region.py | 2 +-
...est_s_tir_transform_convert_blocks_to_opaque.py | 2 +-
.../transform/test_s_tir_transform_hoist_if.py | 2 +-
.../test_s_tir_transform_inject_double_buffer.py | 6 +-
...est_s_tir_transform_inject_software_pipeline.py | 2 +-
.../test_s_tir_transform_loop_partition.py | 22 +-
.../test_s_tir_transform_lower_match_buffer.py | 2 +-
.../test_s_tir_transform_lower_opaque_block.py | 2 +-
...form_merge_dynamic_shared_memory_allocations.py | 95 +-
...st_s_tir_transform_renormalize_split_pattern.py | 4 +-
.../test_s_tir_transform_unify_thread_binding.py | 2 +-
tests/python/te/test_te_create_primfunc.py | 2 +-
tests/python/tirx-base/test_tir_constructor.py | 2 +-
.../tirx-base/test_tir_stmt_functor_substitute.py | 2 +-
.../test_tir_transform_convert_ssa.py | 2 +-
.../test_tir_transform_flatten_buffer.py | 2 +-
.../test_tir_transform_lower_intrin.py | 2 +-
.../test_tir_transform_narrow_datatype.py | 2 +-
.../test_tir_transform_remove_no_op.py | 288 +--
.../tirx-transform/test_tir_transform_simplify.py | 696 +-------
.../test_tir_transform_unroll_loop.py | 2 +-
.../operator/tile_primitive/trn/test_binary_trn.py | 2 +-
.../tile_primitive/trn/test_compose_op_trn.py | 6 +-
.../operator/tile_primitive/trn/test_copy_trn.py | 12 +-
.../operator/tile_primitive/trn/test_gemm_trn.py | 8 +-
.../tile_primitive/trn/test_reduction_trn.py | 2 +-
.../operator/tile_primitive/trn/test_select_trn.py | 8 +-
.../operator/tile_primitive/trn/test_unary_trn.py | 2 +-
.../tvmscript/test_tvmscript_ir_builder_tir.py | 6 +-
297 files changed, 5085 insertions(+), 7066 deletions(-)
rename docs/{how_to/dev/pytest_target_parametrization.rst => contribute/testing.rst} (90%)
delete mode 100644 docs/how_to/dev/index.rst
delete mode 100644 docs/how_to/dev/setup_rpc_system.rst
delete mode 100644 src/arith/narrow_predicate_expression.cc
delete mode 100644 src/arith/narrow_predicate_expression.h
delete mode 100644 src/ir/apply_pass_to_function.cc
delete mode 100644 src/ir/structural_equal.cc
delete mode 100644 src/ir/structural_hash.cc
rename src/{ir => script/printer}/script_printer.cc (100%)
delete mode 100644 src/tirx/analysis/control_flow_graph.cc
delete mode 100644 src/tirx/analysis/control_flow_graph.h
rename src/tirx/transform/{simplify.cc => stmt_simplify.cc} (66%)
rename src/tirx/transform/{simplify.h => stmt_simplify.h} (70%)
delete mode 100644 tests/python/arith/test_arith_narrow_predicate_expression.py