This is an automated email from the ASF dual-hosted git repository.

github-actions[bot] pushed a change to branch nightly
in repository https://gitbox.apache.org/repos/asf/tvm.git


    from c1415d6e4d [Relax][Frontend][TFLite] Add NON_MAX_SUPPRESSION_V4 
converter (#19464)
     add 1e7314f446 [Relax][Frontend][TFLite] Add DENSIFY operator test and fix 
prefetched handling (#19421)
     add 7ecf466e33 [S-TIR][Dlight] Add layered fall back strategy to handle 
missing attr `max_shared_memory_per_block` (#19453)
     add 59c426e520 [REFACTOR][RUNTIME] Phase out 
include/tvm/runtime/threading_backend.h (#19469)
     add 84fbd77468 [REFACTOR][RUNTIME] Phase out 
include/tvm/runtime/builtin_fp16.h (#19472)
     add 2516b1fd0d [REFACTOR][RUNTIME] Phase out IntTuple alias; use 
ffi::Shape directly (#19471)
     add 6e8f77d664 [REFACTOR][RUNTIME][CODEGEN] Backend specific target and 
runtime to enable cross-compile fallback (#19465)
     add e5b96be3a5 [REFACTOR][IR] Move tvm/support/with.h → 
tvm/ir/with_context.h (#19474)
     add 90c678abc9 [Relax][Frontend][TFLite] Fix `STRIDED_SLICE` negative 
stride and add `STRIDED_SLICE/SPLIT_V` tests (#19468)
     add bcc48303c4 [REFACTOR][S-TIR] Move tvm/support/random_engine.h → 
tvm/s_tir/random_engine.h (#19475)
     add 556571fc9d [REFACTOR][RUNTIME] Phase out include/tvm/runtime/object.h 
(#19476)

No new revisions were added by this update.

Summary of changes:
 CMakeLists.txt                                     |   7 +
 apps/hexagon_launcher/launcher_hexagon.cc          |   7 +-
 cmake/modules/CUDA.cmake                           |   3 -
 cmake/modules/Hexagon.cmake                        |  10 +-
 cmake/modules/LLVM.cmake                           |   7 +-
 cmake/modules/Metal.cmake                          |   6 +-
 cmake/modules/OpenCL.cmake                         |   3 -
 cmake/modules/ROCM.cmake                           |   2 -
 cmake/modules/Vulkan.cmake                         |  13 +-
 include/tvm/arith/analyzer.h                       |  23 +-
 include/tvm/arith/int_set.h                        |  10 +-
 include/tvm/arith/int_solver.h                     |  24 +-
 include/tvm/arith/iter_affine_map.h                |  15 +-
 include/tvm/ir/analysis.h                          |   2 +-
 include/tvm/ir/attrs.h                             |  29 +-
 include/tvm/ir/cow.h                               |  70 +++
 include/tvm/ir/diagnostic.h                        |  54 +-
 include/tvm/ir/env_func.h                          |  21 +-
 include/tvm/ir/expr.h                              |  40 +-
 include/tvm/ir/global_info.h                       |   8 +-
 include/tvm/ir/global_var_supply.h                 |   9 +-
 include/tvm/ir/instrument.h                        |   9 +-
 include/tvm/ir/module.h                            |  11 +-
 include/tvm/ir/name_supply.h                       |   8 +-
 include/tvm/ir/node_functor.h                      |  27 +-
 include/tvm/ir/op.h                                |   4 +-
 include/tvm/ir/replace_global_vars.h               |   2 +-
 include/tvm/ir/repr.h                              |   8 +-
 include/tvm/ir/script_printer.h                    |  26 +-
 include/tvm/ir/source_map.h                        |  34 +-
 include/tvm/ir/transform.h                         |  38 +-
 include/tvm/ir/type.h                              |  22 +-
 include/tvm/ir/type_functor.h                      |  10 +-
 include/tvm/{support/with.h => ir/with_context.h}  |   8 +-
 include/tvm/relax/analysis.h                       |   2 +-
 include/tvm/relax/attrs/manipulate.h               |   2 +-
 include/tvm/relax/attrs/vision.h                   |   4 +-
 include/tvm/relax/binding_rewrite.h                |   9 +-
 include/tvm/relax/block_builder.h                  |  10 +-
 include/tvm/relax/dataflow_pattern.h               |  41 +-
 include/tvm/relax/dataflow_pattern_functor.h       |   8 +-
 include/tvm/relax/distributed/axis_group_graph.h   |   2 +-
 include/tvm/relax/distributed/struct_info.h        |  16 +-
 include/tvm/relax/exec_builder.h                   |  13 +-
 include/tvm/relax/expr.h                           |  47 +-
 include/tvm/relax/expr_functor.h                   |  56 +-
 include/tvm/relax/struct_info.h                    |   4 +-
 include/tvm/relax/struct_info_functor.h            |   6 +-
 include/tvm/relax/tir_pattern.h                    |   8 +-
 include/tvm/relax/transform.h                      |  19 +-
 include/tvm/runtime/builtin_fp16.h                 |  39 --
 include/tvm/runtime/disco/cuda_ipc_memory.h        |   9 +-
 include/tvm/runtime/disco/session.h                |  43 +-
 include/tvm/runtime/int_tuple.h                    |  38 --
 include/tvm/runtime/memory/memory_manager.h        |   9 +-
 include/tvm/runtime/object.h                       | 148 -----
 include/tvm/runtime/tensor.h                       |   5 +-
 include/tvm/runtime/timer.h                        |   9 +-
 include/tvm/runtime/vm/executable.h                |   1 -
 include/tvm/runtime/vm/vm.h                        |  24 +-
 include/tvm/s_tir/data_layout.h                    |  16 +-
 include/tvm/s_tir/meta_schedule/arg_info.h         |  17 +-
 include/tvm/s_tir/meta_schedule/builder.h          |  30 +-
 include/tvm/s_tir/meta_schedule/cost_model.h       |  10 +-
 include/tvm/s_tir/meta_schedule/database.h         |  39 +-
 include/tvm/s_tir/meta_schedule/extracted_task.h   |  10 +-
 .../tvm/s_tir/meta_schedule/feature_extractor.h    |  11 +-
 include/tvm/s_tir/meta_schedule/measure_callback.h |  11 +-
 .../tvm/s_tir/meta_schedule/measure_candidate.h    |  11 +-
 include/tvm/s_tir/meta_schedule/mutator.h          |  22 +-
 include/tvm/s_tir/meta_schedule/postproc.h         |  10 +-
 include/tvm/s_tir/meta_schedule/profiler.h         |   9 +-
 include/tvm/s_tir/meta_schedule/runner.h           |  39 +-
 include/tvm/s_tir/meta_schedule/schedule_rule.h    |   9 +-
 include/tvm/s_tir/meta_schedule/search_strategy.h  |  11 +-
 include/tvm/s_tir/meta_schedule/space_generator.h  |  14 +-
 include/tvm/s_tir/meta_schedule/task_scheduler.h   |  24 +-
 include/tvm/s_tir/meta_schedule/tune_context.h     |  20 +-
 include/tvm/{support => s_tir}/random_engine.h     |  10 +-
 include/tvm/s_tir/sblock_dependence_info.h         |  11 +-
 include/tvm/s_tir/sblock_scope.h                   |  40 +-
 include/tvm/s_tir/schedule/instruction.h           |  25 +-
 include/tvm/s_tir/schedule/schedule.h              |  34 +-
 include/tvm/s_tir/schedule/state.h                 |  10 +-
 include/tvm/s_tir/schedule/trace.h                 |  12 +-
 include/tvm/s_tir/utils.h                          |  12 +-
 include/tvm/script/ir_builder/base.h               |  25 +-
 include/tvm/script/ir_builder/ir/frame.h           |   2 +-
 include/tvm/script/ir_builder/relax/frame.h        |  14 +-
 include/tvm/script/ir_builder/tirx/frame.h         |  25 +-
 include/tvm/script/ir_builder/tirx/ir.h            |   6 +-
 include/tvm/script/printer/doc.h                   |  14 +-
 include/tvm/script/printer/ir_docsifier.h          |  41 +-
 include/tvm/script/printer/ir_docsifier_functor.h  |  12 +-
 include/tvm/support/serializer.h                   |  21 +
 include/tvm/target/tag.h                           |  12 +-
 include/tvm/target/target.h                        |  14 +-
 include/tvm/target/target_kind.h                   |  38 +-
 include/tvm/target/virtual_device.h                |   4 +-
 include/tvm/te/operation.h                         |   5 +-
 include/tvm/te/tensor.h                            |  10 +-
 include/tvm/tirx/buffer.h                          |   9 +-
 include/tvm/tirx/expr.h                            |  11 +-
 include/tvm/tirx/expr_functor.h                    |  10 +-
 include/tvm/tirx/function.h                        |   9 +-
 include/tvm/tirx/index_map.h                       |  13 +-
 include/tvm/tirx/stmt.h                            |  22 +-
 include/tvm/tirx/stmt_functor.h                    |  21 +-
 include/tvm/tirx/var.h                             |  13 +-
 python/tvm/contrib/nvcc.py                         |  12 +-
 .../tvm/relax/frontend/tflite/tflite_frontend.py   |  43 +-
 python/tvm/s_tir/dlight/analysis/__init__.py       |   1 +
 .../tvm/s_tir/dlight/analysis/common_analysis.py   |  30 +-
 python/tvm/s_tir/dlight/gpu/gemv.py                |   4 +-
 python/tvm/s_tir/dlight/gpu/low_batch_gemv.py      |   4 +-
 .../tvm/script/ir_builder/tirx/external_kernel.py  |  26 +-
 src/arith/analyzer.cc                              |   1 +
 src/arith/bound_deducer.cc                         |  10 +-
 src/arith/canonical_simplify.cc                    |   6 +-
 src/arith/const_int_bound.cc                       |   3 +-
 src/arith/detect_linear_equation.cc                |   4 +-
 src/arith/domain_touched.cc                        |   6 +-
 src/arith/int_constraints.cc                       |   7 +-
 src/arith/int_set.cc                               |   3 +-
 src/arith/interval_set.h                           |   1 +
 src/arith/ir_mutator_with_analyzer.cc              |   1 +
 src/arith/ir_mutator_with_analyzer.h               |   3 +-
 src/arith/ir_visitor_with_analyzer.h               |   2 +-
 src/arith/iter_affine_map.cc                       |  14 +-
 src/arith/modular_set.cc                           |   3 +-
 src/arith/narrow_predicate_expression.cc           |   1 +
 src/arith/pattern_match.h                          |  17 +-
 src/arith/presburger_set.cc                        |   3 +-
 src/arith/presburger_set.h                         |   1 +
 src/arith/rewrite_simplify.cc                      |   3 +-
 src/arith/rewrite_simplify.h                       |   9 +-
 src/arith/unwrap_vector_expr.cc                    |   1 +
 src/ir/analysis.cc                                 |   2 +-
 src/ir/attr_functor.h                              |  16 +-
 src/ir/attrs.cc                                    |   2 +-
 src/ir/diagnostic.cc                               |  43 +-
 src/ir/env_func.cc                                 |   4 +-
 src/ir/expr.cc                                     |   8 +-
 src/ir/global_info.cc                              |   2 +-
 src/ir/instrument.cc                               |   2 +-
 src/ir/module.cc                                   |  14 +-
 src/ir/op.cc                                       |   2 +-
 src/ir/repr.cc                                     |   5 +-
 src/ir/script_printer.cc                           |  12 +-
 src/ir/source_map.cc                               |   8 +-
 src/ir/structural_equal.cc                         |   4 +-
 src/ir/structural_hash.cc                          |   7 +-
 src/ir/type.cc                                     |  10 +-
 src/ir/type_functor.cc                             |   1 +
 src/relax/analysis/analysis.cc                     |   3 +-
 src/relax/analysis/collect_call_map.cc             |   6 +-
 src/relax/analysis/computable_at_compile_time.cc   |   2 +-
 src/relax/analysis/detect_recursion.cc             |   1 +
 src/relax/analysis/graph_partitioner.cc            |  14 +-
 src/relax/analysis/graph_partitioner.h             |  11 +-
 src/relax/analysis/layout_transformation.cc        |  15 +-
 src/relax/analysis/struct_info_analysis.cc         |   5 +-
 src/relax/analysis/tir_op_pattern_kind.cc          |   5 +-
 src/relax/analysis/udchain.cc                      |   6 +-
 src/relax/analysis/var2value.cc                    |   1 +
 src/relax/analysis/well_formed.cc                  |   3 +-
 .../backend/adreno/annotate_custom_storage.cc      |   5 +-
 .../backend/adreno/fold_vdevice_scope_change.cc    |   1 +
 src/relax/backend/contrib/clml/codegen.cc          |   5 +-
 .../backend/contrib/codegen_json/codegen_json.h    |  11 +-
 src/relax/backend/contrib/cublas/codegen.cc        |   6 +-
 src/relax/backend/contrib/cudnn/codegen.cc         |   1 +
 src/relax/backend/contrib/cutlass/codegen.cc       |  10 +-
 src/relax/backend/contrib/dnnl/codegen.cc          |   1 +
 src/relax/backend/contrib/example_npu/codegen.cc   |   1 +
 src/relax/backend/contrib/hipblas/codegen.cc       |   1 +
 src/relax/backend/contrib/nnapi/codegen.cc         |   1 +
 src/relax/backend/contrib/tensorrt/codegen.cc      |   5 +-
 src/relax/backend/contrib/utils.cc                 |   1 +
 src/relax/backend/contrib/utils.h                  |   1 +
 src/relax/backend/pattern_registry.h               |   1 -
 src/relax/backend/task_extraction.cc               |   1 +
 src/relax/backend/vm/codegen_vm.cc                 |   8 +-
 src/relax/backend/vm/codegen_vm_tir.cc             |   1 +
 src/relax/backend/vm/exec_builder.cc               |   4 +-
 src/relax/backend/vm/lower_runtime_builtin.cc      |   1 +
 src/relax/backend/vm/vm_shape_lower.cc             |   1 +
 src/relax/distributed/axis_group_graph.cc          |   1 +
 src/relax/distributed/global_info.cc               |   4 +-
 src/relax/distributed/struct_info.cc               |   8 +-
 src/relax/distributed/transform/lower_distir.cc    |   1 +
 .../transform/lower_global_view_to_local_view.cc   |  28 +-
 .../distributed/transform/propagate_sharding.cc    |   7 +-
 src/relax/distributed/transform/utils.h            |   1 +
 src/relax/ir/binding_rewrite.cc                    |   3 +-
 src/relax/ir/block_builder.cc                      |  23 +-
 src/relax/ir/dataflow_block_rewriter.cc            |   1 +
 src/relax/ir/dataflow_expr_rewriter.cc             |   1 +
 src/relax/ir/dataflow_matcher.cc                   |   3 +-
 src/relax/ir/dataflow_matcher.h                    |   2 +-
 src/relax/ir/dataflow_pattern.cc                   |  50 +-
 src/relax/ir/expr.cc                               |  50 +-
 src/relax/ir/expr_functor.cc                       |   9 +-
 src/relax/ir/py_expr_functor.cc                    |  29 +-
 src/relax/ir/struct_info.cc                        |  22 +-
 src/relax/ir/struct_info_functor.cc                |   1 +
 src/relax/ir/transform.cc                          |   1 +
 src/relax/ir/type.cc                               |  10 +-
 src/relax/op/ccl/ccl.cc                            |   6 +-
 src/relax/op/distributed/distributed.cc            |   6 +-
 src/relax/op/distributed/utils.cc                  |   2 +
 src/relax/op/image/resize.cc                       |  33 +-
 src/relax/op/nn/attention.cc                       |   4 +-
 src/relax/op/nn/convolution.cc                     |  19 +-
 src/relax/op/nn/nn.cc                              |  29 +-
 src/relax/op/nn/pooling.cc                         |  19 +-
 src/relax/op/op.cc                                 |  11 +-
 src/relax/op/op_common.cc                          |   2 +
 src/relax/op/op_common.h                           |   1 +
 src/relax/op/tensor/binary.cc                      |   2 +
 src/relax/op/tensor/create.cc                      |  21 +-
 src/relax/op/tensor/datatype.cc                    |  10 +-
 src/relax/op/tensor/grad.cc                        |   4 +-
 src/relax/op/tensor/index.cc                       |   4 +-
 src/relax/op/tensor/inspect.cc                     |   1 +
 src/relax/op/tensor/linear_algebra.cc              |   4 +-
 src/relax/op/tensor/manipulate.cc                  |  41 +-
 src/relax/op/tensor/qdq.cc                         |  16 +-
 src/relax/op/tensor/sampling.cc                    |   3 +-
 src/relax/op/tensor/search.cc                      |  30 +-
 src/relax/op/tensor/set.cc                         |   1 +
 src/relax/op/tensor/statistical.cc                 |   4 +-
 src/relax/op/tensor/statistical.h                  |  32 +-
 src/relax/op/vision/nms.cc                         |  14 +-
 src/relax/op/vision/nms.h                          |   1 -
 src/relax/training/utils.cc                        |   3 +-
 src/relax/transform/allocate_workspace.cc          |   3 +-
 src/relax/transform/alter_op_impl.cc               |   3 +-
 src/relax/transform/attach_global_symbol.cc        |   1 +
 src/relax/transform/bind_params.cc                 |   9 +-
 src/relax/transform/bundle_model_params.cc         |   1 +
 src/relax/transform/call_tir_rewrite.cc            |   1 +
 src/relax/transform/canonicalize_bindings.cc       |   9 +-
 src/relax/transform/convert_dataflow.cc            |   1 +
 src/relax/transform/convert_layout.cc              |   5 +-
 src/relax/transform/dataflow_inplace.cc            |  24 +-
 src/relax/transform/decompose_ops.cc               |   1 +
 src/relax/transform/eliminate_common_subexpr.cc    |   1 +
 src/relax/transform/expand_tuple_arguments.cc      |   2 +-
 src/relax/transform/fold_constant.cc               |   1 +
 src/relax/transform/fuse_ops.cc                    |  25 +-
 src/relax/transform/fuse_tir.cc                    |   5 +-
 src/relax/transform/gradient.cc                    |   7 +-
 src/relax/transform/gradient_simplifier.cc         |   1 +
 src/relax/transform/infer_amp_utils.cc             |   4 +-
 src/relax/transform/infer_amp_utils.h              |   4 +-
 src/relax/transform/infer_layout_utils.cc          |   1 +
 src/relax/transform/infer_layout_utils.h           |  18 +-
 src/relax/transform/inline_functions.cc            |   2 +-
 src/relax/transform/kill_after_last_use.cc         |   1 +
 src/relax/transform/lambda_lift.cc                 |   4 +-
 src/relax/transform/lazy_transform_params.cc       |   1 +
 src/relax/transform/legalize_ops.cc                |   1 +
 src/relax/transform/lift_transform_params.cc       |  58 +-
 src/relax/transform/lower_alloc_tensor.cc          |   1 +
 src/relax/transform/merge_composite_functions.cc   |   3 +-
 src/relax/transform/meta_schedule.cc               |   1 +
 src/relax/transform/normalize.cc                   |   1 +
 src/relax/transform/realize_vdevice.cc             |   3 +-
 src/relax/transform/remove_purity_checking.cc      |   1 +
 src/relax/transform/remove_unused_outputs.cc       |   5 +-
 src/relax/transform/remove_unused_parameters.cc    |   4 +-
 src/relax/transform/replace_global_vars.cc         |   5 +-
 src/relax/transform/rewrite_cuda_graph.cc          |   3 +-
 src/relax/transform/rewrite_dataflow_reshape.cc    |   1 +
 src/relax/transform/run_codegen.cc                 |   3 +-
 src/relax/transform/split_call_tir_by_pattern.cc   |  21 +-
 src/relax/transform/static_plan_block_memory.cc    |  11 +-
 src/relax/transform/to_mixed_precision.cc          |   5 +-
 src/relax/transform/topological_sort.cc            |   1 +
 src/relax/transform/update_param_struct_info.cc    |   1 +
 src/relax/transform/utils.h                        |   7 +-
 src/relax/utils.cc                                 |   1 +
 src/runtime/builtin_fp16.cc                        |  18 +
 src/runtime/const_loader_module.cc                 |   3 +-
 src/runtime/contrib/cublas/cublas_json_runtime.cc  |   3 +-
 .../contrib/cudnn/cudnn_frontend/attention.h       |   6 +-
 src/runtime/contrib/dnnl/dnnl_json_runtime.cc      |   3 +-
 .../contrib/hipblas/hipblas_json_runtime.cc        |   3 +-
 src/runtime/contrib/json/json_runtime.h            |   3 +-
 src/runtime/contrib/random/mt_random_engine.cc     |   4 +-
 src/runtime/contrib/random/random.cc               |   1 -
 src/runtime/cuda/cuda_module.cc                    | 185 +++---
 src/runtime/cuda/cuda_module.h                     |  66 ---
 src/runtime/disco/bcast_session.cc                 |   3 +-
 src/runtime/disco/bcast_session.h                  |   2 +-
 src/runtime/disco/cuda_ipc/cuda_ipc_memory.cc      |   2 +-
 src/runtime/disco/disco_worker.cc                  |   2 +-
 src/runtime/disco/distributed/socket_session.cc    |   4 +-
 src/runtime/disco/loader.cc                        |  27 +-
 src/runtime/disco/process_session.cc               |   5 +-
 src/runtime/disco/protocol.h                       |  32 +-
 src/runtime/disco/threaded_session.cc              |   4 +-
 src/runtime/file_utils.cc                          |  23 +-
 src/runtime/hexagon/hexagon_common.h               |   1 -
 src/runtime/hexagon/hexagon_module.cc              | 163 ++++--
 src/runtime/hexagon/hexagon_module.h               |  95 ----
 src/runtime/hexagon/rpc/hexagon/rpc_server.cc      |   1 -
 src/runtime/memory/memory_manager.cc               |   1 +
 src/runtime/metadata.h                             |   8 +-
 src/runtime/metal/metal_module.h                   |  61 --
 src/runtime/metal/metal_module.mm                  | 162 +++---
 src/runtime/opencl/opencl_common.h                 |  27 +-
 src/runtime/opencl/opencl_module.cc                | 118 ++--
 src/runtime/opencl/opencl_module.h                 |  92 ---
 src/runtime/opencl/opencl_module_spirv.cc          | 159 ------
 src/runtime/rocm/rocm_module.cc                    | 119 ++--
 src/runtime/rocm/rocm_module.h                     |  66 ---
 src/runtime/rpc/rpc_endpoint.cc                    |   8 +-
 src/runtime/rpc/rpc_local_session.cc               |   2 +-
 src/runtime/rpc/rpc_module.cc                      |   5 +-
 src/runtime/rpc/rpc_session.h                      |  25 +-
 src/runtime/spirv/spirv_shader.h                   |  30 -
 src/runtime/static_library.cc                      |   3 +-
 src/runtime/thread_pool.cc                         |   3 +-
 src/runtime/threading_backend.cc                   |   3 +-
 {include/tvm => src}/runtime/threading_backend.h   |   0
 src/runtime/vm/attn_backend.h                      |   1 -
 src/runtime/vm/attn_utils.h                        |   7 +-
 src/runtime/vm/builtin.cc                          |  12 +-
 src/runtime/vm/cuda/cuda_graph_builtin.cc          |  31 +-
 src/runtime/vm/executable.cc                       |   7 +-
 src/runtime/vm/kv_state.cc                         |   2 +-
 src/runtime/vm/kv_state.h                          |  24 +-
 src/runtime/vm/lm_support.cc                       |   8 +-
 src/runtime/vm/paged_kv_cache.cc                   |   2 +-
 src/runtime/vm/rnn_state.cc                        |   2 +-
 src/runtime/vm/vm.cc                               |  31 +-
 src/runtime/vulkan/vulkan_common.h                 |   3 -
 src/runtime/vulkan/vulkan_module.cc                |  76 +--
 src/runtime/vulkan/vulkan_module.h                 |  74 ---
 src/runtime/vulkan/vulkan_wrapped_func.cc          |  41 +-
 src/runtime/vulkan/vulkan_wrapped_func.h           |  36 +-
 src/s_tir/analysis/calculate_allocated_memory.cc   |   2 +-
 src/s_tir/analysis/estimate_flops.cc               |   2 +-
 src/s_tir/analysis/find_anchor_sblock.cc           |   1 +
 src/s_tir/analysis/identify_memcpy.cc              |   7 +-
 src/s_tir/analysis/is_pure_function.cc             |   1 +
 src/s_tir/analysis/oob_checker.cc                  |   6 +-
 .../analysis/sblock_access_region_detector.cc      |   1 +
 .../analysis/sblock_buffer_access_lca_detector.cc  |   5 +-
 src/s_tir/backend/adreno/texture_flatten.cc        |   2 +-
 src/s_tir/data_layout.cc                           |   1 -
 src/s_tir/meta_schedule/arg_info.cc                |  11 +-
 src/s_tir/meta_schedule/builder/builder.cc         |   6 +-
 src/s_tir/meta_schedule/cost_model/cost_model.cc   |   2 +-
 src/s_tir/meta_schedule/database/database.cc       |  39 +-
 src/s_tir/meta_schedule/database/database_utils.cc |   2 +-
 src/s_tir/meta_schedule/database/json_database.cc  |  10 +-
 .../meta_schedule/database/memory_database.cc      |   2 +-
 .../database/ordered_union_database.cc             |   2 +-
 .../meta_schedule/database/schedule_fn_database.cc |   2 +-
 src/s_tir/meta_schedule/database/union_database.cc |   2 +-
 src/s_tir/meta_schedule/extracted_task.cc          |   2 +-
 .../feature_extractor/feature_extractor.cc         |   2 +-
 .../feature_extractor/per_store_feature.cc         |   9 +-
 .../measure_callback/add_to_database.cc            |   2 +-
 .../measure_callback/measure_callback.cc           |   2 +-
 .../measure_callback/remove_build_artifact.cc      |   2 +-
 .../measure_callback/update_cost_model.cc          |   2 +-
 src/s_tir/meta_schedule/module_equality.cc         |   1 +
 .../mutator/mutate_compute_location.cc             |   3 +-
 src/s_tir/meta_schedule/mutator/mutate_parallel.cc |   5 +-
 .../meta_schedule/mutator/mutate_thread_binding.cc |   3 +-
 .../meta_schedule/mutator/mutate_tile_size.cc      |  15 +-
 src/s_tir/meta_schedule/mutator/mutate_unroll.cc   |   3 +-
 src/s_tir/meta_schedule/mutator/mutator.cc         |   9 +-
 .../postproc/disallow_async_strided_mem_copy.cc    |   5 +-
 .../postproc/disallow_dynamic_loop.cc              |   4 +-
 src/s_tir/meta_schedule/postproc/postproc.cc       |   2 +-
 .../postproc/rewrite_cooperative_fetch.cc          |   7 +-
 src/s_tir/meta_schedule/postproc/rewrite_layout.cc |   5 +-
 .../postproc/rewrite_parallel_vectorize_unroll.cc  |   4 +-
 .../postproc/rewrite_reduction_block.cc            |   5 +-
 .../meta_schedule/postproc/rewrite_tensorize.cc    |   6 +-
 .../postproc/rewrite_unbound_block.cc              |   4 +-
 .../meta_schedule/postproc/verify_gpu_code.cc      |   5 +-
 .../meta_schedule/postproc/verify_vtcm_limit.cc    |   4 +-
 src/s_tir/meta_schedule/profiler.cc                |   2 +-
 src/s_tir/meta_schedule/runner/runner.cc           |   8 +-
 .../meta_schedule/schedule_rule/add_rfactor.cc     |   4 +-
 .../schedule_rule/apply_custom_rule.cc             |   4 +-
 src/s_tir/meta_schedule/schedule_rule/auto_bind.cc |   4 +-
 .../meta_schedule/schedule_rule/auto_inline.cc     |  21 +-
 .../schedule_rule/cross_thread_reduction.cc        |   6 +-
 .../schedule_rule/multi_level_tiling.cc            |   6 +-
 .../schedule_rule/multi_level_tiling.h             |  12 +-
 .../multi_level_tiling_tensor_core.cc              |   9 +-
 .../multi_level_tiling_wide_vector.cc              |   2 +-
 .../multi_level_tiling_with_intrin.cc              |   2 +-
 .../schedule_rule/parallel_vectorize_unroll.cc     |   6 +-
 .../schedule_rule/random_compute_location.cc       |   3 +-
 .../meta_schedule/schedule_rule/schedule_rule.cc   |   2 +-
 .../search_strategy/evolutionary_search.cc         |   5 +-
 .../meta_schedule/search_strategy/replay_func.cc   |   4 +-
 .../meta_schedule/search_strategy/replay_trace.cc  |   4 +-
 .../search_strategy/search_strategy.cc             |   4 +-
 .../space_generator/post_order_apply.cc            |   4 +-
 .../meta_schedule/space_generator/schedule_fn.cc   |   6 +-
 .../space_generator/space_generator.cc             |   2 +-
 .../space_generator/space_generator_union.cc       |   4 +-
 .../meta_schedule/task_scheduler/gradient_based.cc |   8 +-
 .../meta_schedule/task_scheduler/round_robin.cc    |   2 +-
 .../meta_schedule/task_scheduler/task_scheduler.cc |   5 +-
 src/s_tir/meta_schedule/trace_apply.cc             |   6 +-
 src/s_tir/meta_schedule/tune_context.cc            |   7 +-
 src/s_tir/meta_schedule/utils.h                    |  24 +-
 src/s_tir/sblock_dependence_info.cc                |   2 +-
 src/s_tir/sblock_scope.cc                          |   9 +-
 src/s_tir/schedule/analysis.h                      |  24 +-
 src/s_tir/schedule/analysis/analysis.cc            |  65 ++-
 src/s_tir/schedule/analysis/layout.cc              |   1 +
 src/s_tir/schedule/analysis/reducer.cc             |   8 +-
 src/s_tir/schedule/analysis/verify.cc              |   2 +
 src/s_tir/schedule/concrete_schedule.cc            |  37 +-
 src/s_tir/schedule/concrete_schedule.h             |  22 +-
 src/s_tir/schedule/error.cc                        |   5 +-
 src/s_tir/schedule/error.h                         |   4 +-
 src/s_tir/schedule/instruction.cc                  |   5 +-
 src/s_tir/schedule/instruction_traits.h            |  26 +-
 src/s_tir/schedule/ir_comparator.cc                |   8 +-
 src/s_tir/schedule/ir_comparator.h                 |  18 +-
 src/s_tir/schedule/primitive.h                     |  44 +-
 src/s_tir/schedule/primitive/annotate.cc           |  23 +-
 .../schedule/primitive/annotate_buffer_access.cc   |   3 +-
 src/s_tir/schedule/primitive/block_annotate.cc     |   9 +-
 src/s_tir/schedule/primitive/blockize_tensorize.cc |  31 +-
 src/s_tir/schedule/primitive/cache_index.cc        |  11 +-
 .../schedule/primitive/cache_index_helpers.cc      |   3 +-
 src/s_tir/schedule/primitive/cache_index_helpers.h |   6 +-
 src/s_tir/schedule/primitive/cache_read_write.cc   |  71 ++-
 src/s_tir/schedule/primitive/compute_at.cc         |   8 +-
 src/s_tir/schedule/primitive/compute_inline.cc     |  19 +-
 src/s_tir/schedule/primitive/decompose_padding.cc  |   3 +-
 src/s_tir/schedule/primitive/for_kind.cc           |  10 +-
 src/s_tir/schedule/primitive/get_block_loop.cc     |   5 +-
 src/s_tir/schedule/primitive/hide_buffer_access.cc |   6 +-
 .../schedule/primitive/layout_transformation.cc    |  29 +-
 .../schedule/primitive/loop_transformation.cc      |  28 +-
 src/s_tir/schedule/primitive/pad_einsum.cc         |  17 +-
 src/s_tir/schedule/primitive/read_write_at.cc      |  17 +-
 src/s_tir/schedule/primitive/reduction.cc          |  21 +-
 .../schedule/primitive/reorder_block_iter_var.cc   |   4 +-
 src/s_tir/schedule/primitive/rolling_buffer.cc     |   8 +-
 src/s_tir/schedule/primitive/sampling.cc           |  44 +-
 src/s_tir/schedule/schedule.cc                     |  22 +-
 src/s_tir/schedule/state.cc                        |  35 +-
 src/s_tir/schedule/trace.cc                        |  77 +--
 src/s_tir/schedule/traced_schedule.cc              |   9 +-
 src/s_tir/schedule/transform.cc                    |  33 +-
 src/s_tir/schedule/transform.h                     |   2 +-
 src/s_tir/schedule/utils.h                         |   8 +-
 src/s_tir/transform/annotate_irregular_loop.cc     |   1 +
 src/s_tir/transform/bound_checker.cc               |   1 +
 src/s_tir/transform/canonicalize_loop.cc           |   1 +
 src/s_tir/transform/compact_buffer_region.cc       |  25 +-
 src/s_tir/transform/convert_blocks_to_opaque.cc    |   1 +
 src/s_tir/transform/default_gpu_schedule.cc        |   1 +
 src/s_tir/transform/hoist_expression.cc            |   5 +-
 src/s_tir/transform/inject_double_buffer.cc        |   1 +
 src/s_tir/transform/inject_software_pipeline.cc    |  43 +-
 src/s_tir/transform/inject_virtual_thread.cc       |   1 +
 src/s_tir/transform/lift_thread_binding.cc         |   7 +-
 src/s_tir/transform/loop_partition.cc              |  13 +-
 src/s_tir/transform/lower_async_dma.cc             |   1 +
 .../transform/lower_cross_thread_reduction.cc      |  66 +--
 src/s_tir/transform/lower_match_buffer.cc          |   1 +
 src/s_tir/transform/lower_opaque_block.cc          |   1 +
 .../manifest_shared_memory_local_stage.cc          |   5 +-
 .../transform/memhammer_intermediate_stage.cc      |  12 +-
 src/s_tir/transform/memhammer_lower_auto_copy.cc   |   5 +-
 .../transform/memhammer_tensorcore_rewrite.cc      |   6 +-
 .../transform/merge_shared_memory_allocations.cc   |   5 +-
 .../plan_update_buffer_allocation_location.cc      |   4 +-
 .../remove_weight_layout_rewrite_block.cc          |   2 +-
 src/s_tir/transform/renew_defs.cc                  |   3 +-
 src/s_tir/transform/storage_access.cc              |   1 +
 src/s_tir/transform/storage_access.h               |   2 +-
 src/s_tir/transform/tensorcore_infer_fragment.cc   |   1 +
 src/s_tir/transform/thread_storage_sync.cc         |   6 +-
 src/s_tir/transform/transform_mma_buffer_layout.cc |   3 +-
 src/s_tir/transform/unify_thread_binding.cc        |   1 +
 .../transform/using_assume_to_reduce_branches.cc   |   1 +
 src/script/ir_builder/base.cc                      |   9 +-
 src/script/ir_builder/ir/ir.cc                     |   8 +-
 src/script/ir_builder/relax/frame.cc               |   6 +-
 src/script/ir_builder/relax/ir.cc                  |  19 +-
 src/script/ir_builder/tirx/ir.cc                   |  45 +-
 src/script/ir_builder/tirx/utils.h                 |   1 +
 src/script/printer/doc.cc                          |  51 +-
 src/script/printer/doc_printer/base_doc_printer.h  |   2 +-
 src/script/printer/ir/ir.cc                        |   2 +-
 src/script/printer/ir/utils.h                      |   6 +-
 src/script/printer/ir_docsifier.cc                 |  37 +-
 src/script/printer/relax/binding.cc                |   2 +
 src/script/printer/relax/distributed.cc            |   1 +
 src/script/printer/relax/region.cc                 |   2 +
 src/script/printer/relax/struct_info.cc            |   3 +-
 src/script/printer/relax/tir.cc                    |   1 +
 src/script/printer/relax/utils.h                   |   4 +-
 src/script/printer/tirx/buffer.cc                  |   4 +-
 src/script/printer/tirx/expr.cc                    |   2 +-
 src/script/printer/tirx/function.cc                |   2 +-
 src/script/printer/tirx/stmt.cc                    |   7 +-
 src/script/printer/tirx/utils.h                    |  19 +-
 src/script/printer/utils.h                         |   5 +-
 src/support/libinfo.cc                             |   2 +-
 src/support/ordered_set.h                          |   2 -
 src/target/{source => cuda}/codegen_cuda.cc        |  57 ++
 src/target/{source => cuda}/codegen_cuda.h         |   8 +-
 src/target/cuda/cuda_fallback_module.cc            | 120 ++++
 src/target/cuda/cuda_fallback_module.h             |  86 +++
 src/target/{source => cuda}/intrin_rule_cuda.cc    |   0
 src/target/{source => cuda}/literal/cuda_half_t.h  |   0
 src/target/{source => cuda}/literal/cuda_int8_t.h  |   0
 src/target/{ => cuda}/llvm/codegen_nvptx.cc        |  16 +-
 src/target/{source => cuda}/ptx.cc                 |   0
 src/target/{source => cuda}/ptx.h                  |   0
 src/target/hexagon/hexagon_fallback_module.cc      | 130 +++++
 src/target/hexagon/hexagon_fallback_module.h       |  92 +++
 src/target/{ => hexagon}/llvm/codegen_hexagon.cc   |  27 +-
 .../{ => hexagon}/llvm/intrin_rule_hexagon.cc      |   2 +-
 src/target/llvm/codegen_llvm.cc                    |   1 +
 src/target/llvm/llvm_instance.cc                   |   1 -
 src/target/llvm/llvm_module.cc                     |   6 +-
 src/target/{source => metal}/codegen_metal.cc      |  18 +-
 src/target/{source => metal}/codegen_metal.h       |   8 +-
 src/target/{source => metal}/intrin_rule_metal.cc  |   0
 src/target/metal/metal_fallback_module.cc          | 131 +++++
 src/target/metal/metal_fallback_module.h           |  89 +++
 src/target/{source => opencl}/codegen_opencl.cc    |  21 +-
 src/target/{source => opencl}/codegen_opencl.h     |  10 +-
 .../{source => opencl}/intrin_rule_opencl.cc       |   0
 src/target/opencl/opencl_fallback_module.cc        | 128 +++++
 src/target/opencl/opencl_fallback_module.h         |  87 +++
 src/target/opt/build_cuda_off.cc                   |  24 -
 src/target/opt/build_cuda_on.cc                    | 112 ----
 src/target/opt/build_hexagon_off.cc                |  49 --
 src/target/opt/build_metal_off.cc                  |  24 -
 src/target/opt/build_opencl_off.cc                 |  53 --
 src/target/opt/build_rocm_off.cc                   |  24 -
 src/target/{ => rocm}/llvm/codegen_amdgpu.cc       |  15 +-
 src/target/{ => rocm}/llvm/intrin_rule_rocm.cc     |   4 +-
 src/target/rocm/rocm_fallback_module.cc            | 126 +++++
 src/target/rocm/rocm_fallback_module.h             |  86 +++
 src/target/source/codegen_c.cc                     |   1 +
 src/target/source/source_module.cc                 |   3 +-
 src/target/target.cc                               |  25 +-
 src/target/{spirv => vulkan}/build_vulkan.cc       |  26 +-
 src/target/{spirv => vulkan}/codegen_spirv.cc      |   0
 src/target/{spirv => vulkan}/codegen_spirv.h       |   8 +-
 src/target/{spirv => vulkan}/intrin_rule_spirv.cc  |   0
 src/target/{spirv => vulkan}/ir_builder.cc         |   0
 src/target/{spirv => vulkan}/ir_builder.h          |   6 +-
 src/target/{spirv => vulkan}/spirv_support.cc      |   0
 src/target/{spirv => vulkan}/spirv_support.h       |   6 +-
 src/target/{spirv => vulkan}/spirv_utils.cc        |   2 +-
 src/target/{spirv => vulkan}/spirv_utils.h         |   8 +-
 src/target/vulkan/vulkan_fallback_module.cc        | 125 ++++
 src/target/vulkan/vulkan_fallback_module.h         |  88 +++
 src/target/{source => webgpu}/codegen_webgpu.cc    |  72 +--
 src/target/{source => webgpu}/codegen_webgpu.h     |   8 +-
 .../{source => webgpu}/intrin_rule_webgpu.cc       |   0
 src/target/webgpu/webgpu_fallback_module.cc        | 139 +++++
 src/target/webgpu/webgpu_fallback_module.h         |  90 +++
 src/te/operation/compute_op.cc                     |   2 +-
 src/te/operation/create_primfunc.cc                |  13 +-
 src/te/operation/create_primfunc.h                 |   2 +-
 src/te/operation/graph.cc                          |   2 +-
 src/te/tensor.cc                                   |   1 +
 src/tirx/analysis/collect_call_map.cc              |   2 +-
 src/tirx/analysis/control_flow_graph.cc            |   1 +
 src/tirx/analysis/var_use_def_analysis.cc          |   1 +
 src/tirx/analysis/verify_ssa.cc                    |   1 +
 src/tirx/analysis/verify_well_formed.cc            |   9 +-
 src/tirx/ir/data_type_rewriter.cc                  |   3 +-
 src/tirx/ir/expr.cc                                |  37 +-
 src/tirx/ir/expr_functor.cc                        |   1 +
 src/tirx/ir/function.cc                            |   2 +-
 src/tirx/ir/index_map.cc                           |   7 +-
 src/tirx/ir/py_functor.cc                          |  48 +-
 src/tirx/ir/specialize.cc                          |   5 +-
 src/tirx/ir/stmt.cc                                |  27 +-
 src/tirx/ir/stmt_functor.cc                        |  44 +-
 src/tirx/ir/tir_visitor_with_path.h                |   2 +-
 src/tirx/ir/transform.cc                           |   2 +-
 src/tirx/transform/annotate_device_regions.cc      |   1 +
 src/tirx/transform/bind_target.cc                  |   1 +
 src/tirx/transform/common_subexpr_elim.cc          |   5 +-
 src/tirx/transform/flatten_buffer.cc               |   5 +-
 src/tirx/transform/force_narrow_index_to_i32.cc    |   1 +
 src/tirx/transform/inline_private_functions.cc     |   4 +-
 src/tirx/transform/ir_utils.cc                     |   3 +-
 src/tirx/transform/ir_utils.h                      |   2 +-
 src/tirx/transform/lower_custom_datatypes.cc       |   3 +-
 src/tirx/transform/lower_device_kernel_launch.cc   |   1 +
 src/tirx/transform/lower_intrin.cc                 |   1 +
 src/tirx/transform/lower_tvm_builtin.cc            |   1 +
 src/tirx/transform/lower_warp_memory.cc            |   1 +
 src/tirx/transform/make_packed_api.cc              |   1 +
 src/tirx/transform/narrow_datatype.cc              |   1 +
 src/tirx/transform/primfunc_utils.cc               |   1 +
 src/tirx/transform/remove_no_op.cc                 |   1 +
 src/tirx/transform/replace_global_vars.cc          |   2 +-
 src/tirx/transform/simplify.cc                     |   1 +
 src/tirx/transform/storage_rewrite.cc              |  19 +-
 src/tirx/transform/tvm_ffi_binder.cc               |   3 +-
 src/tirx/transform/unroll_loop.cc                  |   1 +
 src/tirx/transform/unsupported_dtype_legalize.cc   |  25 +-
 src/tirx/transform/update_pointer_storage_scope.cc |   1 +
 src/tirx/transform/vectorize_loop.cc               |   1 +
 tests/cpp/expr_test.cc                             |   5 +-
 tests/cpp/ir_functor_test.cc                       |  10 +-
 tests/cpp/object_protocol_test.cc                  |  17 +-
 tests/cpp/random_engine_test.cc                    |   8 +-
 tests/cpp/target_test.cc                           |   2 +-
 tests/cpp/threading_backend_test.cc                |   3 +-
 tests/python/codegen/test_target_codegen_cuda.py   |  38 +-
 tests/python/codegen/test_target_codegen_metal.py  |  26 +
 tests/python/codegen/test_target_codegen_opencl.py |  38 ++
 tests/python/codegen/test_target_codegen_rocm.py   |  39 ++
 tests/python/codegen/test_target_codegen_vulkan.py |  26 +
 tests/python/relax/test_frontend_tflite.py         | 627 +++++++++++++++++++++
 tests/python/s_tir/dlight/test_gpu_gemv.py         |  32 ++
 .../python/s_tir/dlight/test_gpu_low_batch_gemv.py |  26 +
 635 files changed, 6124 insertions(+), 4154 deletions(-)
 create mode 100644 include/tvm/ir/cow.h
 rename include/tvm/{support/with.h => ir/with_context.h} (97%)
 delete mode 100644 include/tvm/runtime/builtin_fp16.h
 delete mode 100644 include/tvm/runtime/int_tuple.h
 delete mode 100644 include/tvm/runtime/object.h
 rename include/tvm/{support => s_tir}/random_engine.h (96%)
 delete mode 100644 src/runtime/cuda/cuda_module.h
 delete mode 100644 src/runtime/hexagon/hexagon_module.h
 delete mode 100644 src/runtime/metal/metal_module.h
 delete mode 100644 src/runtime/opencl/opencl_module.h
 delete mode 100644 src/runtime/opencl/opencl_module_spirv.cc
 delete mode 100644 src/runtime/rocm/rocm_module.h
 delete mode 100644 src/runtime/spirv/spirv_shader.h
 rename {include/tvm => src}/runtime/threading_backend.h (100%)
 delete mode 100644 src/runtime/vulkan/vulkan_module.h
 rename src/target/{source => cuda}/codegen_cuda.cc (96%)
 rename src/target/{source => cuda}/codegen_cuda.h (97%)
 create mode 100644 src/target/cuda/cuda_fallback_module.cc
 create mode 100644 src/target/cuda/cuda_fallback_module.h
 rename src/target/{source => cuda}/intrin_rule_cuda.cc (100%)
 rename src/target/{source => cuda}/literal/cuda_half_t.h (100%)
 rename src/target/{source => cuda}/literal/cuda_int8_t.h (100%)
 rename src/target/{ => cuda}/llvm/codegen_nvptx.cc (95%)
 rename src/target/{source => cuda}/ptx.cc (100%)
 rename src/target/{source => cuda}/ptx.h (100%)
 create mode 100644 src/target/hexagon/hexagon_fallback_module.cc
 create mode 100644 src/target/hexagon/hexagon_fallback_module.h
 rename src/target/{ => hexagon}/llvm/codegen_hexagon.cc (95%)
 rename src/target/{ => hexagon}/llvm/intrin_rule_hexagon.cc (99%)
 rename src/target/{source => metal}/codegen_metal.cc (95%)
 rename src/target/{source => metal}/codegen_metal.h (94%)
 rename src/target/{source => metal}/intrin_rule_metal.cc (100%)
 create mode 100644 src/target/metal/metal_fallback_module.cc
 create mode 100644 src/target/metal/metal_fallback_module.h
 rename src/target/{source => opencl}/codegen_opencl.cc (97%)
 rename src/target/{source => opencl}/codegen_opencl.h (94%)
 rename src/target/{source => opencl}/intrin_rule_opencl.cc (100%)
 create mode 100644 src/target/opencl/opencl_fallback_module.cc
 create mode 100644 src/target/opencl/opencl_fallback_module.h
 delete mode 100644 src/target/opt/build_cuda_off.cc
 delete mode 100644 src/target/opt/build_cuda_on.cc
 delete mode 100644 src/target/opt/build_hexagon_off.cc
 delete mode 100644 src/target/opt/build_metal_off.cc
 delete mode 100644 src/target/opt/build_opencl_off.cc
 delete mode 100644 src/target/opt/build_rocm_off.cc
 rename src/target/{ => rocm}/llvm/codegen_amdgpu.cc (96%)
 rename src/target/{ => rocm}/llvm/intrin_rule_rocm.cc (99%)
 create mode 100644 src/target/rocm/rocm_fallback_module.cc
 create mode 100644 src/target/rocm/rocm_fallback_module.h
 rename src/target/{spirv => vulkan}/build_vulkan.cc (55%)
 rename src/target/{spirv => vulkan}/codegen_spirv.cc (100%)
 rename src/target/{spirv => vulkan}/codegen_spirv.h (97%)
 rename src/target/{spirv => vulkan}/intrin_rule_spirv.cc (100%)
 rename src/target/{spirv => vulkan}/ir_builder.cc (100%)
 rename src/target/{spirv => vulkan}/ir_builder.h (99%)
 rename src/target/{spirv => vulkan}/spirv_support.cc (100%)
 rename src/target/{spirv => vulkan}/spirv_support.h (98%)
 rename src/target/{spirv => vulkan}/spirv_utils.cc (99%)
 rename src/target/{spirv => vulkan}/spirv_utils.h (88%)
 create mode 100644 src/target/vulkan/vulkan_fallback_module.cc
 create mode 100644 src/target/vulkan/vulkan_fallback_module.h
 rename src/target/{source => webgpu}/codegen_webgpu.cc (93%)
 rename src/target/{source => webgpu}/codegen_webgpu.h (95%)
 rename src/target/{source => webgpu}/intrin_rule_webgpu.cc (100%)
 create mode 100644 src/target/webgpu/webgpu_fallback_module.cc
 create mode 100644 src/target/webgpu/webgpu_fallback_module.h


Reply via email to