Issue 120733
Summary [mlir] tiling: Invalid slice when #map(0) != 0
Labels mlir:linalg, mlir
Assignees
Reporter mgehre-amd
    In the [reproducer](https://godbolt.org/z/fhen1svYd),
```
func.func @test(%arg0 : tensor<9xf32>) -> tensor<6xf32> {
  %empty = tensor.empty() : tensor<6xf32>
  %generic = linalg.generic
    {indexing_maps = [affine_map<(d0) -> (d0 + 3)>,
 affine_map<(d0) -> (d0)>],
     iterator_types = ["parallel"]} ins(%arg0: tensor<9xf32>) outs(%empty : tensor<6xf32>) {
    ^bb0(%in : f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<6xf32>
  return %generic : tensor<6xf32>
}
module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    %1, %loop = transform.structured.tile_using_for %0 tile_sizes [3] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
    transform.yield
 }
}
```
becomes
```
#map = affine_map<(d0) -> (d0 + 3)>
#map1 = affine_map<(d0) -> (d0)>
module {
  func.func @test(%arg0: tensor<9xf32>) -> tensor<6xf32> {
    %0 = tensor.empty() : tensor<6xf32>
    %c0 = arith.constant 0 : index
    %c6 = arith.constant 6 : index
    %c3 = arith.constant 3 : index
    %1 = scf.for %arg1 = %c0 to %c6 step %c3 iter_args(%arg2 = %0) -> (tensor<6xf32>) {
      %2 = affine.apply #map(%arg1)
      %extracted_slice = tensor.extract_slice %arg0[%2] [6] [1] : tensor<9xf32> to tensor<6xf32>
      %extracted_slice_0 = tensor.extract_slice %arg2[%arg1] [3] [1] : tensor<6xf32> to tensor<3xf32>
 %3 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel"]} ins(%extracted_slice : tensor<6xf32>) outs(%extracted_slice_0 : tensor<3xf32>) {
      ^bb0(%in: f32, %out: f32):
        linalg.yield %in : f32
      } -> tensor<3xf32>
      %inserted_slice = tensor.insert_slice %3 into %arg2[%arg1] [3] [1] : tensor<3xf32> into tensor<6xf32>
      scf.yield %inserted_slice : tensor<6xf32>
    }
 return %1 : tensor<6xf32>
  }
```
This results in an out-of-bounds access. `%2 = affine.apply #map(%arg1)` is `%arg1 + 3` (which takes the values `3` and `6` for the two loop iterations)
and `%extracted_slice = tensor.extract_slice %arg0[%2] [6] [1] : tensor<9xf32> to tensor<6xf32>` will extract `6` elements from that offset,
which tries to extract elements `6` to `11` in the second iteration - but the tensor only has 9 elements (valid indices `0` to `8`).

It seems that the implementation computing the slices is only correct when `#map(0) = 0`. The correct offset for the extract slice would be `#map(%arg1) - #map(0)`, which here is `%arg1`.
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to