Issue |
101709
|
Summary |
[mlir] Bufferization issue after tensor.insert_slice
|
Labels |
mlir
|
Assignees |
|
Reporter |
n-io
|
The `--one-shot-bufferize` pass lowers `tensor.insert_slice` to `memref.subview`, but it also allocates an extra buffer for the inserted value and then copies that buffer into the subview. Instead, it could write the result directly into the subview and save both an alloc and a copy.
The following has a `linalg.add` followed by a `tensor.insert_slice` in a partially bufferized custom dialect:
```
builtin.module {
%cst = arith.constant dense<1.234500e-01> : tensor<8xf32>
"mydialect.func"() <{function_type = (memref<16xf32>, index) -> memref<16xf32>, sym_name = "test"}> ({
^bb0(%res: memref<16xf32>, %offset: index):
%val = "mydialect.test_op"() : () -> (memref<8xf32>)
%0 = bufferization.to_tensor %val restrict : memref<8xf32>
%res_t = bufferization.to_tensor %res restrict writable : memref<16xf32>
%1 = linalg.add ins(%0, %cst : tensor<8xf32>, tensor<8xf32>) outs(%0 : tensor<8xf32>) -> tensor<8xf32>
%2 = tensor.insert_slice %1 into %res_t[%offset] [8] [1] : tensor<8xf32> into tensor<16xf32>
%3 = bufferization.to_memref %2 : memref<16xf32>
"mydialect.return"(%3) : (memref<16xf32>) -> ()
}) : () -> ()
}
```
Running `mlir-opt -allow-unregistered-dialect --one-shot-bufferize="allow-unknown-ops"` yields:
```
module {
memref.global "private" constant @__constant_8xf32 : memref<8xf32> = dense<1.234500e-01> {alignment = 64 : i64}
%0 = memref.get_global @__constant_8xf32 : memref<8xf32>
"mydialect.func"() <{function_type = (memref<16xf32>, index) -> memref<16xf32>, sym_name = "test"}> ({
^bb0(%arg0: memref<16xf32>, %arg1: index):
%1 = "mydialect.test_op"() : () -> memref<8xf32>
%alloc = memref.alloc() {alignment = 64 : i64} : memref<8xf32>
linalg.add ins(%1, %0 : memref<8xf32>, memref<8xf32>) outs(%alloc : memref<8xf32>)
%subview = memref.subview %arg0[%arg1] [8] [1] : memref<16xf32> to memref<8xf32, strided<[1], offset: ?>>
memref.copy %alloc, %subview : memref<8xf32> to memref<8xf32, strided<[1], offset: ?>>
"mydialect.return"(%arg0) : (memref<16xf32>) -> ()
}) : () -> ()
}
```
It'd be great if the pass could hoist the `%subview` above the `linalg.add` and use it as the output buffer instead of the new `%alloc`, which would make the `memref.copy` unnecessary. Can this be achieved with the existing passes?
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs