Issue 170848
Summary [LoopUnrollAndJam] Invalid transformation when there is a self loop-carried dependency.
Labels loopoptim
Assignees
Reporter kasuga-fj
    godbolt: https://godbolt.org/z/4nxz9K4re

Unroll-and-jam is applied to the following LLVM IR:

```llvm
define void @f(ptr noalias %A, ptr noalias %B) {
entry:
  br label %loop.i.header

loop.i.header:
  %i = phi i64 [ 0, %entry ], [ %i.inc, %loop.i.latch ]
  %i.4 = mul i64 %i, 4
  br label %loop.j

loop.j:
  %j = phi i64 [ 0, %loop.i.header ], [ %j.inc, %loop.j ]
  %gep.B = getelementptr [16 x i8], ptr %B, i64 %i, i64 %j
  %val = load i8, ptr %gep.B
  %offset.A = add i64 %i.4, %j
  %gep.A = getelementptr i8, ptr %A, i64 %offset.A
  store i8 %val, ptr %gep.A
  %j.inc = add i64 %j, 1
  %ec.j = icmp eq i64 %j.inc, 16
  br i1 %ec.j, label %loop.i.latch, label %loop.j

loop.i.latch:
  %i.inc = add i64 %i, 1
  %ec.i = icmp eq i64 %i.inc, 32
  br i1 %ec.i, label %exit, label %loop.i.header

exit:
 ret void
}
```

This transformation breaks the original semantics. Consider the following pseudo code. For example, the value stored in `A[12]` will be from `B[3][0]` in the original loop, but from `B[0][12]` in the transformed one.

```c
A[256];
B[32][16];

// Original code
for (i = 0; i < 32; i++)
  for (j = 0; j < 16; j++)
    A[4*i + j] = B[i][j];

// Unroll-and-jammed (count = 4)
for (i = 0; i < 8; i++)
  for (j = 0; j < 16; j++) {
    A[4*(i*4+0) + j] = B[i*4+0][j];
    A[4*(i*4+1) + j] = B[i*4+1][j];
    A[4*(i*4+2) + j] = B[i*4+2][j];
    A[4*(i*4+3) + j] = B[i*4+3][j];
  }
```

Apparently the legality check ignores loop-carried dependency between the same instruction.
https://github.com/llvm/llvm-project/blob/64e3bcdd1ff18aebf32156f7bf4f89d3f320447c/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp#L691-L698
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to