https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85804

            Bug ID: 85804
           Summary: [8/9 Regression][AArch64] Mis-compilation of loop with
                    strided array access and xor reduction
           Product: gcc
           Version: 8.1.1
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: sudi at gcc dot gnu.org
  Target Milestone: ---

The following test case:

#include <stdio.h>

/* Input data for the reproducer: 32 longs, all zero except d[2], which is 1. */
long d[32] = {0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
              0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};

/* Reproducer: XOR together every fifth element of d, starting at d[1],
   and print the result in hex. */
int main() {
  int b = 0;  /* XOR accumulator; the long XOR result is converted back to int */
  /* c = 0..5 selects indices 1, 6, 11, 16, 21 and 26 (stride of 5 elements). */
  for (int c = 0; c <= 5; c++)
    b ^= d[c * 5 + 1];
  printf("checksum = %x\n", b);
}

when compiled with:
aarch64-none-linux-gnu-gcc -O3 f.c
prints "checksum = 1".

All the elements being xor'd (d[1], d[6], d[11], d[16], d[21] and d[26]) are 0,
and thus the result should also be 0.

The assembly for main looks like:
// Compiler output for main() as quoted in the report (AArch64, GNU as syntax).
// The loop has been fully replaced by three 128-bit vector loads, a narrowing
// of each 64-bit lane to 32 bits, and a vector XOR reduction.
// NOTE(review): the three loads use offsets 8, 24 and 40, i.e. they cover the
// contiguous bytes 8..55 after .LANCHOR0. If .LANCHOR0 is the start of d and
// long is 8 bytes (neither is shown here - confirm), that is d[1]..d[6] rather
// than the strided d[1], d[6], ..., d[26], which would pull in d[2] == 1 and
// match the reported "checksum = 1".
main:
.LFB11:
        .cfi_startproc
        adrp    x1, .LANCHOR0
        add     x1, x1, :lo12:.LANCHOR0  // x1 = address of .LANCHOR0 (presumably d; definition not shown)
        stp     x29, x30, [sp, -16]!     // push frame pointer and link register
        .cfi_def_cfa_offset 16
        .cfi_offset 29, -16
        .cfi_offset 30, -8
        adrp    x0, .LC0
        add     x0, x0, :lo12:.LC0       // x0 = address of .LC0 (presumably the format string), 1st printf argument
        mov     x29, sp                  // establish the frame pointer
        ldr     q1, [x1, 8]              // 16-byte load from .LANCHOR0 + 8
        ldr     q2, [x1, 24]             // 16-byte load from .LANCHOR0 + 24
        ldr     q0, [x1, 40]             // 16-byte load from .LANCHOR0 + 40
        xtn     v2.2s, v2.2d             // narrow each 64-bit lane to its low 32 bits
        xtn     v1.2s, v1.2d
        xtn     v0.2s, v0.2d
        eor     v1.8b, v1.8b, v2.8b      // v1 ^= v2
        eor     v0.8b, v0.8b, v1.8b      // v0 = XOR of all three narrowed vectors (two 32-bit lanes)
        ushr d1, d0, 32                  // d1 = d0 >> 32: bring the upper 32-bit lane down to lane 0
        eor     v0.8b, v0.8b, v1.8b      // lane 0 = lane 0 ^ lane 1, completing the reduction
        umov    w1, v0.s[0]              // w1 = reduced value, 2nd printf argument
        bl      printf
        mov     w0, 0                    // return value 0
        ldp     x29, x30, [sp], 16       // pop frame pointer and link register
        .cfi_restore 30
        .cfi_restore 29
        .cfi_def_cfa_offset 0
        ret
        .cfi_endproc


The wrong result goes away with -fno-tree-loop-vectorize.

Reply via email to