https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88836

            Bug ID: 88836
           Summary: [SVE] Redundant PTEST in loop test
           Product: gcc
           Version: unknown
            Status: UNCONFIRMED
          Severity: enhancement
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: rsandifo at gcc dot gnu.org
  Target Milestone: ---

Compiling this code with -O3 -march=armv8-a+sve:

void
f (int *restrict x, int *restrict y, int *restrict z, int n)
{
  for (int i = 0; i < n; i += 2)
    {
      x[i] = y[i] + z[i];
      x[i + 1] = y[i + 1] - z[i + 1];
    }
}

gives:

f:
.LFB0:
        .cfi_startproc
        cmp     w3, 0
        ble     .L1
        sub     w4, w3, #1
        cntw    x3
        ptrue   p1.s, all
        lsr     w4, w4, 1
        add     w4, w4, 1
        whilelo p0.s, xzr, x4
        .p2align 3,,7
.L3:
        ld2w    {z4.s - z5.s}, p0/z, [x1]
        ld2w    {z2.s - z3.s}, p0/z, [x2]
        add     z0.s, z4.s, z2.s
        sub     z1.s, z5.s, z3.s
        st2w    {z0.s - z1.s}, p0, [x0]
        incb    x1, all, mul #2
        whilelo p0.s, x3, x4
        incb    x0, all, mul #2
        incb    x2, all, mul #2
        incw    x3
        ptest   p1, p0.b
        bne     .L3
.L1:
        ret
        .cfi_endproc

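PR88834 is tracking the poor addressing mode choices.  Separately, the PTEST
before the branch is redundant: the WHILELO already sets the condition flags
from the predicate it produces, and the INCB/INCW instructions between it and
the branch do not modify the flags.  We should be able to branch on the flags
set by the WHILELO directly.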

Reply via email to