https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98581

            Bug ID: 98581
           Summary: unexpected reassociation for umin/umax ?
           Product: gcc
           Version: unknown
            Status: UNCONFIRMED
          Keywords: missed-optimization
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: ktkachov at gcc dot gnu.org
  Target Milestone: ---

/* Pointer-to-int types for the signed and unsigned variants of the test
   case; the pointer itself carries the __restrict__ qualifier.  */
typedef signed int *__restrict__ pSINT;
typedef unsigned int *__restrict__ pUINT;

/* Conditional-expression min/max.  Each argument appears twice in the
   expansion, so only pass side-effect-free expressions.  */
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

/* Signed variant: for each of the 4 elements, add
   MAX (a[i], b[i]) - MIN (a[i], b[i]) to c[i] in place.
   The exact expression form is kept as-is because it is the reproducer
   for the reported code generation.  */
void saba_s (pSINT a, pSINT b, pSINT c)
{
  int i;
  for (i = 0; i < 4; i++)
    c[i] += (MAX (a[i], b[i]) - MIN (a[i], b[i]));
}

/* Unsigned variant: for each of the 4 elements, add
   MAX (a[i], b[i]) - MIN (a[i], b[i]) to c[i] in place.
   Same body as saba_s, differing only in the element type; this is the
   function for which the report observes the add and sub being emitted
   in a different order.  */
void saba_u (pUINT a, pUINT b, pUINT c)
{
  int i;
  for (i = 0; i < 4; i++)
    c[i] += (MAX (a[i], b[i]) - MIN (a[i], b[i]));
}

On aarch64 at -O3, GCC generates:
saba_s:
        ldr     q0, [x0]
        ldr     q1, [x1]
        ldr     q2, [x2]
        sabd    v0.4s, v0.4s, v1.4s
        add     v0.4s, v0.4s, v2.4s
        str     q0, [x2]
        ret

saba_u:
        ldr     q1, [x0]
        ldr     q2, [x1]
        ldr     q3, [x2]
        umax    v0.4s, v1.4s, v2.4s
        umin    v1.4s, v1.4s, v2.4s
        add     v0.4s, v0.4s, v3.4s
        sub     v0.4s, v0.4s, v1.4s
        str     q0, [x2]
        ret

I would expect the (MAX (a[i], b[i]) - MIN (a[i], b[i])) part to match a uabd
instruction for the unsigned case, but it looks like the add and sub operations
are swapped, which prevents the RTL pattern from matching the operation.
The operations already come out in this order from GIMPLE. At expand the signed version is:
  vect__4.6_40 = MEM <vector(4) int> [(int *)c_16(D)];
  vect__6.9_37 = MEM <vector(4) int> [(int *)b_17(D)];
  vect__8.12_34 = MEM <vector(4) int> [(int *)a_18(D)];
  vect__9.13_33 = MAX_EXPR <vect__8.12_34, vect__6.9_37>;
  vect__10.14_32 = MIN_EXPR <vect__8.12_34, vect__6.9_37>;
  vect__11.15_31 = vect__9.13_33 - vect__10.14_32;
  vect__12.16_30 = vect__11.15_31 + vect__4.6_40;
  MEM <vector(4) int> [(int *)c_16(D)] = vect__12.16_30;
  return;


The unsigned version is:
  vect__4.25_38 = MEM <vector(4) unsigned int> [(unsigned int *)c_16(D)];
  vect__6.28_35 = MEM <vector(4) unsigned int> [(unsigned int *)b_17(D)];
  vect__8.31_32 = MEM <vector(4) unsigned int> [(unsigned int *)a_18(D)];
  vect__9.32_31 = MAX_EXPR <vect__8.31_32, vect__6.28_35>;
  vect__10.33_30 = MIN_EXPR <vect__8.31_32, vect__6.28_35>;
  vect__13.34_29 = vect__9.32_31 + vect__4.25_38;
  vect__12.35_28 = vect__13.34_29 - vect__10.33_30;
  MEM <vector(4) unsigned int> [(unsigned int *)c_16(D)] = vect__12.35_28;
  return;

Reply via email to