https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105030

            Bug ID: 105030
           Summary: store motion if-change flag causes if-conversion
                    optimization can't be taken.
           Product: gcc
           Version: 12.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: guihaoc at gcc dot gnu.org
  Target Milestone: ---

// source code
extern void bar (double *, int);

void foo (double a[], int n)
{
  double atemp = 0.5;
  for (int i = 0; i < n; i++)
    if (a[i] < atemp)
      atemp = a[i];
  bar (&atemp, n);
}

// -O3 -fdump-tree-lim2
  if (_4 < atemp.0_5)
    goto <bb 4>; [50.00%]
  else
    goto <bb 5>; [50.00%]

  <bb 4> [local count: 477815112]:
  atemp_lsm.4_24 = _4;
  atemp_lsm_flag.5_25 = 1;

The lim2 pass creates the LSM flag, so the "then" block contains two sets
(atemp_lsm and atemp_lsm_flag), which blocks the if-conversion optimization.

// assembly: -O3 -ffast-math -fno-unroll-loops on ppc64le
.L5:
        lfd 0,0(3)
        addi 3,3,8
        fcmpu 0,12,0
        ble 0,.L3
        fmr 12,0
        li 9,1
.L3:
        bdnz .L5
        andi. 9,9,0x1
        beq 0,.L2
        stfd 12,32(1)

The inefficient scalar fcmpu compare-and-branch sequence is used. If the source
code is tweaked as below, the efficient vector xvmindp is generated instead.

// tweaked source code
extern void bar (double *, int);

void foo (double a[], int n)
{
  double atemp = 0.5;
  for (int i = 0; i < n; i++)
    if (a[i] < atemp)
      atemp = a[i];
  double btemp = atemp;
  bar (&btemp, n);
}

//assembly
.L4:
        lxv 0,0(9)
        addi 9,9,16
        xvmindp 12,12,0
        bdnz .L4

Reply via email to