https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86901

            Bug ID: 86901
           Summary: [AArch64] Suboptimal register allocation for int/float
                    reinterpret
           Product: gcc
           Version: 9.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: middle-end
          Assignee: unassigned at gcc dot gnu.org
          Reporter: wilco at gcc dot gnu.org
  Target Milestone: ---

This example (narrowed down from GLIBC) shows inefficient register allocation:

typedef unsigned int uint32_t;

float g (float);

/* Reinterpret the object representation of X as a 32-bit unsigned integer
   (type-punning through a union) and return bits 20..30 of that pattern,
   i.e. (bits >> 20) & 0x7ff.
   NOTE(review): despite the name, the 0x7ff mask keeps 11 bits, not 12;
   bit 31 (presumably the sign bit, assuming IEEE-754 single) is dropped.  */
static inline uint32_t
top12 (float x)
{
  union
  {
    float f;
    uint32_t i;
  } u = {x};	/* Brace initializer sets the first member, f.  */
  return (u.i >> 20) & 0x7ff;
}

/* If top12 (Y) is below top12 (1.0) -- hinted to the compiler as the
   expected case -- store Y * Y through P.  Otherwise call g with Y + Y
   and ignore its return value.  Y is used both as a float and, inside
   top12, as an integer bit pattern.  */
void
f1 (float y, float *p)
{
  if (__builtin_expect (top12 (y) < top12 (1.0), 1))
    *p = y * y;
  else
    g (y + y);
}

/* Same as f1 except that the unexpected path passes Y to g unchanged
   (instead of Y + Y).  With this variant the -O2 listing for f2 contains
   an additional float-to-float register copy before the integer move.  */
void
f2 (float y, float *p)
{
  if (__builtin_expect (top12 (y) < top12 (1.0), 1))
    *p = y * y;
  else
    g (y);
}

On AArch64, compiling this with -O2 generates:

f1:
        fmov    x1, d0
        ubfx    x1, x1, 20, 11
        cmp     w1, 1015
        bhi     .L2
        fmul    s0, s0, s0
        str     s0, [x0]
        ret
.L2:
        fadd    s0, s0, s0
        b       g

f2:
        .cfi_startproc
        fmov    s1, s0   // eh?
        fmov    x1, d1   // why not fmov w1, s0???
        ubfx    x1, x1, 20, 11
        cmp     w1, 1015
        bhi     .L7
        fmul    s1, s0, s0
        str     s1, [x0]
        ret
.L7:
        b       g

Also, in both functions the float-to-integer move is done as a 64-bit move
(fmov x1, dN) rather than a 32-bit one (fmov w1, sN).

Reply via email to