https://gcc.gnu.org/bugzilla/show_bug.cgi?id=118499

--- Comment #24 from Thomas Koenig <tkoenig at gcc dot gnu.org> ---
Considering that ctz is rather expensive, comparable to an
integer multiplication, I think I will do away with this
optimization altogether - we are spending log2(n) imuls anyway.

I think the library version should look something like (where I know
that the first n == 0 comparison is not needed, but I had it in
for testing purposes).

#define XTYPE uint64_t
#define NTYPE uint32_t

XTYPE static inline
power_simple (XTYPE x, NTYPE n)
{
  XTYPE pow = 1;
  if (n == 0)
    return 1;

  for (;;)
    {
      if (n & 1)
        pow *= x;
      n >>= 1;
      if (n)
        x *= x;
      else
        break;
    }
  return pow;
}

XTYPE
power_complicated (XTYPE x, NTYPE n)
{
  const XTYPE mask = (XTYPE) (-1) / 2;
  if (n == 0)
    return 1;

  if  (x == 0)
    return 0;

  if (x & 1)
    return power_simple (x, n & mask);

  if (n > sizeof (x) * 8)
    return 0;

  return power_simple (x, n);
}

Reply via email to