Jeff Law <l...@redhat.com> writes:

>> 1. Does gcc attempt to do this optimization?
> Yes. It happens as a side effect of jump threading and there are also
> dedicated passes to rotate the loop.
>
>> 2. If it does, how often does it succeed on loops in real programs?
> Often. The net benefit is actually small though and sometimes this kind
> of loop rotation can impede vectorization.
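(To spell out what I mean by loop rotation: turning a top-tested while
loop into a do-while guarded by the entry test, roughly as in the sketch
below. This is only my illustration of the shape of the transformation,
with made-up function names, not what any particular GCC version
actually emits.)

/* Top-tested loop: the condition is checked before the first
   iteration, so the body may execute zero times. */
void
clear_limbs (unsigned long *p, unsigned long n)
{
  while (n > 0)
    p[--n] = 0;
}

/* Rotated form: the same loop as a guarded do-while. If the compiler
   can prove n > 0 on entry, the initial guard can be dropped and only
   the bottom test remains. */
void
clear_limbs_rotated (unsigned long *p, unsigned long n)
{
  if (n > 0)
    do
      p[--n] = 0;
    while (n > 0);
}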
Thanks for the info.

>> 3. Can I help the compiler to do that inference?
> In general, I'd advise against it. You end up with ugly code which
> works with specific versions of the compiler, but which needs regular
> tweaking as the internal implementations of various optimizers change
> over time.

What would make sense to me is some annotation of the valid range of
variables, if it can be done in a way which is friendly to both the
compiler and humans.

> I'd have to have a self-contained example to dig into what's really
> going on, but my suspicion is either overflow or fairly weak range data
> and simplification due to the symbolic ranges.

Self-contained example below (the #define of GMP_NUMB_BITS must be
changed manually if this is tested on an architecture where long isn't
64 bits).

Actually, I see now that the mn variable is read from a struct field of
type unsigned short. So as long as size_t (or unsigned long) is wider
than unsigned short, the expression 2*mn can't overflow, and a compiler
tracking the possible range of mn as the range of unsigned short should
be able to infer that the loop condition of the second loop is initially
true.

To make the same inference for the first loop (which doesn't matter for
the validity of the hi variable), the compiler would also need to be
told that bn > 0.
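To illustrate, here is a reduced version of that pattern. The struct and
function names are made up for this sketch, and the
__builtin_unreachable hint is just one guess at how such a range
annotation could be spelled today; I haven't checked how well current
compilers actually make use of it.

struct reduced_modulo
{
  unsigned short size;    /* mn is loaded from a field like this one */
  unsigned short B_size;  /* bn, with the invariant 0 < B_size < size */
};

void
reduced_loop (const struct reduced_modulo *m, unsigned long *rp)
{
  unsigned long mn = m->size;   /* range [0, 65535], so ...          */
  unsigned long bn = m->B_size;
  unsigned long sn = mn - bn;
  unsigned long rn = 2 * mn;    /* ... this multiply can't overflow  */

  /* One conceivable way of annotating the invariant 0 < bn < mn, so
     that range propagation can pick it up. */
  if (bn == 0 || bn >= mn)
    __builtin_unreachable ();

  /* Given that 2*mn can't overflow and bn > 0, the initial test
     2*mn > 2*mn - bn is true, so the body runs at least once and the
     loop can be rotated. */
  while (rn > 2 * mn - bn)
    {
      rn -= sn;
      rp[rn] = 0;  /* stand-in for the real work; rp holds 2*mn limbs */
    }
}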
Regards,
/Niels

typedef unsigned long mp_size_t;
typedef unsigned long mp_limb_t;
typedef mp_limb_t *mp_ptr;
typedef const mp_limb_t *mp_srcptr;

/* Must match size of mp_limb_t */
#define GMP_NUMB_BITS 64

#define assert(x) do {} while(0)

mp_limb_t mpn_addmul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
mp_limb_t mpn_add_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
mp_limb_t sec_add_1 (mp_limb_t *rp, mp_limb_t *ap, mp_size_t n, mp_limb_t b);

#define cnd_add_n(cnd, rp, ap, n) mpn_addmul_1 ((rp), (ap), (n), (cnd) != 0)

struct ecc_modulo
{
  unsigned short bit_size;
  unsigned short size;
  unsigned short B_size;

  const mp_limb_t *m;
  /* B^size mod m. Expected to have at least 32 leading zeros
     (equality for secp_256r1). */
  const mp_limb_t *B;
  /* 2^{bit_size} - p, same value as above, but shifted. */
  const mp_limb_t *B_shifted;
};

/* Computes r mod m, input 2*m->size, output m->size. */
void
ecc_mod (const struct ecc_modulo *m, mp_limb_t *rp)
{
  mp_limb_t hi;
  mp_size_t mn = m->size;
  mp_size_t bn = m->B_size;
  mp_size_t sn = mn - bn;
  mp_size_t rn = 2*mn;
  mp_size_t i;
  unsigned shift;

  assert (bn < mn);

  /* FIXME: Could use mpn_addmul_2. */
  /* Eliminate sn limbs at a time */
  if (m->B[bn-1] < ((mp_limb_t) 1 << (GMP_NUMB_BITS - 1)))
    {
      /* Multiply sn + 1 limbs at a time, so we get a mn+1 limb
         product. Then we can absorb the carry in the high limb */
      while (rn > 2 * mn - bn)
        {
          rn -= sn;

          for (i = 0; i <= sn; i++)
            rp[rn+i-1] = mpn_addmul_1 (rp + rn - mn - 1 + i, m->B, bn, rp[rn+i-1]);
          rp[rn-1] = rp[rn+sn-1]
            + mpn_add_n (rp + rn - sn - 1, rp + rn - sn - 1, rp + rn - 1, sn);
        }
      goto final_limbs;
    }
  else
    {
      /* The loop below always runs at least once. But the analyzer
         doesn't realize that, and complains about hi being used later
         on without a well defined value. */
#ifdef __clang_analyzer__
      hi = 0;
#endif
      while (rn >= 2 * mn - bn)
        {
          rn -= sn;

          for (i = 0; i < sn; i++)
            rp[rn+i] = mpn_addmul_1 (rp + rn - mn + i, m->B, bn, rp[rn+i]);

          hi = mpn_add_n (rp + rn - sn, rp + rn - sn, rp + rn, sn);
          hi = cnd_add_n (hi, rp + rn - mn, m->B, mn);
          assert (hi == 0);
        }
    }
  if (rn > mn)
    {
    final_limbs:
      sn = rn - mn;

      for (i = 0; i < sn; i++)
        rp[mn+i] = mpn_addmul_1 (rp + i, m->B, bn, rp[mn+i]);

      hi = mpn_add_n (rp + bn, rp + bn, rp + mn, sn);
      hi = sec_add_1 (rp + bn + sn, rp + bn + sn, mn - bn - sn, hi);
    }
  shift = m->size * GMP_NUMB_BITS - m->bit_size;
  if (shift > 0)
    {
      /* Combine hi with top bits, add in */
      hi = (hi << shift) | (rp[mn-1] >> (GMP_NUMB_BITS - shift));
      rp[mn-1] = (rp[mn-1] & (((mp_limb_t) 1 << (GMP_NUMB_BITS - shift)) - 1))
        + mpn_addmul_1 (rp, m->B_shifted, mn-1, hi);
    }
  else
    {
      hi = cnd_add_n (hi, rp, m->B_shifted, mn);
      assert (hi == 0);
    }
}

--
Niels Möller. PGP-encrypted email is preferred. Keyid 368C6677.
Internet email is subject to wholesale government surveillance.