On Mon, Sep 04, 2017 at 02:34:49PM +0000, Burakov, Anatoly wrote:
> > From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Pavan Nikhilesh
> > Sent: Sunday, September 3, 2017 1:36 PM
> > To: dev@dpdk.org
> > Cc: Dumitrescu, Cristian <cristian.dumitre...@intel.com>;
> > step...@networkplumber.org; Pavan Nikhilesh
> > <pbhagavat...@caviumnetworks.com>
> > Subject: [dpdk-dev] [PATCH v3 2/3] eal: add u64 bit variant for reciprocal
> >
> > Currently, rte_reciprocal only supports unsigned 32bit divisors. This commit
> > adds support for unsigned 64bit divisors.
> >
> > Rename unsigned 32bit specific functions appropriately and update
> > librte_sched accordingly.
> >
> > Signed-off-by: Pavan Nikhilesh <pbhagavat...@caviumnetworks.com>
> > ---
> >  lib/librte_eal/bsdapp/eal/rte_eal_version.map   |   3 +-
> >  lib/librte_eal/common/include/rte_reciprocal.h  | 111
> > ++++++++++++++++++++--
> >  lib/librte_eal/common/rte_reciprocal.c          | 120
> > +++++++++++++++++++++---
> >  lib/librte_eal/linuxapp/eal/rte_eal_version.map |   3 +-
> >  lib/librte_sched/Makefile                       |   4 +-
> >  lib/librte_sched/rte_sched.c                    |   9 +-
> >  6 files changed, 222 insertions(+), 28 deletions(-)
> >
> > diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> > b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> > index d0bda66..5fd6101 100644
> > --- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> > +++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> > @@ -242,6 +242,7 @@ EXPERIMENTAL {
> >  DPDK_17.11 {
> >     global:
> >
> > -   rte_reciprocal_value;
> > +   rte_reciprocal_value_u32;
> > +   rte_reciprocal_value_u64;
> >
> >  } DPDK_17.08;
> > diff --git a/lib/librte_eal/common/include/rte_reciprocal.h
> > b/lib/librte_eal/common/include/rte_reciprocal.h
> > index b6d752f..801d1c8 100644
> > --- a/lib/librte_eal/common/include/rte_reciprocal.h
> > +++ b/lib/librte_eal/common/include/rte_reciprocal.h
> > @@ -22,22 +22,117 @@
> >  #ifndef _RTE_RECIPROCAL_H_
> >  #define _RTE_RECIPROCAL_H_
> >
> > -#include <stdint.h>
> > +#include <rte_memory.h>
> >
> > -struct rte_reciprocal {
> > +/**
> > + * Unsigned 32-bit divisor structure.
> > + */
> > +struct rte_reciprocal_u32 {
> >     uint32_t m;
> >     uint8_t sh1, sh2;
> > -};
> > +} __rte_cache_aligned;
> > +
> > +/**
> > + * Unsigned 64-bit divisor structure.
> > + */
> > +struct rte_reciprocal_u64 {
> > +   uint64_t m;
> > +   uint8_t sh1;
> > +} __rte_cache_aligned;
> >
> > +/**
> > + * Divide given unsigned 32-bit integer with pre calculated divisor.
> > + *
> > + * @param a
> > + *   The 32-bit dividend.
> > + * @param R
> > + *   The pointer to pre calculated divisor reciprocal structure.
> > + *
> > + * @return
> > + *   The result of the division
> > + */
> >  static inline uint32_t
> > -rte_reciprocal_divide(uint32_t a, struct rte_reciprocal R)
> > +rte_reciprocal_divide_u32(uint32_t a, struct rte_reciprocal_u32 *R) {
> > +   uint32_t t = (((uint64_t)a * R->m) >> 32);
> > +
> > +   return (t + ((a - t) >> R->sh1)) >> R->sh2; }
> > +
> > +static inline uint64_t
> > +mullhi_u64(uint64_t x, uint64_t y)
> > +{
> > +#ifdef __SIZEOF_INT128__
> > +   __uint128_t xl = x;
> > +   __uint128_t rl = xl * y;
> > +
> > +   return (rl >> 64);
> > +#else
> > +   uint64_t u0, u1, v0, v1, k, t;
> > +   uint64_t w1, w2;
> > +   uint64_t whi;
> > +
> > +   u1 = x >> 32; u0 = x & 0xFFFFFFFF;
> > +   v1 = y >> 32; v0 = y & 0xFFFFFFFF;
> > +
> > +   t = u0*v0;
> > +   k = t >> 32;
> > +
> > +   t = u1*v0 + k;
> > +   w1 = t & 0xFFFFFFFF;
> > +   w2 = t >> 32;
> > +
> > +   t = u0*v1 + w1;
> > +   k = t >> 32;
> > +
> > +   whi = u1*v1 + w2 + k;
> > +
> > +   return whi;
> > +#endif
> > +}
> > +
> > +/**
> > + * Divide given unsigned 64-bit integer with pre calculated divisor.
> > + *
> > + * @param a
> > + *   The 64-bit dividend.
> > + * @param R
> > + *   The pointer to pre calculated divisor reciprocal structure.
> > + *
> > + * @return
> > + *   The result of the division
> > + */
> > +static inline uint64_t
> > +rte_reciprocal_divide_u64(uint64_t a, struct rte_reciprocal_u64 *R)
> >  {
> > -   uint32_t t = (uint32_t)(((uint64_t)a * R.m) >> 32);
> > +   uint64_t q = mullhi_u64(R->m, a);
> > +   uint64_t t = ((a - q) >> 1) + q;
> >
> > -   return (t + ((a - t) >> R.sh1)) >> R.sh2;
> > +   return t >> R->sh1;
> >  }
> >
> > -struct rte_reciprocal
> > -rte_reciprocal_value(uint32_t d);
> > +/**
> > + * Generate pre calculated divisor structure.
> > + *
> > + * @param d
> > + *   The unsigned 32-bit divisor.
> > + *
> > + * @return
> > + *   Divisor structure.
> > + */
> > +struct rte_reciprocal_u32
> > +rte_reciprocal_value_u32(uint32_t d);
> > +
> > +/**
> > + * Generate pre calculated divisor structure.
> > + *
> > + * @param d
> > + *   The unsigned 64-bit divisor.
> > + *
> > + * @return
> > + *   Divisor structure.
> > + */
> > +struct rte_reciprocal_u64
> > +rte_reciprocal_value_u64(uint64_t d);
> >
> >  #endif /* _RTE_RECIPROCAL_H_ */
> > diff --git a/lib/librte_eal/common/rte_reciprocal.c
> > b/lib/librte_eal/common/rte_reciprocal.c
> > index 7ab99b4..5d7e367 100644
> > --- a/lib/librte_eal/common/rte_reciprocal.c
> > +++ b/lib/librte_eal/common/rte_reciprocal.c
> > @@ -31,18 +31,13 @@
> >   *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> > DAMAGE.
> >   */
> >
> > -#include <stdio.h>
> > -#include <stdint.h>
> > -
> > -#include <rte_common.h>
> > -
> > -#include "rte_reciprocal.h"
> > +#include <rte_reciprocal.h>
> >
> >  /* find largest set bit.
> >   * portable and slow but does not matter for this usage.
> >   */
> >  static inline int
> > -fls(uint32_t x)
> > +fls_u32(uint32_t x)
> >  {
> >     int b;
> >
> > @@ -54,21 +49,120 @@ fls(uint32_t x)
> >     return 0;
> >  }
> >
> > -struct rte_reciprocal
> > -rte_reciprocal_value(uint32_t d)
> > +struct rte_reciprocal_u32
> > +rte_reciprocal_value_u32(uint32_t d)
> >  {
> > -   struct rte_reciprocal R;
> > +   struct rte_reciprocal_u32 R;
> >     uint64_t m;
> >     int l;
> >
> > -   l = fls(d - 1);
> > +   l = fls_u32(d - 1);
> >     m = ((1ULL << 32) * ((1ULL << l) - d));
> >     m /= d;
> >
> >     ++m;
> >     R.m = m;
> > -   R.sh1 = RTE_MIN(l, 1);
> > -   R.sh2 = RTE_MAX(l - 1, 0);
> > +   R.sh1 = l > 1 ? 1 : l;
> > +   R.sh2 = (l - 1 > 0) ? l - 1 : 0;
>
> Is there a specific reason for changing from RTE_MIN to what you have? R.sh2 
> seems identical to the previous version (so reason for change is unclear), 
> and R.sh1 changes behavior (R.sh1 can now potentially be zero, even if in 
> practice it won't) without any explanation given.
>

This was a oversight from my end (Testing with standalone app). Thanks for the
catch, will undo in v4.

-Pavan

> Thanks,
> Anatoly
>
> > +
> > +   return R;
> > +}
> > +
> > +/* Code taken from Hacker's Delight:
> > + * http://www.hackersdelight.org/HDcode/divlu.c.
> > + * License permits inclusion here per:
> > + * http://www.hackersdelight.org/permissions.htm
> > + */
> > +static inline uint64_t
> > +divide_128_div_64_to_64(uint64_t u1, uint64_t u0, uint64_t v, uint64_t
> > +*r) {
> > +   const uint64_t b = (1ULL << 32); /* Number base (16 bits). */
> > +   uint64_t un1, un0,           /* Norm. dividend LSD's. */
> > +                    vn1, vn0,           /* Norm. divisor digits. */
> > +                    q1, q0,             /* Quotient digits. */
> > +                    un64, un21, un10,   /* Dividend digit pairs. */
> > +                    rhat;               /* A remainder. */
> > +   int s;                       /* Shift amount for norm. */
> > +
> > +    /* If overflow, set rem. to an impossible value. */
> > +   if (u1 >= v) {
> > +           if (r != NULL)
> > +                   *r = (uint64_t) -1;
> > +           return (uint64_t) -1;
> > +   }
> > +
> > +   /* Count leading zeros. */
> > +   s = __builtin_clzll(v);
> > +   if (s > 0) {
> > +           v = v << s;
> > +           un64 = (u1 << s) | ((u0 >> (64 - s)) & (-s >> 31));
> > +           un10 = u0 << s;
> > +   } else {
> > +
> > +           un64 = u1 | u0;
> > +           un10 = u0;
> > +   }
> > +
> > +   vn1 = v >> 32;
> > +   vn0 = v & 0xFFFFFFFF;
> > +
> > +   un1 = un10 >> 32;
> > +   un0 = un10 & 0xFFFFFFFF;
> > +
> > +   q1 = un64/vn1;
> > +   rhat = un64 - q1*vn1;
> > +again1:
> > +   if (q1 >= b || q1*vn0 > b*rhat + un1) {
> > +           q1 = q1 - 1;
> > +           rhat = rhat + vn1;
> > +           if (rhat < b)
> > +                   goto again1;
> > +   }
> > +
> > +   un21 = un64*b + un1 - q1*v;
> > +
> > +   q0 = un21/vn1;
> > +   rhat = un21 - q0*vn1;
> > +again2:
> > +   if (q0 >= b || q0*vn0 > b*rhat + un0) {
> > +           q0 = q0 - 1;
> > +           rhat = rhat + vn1;
> > +           if (rhat < b)
> > +                   goto again2;
> > +   }
> > +
> > +   if (r != NULL)
> > +           *r = (un21*b + un0 - q0*v) >> s;
> > +   return q1*b + q0;
> > +}
> > +
> > +struct rte_reciprocal_u64
> > +rte_reciprocal_value_u64(uint64_t d)
> > +{
> > +   struct rte_reciprocal_u64 R;
> > +
> > +   const uint32_t fld = 63 - __builtin_clzll(d);
> > +
> > +   if ((d & (d - 1)) == 0) {
> > +           R.m = 0;
> > +           R.sh1 = (fld - 1) | 0x40;
> > +   } else {
> > +           uint64_t rem;
> > +           uint64_t multiplier;
> > +           uint8_t more;
> > +
> > +           multiplier = divide_128_div_64_to_64(1ULL << fld, 0, d,
> > &rem);
> > +           multiplier += multiplier;
> > +
> > +           const uint64_t twice_rem = rem + rem;
> > +           if (twice_rem >= d || twice_rem < rem)
> > +                   multiplier += 1;
> > +           more = fld;
> > +           R.m = 1 + multiplier;
> > +           R.sh1 = more | 0x40;
> > +   }
> > +
> > +   R.sh1 &= 0x3F;
> >
> >     return R;
> >  }
> > diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> > b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> > index 65117cb..63ff2b8 100644
> > --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> > +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> > @@ -247,6 +247,7 @@ EXPERIMENTAL {
> >  DPDK_17.11 {
> >     global:
> >
> > -   rte_reciprocal_value;
> > +   rte_reciprocal_value_u32;
> > +   rte_reciprocal_value_u64;
> >
> >  } DPDK_17.08;
> > diff --git a/lib/librte_sched/Makefile b/lib/librte_sched/Makefile index
> > 569656b..a2fd6f3 100644
> > --- a/lib/librte_sched/Makefile
> > +++ b/lib/librte_sched/Makefile
> > @@ -54,6 +54,8 @@ LIBABIVER := 1
> >  SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_sched.c rte_red.c rte_approx.c
> >
> >  # install includes
> > -SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h
> > rte_bitmap.h rte_sched_common.h rte_red.h rte_approx.h
> > +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h
> > rte_bitmap.h
> > +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include += rte_sched_common.h
> > +rte_red.h SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include +=
> > rte_approx.h
> >
> >  include $(RTE_SDK)/mk/rte.lib.mk
> > diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c 
> > index
> > 3b8ccaa..7bb6d51 100644
> > --- a/lib/librte_sched/rte_sched.c
> > +++ b/lib/librte_sched/rte_sched.c
> > @@ -228,7 +228,7 @@ struct rte_sched_port {
> >     uint64_t time_cpu_cycles;     /* Current CPU time measured in CPU
> > cyles */
> >     uint64_t time_cpu_bytes;      /* Current CPU time measured in bytes
> > */
> >     uint64_t time;                /* Current NIC TX time measured in bytes 
> > */
> > -   struct rte_reciprocal inv_cycles_per_byte; /* CPU cycles per byte */
> > +   struct rte_reciprocal_u32 inv_cycles_per_byte; /* CPU cycles per
> > byte
> > +*/
> >
> >     /* Scheduling loop detection */
> >     uint32_t pipe_loop;
> > @@ -677,7 +677,7 @@ rte_sched_port_config(struct
> > rte_sched_port_params *params)
> >
> >     cycles_per_byte = (rte_get_tsc_hz() << RTE_SCHED_TIME_SHIFT)
> >             / params->rate;
> > -   port->inv_cycles_per_byte = rte_reciprocal_value(cycles_per_byte);
> > +   port->inv_cycles_per_byte =
> > rte_reciprocal_value_u32(cycles_per_byte);
> >
> >     /* Scheduling loop detection */
> >     port->pipe_loop = RTE_SCHED_PIPE_INVALID; @@ -2147,8 +2147,9
> > @@ rte_sched_port_time_resync(struct rte_sched_port *port)
> >     uint64_t bytes_diff;
> >
> >     /* Compute elapsed time in bytes */
> > -   bytes_diff = rte_reciprocal_divide(cycles_diff <<
> > RTE_SCHED_TIME_SHIFT,
> > -                                      port->inv_cycles_per_byte);
> > +   bytes_diff = rte_reciprocal_divide_u32(
> > +                   cycles_diff << RTE_SCHED_TIME_SHIFT,
> > +                   &port->inv_cycles_per_byte);
> >
> >     /* Advance port time */
> >     port->time_cpu_cycles = cycles;
> > --
> > 2.7.4
>

Reply via email to