On Mon, Sep 04, 2017 at 02:34:49PM +0000, Burakov, Anatoly wrote: > > From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Pavan Nikhilesh > > Sent: Sunday, September 3, 2017 1:36 PM > > To: dev@dpdk.org > > Cc: Dumitrescu, Cristian <cristian.dumitre...@intel.com>; > > step...@networkplumber.org; Pavan Nikhilesh > > <pbhagavat...@caviumnetworks.com> > > Subject: [dpdk-dev] [PATCH v3 2/3] eal: add u64 bit variant for reciprocal > > > > Currently, rte_reciprocal only supports unsigned 32bit divisors. This commit > > adds support for unsigned 64bit divisors. > > > > Rename unsigned 32bit specific functions appropriately and update > > librte_sched accordingly. > > > > Signed-off-by: Pavan Nikhilesh <pbhagavat...@caviumnetworks.com> > > --- > > lib/librte_eal/bsdapp/eal/rte_eal_version.map | 3 +- > > lib/librte_eal/common/include/rte_reciprocal.h | 111 > > ++++++++++++++++++++-- > > lib/librte_eal/common/rte_reciprocal.c | 120 > > +++++++++++++++++++++--- > > lib/librte_eal/linuxapp/eal/rte_eal_version.map | 3 +- > > lib/librte_sched/Makefile | 4 +- > > lib/librte_sched/rte_sched.c | 9 +- > > 6 files changed, 222 insertions(+), 28 deletions(-) > > > > diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map > > b/lib/librte_eal/bsdapp/eal/rte_eal_version.map > > index d0bda66..5fd6101 100644 > > --- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map > > +++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map > > @@ -242,6 +242,7 @@ EXPERIMENTAL { > > DPDK_17.11 { > > global: > > > > - rte_reciprocal_value; > > + rte_reciprocal_value_u32; > > + rte_reciprocal_value_u64; > > > > } DPDK_17.08; > > diff --git a/lib/librte_eal/common/include/rte_reciprocal.h > > b/lib/librte_eal/common/include/rte_reciprocal.h > > index b6d752f..801d1c8 100644 > > --- a/lib/librte_eal/common/include/rte_reciprocal.h > > +++ b/lib/librte_eal/common/include/rte_reciprocal.h > > @@ -22,22 +22,117 @@ > > #ifndef _RTE_RECIPROCAL_H_ > > #define _RTE_RECIPROCAL_H_ > > > > -#include <stdint.h> > > +#include <rte_memory.h> > > > > -struct rte_reciprocal { > > +/** > > + * Unsigned 32-bit divisor structure. > > + */ > > +struct rte_reciprocal_u32 { > > uint32_t m; > > uint8_t sh1, sh2; > > -}; > > +} __rte_cache_aligned; > > + > > +/** > > + * Unsigned 64-bit divisor structure. > > + */ > > +struct rte_reciprocal_u64 { > > + uint64_t m; > > + uint8_t sh1; > > +} __rte_cache_aligned; > > > > +/** > > + * Divide given unsigned 32-bit integer with pre calculated divisor. > > + * > > + * @param a > > + * The 32-bit dividend. > > + * @param R > > + * The pointer to pre calculated divisor reciprocal structure. > > + * > > + * @return > > + * The result of the division > > + */ > > static inline uint32_t > > -rte_reciprocal_divide(uint32_t a, struct rte_reciprocal R) > > +rte_reciprocal_divide_u32(uint32_t a, struct rte_reciprocal_u32 *R) { > > + uint32_t t = (((uint64_t)a * R->m) >> 32); > > + > > + return (t + ((a - t) >> R->sh1)) >> R->sh2; } > > + > > +static inline uint64_t > > +mullhi_u64(uint64_t x, uint64_t y) > > +{ > > +#ifdef __SIZEOF_INT128__ > > + __uint128_t xl = x; > > + __uint128_t rl = xl * y; > > + > > + return (rl >> 64); > > +#else > > + uint64_t u0, u1, v0, v1, k, t; > > + uint64_t w1, w2; > > + uint64_t whi; > > + > > + u1 = x >> 32; u0 = x & 0xFFFFFFFF; > > + v1 = y >> 32; v0 = y & 0xFFFFFFFF; > > + > > + t = u0*v0; > > + k = t >> 32; > > + > > + t = u1*v0 + k; > > + w1 = t & 0xFFFFFFFF; > > + w2 = t >> 32; > > + > > + t = u0*v1 + w1; > > + k = t >> 32; > > + > > + whi = u1*v1 + w2 + k; > > + > > + return whi; > > +#endif > > +} > > + > > +/** > > + * Divide given unsigned 64-bit integer with pre calculated divisor. > > + * > > + * @param a > > + * The 64-bit dividend. > > + * @param R > > + * The pointer to pre calculated divisor reciprocal structure. > > + * > > + * @return > > + * The result of the division > > + */ > > +static inline uint64_t > > +rte_reciprocal_divide_u64(uint64_t a, struct rte_reciprocal_u64 *R) > > { > > - uint32_t t = (uint32_t)(((uint64_t)a * R.m) >> 32); > > + uint64_t q = mullhi_u64(R->m, a); > > + uint64_t t = ((a - q) >> 1) + q; > > > > - return (t + ((a - t) >> R.sh1)) >> R.sh2; > > + return t >> R->sh1; > > } > > > > -struct rte_reciprocal > > -rte_reciprocal_value(uint32_t d); > > +/** > > + * Generate pre calculated divisor structure. > > + * > > + * @param d > > + * The unsigned 32-bit divisor. > > + * > > + * @return > > + * Divisor structure. > > + */ > > +struct rte_reciprocal_u32 > > +rte_reciprocal_value_u32(uint32_t d); > > + > > +/** > > + * Generate pre calculated divisor structure. > > + * > > + * @param d > > + * The unsigned 64-bit divisor. > > + * > > + * @return > > + * Divisor structure. > > + */ > > +struct rte_reciprocal_u64 > > +rte_reciprocal_value_u64(uint64_t d); > > > > #endif /* _RTE_RECIPROCAL_H_ */ > > diff --git a/lib/librte_eal/common/rte_reciprocal.c > > b/lib/librte_eal/common/rte_reciprocal.c > > index 7ab99b4..5d7e367 100644 > > --- a/lib/librte_eal/common/rte_reciprocal.c > > +++ b/lib/librte_eal/common/rte_reciprocal.c > > @@ -31,18 +31,13 @@ > > * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH > > DAMAGE. > > */ > > > > -#include <stdio.h> > > -#include <stdint.h> > > - > > -#include <rte_common.h> > > - > > -#include "rte_reciprocal.h" > > +#include <rte_reciprocal.h> > > > > /* find largest set bit. > > * portable and slow but does not matter for this usage. > > */ > > static inline int > > -fls(uint32_t x) > > +fls_u32(uint32_t x) > > { > > int b; > > > > @@ -54,21 +49,120 @@ fls(uint32_t x) > > return 0; > > } > > > > -struct rte_reciprocal > > -rte_reciprocal_value(uint32_t d) > > +struct rte_reciprocal_u32 > > +rte_reciprocal_value_u32(uint32_t d) > > { > > - struct rte_reciprocal R; > > + struct rte_reciprocal_u32 R; > > uint64_t m; > > int l; > > > > - l = fls(d - 1); > > + l = fls_u32(d - 1); > > m = ((1ULL << 32) * ((1ULL << l) - d)); > > m /= d; > > > > ++m; > > R.m = m; > > - R.sh1 = RTE_MIN(l, 1); > > - R.sh2 = RTE_MAX(l - 1, 0); > > + R.sh1 = l > 1 ? 1 : l; > > + R.sh2 = (l - 1 > 0) ? l - 1 : 0; > > Is there a specific reason for changing from RTE_MIN to what you have? R.sh2 > seems identical to the previous version (so reason for change is unclear), > and R.sh1 changes behavior (R.sh1 can now potentially be zero, even if in > practice it won't) without any explanation given. >
This was a oversight from my end (Testing with standalone app). Thanks for the catch, will undo in v4. -Pavan > Thanks, > Anatoly > > > + > > + return R; > > +} > > + > > +/* Code taken from Hacker's Delight: > > + * http://www.hackersdelight.org/HDcode/divlu.c. > > + * License permits inclusion here per: > > + * http://www.hackersdelight.org/permissions.htm > > + */ > > +static inline uint64_t > > +divide_128_div_64_to_64(uint64_t u1, uint64_t u0, uint64_t v, uint64_t > > +*r) { > > + const uint64_t b = (1ULL << 32); /* Number base (16 bits). */ > > + uint64_t un1, un0, /* Norm. dividend LSD's. */ > > + vn1, vn0, /* Norm. divisor digits. */ > > + q1, q0, /* Quotient digits. */ > > + un64, un21, un10, /* Dividend digit pairs. */ > > + rhat; /* A remainder. */ > > + int s; /* Shift amount for norm. */ > > + > > + /* If overflow, set rem. to an impossible value. */ > > + if (u1 >= v) { > > + if (r != NULL) > > + *r = (uint64_t) -1; > > + return (uint64_t) -1; > > + } > > + > > + /* Count leading zeros. */ > > + s = __builtin_clzll(v); > > + if (s > 0) { > > + v = v << s; > > + un64 = (u1 << s) | ((u0 >> (64 - s)) & (-s >> 31)); > > + un10 = u0 << s; > > + } else { > > + > > + un64 = u1 | u0; > > + un10 = u0; > > + } > > + > > + vn1 = v >> 32; > > + vn0 = v & 0xFFFFFFFF; > > + > > + un1 = un10 >> 32; > > + un0 = un10 & 0xFFFFFFFF; > > + > > + q1 = un64/vn1; > > + rhat = un64 - q1*vn1; > > +again1: > > + if (q1 >= b || q1*vn0 > b*rhat + un1) { > > + q1 = q1 - 1; > > + rhat = rhat + vn1; > > + if (rhat < b) > > + goto again1; > > + } > > + > > + un21 = un64*b + un1 - q1*v; > > + > > + q0 = un21/vn1; > > + rhat = un21 - q0*vn1; > > +again2: > > + if (q0 >= b || q0*vn0 > b*rhat + un0) { > > + q0 = q0 - 1; > > + rhat = rhat + vn1; > > + if (rhat < b) > > + goto again2; > > + } > > + > > + if (r != NULL) > > + *r = (un21*b + un0 - q0*v) >> s; > > + return q1*b + q0; > > +} > > + > > +struct rte_reciprocal_u64 > > +rte_reciprocal_value_u64(uint64_t d) > > +{ > > + struct rte_reciprocal_u64 R; > > + > > + const uint32_t fld = 63 - __builtin_clzll(d); > > + > > + if ((d & (d - 1)) == 0) { > > + R.m = 0; > > + R.sh1 = (fld - 1) | 0x40; > > + } else { > > + uint64_t rem; > > + uint64_t multiplier; > > + uint8_t more; > > + > > + multiplier = divide_128_div_64_to_64(1ULL << fld, 0, d, > > &rem); > > + multiplier += multiplier; > > + > > + const uint64_t twice_rem = rem + rem; > > + if (twice_rem >= d || twice_rem < rem) > > + multiplier += 1; > > + more = fld; > > + R.m = 1 + multiplier; > > + R.sh1 = more | 0x40; > > + } > > + > > + R.sh1 &= 0x3F; > > > > return R; > > } > > diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map > > b/lib/librte_eal/linuxapp/eal/rte_eal_version.map > > index 65117cb..63ff2b8 100644 > > --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map > > +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map > > @@ -247,6 +247,7 @@ EXPERIMENTAL { > > DPDK_17.11 { > > global: > > > > - rte_reciprocal_value; > > + rte_reciprocal_value_u32; > > + rte_reciprocal_value_u64; > > > > } DPDK_17.08; > > diff --git a/lib/librte_sched/Makefile b/lib/librte_sched/Makefile index > > 569656b..a2fd6f3 100644 > > --- a/lib/librte_sched/Makefile > > +++ b/lib/librte_sched/Makefile > > @@ -54,6 +54,8 @@ LIBABIVER := 1 > > SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_sched.c rte_red.c rte_approx.c > > > > # install includes > > -SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h > > rte_bitmap.h rte_sched_common.h rte_red.h rte_approx.h > > +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h > > rte_bitmap.h > > +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include += rte_sched_common.h > > +rte_red.h SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include += > > rte_approx.h > > > > include $(RTE_SDK)/mk/rte.lib.mk > > diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c > > index > > 3b8ccaa..7bb6d51 100644 > > --- a/lib/librte_sched/rte_sched.c > > +++ b/lib/librte_sched/rte_sched.c > > @@ -228,7 +228,7 @@ struct rte_sched_port { > > uint64_t time_cpu_cycles; /* Current CPU time measured in CPU > > cyles */ > > uint64_t time_cpu_bytes; /* Current CPU time measured in bytes > > */ > > uint64_t time; /* Current NIC TX time measured in bytes > > */ > > - struct rte_reciprocal inv_cycles_per_byte; /* CPU cycles per byte */ > > + struct rte_reciprocal_u32 inv_cycles_per_byte; /* CPU cycles per > > byte > > +*/ > > > > /* Scheduling loop detection */ > > uint32_t pipe_loop; > > @@ -677,7 +677,7 @@ rte_sched_port_config(struct > > rte_sched_port_params *params) > > > > cycles_per_byte = (rte_get_tsc_hz() << RTE_SCHED_TIME_SHIFT) > > / params->rate; > > - port->inv_cycles_per_byte = rte_reciprocal_value(cycles_per_byte); > > + port->inv_cycles_per_byte = > > rte_reciprocal_value_u32(cycles_per_byte); > > > > /* Scheduling loop detection */ > > port->pipe_loop = RTE_SCHED_PIPE_INVALID; @@ -2147,8 +2147,9 > > @@ rte_sched_port_time_resync(struct rte_sched_port *port) > > uint64_t bytes_diff; > > > > /* Compute elapsed time in bytes */ > > - bytes_diff = rte_reciprocal_divide(cycles_diff << > > RTE_SCHED_TIME_SHIFT, > > - port->inv_cycles_per_byte); > > + bytes_diff = rte_reciprocal_divide_u32( > > + cycles_diff << RTE_SCHED_TIME_SHIFT, > > + &port->inv_cycles_per_byte); > > > > /* Advance port time */ > > port->time_cpu_cycles = cycles; > > -- > > 2.7.4 >