From: Pavan Bhagavatula <pbhagavat...@caviumnetworks.com> Currently, rte_reciprocal only supports unsigned 32bit divisors. This commit adds support for unsigned 64bit divisors.
Rename unsigned 32bit specific functions appropriately and update librte_sched accordingly. Signed-off-by: Pavan Nikhilesh <pbhagavat...@caviumnetworks.com> --- lib/librte_eal/bsdapp/eal/rte_eal_version.map | 3 +- lib/librte_eal/common/include/rte_reciprocal.h | 109 ++++++++++++++++++++-- lib/librte_eal/common/rte_reciprocal.c | 116 +++++++++++++++++++++--- lib/librte_eal/linuxapp/eal/rte_eal_version.map | 3 +- lib/librte_sched/Makefile | 4 +- lib/librte_sched/rte_sched.c | 9 +- 6 files changed, 219 insertions(+), 25 deletions(-) diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map b/lib/librte_eal/bsdapp/eal/rte_eal_version.map index 90d7258..59a85bb 100644 --- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map +++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map @@ -241,6 +241,7 @@ EXPERIMENTAL { DPDK_17.11 { global: - rte_reciprocal_value; + rte_reciprocal_value_u32; + rte_reciprocal_value_u64; } DPDK_17.08; diff --git a/lib/librte_eal/common/include/rte_reciprocal.h b/lib/librte_eal/common/include/rte_reciprocal.h index b6d752f..85599e6 100644 --- a/lib/librte_eal/common/include/rte_reciprocal.h +++ b/lib/librte_eal/common/include/rte_reciprocal.h @@ -22,22 +22,117 @@ #ifndef _RTE_RECIPROCAL_H_ #define _RTE_RECIPROCAL_H_ -#include <stdint.h> +#include <rte_memory.h> -struct rte_reciprocal { +/** + * Unsigned 32-bit divisor structure. + */ +struct rte_reciprocal_u32 { uint32_t m; uint8_t sh1, sh2; }; +/** + * Unsigned 64-bit divisor structure. + */ +struct rte_reciprocal_u64 { + uint64_t m; + uint8_t sh1; +}; + +/** + * Divide given unsigned 32-bit integer with pre calculated divisor. + * + * @param a + * The 32-bit dividend. + * @param R + * The pointer to pre calculated divisor reciprocal structure. + * + * @return + * The result of the division + */ static inline uint32_t -rte_reciprocal_divide(uint32_t a, struct rte_reciprocal R) +rte_reciprocal_divide_u32(uint32_t a, struct rte_reciprocal_u32 *R) { - uint32_t t = (uint32_t)(((uint64_t)a * R.m) >> 32); + uint32_t t = (((uint64_t)a * R->m) >> 32); - return (t + ((a - t) >> R.sh1)) >> R.sh2; + return (t + ((a - t) >> R->sh1)) >> R->sh2; } -struct rte_reciprocal -rte_reciprocal_value(uint32_t d); +static inline uint64_t +mullhi_u64(uint64_t x, uint64_t y) +{ +#ifdef __SIZEOF_INT128__ + __uint128_t xl = x; + __uint128_t rl = xl * y; + + return (rl >> 64); +#else + uint64_t u0, u1, v0, v1, k, t; + uint64_t w1, w2; + uint64_t whi; + + u1 = x >> 32; u0 = x & 0xFFFFFFFF; + v1 = y >> 32; v0 = y & 0xFFFFFFFF; + + t = u0*v0; + k = t >> 32; + + t = u1*v0 + k; + w1 = t & 0xFFFFFFFF; + w2 = t >> 32; + + t = u0*v1 + w1; + k = t >> 32; + + whi = u1*v1 + w2 + k; + + return whi; +#endif +} + +/** + * Divide given unsigned 64-bit integer with pre calculated divisor. + * + * @param a + * The 64-bit dividend. + * @param R + * The pointer to pre calculated divisor reciprocal structure. + * + * @return + * The result of the division + */ +static inline uint64_t +rte_reciprocal_divide_u64(uint64_t a, struct rte_reciprocal_u64 *R) +{ + uint64_t q = mullhi_u64(R->m, a); + uint64_t t = ((a - q) >> 1) + q; + + return t >> R->sh1; +} + +/** + * Generate pre calculated divisor structure. + * + * @param d + * The unsigned 32-bit divisor. + * + * @return + * Divisor structure. + */ +struct rte_reciprocal_u32 +rte_reciprocal_value_u32(uint32_t d); + +/** + * Generate pre calculated divisor structure. + * + * @param d + * The unsigned 64-bit divisor. + * + * @return + * Divisor structure. + */ +struct rte_reciprocal_u64 +rte_reciprocal_value_u64(uint64_t d); #endif /* _RTE_RECIPROCAL_H_ */ diff --git a/lib/librte_eal/common/rte_reciprocal.c b/lib/librte_eal/common/rte_reciprocal.c index 7ab99b4..2024e62 100644 --- a/lib/librte_eal/common/rte_reciprocal.c +++ b/lib/librte_eal/common/rte_reciprocal.c @@ -31,18 +31,13 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include <stdio.h> -#include <stdint.h> - -#include <rte_common.h> - -#include "rte_reciprocal.h" +#include <rte_reciprocal.h> /* find largest set bit. * portable and slow but does not matter for this usage. */ static inline int -fls(uint32_t x) +fls_u32(uint32_t x) { int b; @@ -54,14 +49,14 @@ fls(uint32_t x) return 0; } -struct rte_reciprocal -rte_reciprocal_value(uint32_t d) +struct rte_reciprocal_u32 +rte_reciprocal_value_u32(uint32_t d) { - struct rte_reciprocal R; + struct rte_reciprocal_u32 R; uint64_t m; int l; - l = fls(d - 1); + l = fls_u32(d - 1); m = ((1ULL << 32) * ((1ULL << l) - d)); m /= d; @@ -72,3 +67,102 @@ rte_reciprocal_value(uint32_t d) return R; } + +/* Code taken from Hacker's Delight: + * http://www.hackersdelight.org/HDcode/divlu.c. + * License permits inclusion here per: + * http://www.hackersdelight.org/permissions.htm + */ +static inline uint64_t +divide_128_div_64_to_64(uint64_t u1, uint64_t u0, uint64_t v, uint64_t *r) +{ + const uint64_t b = (1ULL << 32); /* Number base (16 bits). */ + uint64_t un1, un0, /* Norm. dividend LSD's. */ + vn1, vn0, /* Norm. divisor digits. */ + q1, q0, /* Quotient digits. */ + un64, un21, un10, /* Dividend digit pairs. */ + rhat; /* A remainder. */ + int s; /* Shift amount for norm. */ + + /* If overflow, set rem. to an impossible value. */ + if (u1 >= v) { + if (r != NULL) + *r = (uint64_t) -1; + return (uint64_t) -1; + } + + /* Count leading zeros. */ + s = __builtin_clzll(v); + if (s > 0) { + v = v << s; + un64 = (u1 << s) | ((u0 >> (64 - s)) & (-s >> 31)); + un10 = u0 << s; + } else { + + un64 = u1 | u0; + un10 = u0; + } + + vn1 = v >> 32; + vn0 = v & 0xFFFFFFFF; + + un1 = un10 >> 32; + un0 = un10 & 0xFFFFFFFF; + + q1 = un64/vn1; + rhat = un64 - q1*vn1; +again1: + if (q1 >= b || q1*vn0 > b*rhat + un1) { + q1 = q1 - 1; + rhat = rhat + vn1; + if (rhat < b) + goto again1; + } + + un21 = un64*b + un1 - q1*v; + + q0 = un21/vn1; + rhat = un21 - q0*vn1; +again2: + if (q0 >= b || q0*vn0 > b*rhat + un0) { + q0 = q0 - 1; + rhat = rhat + vn1; + if (rhat < b) + goto again2; + } + + if (r != NULL) + *r = (un21*b + un0 - q0*v) >> s; + return q1*b + q0; +} + +struct rte_reciprocal_u64 +rte_reciprocal_value_u64(uint64_t d) +{ + struct rte_reciprocal_u64 R; + + const uint32_t fld = 63 - __builtin_clzll(d); + + if ((d & (d - 1)) == 0) { + R.m = 0; + R.sh1 = (fld - 1) | 0x40; + } else { + uint64_t rem; + uint64_t multiplier; + uint8_t more; + + multiplier = divide_128_div_64_to_64(1ULL << fld, 0, d, &rem); + multiplier += multiplier; + + const uint64_t twice_rem = rem + rem; + if (twice_rem >= d || twice_rem < rem) + multiplier += 1; + more = fld; + R.m = 1 + multiplier; + R.sh1 = more | 0x40; + } + + R.sh1 &= 0x3F; + + return R; +} diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map index 2070cba..2671627 100644 --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map @@ -246,6 +246,7 @@ EXPERIMENTAL { DPDK_17.11 { global: - rte_reciprocal_value; + rte_reciprocal_value_u32; + rte_reciprocal_value_u64; } DPDK_17.08; diff --git a/lib/librte_sched/Makefile b/lib/librte_sched/Makefile index 569656b..a2fd6f3 100644 --- a/lib/librte_sched/Makefile +++ b/lib/librte_sched/Makefile @@ -54,6 +54,8 @@ LIBABIVER := 1 SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_sched.c rte_red.c rte_approx.c # install includes -SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h rte_bitmap.h rte_sched_common.h rte_red.h rte_approx.h +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h rte_bitmap.h +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include += rte_sched_common.h rte_red.h +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include += rte_approx.h include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c index 3b8ccaa..7bb6d51 100644 --- a/lib/librte_sched/rte_sched.c +++ b/lib/librte_sched/rte_sched.c @@ -228,7 +228,7 @@ struct rte_sched_port { uint64_t time_cpu_cycles; /* Current CPU time measured in CPU cyles */ uint64_t time_cpu_bytes; /* Current CPU time measured in bytes */ uint64_t time; /* Current NIC TX time measured in bytes */ - struct rte_reciprocal inv_cycles_per_byte; /* CPU cycles per byte */ + struct rte_reciprocal_u32 inv_cycles_per_byte; /* CPU cycles per byte */ /* Scheduling loop detection */ uint32_t pipe_loop; @@ -677,7 +677,7 @@ rte_sched_port_config(struct rte_sched_port_params *params) cycles_per_byte = (rte_get_tsc_hz() << RTE_SCHED_TIME_SHIFT) / params->rate; - port->inv_cycles_per_byte = rte_reciprocal_value(cycles_per_byte); + port->inv_cycles_per_byte = rte_reciprocal_value_u32(cycles_per_byte); /* Scheduling loop detection */ port->pipe_loop = RTE_SCHED_PIPE_INVALID; @@ -2147,8 +2147,9 @@ rte_sched_port_time_resync(struct rte_sched_port *port) uint64_t bytes_diff; /* Compute elapsed time in bytes */ - bytes_diff = rte_reciprocal_divide(cycles_diff << RTE_SCHED_TIME_SHIFT, - port->inv_cycles_per_byte); + bytes_diff = rte_reciprocal_divide_u32( + cycles_diff << RTE_SCHED_TIME_SHIFT, + &port->inv_cycles_per_byte); /* Advance port time */ port->time_cpu_cycles = cycles; -- 2.7.4