On 09/06/2017 03:41 PM, Pavan Nikhilesh Bhagavatula wrote:
> On Wed, Sep 06, 2017 at 01:28:24PM +0100, Kevin Traynor wrote:
>> On 09/06/2017 11:21 AM, Pavan Nikhilesh wrote:
>>> From: Pavan Bhagavatula <pbhagavat...@caviumnetworks.com>
>>>
>>> Currently, rte_reciprocal only supports unsigned 32bit divisors. This
>>> commit adds support for unsigned 64bit divisors.
>>>
>>> Rename unsigned 32bit specific functions appropriately and update
>>> librte_sched accordingly.
>>>
>>> Signed-off-by: Pavan Nikhilesh <pbhagavat...@caviumnetworks.com>
>>> ---
>>>  lib/librte_eal/bsdapp/eal/rte_eal_version.map   |   3 +-
>>>  lib/librte_eal/common/include/rte_reciprocal.h  | 109 
>>> ++++++++++++++++++++--
>>>  lib/librte_eal/common/rte_reciprocal.c          | 116 
>>> +++++++++++++++++++++---
>>>  lib/librte_eal/linuxapp/eal/rte_eal_version.map |   3 +-
>>>  lib/librte_sched/Makefile                       |   4 +-
>>>  lib/librte_sched/rte_sched.c                    |   9 +-
>>>  6 files changed, 219 insertions(+), 25 deletions(-)
>>>
>>> diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map 
>>> b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
>>> index 90d7258..59a85bb 100644
>>> --- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
>>> +++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
>>> @@ -241,6 +241,7 @@ EXPERIMENTAL {
>>>  DPDK_17.11 {
>>>     global:
>>>
>>> -   rte_reciprocal_value;
>>> +   rte_reciprocal_value_u32;
>>> +   rte_reciprocal_value_u64;
>>>
>>>  } DPDK_17.08;
>>> diff --git a/lib/librte_eal/common/include/rte_reciprocal.h 
>>> b/lib/librte_eal/common/include/rte_reciprocal.h
>>> index b6d752f..85599e6 100644
>>> --- a/lib/librte_eal/common/include/rte_reciprocal.h
>>> +++ b/lib/librte_eal/common/include/rte_reciprocal.h
>>
>> Hi Pavan, sorry for commenting late but the license in v1 of this file
>> states it cannot be removed. It is not included in later versions - can
>> you explain why?
>>
> Hi Kevin,
> 
> I have misinterpreted this mail
> http://dpdk.org/ml/archives/dev/2017-August/073781.html,
> any suggestion on how to proceed on this further?
> 
> Thanks,
> Pavan
> 

no, not really :( maybe someone else has a suggestion.

The DPDK charter [1] (6.IP Policy) lists the incoming licenses that are
accepted. There is a note about getting governing board approval for an
exception, but I don't think it's applicable to a relatively small
optimization like this.

Kevin.

[1] http://dpdk.org/about/charter

>> +/*
>> + * libdivide
>> + * Copyright (C) 2010 ridiculous_fish
>> + * This software is provided 'as-is', without any express or implied
>> + * warranty.  In no event will the authors be held liable for any damages
>> + * arising from the use of this software.
>> + * Permission is granted to anyone to use this software for any purpose,
>> + * including commercial applications, and to alter it and redistribute it
>> + * freely, subject to the following restrictions:
>> + *
>> + * 1. The origin of this software must not be misrepresented; you must not
>> + *    claim that you wrote the original software. If you use this software
>> + *    in a product, an acknowledgment in the product documentation would be
>> + *    appreciated but is not required.
>> + *
>> + * 2. Altered source versions must be plainly marked as such, and must
>> not be
>> + *    misrepresented as being the original software.
>> + *
>> + * 3. This notice may not be removed or altered from any source
>> distribution.
>> + *
>> + * libdiv...@ridiculousfish.com
>> + *
>> + */
>> +
>>
>>
>>
>>> @@ -22,22 +22,117 @@
>>>  #ifndef _RTE_RECIPROCAL_H_
>>>  #define _RTE_RECIPROCAL_H_
>>>
>>> -#include <stdint.h>
>>> +#include <rte_memory.h>
>>>
>>> -struct rte_reciprocal {
>>> +/**
>>> + * Unsigned 32-bit divisor structure.
>>> + */
>>> +struct rte_reciprocal_u32 {
>>>     uint32_t m;
>>>     uint8_t sh1, sh2;
>>>  };
>>>
>>> +/**
>>> + * Unsigned 64-bit divisor structure.
>>> + */
>>> +struct rte_reciprocal_u64 {
>>> +   uint64_t m;
>>> +   uint8_t sh1;
>>> +};
>>> +
>>> +/**
>>> + * Divide given unsigned 32-bit integer with pre calculated divisor.
>>> + *
>>> + * @param a
>>> + *   The 32-bit dividend.
>>> + * @param R
>>> + *   The pointer to pre calculated divisor reciprocal structure.
>>> + *
>>> + * @return
>>> + *   The result of the division
>>> + */
>>>  static inline uint32_t
>>> -rte_reciprocal_divide(uint32_t a, struct rte_reciprocal R)
>>> +rte_reciprocal_divide_u32(uint32_t a, struct rte_reciprocal_u32 *R)
>>>  {
>>> -   uint32_t t = (uint32_t)(((uint64_t)a * R.m) >> 32);
>>> +   uint32_t t = (((uint64_t)a * R->m) >> 32);
>>>
>>> -   return (t + ((a - t) >> R.sh1)) >> R.sh2;
>>> +   return (t + ((a - t) >> R->sh1)) >> R->sh2;
>>>  }
>>>
>>> -struct rte_reciprocal
>>> -rte_reciprocal_value(uint32_t d);
>>> +static inline uint64_t
>>> +mullhi_u64(uint64_t x, uint64_t y)
>>> +{
>>> +#ifdef __SIZEOF_INT128__
>>> +   __uint128_t xl = x;
>>> +   __uint128_t rl = xl * y;
>>> +
>>> +   return (rl >> 64);
>>> +#else
>>> +   uint64_t u0, u1, v0, v1, k, t;
>>> +   uint64_t w1, w2;
>>> +   uint64_t whi;
>>> +
>>> +   u1 = x >> 32; u0 = x & 0xFFFFFFFF;
>>> +   v1 = y >> 32; v0 = y & 0xFFFFFFFF;
>>> +
>>> +   t = u0*v0;
>>> +   k = t >> 32;
>>> +
>>> +   t = u1*v0 + k;
>>> +   w1 = t & 0xFFFFFFFF;
>>> +   w2 = t >> 32;
>>> +
>>> +   t = u0*v1 + w1;
>>> +   k = t >> 32;
>>> +
>>> +   whi = u1*v1 + w2 + k;
>>> +
>>> +   return whi;
>>> +#endif
>>> +}
>>> +
>>> +/**
>>> + * Divide given unsigned 64-bit integer with pre calculated divisor.
>>> + *
>>> + * @param a
>>> + *   The 64-bit dividend.
>>> + * @param R
>>> + *   The pointer to pre calculated divisor reciprocal structure.
>>> + *
>>> + * @return
>>> + *   The result of the division
>>> + */
>>> +static inline uint64_t
>>> +rte_reciprocal_divide_u64(uint64_t a, struct rte_reciprocal_u64 *R)
>>> +{
>>> +   uint64_t q = mullhi_u64(R->m, a);
>>> +   uint64_t t = ((a - q) >> 1) + q;
>>> +
>>> +   return t >> R->sh1;
>>> +}
>>> +
>>> +/**
>>> + * Generate pre calculated divisor structure.
>>> + *
>>> + * @param d
>>> + *   The unsigned 32-bit divisor.
>>> + *
>>> + * @return
>>> + *   Divisor structure.
>>> + */
>>> +struct rte_reciprocal_u32
>>> +rte_reciprocal_value_u32(uint32_t d);
>>> +
>>> +/**
>>> + * Generate pre calculated divisor structure.
>>> + *
>>> + * @param d
>>> + *   The unsigned 64-bit divisor.
>>> + *
>>> + * @return
>>> + *   Divisor structure.
>>> + */
>>> +struct rte_reciprocal_u64
>>> +rte_reciprocal_value_u64(uint64_t d);
>>>
>>>  #endif /* _RTE_RECIPROCAL_H_ */
>>> diff --git a/lib/librte_eal/common/rte_reciprocal.c 
>>> b/lib/librte_eal/common/rte_reciprocal.c
>>> index 7ab99b4..2024e62 100644
>>> --- a/lib/librte_eal/common/rte_reciprocal.c
>>> +++ b/lib/librte_eal/common/rte_reciprocal.c
>>> @@ -31,18 +31,13 @@
>>>   *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>>>   */
>>>
>>> -#include <stdio.h>
>>> -#include <stdint.h>
>>> -
>>> -#include <rte_common.h>
>>> -
>>> -#include "rte_reciprocal.h"
>>> +#include <rte_reciprocal.h>
>>>
>>>  /* find largest set bit.
>>>   * portable and slow but does not matter for this usage.
>>>   */
>>>  static inline int
>>> -fls(uint32_t x)
>>> +fls_u32(uint32_t x)
>>>  {
>>>     int b;
>>>
>>> @@ -54,14 +49,14 @@ fls(uint32_t x)
>>>     return 0;
>>>  }
>>>
>>> -struct rte_reciprocal
>>> -rte_reciprocal_value(uint32_t d)
>>> +struct rte_reciprocal_u32
>>> +rte_reciprocal_value_u32(uint32_t d)
>>>  {
>>> -   struct rte_reciprocal R;
>>> +   struct rte_reciprocal_u32 R;
>>>     uint64_t m;
>>>     int l;
>>>
>>> -   l = fls(d - 1);
>>> +   l = fls_u32(d - 1);
>>>     m = ((1ULL << 32) * ((1ULL << l) - d));
>>>     m /= d;
>>>
>>> @@ -72,3 +67,102 @@ rte_reciprocal_value(uint32_t d)
>>>
>>>     return R;
>>>  }
>>> +
>>> +/* Code taken from Hacker's Delight:
>>> + * http://www.hackersdelight.org/HDcode/divlu.c.
>>> + * License permits inclusion here per:
>>> + * http://www.hackersdelight.org/permissions.htm
>>> + */
>>> +static inline uint64_t
>>> +divide_128_div_64_to_64(uint64_t u1, uint64_t u0, uint64_t v, uint64_t *r)
>>> +{
>>> +   const uint64_t b = (1ULL << 32); /* Number base (16 bits). */
>>> +   uint64_t un1, un0,           /* Norm. dividend LSD's. */
>>> +                    vn1, vn0,           /* Norm. divisor digits. */
>>> +                    q1, q0,             /* Quotient digits. */
>>> +                    un64, un21, un10,   /* Dividend digit pairs. */
>>> +                    rhat;               /* A remainder. */
>>> +   int s;                       /* Shift amount for norm. */
>>> +
>>> +    /* If overflow, set rem. to an impossible value. */
>>> +   if (u1 >= v) {
>>> +           if (r != NULL)
>>> +                   *r = (uint64_t) -1;
>>> +           return (uint64_t) -1;
>>> +   }
>>> +
>>> +   /* Count leading zeros. */
>>> +   s = __builtin_clzll(v);
>>> +   if (s > 0) {
>>> +           v = v << s;
>>> +           un64 = (u1 << s) | ((u0 >> (64 - s)) & (-s >> 31));
>>> +           un10 = u0 << s;
>>> +   } else {
>>> +
>>> +           un64 = u1 | u0;
>>> +           un10 = u0;
>>> +   }
>>> +
>>> +   vn1 = v >> 32;
>>> +   vn0 = v & 0xFFFFFFFF;
>>> +
>>> +   un1 = un10 >> 32;
>>> +   un0 = un10 & 0xFFFFFFFF;
>>> +
>>> +   q1 = un64/vn1;
>>> +   rhat = un64 - q1*vn1;
>>> +again1:
>>> +   if (q1 >= b || q1*vn0 > b*rhat + un1) {
>>> +           q1 = q1 - 1;
>>> +           rhat = rhat + vn1;
>>> +           if (rhat < b)
>>> +                   goto again1;
>>> +   }
>>> +
>>> +   un21 = un64*b + un1 - q1*v;
>>> +
>>> +   q0 = un21/vn1;
>>> +   rhat = un21 - q0*vn1;
>>> +again2:
>>> +   if (q0 >= b || q0*vn0 > b*rhat + un0) {
>>> +           q0 = q0 - 1;
>>> +           rhat = rhat + vn1;
>>> +           if (rhat < b)
>>> +                   goto again2;
>>> +   }
>>> +
>>> +   if (r != NULL)
>>> +           *r = (un21*b + un0 - q0*v) >> s;
>>> +   return q1*b + q0;
>>> +}
>>> +
>>> +struct rte_reciprocal_u64
>>> +rte_reciprocal_value_u64(uint64_t d)
>>> +{
>>> +   struct rte_reciprocal_u64 R;
>>> +
>>> +   const uint32_t fld = 63 - __builtin_clzll(d);
>>> +
>>> +   if ((d & (d - 1)) == 0) {
>>> +           R.m = 0;
>>> +           R.sh1 = (fld - 1) | 0x40;
>>> +   } else {
>>> +           uint64_t rem;
>>> +           uint64_t multiplier;
>>> +           uint8_t more;
>>> +
>>> +           multiplier = divide_128_div_64_to_64(1ULL << fld, 0, d, &rem);
>>> +           multiplier += multiplier;
>>> +
>>> +           const uint64_t twice_rem = rem + rem;
>>> +           if (twice_rem >= d || twice_rem < rem)
>>> +                   multiplier += 1;
>>> +           more = fld;
>>> +           R.m = 1 + multiplier;
>>> +           R.sh1 = more | 0x40;
>>> +   }
>>> +
>>> +   R.sh1 &= 0x3F;
>>> +
>>> +   return R;
>>> +}
>>> diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map 
>>> b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
>>> index 2070cba..2671627 100644
>>> --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
>>> +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
>>> @@ -246,6 +246,7 @@ EXPERIMENTAL {
>>>  DPDK_17.11 {
>>>     global:
>>>
>>> -   rte_reciprocal_value;
>>> +   rte_reciprocal_value_u32;
>>> +   rte_reciprocal_value_u64;
>>>
>>>  } DPDK_17.08;
>>> diff --git a/lib/librte_sched/Makefile b/lib/librte_sched/Makefile
>>> index 569656b..a2fd6f3 100644
>>> --- a/lib/librte_sched/Makefile
>>> +++ b/lib/librte_sched/Makefile
>>> @@ -54,6 +54,8 @@ LIBABIVER := 1
>>>  SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_sched.c rte_red.c rte_approx.c
>>>
>>>  # install includes
>>> -SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h rte_bitmap.h 
>>> rte_sched_common.h rte_red.h rte_approx.h
>>> +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h rte_bitmap.h
>>> +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include += rte_sched_common.h rte_red.h
>>> +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include += rte_approx.h
>>>
>>>  include $(RTE_SDK)/mk/rte.lib.mk
>>> diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c
>>> index 3b8ccaa..7bb6d51 100644
>>> --- a/lib/librte_sched/rte_sched.c
>>> +++ b/lib/librte_sched/rte_sched.c
>>> @@ -228,7 +228,7 @@ struct rte_sched_port {
>>>     uint64_t time_cpu_cycles;     /* Current CPU time measured in CPU cyles 
>>> */
>>>     uint64_t time_cpu_bytes;      /* Current CPU time measured in bytes */
>>>     uint64_t time;                /* Current NIC TX time measured in bytes 
>>> */
>>> -   struct rte_reciprocal inv_cycles_per_byte; /* CPU cycles per byte */
>>> +   struct rte_reciprocal_u32 inv_cycles_per_byte; /* CPU cycles per byte */
>>>
>>>     /* Scheduling loop detection */
>>>     uint32_t pipe_loop;
>>> @@ -677,7 +677,7 @@ rte_sched_port_config(struct rte_sched_port_params 
>>> *params)
>>>
>>>     cycles_per_byte = (rte_get_tsc_hz() << RTE_SCHED_TIME_SHIFT)
>>>             / params->rate;
>>> -   port->inv_cycles_per_byte = rte_reciprocal_value(cycles_per_byte);
>>> +   port->inv_cycles_per_byte = rte_reciprocal_value_u32(cycles_per_byte);
>>>
>>>     /* Scheduling loop detection */
>>>     port->pipe_loop = RTE_SCHED_PIPE_INVALID;
>>> @@ -2147,8 +2147,9 @@ rte_sched_port_time_resync(struct rte_sched_port 
>>> *port)
>>>     uint64_t bytes_diff;
>>>
>>>     /* Compute elapsed time in bytes */
>>> -   bytes_diff = rte_reciprocal_divide(cycles_diff << RTE_SCHED_TIME_SHIFT,
>>> -                                      port->inv_cycles_per_byte);
>>> +   bytes_diff = rte_reciprocal_divide_u32(
>>> +                   cycles_diff << RTE_SCHED_TIME_SHIFT,
>>> +                   &port->inv_cycles_per_byte);
>>>
>>>     /* Advance port time */
>>>     port->time_cpu_cycles = cycles;
>>>
>>

Reply via email to