[PATCH v7] eal/ppc: fix compilation for musl

2022-05-14 Thread Duncan Bellamy
musl does not provide glibc's __ppc_get_timebase() helper, but the compiler
builtin __builtin_ppc_get_timebase() is available, so use that for rte_rdtsc();
the timebase frequency is read from /proc/cpuinfo when not building against glibc.

Signed-off-by: Duncan Bellamy 
---
 lib/eal/ppc/include/rte_cycles.h |  7 ++
 lib/eal/ppc/rte_cycles.c | 39 
 2 files changed, 46 insertions(+)

diff --git a/lib/eal/ppc/include/rte_cycles.h b/lib/eal/ppc/include/rte_cycles.h
index 5585f9273c..666fc9b0bf 100644
--- a/lib/eal/ppc/include/rte_cycles.h
+++ b/lib/eal/ppc/include/rte_cycles.h
@@ -10,7 +10,10 @@
 extern "C" {
 #endif
 
+#include <features.h>
+#ifdef __GLIBC__
 #include <sys/platform/ppc.h>
+#endif
 
 #include "generic/rte_cycles.h"
 
@@ -26,7 +29,11 @@ extern "C" {
 static inline uint64_t
 rte_rdtsc(void)
 {
+#ifdef __GLIBC__
return __ppc_get_timebase();
+#else
+   return __builtin_ppc_get_timebase();
+#endif
 }
 
 static inline uint64_t
diff --git a/lib/eal/ppc/rte_cycles.c b/lib/eal/ppc/rte_cycles.c
index 3180adb0ff..cd4bdff8b8 100644
--- a/lib/eal/ppc/rte_cycles.c
+++ b/lib/eal/ppc/rte_cycles.c
@@ -2,12 +2,51 @@
  * Copyright (C) IBM Corporation 2019.
  */
 
+#include <features.h>
+#ifdef __GLIBC__
 #include <sys/platform/ppc.h>
+#elif RTE_EXEC_ENV_LINUX
+#include <string.h>
+#include <stdio.h>
+#endif
 
 #include "eal_private.h"
 
 uint64_t
 get_tsc_freq_arch(void)
 {
+#ifdef __GLIBC__
return __ppc_get_timebase_freq();
+#elif RTE_EXEC_ENV_LINUX
+   static unsigned long base;
+   char buf[512];
+   ssize_t nr;
+   FILE *f;
+
+   if (base != 0)
+   goto out;
+
+   f = fopen("/proc/cpuinfo", "rb");
+   if (f == NULL)
+   goto out;
+
+   while (fgets(buf, sizeof(buf), f) != NULL) {
+   char *ret = strstr(buf, "timebase");
+
+   if (ret == NULL)
+   continue;
+   ret += sizeof("timebase") - 1;
+   ret = strchr(ret, ':');
+   if (ret == NULL)
+   continue;
+   base = strtoul(ret + 1, NULL, 10);
+   break;
+   }
+   fclose(f);
+out:
+   return (uint64_t) base;
+#else
+   return 0;
+#endif
+
 }
-- 
2.34.1



Re: [PATCH] event/dlb2: allow CQ depths up to 1024

2022-05-14 Thread Jerin Jacob
On Sat, Apr 9, 2022 at 8:43 PM Timothy McDaniel
 wrote:
>
> Updated to allow overriding the default CQ depth of 32.  Since there are
> only 2048 DLB history list entries, increasing the cq depth decreases
> the number of available ldb ports to 2048/max_cq_depth. Resource query
> will take this into account and return the correct maximum number of
> ldb ports.
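
For a rough sense of that trade-off, a minimal sketch (assuming, as the patch
below does, that the 2048 history list entries are split evenly by the
configured CQ depth; the depths are only example values):

#include <stdio.h>

/* Illustration only: how raising max_cq_depth shrinks the usable LDB port
 * count when a fixed pool of 2048 history list entries is split evenly.
 */
int main(void)
{
	const int hl_entries = 2048;	/* DLB2_MAX_HL_ENTRIES */
	const int depths[] = { 32, 64, 128, 256, 512, 1024 };
	unsigned int i;

	for (i = 0; i < sizeof(depths) / sizeof(depths[0]); i++)
		printf("max_cq_depth=%4d -> max LDB ports=%d\n",
		       depths[i], hl_entries / depths[i]);
	return 0;
}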

Changed ldb to LDB

>
> Signed-off-by: Timothy McDaniel 

Applied to dpdk-next-net-eventdev/for-main. Thanks

> ---
>  drivers/event/dlb2/dlb2.c   | 57 ++---
>  drivers/event/dlb2/dlb2_priv.h  | 10 --
>  drivers/event/dlb2/pf/dlb2_pf.c |  3 +-
>  3 files changed, 62 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/event/dlb2/dlb2.c b/drivers/event/dlb2/dlb2.c
> index 7789dd74e0..36f07d0061 100644
> --- a/drivers/event/dlb2/dlb2.c
> +++ b/drivers/event/dlb2/dlb2.c
> @@ -55,7 +55,7 @@ static struct rte_event_dev_info evdev_dlb2_default_info = {
> .max_event_queue_priority_levels = DLB2_QID_PRIORITIES,
> .max_event_priority_levels = DLB2_QID_PRIORITIES,
> .max_event_ports = DLB2_MAX_NUM_LDB_PORTS,
> -   .max_event_port_dequeue_depth = DLB2_MAX_CQ_DEPTH,
> +   .max_event_port_dequeue_depth = DLB2_DEFAULT_CQ_DEPTH,
> .max_event_port_enqueue_depth = DLB2_MAX_ENQUEUE_DEPTH,
> .max_event_port_links = DLB2_MAX_NUM_QIDS_PER_LDB_CQ,
> .max_num_events = DLB2_MAX_NUM_LDB_CREDITS,
> @@ -111,6 +111,7 @@ dlb2_hw_query_resources(struct dlb2_eventdev *dlb2)
>  {
> struct dlb2_hw_dev *handle = &dlb2->qm_instance;
> struct dlb2_hw_resource_info *dlb2_info = &handle->info;
> +   int num_ldb_ports;
> int ret;
>
> /* Query driver resources provisioned for this device */
> @@ -127,11 +128,15 @@ dlb2_hw_query_resources(struct dlb2_eventdev *dlb2)
>  * The capabilities (CAPs) were set at compile time.
>  */
>
> +   if (dlb2->max_cq_depth != DLB2_DEFAULT_CQ_DEPTH)
> +   num_ldb_ports = DLB2_MAX_HL_ENTRIES / dlb2->max_cq_depth;
> +   else
> +   num_ldb_ports = dlb2->hw_rsrc_query_results.num_ldb_ports;
> +
> evdev_dlb2_default_info.max_event_queues =
> dlb2->hw_rsrc_query_results.num_ldb_queues;
>
> -   evdev_dlb2_default_info.max_event_ports =
> -   dlb2->hw_rsrc_query_results.num_ldb_ports;
> +   evdev_dlb2_default_info.max_event_ports = num_ldb_ports;
>
> if (dlb2->version == DLB2_HW_V2_5) {
> evdev_dlb2_default_info.max_num_events =
> @@ -159,8 +164,7 @@ dlb2_hw_query_resources(struct dlb2_eventdev *dlb2)
> handle->info.hw_rsrc_max.num_ldb_queues =
> dlb2->hw_rsrc_query_results.num_ldb_queues;
>
> -   handle->info.hw_rsrc_max.num_ldb_ports =
> -   dlb2->hw_rsrc_query_results.num_ldb_ports;
> +   handle->info.hw_rsrc_max.num_ldb_ports = num_ldb_ports;
>
> handle->info.hw_rsrc_max.num_dir_ports =
> dlb2->hw_rsrc_query_results.num_dir_ports;
> @@ -212,6 +216,36 @@ set_numa_node(const char *key __rte_unused, const char *value, void *opaque)
> return 0;
>  }
>
> +
> +static int
> +set_max_cq_depth(const char *key __rte_unused,
> +const char *value,
> +void *opaque)
> +{
> +   int *max_cq_depth = opaque;
> +   int ret;
> +
> +   if (value == NULL || opaque == NULL) {
> +   DLB2_LOG_ERR("NULL pointer\n");
> +   return -EINVAL;
> +   }
> +
> +   ret = dlb2_string_to_int(max_cq_depth, value);
> +   if (ret < 0)
> +   return ret;
> +
> +   if (*max_cq_depth < DLB2_MIN_CQ_DEPTH_OVERRIDE ||
> +   *max_cq_depth > DLB2_MAX_CQ_DEPTH_OVERRIDE ||
> +   !rte_is_power_of_2(*max_cq_depth)) {
> +   DLB2_LOG_ERR("dlb2: max_cq_depth %d and %d and a power of 
> 2\n",
> +DLB2_MIN_CQ_DEPTH_OVERRIDE,
> +DLB2_MAX_CQ_DEPTH_OVERRIDE);
> +   return -EINVAL;
> +   }
> +
> +   return 0;
> +}
> +
>  static int
>  set_max_num_events(const char *key __rte_unused,
>const char *value,
> @@ -4504,6 +4538,7 @@ dlb2_primary_eventdev_probe(struct rte_eventdev *dev,
> dlb2->hw_credit_quanta = dlb2_args->hw_credit_quanta;
> dlb2->default_depth_thresh = dlb2_args->default_depth_thresh;
> dlb2->vector_opts_enabled = dlb2_args->vector_opts_enabled;
> +   dlb2->max_cq_depth = dlb2_args->max_cq_depth;
>
> err = dlb2_iface_open(&dlb2->qm_instance, name);
> if (err < 0) {
> @@ -4609,6 +4644,7 @@ dlb2_parse_params(const char *params,
>  DLB2_HW_CREDIT_QUANTA_ARG,
>  DLB2_DEPTH_THRESH_ARG,
>  DLB2_VECTOR_OPTS_ENAB_ARG,
> +DLB2_MAX_CQ_DEPTH,
>  

Re: [PATCH] event/dlb2: add support for single 512B write of 4 QEs

2022-05-14 Thread Jerin Jacob
On Sat, Apr 9, 2022 at 8:48 PM Timothy McDaniel
 wrote:
>
> On Xeon, where 512b accesses are available, the movdir64 instruction can
> perform a 512b read and write to the DLB producer port. For movdir64 to
> pull its data from the store buffers (store-buffer forwarding) before the
> actual write, the data should be laid out as a single 512b write.
> This commit changes the code, when built for Xeon with 512b AVX support,
> to issue a single 512b write of all 4 QEs instead of 4x64b writes.
>
> Signed-off-by: Timothy McDaniel 
> ---
>  drivers/event/dlb2/dlb2.c | 86 ++-
>  1 file changed, 67 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/event/dlb2/dlb2.c b/drivers/event/dlb2/dlb2.c
> index 36f07d0061..e2a5303310 100644
> --- a/drivers/event/dlb2/dlb2.c
> +++ b/drivers/event/dlb2/dlb2.c
> @@ -2776,25 +2776,73 @@ dlb2_event_build_hcws(struct dlb2_port *qm_port,
> ev[3].event_type,
>  DLB2_QE_EV_TYPE_WORD + 4);
>
> -   /* Store the metadata to memory (use the double-precision
> -* _mm_storeh_pd because there is no integer function for
> -* storing the upper 64b):
> -* qe[0] metadata = sse_qe[0][63:0]
> -* qe[1] metadata = sse_qe[0][127:64]
> -* qe[2] metadata = sse_qe[1][63:0]
> -* qe[3] metadata = sse_qe[1][127:64]
> -*/
> -   _mm_storel_epi64((__m128i *)&qe[0].u.opaque_data, sse_qe[0]);
> -   _mm_storeh_pd((double *)&qe[1].u.opaque_data,
> - (__m128d)sse_qe[0]);
> -   _mm_storel_epi64((__m128i *)&qe[2].u.opaque_data, sse_qe[1]);
> -   _mm_storeh_pd((double *)&qe[3].u.opaque_data,
> - (__m128d)sse_qe[1]);
> -
> -   qe[0].data = ev[0].u64;
> -   qe[1].data = ev[1].u64;
> -   qe[2].data = ev[2].u64;
> -   qe[3].data = ev[3].u64;
> + #ifdef __AVX512VL__

+ x86 maintainers

We need a runtime check based on CPU flags, right? The build machine
and the run machine can be different.
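
A minimal sketch of what that runtime selection could look like, assuming the
AVX-512 path stays compiled under __AVX512VL__ and the choice is taken from the
CPU flags plus the EAL max-SIMD setting (the helper name dlb2_use_avx512_store
is illustrative, not from the patch):

#include <stdbool.h>

#include <rte_cpuflags.h>
#include <rte_vect.h>

/* Illustrative only: choose the single 512b store path at runtime when the
 * CPU reports AVX-512VL and 512b SIMD is allowed, else keep the 4x64b path.
 */
static bool
dlb2_use_avx512_store(void)
{
#ifdef __AVX512VL__
	return rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512VL) == 1 &&
	       rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512;
#else
	return false;
#endif
}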

> +
> +   /*
> +* 1) Build avx512 QE store and build each
> +*QE individually as XMM register
> +* 2) Merge the 4 XMM registers/QEs into single AVX512
> +*register
> +* 3) Store single avx512 register to &qe[0] (4x QEs
> +*stored in 1x store)
> +*/
> +
> +   __m128i v_qe0 = _mm_setzero_si128();
> +   uint64_t meta = _mm_extract_epi64(sse_qe[0], 0);
> +   v_qe0 = _mm_insert_epi64(v_qe0, ev[0].u64, 0);
> +   v_qe0 = _mm_insert_epi64(v_qe0, meta, 1);
> +
> +   __m128i v_qe1 = _mm_setzero_si128();
> +   meta = _mm_extract_epi64(sse_qe[0], 1);
> +   v_qe1 = _mm_insert_epi64(v_qe1, ev[1].u64, 0);
> +   v_qe1 = _mm_insert_epi64(v_qe1, meta, 1);
> +
> +   __m128i v_qe2 = _mm_setzero_si128();
> +   meta = _mm_extract_epi64(sse_qe[1], 0);
> +   v_qe2 = _mm_insert_epi64(v_qe2, ev[2].u64, 0);
> +   v_qe2 = _mm_insert_epi64(v_qe2, meta, 1);
> +
> +   __m128i v_qe3 = _mm_setzero_si128();
> +   meta = _mm_extract_epi64(sse_qe[1], 1);
> +   v_qe3 = _mm_insert_epi64(v_qe3, ev[3].u64, 0);
> +   v_qe3 = _mm_insert_epi64(v_qe3, meta, 1);
> +
> +   /* we have 4x XMM registers, one per QE. */
> +   __m512i v_all_qes = _mm512_setzero_si512();
> +   v_all_qes = _mm512_inserti32x4(v_all_qes, v_qe0, 0);
> +   v_all_qes = _mm512_inserti32x4(v_all_qes, v_qe1, 1);
> +   v_all_qes = _mm512_inserti32x4(v_all_qes, v_qe2, 2);
> +   v_all_qes = _mm512_inserti32x4(v_all_qes, v_qe3, 3);
> +
> +   /*
> +* store the 4x QEs in a single register to the scratch
> +* space of the PMD
> +*/
> +   _mm512_store_si512(&qe[0], v_all_qes);
> +#else
> +   /*
> +* Store the metadata to memory (use the double-precision
> +* _mm_storeh_pd because there is no integer function for
> +* storing the upper 64b):
> +* qe[0] metadata = sse_qe[0][63:0]
> +* qe[1] metadata = sse_qe[0][127:64]
> +* qe[2] metadata = sse_qe[1][63:0]
> +* qe[3] metadata = sse_qe[1][127:64]
> +

Re: [PATCH 1/1] common/cnxk: added new macros to platform layer

2022-05-14 Thread Jerin Jacob
On Tue, Apr 12, 2022 at 11:12 PM Srikanth Yalavarthi
 wrote:
>
> Added new macros for pointer operations, bitwise operations,
> spinlocks and 32-bit read and write.
>
> Signed-off-by: Srikanth Yalavarthi 
> ---
>  drivers/common/cnxk/roc_bits.h | 12 
>  drivers/common/cnxk/roc_platform.h | 28 +++-
>  2 files changed, 31 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/common/cnxk/roc_bits.h b/drivers/common/cnxk/roc_bits.h
> index 11216d9d63..ce3dffa08d 100644
> --- a/drivers/common/cnxk/roc_bits.h
> +++ b/drivers/common/cnxk/roc_bits.h
> @@ -29,4 +29,16 @@
>  (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h
>  #endif
>
> +#ifndef IS_BIT_SET
> +#define IS_BIT_SET(num, n) ((num) & (1 << (n)))
> +#endif
> +
> +#ifndef SET_BIT
> +#define SET_BIT(num, n) ((num) | (1 << (n)))
> +#endif
> +
> +#ifndef CLEAR_BIT
> +#define CLEAR_BIT(num, n) ((num) &= ~((1) << (n)))
> +#endif


lib/eal/include/rte_bitops.h already has similar ops; please use that
scheme instead, e.g.:
#define plt_bit_relaxed_get32 rte_bit_relaxed_get32
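
A minimal sketch of that direction, assuming the cnxk platform layer aliases
the existing rte_bitops.h accessors instead of adding new macros in roc_bits.h
(the extra plt_* names below just extend the quoted pattern and are illustrative):

#include <rte_bitops.h>

/* Map platform-layer bit helpers onto the generic EAL bit operations. */
#define plt_bit_relaxed_get32   rte_bit_relaxed_get32
#define plt_bit_relaxed_set32   rte_bit_relaxed_set32
#define plt_bit_relaxed_clear32 rte_bit_relaxed_clear32

/* Usage sketch: set, test and clear bit 3 of a 32-bit word. */
static inline void
example_bit_ops(uint32_t *word)
{
	plt_bit_relaxed_set32(3, word);
	if (plt_bit_relaxed_get32(3, word))
		plt_bit_relaxed_clear32(3, word);
}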


>  #endif /* _ROC_BITS_H_ */
> diff --git a/drivers/common/cnxk/roc_platform.h b/drivers/common/cnxk/roc_platform.h
> index 28004b1743..3671e55c23 100644
> --- a/drivers/common/cnxk/roc_platform.h
> +++ b/drivers/common/cnxk/roc_platform.h
> @@ -41,6 +41,7 @@
>  #define PLT_MEMZONE_NAMESIZERTE_MEMZONE_NAMESIZE
>  #define PLT_STD_C11 RTE_STD_C11
>  #define PLT_PTR_ADD RTE_PTR_ADD
> +#define PLT_PTR_SUB RTE_PTR_SUB
>  #define PLT_PTR_DIFFRTE_PTR_DIFF
>  #define PLT_MAX_RXTX_INTR_VEC_ID RTE_MAX_RXTX_INTR_VEC_ID
>  #define PLT_INTR_VEC_RXTX_OFFSET RTE_INTR_VEC_RXTX_OFFSET
> @@ -70,12 +71,16 @@
>  #define PLT_U32_CAST(val) ((uint32_t)(val))
>  #define PLT_U16_CAST(val) ((uint16_t)(val))
>
> +/* Add / Sub pointer with scalar and cast to uint64_t */
> +#define PLT_PTR_ADD_U64_CAST(__ptr, __x) PLT_U64_CAST(PLT_PTR_ADD(__ptr, __x))
> +#define PLT_PTR_SUB_U64_CAST(__ptr, __x) PLT_U64_CAST(PLT_PTR_SUB(__ptr, __x))
> +
>  /** Divide ceil */
> -#define PLT_DIV_CEIL(x, y) \
> -   ({  \
> -   __typeof(x) __x = x;\
> -   __typeof(y) __y = y;\
> -   (__x + __y - 1) / __y;  \
> +#define PLT_DIV_CEIL(x, y)                                                  \
> +   ({                                                                       \
> +           __typeof(x) __x = x;                                             \
> +           __typeof(y) __y = y;                                             \
> +           (__x + __y - 1) / __y;                                           \
> })

Please remove formatting changes.


>
>  #define __plt_cache_aligned __rte_cache_aligned
> @@ -113,10 +118,11 @@
>  #define plt_bitmap_scanrte_bitmap_scan
>  #define plt_bitmap_get_memory_footprint rte_bitmap_get_memory_footprint
>
> -#define plt_spinlock_t rte_spinlock_t
> -#define plt_spinlock_init   rte_spinlock_init
> -#define plt_spinlock_lock   rte_spinlock_lock
> -#define plt_spinlock_unlock rte_spinlock_unlock
> +#define plt_spinlock_t  rte_spinlock_t
> +#define plt_spinlock_initrte_spinlock_init
> +#define plt_spinlock_lockrte_spinlock_lock
> +#define plt_spinlock_unlock  rte_spinlock_unlock
> +#define plt_spinlock_trylock rte_spinlock_trylock
>
>  #define plt_intr_callback_register   rte_intr_callback_register
>  #define plt_intr_callback_unregister rte_intr_callback_unregister
> @@ -165,6 +171,10 @@
>  #define plt_write64(val, addr)                                              \
>         rte_write64_relaxed((val), (volatile void *)(addr))
>
> +#define plt_read32(addr) rte_read32_relaxed((volatile void *)(addr))
> +#define plt_write32(val, addr)                                              \
> +   rte_write32_relaxed((val), (volatile void *)(addr))
> +
>  #define plt_wmb()  rte_wmb()
>  #define plt_rmb()  rte_rmb()
>  #define plt_io_wmb()   rte_io_wmb()
> --
> 2.17.1
>


Re: [PATCH] event/cnxk: fix out of bounds access

2022-05-14 Thread Jerin Jacob
On Sun, Apr 24, 2022 at 9:55 PM Gowrishankar Muthukrishnan
 wrote:
>
> Fix out of bounds array access reporrted in coverity scan.

Fixed reported typo

>
> Coverity issue: 375817
> Fixes: 2351506401e ("event/cnxk: add SSO selftest and dump")
>
> Signed-off-by: Gowrishankar Muthukrishnan 

Acked-by: Jerin Jacob 

Applied to dpdk-next-net-eventdev/for-main. Thanks

> ---
>  drivers/event/cnxk/cnxk_eventdev_selftest.c | 12 
>  1 file changed, 12 insertions(+)
>
> diff --git a/drivers/event/cnxk/cnxk_eventdev_selftest.c b/drivers/event/cnxk/cnxk_eventdev_selftest.c
> index 2fe6467f88..3aa6f081a7 100644
> --- a/drivers/event/cnxk/cnxk_eventdev_selftest.c
> +++ b/drivers/event/cnxk/cnxk_eventdev_selftest.c
> @@ -626,6 +626,12 @@ launch_workers_and_wait(int (*main_thread)(void *),
> /* start core */ -1,
> /* skip main */ 1,
> /* wrap */ 0);
> +   if (w_lcore == RTE_MAX_LCORE) {
> +   plt_err("Failed to get next available lcore");
> +   free(param);
> +   return -1;
> +   }
> +
> rte_eal_remote_launch(main_thread, &param[0], w_lcore);
>
> for (port = 1; port < nb_workers; port++) {
> @@ -635,6 +641,12 @@ launch_workers_and_wait(int (*main_thread)(void *),
> param[port].dequeue_tmo_ticks = dequeue_tmo_ticks;
> rte_atomic_thread_fence(__ATOMIC_RELEASE);
> w_lcore = rte_get_next_lcore(w_lcore, 1, 0);
> +   if (w_lcore == RTE_MAX_LCORE) {
> +   plt_err("Failed to get next available lcore");
> +   free(param);
> +   return -1;
> +   }
> +
> rte_eal_remote_launch(worker_thread, &param[port], w_lcore);
> }
>
> --
> 2.25.1
>


Re: [PATCH] libpcapng: fix timestamp wrapping in output files

2022-05-14 Thread Quentin Armitage
On Wed, 2022-05-11 at 09:46 -0700, Stephen Hemminger wrote:
> On Sat,  7 May 2022 17:12:36 +0100
> Quentin Armitage  wrote:
> 
> > In pcapng_tsc_to_ns(), delta * NSEC_PER_SEC will overflow approximately 8
> > seconds after pcapng_init is called when using a TSC with a frequency
> > of 2.5GHz.
> > 
> > To avoid the overflow, reread the time and TSC once
> > delta * NSEC_PER_SEC > (1 << 63). In order to ensure that there
> > is no overflow if there is a several second gap between calls to
> > pcapng_tsc_to_ns() the actual check to reread the clock is:
> >   delta > ((1ULL << 63) / NSEC_PER_SEC)
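
A quick back-of-the-envelope check of those figures (a sketch only, assuming
NSEC_PER_SEC = 10^9 and a 2.5 GHz TSC):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Sanity-check the overflow window and the reread threshold quoted above. */
int main(void)
{
	const uint64_t nsec_per_sec = 1000000000ULL;
	const double tsc_hz = 2.5e9;

	/* delta * NSEC_PER_SEC wraps a uint64_t once delta >= 2^64 / 1e9 cycles */
	double wrap_cycles = 18446744073709551616.0 / nsec_per_sec;
	printf("product wraps after ~%.1f s at 2.5 GHz\n", wrap_cycles / tsc_hz);

	/* the patch rereads the clock once delta > 2^63 / NSEC_PER_SEC */
	uint64_t guard = (1ULL << 63) / nsec_per_sec;
	printf("reread threshold: %" PRIu64 " cycles (~%.1f s)\n",
	       guard, (double)guard / tsc_hz);
	return 0;
}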
> > 
> > Fixes: 8d23ce8f5ee ("pcapng: add new library for writing pcapng files")
> > Cc: sta...@dpdk.org
> > 
> > Signed-off-by: Quentin Armitage 
> 
> What about something like this instead.
> 
> diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c
> index 90b2f5bc6905..c5534301bf2c 100644
> --- a/lib/pcapng/rte_pcapng.c
> +++ b/lib/pcapng/rte_pcapng.c
> @@ -19,6 +19,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  
>  #include "pcapng_proto.h"
> @@ -34,27 +35,39 @@ struct rte_pcapng {
>  };
>  
>  /* For converting TSC cycles to PCAPNG ns format */
> -struct pcapng_time {
> +#define TICK_SCALE 16u
> +static struct {
> uint64_t ns;
> uint64_t cycles;
> +   struct rte_reciprocal_u64 inverse;
>  } pcapng_time;
>  
>  RTE_INIT(pcapng_init)
>  {
> struct timespec ts;
> +   uint64_t scale_tick_per_ns;
>  
> pcapng_time.cycles = rte_get_tsc_cycles();
> clock_gettime(CLOCK_REALTIME, &ts);
> pcapng_time.ns = rte_timespec_to_ns(&ts);
> +
> +   scale_tick_per_ns = (rte_get_tsc_hz() * TICK_SCALE) / NSEC_PER_SEC;
> +   pcapng_time.inverse = rte_reciprocal_value_u64(scale_tick_per_ns);
>  }
>  
>  /* PCAPNG timestamps are in nanoseconds */
>  static uint64_t pcapng_tsc_to_ns(uint64_t cycles)
>  {
> -   uint64_t delta;
> +   uint64_t delta, elapsed;
>  
> delta = cycles - pcapng_time.cycles;
> -   return pcapng_time.ns + (delta * NSEC_PER_SEC) / rte_get_tsc_hz();
> +
> +   /* Compute elapsed time in nanoseconds scaled by TICK_SCALE
> +    * since the start of the capture.
> +    * With scale of 4 this will roll over in 36 years.
> +    */
> +   elapsed = rte_reciprocal_divide_u64(delta, &pcapng_time.inverse);
> +   return pcapng_time.ns + elapsed / TICK_SCALE;
>  }
>  
>  /* length of option including padding */
> 

The final statement of pcapng_tsc_to_ns() should be:
return pcapng_time.ns + elapsed * TICK_SCALE;
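
Spelling out the unit analysis behind that correction (TICK_SCALE,
scale_tick_per_ns, pcapng_time and rte_reciprocal_divide_u64 are from the draft
above; only the derivation comment and the final line differ):

/*
 * scale_tick_per_ns = tsc_hz * TICK_SCALE / NSEC_PER_SEC
 *                   = (cycles per ns) * TICK_SCALE
 * elapsed = delta / scale_tick_per_ns
 *         = delta_cycles / ((cycles per ns) * TICK_SCALE)
 *         = elapsed_ns / TICK_SCALE
 * so converting back to nanoseconds needs a multiply, not a divide:
 */
static uint64_t pcapng_tsc_to_ns(uint64_t cycles)
{
	uint64_t delta, elapsed;

	delta = cycles - pcapng_time.cycles;
	elapsed = rte_reciprocal_divide_u64(delta, &pcapng_time.inverse);
	return pcapng_time.ns + elapsed * TICK_SCALE;
}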

There is also a problem that rte_get_tsc_hz() returns eal_tsc_resolution_hz, but
this is not initialized until rte_eal_init() is called, so rte_get_tsc_hz()
cannot be called from a constructor function.

While both of the above problems can easily be solved, I think there is a
problem with accuracy with this approach. With a 3GHz clock, scale_tick_per_ns
would be 48. For other clock speeds there can be a truncation in the
calculation. With a 3.3GHz clock, scale_tick_per_ns will be truncated from 52.8
to 52, resulting in a 1.5% or so error in the time returned by
pcapng_tsc_to_ns() (a 2.3GHz clock results in a 2.2% error). Increasing
TICK_SCALE reduces the percentage error, but also reduces the time before overflow
occurs.
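
Those truncation figures can be checked quickly (a sketch, assuming
TICK_SCALE = 16 and NSEC_PER_SEC = 10^9 as in the draft):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Quantify the truncation error in scale_tick_per_ns for a few TSC rates. */
int main(void)
{
	const double tick_scale = 16.0, nsec_per_sec = 1e9;
	const double hz[] = { 3.0e9, 3.3e9, 2.3e9 };
	unsigned int i;

	for (i = 0; i < sizeof(hz) / sizeof(hz[0]); i++) {
		double exact = hz[i] * tick_scale / nsec_per_sec;
		uint64_t truncated = (uint64_t)(hz[i] * tick_scale) / 1000000000ULL;
		printf("%.1f GHz: exact %.1f, truncated %" PRIu64 ", error %.1f%%\n",
		       hz[i] / 1e9, exact, truncated,
		       100.0 * (exact - truncated) / exact);
	}
	return 0;
}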

If the approach in the following patch is considered to be acceptable, I would
be very happy to submit an updated patch. The one concern I have about the patch
is introducing a new constructor priority, RTE_PRIORITY_TIMER, which may be
considered to be inappropriate. If it is inappropriate, then the simplest
alternative would be to introduce a new function rte_tsc_get_hz_init() which
calls set_tsc_freq() if eal_tsc_resolution_hz has not been initialized
(alternatively rte_get_tsc_hz() could be modified to make the check, but that
then produces an overhead every time the function is called).
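
For that last alternative, a minimal sketch of the check-on-call variant in
lib/eal/common/eal_common_timer.c (eal_tsc_resolution_hz and set_tsc_freq() are
the existing EAL internals; making rte_get_tsc_hz() lazily initialise is the
assumption here, with the per-call branch cost noted above):

/* Sketch: lazily derive the TSC rate on first use instead of adding a new
 * constructor priority; costs one (predictable) branch per call.
 */
uint64_t
rte_get_tsc_hz(void)
{
	if (unlikely(eal_tsc_resolution_hz == 0))
		set_tsc_freq();

	return eal_tsc_resolution_hz;
}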

diff --git a/lib/eal/common/eal_common_timer.c b/lib/eal/common/eal_common_timer.c
index 5686a5102b..cb3fa1e240 100644
--- a/lib/eal/common/eal_common_timer.c
+++ b/lib/eal/common/eal_common_timer.c
@@ -54,6 +54,9 @@ set_tsc_freq(void)
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
uint64_t freq;
 
+   if (eal_tsc_resolution_hz)
+   return;
+
if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
/*
 * Just use the primary process calculated TSC rate in any
@@ -86,3 +89,8 @@ RTE_INIT(rte_timer_init)
/* set rte_delay_us_block as a delay function */
rte_delay_us_callback_register(rte_delay_us_block);
 }
+
+RTE_INIT_PRIO(rte_tsc_init, TIMER)
+{
+   set_tsc_freq();
+}
diff --git a/lib/eal/include/rte_common.h b/lib/eal/include/rte_common.h
index 67587025ab..a0d64ff4f2 100644
--- a/lib/eal/include/rte_common.h
+++ b/lib/eal/include/rte_common.h
@@ -161,6 +161,7 @@ typedef uint16_t unaligned_uint16_t;
 
 #define RTE_PRIORITY_LOG 101
 #define RTE_PRIORITY_BUS 110
+#define RTE_PRIO