The computation of the timestamp is more easily done in pdump
than in pcapng. The initialization is simpler and it removes
global state from the pcapng library.

It also makes it easier to add HW timestamp support later.

Simplify the computation of nanoseconds from TSC to a two-step
process which avoids numeric overflow issues. The previous code
was also not thread-safe.

Fixes: c882eb544842 ("pcapng: fix timestamp wrapping in output files")
Signed-off-by: Stephen Hemminger <step...@networkplumber.org>
---
 lib/pcapng/rte_pcapng.c | 71 ++---------------------------------------
 lib/pcapng/rte_pcapng.h |  2 +-
 lib/pdump/rte_pdump.c   | 56 +++++++++++++++++++++++++++++---
 3 files changed, 55 insertions(+), 74 deletions(-)

diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c
index ddce7bc87141..f6b3bd0ca718 100644
--- a/lib/pcapng/rte_pcapng.c
+++ b/lib/pcapng/rte_pcapng.c
@@ -25,7 +25,6 @@
 #include <rte_mbuf.h>
 #include <rte_os_shim.h>
 #include <rte_pcapng.h>
-#include <rte_reciprocal.h>
 #include <rte_time.h>
 
 #include "pcapng_proto.h"
@@ -43,15 +42,6 @@ struct rte_pcapng {
        uint32_t port_index[RTE_MAX_ETHPORTS];
 };
 
-/* For converting TSC cycles to PCAPNG ns format */
-static struct pcapng_time {
-       uint64_t ns;
-       uint64_t cycles;
-       uint64_t tsc_hz;
-       struct rte_reciprocal_u64 tsc_hz_inverse;
-} pcapng_time;
-
-
 #ifdef RTE_EXEC_ENV_WINDOWS
 /*
  * Windows does not have writev() call.
@@ -102,58 +92,6 @@ static ssize_t writev(int fd, const struct iovec *iov, int iovcnt)
 #define if_indextoname(ifindex, ifname) NULL
 #endif
 
-static inline void
-pcapng_init(void)
-{
-       struct timespec ts;
-
-       pcapng_time.cycles = rte_get_tsc_cycles();
-       clock_gettime(CLOCK_REALTIME, &ts);
-       pcapng_time.cycles = (pcapng_time.cycles + rte_get_tsc_cycles()) / 2;
-       pcapng_time.ns = rte_timespec_to_ns(&ts);
-
-       pcapng_time.tsc_hz = rte_get_tsc_hz();
-       pcapng_time.tsc_hz_inverse = rte_reciprocal_value_u64(pcapng_time.tsc_hz);
-}
-
-/* PCAPNG timestamps are in nanoseconds */
-static uint64_t pcapng_tsc_to_ns(uint64_t cycles)
-{
-       uint64_t delta, secs;
-
-       if (!pcapng_time.tsc_hz)
-               pcapng_init();
-
-       /* In essence the calculation is:
-        *   delta = (cycles - pcapng_time.cycles) * NSEC_PRE_SEC / rte_get_tsc_hz()
-        * but this overflows within 4 to 8 seconds depending on TSC frequency.
-        * Instead, if delta >= pcapng_time.tsc_hz:
-        *   Increase pcapng_time.ns and pcapng_time.cycles by the number of
-        *   whole seconds in delta and reduce delta accordingly.
-        * delta will therefore always lie in the interval [0, pcapng_time.tsc_hz),
-        * which will not overflow when multiplied by NSEC_PER_SEC provided the
-        * TSC frequency < approx 18.4GHz.
-        *
-        * Currently all TSCs operate below 5GHz.
-        */
-       delta = cycles - pcapng_time.cycles;
-       if (unlikely(delta >= pcapng_time.tsc_hz)) {
-               if (likely(delta < pcapng_time.tsc_hz * 2)) {
-                       delta -= pcapng_time.tsc_hz;
-                       pcapng_time.cycles += pcapng_time.tsc_hz;
-                       pcapng_time.ns += NSEC_PER_SEC;
-               } else {
-                       secs = rte_reciprocal_divide_u64(delta, &pcapng_time.tsc_hz_inverse);
-                       delta -= secs * pcapng_time.tsc_hz;
-                       pcapng_time.cycles += secs * pcapng_time.tsc_hz;
-                       pcapng_time.ns += secs * NSEC_PER_SEC;
-               }
-       }
-
-       return pcapng_time.ns + rte_reciprocal_divide_u64(delta * NSEC_PER_SEC,
-                                                         &pcapng_time.tsc_hz_inverse);
-}
-
 /* length of option including padding */
 static uint16_t pcapng_optlen(uint16_t len)
 {
@@ -518,7 +456,7 @@ struct rte_mbuf *
 rte_pcapng_copy(uint16_t port_id, uint32_t queue,
                const struct rte_mbuf *md,
                struct rte_mempool *mp,
-               uint32_t length, uint64_t cycles,
+               uint32_t length, uint64_t timestamp,
                enum rte_pcapng_direction direction,
                const char *comment)
 {
@@ -527,14 +465,11 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue,
        struct pcapng_option *opt;
        uint16_t optlen;
        struct rte_mbuf *mc;
-       uint64_t ns;
        bool rss_hash;
 
 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
        RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, NULL);
 #endif
-       ns = pcapng_tsc_to_ns(cycles);
-
        orig_len = rte_pktmbuf_pkt_len(md);
 
        /* Take snapshot of the data */
@@ -639,8 +574,8 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue,
        /* Interface index is filled in later during write */
        mc->port = port_id;
 
-       epb->timestamp_hi = ns >> 32;
-       epb->timestamp_lo = (uint32_t)ns;
+       epb->timestamp_hi = timestamp >> 32;
+       epb->timestamp_lo = (uint32_t)timestamp;
        epb->capture_length = data_len;
        epb->original_length = orig_len;
 
diff --git a/lib/pcapng/rte_pcapng.h b/lib/pcapng/rte_pcapng.h
index 1225ed5536ff..b9a9ee23ad1d 100644
--- a/lib/pcapng/rte_pcapng.h
+++ b/lib/pcapng/rte_pcapng.h
@@ -122,7 +122,7 @@ enum rte_pcapng_direction {
  *   The upper limit on bytes to copy.  Passing UINT32_MAX
  *   means all data (after offset).
  * @param timestamp
- *   The timestamp in TSC cycles.
+ *   The timestamp in nanoseconds since 1/1/1970.
  * @param direction
  *   The direction of the packer: receive, transmit or unknown.
  * @param comment
diff --git a/lib/pdump/rte_pdump.c b/lib/pdump/rte_pdump.c
index a70085bd0211..384abf5e27ad 100644
--- a/lib/pdump/rte_pdump.c
+++ b/lib/pdump/rte_pdump.c
@@ -10,7 +10,9 @@
 #include <rte_log.h>
 #include <rte_memzone.h>
 #include <rte_errno.h>
+#include <rte_reciprocal.h>
 #include <rte_string_fns.h>
+#include <rte_time.h>
 #include <rte_pcapng.h>
 
 #include "rte_pdump.h"
@@ -78,6 +80,33 @@ static struct {
        const struct rte_memzone *mz;
 } *pdump_stats;
 
+/* Time conversion values */
+static struct {
+       uint64_t offset_ns;     /* ns since 1/1/1970 when initialized */
+       uint64_t tsc_base;      /* TSC when initialized */
+       uint64_t tsc_hz;        /* copy of rte_get_tsc_hz() */
+       struct rte_reciprocal_u64 tsc_hz_inverse; /* inverse of tsc_hz */
+} pdump_time;
+
+/* Convert from TSC (CPU cycles) to nanoseconds */
+static uint64_t pdump_timestamp(void)
+{
+       uint64_t delta, secs, ns;
+
+       delta = rte_get_tsc_cycles() - pdump_time.tsc_base;
+
+       /* Avoid numeric wraparound by computing seconds first */
+       secs = rte_reciprocal_divide_u64(delta, &pdump_time.tsc_hz_inverse);
+
+       /* Remove the seconds portion */
+       delta -= secs * pdump_time.tsc_hz;
+       ns = rte_reciprocal_divide_u64(delta * NS_PER_S,
+                                      &pdump_time.tsc_hz_inverse);
+
+       return secs * NS_PER_S + ns + pdump_time.offset_ns;
+}
+
+
 /* Create a clone of mbuf to be placed into ring. */
 static void
 pdump_copy(uint16_t port_id, uint16_t queue,
@@ -90,7 +119,7 @@ pdump_copy(uint16_t port_id, uint16_t queue,
        int ring_enq;
        uint16_t d_pkts = 0;
        struct rte_mbuf *dup_bufs[nb_pkts];
-       uint64_t ts;
+       uint64_t timestamp = 0;
        struct rte_ring *ring;
        struct rte_mempool *mp;
        struct rte_mbuf *p;
@@ -99,7 +128,6 @@ pdump_copy(uint16_t port_id, uint16_t queue,
        if (cbs->filter)
                rte_bpf_exec_burst(cbs->filter, (void **)pkts, rcs, nb_pkts);
 
-       ts = rte_get_tsc_cycles();
        ring = cbs->ring;
        mp = cbs->mp;
        for (i = 0; i < nb_pkts; i++) {
@@ -119,12 +147,17 @@ pdump_copy(uint16_t port_id, uint16_t queue,
                 * If using pcapng then want to wrap packets
                 * otherwise a simple copy.
                 */
-               if (cbs->ver == V2)
+               if (cbs->ver == V2) {
+                       /* calculate timestamp on first packet */
+                       if (timestamp == 0)
+                               timestamp = pdump_timestamp();
+
                        p = rte_pcapng_copy(port_id, queue,
                                            pkts[i], mp, cbs->snaplen,
-                                           ts, direction, NULL);
-               else
+                                           timestamp, direction, NULL);
+               } else {
                        p = rte_pktmbuf_copy(pkts[i], mp, 0, cbs->snaplen);
+               }
 
                if (unlikely(p == NULL))
                        __atomic_fetch_add(&stats->nombuf, 1, __ATOMIC_RELAXED);
@@ -421,8 +454,21 @@ int
 rte_pdump_init(void)
 {
        const struct rte_memzone *mz;
+       struct timespec ts;
+       uint64_t cycles;
        int ret;
 
+       /* Compute time base offsets */
+       cycles = rte_get_tsc_cycles();
+       clock_gettime(CLOCK_REALTIME, &ts);
+
+       /* put initial TSC value in middle of clock_gettime() call */
+       pdump_time.tsc_base = (cycles + rte_get_tsc_cycles()) / 2;
+       pdump_time.offset_ns = rte_timespec_to_ns(&ts);
+
+       pdump_time.tsc_hz = rte_get_tsc_hz();
+       pdump_time.tsc_hz_inverse = rte_reciprocal_value_u64(pdump_time.tsc_hz);
+
        mz = rte_memzone_reserve(MZ_RTE_PDUMP_STATS, sizeof(*pdump_stats),
                                 rte_socket_id(), 0);
        if (mz == NULL) {
-- 
2.39.2

Reply via email to