On 08/10/2018 03:49 PM, Jens Freimann wrote: > This adds a new forwarding mode to testpmd to simulate > more realistic behavior of a guest machine engaged in receiving > and sending packets performing Virtual Network Function (VNF). >
Hi Jens, comments below, thanks, Kevin. > The goal is to enable a simple way of measuring performance impact on > cache and memory footprint utilization from various VNF co-located on > the same host machine. For this it does: > > * Buffer packets in a FIFO: > > Create a fifo to buffer received packets. Once it flows over put > those packets into the actual tx queue. The fifo is created per tx > queue and its size can be set with the --buffersize-before-sending > commandline parameter. --noisy-tx-sw-buffer-flushtime > > A second commandline parameter is used to set a timeout in > milliseconds after which the fifo is flushed. > > --noisy-tx-sw-buffer-size [packet numbers] > Keep the mbuf in a FIFO and forward the over flooding packets from the > FIFO. This queue is per TX-queue (after all other packet processing). > > --noisy-tx-sw-buffer-flushtime [delay] > Flush the packet queue if no packets have been seen during > [delay]. As long as packets are seen, the timer is reset. > > Add several options to simulate route lookups (memory reads) in tables > that can be quite large, as well as route hit statistics update. > These options simulates the while stack traversal and > will trash the cache. Memory access is random. > > * simulate route lookups: > > Allocate a buffer and perform reads and writes on it as specified by > commandline options: > > --noisy-lkup-memory [size] > Size of the VNF internal memory (MB), in which the random > read/write will be done, allocated by rte_malloc (hugepages). > > --noisy-lkup-num-writes [num] > Number of random writes in memory per packet should be > performed, simulating hit-flags update. 64 bits per write, > all write in different cache lines. > > --noisy-lkup-num-reads [num] > Number of random reads in memory per packet should be > performed, simulating FIB/table lookups. 64 bits per read, > all write in different cache lines. > > --noisy-lkup-num-reads-writes [num] > Number of random reads and writes in memory per packet should > be performed, simulating stats update. 64 bits per read-write, all > reads and writes in different cache lines. > > Signed-off-by: Jens Freimann <jfreim...@redhat.com> > --- > app/test-pmd/Makefile | 1 + > app/test-pmd/meson.build | 1 + > app/test-pmd/noisy_vnf.c | 269 > ++++++++++++++++++++++++++++ > app/test-pmd/parameters.c | 60 +++++++ > app/test-pmd/testpmd.c | 35 ++++ > app/test-pmd/testpmd.h | 10 ++ > doc/guides/testpmd_app_ug/run_app.rst | 33 ++++ > doc/guides/testpmd_app_ug/testpmd_funcs.rst | 7 +- > 8 files changed, 414 insertions(+), 2 deletions(-) > create mode 100644 app/test-pmd/noisy_vnf.c > > diff --git a/app/test-pmd/Makefile b/app/test-pmd/Makefile > index 2b4d604..e2581ca 100644 > --- a/app/test-pmd/Makefile > +++ b/app/test-pmd/Makefile > @@ -33,6 +33,7 @@ SRCS-y += rxonly.c > SRCS-y += txonly.c > SRCS-y += csumonly.c > SRCS-y += icmpecho.c > +SRCS-y += noisy_vnf.c > SRCS-$(CONFIG_RTE_LIBRTE_IEEE1588) += ieee1588fwd.c > SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_cmd.c > > diff --git a/app/test-pmd/meson.build b/app/test-pmd/meson.build > index a0b3be0..9ef6ed9 100644 > --- a/app/test-pmd/meson.build > +++ b/app/test-pmd/meson.build > @@ -17,6 +17,7 @@ sources = files('cmdline.c', > 'iofwd.c', > 'macfwd.c', > 'macswap.c', > + 'noisy_vnf.c', > 'parameters.c', > 'rxonly.c', > 'testpmd.c', > diff --git a/app/test-pmd/noisy_vnf.c b/app/test-pmd/noisy_vnf.c > new file mode 100644 > index 0000000..dcde7d0 > --- /dev/null > +++ b/app/test-pmd/noisy_vnf.c > @@ -0,0 +1,269 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(c) 2018 Red Hat Corp. > + */ > + > +#include <stdarg.h> > +#include <stdio.h> > +#include <stdbool.h> > +#include <string.h> > +#include <errno.h> > +#include <stdint.h> > +#include <unistd.h> > +#include <inttypes.h> > + > +#include <sys/queue.h> > +#include <sys/stat.h> > + > +#include <rte_common.h> > +#include <rte_log.h> > +#include <rte_debug.h> > +#include <rte_cycles.h> > +#include <rte_memory.h> > +#include <rte_launch.h> > +#include <rte_eal.h> > +#include <rte_per_lcore.h> > +#include <rte_lcore.h> > +#include <rte_memcpy.h> > +#include <rte_mempool.h> > +#include <rte_mbuf.h> > +#include <rte_ethdev.h> > +#include <rte_flow.h> > +#include <rte_malloc.h> > + > +#include "testpmd.h" > + > +struct noisy_config { > + struct rte_ring *f; > + uint64_t prev_time; > + char *vnf_mem; > + bool do_buffering; > + bool do_flush; > + bool do_sim; > +}; > + > +struct noisy_config *noisy_cfg[RTE_MAX_ETHPORTS]; > + > +static inline void > +do_write(char *vnf_mem) > +{ > + uint64_t i = rte_rand(); > + uint64_t w = rte_rand(); > + > + vnf_mem[i % ((noisy_lkup_mem_sz * 1024 * 1024) / > + RTE_CACHE_LINE_SIZE)] = w; > +} > + > +static inline void > +do_read(char *vnf_mem) > +{ > + uint64_t i = rte_rand(); > + uint64_t r; > + > + r = vnf_mem[i % ((noisy_lkup_mem_sz * 1024 * 1024) / > + RTE_CACHE_LINE_SIZE)]; > + r++; > +} > + > +static inline void > +do_readwrite(char *vnf_mem) > +{ > + do_read(vnf_mem); > + do_write(vnf_mem); > +} > + > +/* > + * Simulate route lookups as defined by commandline parameters > + */ > +static void > +sim_memory_lookups(struct noisy_config *ncf, uint16_t nb_pkts) > +{ > + uint16_t i, j; > + > + if (!ncf->do_sim) > + return; > + > + for (i = 0; i < nb_pkts; i++) { > + for (j = 0; j < noisy_lkup_num_writes; j++) > + do_write(ncf->vnf_mem); > + for (j = 0; j < noisy_lkup_num_reads; j++) > + do_read(ncf->vnf_mem); > + for (j = 0; j < noisy_lkup_num_reads_writes; j++) > + do_readwrite(ncf->vnf_mem); > + } > +} > + > +static uint16_t > +do_retry(uint16_t nb_rx, uint16_t nb_tx, struct rte_mbuf **pkts, > + struct fwd_stream *fs) > +{ > + uint32_t retry = 0; > + > + while (nb_tx < nb_rx && retry++ < burst_tx_retry_num) { > + rte_delay_us(burst_tx_delay_time); > + nb_tx += rte_eth_tx_burst(fs->tx_port, fs->tx_queue, > + &pkts[nb_tx], nb_rx - nb_tx); > + } > + > + return nb_tx; > +} > + > +static uint32_t > +dropped_pkts(struct rte_mbuf **pkts, uint16_t nb_rx, uint16_t nb_tx) > +{ > + if (nb_tx < nb_rx) { > + do { > + rte_pktmbuf_free(pkts[nb_tx]); > + } while (++nb_tx < nb_rx); > + } > + > + return nb_rx - nb_tx; > +} > + > +/* > + * Forwarding of packets in noisy VNF mode. Forward packets but perform > + * memory operations first as specified on cmdline. > + * > + * Depending on which commandline parameters are specified we have > + * different cases to handle: > + * > + * 1. No FIFO size was given, so we don't do buffering of incoming > + * packets. This case is pretty much what iofwd does but in this case > + * we also do simulation of memory accesses (depending on which > + * parameters were specified for it). > + * 2. User wants do buffer packets in a FIFO and sent out overflowing > + * packets. > + * 3. User wants a FIFO and specifies a time in ms to flush all packets > + * out of the FIFO It reads like these are mutually exclusive, whereas I think they can be combined too. Maybe you can just add a line about combinations or something. > + */ > +static void > +pkt_burst_noisy_vnf(struct fwd_stream *fs) > +{ > + const uint64_t freq_khz = rte_get_timer_hz() / 1000; > + struct noisy_config *ncf = noisy_cfg[fs->rx_port]; > + struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; > + struct rte_mbuf *tmp_pkts[MAX_PKT_BURST]; > + uint16_t nb_deqd = 0; > + uint16_t nb_rx = 0; > + uint16_t nb_tx = 0; > + uint16_t nb_enqd; > + unsigned int fifo_free; > + uint64_t delta_ms; > + bool needs_flush = false; > + uint64_t now; > + > + nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, > + pkts_burst, nb_pkt_per_burst); > + if (unlikely(nb_rx == 0)) Perhaps you should jump down to 'if (ncf->do_flush)' to check if it's time to flush some pkts regardless that you did not receive any new ones? > + return; > + fs->rx_packets += nb_rx; > + > + if (!ncf->do_buffering) { > + sim_memory_lookups(ncf, nb_rx); > + nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, > + pkts_burst, nb_rx); > + if (unlikely(nb_tx < nb_rx) && fs->retry_enabled) > + nb_tx += do_retry(nb_rx, nb_tx, pkts_burst, fs); > + fs->tx_packets += nb_tx; > + fs->fwd_dropped += dropped_pkts(pkts_burst, nb_rx, nb_tx); > + return; > + } > + > + fifo_free = rte_ring_free_count(ncf->f); > + if (fifo_free >= nb_rx) { > + nb_enqd = rte_ring_enqueue_burst(ncf->f, > + (void **) pkts_burst, nb_rx, NULL); > + if (nb_enqd < nb_rx) > + fs->fwd_dropped += (nb_rx - nb_enqd); unlikely this would happen, but you'd need to free these pkts > + sim_memory_lookups(ncf, nb_rx); > + } else if (fifo_free < nb_rx) { I don't think the if is needed, the else condition already gaurantees this > + nb_deqd = rte_ring_dequeue_burst(ncf->f, > + (void **) tmp_pkts, nb_rx, NULL); > + nb_enqd = rte_ring_enqueue_burst(ncf->f, > + (void **) pkts_burst, nb_deqd, NULL); > + if (nb_deqd > 0) { > + nb_tx = rte_eth_tx_burst(fs->tx_port, > + fs->tx_queue, tmp_pkts, > + nb_deqd); > + if (unlikely(nb_tx < nb_rx) && fs->retry_enabled) > + nb_tx += do_retry(nb_rx, nb_tx, tmp_pkts, fs); > + fs->fwd_dropped += dropped_pkts(tmp_pkts, nb_deqd, > + nb_tx); Shouldn't there be sim_memory_lookups() for this path? > + } > + } > + > + if (ncf->do_flush) { > + if (!ncf->prev_time) > + now = ncf->prev_time = rte_get_timer_cycles(); > + else > + now = rte_get_timer_cycles(); > + delta_ms = (now - ncf->prev_time) / freq_khz; > + needs_flush = delta_ms >= noisy_tx_sw_buf_flush_time && > + noisy_tx_sw_buf_flush_time > 0 && !nb_tx; > + } > + while (needs_flush && !rte_ring_empty(ncf->f)) { > + unsigned int sent; > + nb_deqd = rte_ring_dequeue_burst(ncf->f, (void **)tmp_pkts, > + MAX_PKT_BURST, NULL); > + sent = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, > + tmp_pkts, nb_deqd); > + if (unlikely(sent < nb_deqd) && fs->retry_enabled) > + nb_tx += do_retry(nb_rx, nb_tx, tmp_pkts, fs); > + fs->fwd_dropped += dropped_pkts(tmp_pkts, nb_deqd, sent); > + ncf->prev_time = rte_get_timer_cycles(); > + } > +} > + > +#define NOISY_STRSIZE 256 > +#define NOISY_RING "noisy_ring_%d\n" > + > +static void > +noisy_fwd_end(portid_t pi) > +{ > + rte_free(noisy_cfg[pi]); this should be freed last > + rte_ring_free(noisy_cfg[pi]->f); > + rte_free(noisy_cfg[pi]->vnf_mem); > +} > + > +static void > +noisy_fwd_begin(portid_t pi) > +{ > + struct noisy_config *n; > + char name[NOISY_STRSIZE]; > + > + noisy_cfg[pi] = rte_zmalloc("testpmd noisy fifo and timers", > + nb_fwd_ports * sizeof(struct noisy_config), > + RTE_CACHE_LINE_SIZE); I don't think 'nb_fwd_ports *' is needed. afaict, you'll just need one struct per port. > + if (noisy_cfg == NULL) { > + rte_exit(EXIT_FAILURE, > + "rte_zmalloc(%d) struct noisy_config) failed\n", > + (int) nb_fwd_ports); > + } > + n = noisy_cfg[pi]; > + snprintf(name, NOISY_STRSIZE, NOISY_RING, pi); > + n->f = rte_ring_create(name, noisy_tx_sw_bufsz, > + rte_socket_id(), 0); Could be NULL > + if (noisy_lkup_mem_sz > 0) { > + n->vnf_mem = (char *) rte_zmalloc("vnf sim memory", > + noisy_lkup_mem_sz * 1024 * 1024, > + RTE_CACHE_LINE_SIZE); > + if (n->vnf_mem == NULL) > + rte_exit(EXIT_FAILURE, > + "rte_zmalloc(%" PRIu64 ") for vnf memory) > failed\n", > + noisy_lkup_mem_sz); > + } else if (noisy_lkup_mem_sz == 0 && noisy_tx_sw_bufsz == 0) { > + rte_exit(EXIT_FAILURE, > + "--noisy-tx-sw-buffer-size or --noisy-lkup-mem-size > must be > 0\n"); s/mem-size/memory/ > + } > + n->do_buffering = noisy_tx_sw_bufsz > 0; > + n->do_flush = noisy_tx_sw_buf_flush_time > 0; > + n->do_sim = noisy_lkup_num_writes + noisy_lkup_num_reads + > + noisy_lkup_num_reads_writes; > +} > + > +struct fwd_engine noisy_vnf_engine = { > + .fwd_mode_name = "noisy", > + .port_fwd_begin = noisy_fwd_begin, > + .port_fwd_end = noisy_fwd_end, > + .packet_fwd = pkt_burst_noisy_vnf, > +}; > + > diff --git a/app/test-pmd/parameters.c b/app/test-pmd/parameters.c > index 962fad7..85b44af 100644 > --- a/app/test-pmd/parameters.c > +++ b/app/test-pmd/parameters.c > @@ -625,6 +625,12 @@ > { "vxlan-gpe-port", 1, 0, 0 }, > { "mlockall", 0, 0, 0 }, > { "no-mlockall", 0, 0, 0 }, > + { "noisy-tx-sw-buffer-size", 1, 0, 0 }, > + { "noisy-tx-sw-buffer-flushtime", 1, 0, 0 }, > + { "noisy-lkup-memory", 1, 0, 0 }, > + { "noisy-lkup-num-writes", 1, 0, 0 }, > + { "noisy-lkup-num-reads", 1, 0, 0 }, > + { "noisy-lkup-num-reads-writes", 1, 0, 0 }, > { 0, 0, 0, 0 }, > }; > > @@ -1147,6 +1153,60 @@ > do_mlockall = 1; > if (!strcmp(lgopts[opt_idx].name, "no-mlockall")) > do_mlockall = 0; > + if (!strcmp(lgopts[opt_idx].name, > + "noisy-tx-sw-buffer-size")) { > + n = atoi(optarg); > + if (n > 0) > + noisy_tx_sw_bufsz = n; > + else > + rte_exit(EXIT_FAILURE, > + "noisy-tx-sw-buffer-size must > be > 0\n"); I commented in v4 about if this/others could be 'n >= 0' to allow for setting to the default etc. No problem if you prefer not to allow it, just not sure if comment is missed or not? > + } > + if (!strcmp(lgopts[opt_idx].name, > + "noisy-tx-sw-buffer-flushtime")) { > + n = atoi(optarg); > + if (n >= 0) > + noisy_tx_sw_buf_flush_time = n; > + else > + rte_exit(EXIT_FAILURE, > + "noisy-tx-sw-buffer-flushtime > must be > 0\n"); log should match '>= 0' > + } > + if (!strcmp(lgopts[opt_idx].name, > + "noisy-lkup-memory")) { > + n = atoi(optarg); > + if (n > 0) > + noisy_lkup_mem_sz = n; > + else > + rte_exit(EXIT_FAILURE, > + "noisy-lkup-memory must be > > 0\n"); > + } > + if (!strcmp(lgopts[opt_idx].name, > + "noisy-lkup-num-writes")) { > + n = atoi(optarg); > + if (n > 0) > + noisy_lkup_num_writes = n; > + else > + rte_exit(EXIT_FAILURE, > + "noisy-lkup-num-writes must be > > 0\n"); > + } > + if (!strcmp(lgopts[opt_idx].name, > + "noisy-lkup-num-reads")) { > + n = atoi(optarg); > + if (n > 0) > + noisy_lkup_num_reads = n; > + else > + rte_exit(EXIT_FAILURE, > + "noisy-lkup-num-reads must be > > 0\n"); > + } > + if (!strcmp(lgopts[opt_idx].name, > + "noisy-lkup-num-reads-writes")) { > + n = atoi(optarg); > + if (n > 0) > + noisy_lkup_num_reads_writes = n; > + else > + rte_exit(EXIT_FAILURE, > + "noisy-lkup-num-reads-writes > must be > 0\n"); > + } > break; > case 'h': > usage(argv[0]); > diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c > index ee48db2..2b1b8c3 100644 > --- a/app/test-pmd/testpmd.c > +++ b/app/test-pmd/testpmd.c > @@ -157,6 +157,7 @@ struct fwd_engine * fwd_engines[] = { > &tx_only_engine, > &csum_fwd_engine, > &icmp_echo_engine, > + &noisy_vnf_engine, > #if defined RTE_LIBRTE_PMD_SOFTNIC > &softnic_fwd_engine, > #endif > @@ -253,6 +254,40 @@ struct fwd_engine * fwd_engines[] = { > int16_t tx_rs_thresh = RTE_PMD_PARAM_UNSET; > > /* > + * Configurable value of buffered packets before sending. > + */ > +uint16_t noisy_tx_sw_bufsz; > + > +/* > + * Configurable value of packet buffer timeout. > + */ > +uint16_t noisy_tx_sw_buf_flush_time; > + > +/* > + * Configurable value for size of VNF internal memory area > + * used for simulating noisy neighbour behaviour > + */ > +uint64_t noisy_lkup_mem_sz; > + > +/* > + * Configurable value of number of random writes done in > + * VNF simulation memory area. > + */ > +uint64_t noisy_lkup_num_writes; > + > +/* > + * Configurable value of number of random reads done in > + * VNF simulation memory area. > + */ > +uint64_t noisy_lkup_num_reads; > + > +/* > + * Configurable value of number of random reads/wirtes done in typo > + * VNF simulation memory area. > + */ > +uint64_t noisy_lkup_num_reads_writes; > + > +/* > * Receive Side Scaling (RSS) configuration. > */ > uint64_t rss_hf = ETH_RSS_IP; /* RSS IP by default. */ > diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h > index a1f66147..796114f 100644 > --- a/app/test-pmd/testpmd.h > +++ b/app/test-pmd/testpmd.h > @@ -122,6 +122,8 @@ struct fwd_stream { > #endif > }; > > + > + Added whitespace > /** Descriptor for a single flow. */ > struct port_flow { > size_t size; /**< Allocated space including data[]. */ > @@ -243,6 +245,7 @@ struct fwd_engine { > extern struct fwd_engine tx_only_engine; > extern struct fwd_engine csum_fwd_engine; > extern struct fwd_engine icmp_echo_engine; > +extern struct fwd_engine noisy_vnf_engine; > #ifdef SOFTNIC > extern struct fwd_engine softnic_fwd_engine; > #endif > @@ -375,6 +378,13 @@ struct queue_stats_mappings { > extern int16_t tx_free_thresh; > extern int16_t tx_rs_thresh; > > +extern uint16_t noisy_tx_sw_bufsz; > +extern uint16_t noisy_tx_sw_buf_flush_time; > +extern uint64_t noisy_lkup_mem_sz; > +extern uint64_t noisy_lkup_num_writes; > +extern uint64_t noisy_lkup_num_reads; > +extern uint64_t noisy_lkup_num_reads_writes; > + > extern uint8_t dcb_config; > extern uint8_t dcb_test; > > diff --git a/doc/guides/testpmd_app_ug/run_app.rst > b/doc/guides/testpmd_app_ug/run_app.rst > index f301c2b..2bf4cf5 100644 > --- a/doc/guides/testpmd_app_ug/run_app.rst > +++ b/doc/guides/testpmd_app_ug/run_app.rst > @@ -340,6 +340,7 @@ The commandline options are: > icmpecho > ieee1588 > tm > + noisy > > * ``--rss-ip`` > > @@ -498,3 +499,35 @@ The commandline options are: > * ``--no-mlockall`` > > Disable locking all memory. > + > +* ``--noisy-tx-sw-buffer-size`` > + > + Set the number of maximum elements of the FIFO queue to be created > + for buffering packets. Only available with the noisy forwarding mode. > + The default value is 0. > + > +* ``--noisy-tx-sw-buffer-flushtime`` > + > + Set the time before packets in the FIFO queue is flushed. > + Only available with the noisy forwarding mode. The default value is 0. > + > +* ``--noisy-lkup-memory=N`` > + Not sure about the convention of using N. Other descriptions seem to reference it explicitly. > + Set the size of the noisy neighbour simulation memory buffer in MB. > + Only available with the noisy forwarding mode. The default value is 0. > + > + > +* ``--noisy-lkup-num-reads=N`` > + > + Set the number of reads to be done in noisy neighbour simulation memory > buffer. > + Only available with the noisy forwarding mode. The default value is 0. > + > +* ``--noisy-lkup-num-writes=N`` > + > + Set the number of writes to be done in noisy neighbour simulation memory > buffer. > + Only available with the noisy forwarding mode. The default value is 0. > + > +* ``--noisy-lkup-num-reads-writes=N`` > + > + Set the number of r/w accesses to be done in noisy neighbour simulation > memory buffer. > + Only available with the noisy forwarding mode. The default value is 0. > diff --git a/doc/guides/testpmd_app_ug/testpmd_funcs.rst > b/doc/guides/testpmd_app_ug/testpmd_funcs.rst > index dde205a..e2f9db2 100644 > --- a/doc/guides/testpmd_app_ug/testpmd_funcs.rst > +++ b/doc/guides/testpmd_app_ug/testpmd_funcs.rst > @@ -289,7 +289,7 @@ set fwd > Set the packet forwarding mode:: > > testpmd> set fwd (io|mac|macswap|flowgen| \ > - rxonly|txonly|csum|icmpecho) (""|retry) > + rxonly|txonly|csum|icmpecho|noisy) (""|retry) > > ``retry`` can be specified for forwarding engines except ``rx_only``. > > @@ -323,8 +323,11 @@ The available information categories are: > * ``softnic``: Demonstrates the softnic forwarding operation. In this mode, > packet forwarding is > similar to I/O mode except for the fact that packets are loopback to the > softnic ports only. Therefore, portmask parameter should be set to softnic > port only. The various software based custom NIC pipelines specified through > the softnic firmware (DPDK packet framework script) can be tested in this > mode. Furthermore, it allows to build 5-level hierarchical QoS scheduler as a > default option that can be enabled through CLI once testpmd application is > initialised. The user can modify the default scheduler hierarchy or can > specify the new QoS Scheduler hierarchy through CLI. Requires > ``CONFIG_RTE_LIBRTE_PMD_SOFTNIC=y``. > > -Example:: > +* ``noisy``: Noisy neighbour simulation. > + Simulate more realistic behavior of a guest machine engaged in receiving > + and sending packets performing Virtual Network Function (VNF). > > +Example:: > testpmd> set fwd rxonly > > Set rxonly packet forwarding mode > There's a 'new blank line at EOF warning' here when applying