Use the new port mirror API instead of pdump.

Signed-off-by: Stephen Hemminger <step...@networkplumber.org>
---
 app/dumpcap/main.c      | 361 +++++++++++++++++++++++++++++++---------
 app/dumpcap/meson.build |   2 +-
 2 files changed, 284 insertions(+), 79 deletions(-)
diff --git a/app/dumpcap/main.c b/app/dumpcap/main.c index 3d3c0dbc66..7adbab71c8 100644 --- a/app/dumpcap/main.c +++ b/app/dumpcap/main.c @@ -36,8 +36,6 @@ #include <rte_mbuf.h> #include <rte_mempool.h> #include <rte_pcapng.h> -#include <rte_pdump.h> -#include <rte_ring.h> #include <rte_string_fns.h> #include <rte_thread.h> #include <rte_time.h> @@ -92,16 +90,22 @@ struct capture_options { struct interface { TAILQ_ENTRY(interface) next; uint16_t port; + struct rte_bpf *filter; + uint64_t filteraccept; struct capture_options opts; - struct rte_bpf_prm *bpf_prm; char name[RTE_ETH_NAME_MAX_LEN]; + struct rte_eth_stats start_stats; const char *ifname; const char *ifdescr; }; +static int timestamp_dynfield = -1; +static uint64_t timestamp_dynflag; + TAILQ_HEAD(interface_list, interface); static struct interface_list interfaces = TAILQ_HEAD_INITIALIZER(interfaces); +static struct interface *port2intf[RTE_MAX_ETHPORTS]; /* Can do either pcap or pcapng format output */ typedef union { @@ -239,14 +243,16 @@ static void find_interfaces(void) TAILQ_FOREACH(intf, &interfaces, next) { /* if name is valid then just record port */ - if (rte_eth_dev_get_port_by_name(intf->name, &intf->port) == 0) - continue; + if (rte_eth_dev_get_port_by_name(intf->name, &intf->port) != 0) { + /* maybe got passed port number string as name */ + intf->port = get_uint(intf->name, "port_number", UINT16_MAX); + if (rte_eth_dev_get_name_by_port(intf->port, intf->name) < 0) + rte_exit(EXIT_FAILURE, "Invalid port number %u\n", + intf->port); + } - /* maybe got passed port number string as name */ - intf->port = get_uint(intf->name, "port_number", UINT16_MAX); - if (rte_eth_dev_get_name_by_port(intf->port, intf->name) < 0) - rte_exit(EXIT_FAILURE, "Invalid port number %u\n", - intf->port); + if (rte_eth_stats_get(intf->port, &intf->start_stats) < 0) + rte_exit(EXIT_FAILURE, "Could not read stats for port %u\n", intf->port); } } @@ -266,6 +272,9 @@ static void set_default_interface(void) intf = 
add_interface(name); intf->port = p; + + if (rte_eth_stats_get(intf->port, &intf->start_stats) < 0) + rte_exit(EXIT_FAILURE, "Could not read stats for default port %u\n", intf->port); return; } rte_exit(EXIT_FAILURE, "No usable interfaces found\n"); @@ -295,6 +304,9 @@ static void compile_filters(void) struct bpf_program bf; pcap_t *pcap; + /* cache for filter packets */ + port2intf[intf->port] = intf; + pcap = pcap_open_dead(DLT_EN10MB, intf->opts.snap_len); if (!pcap) rte_exit(EXIT_FAILURE, "can not open pcap\n"); @@ -324,7 +336,12 @@ static void compile_filters(void) exit(0); } - intf->bpf_prm = bpf_prm; + + intf->filter = rte_bpf_load(bpf_prm); + if (intf->filter == NULL) + rte_exit(EXIT_FAILURE, + "BPF load failed '%s'\n%s(%d)\n", + intf->name, rte_strerror(rte_errno), rte_errno); /* Don't care about original program any more */ pcap_freecode(&bf); @@ -518,13 +535,12 @@ static void statistics_loop(void) } static void -cleanup_pdump_resources(void) +disable_mirror_ports(void) { struct interface *intf; TAILQ_FOREACH(intf, &interfaces, next) { - rte_pdump_disable(intf->port, - RTE_PDUMP_ALL_QUEUES, RTE_PDUMP_FLAG_RXTX); + rte_eth_remove_mirror(intf->port); if (intf->opts.promisc_mode) rte_eth_promiscuous_disable(intf->port); } @@ -552,7 +568,6 @@ enable_primary_monitor(void) { int ret; - /* Once primary exits, so will pdump. 
*/ ret = rte_eal_alarm_set(MONITOR_INTERVAL, monitor_primary, NULL); if (ret < 0) fprintf(stderr, "Fail to enable monitor:%d\n", ret); @@ -571,23 +586,24 @@ disable_primary_monitor(void) static void report_packet_stats(dumpcap_out_t out) { - struct rte_pdump_stats pdump_stats; struct interface *intf; - uint64_t ifrecv, ifdrop; - double percent; fputc('\n', stderr); + TAILQ_FOREACH(intf, &interfaces, next) { - if (rte_pdump_stats(intf->port, &pdump_stats) < 0) + uint64_t ifrecv, ifdrop; + struct rte_eth_stats stats; + double percent; + + if (rte_eth_stats_get(intf->port, &stats) < 0) continue; - /* do what Wiretap does */ - ifrecv = pdump_stats.accepted + pdump_stats.filtered; - ifdrop = pdump_stats.nombuf + pdump_stats.ringfull; + ifrecv = stats.ipackets - intf->start_stats.ipackets; + ifdrop = (stats.rx_nombuf - intf->start_stats.rx_nombuf) + + (stats.imissed - intf->start_stats.imissed); if (use_pcapng) - rte_pcapng_write_stats(out.pcapng, intf->port, - ifrecv, ifdrop, NULL); + rte_pcapng_write_stats(out.pcapng, intf->port, ifrecv, ifdrop, NULL); if (ifrecv == 0) percent = 0; @@ -668,12 +684,21 @@ static void dpdk_init(void) rte_exit(EXIT_FAILURE, "Can not restore original CPU affinity\n"); } -/* Create packet ring shared between callbacks and process */ -static struct rte_ring *create_ring(void) +/* Create ring port to redirect packet to. + * This could be much simpler if the ring PMD API (rte_eth_from_rings) + * worked from a secondary process, but it doesn't. + */ +static struct rte_ring * +create_ring_dev(char *vdev_name, uint16_t *mirror_port) { - struct rte_ring *ring; + struct rte_eth_dev_owner owner = { 0 }; + struct rte_eth_conf dev_conf = { 0 }; + struct rte_ring *ring = NULL; char ring_name[RTE_RING_NAMESIZE]; + char vdev_args[128]; size_t size, log2; + uint16_t port; + int ret; /* Find next power of 2 >= size. 
*/ size = ring_size; @@ -686,17 +711,60 @@ static struct rte_ring *create_ring(void) ring_size = size; } - /* Want one ring per invocation of program */ - snprintf(ring_name, sizeof(ring_name), - "dumpcap-%d", getpid()); + ret = rte_eth_dev_owner_new(&owner.id); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_eth_dev_owner_new failed: %s\n", + strerror(-ret)); - ring = rte_ring_create(ring_name, ring_size, - rte_socket_id(), 0); + /* Give the vdev a unique name */ + snprintf(ring_name, sizeof(ring_name), "dumpcap%"PRIu64, owner.id); + ring = rte_ring_create(ring_name, ring_size, rte_socket_id(), + RING_F_MP_RTS_ENQ | RING_F_SC_DEQ); if (ring == NULL) rte_exit(EXIT_FAILURE, "Could not create ring :%s\n", rte_strerror(rte_errno)); + strlcpy(owner.name, ring_name, sizeof(owner.name)); + + snprintf(vdev_name, RTE_DEV_NAME_MAX_LEN, + "net_ring-dumpcap%"PRIu64, owner.id); + snprintf(vdev_args, sizeof(vdev_args), + "ring=%s,timestamp", ring_name); + + if (rte_eal_hotplug_add("vdev", vdev_name, vdev_args) < 0) + rte_exit(EXIT_FAILURE, + "rte_eal_hotplug_add of %s failed:%s\n", + vdev_name, rte_strerror(rte_errno)); + + ret = rte_eth_dev_get_port_by_name(vdev_name, &port); + if (ret != 0) { + fprintf(stderr, "Could not port for %s: %s\n", + vdev_name, strerror(-ret)); + goto unplug; + } + + ret = rte_eth_dev_owner_set(port, &owner); + if (ret != 0) { + fprintf(stderr, "Could not set owner for port for %u: %s\n", + port, strerror(-ret)); + goto unplug; + } + + ret = rte_eth_dev_configure(port, 1, 1, &dev_conf); + if (ret < 0) { + fprintf(stderr, "Could not configure port %u: %s\n", + port, strerror(-ret)); + goto unplug; + } + + *mirror_port = port; return ring; + +unplug: + rte_eal_hotplug_remove("vdev", vdev_name); + rte_ring_free(ring); + rte_eal_cleanup(); + exit(EXIT_FAILURE); } static struct rte_mempool *create_mempool(void) @@ -796,7 +864,7 @@ static dumpcap_out_t create_output(void) version(), capture_comment); if (ret.pcapng == NULL) rte_exit(EXIT_FAILURE, 
"pcapng_fdopen failed: %s\n", - strerror(rte_errno)); + rte_strerror(rte_errno)); free(os); TAILQ_FOREACH(intf, &interfaces, next) { @@ -823,37 +891,30 @@ static dumpcap_out_t create_output(void) return ret; } -static void enable_pdump(struct rte_ring *r, struct rte_mempool *mp) +static int enable_mirror(uint16_t mirror_port, struct rte_mempool *mpool) { struct interface *intf; unsigned int count = 0; - uint32_t flags; int ret; - flags = RTE_PDUMP_FLAG_RXTX; - if (use_pcapng) - flags |= RTE_PDUMP_FLAG_PCAPNG; + ret = rte_eth_dev_start(mirror_port); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Could not start mirror port %u: %s\n", + mirror_port, strerror(-ret)); TAILQ_FOREACH(intf, &interfaces, next) { - ret = rte_pdump_enable_bpf(intf->port, RTE_PDUMP_ALL_QUEUES, - flags, intf->opts.snap_len, - r, mp, intf->bpf_prm); + struct rte_eth_mirror_conf conf = { + .mp = mpool, + .snaplen = intf->opts.snap_len, + .direction = RTE_MIRROR_DIRECTION_INGRESS | + RTE_MIRROR_DIRECTION_EGRESS, + }; + + ret = rte_eth_add_mirror(intf->port, mirror_port, &conf); if (ret < 0) { - const struct interface *intf2; - - /* unwind any previous enables */ - TAILQ_FOREACH(intf2, &interfaces, next) { - if (intf == intf2) - break; - rte_pdump_disable(intf2->port, - RTE_PDUMP_ALL_QUEUES, RTE_PDUMP_FLAG_RXTX); - if (intf2->opts.promisc_mode) - rte_eth_promiscuous_disable(intf2->port); - } - rte_exit(EXIT_FAILURE, - "Packet dump enable on %u:%s failed %s\n", - intf->port, intf->name, - rte_strerror(rte_errno)); + fprintf(stderr, "Port mirror on %u:%s failed %s\n", + intf->port, intf->name, strerror(-ret)); + return -1; } if (intf->opts.promisc_mode) { @@ -868,6 +929,31 @@ static void enable_pdump(struct rte_ring *r, struct rte_mempool *mp) } ++count; } + return count; +} + +static void setup_mbuf(void) +{ + int offset; + + offset = rte_mbuf_dynfield_lookup(RTE_MBUF_DYNFIELD_TIMESTAMP_NAME, NULL); + if (offset < 0) { + fprintf(stderr, "Could not find timestamp dynamic field\n"); + return; + } + 
timestamp_dynfield = offset; + + offset = rte_mbuf_dynflag_lookup(RTE_MBUF_DYNFLAG_RX_TIMESTAMP_NAME, NULL); + if (offset < 0) { + fprintf(stderr, "Could not find timestamp flag\n"); + return; + } + timestamp_dynflag = RTE_BIT64(offset); +} + +static void show_capturing(unsigned int count) +{ + struct interface *intf; fputs("Capturing on ", stdout); TAILQ_FOREACH(intf, &interfaces, next) { @@ -898,6 +984,42 @@ static void show_count(uint64_t count) bt = fprintf(stderr, "%"PRIu64" ", count); } +static ssize_t +pcapng_write_packets(rte_pcapng_t *pcapng, + struct rte_mbuf *pkts[], uint16_t n) +{ + struct rte_mbuf *towrite[BURST_SIZE]; + unsigned int count = 0; + unsigned int i; + + for (i = 0; i < n; i++) { + struct rte_mbuf *m = pkts[i]; + const struct rte_mbuf_mirror *orig = &m->hash.mirror; + enum rte_pcapng_direction direction; + uint64_t ts; + + if (orig->direction == RTE_MIRROR_DIRECTION_INGRESS) + direction = RTE_PCAPNG_DIRECTION_IN; + else if (orig->direction == RTE_MIRROR_DIRECTION_EGRESS) + direction = RTE_PCAPNG_DIRECTION_OUT; + else + direction = RTE_PCAPNG_DIRECTION_UNKNOWN; + + if (likely(m->ol_flags & timestamp_dynflag)) + ts = *RTE_MBUF_DYNFIELD(m, timestamp_dynfield, rte_mbuf_timestamp_t *); + else + ts = rte_get_tsc_cycles(); + + if (unlikely(rte_pcapng_insert(m, orig->queue_id, direction, + orig->orig_len, ts, NULL) < 0)) + continue; /* skip no headroom? 
*/ + + towrite[count++] = m; + } + + return rte_pcapng_write_packets(pcapng, towrite, count); +} + /* Write multiple packets in older pcap format */ static ssize_t pcap_write_packets(pcap_dumper_t *dumper, @@ -930,16 +1052,82 @@ pcap_write_packets(pcap_dumper_t *dumper, return total; } +static unsigned int +filter_burst(struct interface *intf, struct rte_mbuf *pkts[], unsigned int n) +{ + uint64_t results[BURST_SIZE]; + struct rte_mbuf *towrite[BURST_SIZE]; + unsigned int i, count; + uint32_t matches; + + matches = rte_bpf_exec_burst(intf->filter, (void **)pkts, results, n); + if (matches == n) + return n; + intf->filteraccept += matches; + + for (i = 0, count = 0; i < n; i++) { + if (results[i]) + towrite[count++] = pkts[i]; + else + rte_pktmbuf_free(pkts[i]); + } + memcpy(pkts, towrite, count * sizeof(struct rte_mbuf *)); + return count; +} + +static ssize_t +process_burst(uint16_t in_port, dumpcap_out_t out, + struct rte_mbuf *pkts[], unsigned int n) +{ + struct interface *intf; + ssize_t written; + + if (in_port >= RTE_MAX_ETHPORTS) + goto invalid_port; + + intf = port2intf[in_port]; + if (intf == NULL) + goto invalid_port; + + if (intf->filter) { + n = filter_burst(intf, pkts, n); + if (n == 0) + return 0; + } + + packets_received += n; + if (use_pcapng) + written = pcapng_write_packets(out.pcapng, pkts, n); + else + written = pcap_write_packets(out.dumper, pkts, n); + + if (written > 0) + file_size += written; + + rte_pktmbuf_free_bulk(pkts, n); + return written; + +invalid_port: + static bool warn_once = true; + if (warn_once) { + warn_once = false; + fprintf(stderr, "Packet in ring from port %u\n", in_port); + } + + rte_pktmbuf_free_bulk(pkts, n); + return 0; +} + /* Process all packets in ring and dump to capture file */ -static int process_ring(dumpcap_out_t out, struct rte_ring *r) +static int process_ring(dumpcap_out_t out, struct rte_ring *ring) { struct rte_mbuf *pkts[BURST_SIZE]; - unsigned int avail, n; + uint16_t in_port; + unsigned int i, n, 
start, avail; static unsigned int empty_count; - ssize_t written; + int ret; - n = rte_ring_sc_dequeue_burst(r, (void **) pkts, BURST_SIZE, - &avail); + n = rte_ring_sc_dequeue_burst(ring, (void **)pkts, BURST_SIZE, &avail); if (n == 0) { /* don't consume endless amounts of cpu if idle */ if (empty_count < SLEEP_THRESHOLD) @@ -951,18 +1139,24 @@ static int process_ring(dumpcap_out_t out, struct rte_ring *r) empty_count = (avail == 0); - if (use_pcapng) - written = rte_pcapng_write_packets(out.pcapng, pkts, n); - else - written = pcap_write_packets(out.dumper, pkts, n); + /* process all packets mirrored from same port */ + start = 0; + in_port = pkts[start]->port; + for (i = 1; i < n; i++) { + if (likely(pkts[i]->port == in_port)) + continue; + ret = process_burst(in_port, out, pkts + start, i - start); + if (ret < 0) + return ret; /* stop on file write error */ - rte_pktmbuf_free_bulk(pkts, n); + start = i; + in_port = pkts[i]->port; + } - if (written < 0) - return -1; + ret = process_burst(in_port, out, pkts + start, n - start); + if (ret < 0) + return ret; - file_size += written; - packets_received += n; if (!quiet) show_count(packets_received); @@ -971,15 +1165,18 @@ static int process_ring(dumpcap_out_t out, struct rte_ring *r) int main(int argc, char **argv) { - struct rte_ring *r; struct rte_mempool *mp; struct sigaction action = { .sa_flags = SA_RESTART, .sa_handler = signal_handler, }; struct sigaction origaction; + struct rte_ring *ring = NULL; + char vdev_name[RTE_DEV_NAME_MAX_LEN]; + uint16_t mirror_port = UINT16_MAX; dumpcap_out_t out; char *p; + int ret; p = strrchr(argv[0], '/'); if (p == NULL) @@ -1018,12 +1215,19 @@ int main(int argc, char **argv) exit(0); } - r = create_ring(); mp = create_mempool(); + ring = create_ring_dev(vdev_name, &mirror_port); out = create_output(); + ret = enable_mirror(mirror_port, mp); + if (ret < 0) + goto cleanup; + + setup_mbuf(); + + show_capturing(ret); + start_time = time(NULL); - enable_pdump(r, mp); if (!quiet) { 
fprintf(stderr, "Packets captured: "); @@ -1031,7 +1235,7 @@ int main(int argc, char **argv) } while (!rte_atomic_load_explicit(&quit_signal, rte_memory_order_relaxed)) { - if (process_ring(out, r) < 0) { + if (process_ring(out, ring) < 0) { fprintf(stderr, "pcapng file write failed; %s\n", strerror(errno)); break; @@ -1048,19 +1252,20 @@ int main(int argc, char **argv) break; } - disable_primary_monitor(); - if (rte_eal_primary_proc_alive(NULL)) report_packet_stats(out); +cleanup: if (use_pcapng) rte_pcapng_close(out.pcapng); else pcap_dump_close(out.dumper); - cleanup_pdump_resources(); + disable_primary_monitor(); + disable_mirror_ports(); - rte_ring_free(r); + rte_eal_hotplug_remove("vdev", vdev_name); + rte_ring_free(ring); rte_mempool_free(mp); return rte_eal_cleanup() ? EXIT_FAILURE : 0; diff --git a/app/dumpcap/meson.build b/app/dumpcap/meson.build index 69c016c780..6212291d40 100644 --- a/app/dumpcap/meson.build +++ b/app/dumpcap/meson.build @@ -14,4 +14,4 @@ endif ext_deps += pcap_dep sources = files('main.c') -deps += ['ethdev', 'pdump', 'pcapng', 'bpf'] +deps += ['net_ring', 'ethdev', 'pcapng', 'bpf'] -- 2.47.2