Thanks for patch, It looks good, I have comments mostly related to coding style.
Subject line should follow following format [PATCH <n>/<m>] <area>: <summary> On Thu, May 1, 2014 at 3:58 AM, Gerald Rogers <gerald.rog...@intel.com> wrote: > This patch enables the client dpdk rings within the netdev-dpdk. It adds > a new dpdk device called dpdkr (other naming suggestions?). This allows > for the use of shared memory to communicate with other dpdk applications, > on the host or within a virtual machine. Instructions for use are in > INSTALL.DPDK. > > This has been tested on Intel multi-core platforms and with the client > application within the host. > > > Signed-off-by: Gerald Rogers <gerald.rog...@intel.com> > Signed-off-by: Gerald Rogers <gerald.rog...@intel.com> > Duplicate signed-off line. > diff --git a/INSTALL.DPDK b/INSTALL.DPDK > index 3e0247a..ec1b814 100644 > --- a/INSTALL.DPDK > +++ b/INSTALL.DPDK > @@ -74,6 +74,44 @@ Once first DPDK port is added vswitchd, it creates Polling > thread and > polls dpdk device in continuous loop. Therefore CPU utilization > for that thread is always 100%. > > +DPDK Rings : > + > +Following the steps above to create a bridge, you can now add dpdk rings > +as a port to the vswitch. OVS will expect the DPDK ring device name to > +start with dpdkr and end with portid. > + > + ovs-vsctl add-port br0 dpdkr0 -- set Interface dpdkr0 type=dpdkr > + > +DPDK rings client test application > + > +Included in the test directory is a sample DPDK application for testing > +the rings. This is from the base dpdk directory and modified to work > +with the ring naming used within ovs. > + > +location tests/ovs_client > + > +To run the client : > + > +ovsclient -c 1 -n 4 --proc-type=secondary -- --n "port id you gave dpdkr" > + > +It is essential to have --proc-type=secondary > + > +The application simply receives an mbuf on the receive queue of the > +ethernet ring and then places that same mbuf on the transmit ring of > +the ethernet ring. It is a trivial loopback application. > + > +In addition to executing the client in the host, you can execute it within > +a guest VM. To do so you will need a patched qemu. You can download the > +patch and getting started guid at : > + > +https://01.org/packet-processing/downloads > + > +A general rule of thumb for better performance is that the client > +application shouldn't be assigned the same dpdk core mask "-c" as > +the vswitchd. > + > + > + > Restrictions: > ------------- > > @@ -86,6 +124,11 @@ Restrictions: > device queues configured. > - Work with 1500 MTU, needs few changes in DPDK lib to fix this issue. > - Currently DPDK port does not make use any offload functionality. > + ivshmem > + - The shared memory is currently restricted to the use of a 1GB > + huge pages. I am not sure why do we have this limit. > + - All huge pages are shared amongst the host, clients, virtual > + machines etc. > > Bug Reporting: > -------------- > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index fd991ab..a90913f 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -148,6 +148,23 @@ struct dpdk_tx_queue { > struct rte_mbuf *burst_pkts[MAX_TX_QUEUE_LEN]; > }; > > +/* dpdk has no way to remove dpdk ring ethernet devices > + so we have to keep them around once they've been created > +*/ > + > +static struct list dpdk_ring_list OVS_GUARDED_BY(dpdk_mutex) > + = LIST_INITIALIZER(&dpdk_ring_list); > + > +struct dpdk_ring { > + /* For the client rings */ > + struct rte_ring * cring_tx; > + struct rte_ring * cring_rx; No need to have space after *. > + int port_id; /* dpdkr port id */ > + int eth_port_id; /* ethernet device port id */ > + struct list list_node OVS_GUARDED_BY(mutex); > + > +}; Extra blank line. > + > struct netdev_dpdk { > struct netdev up; > int port_id; > @@ -167,9 +184,14 @@ struct netdev_dpdk { > uint8_t hwaddr[ETH_ADDR_LEN]; > enum netdev_flags flags; > > + > struct rte_eth_link link; > int link_reset_cnt; > > + /* If a shared memory ring */ > + > + struct dpdk_ring *ivshmem; > + > /* In dpdk_list. */ > struct list list_node OVS_GUARDED_BY(dpdk_mutex); > }; > @@ -571,6 +593,7 @@ dpdk_queue_flush(struct netdev_dpdk *dev, int qid) > if (txq->count == 0) { > return; > } > + > rte_spinlock_lock(&txq->tx_lock); > nb_tx = rte_eth_tx_burst(dev->port_id, qid, txq->burst_pkts, txq->count); > if (nb_tx != txq->count) { > @@ -593,6 +616,7 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq_, struct > ofpbuf **packets, int *c) > > dpdk_queue_flush(dev, rxq_->queue_id); > > + > nb_rx = rte_eth_rx_burst(rx->port_id, rxq_->queue_id, > (struct rte_mbuf **) packets, MAX_RX_QUEUE_LEN); > if (!nb_rx) { > @@ -1170,6 +1194,200 @@ static struct netdev_class netdev_dpdk_class = { > NULL, /* rxq_drain */ > }; > > +/* Client Rings */ > + > +static int > +dpdk_classr_init(void) > +{ > + VLOG_INFO("Initialized dpdk client handlers:\n"); > + return 0; > +} > + > +static int > +netdev_dpdkr_construct(struct netdev *netdev_) > +{ > + struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_); > + struct dpdk_ring *ivshmem; > + unsigned int port_no; > + char *cport; > + int err=0; > + int found=0; > + char name[10]; We can move name-definition to if-else block where is used. > + > + Extra blank line. > + if (rte_eal_init_ret) { > + return rte_eal_init_ret; > + } > + > + ovs_mutex_lock(&dpdk_mutex); > + cport = netdev_->name + 5; /* Names always start with "dpdkr" */ > + > + if (strncmp(netdev_->name, "dpdkr", 5)) { > + VLOG_ERR("Not a valid dpdkr name %s:\n",netdev_->name); > + err = ENODEV; > + goto unlock_dpdk; > + } > + > + port_no = strtol(cport, 0, 0); /* string must be null terminated */ > + > + ovs_mutex_init(&netdev->mutex); > + > + ovs_mutex_lock(&netdev->mutex); > + netdev->flags = 0; > + > + netdev->mtu = ETHER_MTU; > + netdev->max_packet_len = MTU_TO_MAX_LEN(netdev->mtu); > + > + /* TODO: need to discover device node at run time. */ > + netdev->socket_id = SOCKET0; > + > + netdev->dpdk_mp = dpdk_mp_get(netdev->socket_id, netdev->mtu); > + if (!netdev->dpdk_mp) { > + VLOG_ERR("Unable to get memory pool\n"); > + err = ENOMEM; > + goto unlock_dev; > + } > + > + /* look through our list to find the device */ > + LIST_FOR_EACH(ivshmem, list_node, &dpdk_ring_list){ > + if (ivshmem->port_id == port_no) { > + VLOG_INFO("Found dpdk ring device %s:\n", netdev_->name); > + found = 1; > + break; > + } > + } > + if (found) { > + netdev->ivshmem = ivshmem; "netdev->ivshmem" is only assigned not used anywhere in code. > + netdev->port_id = ivshmem->eth_port_id; /* really all that is needed > */ > + } > + else { > + /* Need to create the device rings */ > + > + ivshmem = dpdk_rte_mzalloc(sizeof *ivshmem); > + if (ivshmem == NULL) > + goto unlock_dev; > + > + snprintf(name,10,"%s_tx",netdev_->name); > + ivshmem->cring_tx = rte_ring_create(name, MAX_RX_QUEUE_LEN, SOCKET0, > 0); > + if (ivshmem->cring_tx == NULL) > + goto unlock_dev; > + > + snprintf(name,10,"%s_rx",netdev_->name); > + ivshmem->cring_rx = rte_ring_create(name, MAX_RX_QUEUE_LEN, SOCKET0, > 0); Do we need to relation between ring size when we create device and when we open it? > + if (ivshmem->cring_rx == NULL) > + goto unlock_dev; Need to free memory in error case. > + > + err = rte_eth_from_rings(&ivshmem->cring_rx, 1, &ivshmem->cring_tx, > 1, SOCKET0); > + if (err < 0) { > + goto unlock_dev; > + } Extra space > + > + ivshmem->port_id = port_no; > + ivshmem->eth_port_id = netdev->port_id = rte_eth_dev_count()-1 ; > + > + netdev->ivshmem=ivshmem; > + Need space. > + list_push_back(&dpdk_ring_list, &ivshmem->list_node); > + > + } > + > + err = dpdk_eth_dev_init(netdev); > + if (err) { > + goto unlock_dev; > + } > + > + list_push_back(&dpdk_list, &netdev->list_node); > + > + err = 0; > +unlock_dev: > + ovs_mutex_unlock(&netdev->mutex); > +unlock_dpdk: > + ovs_mutex_unlock(&dpdk_mutex); > + return err; > +} > + > +static void > +netdev_dpdkr_destruct(struct netdev *netdev_) > +{ > + struct netdev_dpdk *dev = netdev_dpdk_cast(netdev_); > + > + ovs_mutex_lock(&dev->mutex); > + rte_eth_dev_stop(dev->port_id); > + > + ovs_mutex_unlock(&dev->mutex); > + > + ovs_mutex_lock(&dpdk_mutex); > + list_remove(&dev->list_node); > + dpdk_mp_put(dev->dpdk_mp); > + ovs_mutex_unlock(&dpdk_mutex); > + > + ovs_mutex_destroy(&dev->mutex); > +} > + > +static struct netdev_class netdev_dpdkr_class = { > + "dpdkr", > + dpdk_classr_init, /* init */ > + NULL, /* netdev_dpdk_run */ > + NULL, /* netdev_dpdk_wait */ > + > + netdev_dpdk_alloc, > + netdev_dpdkr_construct, > + netdev_dpdkr_destruct, > + netdev_dpdk_dealloc, > + netdev_dpdk_get_config, > + NULL, /* netdev_dpdk_set_config */ > + NULL, /* get_tunnel_config */ > + > + netdev_dpdk_send, /* send */ > + NULL, /* send_wait */ > + > + netdev_dpdk_set_etheraddr, > + netdev_dpdk_get_etheraddr, > + netdev_dpdk_get_mtu, > + netdev_dpdk_set_mtu, > + netdev_dpdk_get_ifindex, > + netdev_dpdk_get_carrier, > + netdev_dpdk_get_carrier_resets, > + netdev_dpdk_set_miimon, > + netdev_dpdk_get_stats, > + netdev_dpdk_set_stats, > + netdev_dpdk_get_features, > + NULL, /* set_advertisements */ > + > + NULL, /* set_policing */ > + NULL, /* get_qos_types */ > + NULL, /* get_qos_capabilities */ > + NULL, /* get_qos */ > + NULL, /* set_qos */ > + NULL, /* get_queue */ > + NULL, /* set_queue */ > + NULL, /* delete_queue */ > + NULL, /* get_queue_stats */ > + NULL, /* queue_dump_start */ > + NULL, /* queue_dump_next */ > + NULL, /* queue_dump_done */ > + NULL, /* dump_queue_stats */ > + > + NULL, /* get_in4 */ > + NULL, /* set_in4 */ > + NULL, /* get_in6 */ > + NULL, /* add_router */ > + NULL, /* get_next_hop */ > + netdev_dpdk_get_status, > + NULL, /* arp_lookup */ > + > + netdev_dpdk_update_flags, > + > + netdev_dpdk_rxq_alloc, > + netdev_dpdk_rxq_construct, > + netdev_dpdk_rxq_destruct, > + netdev_dpdk_rxq_dealloc, > + netdev_dpdk_rxq_recv, > + NULL, /* rx_wait */ > + NULL, /* rxq_drain */ > +}; > + > + We can avoid dup definition, checkout TUNNEL_CLASS() in netdev-vport.c > int > dpdk_init(int argc, char **argv) > { > @@ -1196,6 +1414,7 @@ void > netdev_dpdk_register(void) > { > netdev_register_provider(&netdev_dpdk_class); > + netdev_register_provider(&netdev_dpdkr_class); > } > > int > diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h > index 2807310..8cb715a 100644 > --- a/lib/netdev-dpdk.h > +++ b/lib/netdev-dpdk.h > @@ -10,6 +10,7 @@ > #include <rte_eal.h> > #include <rte_debug.h> > #include <rte_ethdev.h> > +#include <rte_eth_ring.h> > #include <rte_errno.h> > #include <rte_memzone.h> > #include <rte_memcpy.h> > diff --git a/lib/netdev.c b/lib/netdev.c > index 2fc1834..610de13 100644 > --- a/lib/netdev.c > +++ b/lib/netdev.c > @@ -98,7 +98,7 @@ netdev_n_rxq(const struct netdev *netdev) > bool > netdev_is_pmd(const struct netdev *netdev) > { > - return !strcmp(netdev->netdev_class->type, "dpdk"); > + return !strncmp(netdev->netdev_class->type, "dpdk",4); > } It would be easier to understand if dkdpr is explicitly checked. > > static void > diff --git a/tests/automake.mk b/tests/automake.mk > index bf80702..0067812 100644 > --- a/tests/automake.mk > +++ b/tests/automake.mk > @@ -204,6 +204,11 @@ tests/idltest.ovsidl: $(IDLTEST_IDL_FILES) > > tests/idltest.c: tests/idltest.h > > +noinst_PROGRAMS += tests/ovsclient > +tests_ovsclient_SOURCES = \ > + tests/ovs_client/ovs_client.c > +tests_ovsclient_LDADD = lib/libopenvswitch.la $(LIBS) > + > noinst_PROGRAMS += tests/ovstest > tests_ovstest_SOURCES = \ > tests/ovstest.c \ > diff --git a/tests/ovs_client/ovs_client.c b/tests/ovs_client/ovs_client.c > new file mode 100644 > index 0000000..07f49ed > --- /dev/null > +++ b/tests/ovs_client/ovs_client.c This file need lot of coding style fixes. > @@ -0,0 +1,217 @@ > +/* > + * BSD LICENSE > + * > + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. > + * All rights reserved. > + * > + * Redistribution and use in source and binary forms, with or without > + * modification, are permitted provided that the following conditions > + * are met: > + * > + * * Redistributions of source code must retain the above copyright > + * notice, this list of conditions and the following disclaimer. > + * * Redistributions in binary form must reproduce the above copyright > + * notice, this list of conditions and the following disclaimer in > + * the documentation and/or other materials provided with the > + * distribution. > + * * Neither the name of Intel Corporation nor the names of its > + * contributors may be used to endorse or promote products derived > + * from this software without specific prior written permission. > + * > + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > + * > + */ > + > +#include <getopt.h> > + > +#include <config.h> > +#include <rte_config.h> > +#include <rte_mbuf.h> > +#include <rte_ether.h> > +#include <rte_string_fns.h> > +#include <rte_ip.h> > +#include <rte_byteorder.h> > + > +/* Number of packets to attempt to read from queue */ > +#define PKT_READ_SIZE ((uint16_t)32) > + > +/* define common names for structures shared between ovs_dpdk and client */ > +#define MP_CLIENT_RXQ_NAME "dpdkr%u_tx" > +#define MP_CLIENT_TXQ_NAME "dpdkr%u_rx" > + > +#define RTE_LOGTYPE_APP RTE_LOGTYPE_USER1 > + > +#define BASE_10 10 > + > +/* our client id number - tells us which rx queue to read, and tx > + * queue to write to. */ > +static uint8_t client_id = 0; > + > +int no_pkt; > +int pkt; > + > +/* > + * Given the rx queue name template above, get the queue name > + */ > +static inline const char * > +get_rx_queue_name(unsigned id) > +{ > + /* buffer for return value. Size calculated by %u being replaced > + * by maximum 3 digits (plus an extra byte for safety) */ > + static char buffer[sizeof(MP_CLIENT_RXQ_NAME) + 2]; > + > + rte_snprintf(buffer, sizeof(buffer) - 1, MP_CLIENT_RXQ_NAME, id); > + return buffer; > +} > + > +/* > + * Given the tx queue name template above, get the queue name > + */ > +static inline const char * > +get_tx_queue_name(unsigned id) > +{ > + /* buffer for return value. Size calculated by %u being replaced > + * by maximum 3 digits (plus an extra byte for safety) */ > + static char buffer[sizeof(MP_CLIENT_TXQ_NAME) + 2]; > + > + rte_snprintf(buffer, sizeof(buffer) - 1, MP_CLIENT_TXQ_NAME, id); > + return buffer; > +} > + > +/* > + * print a usage message > + */ > +static void > +usage(const char *progname) > +{ > + printf("\nUsage: %s [EAL args] -- -n <client_id>\n", progname); > +} > + > +/* > + * Convert the client id number from a string to an int. > + */ > +static int > +parse_client_num(const char *client) > +{ > + char *end = NULL; > + unsigned long temp = 0; > + > + if (client == NULL || *client == '\0') > + return -1; > + > + temp = strtoul(client, &end, BASE_10); > + /* If valid string argument is provided, terminating '/0' character > + * is stored in 'end' */ > + if (end == NULL || *end != '\0') > + return -1; > + > + client_id = (uint8_t)temp; > + return 0; > +} > + > +/* > + * Parse the application arguments to the client app. > + */ > +static int > +parse_app_args(int argc, char *argv[]) > +{ > + int option_index = 0, opt = 0; > + char **argvopt = argv; > + const char *progname = NULL; > + static struct option lgopts[] = { > + {NULL, 0, 0, 0 } > + }; > + progname = argv[0]; > + > + while ((opt = getopt_long(argc, argvopt, "n:s:d:", lgopts, > + &option_index)) != EOF){ > + switch (opt){ > + case 'n': > + if (parse_client_num(optarg) != 0){ > + usage(progname); > + return -1; > + } > + break; > + default: > + usage(progname); > + return -1; > + } > + } > + > + return 0; > +} > + > +/* > + * Application main function - loops through > + * receiving and processing packets. Never returns > + */ > +int > +main(int argc, char *argv[]) > +{ > + struct rte_ring *rx_ring = NULL; > + struct rte_ring *tx_ring = NULL; > + int retval = 0; > + void *pkts[PKT_READ_SIZE]; > + int rslt = 0; > + > + if ((retval = rte_eal_init(argc, argv)) < 0) > + return -1; > + > + argc -= retval; > + argv += retval; > + > + if (parse_app_args(argc, argv) < 0) > + rte_exit(EXIT_FAILURE, "Invalid command-line arguments\n"); > + > + rx_ring = rte_ring_lookup(get_rx_queue_name(client_id)); > + if (rx_ring == NULL) > + rte_exit(EXIT_FAILURE, "Cannot get RX ring - is server > process running?\n"); > + > + tx_ring = rte_ring_lookup(get_tx_queue_name(client_id)); > + if (tx_ring == NULL) > + rte_exit(EXIT_FAILURE, "Cannot get TX ring - is server > process running?\n"); > + > + RTE_LOG(INFO, APP, "Finished Process Init.\n"); > + > + printf("\nClient process %d handling packets\n", client_id); > + printf("[Press Ctrl-C to quit ...]\n"); > + > + for (;;) { > + unsigned rx_pkts = PKT_READ_SIZE; > + > + /* try dequeuing max possible packets first, if that fails, > get the > + * most we can. Loop body should only execute once, maximum */ > + while (rx_pkts > 0 && > + unlikely(rte_ring_dequeue_bulk(rx_ring, pkts, > rx_pkts) != 0)) > + rx_pkts = (uint16_t)RTE_MIN(rte_ring_count(rx_ring), > PKT_READ_SIZE); > + > + if(rx_pkts>0) > + { > + > + pkt++; > + > + /* blocking enqueue */ > + do { > + rslt = rte_ring_enqueue_bulk(tx_ring, pkts, > rx_pkts); > + } while (rslt == -ENOBUFS); > + } > + else > + no_pkt++; > + > + if (!(pkt % 100000)) > + { > + printf("pkt %d %d\n", pkt, no_pkt); > + pkt=no_pkt=0; > + } > + } > +} > _______________________________________________ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev