This patch adds a sample application that creates vhost-user sockets
backed by a vDPA driver. The vDPA driver sets up the vhost datapath, so
the application does not need to dedicate a worker thread to vhost
enqueue/dequeue operations.

Below are setup steps for your reference:

1. Make sure your kernel vhost module and QEMU support vIOMMU.
   - OS: CentOS 7.4
   - QEMU: 2.10.1
   - Guest OS: CentOS 7.2
   - Nested VM OS: CentOS 7.2

2. Enable the VT-x feature for the vCPUs in the VM.
   modprobe kvm_intel nested=1

3. Start a VM with a virtio-net-pci device.
   ./qemu-2.10.1/x86_64-softmmu/qemu-system-x86_64 -enable-kvm -cpu host \
   <snip>
   -machine q35 \
   -device intel-iommu \
   -netdev tap,id=mytap,ifname=vdpa,vhostforce=on \
   -device virtio-net-pci,netdev=mytap,mac=00:aa:bb:cc:dd:ee,\
   disable-modern=off,disable-legacy=on,iommu_platform=on \

4. Bind the vfio-pci driver to the virtio-net-pci device
   a) login to VM;
   b) modprobe vfio-pci
   c) rmmod vfio_iommu_type1
   d) modprobe vfio_iommu_type1 allow_unsafe_interrupts=1
   e) ./usertools/dpdk-devbind.py -b vfio-pci 00:03.0

5. Start vdpa sample
   ./examples/vdpa/build/vdpa -c 0x2 -n 4 --socket-mem 1024 --no-pci \
    --vdev "net_vdpa_virtio_pci0,bdf=0000:00:03.0" -- --bdf 0000:00:03.0 \
    --iface /tmp/vhost-user- --devcnt 1  --queue 1

6. Start nested VM
   ./qemu-2.10.1/x86_64-softmmu/qemu-system-x86_64 -cpu host -enable-kvm \
   <snip>
   -mem-prealloc \
   -chardev socket,id=char0,path=/tmp/vhost-user-0 \
   -netdev type=vhost-user,id=vdpa,chardev=char0,vhostforce \
   -device virtio-net-pci,netdev=vdpa,mac=00:aa:bb:cc:dd:ee \

7. Log in to the nested VM and verify that its virtio device can
   communicate with the tap device on the host.

Signed-off-by: Xiao Wang <xiao.w.w...@intel.com>
---
 examples/vdpa/Makefile |  32 ++++
 examples/vdpa/main.c   | 387 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 419 insertions(+)
 create mode 100644 examples/vdpa/Makefile
 create mode 100644 examples/vdpa/main.c

diff --git a/examples/vdpa/Makefile b/examples/vdpa/Makefile
new file mode 100644
index 000000000..42672a2bc
--- /dev/null
+++ b/examples/vdpa/Makefile
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+# Build file for the vdpa sample. It relies on the DPDK build system found
+# under $(RTE_SDK), so that variable must be set before invoking make.
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# On any execution environment other than linuxapp only a notice is printed
+# and an empty "all" target is provided, so nothing is built.
+ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp")
+$(info This application can only operate in a linuxapp environment, \
+please change the definition of the RTE_TARGET environment variable)
+all:
+else
+
+# binary name
+APP = vdpa
+
+# all source are stored in SRCS-y
+SRCS-y := main.c
+
+CFLAGS += -O2 -D_FILE_OFFSET_BITS=64
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -D_GNU_SOURCE
+# NOTE(review): presumably needed because main.c calls vhost/vDPA functions
+# that are still marked experimental -- confirm against the library headers.
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+
+include $(RTE_SDK)/mk/rte.extapp.mk
+
+endif
diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c
new file mode 100644
index 000000000..1c9143469
--- /dev/null
+++ b/examples/vdpa/main.c
@@ -0,0 +1,387 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+#include <rte_vhost.h>
+#include <rte_vdpa.h>
+
+/* Number of mbufs reserved per Ethernet port in the mbuf pool. */
+#define NUM_MBUFS 8191
+/* Cache size passed to rte_pktmbuf_pool_create(). */
+#define MBUF_CACHE_SIZE 250
+
+/* Descriptor counts requested for each Rx and Tx queue in port_init(). */
+#define RX_RING_SIZE 128
+#define TX_RING_SIZE 128
+
+/* Size of the buffers that hold socket paths and the --bdf text. */
+#define MAX_PATH_LEN 128
+/* Number of entries in the vports[] table. */
+#define MAX_VDPA_SAMPLE_PORTS 1024
+
+/* Bookkeeping for one vhost-user socket created by this sample. */
+struct vdpa_port {
+       char ifname[MAX_PATH_LEN];      /* socket path registered with vhost */
+       int eid;                        /* vDPA engine id used for the socket */
+       int did;                        /* vDPA device id assigned in main() */
+       int vid;                        /* vhost device id seen in callbacks */
+};
+
+/* Port table; main() fills one entry per created vhost-user socket. */
+struct vdpa_port vports[MAX_VDPA_SAMPLE_PORTS];
+
+/* Engine attributes filled in by rte_vdpa_info_query() in main(). */
+struct rte_vdpa_eng_attr attr;
+/* Engine address; its PCI address is set from the --bdf option. */
+struct rte_vdpa_eng_addr dev_id;
+/* Socket path prefix from --iface; main() appends the port index to it. */
+char iface[MAX_PATH_LEN];
+/* Value of --queue (default 1, set in data_init()). */
+int queue;
+/* Value of --devcnt (default 1, set in data_init()). */
+int devcnt;
+
+/*
+ * Print the synopsis of the application-specific command-line options.
+ *
+ * prgname: program name printed at the start of the synopsis.
+ */
+static void
+vdpa_usage(const char *prgname)
+{
+       printf("%s [EAL options]"
+               " -- --bdf B:D:F --iface <path> --devcnt ND  --queue NQ\n"
+               " --bdf B:D:F, the PCI device used for vdpa\n"
+               " --iface <path>: The path of the socket file\n"
+               " --devcnt ND: number of vhost sockets to be created,"
+               " default 1\n"
+               " --queue NQ: number of queue pairs to be configured,"
+               " default 1\n",
+               prgname);
+}
+
+/*
+ * Parse a non-negative integer from a string.
+ *
+ * str:  NUL-terminated text to convert; the whole string must be a number.
+ * base: numeric base handed to strtoul().
+ *
+ * Returns the parsed value, or -1 if the string is empty, contains a minus
+ * sign or trailing characters, or holds a value that does not fit in an int.
+ */
+static int
+get_unsigned(const char *str, int base)
+{
+       unsigned long num;
+       char *end = NULL;
+
+       /* strtoul() accepts "-N" and negates it; refuse such input. */
+       if (strchr(str, '-') != NULL)
+               return -1;
+
+       errno = 0;
+       num = strtoul(str, &end, base);
+       if (str[0] == '\0' || end == NULL || *end != '\0' || errno != 0)
+               return -1;
+
+       /* The value is returned as int, so anything larger is an error. */
+       if (num > INT_MAX)
+               return -1;
+
+       return (int)num;
+}
+
+/*
+ * Convert a PCI address written as "[domain:]bus:devid.function" (all
+ * fields hexadecimal) and store it in dev_id.pci_addr. Fields are read from
+ * the end of the string, so leading fields that are absent stay zero.
+ *
+ * Returns 0 on success, -1 if the text does not fit the local buffer or a
+ * field is not a valid hexadecimal number.
+ */
+static int
+parse_bdf(const char *bdf)
+{
+       char str[MAX_PATH_LEN];
+       int num[4] = {0};
+       int i, j, len;
+
+       /* Bounded copy: the option text comes straight from the user. */
+       len = snprintf(str, sizeof(str), "%s", bdf);
+       if (len < 0 || len >= (int)sizeof(str))
+               return -1;
+
+       i = len - 1;
+       j = 3;
+       while (i > 0 && j >= 0) {
+               /* Test the index first so that str[-1] is never read. */
+               while (i > 0 && str[i - 1] != ':' && str[i - 1] != '.')
+                       i--;
+               num[j] = get_unsigned(&str[i], 16);
+               if (num[j] < 0)
+                       return -1;
+               j--;
+               /* Cut the string at the separator preceding this field. */
+               i--;
+               if (i >= 0)
+                       str[i] = '\0';
+       }
+
+       dev_id.pci_addr.domain = num[0];
+       dev_id.pci_addr.bus = num[1];
+       dev_id.pci_addr.devid = num[2];
+       dev_id.pci_addr.function = num[3];
+       printf("bdf %04x:%02x:%02x.%02x\n",
+                       dev_id.pci_addr.domain,
+                       dev_id.pci_addr.bus,
+                       dev_id.pci_addr.devid,
+                       dev_id.pci_addr.function);
+
+       return 0;
+}
+
+/*
+ * Parse the application options that follow the EAL options.
+ *
+ * Recognized long options: --bdf, --queue, --devcnt and --iface. The
+ * results are stored in the globals dev_id, queue, devcnt and iface.
+ *
+ * argc, argv: argument vector left after EAL has consumed its own options.
+ *
+ * Returns 0 on success. On an unknown option, a malformed --bdf, an --iface
+ * value that is too long, a queue or device count that is not positive, or
+ * an empty socket path, the usage text is printed and -1 is returned.
+ */
+static int
+parse_args(int argc, char **argv)
+{
+       static const char *short_option = "";
+       static struct option long_option[] = {
+               {"bdf", required_argument, NULL, 0},
+               {"queue", required_argument, NULL, 0},
+               {"devcnt", required_argument, NULL, 0},
+               {"iface", required_argument, NULL, 0},
+               {NULL, 0, 0, 0},
+       };
+       const char *name;
+       int opt, idx, len;
+       char *prgname = argv[0];
+
+       while ((opt = getopt_long(argc, argv, short_option, long_option, &idx))
+                       != -1) {
+               switch (opt) {
+               case 0:
+                       name = long_option[idx].name;
+                       if (strcmp(name, "bdf") == 0) {
+                               if (parse_bdf(optarg) < 0) {
+                                       vdpa_usage(prgname);
+                                       return -1;
+                               }
+                       } else if (strcmp(name, "queue") == 0) {
+                               queue = get_unsigned(optarg, 10);
+                               printf("queue %d\n", queue);
+                       } else if (strcmp(name, "devcnt") == 0) {
+                               devcnt = get_unsigned(optarg, 10);
+                               printf("devcnt %d\n", devcnt);
+                       } else if (strcmp(name, "iface") == 0) {
+                               /* snprintf always NUL-terminates iface. */
+                               len = snprintf(iface, MAX_PATH_LEN, "%s",
+                                               optarg);
+                               if (len < 0 || len >= MAX_PATH_LEN) {
+                                       vdpa_usage(prgname);
+                                       return -1;
+                               }
+                               printf("iface %s\n", iface);
+                       }
+
+                       break;
+
+               default:
+                       vdpa_usage(prgname);
+                       return -1;
+               }
+       }
+
+       /* get_unsigned() yields -1 for bad numbers, caught by "<= 0" here. */
+       if (queue <= 0 || devcnt <= 0 || *iface == '\0') {
+               vdpa_usage(prgname);
+               return -1;
+       }
+
+       return 0;
+}
+
+/*
+ * Put the option globals into their default state: one device, one queue
+ * pair, and zeroed engine address, socket path prefix and port table.
+ */
+static void
+data_init(void)
+{
+       memset(vports, 0, sizeof(vports));
+       memset(iface, 0, sizeof(iface));
+       memset(&dev_id, 0, sizeof(dev_id));
+
+       queue = 1;
+       devcnt = 1;
+}
+
+/*
+ * Signal handler: on SIGINT or SIGTERM stop and close every Ethernet port,
+ * then terminate the process with status 0. Any other signal number is
+ * ignored.
+ *
+ * NOTE(review): printf(), exit() and the ethdev calls are not
+ * async-signal-safe; consider only setting a flag here.
+ */
+static void
+signal_handler(int signum)
+{
+       uint16_t port, total;
+
+       if (signum != SIGINT && signum != SIGTERM)
+               return;
+
+       printf("\nSignal %d received, preparing to exit...\n", signum);
+
+       total = rte_eth_dev_count();
+       for (port = 0; port < total; port++) {
+               printf("Closing port %d...\n", port);
+               rte_eth_dev_stop(port);
+               rte_eth_dev_close(port);
+       }
+
+       exit(0);
+}
+
+/*
+ * vhost new_device callback: find the port whose socket path matches the
+ * connecting device and record the vhost device id in it.
+ *
+ * vid: vhost device id supplied by the vhost library.
+ *
+ * Returns 0 when a matching port was found, -1 when the socket path could
+ * not be retrieved or matches no entry of vports[].
+ */
+static int
+new_device(int vid)
+{
+       char ifname[MAX_PATH_LEN];
+       int i;
+
+       /* ifname is only meaningful when the lookup succeeds. */
+       if (rte_vhost_get_ifname(vid, ifname, sizeof(ifname)) != 0)
+               return -1;
+
+       for (i = 0; i < MAX_VDPA_SAMPLE_PORTS; i++) {
+               if (strcmp(ifname, vports[i].ifname) != 0)
+                       continue;
+
+               printf("\nport %s connected, eid: %d, did %d\n",
+                               ifname, vports[i].eid, vports[i].did);
+               vports[i].vid = vid;
+               return 0;
+       }
+
+       return -1;
+}
+
+/*
+ * vhost destroy_device callback: report the disconnection of the port whose
+ * socket path matches the given device.
+ *
+ * vid: vhost device id supplied by the vhost library.
+ *
+ * Nothing is done when the socket path cannot be retrieved or matches no
+ * entry of vports[].
+ */
+static void
+destroy_device(int vid)
+{
+       char ifname[MAX_PATH_LEN];
+       int i;
+
+       /* ifname is only meaningful when the lookup succeeds. */
+       if (rte_vhost_get_ifname(vid, ifname, sizeof(ifname)) != 0)
+               return;
+
+       for (i = 0; i < MAX_VDPA_SAMPLE_PORTS; i++) {
+               if (strcmp(ifname, vports[i].ifname) != 0)
+                       continue;
+
+               printf("\nport %s disconnected, eid: %d, did %d\n",
+                               ifname, vports[i].eid, vports[i].did);
+               /*
+                * NOTE(review): the id is stored again rather than cleared
+                * on disconnect -- confirm that this is intended.
+                */
+               vports[i].vid = vid;
+               break;
+       }
+}
+
+/*
+ * vhost callbacks registered for every socket. Only device creation and
+ * destruction are handled; the members not named here are left NULL.
+ */
+static const struct vhost_device_ops vdpa_sample_devops = {
+       .destroy_device = destroy_device,
+       .new_device = new_device,
+};
+
+/* Ethernet device configuration passed to rte_eth_dev_configure(). */
+static const struct rte_eth_conf port_conf_default = {
+       .rxmode = {
+               /* Rx packet length limit, set to ETHER_MAX_LEN. */
+               .max_rx_pkt_len = ETHER_MAX_LEN,
+               /*
+                * NOTE(review): presumably selects the offloads mask over
+                * the legacy bit-fields -- confirm against the ethdev API
+                * of the targeted DPDK release.
+                */
+               .ignore_offload_bitfield = 1,
+       },
+};
+
+/*
+ * Configure and start one Ethernet port with a single Rx queue and a single
+ * Tx queue, then print its MAC address.
+ *
+ * port:      ethdev port id; must be below rte_eth_dev_count().
+ * mbuf_pool: mempool handed to the Rx queue setup.
+ *
+ * Returns 0 on success, -1 when the port id is out of range, or the negative
+ * value returned by the ethdev call that failed.
+ */
+static inline int
+port_init(uint16_t port, struct rte_mempool *mbuf_pool)
+{
+       const uint16_t nb_rxq = 1;
+       const uint16_t nb_txq = 1;
+       uint16_t rx_desc = RX_RING_SIZE;
+       uint16_t tx_desc = TX_RING_SIZE;
+       struct rte_eth_dev_info info;
+       struct rte_eth_txconf tx_cfg;
+       struct ether_addr mac;
+       uint16_t qid;
+       int rc;
+
+       if (port >= rte_eth_dev_count())
+               return -1;
+
+       rte_eth_dev_info_get(port, &info);
+
+       /* Device-level configuration comes first. */
+       rc = rte_eth_dev_configure(port, nb_rxq, nb_txq, &port_conf_default);
+       if (rc < 0)
+               return rc;
+
+       /* Rx queues draw their buffers from the caller's mempool. */
+       for (qid = 0; qid < nb_rxq; qid++) {
+               rc = rte_eth_rx_queue_setup(port, qid, rx_desc,
+                               rte_eth_dev_socket_id(port), NULL, mbuf_pool);
+               if (rc < 0)
+                       return rc;
+       }
+
+       /* Tx queues use the default Tx configuration reported by the device. */
+       tx_cfg = info.default_txconf;
+       for (qid = 0; qid < nb_txq; qid++) {
+               rc = rte_eth_tx_queue_setup(port, qid, tx_desc,
+                               rte_eth_dev_socket_id(port), &tx_cfg);
+               if (rc < 0)
+                       return rc;
+       }
+
+       rc = rte_eth_dev_start(port);
+       if (rc < 0)
+               return rc;
+
+       rte_eth_macaddr_get(port, &mac);
+       printf("Port %u MAC: %02" PRIx8 " %02" PRIx8 " %02" PRIx8
+                          " %02" PRIx8 " %02" PRIx8 " %02" PRIx8 "\n",
+                       port,
+                       mac.addr_bytes[0], mac.addr_bytes[1],
+                       mac.addr_bytes[2], mac.addr_bytes[3],
+                       mac.addr_bytes[4], mac.addr_bytes[5]);
+
+       return 0;
+}
+
+/*
+ * Entry point of the vdpa sample.
+ *
+ * Initializes EAL and every Ethernet port, parses the application options,
+ * looks up the vDPA engine for the given PCI address and creates one
+ * vhost-user socket per requested device. It then waits until the user
+ * enters 'q' (or standard input ends) and closes the Ethernet ports.
+ *
+ * Returns 0 on normal termination; fatal errors end the process through
+ * rte_exit().
+ */
+int
+main(int argc, char *argv[])
+{
+       char ifname[MAX_PATH_LEN];
+       uint16_t nb_ports, portid;
+       struct rte_mempool *mbuf_pool;
+       int ch;
+       int i, eid, did;
+       int ret;
+       uint64_t flags = 0;
+
+       ret = rte_eal_init(argc, argv);
+       if (ret < 0)
+               rte_exit(EXIT_FAILURE, "eal init failed\n");
+       /* Skip the arguments already handled by EAL. */
+       argc -= ret;
+       argv += ret;
+
+       signal(SIGINT, signal_handler);
+       signal(SIGTERM, signal_handler);
+
+       nb_ports = rte_eth_dev_count();
+       mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", NUM_MBUFS * nb_ports,
+               MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
+       /* The pool is only used, and therefore only checked, if ports exist. */
+       if (nb_ports > 0 && mbuf_pool == NULL)
+               rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
+
+       /* Initialize all ports. */
+       for (portid = 0; portid < nb_ports; portid++)
+               if (port_init(portid, mbuf_pool) != 0)
+                       rte_exit(EXIT_FAILURE, "Cannot init port %d\n",
+                                       portid);
+
+       data_init();
+
+       ret = parse_args(argc, argv);
+       if (ret < 0)
+               rte_exit(EXIT_FAILURE, "invalid argument\n");
+
+       eid = rte_vdpa_find_engine_id(&dev_id);
+       if (eid < 0)
+               rte_exit(EXIT_FAILURE, "no vDPA engine found\n");
+
+       printf("\nuse engine %d to create vhost socket\n", eid);
+       rte_vdpa_info_query(eid, &attr);
+       if (devcnt > (int)attr.dev_num)
+               rte_exit(EXIT_FAILURE, "not enough devices in engine\n");
+
+       if (queue > (int)attr.queue_num)
+               rte_exit(EXIT_FAILURE, "not enough queues in engine\n");
+
+       for (i = 0; i <  RTE_MIN(MAX_VDPA_SAMPLE_PORTS, devcnt); i++) {
+               /* Socket path is the --iface prefix followed by the index. */
+               ret = snprintf(ifname, sizeof(ifname), "%s%d", iface, i);
+               if (ret < 0 || ret >= (int)sizeof(ifname))
+                       rte_exit(EXIT_FAILURE,
+                                       "socket path too long: %s%d\n",
+                                       iface, i);
+               did = i;
+               vports[i].eid = eid;
+               vports[i].did = did;
+               snprintf(vports[i].ifname, sizeof(vports[i].ifname), "%s",
+                               ifname);
+
+               ret = rte_vhost_driver_register(ifname, flags);
+               if (ret != 0)
+                       rte_exit(EXIT_FAILURE,
+                                       "register driver failed: %s\n",
+                                       ifname);
+
+               ret = rte_vhost_driver_callback_register(ifname,
+                               &vdpa_sample_devops);
+               if (ret != 0)
+                       rte_exit(EXIT_FAILURE,
+                                       "register driver ops failed: %s\n",
+                                       ifname);
+
+               rte_vhost_driver_set_vdpa_eid(ifname, eid);
+               rte_vhost_driver_set_vdpa_did(ifname, did);
+               /*
+                * Configure vhost port with vDPA device's maximum capability.
+                * App has the flexibility to change the features, queue num.
+                */
+               rte_vhost_driver_set_queue_num(ifname, attr.queue_num);
+               rte_vhost_driver_set_features(ifname, attr.features);
+               rte_vhost_driver_set_protocol_features(ifname,
+                               attr.protocol_features);
+
+               if (rte_vhost_driver_start(ifname) < 0)
+                       rte_exit(EXIT_FAILURE,
+                                       "start vhost driver failed: %s\n",
+                                       ifname);
+       }
+
+       printf("enter \'q\' to quit\n");
+       while ((ch = getchar()) != EOF) {
+               if (ch == 'q')
+                       break;
+               /* Discard the rest of the line before prompting again. */
+               while (ch != '\n' && ch != EOF)
+                       ch = getchar();
+               /* End of input: leave instead of spinning forever. */
+               if (ch == EOF)
+                       break;
+               printf("enter \'q\' to quit\n");
+       }
+
+       for (portid = 0; portid < nb_ports; portid++) {
+               printf("Closing port %d...\n", portid);
+               rte_eth_dev_stop(portid);
+               rte_eth_dev_close(portid);
+       }
+
+       return 0;
+}
-- 
2.15.1

Reply via email to