This sample shows an instance of vDPA device driver based on vDPA
lib, this driver uses a standard virtio-net PCI device as vDPA
device, which can serve as a backend for a virtio-net pci device
in nested VM.

The key driver ops implemented are:

* vdpa_virtio_eng_init
Prepare a resource pool to be used as vDPA devices for an engine.

* vdpa_virtio_eng_uninit
Reset the vDPA resource pool for an engine.

* vdpa_virtio_dev_init
Allocate a device for corresponding vhost socket.

* vdpa_virtio_dev_uninit
Free a device that is previously allocated.

* vdpa_virtio_dev_conf
With the guest virtio information recorded in virtio_net structure,
driver configures device and IOMMU to set up vhost datapath, which
includes: vring operation, VFIO interrupt, kick relay.

* vdpa_virtio_dev_close
Tear down everything that was configured in dev_conf.

* device capability reporting, e.g. queue number, features.

Below are setup steps for your reference:

1. Make sure your kernel vhost module and QEMU support vIOMMU.
   - OS: CentOS 7.4
   - QEMU: 2.10.1
   - Guest OS: CentOS 7.2
   - Nested VM OS: CentOS 7.2

2. enable VT-x feature for vCPU in VM.
   modprobe kvm_intel nested=1

3. Start a VM with a virtio-net-pci device.
   ./qemu-2.10.1/x86_64-softmmu/qemu-system-x86_64 -enable-kvm -cpu host \
   <snip>
   -machine q35 \
   -device intel-iommu \
   -netdev tap,id=mytap,ifname=vdpa,vhostforce=on \
   -device virtio-net-pci,netdev=mytap,mac=00:aa:bb:cc:dd:ee,\
   disable-modern=off,disable-legacy=on,iommu_platform=on \

4. Bind VFIO-pci to virtio_net_pci device
   a) login to VM;
   b) modprobe vfio-pci
   c) rmmod vfio_iommu_type1
   d) modprobe vfio_iommu_type1 allow_unsafe_interrupts=1
   e) ./usertools/dpdk-devbind.py -b vfio-pci 00:03.0

5. Start vDPA sample
   Based on DPDK 17.11 and the vDPA RFC patch, apply this patch set.
   Sample compilation is just like the other DPDK samples.

   ./examples/vdpa/build/vdpa -c 0x6 -n 4 --socket-mem 512 --no-pci -- \
   --bdf 0000:00:03.0 --devcnt 1 --engine vdpa_virtio_net \
   --iface /tmp/vhost-user- --queue 1

6. Start nested VM
   ./qemu-2.10.1/x86_64-softmmu/qemu-system-x86_64 -cpu host -enable-kvm \
   <snip>
   -mem-prealloc \
   -chardev socket,id=char0,path=/tmp/vhost-user-0 \
   -netdev type=vhost-user,id=vdpa,chardev=char0,vhostforce \
   -device virtio-net-pci,netdev=vdpa,mac=00:aa:bb:cc:dd:ee \

7. Login the nested VM, and verify the virtio in nested VM can communicate
   with tap device on host.

Signed-off-by: Xiao Wang <xiao.w.w...@intel.com>
---
 examples/vdpa/Makefile          |   59 ++
 examples/vdpa/main.c            |  321 ++++++++++
 examples/vdpa/vdpa_virtio_net.c | 1274 +++++++++++++++++++++++++++++++++++++++
 examples/vdpa/vdpa_virtio_net.h |  144 +++++
 4 files changed, 1798 insertions(+)
 create mode 100644 examples/vdpa/Makefile
 create mode 100644 examples/vdpa/main.c
 create mode 100644 examples/vdpa/vdpa_virtio_net.c
 create mode 100644 examples/vdpa/vdpa_virtio_net.h

diff --git a/examples/vdpa/Makefile b/examples/vdpa/Makefile
new file mode 100644
index 0000000..6571a05
--- /dev/null
+++ b/examples/vdpa/Makefile
@@ -0,0 +1,59 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp")
+$(info This application can only operate in a linuxapp environment, \
+please change the definition of the RTE_TARGET environment variable)
+all:
+else
+
+# binary name
+APP = vdpa
+
+# all source are stored in SRCS-y
+SRCS-y := main.c vdpa_virtio_net.c
+
+CFLAGS += -O2 -D_FILE_OFFSET_BITS=64
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -D_GNU_SOURCE
+
+include $(RTE_SDK)/mk/rte.extapp.mk
+
+endif
diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c
new file mode 100644
index 0000000..3cf6c78
--- /dev/null
+++ b/examples/vdpa/main.c
@@ -0,0 +1,321 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <getopt.h>
+#include <signal.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <rte_cycles.h>
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+#include <rte_vhost.h>
+#include <rte_vdpa.h>
+#include "vdpa_virtio_net.h"
+
+#define MAX_PATH_LEN 128
+#define MAX_VDPA_SAMPLE_PORTS 8
+
+struct vdpa_port {
+       char ifname[MAX_PATH_LEN];
+       int eid;
+       int did;
+       int vid;
+};
+
+struct vdpa_port vports[MAX_VDPA_SAMPLE_PORTS];
+struct rte_vdpa_eng_id dev_id;
+char engine[MAX_PATH_LEN];
+char iface[MAX_PATH_LEN];
+int engid;
+int queue;
+int devcnt;
+
/*
 * Parse a non-negative integer from 'str' in the given base.
 *
 * Returns the parsed value, or -1 on any error: empty string, trailing
 * garbage, out-of-range input, or a value that does not fit in an int
 * (which would otherwise be truncated or mistaken for the -1 sentinel).
 */
static int
get_unsigned(const char *str, int base)
{
	unsigned long num;
	char *end = NULL;

	errno = 0;
	num = strtoul(str, &end, base);
	if ((str[0] == '\0') || (end == NULL) || (*end != '\0') || (errno != 0))
		return -1;

	/* BUGFIX: reject values that cannot be represented as int */
	if (num > INT_MAX)
		return -1;

	return num;
}
+
+static int
+parse_args(int argc, char **argv)
+{
+       static const char *short_option = "";
+       static struct option long_option[] = {
+               {"bdf", required_argument, NULL, 0},
+               {"engine", required_argument, NULL, 0},
+               {"queue", required_argument, NULL, 0},
+               {"devcnt", required_argument, NULL, 0},
+               {"iface", required_argument, NULL, 0},
+               {NULL, 0, 0, 0},
+       };
+       char str[MAX_PATH_LEN];
+       int opt, idx;
+       int num[4];
+       int i, j;
+
+       while ((opt = getopt_long(argc, argv, short_option, long_option, &idx))
+                       != EOF) {
+               switch (opt) {
+               case 0:
+                       if (strncmp(long_option[idx].name, "bdf",
+                                               MAX_PATH_LEN) == 0) {
+                               strcpy(str, optarg);
+                               memset(num, 0, 4 * sizeof(num[0]));
+                               i = strlen(str) - 1;
+                               j = 3;
+                               while (i > 0 && j >= 0) {
+                                       while ((str[i - 1] != ':'
+                                                       && str[i - 1] != '.')
+                                                       && i > 0)
+                                               i--;
+                                       num[j--] = get_unsigned(&str[i], 16);
+                                       i--;
+                                       if (i >= 0)
+                                               str[i] = '\0';
+                               }
+                               dev_id.pci_addr.domain = num[0];
+                               dev_id.pci_addr.bus = num[1];
+                               dev_id.pci_addr.devid = num[2];
+                               dev_id.pci_addr.function = num[3];
+                               printf("bdf %04x:%02x:%02x.%02x\n",
+                                               dev_id.pci_addr.domain,
+                                               dev_id.pci_addr.bus,
+                                               dev_id.pci_addr.devid,
+                                               dev_id.pci_addr.function);
+                       } else if (strncmp(long_option[idx].name, "queue",
+                                               MAX_PATH_LEN) == 0) {
+                               queue = get_unsigned(optarg, 10);
+                               printf("queue %d\n", queue);
+                       } else if (strncmp(long_option[idx].name, "devcnt",
+                                               MAX_PATH_LEN) == 0) {
+                               devcnt = get_unsigned(optarg, 10);
+                               printf("devcnt %d\n", devcnt);
+                       } else if (strncmp(long_option[idx].name, "engine",
+                                               MAX_PATH_LEN) == 0) {
+                               strcpy(engine, optarg);
+                               printf("engine %s\n", engine);
+                       } else if (strncmp(long_option[idx].name, "iface",
+                                               MAX_PATH_LEN) == 0) {
+                               strcpy(iface, optarg);
+                               printf("iface %s\n", iface);
+                       }
+                       break;
+               default:
+                       return -1;
+               }
+       }
+
+       return 0;
+}
+
+static int
+register_engine(void)
+{
+       if (strlen(engine) <= 0)
+               return -1;
+
+       engid = rte_vdpa_register_engine(engine, &dev_id);
+
+       return engid;
+}
+
/*
 * Unregister the previously registered vDPA engine. No-op (returns -1)
 * when no engine is registered; otherwise stores and returns the
 * framework's result in the global 'engid'.
 */
static int
unregister_engine(void)
{
	if (engid < 0)
		return -1;

	engid = rte_vdpa_unregister_engine(engid);

	return engid;
}
+
+static int
+init(void)
+{
+       devcnt = MAX_VDPA_SAMPLE_PORTS;
+       engid = -1;
+       queue = 1;
+       memset(&dev_id, 0, sizeof(dev_id));
+       memset(engine, 0, MAX_PATH_LEN * sizeof(engine[0]));
+       memset(iface, 0, MAX_PATH_LEN * sizeof(iface[0]));
+
+       return 0;
+}
+
/* SIGINT handler: terminate the sample immediately on Ctrl-C. */
static void
sigint_handler(__rte_unused int signum)
{
	exit(0);
}
+
+static int
+new_device(int vid)
+{
+       char ifname[MAX_PATH_LEN];
+       int i;
+
+       rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
+       for (i = 0; i < MAX_VDPA_SAMPLE_PORTS; i++) {
+               if (strcmp(ifname, vports[i].ifname) == 0) {
+                       vports[i].vid = vid;
+                       break;
+               }
+       }
+
+       if (i >= MAX_VDPA_SAMPLE_PORTS)
+               return -1;
+
+       rte_vhost_set_vdpa_eid(vid, vports[i].eid);
+       rte_vhost_set_vdpa_did(vid, vports[i].did);
+
+       return 0;
+}
+
+static void
+destroy_device(int vid)
+{
+       int i;
+
+       for (i = 0; i < MAX_VDPA_SAMPLE_PORTS; i++)
+               if (vid == vports[i].vid) {
+                       vports[i].vid = -1;
+                       break;
+               }
+}
+
/*
 * vhost-user callbacks for this sample: only device creation and
 * destruction are handled; all other notifications are ignored.
 */
static const struct vhost_device_ops vdpa_devops = {
	.new_device = new_device,
	.destroy_device = destroy_device,
	.vring_state_changed = NULL,
	.features_changed = NULL,
	.new_connection = NULL,
	.destroy_connection = NULL,
};
+
+int
+main(int argc, char *argv[])
+{
+       char ifname[MAX_PATH_LEN];
+       char ch;
+       int did, ret, i;
+       uint64_t flags = 0;
+
+       signal(SIGINT, sigint_handler);
+       ret = rte_eal_init(argc, argv);
+       if (ret < 0)
+               rte_exit(EXIT_FAILURE, "eal init failed\n");
+
+       argc -= ret;
+       argv += ret;
+       ret = init();
+       if (ret)
+               rte_exit(EXIT_FAILURE, "app init failed\n");
+
+       ret = parse_args(argc, argv);
+       if (ret < 0)
+               rte_exit(EXIT_FAILURE, "invalid argument\n");
+
+       ret = register_engine();
+       if (ret < 0)
+               rte_exit(EXIT_FAILURE, "register engine failed\n");
+
+       if (devcnt > vdpa_virtio_get_device_num(engid))
+               rte_exit(EXIT_FAILURE, "not enough devices in engine\n");
+
+       for (i = 0; i < devcnt; i++) {
+               sprintf(ifname, "%s%d", iface, i);
+               /* for vdpa devices, need to reserve resource via driver */
+               did = vdpa_virtio_dev_init(engid, ifname);
+               vports[i].eid = engid;
+               vports[i].did = did;
+               strcpy(vports[i].ifname, ifname);
+
+               ret = rte_vhost_driver_register(ifname, flags);
+               if (ret != 0)
+                       rte_exit(EXIT_FAILURE,
+                                       "register driver failed: %s\n",
+                                       ifname);
+
+               rte_vhost_driver_callback_register(ifname, &vdpa_devops);
+               if (ret != 0)
+                       rte_exit(EXIT_FAILURE,
+                                       "register driver ops failed: %s\n",
+                                       ifname);
+               /* for vdpa devices, need to set capabilities via vhost lib */
+               rte_vhost_driver_set_queue_num(ifname,
+                               RTE_MIN(vdpa_virtio_get_queue_num(engid, did),
+                                       queue));
+               rte_vhost_driver_set_features(ifname,
+                               vdpa_virtio_get_features(engid, did));
+               rte_vhost_driver_set_protocol_features(ifname,
+                               vdpa_virtio_get_protocol_features(engid, did));
+
+               if (rte_vhost_driver_start(ifname) < 0)
+                       rte_exit(EXIT_FAILURE,
+                                       "start vhost driver failed: %s\n",
+                                       ifname);
+
+               /* for vdpa devices, need to start device via driver */
+               vdpa_virtio_dev_start(engid, did);
+       }
+
+       printf("enter \'q\' to quit\n");
+       while (scanf("%c", &ch)) {
+               if (ch == 'q')
+                       break;
+               while (ch != '\n')
+                       scanf("%c", &ch);
+               printf("enter \'q\' to quit\n");
+       }
+
+       /* for vdpa devices, need to free resources via driver */
+       for (i = 0; i < devcnt; i++) {
+               vdpa_virtio_dev_stop(vports[i].eid, vports[i].did);
+               vdpa_virtio_dev_uninit(vports[i].eid, vports[i].did);
+       }
+
+       ret = unregister_engine();
+
+       return ret;
+}
diff --git a/examples/vdpa/vdpa_virtio_net.c b/examples/vdpa/vdpa_virtio_net.c
new file mode 100644
index 0000000..62ab797
--- /dev/null
+++ b/examples/vdpa/vdpa_virtio_net.c
@@ -0,0 +1,1274 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <sys/syscall.h>
+#include <linux/pci_regs.h>
+#include <sys/epoll.h>
+#include <sys/eventfd.h>
+#include <sys/socket.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <rte_log.h>
+#include <rte_ethdev.h>
+#include <rte_io.h>
+#include <rte_malloc.h>
+#include <rte_memzone.h>
+#include <rte_cycles.h>
+#include "vdpa_virtio_net.h"
+
+#define True 1
+#define False 0
+
+#define PCI_CAPABILITY_LIST    0x34
+#define PCI_CAP_ID_VNDR                0x09
+
+/*
+ * Maximum number of virtqueues per device.
+ */
+#define VIRTIO_MAX_VIRTQUEUES 1
+
+/* Common configuration */
+#define VIRTIO_PCI_CAP_COMMON_CFG      1
+/* Notifications */
+#define VIRTIO_PCI_CAP_NOTIFY_CFG      2
+/* ISR Status */
+#define VIRTIO_PCI_CAP_ISR_CFG         3
+/* Device specific configuration */
+#define VIRTIO_PCI_CAP_DEVICE_CFG      4
+/* PCI configuration access */
+#define VIRTIO_PCI_CAP_PCI_CFG         5
+
+/* The feature bitmap for virtio net */
+#define VIRTIO_NET_F_CSUM      0       /* Host handles pkts w/ partial csum */
+#define VIRTIO_NET_F_GUEST_CSUM        1       /* Guest handles pkts w/ 
partial csum */
+#define VIRTIO_NET_F_MAC       5       /* Host has given MAC address. */
+#define VIRTIO_NET_F_GUEST_TSO4        7       /* Guest can handle TSOv4 in. */
+#define VIRTIO_NET_F_GUEST_TSO6        8       /* Guest can handle TSOv6 in. */
+#define VIRTIO_NET_F_GUEST_ECN 9       /* Guest can handle TSO w/ ECN in. */
+#define VIRTIO_NET_F_GUEST_UFO 10      /* Guest can handle UFO in. */
+#define VIRTIO_NET_F_HOST_TSO4 11      /* Host can handle TSOv4 in. */
+#define VIRTIO_NET_F_HOST_TSO6 12      /* Host can handle TSOv6 in. */
+#define VIRTIO_NET_F_HOST_ECN  13      /* Host can handle TSO w/ ECN in. */
+#define VIRTIO_NET_F_HOST_UFO  14      /* Host can handle UFO in. */
+#define VIRTIO_NET_F_MRG_RXBUF 15      /* Host can merge receive buffers. */
+#define VIRTIO_NET_F_STATUS    16      /* virtio_net_config.status available */
+#define VIRTIO_NET_F_CTRL_VQ   17      /* Control channel available */
+#define VIRTIO_NET_F_CTRL_RX   18      /* Control channel RX mode support */
+#define VIRTIO_NET_F_CTRL_VLAN 19      /* Control channel VLAN filtering */
+#define VIRTIO_NET_F_CTRL_RX_EXTRA 20  /* Extra RX mode control support */
+#define VIRTIO_NET_F_GUEST_ANNOUNCE 21 /* Guest can announce device on the 
network */
+#define VIRTIO_NET_F_MQ                22      /* Device supports Receive Flow 
Steering */
+#define VIRTIO_NET_F_CTRL_MAC_ADDR 23  /* Set MAC address */
+
+#define VIRTIO_F_NOTIFY_ON_EMPTY       24
+
+#define VIRTIO_F_ANY_LAYOUT            27
+
+/* We support indirect buffer descriptors */
+#define VIRTIO_RING_F_INDIRECT_DESC    28
+
+#define VIRTIO_F_VERSION_1             32
+#define VIRTIO_F_IOMMU_PLATFORM        33
+
+/*
+ * Some VirtIO feature bits (currently bits 28 through 31) are
+ * reserved for the transport being used (eg. virtio_ring), the
+ * rest are per-device feature bits.
+ */
+#define VIRTIO_TRANSPORT_F_START 28
+#define VIRTIO_NET_VIRTIO_TRANSPORT_F_END   34
+
+/*
+ * The Guest publishes the used index for which it expects an interrupt
+ * at the end of the avail ring.
+ * The Host publishes the avail index for which it expects a kick
+ * at the end of the used ring.
+ */
+#define VIRTIO_RING_F_EVENT_IDX                29
+
+/* Status byte for driver to report progress. */
+#define VIRTIO_CONFIG_STATUS_RESET     0x00
+#define VIRTIO_CONFIG_STATUS_ACK       0x01
+#define VIRTIO_CONFIG_STATUS_DRIVER    0x02
+#define VIRTIO_CONFIG_STATUS_DRIVER_OK 0x04
+#define VIRTIO_CONFIG_STATUS_FEATURES_OK 0x08
+#define VIRTIO_CONFIG_STATUS_FAILED    0x80
+#define VIRTIO_MSI_NO_VECTOR 0xFFFF
+
+#define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL)
+#define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
+               sizeof(int) * (32 + 1))
+#define PAGE_SIZE   (sysconf(_SC_PAGESIZE))
+#define PAGE_MASK   (~(PAGE_SIZE - 1))
+
+static int pool_initiated[MAX_VDPA_ENGINE_NUM] = {0};
+static int total_virtio[MAX_VDPA_ENGINE_NUM] = {0};
+
+struct virtio_net_pci 
virtio_net_pool[MAX_VDPA_ENGINE_NUM][MAX_VDPA_DEVICE_VIRTIO];
+
+void *pci_find_max_end_va(void);
+int pci_get_kernel_driver_by_path(const char *filename, char *dri_name);
+int pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev);
+
+int vfio_get_container_fd(void);
+int vfio_get_group_fd(int iommu_group_no);
+int vfio_get_group_no(const char *sysfs_base,
+               const char *dev_addr, int *iommu_group_no);
+
+int pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table);
+int pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd);
+int pci_vfio_set_bus_master(int dev_fd, bool op);
+
+extern void *pci_map_addr;
+
/*
 * Validate the PCI device at dev->addr through sysfs: its directory must
 * exist, its resource file must parse into dev->mem_resource, and it must
 * be bound to the vfio-pci kernel driver.
 *
 * Returns 0 on success, -1 on any failure.
 */
static int
read_pci_dev(struct rte_pci_device *dev)
{
	char filename[PATH_MAX];
	char dev_dir[PATH_MAX];
	char driver[PATH_MAX];
	int ret;

	/* build /sys/bus/pci/devices/<dddd:bb:dd.f> path */
	snprintf(dev_dir, sizeof(dev_dir), "%s/" PCI_PRI_FMT,
			rte_pci_get_sysfs_path(),
			dev->addr.domain, dev->addr.bus,
			dev->addr.devid, dev->addr.function);
	if (access(dev_dir, R_OK) != 0) {
		DEBUG("\n%s: %s not exist\n", __func__, dev_dir);
		return -1;
	}

	/* parse resources */
	snprintf(filename, sizeof(filename), "%s/resource", dev_dir);
	if (pci_parse_sysfs_resource(filename, dev) < 0) {
		DEBUG("%s(): cannot parse resource\n", __func__);
		return -1;
	}

	/* parse driver */
	snprintf(filename, sizeof(filename), "%s/driver", dev_dir);
	ret = pci_get_kernel_driver_by_path(filename, driver);
	if (ret < 0) {
		DEBUG("Fail to get kernel driver\n");
		return -1;
	}

	/* ret > 0 means no driver bound at all */
	if (ret > 0 || strcmp(driver, "vfio-pci") != 0) {
		DEBUG("Kernel driver is not vfio-pci\n");
		return -1;
	}
	return 0;
}
+
+static inline int invalid_port(int eid, int did)
+{
+       if (did < 0 || did >= total_virtio[eid])
+               return 1;
+       return 0;
+}
+
/*
 * Extract the trailing decimal device index from a vhost socket path of
 * the form "<prefix>-<index>", e.g. "/tmp/vhost-user-3" -> 3.
 *
 * Returns the index, or -1 when no valid number follows the last '-'
 * (empty suffix, trailing garbage, or out-of-range value).
 */
static int extract_index(char *path)
{
	int i, len, device_id;
	char *str, *end;

	len = strlen(path);
	/* find the last '-' separator; if none, parse the whole string */
	for (i = len - 1; i >= 0; i--) {
		if (path[i] == '-')
			break;
	}
	str = &path[i + 1];
	/* BUGFIX: clear errno before strtoul — the check below otherwise
	 * fails on valid input whenever errno is left over from an
	 * earlier, unrelated call */
	errno = 0;
	device_id = strtoul(str, &end, 10);
	if ((str[0] == '\0') || (end == NULL) || (*end != '\0') || (errno != 0))
		return -1;

	return device_id;
}
+
+static int virtio_net_alloc_hw_ele(int eid, char *args)
+{
+       int device_id;
+
+       DEBUG("\n%s: the vhost socket path %s\n", __func__, args);
+       device_id = extract_index(args);
+       if (device_id < 0 || device_id >= total_virtio[eid]) {
+               DEBUG("\n%s: device_id %d must be within the interval 0 ~ %d\n",
+                               __func__, device_id, total_virtio[eid] - 1);
+               return -1;
+       }
+
+       if (True == virtio_net_pool[eid][device_id].used) {
+               DEBUG("\n%s: device_id %d has been taken already\n",
+                               __func__, device_id);
+               return -1;
+       }
+
+       virtio_net_pool[eid][device_id].used = True;
+       return device_id;
+}
+
+static int virtio_net_free_hw_ele(int eid, int did)
+{
+       if (invalid_port(eid, did))
+               return -1;
+
+       virtio_net_pool[eid][did].used = False;
+       return 0;
+}
+
/*
 * Kick-relay thread body: epoll on every virtqueue kickfd of the vhost
 * device and translate each guest kick into a write of the queue id to
 * the physical virtio device's notify register for that queue.
 *
 * 'arg' is the struct virtio_net * being relayed. Runs until the thread
 * is cancelled (see unset_notify_relay); returns NULL on setup failure.
 */
static void *notify_relay(void *arg)
{
	int i, kickfd, epfd, nfds = 0;
	struct virtio_net *dev = (struct virtio_net *)arg;
	struct virtio_net_pci *vpci = &virtio_net_pool[dev->eid][dev->did];
	struct virtio_net_hw *hw = &virtio_net_pool[dev->eid][dev->did].hw;
	uint32_t n, qid, q_num = dev->nr_vring;
	struct epoll_event events[q_num];
	struct epoll_event ev;
	struct vhost_virtqueue **vq = dev->virtqueue;
	uint64_t buf;
	int nbytes;

	/* size hint is ignored by modern kernels but must be > 0 */
	epfd = epoll_create(32);
	/* published so unset_notify_relay can close it */
	vpci->epfd = epfd;
	for (n = 0; n < dev->nr_vring; n++) {
		ev.data.u32 = n;	/* queue id, recovered from the event */
		ev.events = EPOLLIN | EPOLLPRI;
		if (epoll_ctl(epfd, EPOLL_CTL_ADD, vq[n]->kickfd, &ev) < 0) {
			DEBUG("Error epoll add failed, %s\n", strerror(errno));
			return NULL;
		}
	}

	for (;;) {
		nfds = epoll_wait(epfd, events, q_num, -1);
		if (nfds < 0) {
			if (errno == EINTR)
				continue;
			DEBUG("epoll_wait return fail\n");
			return NULL;
		} else if (nfds == 0)
			continue;

		for (i = 0; i < nfds; i++) {
			qid = events[i].data.u32;
			kickfd = vq[qid]->kickfd;

			/* drain the eventfd counter before notifying */
			do {
				nbytes = read(kickfd, &buf, 8);
				if (nbytes < 0) {
					if (errno == EINTR || errno == EWOULDBLOCK ||
							errno == EAGAIN)
						continue;
					DEBUG("Error reading from kickfd %d: %s\n",
							kickfd, strerror(errno));
				} else if (nbytes == 0)
					DEBUG("Read nothing from kickfd %d\n", kickfd);
				break;
			} while (1);

			/* relay the kick to the device notify area */
			rte_write16(qid, hw->notify_addr[qid]);
		}
	}

	return NULL;
}
+
+static int setup_notify_relay(struct virtio_net *dev)
+{
+       struct virtio_net_pci *vpci = &virtio_net_pool[dev->eid][dev->did];
+       int ret;
+
+       ret = pthread_create(&vpci->tid, NULL, notify_relay, dev);
+       if (ret != 0) {
+               DEBUG("failed to create notify relay pthread\n");
+               return -1;
+       }
+
+       return 0;
+}
+
/*
 * Stop the kick-relay thread of 'dev' (if running) and close its epoll
 * fd, resetting both bookkeeping fields. Always returns 0.
 */
static int unset_notify_relay(struct virtio_net *dev)
{
	struct virtio_net_pci *vpci = &virtio_net_pool[dev->eid][dev->did];
	void *status;

	if (vpci->tid) {
		/* cancel, then join to make sure the thread is gone before
		 * the epoll fd is closed below */
		pthread_cancel(vpci->tid);
		pthread_join(vpci->tid, &status);
		DEBUG("\n%s device_id %d, cancel relay tid %lu\n", __func__,
				dev->did, vpci->tid);
	}
	vpci->tid = 0;

	if (vpci->epfd >= 0) {
		close(vpci->epfd);
		DEBUG("\n%s close epfd %d\n", __func__, vpci->epfd);
	}
	vpci->epfd = -1;

	return 0;
}
+
/*
 * Translate a virtio PCI capability (bar/offset/length) into a mapped
 * virtual address, validating that the range fits inside the BAR.
 *
 * Returns the address, or NULL when the bar index is out of range, the
 * offset+length wraps or exceeds the BAR, or the BAR is not mapped.
 */
static void *
get_cap_addr(struct rte_pci_device *dev, struct virtio_net_pci_cap *cap)
{
	uint8_t bar = cap->bar;
	uint32_t length = cap->length;
	uint32_t offset = cap->offset;
	uint8_t *base;

	/* a PCI device only has BARs 0..5 */
	if (bar > 5) {
		DEBUG("invalid bar: %u", bar);
		return NULL;
	}

	/* guard against 32-bit wrap-around of offset + length */
	if (offset + length < offset) {
		DEBUG("offset(%u) + length(%u) overflows",
			offset, length);
		return NULL;
	}

	if (offset + length > dev->mem_resource[bar].len) {
		DEBUG("invalid cap: overflows bar space: %u > %" PRIu64,
			offset + length, dev->mem_resource[bar].len);
		return NULL;
	}

	base = dev->mem_resource[bar].addr;
	if (base == NULL) {
		DEBUG("bar %u base addr is NULL", bar);
		return NULL;
	}

	return base + offset;
}
+
+static int vfio_setup_device(const char *sysfs_base, const char *dev_addr,
+               int *vfio_dev_fd, struct vfio_device_info *device_info,
+               struct virtio_net_hw *hw)
+{
+       struct vfio_group_status group_status = {
+                       .argsz = sizeof(group_status)
+       };
+       int vfio_group_fd;
+       int iommu_group_no;
+       int ret;
+       struct vfio_config *vfio_cfg;
+
+       vfio_cfg = &(hw->vfio_cfg);
+       vfio_cfg->group_fd = -1;
+       vfio_cfg->group_no = -1;
+       vfio_cfg->vfio_container_fd = vfio_get_container_fd();
+
+       /* check if we have VFIO driver enabled */
+       if (vfio_cfg->vfio_container_fd == -1) {
+               DEBUG("VFIO support could not be initialized\n");
+               return -1;
+       }
+
+       /* get group number */
+       ret = vfio_get_group_no(sysfs_base, dev_addr, &iommu_group_no);
+       if (ret <= 0) {
+               DEBUG("%s not managed by VFIO driver\n", dev_addr);
+               return -1;
+       }
+
+       /* get the actual group fd */
+       vfio_group_fd = vfio_get_group_fd(iommu_group_no);
+       DEBUG("\nget group no %u group fd %u\n", iommu_group_no, vfio_group_fd);
+       if (vfio_group_fd <= 0)
+               return -1;
+
+       /* store group fd */
+       vfio_cfg->group_no = iommu_group_no;
+       vfio_cfg->group_fd = vfio_group_fd;
+
+       /* check if the group is viable */
+       ret = ioctl(vfio_group_fd, VFIO_GROUP_GET_STATUS, &group_status);
+       if (ret) {
+               DEBUG("%s cannot get group status, error %i (%s)\n",
+                               dev_addr, errno, strerror(errno));
+               close(vfio_group_fd);
+               return -1;
+       } else if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
+               DEBUG("%s VFIO group is not viable!\n", dev_addr);
+               close(vfio_group_fd);
+               return -1;
+       }
+
+       /* check if group does not have a container yet */
+       if (!(group_status.flags & VFIO_GROUP_FLAGS_CONTAINER_SET)) {
+               /* add group to a container */
+               ret = ioctl(vfio_group_fd, VFIO_GROUP_SET_CONTAINER,
+                               &vfio_cfg->vfio_container_fd);
+               if (ret) {
+                       DEBUG("%s cannot add VFIO group to container, error %i 
(%s)\n",
+                                       dev_addr, errno, strerror(errno));
+                       close(vfio_group_fd);
+                       return -1;
+               }
+               DEBUG("\nvfio_group_fd %u ---> container_fd %u\n",
+                               vfio_group_fd, vfio_cfg->vfio_container_fd);
+       }
+
+       ret = ioctl(vfio_cfg->vfio_container_fd, VFIO_SET_IOMMU, 
VFIO_TYPE1_IOMMU);
+       if (ret) {
+               DEBUG("%s set IOMMU type failed, error %i (%s)\n",
+                               dev_addr, errno, strerror(errno));
+               return -1;
+       }
+
+       /* get a file descriptor for the device */
+       *vfio_dev_fd = ioctl(vfio_group_fd, VFIO_GROUP_GET_DEVICE_FD, dev_addr);
+       if (*vfio_dev_fd < 0) {
+               DEBUG("%s not managed by VFIO driver\n", dev_addr);
+               return -1;
+       }
+
+       /* test and setup the device */
+       ret = ioctl(*vfio_dev_fd, VFIO_DEVICE_GET_INFO, device_info);
+       if (ret) {
+               DEBUG("%s cannot get device info, error %i (%s)\n",
+                               dev_addr, errno, strerror(errno));
+               close(*vfio_dev_fd);
+               return -1;
+       }
+
+       return 0;
+}
+
/*
 * Map the virtio-net PCI device into this process with VFIO: open the
 * group/container, mmap each memory BAR, set up interrupts and bus
 * mastering, then reset the device.  On success the BAR base addresses
 * are stored in pdev->mem_resource[] and the device fd in
 * vpci->hw.vfio_cfg.vfio_dev_fd.  Returns 0 on success, -1 on failure
 * (the device fd is closed on failure).
 */
static int
virtio_net_pci_vfio_map_resource(struct virtio_net_pci *vpci)
{
	struct rte_pci_device *pdev = &vpci->pdev;
	struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
	char pci_addr[PATH_MAX] = {0};
	int vfio_dev_fd;
	struct rte_pci_addr *loc = &pdev->addr;
	int i, ret, nb_maps;

	uint32_t ioport_bar;
	struct pci_msix_table msix_table;

	pdev->intr_handle.fd = -1;
	pdev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;

	/* store PCI address string */
	snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
			loc->domain, loc->bus, loc->devid, loc->function);

	ret = vfio_setup_device(rte_pci_get_sysfs_path(), pci_addr,
			&vfio_dev_fd, &device_info, &vpci->hw);
	if (ret)
		return ret;

	/* the BAR holding the MSI-X table must not be mmap'd below */
	ret = pci_vfio_get_msix_bar(vfio_dev_fd, &msix_table);
	if (ret < 0) {
		DEBUG("%s cannot get MSI-X BAR number!\n", pci_addr);
		close(vfio_dev_fd);
		return -1;
	}

	/* get number of regions (up to BAR5) */
	nb_maps = RTE_MIN((int) device_info.num_regions,
				VFIO_PCI_BAR5_REGION_INDEX + 1);

	/* map BARs */
	for (i = 0; i < nb_maps; i++) {
		struct vfio_region_info reg = { .argsz = sizeof(reg) };
		void *bar_addr;

		reg.index = i;
		ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, &reg);

		if (ret) {
			DEBUG("%s cannot get device region info error %i (%s)\n",
					pci_addr, errno, strerror(errno));
			goto fail;
		}

		/* read the raw BAR register from config space to detect
		 * I/O-port BARs, which cannot be memory mapped
		 */
		ret = pread(vfio_dev_fd, &ioport_bar, sizeof(ioport_bar),
				VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX)
				+ PCI_BASE_ADDRESS_0 + i * 4);

		if (ret != sizeof(ioport_bar)) {
			DEBUG("Cannot read command (%x) from config space!\n",
				PCI_BASE_ADDRESS_0 + i * 4);
			goto fail;
		}

		/* check for io port region */
		if (ioport_bar & PCI_BASE_ADDRESS_SPACE_IO)
			continue;

		/* skip non-mmapable BARs */
		if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0)
			continue;

		if (i == msix_table.bar_index)
			continue;

		/* try mapping somewhere close to the end of hugepages */
		if (pci_map_addr == NULL)
			pci_map_addr = pci_find_max_end_va();

		bar_addr = pci_map_addr;
		pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size);

		/* reserve the address using an inaccessible mapping */
		bar_addr = mmap(bar_addr, reg.size, 0, MAP_PRIVATE |
				MAP_ANONYMOUS, -1, 0);
		if (bar_addr != MAP_FAILED) {
			void *map_addr = NULL;
			if (reg.size) {
				/* MAP_FIXED replaces the reservation with the
				 * real BAR mapping at the same address
				 */
				map_addr = pci_map_resource(bar_addr, vfio_dev_fd,
					reg.offset, reg.size, MAP_FIXED);
			}

			if (map_addr == MAP_FAILED || !map_addr) {
				munmap(bar_addr, reg.size);
				bar_addr = MAP_FAILED;
			}
		}

		if (bar_addr == MAP_FAILED) {
			DEBUG("%s mapping BAR%i failed: %s\n", pci_addr, i,
					strerror(errno));
			goto fail;
		}
		pdev->mem_resource[i].addr = bar_addr;
	}

	if (pci_vfio_setup_interrupts(pdev, vfio_dev_fd) != 0) {
		DEBUG("%s error setting up interrupts!\n", pci_addr);
		goto fail;
	}

	/* set bus mastering for the device */
	if (pci_vfio_set_bus_master(vfio_dev_fd, true)) {
		DEBUG("%s cannot set up bus mastering!\n", pci_addr);
		goto fail;
	}

	/* Reset the device */
	ioctl(vfio_dev_fd, VFIO_DEVICE_RESET);
	vpci->hw.vfio_cfg.vfio_dev_fd = vfio_dev_fd;

	return 0;

fail:
	close(vfio_dev_fd);
	return -1;
}
+
/* With vfio-pci, map config space to virtio_net_hw.
 *
 * Maps the device BARs, then walks the PCI capability list and resolves
 * the four virtio-modern regions (common cfg, notify, device cfg, ISR)
 * to process virtual addresses via get_cap_addr().  Returns 0 on
 * success, -1 if mapping fails or any region is missing.
 */
static int
virtio_net_map_pci(struct virtio_net_pci *vpci)
{
	uint8_t pos;
	struct virtio_net_pci_cap cap;
	struct rte_pci_device *dev = &vpci->pdev;
	struct virtio_net_hw *hw = &vpci->hw;
	int ret;

	/* BARs must be mapped first: cap addresses are BAR base + offset */
	if (virtio_net_pci_vfio_map_resource(vpci)) {
		DEBUG("failed to map pci device!\n");
		return -1;
	}

	ret = rte_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST);
	if (ret < 0) {
		DEBUG("failed to read pci capability list\n");
		return -1;
	}

	while (pos) {
		ret = rte_pci_read_config(dev, &cap, sizeof(cap), pos);
		if (ret < 0) {
			DEBUG("failed to read pci cap at pos: %x", pos);
			break;
		}

		/* only vendor-specific capabilities describe virtio regions */
		if (cap.cap_vndr != PCI_CAP_ID_VNDR)
			goto next;

		DEBUG("[%2x] cfg type: %u, bar: %u, offset: %04x, len: %u\n",
			pos, cap.cfg_type, cap.bar, cap.offset, cap.length);

		switch (cap.cfg_type) {
		case VIRTIO_PCI_CAP_COMMON_CFG:
			hw->common_cfg = get_cap_addr(dev, &cap);
			break;
		case VIRTIO_PCI_CAP_NOTIFY_CFG:
			/* notify_off_multiplier follows the generic cap header */
			rte_pci_read_config(dev, &hw->notify_off_multiplier,
						4, pos + sizeof(cap));
			hw->notify_base = get_cap_addr(dev, &cap);
			break;
		case VIRTIO_PCI_CAP_DEVICE_CFG:
			hw->dev_cfg = get_cap_addr(dev, &cap);
			break;
		case VIRTIO_PCI_CAP_ISR_CFG:
			hw->isr = get_cap_addr(dev, &cap);
			break;
		}

next:
		pos = cap.cap_next;
	}

	/* a modern (virtio 1.0) device must expose all four regions */
	if (hw->common_cfg == NULL || hw->notify_base == NULL ||
			hw->dev_cfg == NULL || hw->isr == NULL) {
		DEBUG("no modern virtio pci device found.\n");
		return -1;
	}

	DEBUG("capability mapping:\ncommon cfg: %p\ndevice cfg: %p\n"
			"isr cfg: %p\nnotify base: %p\nmultiplier: %u\n",
			hw->common_cfg, hw->dev_cfg,
			hw->isr, hw->notify_base, hw->notify_off_multiplier);

	return 0;
}
+
+static uint8_t
+virtio_net_get_status(struct virtio_net_hw *hw)
+{
+       return rte_read8(&hw->common_cfg->device_status);
+}
+
+static void
+virtio_net_set_status(struct virtio_net_hw *hw, uint8_t status)
+{
+       rte_write8(status, &hw->common_cfg->device_status);
+}
+
+static void
+virtio_net_vtpci_reset(struct virtio_net_hw *hw)
+{
+       virtio_net_set_status(hw, VIRTIO_CONFIG_STATUS_RESET);
+       /* flush status write */
+       while (virtio_net_get_status(hw))
+               rte_delay_ms(1);
+}
+
+static void
+virtio_net_vtpci_set_status(struct virtio_net_hw *hw, uint8_t status)
+{
+       if (status != VIRTIO_CONFIG_STATUS_RESET)
+               status |= virtio_net_get_status(hw);
+
+       virtio_net_set_status(hw, status);
+       virtio_net_get_status(hw);
+}
+
+static uint64_t
+virtio_net_get_features(struct virtio_net_hw *hw)
+{
+       uint32_t features_lo, features_hi;
+       struct virtio_net_pci_common_cfg *cfg = hw->common_cfg;
+
+       rte_write32(0, &cfg->device_feature_select);
+       features_lo = rte_read32(&cfg->device_feature);
+
+       rte_write32(1, &cfg->device_feature_select);
+       features_hi = rte_read32(&cfg->device_feature);
+
+       return ((uint64_t)features_hi << 32) | features_lo;
+}
+
+static void
+virtio_net_set_features(struct virtio_net_hw *hw, uint64_t features)
+{
+       struct virtio_net_pci_common_cfg *cfg = hw->common_cfg;
+
+       /** vIOMMU to support this virtio device used as vDPA device **/
+       features |= (1ULL << VIRTIO_F_IOMMU_PLATFORM);
+
+       rte_write32(0, &cfg->guest_feature_select);
+       rte_write32(features & ((1ULL << 32) - 1), &cfg->guest_feature);
+
+       rte_write32(1, &cfg->guest_feature_select);
+       rte_write32(features >> 32, &cfg->guest_feature);
+}
+
+static int
+virtio_net_negotiate_features(struct virtio_net_hw *hw, uint64_t req_features)
+{
+       uint64_t host_features;
+
+       DEBUG("%s: qemu and guest negotiated feature: 0x%lx\n",
+                       __func__, req_features);
+
+       /* Read device(host) feature bits */
+       host_features = virtio_net_get_features(hw);
+       DEBUG("%s: VIRTIO_NET device supported feature: 0x%lx\n",
+                       __func__, host_features);
+
+       hw->req_guest_features = req_features;
+       hw->guest_features = req_features & host_features;
+       virtio_net_set_features(hw, hw->guest_features);
+       DEBUG("%s: VIRTIO_NET device configed feature: 0x%lx\n",
+                       __func__, hw->guest_features);
+
+       virtio_net_vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_FEATURES_OK);
+       if (!(virtio_net_get_status(hw) & VIRTIO_CONFIG_STATUS_FEATURES_OK)) {
+               DEBUG("failed to set FEATURES_OK status!\n");
+               return -1;
+       }
+
+       return 0;
+}
+
/* Split a 64-bit value into two 32-bit MMIO writes: low half, then high. */
static inline void
virtio_net_io_write64_twopart(uint64_t val, uint32_t *lo, uint32_t *hi)
{
	rte_write32((uint32_t)val, lo);
	rte_write32((uint32_t)(val >> 32), hi);
}
+
+static inline uint64_t qva_to_gpa(struct virtio_net *dev, uint64_t qva)
+{
+       struct rte_vhost_mem_region *reg;
+       uint32_t i;
+       uint64_t gpa = 0;
+
+       for (i = 0; i < dev->mem->nregions; i++) {
+               reg = &dev->mem->regions[i];
+
+               if (qva >= reg->host_user_addr &&
+                               qva < reg->host_user_addr + reg->size) {
+                       gpa = qva - reg->host_user_addr + reg->guest_phys_addr;
+               }
+       }
+
+       if (gpa == 0)
+               rte_panic("failed to get gpa\n");
+
+       return gpa;
+}
+
/*
 * Program each vring's guest-physical addresses and size into the
 * device, record the per-queue notify address, and enable the queue.
 * Always returns 0 (qva_to_gpa panics on translation failure).
 */
static int virtio_net_config_queues(struct virtio_net *dev)
{
	struct virtio_net_hw *hw = &virtio_net_pool[dev->eid][dev->did].hw;
	struct virtio_net_pci_common_cfg *cfg = hw->common_cfg;
	struct vhost_virtqueue *vq;
	uint64_t desc_addr, avail_addr, used_addr;
	uint32_t i;
	uint16_t notify_off;

	for (i = 0; i < dev->nr_vring; i++) {
		vq = dev->virtqueue[i];
		/* translate vhost process addresses to guest-physical */
		desc_addr = qva_to_gpa(dev, (uint64_t)vq->desc);
		avail_addr = qva_to_gpa(dev, (uint64_t)vq->avail);
		used_addr = qva_to_gpa(dev, (uint64_t)vq->used);

		rte_write16(i, &cfg->queue_select);
		virtio_net_io_write64_twopart(desc_addr, &cfg->queue_desc_lo,
				&cfg->queue_desc_hi);
		virtio_net_io_write64_twopart(avail_addr, &cfg->queue_avail_lo,
				&cfg->queue_avail_hi);
		virtio_net_io_write64_twopart(used_addr, &cfg->queue_used_lo,
				&cfg->queue_used_hi);
		rte_write16((uint16_t)vq->size, &cfg->queue_size);

		/* notify address = notify base + off * multiplier (virtio 1.0) */
		notify_off = rte_read16(&cfg->queue_notify_off);
		hw->notify_addr[i] = (void *)((uint8_t *)hw->notify_base +
				notify_off * hw->notify_off_multiplier);
		rte_write16(1, &cfg->queue_enable);

		DEBUG("queue %u addresses:\n"
				"desc_addr: 0x%lx\tavail_addr: 0x%lx\tused_addr: 0x%lx\n"
				"queue size: %u\t\tnotify addr: %p\tnotify offset: %u\n",
				i, desc_addr, avail_addr, used_addr,
				vq->size, hw->notify_addr[i], notify_off);
	}

	return 0;
}
+
+static int virtio_net_config_irqs(struct virtio_net *dev)
+{
+       uint32_t i;
+       struct virtio_net_hw *hw = &virtio_net_pool[dev->eid][dev->did].hw;
+       struct virtio_net_pci_common_cfg *cfg = hw->common_cfg;
+
+       rte_write16(0, &cfg->msix_config);
+       if (rte_read16(&cfg->msix_config) == VIRTIO_MSI_NO_VECTOR) {
+               DEBUG("For LSC, allocate msix vec failed\n");
+               return -1;
+       }
+
+       for (i = 0; i < dev->nr_vring; i++) {
+               rte_write16(i, &cfg->queue_select);
+               rte_write16(i + 1, &cfg->queue_msix_vector);
+               if (rte_read16(&cfg->queue_msix_vector) == 
VIRTIO_MSI_NO_VECTOR) {
+                       DEBUG("queue id %u, allocate msix vec failed\n", i);
+                       return -1;
+               }
+       }
+       DEBUG("\n%s config irqs OK, num of queues %u\n", __func__, i);
+       return 0;
+}
+
+static void virtio_net_stop_queues(struct virtio_net *dev)
+{
+       uint32_t i;
+       struct virtio_net_hw *hw = &virtio_net_pool[dev->eid][dev->did].hw;
+       struct virtio_net_pci_common_cfg *cfg = hw->common_cfg;
+
+       rte_write16(VIRTIO_MSI_NO_VECTOR, &cfg->msix_config);
+       for (i = 0; i < dev->nr_vring; i++) {
+               rte_write16(i, &cfg->queue_select);
+               rte_write16(0, &cfg->queue_enable);
+               rte_write16(VIRTIO_MSI_NO_VECTOR, &cfg->queue_msix_vector);
+       }
+}
+
/*
 * Enable MSI-X delivery through VFIO: vector 0 forwards the LSC/config
 * interrupt to intr_handle.fd, vectors 1..nr_vring forward the queue
 * interrupts to the corresponding vhost callfd eventfds.
 */
static int virtio_net_enable_vfio_intr(struct virtio_net *dev)
{
	int ret;
	uint32_t i, len;
	char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
	struct vfio_irq_set *irq_set;
	int *fd_ptr;
	struct virtio_net_pci *vpci;

	vpci = &virtio_net_pool[dev->eid][dev->did];
	len = sizeof(irq_set_buf);
	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	/* one vector for LSC plus one per vring */
	irq_set->count = dev->nr_vring + 1;
	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
	irq_set->start = 0;
	/* the eventfd array is packed right after the vfio_irq_set header */
	fd_ptr = (int *) &irq_set->data;
	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = vpci->pdev.intr_handle.fd;

	DEBUG("\n%s device_id %d LSC fd %u, vfio_dev_fd %u\n", __func__,
			dev->did, vpci->pdev.intr_handle.fd,
			vpci->pdev.intr_handle.vfio_dev_fd);
	for (i = 0; i < dev->nr_vring; i++)
		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = dev->virtqueue[i]->callfd;

	ret = ioctl(vpci->pdev.intr_handle.vfio_dev_fd,
			VFIO_DEVICE_SET_IRQS, irq_set);

	if (ret) {
		DEBUG("Error enabling MSI-X interrupts, dev id %u\n", dev->did);
		return -1;
	}

	return 0;
}
+
+static int virtio_net_disable_vfio_intr(struct virtio_net *dev)
+{
+       int len, ret;
+       char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
+       struct vfio_irq_set *irq_set;
+       struct virtio_net_pci *vpci;
+
+       vpci = &virtio_net_pool[dev->eid][dev->did];
+       len = sizeof(irq_set_buf);
+       irq_set = (struct vfio_irq_set *) irq_set_buf;
+       irq_set->argsz = len;
+       irq_set->count = 0;
+       irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
+       irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
+       irq_set->start = 0;
+
+       ret = ioctl(vpci->pdev.intr_handle.vfio_dev_fd,
+                       VFIO_DEVICE_SET_IRQS, irq_set);
+       if (ret) {
+               DEBUG("Error disabling MSI-X interrupts, dev id %u\n", 
dev->did);
+               return -1;
+       }
+
+       return 0;
+}
+
+static int virtio_net_conf_pci(struct virtio_net *dev)
+{
+       struct virtio_net_hw *hw = &virtio_net_pool[dev->eid][dev->did].hw;
+       uint64_t features = dev->features;
+
+       /* Reset the device although not necessary at startup. */
+       virtio_net_vtpci_reset(hw);
+
+       /* Tell the host we've noticed this device. */
+       virtio_net_vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_ACK);
+
+       /* Tell the host we've known how to drive the device. */
+       virtio_net_vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
+
+       if (virtio_net_negotiate_features(hw, features) < 0)
+               return -1;
+
+       virtio_net_config_queues(dev);
+       virtio_net_config_irqs(dev);
+       virtio_net_vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER_OK);
+       return 0;
+}
+
+static int
+virtio_net_pci_dma_map(int vfio_container_fd, struct virtio_net *vdev)
+{
+       uint32_t i, ret;
+       struct rte_vhost_memory *mem = vdev->mem;
+
+       for (i = 0; i < mem->nregions; i++) {
+               struct vfio_iommu_type1_dma_map dma_map;
+               struct rte_vhost_mem_region *reg;
+               reg = &mem->regions[i];
+
+               DEBUG("\n%s device_id %d vfio_container_fd %d, %u th regison, 
total %u region\n",
+                               __func__, vdev->did, vfio_container_fd,
+                               i, mem->nregions);
+               DEBUG("region %u host_user_addr 0x%lx, guest_phys_addr 0x%lx, 
size 0x%0lx\n",
+                               i, reg->host_user_addr,
+                               reg->guest_phys_addr, reg->size);
+
+               memset(&dma_map, 0, sizeof(dma_map));
+               dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
+               dma_map.vaddr = reg->host_user_addr;
+               dma_map.size = reg->size;
+               dma_map.iova = reg->guest_phys_addr;
+               dma_map.flags = VFIO_DMA_MAP_FLAG_READ | 
VFIO_DMA_MAP_FLAG_WRITE;
+
+               ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
+               if (ret) {
+                       DEBUG(" cannot set up DMA remapping, error %i (%s)\n",
+                                       errno, strerror(errno));
+                       return -1;
+               }
+       }
+
+       return 0;
+}
+
+static int
+virtio_net_pci_dma_unmap(int vfio_container_fd, struct virtio_net *vdev)
+{
+       uint32_t i, ret;
+       struct rte_vhost_memory *mem = vdev->mem;
+
+       /* VM start fails */
+       if (mem == NULL)
+               return 0;
+
+       for (i = 0; i < mem->nregions; i++) {
+               struct vfio_iommu_type1_dma_unmap dma_unmap;
+               struct rte_vhost_mem_region *reg;
+               reg = &mem->regions[i];
+
+               DEBUG("region %u host_user_addr 0x%lx, guest_phys_addr 0x%lx, 
size 0x%0lx\n",
+                               i, reg->host_user_addr,
+                               reg->guest_phys_addr, reg->size);
+
+               memset(&dma_unmap, 0, sizeof(dma_unmap));
+               dma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
+               dma_unmap.size = reg->size;
+               dma_unmap.iova = reg->guest_phys_addr;
+               dma_unmap.flags = 0;
+
+               ret = ioctl(vfio_container_fd, VFIO_IOMMU_UNMAP_DMA, 
&dma_unmap);
+
+               if (ret) {
+                       DEBUG(" cannot unset DMA remapping, error %i (%s)\n",
+                                       errno, strerror(errno));
+                       return -1;
+               }
+       }
+
+       return 0;
+}
+
+static int disable_device(struct virtio_net *dev)
+{
+       struct virtio_net_hw *hw = &virtio_net_pool[dev->eid][dev->did].hw;
+
+       virtio_net_stop_queues(dev);
+       virtio_net_vtpci_reset(hw);
+
+       return 0;
+}
+
/*
 * Set up the vhost datapath on the passthrough device for a configured
 * vhost-user session: program the IOMMU (DMA remap), configure vrings
 * and MSI-X vectors, then start the kick-relay.
 *
 * NOTE(review): steps that succeed are not rolled back when a later
 * step fails — confirm against virtio_net_dev_close() usage.
 */
static int virtio_net_dev_config(struct virtio_net *dev)
{
	if (!dev || invalid_port(dev->eid, dev->did)) {
		DEBUG("Invalid virtio_net struct\n");
		return -1;
	}

	int ret;
	struct rte_pci_device *pdev = &virtio_net_pool[dev->eid][dev->did].pdev;
	struct virtio_net_hw *hw = &virtio_net_pool[dev->eid][dev->did].hw;

	DEBUG("Try to config device: dev id %u bus %02x device %02x function %0x\n",
			dev->did, pdev->addr.bus,
			pdev->addr.devid, pdev->addr.function);

	/* map every guest memory region into the IOMMU (iova -> vaddr) */
	DEBUG("\ndevice_id %d mapped, set DMAR\n", dev->did);
	ret = virtio_net_pci_dma_map(hw->vfio_cfg.vfio_container_fd, dev);
	if (ret) {
		DEBUG("device_id %u DMA remapping failed, error %i (%s)\n",
				dev->did, errno, strerror(errno));
		return -1;
	}

	/* program vring addresses, features and MSI-X on the device */
	DEBUG("\ndevice_id %d DMAR set, config it\n", dev->did);
	ret = virtio_net_conf_pci(dev);
	if (ret)
		return -1;

	/* route device interrupts to the vhost callfd eventfds */
	ret = virtio_net_enable_vfio_intr(dev);
	if (ret)
		return -1;

	/* relay guest kicks to the device notify registers */
	ret = setup_notify_relay(dev);
	if (ret)
		return -1;

	return 0;
}
+
+static int virtio_net_dev_close(struct virtio_net *dev)
+{
+       int ret;
+       struct virtio_net_pci *vpci = &virtio_net_pool[dev->eid][dev->did];
+       int vfio_container_fd = vpci->hw.vfio_cfg.vfio_container_fd;
+
+       if (!dev || invalid_port(dev->eid, dev->did)) {
+               DEBUG("Invalid virtio_net struct\n");
+               return -1;
+       }
+
+       disable_device(dev);
+       unset_notify_relay(dev);
+       ret = virtio_net_disable_vfio_intr(dev);
+       if (ret < 0)
+               return -1;
+
+       DEBUG("\n%s: unset DMAR for device_id %d\n", __func__, dev->did);
+       ret = virtio_net_pci_dma_unmap(vfio_container_fd, dev);
+       if (ret) {
+               DEBUG("device_id %u DMA reset DMAR failed, error %i (%s)\n",
+                               dev->did, errno, strerror(errno));
+               return -1;
+       }
+
+       return 0;
+}
+
/*
 * Probe and take over up to MAX_VDPA_DEVICE_VIRTIO virtio-net PCI
 * devices for engine eid, mapping each with VFIO.  The number actually
 * claimed is recorded in total_virtio[eid].  Returns 0 on success, -1
 * if the base BDF is absent or no device could be mapped.
 */
static int virtio_net_pool_init(int eid)
{
	uint32_t i, ret;
	struct virtio_net_pci *vpci;
	struct rte_pci_device *pdev;
	struct rte_pci_addr *eng_addr;
	char dev_dir[PATH_MAX];

	/* sanity check: the engine's base BDF must exist in sysfs */
	eng_addr = &(vdpa_engines[eid]->eng_attr.id->pci_addr);
	snprintf(dev_dir, sizeof(dev_dir), "%s/" PCI_PRI_FMT,
			rte_pci_get_sysfs_path(),
			eng_addr->domain, eng_addr->bus,
			eng_addr->devid, eng_addr->function);

	if (access(dev_dir, R_OK) != 0) {
		DEBUG("%s: "PCI_PRI_FMT" does not exist\n", __func__,
				eng_addr->domain, eng_addr->bus,
				eng_addr->devid, eng_addr->function);
		return -1;
	}

	memset((char *)virtio_net_pool[eid], 0, sizeof(virtio_net_pool[0]));

	for (i = 0; i < MAX_VDPA_DEVICE_VIRTIO; i++) {
		pdev = &virtio_net_pool[eid][i].pdev;
		pdev->addr.domain = eng_addr->domain;
		pdev->addr.bus = eng_addr->bus;
		pdev->addr.devid = eng_addr->devid;
		pdev->addr.function = eng_addr->function;
		/*
		 * Assume that the virtio-net-pci listed is like
		 * 00:03.0, 00:04.0, 00:05,0, and so on.
		 * (assumes consecutive device ids from the base BDF —
		 * TODO confirm this matches the QEMU topology)
		 */
		pdev->addr.devid += i % 8;
		if (read_pci_dev(pdev) < 0) {
			/* first missing device ends the scan, not an error */
			DEBUG("Read PCI device failed, dev id %d\n", i);
			errno = 0;
			break;
		}
		DEBUG("%s: detected "PCI_PRI_FMT"\n", __func__,
				pdev->addr.domain, pdev->addr.bus,
				pdev->addr.devid, pdev->addr.function);

		/* Take control of an device by mapping it with vfio. */
		vpci = &virtio_net_pool[eid][i];
		ret = virtio_net_map_pci(vpci);
		if (ret) {
			DEBUG("\npci map to userspace failed\n");
			break;
		}
	}
	total_virtio[eid] = i;

	if (total_virtio[eid] <= 0) {
		DEBUG("\n%s: find no virtio devices\n", __func__);
		return -1;
	}

	pool_initiated[eid] = 1;
	return 0;
}
+
+static int virtio_net_pool_uninit(int eid)
+{
+       int i;
+       struct vfio_config *vfio_cfg;
+
+       for (i = 0; i < total_virtio[eid]; i++) {
+               vfio_cfg = &virtio_net_pool[eid][i].hw.vfio_cfg;
+               close(vfio_cfg->vfio_dev_fd);
+               close(vfio_cfg->group_fd);
+               close(vfio_cfg->vfio_container_fd);
+       }
+
+       total_virtio[eid] = 0;
+       pool_initiated[eid] = 0;
+       memset((char *)virtio_net_pool[eid], 0, sizeof(virtio_net_pool[0]));
+
+       return 0;
+}
+
+static int vdpa_virtio_eng_init(int eid,
+               struct rte_vdpa_eng_id *id __rte_unused)
+{
+       if (!pool_initiated[eid] && virtio_net_pool_init(eid) == 0)
+               return 0;
+       return -1;
+}
+
+static int vdpa_virtio_eng_uninit(int eid __rte_unused)
+{
+       if (pool_initiated[eid] && virtio_net_pool_uninit(eid) == 0)
+               return 0;
+       return -1;
+}
+
/* vDPA dev_conf op: resolve the vhost device by vid and configure the
 * datapath.  A NULL device is rejected inside virtio_net_dev_config().
 */
static int vdpa_virtio_dev_conf(int vid)
{
	return virtio_net_dev_config(get_device(vid));
}
+
/* vDPA dev_close op: resolve the vhost device by vid and tear the
 * datapath down.  virtio_net_dev_close() validates the pointer.
 */
static int vdpa_virtio_dev_close(int vid)
{
	struct virtio_net *vdev = get_device(vid);

	return virtio_net_dev_close(vdev);
}
+
/* Report how many vDPA devices this engine can expose.
 * Assume we have MAX_VDPA_DEVICE_VIRTIO virtio_net_pci devices.
 */
int vdpa_virtio_get_device_num(int eid __rte_unused)
{
	return MAX_VDPA_DEVICE_VIRTIO;
}
+
/* Report the fixed per-device queue capability of this driver. */
int vdpa_virtio_get_queue_num(int eid __rte_unused, int did __rte_unused)
{
	return MAX_QUEUES_VIRTIO;
}
+
/* Virtio feature bits this vDPA driver is willing to pass through. */
#define VDPA_SUPPORTED_FEATURES \
		((1ULL << VIRTIO_F_ANY_LAYOUT) | \
		(1ULL << VIRTIO_F_VERSION_1) | \
		(1ULL << VIRTIO_NET_F_MRG_RXBUF) | \
		(1ULL << VHOST_USER_F_PROTOCOL_FEATURES))

/* Capability reporting op: supported virtio features (same for all
 * engines/devices in this sample).
 */
uint64_t vdpa_virtio_get_features(int eid __rte_unused, int did __rte_unused)
{
	return VDPA_SUPPORTED_FEATURES;
}
+
/* vhost-user protocol feature bits this driver supports. */
#define VDPA_SUPPORTED_PROTOCOL_FEATURES \
		((1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\
		(1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK))

/* Capability reporting op: supported vhost-user protocol features. */
uint64_t vdpa_virtio_get_protocol_features(int eid __rte_unused,
		int did __rte_unused)
{
	return VDPA_SUPPORTED_PROTOCOL_FEATURES;
}
+
/* Allocate a pool slot for the vhost socket described by args. */
int vdpa_virtio_dev_init(int eid, char *args)
{
	return virtio_net_alloc_hw_ele(eid, args);
}
+
/* Free the pool slot previously allocated by vdpa_virtio_dev_init(). */
int vdpa_virtio_dev_uninit(int eid, int did)
{
	return virtio_net_free_hw_ele(eid, did);
}
+
/* No-op: the datapath starts when dev_conf completes. */
int vdpa_virtio_dev_start(int eid __rte_unused, int did __rte_unused)
{
	return 0;
}

/* No-op: teardown is handled by dev_close. */
int vdpa_virtio_dev_stop(int eid __rte_unused, int did __rte_unused)
{
	return 0;
}
+
/* Engine/device ops table registered with the vDPA framework. */
struct rte_vdpa_eng_driver vdpa_virtio_net_driver = {
	.name = "vdpa_virtio_net",
	.eng_ops = {
		.eng_init = vdpa_virtio_eng_init,
		.eng_uninit = vdpa_virtio_eng_uninit,
	},
	.dev_ops = {
		.dev_conf = vdpa_virtio_dev_conf,
		.dev_close = vdpa_virtio_dev_close,
		/* optional callbacks not implemented by this sample */
		.vring_state_set = NULL,
		.migration_done = NULL,
	},
};

RTE_VDPA_REGISTER_DRIVER(vdpa_virtio_net, vdpa_virtio_net_driver);
diff --git a/examples/vdpa/vdpa_virtio_net.h b/examples/vdpa/vdpa_virtio_net.h
new file mode 100644
index 0000000..b0a386a
--- /dev/null
+++ b/examples/vdpa/vdpa_virtio_net.h
@@ -0,0 +1,144 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VDPA_VIRTIO_NET_H_
+#define _VDPA_VIRTIO_NET_H_
+#include <stdint.h>
+#include <stdbool.h>
+#include <pthread.h>
+#include <linux/vfio.h>
+#include <rte_bus_pci.h>
+#include <rte_vdpa.h>
+#include <vhost.h>
+#include <vhost_user.h>
+
+/* Size of the per-engine virtio-net-pci device pool. */
+#define MAX_VDPA_DEVICE_VIRTIO 8
+/* Queue count reported by vdpa_virtio_get_queue_num(). */
+#define MAX_QUEUES_VIRTIO      1
+
+/* This is the PCI capability header: */
+/*
+ * This is the PCI capability header.  The layout is mapped directly
+ * onto device config space, so field order and sizes must not change.
+ */
+struct virtio_net_pci_cap {
+       uint8_t cap_vndr;    /* Generic PCI field: PCI_CAP_ID_VNDR */
+       uint8_t cap_next;    /* Generic PCI field: next ptr. */
+       uint8_t cap_len;     /* Generic PCI field: capability length */
+       uint8_t cfg_type;    /* Identifies the structure. */
+       uint8_t bar;         /* Where to find it. */
+       uint8_t padding[3];  /* Pad to full dword. */
+       uint32_t offset;     /* Offset within bar. */
+       uint32_t length;     /* Length of the structure, in bytes. */
+};
+
+/* Notification capability: generic header plus the notify multiplier. */
+struct virtio_net_pci_notify_cap {
+       struct virtio_net_pci_cap cap;
+       uint32_t notify_off_multiplier; /* Multiplier for queue_notify_off. */
+};
+
+/* Fields in VIRTIO_PCI_CAP_COMMON_CFG: */
+/*
+ * Fields in VIRTIO_PCI_CAP_COMMON_CFG.  This structure is overlaid on a
+ * device BAR region, so field order, widths and padding must match the
+ * device exactly -- do not reorder or repack.
+ */
+struct virtio_net_pci_common_cfg {
+       /* About the whole device. */
+       uint32_t device_feature_select; /* read-write */
+       uint32_t device_feature;        /* read-only */
+       uint32_t guest_feature_select;  /* read-write */
+       uint32_t guest_feature;         /* read-write */
+       uint16_t msix_config;           /* read-write */
+       uint16_t num_queues;            /* read-only */
+       uint8_t device_status;          /* read-write */
+       uint8_t config_generation;      /* read-only */
+
+       /* About a specific virtqueue (selected via queue_select). */
+       uint16_t queue_select;          /* read-write */
+       uint16_t queue_size;            /* read-write, power of 2. */
+       uint16_t queue_msix_vector;     /* read-write */
+       uint16_t queue_enable;          /* read-write */
+       uint16_t queue_notify_off;      /* read-only */
+       uint32_t queue_desc_lo;         /* read-write */
+       uint32_t queue_desc_hi;         /* read-write */
+       uint32_t queue_avail_lo;        /* read-write */
+       uint32_t queue_avail_hi;        /* read-write */
+       uint32_t queue_used_lo;         /* read-write */
+       uint32_t queue_used_hi;         /* read-write */
+};
+
+/*
+ * virtio-net device config space layout; packed because it mirrors the
+ * on-device structure.
+ * NOTE(review): ETHER_ADDR_LEN is expected to come in via one of the
+ * included DPDK headers -- confirm this header is self-contained.
+ */
+struct virtio_net_net_config {
+       /* The config defining mac address (if VIRTIO_NET_F_MAC) */
+       uint8_t    mac[ETHER_ADDR_LEN];
+       /* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */
+       uint16_t   status;
+       uint16_t   max_virtqueue_pairs;
+} __attribute__((packed));
+
+/* File descriptors and group id used to drive the device through VFIO. */
+struct vfio_config {
+       int vfio_dev_fd;        /* device fd */
+       int vfio_container_fd;  /* container fd (IOMMU domain) */
+       int group_no;           /* IOMMU group number */
+       int group_fd;           /* IOMMU group fd */
+};
+
+/* Per-device hardware state for one virtio-net-pci backend. */
+struct virtio_net_hw {
+       struct virtnet_ctl *cvq;        /* control virtqueue, if any */
+       uint64_t    req_guest_features; /* features requested for the guest */
+       uint64_t    guest_features;     /* features negotiated with device */
+       uint32_t    notify_off_multiplier;
+       uint8_t     *isr;               /* mapped ISR region */
+       uint16_t    *notify_base;       /* mapped notify region base */
+       struct virtio_net_pci_common_cfg *common_cfg; /* mapped common cfg */
+       struct virtio_net_net_config *dev_cfg;        /* mapped device cfg */
+       /* per-vring notify addresses; 2 per queue pair (rx and tx) */
+       uint16_t    *notify_addr[MAX_QUEUES_VIRTIO * 2];
+       struct vfio_config vfio_cfg;    /* VFIO fds for this device */
+};
+
+/* One slot of the engine's device pool. */
+struct virtio_net_pci {
+       bool used;                      /* slot allocated to a vhost socket */
+       struct rte_pci_device pdev;     /* underlying PCI device */
+       struct virtio_net_hw hw;        /* mapped regions and features */
+       pthread_t tid;  /* thread for notify relay */
+       int epfd;       /* epoll fd used by the relay thread */
+};
+
+/*
+ * Debug logging: define RTE_LIBRTE_VHOST_VIRTIO_NET_DEBUG (e.g. via
+ * CFLAGS) to enable DEBUG() output.  The previous unconditional
+ * #define right above the #ifdef defeated the guard and forced debug
+ * output on in every build.
+ * NOTE(review): DEBUG() expands to printf; confirm <stdio.h> is pulled
+ * in by an included header, as this header does not include it.
+ */
+#ifdef RTE_LIBRTE_VHOST_VIRTIO_NET_DEBUG
+       #define DEBUG(fmt, ...) printf(fmt, ##__VA_ARGS__)
+#else
+       #define DEBUG(fmt, ...) do {} while (0)
+#endif
+
+/* Capability reporting ops. */
+int vdpa_virtio_get_device_num(int eid);
+int vdpa_virtio_get_queue_num(int eid, int did);
+uint64_t vdpa_virtio_get_features(int eid, int did);
+uint64_t vdpa_virtio_get_protocol_features(int eid, int did);
+/* Device lifecycle ops. */
+int vdpa_virtio_dev_init(int eid, char *args);
+int vdpa_virtio_dev_uninit(int eid, int did);
+int vdpa_virtio_dev_start(int eid, int did);
+int vdpa_virtio_dev_stop(int eid, int did);
+
+#endif /* _VDPA_VIRTIO_NET_H_ */
-- 
1.8.3.1

Reply via email to