Add support for running a vhost-pmd vdev in a secondary process. The
information the secondary process needs, such as the memory regions,
kickfd and callfd, is obtained through the primary/secondary
communication channel.

By invoking rte_vhost_set_vring_effective_fd(), we can then set a
kickfd that is recognized by the secondary process.

Signed-off-by: Jianfeng Tan <jianfeng....@intel.com>
---
 drivers/net/vhost/rte_eth_vhost.c | 200 +++++++++++++++++++++++++++++++++++---
 1 file changed, 187 insertions(+), 13 deletions(-)

diff --git a/drivers/net/vhost/rte_eth_vhost.c 
b/drivers/net/vhost/rte_eth_vhost.c
index 04179b4..9d296aa 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -33,6 +33,7 @@
 #include <unistd.h>
 #include <pthread.h>
 #include <stdbool.h>
+#include <sys/mman.h>
 
 #include <rte_mbuf.h>
 #include <rte_ethdev.h>
@@ -46,6 +47,20 @@
 
 #include "rte_eth_vhost.h"
 
+/*
+ * Message types carried in vhost_params.type on the "vhost pmd"
+ * multi-process channel.
+ */
+/* memory region table in regions[], sent with one fd per region */
+#define VHOST_MSG_TYPE_REGIONS 1
+/* vid + vring_idx, sent with two fds: callfd then kickfd */
+#define VHOST_MSG_TYPE_SET_FDS 2
+/* portid only; asks the receiver to share the device behind that port */
+#define VHOST_MSG_TYPE_INIT    3
+
+/*
+ * Message exchanged on the "vhost pmd" multi-process channel.
+ *
+ * The fixed header is followed by a variable number of memory regions,
+ * which are only present in VHOST_MSG_TYPE_REGIONS messages; the sender
+ * allocates sizeof(struct vhost_params) plus the size of the region array.
+ */
+struct vhost_params {
+       int type;               /* one of VHOST_MSG_TYPE_* */
+       union {
+               int vid;        /* used by REGIONS and SET_FDS */
+               int portid;     /* used by INIT */
+       };
+       int vring_idx;          /* used by SET_FDS */
+       /* C99 flexible array member instead of the GNU [0] extension */
+       struct rte_vhost_mem_region regions[];
+};
+
 enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
 
 #define ETH_VHOST_IFACE_ARG            "iface"
@@ -550,6 +565,66 @@ update_queuing_status(struct rte_eth_dev *dev)
 }
 
 static int
+share_device(int vid)
+{
+       uint32_t i, vring_num;
+       int len;
+       int fds[8];
+       struct rte_vhost_memory *mem;
+       struct vhost_params *params;
+       struct rte_vhost_vring vring;
+
+       /* share mem table */
+       if (rte_vhost_get_mem_table(vid, &mem) < 0) {
+               RTE_LOG(ERR, PMD, "Failed to get mem table\n");
+               return 0;
+       }
+       for (i = 0; i < mem->nregions; ++i)
+               fds[i] = mem->regions[i].fd;
+
+       len = sizeof(struct rte_vhost_mem_region) * mem->nregions;
+       params = malloc(sizeof(*params) + len);
+       if (params == NULL) {
+               RTE_LOG(ERR, PMD, "Failed to allocate memory\n");
+               return -1;
+       }
+
+       params->type = VHOST_MSG_TYPE_REGIONS;
+       params->vid = vid;
+       memcpy(params->regions, mem->regions, len);
+
+       if (rte_eal_mp_sendmsg("vhost pmd", params, sizeof(*params) + len,
+                              fds, mem->nregions) < 0) {
+               RTE_LOG(ERR, PMD, "Failed to share mem table\n");
+               free(params);
+               return -1;
+       }
+
+       /* share callfd and kickfd */
+       params->type = VHOST_MSG_TYPE_SET_FDS;
+       vring_num = rte_vhost_get_vring_num(vid);
+       for (i = 0; i < vring_num; i++) {
+               if (rte_vhost_get_vhost_vring(vid, i, &vring) < 0) {
+                       RTE_LOG(ERR, PMD, "Failed to get vring, idx = %d\n", i);
+                       free(params);
+                       return -1;
+               }
+
+               params->vring_idx = i;
+               fds[0] = vring.callfd;
+               fds[1] = vring.kickfd;
+               if (rte_eal_mp_sendmsg("vhost pmd", params,
+                                      sizeof(*params), fds, 2) < 0) {
+                       RTE_LOG(ERR, PMD, "Failed to set fds\n");
+                       return -1;
+               }
+       }
+
+       free(params);
+       return 0;
+}
+
+static int
 new_device(int vid)
 {
        struct rte_eth_dev *eth_dev;
@@ -610,6 +685,8 @@ new_device(int vid)
        _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC,
                                      NULL, NULL);
 
+       share_device(vid);
+
        return 0;
 }
 
@@ -1025,13 +1102,6 @@ eth_dev_vhost_create(struct rte_vdev_device *dev, char 
*iface_name,
        RTE_LOG(INFO, PMD, "Creating VHOST-USER backend on numa socket %u\n",
                numa_node);
 
-       /* now do all data allocation - for eth_dev structure and internal
-        * (private) data
-        */
-       data = rte_zmalloc_socket(name, sizeof(*data), 0, numa_node);
-       if (data == NULL)
-               goto error;
-
        list = rte_zmalloc_socket(name, sizeof(*list), 0, numa_node);
        if (list == NULL)
                goto error;
@@ -1073,11 +1143,7 @@ eth_dev_vhost_create(struct rte_vdev_device *dev, char 
*iface_name,
        rte_spinlock_init(&vring_state->lock);
        vring_states[eth_dev->data->port_id] = vring_state;
 
-       /* We'll replace the 'data' originally allocated by eth_dev. So the
-        * vhost PMD resources won't be shared between multi processes.
-        */
-       rte_memcpy(data, eth_dev->data, sizeof(*data));
-       eth_dev->data = data;
+       data = eth_dev->data;
 
        data->nb_rx_queues = queues;
        data->nb_tx_queues = queues;
@@ -1125,6 +1191,30 @@ eth_dev_vhost_create(struct rte_vdev_device *dev, char 
*iface_name,
        return -1;
 }
 
+/*
+ * Attach to a vhost ethdev port: allocate the ethdev entry for 'dev' and
+ * install the device ops and the rx/tx burst handlers on it.
+ *
+ * Returns the port id of the entry, or -1 if no entry could be allocated.
+ */
+static int
+eth_dev_vhost_attach(struct rte_vdev_device *dev)
+{
+       struct rte_eth_dev *port;
+
+       RTE_LOG(INFO, PMD, "Attach vhost user port\n");
+
+       /* reserve an ethdev entry */
+       port = rte_eth_vdev_allocate(dev, sizeof(struct pmd_internal));
+       if (!port)
+               return -1;
+
+       /* hook up the burst handlers and the device ops */
+       port->rx_pkt_burst = eth_vhost_rx;
+       port->tx_pkt_burst = eth_vhost_tx;
+       port->dev_ops = &ops;
+
+       return port->data->port_id;
+}
+
 static inline int
 open_iface(const char *key __rte_unused, const char *value, void *extra_args)
 {
@@ -1154,10 +1244,84 @@ open_int(const char *key __rte_unused, const char 
*value, void *extra_args)
 }
 
 static int
+vhost_pmd_action(const void *params, int len, int fds[], int fds_num)
+{
+       int i;
+       int vid;
+       void *base_addr;
+       const struct vhost_params *p = params;
+       const struct rte_vhost_mem_region *regions;
+
+       if (len < (int)sizeof(*p)) {
+               RTE_LOG(ERR, PMD, "message if too short\n");
+               return -1;
+       }
+
+       switch (p->type) {
+       case VHOST_MSG_TYPE_REGIONS:
+               regions = p->regions;
+               for (i = 0; i < fds_num; ++i) {
+                       base_addr = mmap(regions[i].mmap_addr,
+                                        regions[i].mmap_size,
+                                        PROT_READ | PROT_WRITE,
+                                        MAP_FIXED | MAP_SHARED, fds[i], 0);
+                       if (base_addr != regions[i].mmap_addr) {
+                               RTE_LOG(ERR, PMD,
+                                       "vhost in secondary mmap error: %s\n",
+                                       strerror(errno));
+                               break;
+                       }
+               }
+               break;
+       case VHOST_MSG_TYPE_SET_FDS:
+               rte_vhost_set_vring_effective_fd(p->vid,
+                                                p->vring_idx,
+                                                fds[0], fds[1]);
+               break;
+       case VHOST_MSG_TYPE_INIT:
+               vid = rte_eth_vhost_get_vid_from_port_id(p->portid);
+               share_device(vid);
+               break;
+       }
+
+       return 0;
+}
+
+/*
+ * Probe path used when the process type is RTE_PROC_SECONDARY.
+ *
+ * Attaches to the ethdev port of 'dev' and, if the port's private data
+ * reports the vhost device as attached, sends a VHOST_MSG_TYPE_INIT request
+ * on the "vhost pmd" channel so that the device state gets shared.
+ *
+ * Returns 0 on success, including when the device is not ready yet and no
+ * request is sent, and -1 if attaching or sending the request fails.
+ */
+static int
+probe_secondary(struct rte_vdev_device *dev)
+{
+       int portid = eth_dev_vhost_attach(dev);
+       struct rte_eth_dev *eth_dev;
+       struct pmd_internal *internal;
+       struct vhost_params p;
+
+       if (portid < 0)
+               return -1;
+
+       eth_dev = &rte_eth_devices[portid];
+       internal = eth_dev->data->dev_private;
+
+       if (!internal ||
+           rte_atomic32_read(&internal->dev_attached) == 0) {
+               RTE_LOG(INFO, PMD, "%s is not ready\n", dev->device.name);
+               return 0;
+       }
+
+       /*
+        * The whole struct is sent to the peer, so clear it first: only type
+        * and portid are set for INIT, and vring_idx and any padding would
+        * otherwise carry indeterminate stack contents.
+        */
+       memset(&p, 0, sizeof(p));
+       p.type = VHOST_MSG_TYPE_INIT;
+       p.portid = portid;
+       if (rte_eal_mp_sendmsg("vhost pmd", &p, sizeof(p), NULL, 0) < 0) {
+               RTE_LOG(ERR, PMD, "Failed to send request for init\n");
+               return -1;
+       }
+
+       return 0;
+}
+
+static int
 rte_pmd_vhost_probe(struct rte_vdev_device *dev)
 {
        struct rte_kvargs *kvlist = NULL;
-       int ret = 0;
+       int ret;
        char *iface_name;
        uint16_t queues;
        uint64_t flags = 0;
@@ -1167,6 +1331,15 @@ rte_pmd_vhost_probe(struct rte_vdev_device *dev)
        RTE_LOG(INFO, PMD, "Initializing pmd_vhost for %s\n",
                rte_vdev_device_name(dev));
 
+       ret = rte_eal_mp_action_register("vhost pmd", vhost_pmd_action);
+       if (ret < 0 && ret != -EEXIST) {
+               RTE_LOG(ERR, PMD, "vhost fails to add action\n");
+               return -1;
+       }
+
+       if (rte_eal_process_type() == RTE_PROC_SECONDARY)
+               return probe_secondary(dev);
+
        kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments);
        if (kvlist == NULL)
                return -1;
@@ -1216,6 +1389,7 @@ rte_pmd_vhost_probe(struct rte_vdev_device *dev)
        eth_dev_vhost_create(dev, iface_name, queues, dev->device.numa_node,
                flags);
 
+       ret = 0;
 out_free:
        rte_kvargs_free(kvlist);
        return ret;
-- 
2.7.4

Reply via email to