From: "Guo, Jia" <jia....@intel.com>

This patch adds a variable "uevent_fd" to the structure
"rte_intr_handle" to enable kernel object uevent monitoring,
and adds two uevent APIs to the rte eal interrupt library,
"rte_uevent_connect" and "rte_uevent_get", so that all drivers
can use these APIs to monitor and read out uevents and then
handle them accordingly, for example by detaching or attaching
the device.

Signed-off-by: Guo, Jia <jia....@intel.com>
---
v3->v2: refine some error return values
        refine the string searching logic to avoid memory issues
---
 lib/librte_eal/common/eal_common_pci_uio.c         |   6 +-
 lib/librte_eal/linuxapp/eal/eal_interrupts.c       | 136 ++++++++++++++++++++-
 lib/librte_eal/linuxapp/eal/eal_pci_uio.c          |   6 +
 .../linuxapp/eal/include/exec-env/rte_interrupts.h |  37 ++++++
 4 files changed, 182 insertions(+), 3 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_pci_uio.c b/lib/librte_eal/common/eal_common_pci_uio.c
index 367a681..5b62f70 100644
--- a/lib/librte_eal/common/eal_common_pci_uio.c
+++ b/lib/librte_eal/common/eal_common_pci_uio.c
@@ -117,6 +117,7 @@
 
        dev->intr_handle.fd = -1;
        dev->intr_handle.uio_cfg_fd = -1;
+       dev->intr_handle.uevent_fd = -1;
        dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
 
        /* secondary processes - use already recorded details */
@@ -227,7 +228,10 @@
                close(dev->intr_handle.uio_cfg_fd);
                dev->intr_handle.uio_cfg_fd = -1;
        }
-
+       if (dev->intr_handle.uevent_fd >= 0) {
+               close(dev->intr_handle.uevent_fd);
+               dev->intr_handle.uevent_fd = -1;
+       }
        dev->intr_handle.fd = -1;
        dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
 }
diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
index 2e3bd12..2c4a3fb 100644
--- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c
+++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
@@ -65,6 +65,10 @@
 #include <rte_errno.h>
 #include <rte_spinlock.h>
 
+#include <sys/socket.h>
+#include <linux/netlink.h>
+#include <sys/epoll.h>
+
 #include "eal_private.h"
 #include "eal_vfio.h"
 #include "eal_thread.h"
@@ -669,10 +673,13 @@ struct rte_intr_source {
                        RTE_SET_USED(r);
                        return -1;
                }
+
                rte_spinlock_lock(&intr_lock);
                TAILQ_FOREACH(src, &intr_sources, next)
-                       if (src->intr_handle.fd ==
-                                       events[n].data.fd)
+                       if ((src->intr_handle.fd ==
+                                       events[n].data.fd) ||
+                               (src->intr_handle.uevent_fd ==
+                                       events[n].data.fd))
                                break;
                if (src == NULL){
                        rte_spinlock_unlock(&intr_lock);
@@ -858,7 +865,24 @@ static __attribute__((noreturn)) void *
                        }
                        else
                                numfds++;
+
+                       /**
+                        * add device uevent file descriptor
+                        * into wait list for uevent monitoring.
+                        */
+                       ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
+                       ev.data.fd = src->intr_handle.uevent_fd;
+                       if (epoll_ctl(pfd, EPOLL_CTL_ADD,
+                                       src->intr_handle.uevent_fd, &ev) < 0){
+                               rte_panic("Error adding uevent_fd %d epoll_ctl"
+                                       ", %s\n",
+                                       src->intr_handle.uevent_fd,
+                                       strerror(errno));
+                       } else
+                               numfds++;
                }
+
+
                rte_spinlock_unlock(&intr_lock);
                /* serve the interrupt */
                eal_intr_handle_interrupts(pfd, numfds);
@@ -1255,3 +1279,111 @@ static __attribute__((noreturn)) void *
 
        return 0;
 }
+
+/*
+ * Open a non-blocking netlink socket on which kernel object uevents
+ * can be received.
+ *
+ * Returns the socket file descriptor on success, -1 on failure (the
+ * reason is logged and no descriptor is left open).
+ */
+int
+rte_uevent_connect(void)
+{
+       struct sockaddr_nl addr;
+       int netlink_fd;
+       int size = 64 * 1024;
+       int nonblock = 1;
+
+       memset(&addr, 0, sizeof(addr));
+       addr.nl_family = AF_NETLINK;
+       addr.nl_pid = 0;
+       /*
+        * NOTE(review): nl_groups is a bitmask of multicast groups, so
+        * 0xffffffff joins every group of this netlink family - confirm
+        * that this, rather than a single group, is intended.
+        */
+       addr.nl_groups = 0xffffffff;
+
+       netlink_fd = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_KOBJECT_UEVENT);
+       if (netlink_fd < 0) {
+               RTE_LOG(ERR, EAL,
+                       "uevent netlink socket() failed: %s\n",
+                       strerror(errno));
+               return -1;
+       }
+
+       /*
+        * Enlarging the receive buffer is only an optimisation and
+        * SO_RCVBUFFORCE needs CAP_NET_ADMIN, so a failure here is
+        * reported but does not abort the connection.
+        */
+       if (setsockopt(netlink_fd, SOL_SOCKET, SO_RCVBUFFORCE,
+                       &size, sizeof(size)) < 0)
+               RTE_LOG(DEBUG, EAL,
+                       "setsockopt(SO_RCVBUFFORCE) failed: %s\n",
+                       strerror(errno));
+
+       if (ioctl(netlink_fd, FIONBIO, &nonblock) != 0) {
+               RTE_LOG(ERR, EAL,
+               "ioctl(FIONBIO) failed\n");
+               close(netlink_fd);
+               return -1;
+       }
+
+       if (bind(netlink_fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
+               RTE_LOG(ERR, EAL,
+                       "uevent netlink bind() failed: %s\n",
+                       strerror(errno));
+               close(netlink_fd);
+               return -1;
+       }
+
+       return netlink_fd;
+}
+
+/*
+ * Extract the subsystem and the action from a raw uevent message.
+ *
+ * The message is scanned as a sequence of NUL-separated strings; only
+ * strings starting with "ACTION=" or "SUBSYSTEM=" are interpreted, and
+ * when a key occurs more than once the last occurrence wins.
+ *
+ * buf   - message buffer; exactly RTE_UEVENT_MSG_LEN bytes are scanned,
+ *         so the part not filled by the message must be zeroed.
+ * event - filled in only when 0 is returned.
+ *
+ * Returns 0 if the message is an "add" or "remove" event of the "uio"
+ * subsystem, -1 for anything else.
+ */
+static int
+parse_event(const char *buf, struct rte_uevent *event)
+{
+       const char *end = buf + RTE_UEVENT_MSG_LEN;
+       const char *action = "";
+       const char *subsystem = "";
+       size_t action_len = 0;
+       size_t subsystem_len = 0;
+
+       while (buf < end) {
+               /* Bound every string by the buffer end, never read past it. */
+               const char *nul = memchr(buf, '\0', end - buf);
+               size_t len = (nul != NULL) ?
+                       (size_t)(nul - buf) : (size_t)(end - buf);
+
+               if (len >= 7 && memcmp(buf, "ACTION=", 7) == 0) {
+                       action = buf + 7;
+                       action_len = len - 7;
+               } else if (len >= 10 && memcmp(buf, "SUBSYSTEM=", 10) == 0) {
+                       subsystem = buf + 10;
+                       subsystem_len = len - 10;
+               }
+
+               /* Step over the terminator, or stop at an unterminated tail. */
+               buf = (nul != NULL) ? nul + 1 : end;
+       }
+
+       if (subsystem_len < 3 || memcmp(subsystem, "uio", 3) != 0)
+               return -1;
+
+       /*
+        * Only add and remove are representable in enum rte_uevent_action;
+        * reject any other action instead of leaving event->action at
+        * whatever value the caller initialised it to.
+        */
+       if (action_len >= 3 && memcmp(action, "add", 3) == 0)
+               event->action = RTE_UEVENT_ADD;
+       else if (action_len >= 6 && memcmp(action, "remove", 6) == 0)
+               event->action = RTE_UEVENT_REMOVE;
+       else
+               return -1;
+
+       event->subsystem = RTE_UEVENT_SUBSYSTEM_UIO;
+       return 0;
+}
+
+/*
+ * Receive one message from the uevent socket without blocking and
+ * decode it into *uevent.
+ *
+ * *uevent is zeroed before the read. Returns the result of
+ * parse_event() when data was received, and -1 when the read fails
+ * (logged) or returns no data.
+ */
+int
+rte_uevent_get(int fd, struct rte_uevent *uevent)
+{
+       char msg[RTE_UEVENT_MSG_LEN];
+       int nbytes;
+
+       memset(uevent, 0, sizeof(*uevent));
+       memset(msg, 0, sizeof(msg));
+
+       /* One byte is held back so the message always ends in a NUL. */
+       nbytes = recv(fd, msg, sizeof(msg) - 1, MSG_DONTWAIT);
+
+       if (nbytes < 0) {
+               RTE_LOG(ERR, EAL,
+               "Socket read error(%d): %s\n",
+               errno, strerror(errno));
+               return -1;
+       }
+
+       /* connection closed */
+       if (nbytes == 0)
+               return -1;
+
+       return parse_event(msg, uevent);
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
index fa10329..eae9cd5 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
@@ -231,6 +231,10 @@
                close(dev->intr_handle.uio_cfg_fd);
                dev->intr_handle.uio_cfg_fd = -1;
        }
+       if (dev->intr_handle.uevent_fd >= 0) {
+               close(dev->intr_handle.uevent_fd);
+               dev->intr_handle.uevent_fd = -1;
+       }
        if (dev->intr_handle.fd >= 0) {
                close(dev->intr_handle.fd);
                dev->intr_handle.fd = -1;
@@ -276,6 +280,8 @@
                goto error;
        }
 
+       dev->intr_handle.uevent_fd = rte_uevent_connect();
+
        if (dev->kdrv == RTE_KDRV_IGB_UIO)
                dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
        else {
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
index 6daffeb..0b31a22 100644
--- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
+++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
@@ -90,6 +90,7 @@ struct rte_intr_handle {
                                        for uio_pci_generic */
        };
        int fd;  /**< interrupt event file descriptor */
+       int uevent_fd;   /**< uevent file descriptor */
        enum rte_intr_handle_type type;  /**< handle type */
        uint32_t max_intr;             /**< max interrupt requested */
        uint32_t nb_efd;               /**< number of available efd(event fd) */
@@ -99,6 +100,19 @@ struct rte_intr_handle {
        int *intr_vec;                 /**< intr vector number array */
 };
 
+#define RTE_UEVENT_MSG_LEN 4096
+#define RTE_UEVENT_SUBSYSTEM_UIO 1
+
+/** Device actions that can be reported in struct rte_uevent. */
+enum rte_uevent_action {
+       RTE_UEVENT_ADD = 0,             /**< a device was added */
+       RTE_UEVENT_REMOVE = 1,  /**< a device was removed */
+};
+
+/** Decoded uevent, as filled in by rte_uevent_get(). */
+struct rte_uevent {
+       enum rte_uevent_action action;  /**< uevent action type */
+       int subsystem;                          /**< subsystem id (RTE_UEVENT_SUBSYSTEM_UIO) */
+};
+
 #define RTE_EPOLL_PER_THREAD        -1  /**< to hint using per thread epfd */
 
 /**
@@ -236,4 +250,27 @@ struct rte_intr_handle {
 int
 rte_intr_cap_multiple(struct rte_intr_handle *intr_handle);
 
+/**
+ * Read one uevent from the given file descriptor without blocking
+ * and decode it.
+ *
+ * @param fd
+ *   The file descriptor the uevent is read from, as returned by
+ *   rte_uevent_connect().
+ * @param uevent
+ *   Pointer to the structure that receives the decoded uevent. It is
+ *   zeroed before the read.
+ * @return
+ *   - On success (a uio subsystem uevent was decoded), zero.
+ *   - On failure (read error, no data, or a uevent that is not from
+ *     the uio subsystem), a negative value.
+ */
+int
+rte_uevent_get(int fd, struct rte_uevent *uevent);
+
+/**
+ * Open a non-blocking netlink socket for receiving kernel object
+ * uevents.
+ *
+ * @return
+ *   - On success, the file descriptor of the uevent socket.
+ *   - On failure, a negative value.
+ */
+int
+rte_uevent_connect(void);
+
 #endif /* _RTE_LINUXAPP_INTERRUPTS_H_ */
-- 
1.8.3.1

Reply via email to