The current implementation is somewhat convoluted and inefficient,
and can lead to race conditions when updating sigio fds. This change
rewrites the implementation based on epoll and tgkill for improved
scalability and reliability.

Signed-off-by: Tiwei Bie <tiwei....@antgroup.com>
---
 arch/um/drivers/random.c       |   2 +-
 arch/um/drivers/rtc_user.c     |   2 +-
 arch/um/include/shared/os.h    |   2 +-
 arch/um/include/shared/sigio.h |   1 -
 arch/um/kernel/sigio.c         |  26 ---
 arch/um/os-Linux/sigio.c       | 330 +++++----------------------------
 6 files changed, 47 insertions(+), 316 deletions(-)

diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c
index da985e0dc69a..ca08c91f47a3 100644
--- a/arch/um/drivers/random.c
+++ b/arch/um/drivers/random.c
@@ -79,7 +79,7 @@ static int __init rng_init (void)
        if (err < 0)
                goto err_out_cleanup_hw;
 
-       sigio_broken(random_fd);
+       sigio_broken();
        hwrng.name = RNG_MODULE_NAME;
        hwrng.read = rng_dev_read;
 
diff --git a/arch/um/drivers/rtc_user.c b/arch/um/drivers/rtc_user.c
index 7c3cec4c68cf..51e79f3148cd 100644
--- a/arch/um/drivers/rtc_user.c
+++ b/arch/um/drivers/rtc_user.c
@@ -39,7 +39,7 @@ int uml_rtc_start(bool timetravel)
                }
 
                /* apparently timerfd won't send SIGIO, use workaround */
-               sigio_broken(uml_rtc_irq_fds[0]);
+               sigio_broken();
                err = add_sigio_fd(uml_rtc_irq_fds[0]);
                if (err < 0) {
                        close(uml_rtc_irq_fds[0]);
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index c4f8f990ffb8..9a146912dd75 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -315,7 +315,7 @@ extern void um_irqs_resume(void);
 extern int add_sigio_fd(int fd);
 extern int ignore_sigio_fd(int fd);
 extern void maybe_sigio_broken(int fd);
-extern void sigio_broken(int fd);
+extern void sigio_broken(void);
 /*
  * unlocked versions for IRQ controller code.
  *
diff --git a/arch/um/include/shared/sigio.h b/arch/um/include/shared/sigio.h
index e60c8b227844..c6c2edce1f6d 100644
--- a/arch/um/include/shared/sigio.h
+++ b/arch/um/include/shared/sigio.h
@@ -6,7 +6,6 @@
 #ifndef __SIGIO_H__
 #define __SIGIO_H__
 
-extern int write_sigio_irq(int fd);
 extern void sigio_lock(void);
 extern void sigio_unlock(void);
 
diff --git a/arch/um/kernel/sigio.c b/arch/um/kernel/sigio.c
index 5085a50c3b8c..4fc04742048a 100644
--- a/arch/um/kernel/sigio.c
+++ b/arch/um/kernel/sigio.c
@@ -8,32 +8,6 @@
 #include <os.h>
 #include <sigio.h>
 
-/* Protected by sigio_lock() called from write_sigio_workaround */
-static int sigio_irq_fd = -1;
-
-static irqreturn_t sigio_interrupt(int irq, void *data)
-{
-       char c;
-
-       os_read_file(sigio_irq_fd, &c, sizeof(c));
-       return IRQ_HANDLED;
-}
-
-int write_sigio_irq(int fd)
-{
-       int err;
-
-       err = um_request_irq(SIGIO_WRITE_IRQ, fd, IRQ_READ, sigio_interrupt,
-                            0, "write sigio", NULL);
-       if (err < 0) {
-               printk(KERN_ERR "write_sigio_irq : um_request_irq failed, "
-                      "err = %d\n", err);
-               return -1;
-       }
-       sigio_irq_fd = fd;
-       return 0;
-}
-
 /* These are called from os-Linux/sigio.c to protect its pollfds arrays. */
 static DEFINE_MUTEX(sigio_mutex);
 
diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c
index 61b348a2ea97..a05a6ecee756 100644
--- a/arch/um/os-Linux/sigio.c
+++ b/arch/um/os-Linux/sigio.c
@@ -11,6 +11,7 @@
 #include <sched.h>
 #include <signal.h>
 #include <string.h>
+#include <sys/epoll.h>
 #include <kern_util.h>
 #include <init.h>
 #include <os.h>
@@ -23,180 +24,49 @@
  */
 static struct os_helper_thread *write_sigio_td;
 
-/*
- * These arrays are initialized before the sigio thread is started, and
- * the descriptors closed after it is killed.  So, it can't see them change.
- * On the UML side, they are changed under the sigio_lock.
- */
-#define SIGIO_FDS_INIT {-1, -1}
-
-static int write_sigio_fds[2] = SIGIO_FDS_INIT;
-static int sigio_private[2] = SIGIO_FDS_INIT;
+static int epollfd = -1;
 
-struct pollfds {
-       struct pollfd *poll;
-       int size;
-       int used;
-};
+#define MAX_EPOLL_EVENTS 64
 
-/*
- * Protected by sigio_lock().  Used by the sigio thread, but the UML thread
- * synchronizes with it.
- */
-static struct pollfds current_poll;
-static struct pollfds next_poll;
-static struct pollfds all_sigio_fds;
+static struct epoll_event epoll_events[MAX_EPOLL_EVENTS];
 
 static void *write_sigio_thread(void *unused)
 {
-       struct pollfds *fds, tmp;
-       struct pollfd *p;
-       int i, n, respond_fd;
-       char c;
+       int pid = getpid();
+       int r;
 
        os_fix_helper_thread_signals();
 
-       fds = &current_poll;
        while (1) {
-               n = poll(fds->poll, fds->used, -1);
-               if (n < 0) {
+               r = epoll_wait(epollfd, epoll_events, MAX_EPOLL_EVENTS, -1);
+               if (r < 0) {
                        if (errno == EINTR)
                                continue;
-                       printk(UM_KERN_ERR "write_sigio_thread : poll returned "
-                              "%d, errno = %d\n", n, errno);
+                       printk(UM_KERN_ERR "%s: epoll_wait failed, errno = 
%d\n",
+                              __func__, errno);
                }
-               for (i = 0; i < fds->used; i++) {
-                       p = &fds->poll[i];
-                       if (p->revents == 0)
-                               continue;
-                       if (p->fd == sigio_private[1]) {
-                               CATCH_EINTR(n = read(sigio_private[1], &c,
-                                                    sizeof(c)));
-                               if (n != sizeof(c))
-                                       printk(UM_KERN_ERR
-                                              "write_sigio_thread : "
-                                              "read on socket failed, "
-                                              "err = %d\n", errno);
-                               tmp = current_poll;
-                               current_poll = next_poll;
-                               next_poll = tmp;
-                               respond_fd = sigio_private[1];
-                       }
-                       else {
-                               respond_fd = write_sigio_fds[1];
-                               fds->used--;
-                               memmove(&fds->poll[i], &fds->poll[i + 1],
-                                       (fds->used - i) * sizeof(*fds->poll));
-                       }
-
-                       CATCH_EINTR(n = write(respond_fd, &c, sizeof(c)));
-                       if (n != sizeof(c))
-                               printk(UM_KERN_ERR "write_sigio_thread : "
-                                      "write on socket failed, err = %d\n",
-                                      errno);
-               }
-       }
-
-       return NULL;
-}
-
-static int need_poll(struct pollfds *polls, int n)
-{
-       struct pollfd *new;
-
-       if (n <= polls->size)
-               return 0;
-
-       new = uml_kmalloc(n * sizeof(struct pollfd), UM_GFP_ATOMIC);
-       if (new == NULL) {
-               printk(UM_KERN_ERR "need_poll : failed to allocate new "
-                      "pollfds\n");
-               return -ENOMEM;
-       }
-
-       memcpy(new, polls->poll, polls->used * sizeof(struct pollfd));
-       kfree(polls->poll);
-
-       polls->poll = new;
-       polls->size = n;
-       return 0;
-}
-
-/*
- * Must be called with sigio_lock held, because it's needed by the marked
- * critical section.
- */
-static void update_thread(void)
-{
-       unsigned long flags;
-       int n;
-       char c;
-
-       flags = um_set_signals_trace(0);
-       CATCH_EINTR(n = write(sigio_private[0], &c, sizeof(c)));
-       if (n != sizeof(c)) {
-               printk(UM_KERN_ERR "update_thread : write failed, err = %d\n",
-                      errno);
-               goto fail;
-       }
 
-       CATCH_EINTR(n = read(sigio_private[0], &c, sizeof(c)));
-       if (n != sizeof(c)) {
-               printk(UM_KERN_ERR "update_thread : read failed, err = %d\n",
-                      errno);
-               goto fail;
+               CATCH_EINTR(r = tgkill(pid, pid, SIGIO));
+               if (r < 0)
+                       printk(UM_KERN_ERR "%s: tgkill failed, errno = %d\n",
+                              __func__, errno);
        }
 
-       um_set_signals_trace(flags);
-       return;
- fail:
-       /* Critical section start */
-       if (write_sigio_td) {
-               os_kill_helper_thread(write_sigio_td);
-               write_sigio_td = NULL;
-       }
-       close(sigio_private[0]);
-       close(sigio_private[1]);
-       close(write_sigio_fds[0]);
-       close(write_sigio_fds[1]);
-       /* Critical section end */
-       um_set_signals_trace(flags);
+       return NULL;
 }
 
 int __add_sigio_fd(int fd)
 {
-       struct pollfd *p;
-       int err, i, n;
-
-       for (i = 0; i < all_sigio_fds.used; i++) {
-               if (all_sigio_fds.poll[i].fd == fd)
-                       break;
-       }
-       if (i == all_sigio_fds.used)
-               return -ENOSPC;
-
-       p = &all_sigio_fds.poll[i];
-
-       for (i = 0; i < current_poll.used; i++) {
-               if (current_poll.poll[i].fd == fd)
-                       return 0;
-       }
-
-       n = current_poll.used;
-       err = need_poll(&next_poll, n + 1);
-       if (err)
-               return err;
-
-       memcpy(next_poll.poll, current_poll.poll,
-              current_poll.used * sizeof(struct pollfd));
-       next_poll.poll[n] = *p;
-       next_poll.used = n + 1;
-       update_thread();
-
-       return 0;
+       struct epoll_event event = {
+               .data.fd = fd,
+               .events = EPOLLIN | EPOLLET,
+       };
+       int r;
+
+       CATCH_EINTR(r = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event));
+       return r < 0 ? -errno : 0;
 }
 
-
 int add_sigio_fd(int fd)
 {
        int err;
@@ -210,38 +80,11 @@ int add_sigio_fd(int fd)
 
 int __ignore_sigio_fd(int fd)
 {
-       struct pollfd *p;
-       int err, i, n = 0;
-
-       /*
-        * This is called from exitcalls elsewhere in UML - if
-        * sigio_cleanup has already run, then update_thread will hang
-        * or fail because the thread is no longer running.
-        */
-       if (!write_sigio_td)
-               return -EIO;
-
-       for (i = 0; i < current_poll.used; i++) {
-               if (current_poll.poll[i].fd == fd)
-                       break;
-       }
-       if (i == current_poll.used)
-               return -ENOENT;
-
-       err = need_poll(&next_poll, current_poll.used - 1);
-       if (err)
-               return err;
+       struct epoll_event event;
+       int r;
 
-       for (i = 0; i < current_poll.used; i++) {
-               p = &current_poll.poll[i];
-               if (p->fd != fd)
-                       next_poll.poll[n++] = *p;
-       }
-       next_poll.used = current_poll.used - 1;
-
-       update_thread();
-
-       return 0;
+       CATCH_EINTR(r = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, &event));
+       return r < 0 ? -errno : 0;
 }
 
 int ignore_sigio_fd(int fd)
@@ -255,122 +98,37 @@ int ignore_sigio_fd(int fd)
        return err;
 }
 
-static struct pollfd *setup_initial_poll(int fd)
-{
-       struct pollfd *p;
-
-       p = uml_kmalloc(sizeof(struct pollfd), UM_GFP_KERNEL);
-       if (p == NULL) {
-               printk(UM_KERN_ERR "setup_initial_poll : failed to allocate "
-                      "poll\n");
-               return NULL;
-       }
-       *p = ((struct pollfd) { .fd             = fd,
-                               .events         = POLLIN,
-                               .revents        = 0 });
-       return p;
-}
-
 static void write_sigio_workaround(void)
 {
-       struct pollfd *p;
        int err;
-       int l_write_sigio_fds[2];
-       int l_sigio_private[2];
-       struct os_helper_thread *l_write_sigio_td;
-
-       /* We call this *tons* of times - and most ones we must just fail. */
-       sigio_lock();
-       l_write_sigio_td = write_sigio_td;
-       sigio_unlock();
-
-       if (l_write_sigio_td)
-               return;
-
-       err = os_pipe(l_write_sigio_fds, 1, 1);
-       if (err < 0) {
-               printk(UM_KERN_ERR "write_sigio_workaround - os_pipe 1 failed, "
-                      "err = %d\n", -err);
-               return;
-       }
-       err = os_pipe(l_sigio_private, 1, 1);
-       if (err < 0) {
-               printk(UM_KERN_ERR "write_sigio_workaround - os_pipe 2 failed, "
-                      "err = %d\n", -err);
-               goto out_close1;
-       }
-
-       p = setup_initial_poll(l_sigio_private[1]);
-       if (!p)
-               goto out_close2;
 
        sigio_lock();
-
-       /*
-        * Did we race? Don't try to optimize this, please, it's not so likely
-        * to happen, and no more than once at the boot.
-        */
        if (write_sigio_td)
-               goto out_free;
-
-       current_poll = ((struct pollfds) { .poll        = p,
-                                          .used        = 1,
-                                          .size        = 1 });
-
-       if (write_sigio_irq(l_write_sigio_fds[0]))
-               goto out_clear_poll;
+               goto out;
 
-       memcpy(write_sigio_fds, l_write_sigio_fds, sizeof(l_write_sigio_fds));
-       memcpy(sigio_private, l_sigio_private, sizeof(l_sigio_private));
+       epollfd = epoll_create(MAX_EPOLL_EVENTS);
+       if (epollfd < 0) {
+               printk(UM_KERN_ERR "%s: epoll_create failed, errno = %d\n",
+                      __func__, errno);
+               goto out;
+       }
 
        err = os_run_helper_thread(&write_sigio_td, write_sigio_thread, NULL);
-       if (err < 0)
-               goto out_clear;
-
-       sigio_unlock();
-       return;
+       if (err < 0) {
+               printk(UM_KERN_ERR "%s: os_run_helper_thread failed, errno = 
%d\n",
+                      __func__, -err);
+               close(epollfd);
+               epollfd = -1;
+               goto out;
+       }
 
-out_clear:
-       write_sigio_td = NULL;
-       write_sigio_fds[0] = -1;
-       write_sigio_fds[1] = -1;
-       sigio_private[0] = -1;
-       sigio_private[1] = -1;
-out_clear_poll:
-       current_poll = ((struct pollfds) { .poll        = NULL,
-                                          .size        = 0,
-                                          .used        = 0 });
-out_free:
+out:
        sigio_unlock();
-       kfree(p);
-out_close2:
-       close(l_sigio_private[0]);
-       close(l_sigio_private[1]);
-out_close1:
-       close(l_write_sigio_fds[0]);
-       close(l_write_sigio_fds[1]);
 }
 
-void sigio_broken(int fd)
+void sigio_broken(void)
 {
-       int err;
-
        write_sigio_workaround();
-
-       sigio_lock();
-       err = need_poll(&all_sigio_fds, all_sigio_fds.used + 1);
-       if (err) {
-               printk(UM_KERN_ERR "maybe_sigio_broken - failed to add pollfd "
-                      "for descriptor %d\n", fd);
-               goto out;
-       }
-
-       all_sigio_fds.poll[all_sigio_fds.used++] =
-               ((struct pollfd) { .fd          = fd,
-                                  .events      = POLLIN,
-                                  .revents     = 0 });
-out:
-       sigio_unlock();
 }
 
 /* Changed during early boot */
@@ -384,7 +142,7 @@ void maybe_sigio_broken(int fd)
        if (pty_output_sigio)
                return;
 
-       sigio_broken(fd);
+       sigio_broken();
 }
 
 static void sigio_cleanup(void)
-- 
2.34.1


Reply via email to