Re: [PATCH] io_uring: IORING_OP_TIMEOUT support

Jens Axboe Fri, 20 Sep 2019 16:52:26 -0700

On 9/20/19 5:10 PM, Jens Axboe wrote:
> On 9/20/19 3:26 PM, Jens Axboe wrote:
>> But sounds like we are in violent agreement. I'll post a new patch for
>> this soonish.
> 
> How about this? You pass in number of events in sqe->off. If that amount
> of events happen before the timer expires, then the timer is deleted and
> the completion posted. The timeout cqe->res will be -ETIME if the timer
> expired, and 0 if it got removed due to hitting the number of events.
> 
> Lightly tested, works for me.


Found a missing increment case when I wrote up the test code. This
one passes the test code I put in the liburing 'timeout' branch.


diff --git a/fs/io_uring.c b/fs/io_uring.c
index 05a299e80159..3ae9489f6fc1 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -200,6 +200,7 @@ struct io_ring_ctx {
                struct io_uring_sqe     *sq_sqes;
 
                struct list_head        defer_list;
+               struct list_head        timeout_list;
        } ____cacheline_aligned_in_smp;
 
        /* IO offload */
@@ -216,6 +217,7 @@ struct io_ring_ctx {
                struct wait_queue_head  cq_wait;
                struct fasync_struct    *cq_fasync;
                struct eventfd_ctx      *cq_ev_fd;
+               atomic_t                cq_timeouts;
        } ____cacheline_aligned_in_smp;
 
        struct io_rings *rings;
@@ -283,6 +285,12 @@ struct io_poll_iocb {
        struct wait_queue_entry         wait;
 };
 
+struct io_timeout {
+       struct file                     *file;
+       unsigned                        count;
+       struct hrtimer                  timer;
+};
+
 /*
  * NOTE! Each of the iocb union members has the file pointer
  * as the first entry in their struct definition. So you can
@@ -294,6 +302,7 @@ struct io_kiocb {
                struct file             *file;
                struct kiocb            rw;
                struct io_poll_iocb     poll;
+               struct io_timeout       timeout;
        };
 
        struct sqe_submit       submit;
@@ -344,6 +353,8 @@ struct io_submit_state {
 };
 
 static void io_sq_wq_submit_work(struct work_struct *work);
+static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
+                                long res);
 static void __io_free_req(struct io_kiocb *req);
 
 static struct kmem_cache *req_cachep;
@@ -400,6 +411,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct 
io_uring_params *p)
        INIT_LIST_HEAD(&ctx->poll_list);
        INIT_LIST_HEAD(&ctx->cancel_list);
        INIT_LIST_HEAD(&ctx->defer_list);
+       INIT_LIST_HEAD(&ctx->timeout_list);
        return ctx;
 }
 
@@ -460,10 +472,41 @@ static inline void io_queue_async_work(struct io_ring_ctx 
*ctx,
        queue_work(ctx->sqo_wq[rw], &req->work);
 }
 
+static void io_kill_timeout(struct io_kiocb *req)
+{
+       int ret;
+
+       ret = hrtimer_try_to_cancel(&req->timeout.timer);
+       if (ret != -1) {
+               atomic_inc(&req->ctx->cq_timeouts);
+               list_del(&req->list);
+               io_cqring_fill_event(req->ctx, req->user_data, 0);
+               __io_free_req(req);
+       }
+}
+
+static void io_kill_timeouts(struct io_ring_ctx *ctx)
+{
+       struct io_kiocb *req, *tmp;
+
+       spin_lock_irq(&ctx->completion_lock);
+       list_for_each_entry_safe(req, tmp, &ctx->timeout_list, list)
+               io_kill_timeout(req);
+       spin_unlock_irq(&ctx->completion_lock);
+}
+
 static void io_commit_cqring(struct io_ring_ctx *ctx)
 {
        struct io_kiocb *req;
 
+       if (!list_empty(&ctx->timeout_list)) {
+               struct io_kiocb *tmp;
+
+               list_for_each_entry_safe(req, tmp, &ctx->timeout_list, list)
+                       if (!--req->timeout.count)
+                               io_kill_timeout(req);
+       }
+
        __io_commit_cqring(ctx);
 
        while ((req = io_get_deferred_req(ctx)) != NULL) {
@@ -1765,6 +1808,60 @@ static int io_poll_add(struct io_kiocb *req, const 
struct io_uring_sqe *sqe)
        return ipt.error;
 }
 
+static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
+{
+       struct io_ring_ctx *ctx;
+       struct io_kiocb *req;
+       unsigned long flags;
+
+       req = container_of(timer, struct io_kiocb, timeout.timer);
+       ctx = req->ctx;
+       atomic_inc(&ctx->cq_timeouts);
+
+       spin_lock_irqsave(&ctx->completion_lock, flags);
+       list_del(&req->list);
+
+       io_cqring_fill_event(ctx, req->user_data, -ETIME);
+       io_commit_cqring(ctx);
+       spin_unlock_irqrestore(&ctx->completion_lock, flags);
+
+       io_cqring_ev_posted(ctx);
+
+       io_put_req(req);
+       return HRTIMER_NORESTART;
+}
+
+static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+       struct io_ring_ctx *ctx = req->ctx;
+       struct timespec ts;
+
+       if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
+       if (sqe->flags || sqe->ioprio || sqe->buf_index || sqe->len != 1)
+               return -EINVAL;
+       if (copy_from_user(&ts, (void __user *) sqe->addr, sizeof(ts)))
+               return -EFAULT;
+
+       /*
+        * sqe->off holds how many events that need to occur for this
+        * timeout event to be satisfied.
+        */
+       req->timeout.count = READ_ONCE(sqe->off);
+       if (!req->timeout.count)
+               req->timeout.count = 1;
+
+       spin_lock_irq(&ctx->completion_lock);
+       list_add_tail(&req->list, &ctx->timeout_list);
+       spin_unlock_irq(&ctx->completion_lock);
+
+       hrtimer_init(&req->timeout.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+       req->timeout.timer.function = io_timeout_fn;
+       hrtimer_start(&req->timeout.timer, timespec_to_ktime(ts),
+                       HRTIMER_MODE_REL);
+       return 0;
+}
+
 static int io_req_defer(struct io_ring_ctx *ctx, struct io_kiocb *req,
                        const struct io_uring_sqe *sqe)
 {
@@ -1842,6 +1939,9 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, 
struct io_kiocb *req,
        case IORING_OP_RECVMSG:
                ret = io_recvmsg(req, s->sqe, force_nonblock);
                break;
+       case IORING_OP_TIMEOUT:
+               ret = io_timeout(req, s->sqe);
+               break;
        default:
                ret = -EINVAL;
                break;
@@ -2599,6 +2699,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int 
min_events,
                          const sigset_t __user *sig, size_t sigsz)
 {
        struct io_rings *rings = ctx->rings;
+       unsigned nr_timeouts;
        int ret;
 
        if (io_cqring_events(rings) >= min_events)
@@ -2617,7 +2718,15 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int 
min_events,
                        return ret;
        }
 
-       ret = wait_event_interruptible(ctx->wait, io_cqring_events(rings) >= 
min_events);
+       nr_timeouts = atomic_read(&ctx->cq_timeouts);
+       /*
+        * Return if we have enough events, or if a timeout occured since
+        * we started waiting. For timeouts, we always want to return to
+        * userspace.
+        */
+       ret = wait_event_interruptible(ctx->wait,
+                               io_cqring_events(rings) >= min_events ||
+                               atomic_read(&ctx->cq_timeouts) != nr_timeouts);
        restore_saved_sigmask_unless(ret == -ERESTARTSYS);
        if (ret == -ERESTARTSYS)
                ret = -EINTR;
@@ -3288,6 +3397,7 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx 
*ctx)
        percpu_ref_kill(&ctx->refs);
        mutex_unlock(&ctx->uring_lock);
 
+       io_kill_timeouts(ctx);
        io_poll_remove_all(ctx);
        io_iopoll_reap_events(ctx);
        wait_for_completion(&ctx->ctx_done);
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 96ee9d94b73e..cf3101dc6b1e 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -61,6 +61,7 @@ struct io_uring_sqe {
 #define IORING_OP_SYNC_FILE_RANGE      8
 #define IORING_OP_SENDMSG      9
 #define IORING_OP_RECVMSG      10
+#define IORING_OP_TIMEOUT      11
 
 /*
  * sqe->fsync_flags

-- 
Jens Axboe

Re: [PATCH] io_uring: IORING_OP_TIMEOUT support

Reply via email to