From: Stanislav Kinsburskiy <skinsbur...@virtuozzo.com> This adds an ioctl that allows setting the ring buffer tail and waiting until AIO requests are finished.
v2: Add pseudosuper check https://jira.sw.ru/browse/PSBM-42488 Signed-off-by: Kirill Tkhai <ktk...@virtuozzo.com> Reviewed-by: Cyrill Gorcunov <gorcu...@openvz.org> khorenko@: we don't support migration of incomplete AIO requests (https://jira.sw.ru/browse/PSBM-41425), so, using the added instruments, we wait until all AIO requests are completed and migrate the results (AIO req contexts with status). ====================================== ve/aio: Enumerate ioctl numbers right Do not use commonly used numbers; use custom ones. Also, make the error codes different. https://jira.sw.ru/browse/PSBM-42488 Signed-off-by: Kirill Tkhai <ktk...@virtuozzo.com> Acked-by: Cyrill Gorcunov <gorcu...@openvz.org> ====================================== ve/aio: Kill ve_aio_set_tail() Since the tail is restored by submitting write requests to /dev/null, we do not need this interface anymore. https://jira.sw.ru/browse/PSBM-42488 Signed-off-by: Kirill Tkhai <ktk...@virtuozzo.com> Acked-by: Cyrill Gorcunov <gorcu...@openvz.org> ====================================== ve/aio: Wait for all inflight AIO reqs of a task Make it wait for all of a task's AIO contexts instead of a single AIO request. This minimizes the number of syscalls we make to dump AIOs. https://jira.sw.ru/browse/PSBM-42488 Signed-off-by: Kirill Tkhai <ktk...@virtuozzo.com> Acked-by: Cyrill Gorcunov <gorcu...@openvz.org> ====================================== Ported with respect to ms commits: 34e83fc ("aio: reqs_active -> reqs_available") 723be6e ("aio: percpu ioctx refcount") db446a0 ("aio: convert the ioctx list to table lookup v3") https://jira.sw.ru/browse/PSBM-123159 Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalit...@virtuozzo.com> +++ aio: ioctl(VE_AIO_IOC_WAIT_ACTIVE) in-flight reqs counting fix We have to take into account the percpu part of the reqs_available counter in struct kioctx.
mFixes: f5d1279 ("ve/aio: Add a handle to checkpoint/restore AIO context")
https://jira.sw.ru/browse/PSBM-128710
Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalit...@virtuozzo.com>
Reviewed-by: Pavel Tikhomirov <ptikhomi...@virtuozzo.com>

(cherry-picked from vz8 commit c04e652e2451 ("ve/aio: Add a handle to checkpoint/restore AIO context"))
Signed-off-by: Nikita Yushchenko <nikita.yushche...@virtuozzo.com>
---
Review changes: tolerate a NULL mm->ioctx_table, use mmgrab(), require
both CONFIG_VE and CONFIG_CHECKPOINT_RESTORE for the proc entry, make the
!CONFIG_AIO ve_aio_ioctl() stub "static inline", add comments.

 fs/aio.c            | 115 ++++++++++++++++++++++++++++++++++++++++++++
 fs/proc/base.c      |  32 ++++++++++++
 include/linux/aio.h |  17 +++++++
 3 files changed, 164 insertions(+)

diff --git a/fs/aio.c b/fs/aio.c
index d2e99e348b7a..779a528bcc25 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -22,6 +22,7 @@
 #include <linux/refcount.h>
 #include <linux/uio.h>
 
+#include <linux/sched/mm.h>
 #include <linux/sched/signal.h>
 #include <linux/fs.h>
 #include <linux/file.h>
@@ -2274,3 +2275,117 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64,
 	return ret;
 }
 #endif
+
+#ifdef CONFIG_VE
+/*
+ * Return true while @ctx has in-flight requests, i.e. while
+ * (nr_events - 1) minus the global and per-cpu reqs_available and minus
+ * completed_events is non-zero.  Sampled under ctx->completion_lock.
+ */
+static bool has_reqs_active(struct kioctx *ctx)
+{
+	unsigned long flags;
+	unsigned nr;
+	int cpu;
+	unsigned reqs_avail_batch = 0;
+
+	spin_lock_irqsave(&ctx->completion_lock, flags);
+	/*
+	 * See get_reqs_available()/put_reqs_available() about
+	 * how reqs_available is distributed between atomic
+	 * ctx->reqs_available and percpu ctx->cpu reqs_available.
+	 */
+	for_each_possible_cpu(cpu)
+		reqs_avail_batch += per_cpu_ptr(ctx->cpu, cpu)->reqs_available;
+	nr = ctx->nr_events - 1;
+	nr -= atomic_read(&ctx->reqs_available) + reqs_avail_batch;
+	nr -= ctx->completed_events;
+	spin_unlock_irqrestore(&ctx->completion_lock, flags);
+
+	return !!nr;
+}
+
+/*
+ * Sleep until no AIO context of @p's mm has in-flight requests.
+ *
+ * Returns 0 on success, -EINVAL if @p is a kernel thread, -ESRCH if @p
+ * has no mm, or the non-zero wait_event_interruptible() result if the
+ * sleep was interrupted.
+ */
+static int ve_aio_wait_inflight_reqs(struct task_struct *p)
+{
+	struct mm_struct *mm;
+	struct kioctx_table *table;
+	int ret, i;
+
+	if (p->flags & PF_KTHREAD)
+		return -EINVAL;
+
+	/*
+	 * NOTE(review): this pins only the mm_struct (mm_count); confirm the
+	 * table walk below cannot race with exit_aio() freeing the table.
+	 */
+	task_lock(p);
+	mm = p->mm;
+	if (mm)
+		mmgrab(mm);
+	task_unlock(p);
+	if (!mm)
+		return -ESRCH;
+
+again:
+	spin_lock_irq(&mm->ioctx_lock);
+	rcu_read_lock();
+	table = rcu_dereference(mm->ioctx_table);
+	/* The table may be NULL, cf. lookup_ioctx() and exit_aio() */
+	for (i = 0; table && i < table->nr; i++) {
+		struct kioctx *ctx;
+
+		ctx = rcu_dereference(table->table[i]);
+		if (!ctx)
+			continue;
+
+		if (!has_reqs_active(ctx))
+			continue;
+
+		/* Hold a reference on ctx across the unlocked wait */
+		percpu_ref_get(&ctx->users);
+		rcu_read_unlock();
+		spin_unlock_irq(&mm->ioctx_lock);
+
+		ret = wait_event_interruptible(ctx->wait, !has_reqs_active(ctx));
+		percpu_ref_put(&ctx->users);
+
+		if (ret)
+			goto mmdrop;
+		/* Locks were dropped for the wait: rescan from the start */
+		goto again;
+	}
+
+	rcu_read_unlock();
+	spin_unlock_irq(&mm->ioctx_lock);
+	ret = 0;
+mmdrop:
+	mmdrop(mm);
+	return ret;
+}
+
+/*
+ * Dispatch a VE AIO ioctl issued for @task.  Only VE_AIO_IOC_WAIT_ACTIVE
+ * is handled (@arg is not used); any other @cmd yields -EINVAL.
+ */
+int ve_aio_ioctl(struct task_struct *task, unsigned int cmd, unsigned long arg)
+{
+	int ret;
+
+	switch (cmd) {
+	case VE_AIO_IOC_WAIT_ACTIVE:
+		ret = ve_aio_wait_inflight_reqs(task);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+#endif
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 2c25b9039a4c..6fb9575976a6 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -95,6 +95,7 @@
 #include <linux/posix-timers.h>
 #include <linux/time_namespace.h>
 #include <linux/resctrl.h>
+#include <linux/aio.h>
 #include <trace/events/oom.h>
 #include "internal.h"
 #include "fd.h"
@@ -2601,6 +2602,34 @@ static const struct file_operations proc_pid_set_timerslack_ns_operations = {
 	.release = single_release,
 };
 
+#ifdef CONFIG_VE
+/*
+ * ioctl handler of the per-task "aio" proc file: forward @cmd and @arg
+ * to ve_aio_ioctl() for the task behind @file.  Returns -ESRCH if that
+ * task cannot be obtained, otherwise the result of ve_aio_ioctl().
+ */
+static long proc_aio_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct inode *inode = file_inode(file);
+	struct task_struct *task;
+	int ret;
+
+	task = get_proc_task(inode);
+	if (!task)
+		return -ESRCH;
+
+	ret = ve_aio_ioctl(task, cmd, arg);
+
+	put_task_struct(task);
+
+	return ret;
+}
+
+static const struct file_operations proc_aio_operations = {
+	.unlocked_ioctl = proc_aio_ioctl,
+};
+#endif /* CONFIG_VE */
+
 static struct dentry *proc_pident_instantiate(struct dentry *dentry,
 	struct task_struct *task, const void *ptr)
 {
@@ -3272,6 +3301,9 @@ static const struct pid_entry tgid_base_stuff[] = {
 	REG("timers", S_IRUGO, proc_timers_operations),
 #endif
 	REG("timerslack_ns", S_IRUGO|S_IWUGO, proc_pid_set_timerslack_ns_operations),
+#if defined(CONFIG_VE) && defined(CONFIG_CHECKPOINT_RESTORE)
+	REG("aio", S_IRUGO|S_IWUSR, proc_aio_operations),
+#endif
 #ifdef CONFIG_LIVEPATCH
 	ONE("patch_state", S_IRUSR, proc_pid_patch_state),
 #endif
diff --git a/include/linux/aio.h b/include/linux/aio.h
index 4b7a331156ff..ccaaae0db31d 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -12,14 +12,31 @@ typedef int (kiocb_cancel_fn)(struct kiocb *);
 
 #define AIO_MAX_NR_DEFAULT 0x10000
 
+struct task_struct;
+
+/* Argument type encoded in the VE AIO ioctl number below */
+struct ve_ioc_arg
+{
+	aio_context_t ctx_id;
+	unsigned val;
+};
+
+/* Wait until the target task has no in-flight AIO requests */
+#define VE_AIO_IOC_WAIT_ACTIVE _IOW('a', 1, struct ve_ioc_arg)
+
 /* prototypes */
 #ifdef CONFIG_AIO
 extern void exit_aio(struct mm_struct *mm);
 void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel);
+#ifdef CONFIG_VE
+int ve_aio_ioctl(struct task_struct *, unsigned int, unsigned long);
+#endif
 #else
 static inline void exit_aio(struct mm_struct *mm) { }
 static inline void kiocb_set_cancel_fn(struct kiocb *req,
 				       kiocb_cancel_fn *cancel) { }
+static inline int ve_aio_ioctl(struct task_struct *task, unsigned int cmd,
+			       unsigned long arg) { return 0; }
 #endif /* CONFIG_AIO */
 
 #endif /* __LINUX__AIO_H */
-- 
2.30.2

_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel