On Wed, Jan 24, 2018 at 10:16:18PM -0800, Paul E. McKenney wrote:
> On Tue, Jan 23, 2018 at 03:59:26PM +0800, liangli...@huawei.com wrote:
> > From: Heng Zhang <hen...@huawei.com>
> > 
> > This RCU implementation (PRCU) is based on a fast consensus protocol
> > published in the following paper:
> > 
> > Fast Consensus Using Bounded Staleness for Scalable Read-mostly
> > Synchronization.
> > Haibo Chen, Heng Zhang, Ran Liu, Binyu Zang, and Haibing Guan.
> > IEEE Transactions on Parallel and Distributed Systems (TPDS), 2016.
> > https://dl.acm.org/citation.cfm?id=3024114.3024143
> > 
> > Signed-off-by: Heng Zhang <hen...@huawei.com>
> > Signed-off-by: Lihao Liang <liangli...@huawei.com>
> 
> A few comments and questions interspersed.
> 
> 							Thanx, Paul
> 
> > ---
> >  include/linux/prcu.h |  37 +++++++++++++++
> >  kernel/rcu/Makefile  |   2 +-
> >  kernel/rcu/prcu.c    | 125 +++++++++++++++++++++++++++++++++++++++++++++++++++
> >  kernel/sched/core.c  |   2 +
> >  4 files changed, 165 insertions(+), 1 deletion(-)
> >  create mode 100644 include/linux/prcu.h
> >  create mode 100644 kernel/rcu/prcu.c
> > 
> > diff --git a/include/linux/prcu.h b/include/linux/prcu.h
> > new file mode 100644
> > index 00000000..653b4633
> > --- /dev/null
> > +++ b/include/linux/prcu.h
> > @@ -0,0 +1,37 @@
> > +#ifndef __LINUX_PRCU_H
> > +#define __LINUX_PRCU_H
> > +
> > +#include <linux/atomic.h>
> > +#include <linux/mutex.h>
> > +#include <linux/wait.h>
> > +
> > +#define CONFIG_PRCU
> > +
> > +struct prcu_local_struct {
> > +	unsigned int locked;
> > +	unsigned int online;
> > +	unsigned long long version;
> > +};
> > +
> > +struct prcu_struct {
> > +	atomic64_t global_version;
> > +	atomic_t active_ctr;
> > +	struct mutex mtx;
> > +	wait_queue_head_t wait_q;
> > +};
> > +
> > +#ifdef CONFIG_PRCU
> > +void prcu_read_lock(void);
> > +void prcu_read_unlock(void);
> > +void synchronize_prcu(void);
> > +void prcu_note_context_switch(void);
> > +
> > +#else /* #ifdef CONFIG_PRCU */
> > +
> > +#define prcu_read_lock() do {} while (0)
> > +#define prcu_read_unlock() do {} while (0)
> > +#define synchronize_prcu() do {} while (0)
> > +#define prcu_note_context_switch() do {} while (0)
> 
> If CONFIG_PRCU=n and some code is built that uses PRCU, shouldn't you
> get a build error rather than an error-free but inoperative PRCU?
> 
> Of course, Peter's question about purpose of the patch set applies
> here as well.
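
One way to get that build error, I think, is simply to drop the no-op
macros, so the functions are only declared when PRCU is configured in.
A minimal, untested sketch (assuming CONFIG_PRCU would come from Kconfig
rather than the hardcoded #define above):

#ifdef CONFIG_PRCU
void prcu_read_lock(void);
void prcu_read_unlock(void);
void synchronize_prcu(void);
void prcu_note_context_switch(void);
#else /* #ifdef CONFIG_PRCU */
/*
 * Deliberately no stubs here: any code using PRCU built with
 * CONFIG_PRCU=n now fails at compile time instead of getting a
 * silently inoperative PRCU.
 */
#endif /* #ifdef CONFIG_PRCU */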
> > +
> > +#endif /* #ifdef CONFIG_PRCU */
> > +#endif /* __LINUX_PRCU_H */
> > diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile
> > index 23803c7d..8791419c 100644
> > --- a/kernel/rcu/Makefile
> > +++ b/kernel/rcu/Makefile
> > @@ -2,7 +2,7 @@
> >  # and is generally not a function of system call inputs.
> >  KCOV_INSTRUMENT := n
> >  
> > -obj-y += update.o sync.o
> > +obj-y += update.o sync.o prcu.o
> >  obj-$(CONFIG_CLASSIC_SRCU) += srcu.o
> >  obj-$(CONFIG_TREE_SRCU) += srcutree.o
> >  obj-$(CONFIG_TINY_SRCU) += srcutiny.o
> > diff --git a/kernel/rcu/prcu.c b/kernel/rcu/prcu.c
> > new file mode 100644
> > index 00000000..a00b9420
> > --- /dev/null
> > +++ b/kernel/rcu/prcu.c
> > @@ -0,0 +1,125 @@
> > +#include <linux/smp.h>
> > +#include <linux/prcu.h>
> > +#include <linux/percpu.h>
> > +#include <linux/compiler.h>
> > +#include <linux/sched.h>
> > +
> > +#include <asm/barrier.h>
> > +
> > +DEFINE_PER_CPU_SHARED_ALIGNED(struct prcu_local_struct, prcu_local);
> > +
> > +struct prcu_struct global_prcu = {
> > +	.global_version = ATOMIC64_INIT(0),
> > +	.active_ctr = ATOMIC_INIT(0),
> > +	.mtx = __MUTEX_INITIALIZER(global_prcu.mtx),
> > +	.wait_q = __WAIT_QUEUE_HEAD_INITIALIZER(global_prcu.wait_q)
> > +};
> > +struct prcu_struct *prcu = &global_prcu;
> > +
> > +static inline void prcu_report(struct prcu_local_struct *local)
> > +{
> > +	unsigned long long global_version;
> > +	unsigned long long local_version;
> > +
> > +	global_version = atomic64_read(&prcu->global_version);
> > +	local_version = local->version;
> > +	if (global_version > local_version)
> > +		cmpxchg(&local->version, local_version, global_version);
> > +}
> > +
> > +void prcu_read_lock(void)
> > +{
> > +	struct prcu_local_struct *local;
> > +
> > +	local = get_cpu_ptr(&prcu_local);
> > +	if (!local->online) {
> > +		WRITE_ONCE(local->online, 1);
> > +		smp_mb();
> > +	}
> > +
> > +	local->locked++;
> > +	put_cpu_ptr(&prcu_local);
> > +}
> > +EXPORT_SYMBOL(prcu_read_lock);
> > +
> > +void prcu_read_unlock(void)
> > +{
> > +	int locked;
> > +	struct prcu_local_struct *local;
> > +
> > +	barrier();
> > +	local = get_cpu_ptr(&prcu_local);
> > +	locked = local->locked;
> > +	if (locked) {
> > +		local->locked--;
> > +		if (locked == 1)
> > +			prcu_report(local);
> 
> Is ordering important here?  It looks to me that the compiler could
> rearrange some of the accesses within prcu_report() with the local->locked
> decrement.  There appears to be some potential for load and store tearing,
> though perhaps you have verified that your compiler avoids this on
> the architecture that you are using.
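
If compiler reordering and tearing are the main worry, marking the
->locked accesses might be a first step; whether that is sufficient on
all architectures is exactly Paul's question. A rough, untested sketch
of what I mean:

void prcu_read_unlock(void)
{
	int locked;
	struct prcu_local_struct *local;

	barrier();
	local = get_cpu_ptr(&prcu_local);
	locked = READ_ONCE(local->locked);	/* no load tearing */
	if (locked) {
		/*
		 * Marked store: no store tearing, and the compiler may
		 * not reorder it against the marked/atomic accesses in
		 * prcu_report().
		 */
		WRITE_ONCE(local->locked, locked - 1);
		if (locked == 1)
			prcu_report(local);
		put_cpu_ptr(&prcu_local);
	} else {
		put_cpu_ptr(&prcu_local);
		if (!atomic_dec_return(&prcu->active_ctr))
			wake_up(&prcu->wait_q);
	}
}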
> 
> > +		put_cpu_ptr(&prcu_local);
> > +	} else {
> 
> Hmmm...  We get here if the RCU read-side critical section was preempted.
> If none of them are preempted, ->active_ctr remains zero.
> 
> > +		put_cpu_ptr(&prcu_local);
> > +		if (!atomic_dec_return(&prcu->active_ctr))
> > +			wake_up(&prcu->wait_q);
> > +	}
> > +}
> > +EXPORT_SYMBOL(prcu_read_unlock);
> > +
> > +static void prcu_handler(void *info)
> > +{
> > +	struct prcu_local_struct *local;
> > +
> > +	local = this_cpu_ptr(&prcu_local);
> > +	if (!local->locked)

And I think an smp_mb() is needed here, because of the following case:

	CPU 0				CPU 1
	==================		==========================
	{X is initially 0}

					WRITE_ONCE(X, 1);

	prcu_read_unlock(void):
	  if (locked) {
					synchronize_prcu(void):
	    ...
					  <send IPI to CPU 0>
	  local->locked--;
	# switch to IPI
	WRITE_ONCE(local->version,....)
					  <read CPU 0 version to be latest>
					  <return>

	r1 = READ_ONCE(X);

r1 could be 0, which breaks RCU guarantees.
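
Something like the following in the IPI handler might close that window.
The smp_mb() is my guess at a fix, not part of the submitted patch:

static void prcu_handler(void *info)
{
	struct prcu_local_struct *local;

	local = this_cpu_ptr(&prcu_local);
	if (!local->locked) {
		/*
		 * Order the memory accesses of the interrupted reader
		 * before the version update that allows
		 * synchronize_prcu() to return.
		 */
		smp_mb();
		WRITE_ONCE(local->version,
			   atomic64_read(&prcu->global_version));
	}
}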
> > +		WRITE_ONCE(local->version,
> > +			   atomic64_read(&prcu->global_version));
> > +}
> > +
> > +void synchronize_prcu(void)
> > +{
> > +	int cpu;
> > +	cpumask_t cpus;
> > +	unsigned long long version;
> > +	struct prcu_local_struct *local;
> > +
> > +	version = atomic64_add_return(1, &prcu->global_version);
> > +	mutex_lock(&prcu->mtx);
> > +
> > +	local = get_cpu_ptr(&prcu_local);
> > +	local->version = version;
> > +	put_cpu_ptr(&prcu_local);
> > +
> > +	cpumask_clear(&cpus);
> > +	for_each_possible_cpu(cpu) {
> > +		local = per_cpu_ptr(&prcu_local, cpu);
> > +		if (!READ_ONCE(local->online))
> > +			continue;
> > +		if (READ_ONCE(local->version) < version) {
> 
> On 32-bit systems, given that ->version is long long, you might see
> load tearing.  And on some 32-bit systems, the cmpxchg() in prcu_handler()
> might not build.
> 

/me curious about why an atomic64_t is used here for the global version.
I think maybe a 32-bit global version would still suffice (a rough sketch
at the end of this mail).

Regards,
Boqun

> Or is the idea that only prcu_handler() updates ->version?  But in that
> case, you wouldn't need the READ_ONCE() above.  What am I missing here?
> 
> > +			smp_call_function_single(cpu, prcu_handler, NULL, 0);
> > +			cpumask_set_cpu(cpu, &cpus);
> > +		}
> > +	}
> > +
> > +	for_each_cpu(cpu, &cpus) {
> > +		local = per_cpu_ptr(&prcu_local, cpu);
> > +		while (READ_ONCE(local->version) < version)
> 
> This ->version read can also tear on some 32-bit systems, and this
> one most definitely can race with the prcu_handler() above.  Does the
> algorithm operate correctly in that case?  (It doesn't look that way
> to me, but I might be missing something.)  Or are 32-bit systems excluded?
> 
> > +			cpu_relax();
> > +	}
> 
> I might be missing something, but I believe we need a memory barrier
> here on non-TSO systems.  Without that, couldn't we miss a preemption?
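
If a barrier is indeed needed there, the minimal change might be a
single smp_mb() right after the polling loop; again an untested sketch
rather than the authors' code:

	for_each_cpu(cpu, &cpus) {
		local = per_cpu_ptr(&prcu_local, cpu);
		while (READ_ONCE(local->version) < version)
			cpu_relax();
	}
	/*
	 * Pairs with the readers' barriers: order the polled ->version
	 * reads before the ->active_ctr check below, so that a
	 * preempted reader is not missed.
	 */
	smp_mb();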
> 
> > +
> > +	if (atomic_read(&prcu->active_ctr))
> > +		wait_event(prcu->wait_q, !atomic_read(&prcu->active_ctr));
> > +
> > +	mutex_unlock(&prcu->mtx);
> > +}
> > +EXPORT_SYMBOL(synchronize_prcu);
> > +
> > +void prcu_note_context_switch(void)
> > +{
> > +	struct prcu_local_struct *local;
> > +
> > +	local = get_cpu_ptr(&prcu_local);
> > +	if (local->locked) {
> > +		atomic_add(local->locked, &prcu->active_ctr);
> > +		local->locked = 0;
> > +	}
> > +	local->online = 0;
> > +	prcu_report(local);
> > +	put_cpu_ptr(&prcu_local);
> > +}
> > diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> > index 326d4f88..a308581b 100644
> > --- a/kernel/sched/core.c
> > +++ b/kernel/sched/core.c
> > @@ -15,6 +15,7 @@
> >  #include <linux/init_task.h>
> >  #include <linux/context_tracking.h>
> >  #include <linux/rcupdate_wait.h>
> > +#include <linux/prcu.h>
> >  
> >  #include <linux/blkdev.h>
> >  #include <linux/kprobes.h>
> > @@ -3383,6 +3384,7 @@ static void __sched notrace __schedule(bool preempt)
> >  
> >  	local_irq_disable();
> >  	rcu_note_context_switch(preempt);
> > +	prcu_note_context_switch();
> >  
> >  	/*
> >  	 * Make sure that signal_pending_state()->signal_pending() below
> > -- 
> > 2.14.1.729.g59c0ea183
> > 
> 
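P.S. To make the 32-bit suggestion above concrete, here is a very rough,
untested sketch: an atomic_t global version plus a wrap-safe comparison
in the style of time_before(). It assumes a CPU's local version never
lags the global one by 2^31 or more grace periods, which the bounded
staleness of the protocol should make easy to satisfy:

/* Wrap-safe "v1 is older than v2", like time_before(). */
static inline bool prcu_version_before(unsigned int v1, unsigned int v2)
{
	return (int)(v1 - v2) < 0;
}

/* In synchronize_prcu(), with ->global_version an atomic_t and the
 * per-CPU ->version an unsigned int, the polling loop would become: */
	version = atomic_add_return(1, &prcu->global_version);
	...
	while (prcu_version_before(READ_ONCE(local->version), version))
		cpu_relax();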