On 05/06/2016 05:20 PM, Waiman Long wrote:
> Currently, it is not possible to determine for sure if a reader
> owns a rwsem by looking at the content of the rwsem data structure.
> This patch adds a new state RWSEM_READER_OWNED to the owner field
> to indicate that readers currently own the lock. This enables us to
> address the following 2 issues in the rwsem optimistic spinning code:
> 
>  1) rwsem_can_spin_on_owner() will disallow optimistic spinning if
>     the owner field is NULL which can mean either the readers own
>     the lock or the owning writer hasn't set the owner field yet.
>     In the latter case, we miss the chance to do optimistic spinning.
> 
>  2) While a writer is spinning and a reader takes the lock, the writer
>     will continue to spin in the main rwsem_optimistic_spin() loop as
>     the owner is NULL.
> 
> Adding the new state will allow optimistic spinning to go forward as
> long as the owner field is not RWSEM_READER_OWNED and the owner is
> running, if set, but stop immediately when that state has been reached.

Really good idea.
Some comments below.


> On a 4-socket Haswell machine running on a 4.6-rc1 based kernel, the
> fio test with multithreaded randrw and randwrite tests on the same
> file on a XFS partition on top of a NVDIMM were run, the aggregated
> bandwidths before and after the patch were as follows:
> 
>   Test      BW before patch     BW after patch  % change
>   ----      ---------------     --------------  --------
>   randrw         988 MB/s          1192 MB/s      +21%
>   randwrite     1513 MB/s          1623 MB/s      +7.3%
> 
> The perf profile of the rwsem_down_write_failed() function in randrw
> before and after the patch were:
> 
>    19.95%  5.88%  fio  [kernel.vmlinux]  [k] rwsem_down_write_failed
>    14.20%  1.52%  fio  [kernel.vmlinux]  [k] rwsem_down_write_failed
> 
> The actual CPU cycles spend in rwsem_down_write_failed() dropped from
> 5.88% to 1.52% after the patch.
> 
> The xfstests was also run and no regression was observed.
> 
> Signed-off-by: Waiman Long <waiman.l...@hpe.com>
> ---
>  v1->v2:
>   - Add rwsem_is_reader_owned() helper & rename rwsem_reader_owned()
>     to rwsem_set_reader_owned().
>   - Add more comments to clarify the purpose of some of the code
>     changes.
> 
>  kernel/locking/rwsem-xadd.c |   39 ++++++++++++++++++---------------------
>  kernel/locking/rwsem.c      |    8 ++++++--
>  kernel/locking/rwsem.h      |   41 +++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 65 insertions(+), 23 deletions(-)
> 
> diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
> index df4dcb8..620a286 100644
> --- a/kernel/locking/rwsem-xadd.c
> +++ b/kernel/locking/rwsem-xadd.c
> @@ -155,6 +155,12 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum 
> rwsem_wake_type wake_type)
>                       /* Last active locker left. Retry waking readers. */
>                       goto try_reader_grant;
>               }
> +             /*
> +              * It is not really necessary to set it to reader-owned here,
> +              * but it gives the spinners an early indication that the
> +              * readers now have the lock.
> +              */
> +             rwsem_set_reader_owned(sem);
>       }
>  
>       /* Grant an infinite number of read locks to the readers at the front
> @@ -306,16 +312,11 @@ static inline bool rwsem_can_spin_on_owner(struct 
> rw_semaphore *sem)
>  
>       rcu_read_lock();
>       owner = READ_ONCE(sem->owner);
> -     if (!owner) {
> -             long count = READ_ONCE(sem->count);
> +     if (!rwsem_is_writer_owned(owner)) {
>               /*
> -              * If sem->owner is not set, yet we have just recently entered 
> the
> -              * slowpath with the lock being active, then there is a 
> possibility
> -              * reader(s) may have the lock. To be safe, bail spinning in 
> these
> -              * situations.
> +              * Don't spin if the rwsem is readers owned.
>                */
> -             if (count & RWSEM_ACTIVE_MASK)
> -                     ret = false;
> +             ret = !rwsem_is_reader_owned(owner);
>               goto done;
>       }

I'm not a big fan of all the helpers; istm like they're obfuscating the more
important requirements of rwsem. For example, this reduces to

        rcu_read_lock();
        owner = READ_ONCE(sem->owner);
        ret = !owner || (rwsem_is_writer_owned(owner) && owner->on_cpu);
        rcu_read_unlock();
        return ret;

  
> @@ -328,8 +329,6 @@ done:
>  static noinline
>  bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner)
>  {
> -     long count;
> -
>       rcu_read_lock();
>       while (sem->owner == owner) {
>               /*
> @@ -350,16 +349,11 @@ bool rwsem_spin_on_owner(struct rw_semaphore *sem, 
> struct task_struct *owner)
>       }
>       rcu_read_unlock();
>  
> -     if (READ_ONCE(sem->owner))
> -             return true; /* new owner, continue spinning */
> -
>       /*
> -      * When the owner is not set, the lock could be free or
> -      * held by readers. Check the counter to verify the
> -      * state.
> +      * If there is a new owner or the owner is not set, we continue
> +      * spinning.
>        */
> -     count = READ_ONCE(sem->count);
> -     return (count == 0 || count == RWSEM_WAITING_BIAS);
> +     return !rwsem_is_reader_owned(READ_ONCE(sem->owner));

It doesn't make sense to force reload sem->owner here; if sem->owner
is not being reloaded then the loop above will execute forever.

Arguably, this check should be bumped out to the optimistic spin and
reload/check the owner there?

Or better yet; don't pass the owner in as a parameter at all, but
instead snapshot the owner and check its ownership on entry.

Because see below...

>  }
>  
>  static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
> @@ -378,7 +372,8 @@ static bool rwsem_optimistic_spin(struct rw_semaphore 
> *sem)
>  
>       while (true) {
>               owner = READ_ONCE(sem->owner);
> -             if (owner && !rwsem_spin_on_owner(sem, owner))
> +             if (rwsem_is_writer_owned(owner) &&
> +                !rwsem_spin_on_owner(sem, owner))
>                       break;
>  
>               /* wait_lock will be acquired if write_lock is obtained */
> @@ -391,9 +386,11 @@ static bool rwsem_optimistic_spin(struct rw_semaphore 
> *sem)
>                * When there's no owner, we might have preempted between the
>                * owner acquiring the lock and setting the owner field. If
>                * we're an RT task that will live-lock because we won't let
> -              * the owner complete.
> +              * the owner complete. We also quit if the lock is owned by
> +              * readers.
>                */
> -             if (!owner && (need_resched() || rt_task(current)))
> +             if (rwsem_is_reader_owned(owner) ||
> +                (!owner && (need_resched() || rt_task(current))))

This is using the stale owner value that was cached before spinning on the 
owner;
That can't be right.

Regards,
Peter Hurley

>                       break;
>  
>               /*
> diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
> index c817216..5838f56 100644
> --- a/kernel/locking/rwsem.c
> +++ b/kernel/locking/rwsem.c
> @@ -22,6 +22,7 @@ void __sched down_read(struct rw_semaphore *sem)
>       rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
>  
>       LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
> +     rwsem_set_reader_owned(sem);
>  }
>  
>  EXPORT_SYMBOL(down_read);
> @@ -33,8 +34,10 @@ int down_read_trylock(struct rw_semaphore *sem)
>  {
>       int ret = __down_read_trylock(sem);
>  
> -     if (ret == 1)
> +     if (ret == 1) {
>               rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_);
> +             rwsem_set_reader_owned(sem);
> +     }
>       return ret;
>  }
>  
> @@ -124,7 +127,7 @@ void downgrade_write(struct rw_semaphore *sem)
>        * lockdep: a downgraded write will live on as a write
>        * dependency.
>        */
> -     rwsem_clear_owner(sem);
> +     rwsem_set_reader_owned(sem);
>       __downgrade_write(sem);
>  }
>  
> @@ -138,6 +141,7 @@ void down_read_nested(struct rw_semaphore *sem, int 
> subclass)
>       rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
>  
>       LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
> +     rwsem_set_reader_owned(sem);
>  }
>  
>  EXPORT_SYMBOL(down_read_nested);
> diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h
> index 870ed9a..d7fea18 100644
> --- a/kernel/locking/rwsem.h
> +++ b/kernel/locking/rwsem.h
> @@ -1,3 +1,20 @@
> +/*
> + * The owner field of the rw_semaphore structure will be set to
> + * RWSEM_READ_OWNED when a reader grabs the lock. A writer will clear
> + * the owner field when it unlocks. A reader, on the other hand, will
> + * not touch the owner field when it unlocks.
> + *
> + * In essence, the owner field now has the following 3 states:
> + *  1) 0
> + *     - lock is free or the owner hasn't set the field yet
> + *  2) RWSEM_READER_OWNED
> + *     - lock is currently or previously owned by readers (lock is free
> + *       or not set by owner yet)
> + *  3) Other non-zero value
> + *     - a writer owns the lock
> + */
> +#define RWSEM_READER_OWNED   1UL
> +
>  #ifdef CONFIG_RWSEM_SPIN_ON_OWNER
>  static inline void rwsem_set_owner(struct rw_semaphore *sem)
>  {
> @@ -9,6 +26,26 @@ static inline void rwsem_clear_owner(struct rw_semaphore 
> *sem)
>       sem->owner = NULL;
>  }
>  
> +static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
> +{
> +     /*
> +      * We check the owner value first to make sure that we will only
> +      * do a write to the rwsem cacheline when it is really necessary
> +      * to minimize cacheline contention.
> +      */
> +     if (sem->owner != (struct task_struct *)RWSEM_READER_OWNED)
> +             sem->owner = (struct task_struct *)RWSEM_READER_OWNED;
> +}
> +
> +static inline bool rwsem_is_writer_owned(struct task_struct *owner)
> +{
> +     return (unsigned long)owner > RWSEM_READER_OWNED;
> +}
> +
> +static inline bool rwsem_is_reader_owned(struct task_struct *owner)
> +{
> +     return owner == (struct task_struct *)RWSEM_READER_OWNED;
> +}
>  #else
>  static inline void rwsem_set_owner(struct rw_semaphore *sem)
>  {
> @@ -17,4 +54,8 @@ static inline void rwsem_set_owner(struct rw_semaphore *sem)
>  static inline void rwsem_clear_owner(struct rw_semaphore *sem)
>  {
>  }
> +
> +static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
> +{
> +}
>  #endif
> 

Reply via email to