This microbenchmark simulates how the use of different futex types
can affect the actual performanace of userspace rwlock locks. The
usage is:

        perf bench futex rwlock <options>

Three sets of simple rwlock lock and unlock functions are implemented
using the wait-wake, TP futexes and glibc rwlock respectively. This
microbenchmark then runs the locking rate measurement tests using
either one of those rwlocks or all of them consecutively.

Signed-off-by: Waiman Long <long...@redhat.com>
---
 tools/perf/bench/bench.h       |   1 +
 tools/perf/bench/futex-locks.c | 842 ++++++++++++++++++++++++++++++++++++++++-
 tools/perf/bench/futex.h       |  24 +-
 tools/perf/builtin-bench.c     |   1 +
 4 files changed, 850 insertions(+), 18 deletions(-)

diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index b0632df..a7e2037 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -37,6 +37,7 @@
 /* pi futexes */
 int bench_futex_lock_pi(int argc, const char **argv, const char *prefix);
 int bench_futex_mutex(int argc, const char **argv, const char *prefix);
+int bench_futex_rwlock(int argc, const char **argv, const char *prefix);
 
 #define BENCH_FORMAT_DEFAULT_STR       "default"
 #define BENCH_FORMAT_DEFAULT           0
diff --git a/tools/perf/bench/futex-locks.c b/tools/perf/bench/futex-locks.c
index ec1e627..4008691 100644
--- a/tools/perf/bench/futex-locks.c
+++ b/tools/perf/bench/futex-locks.c
@@ -10,6 +10,12 @@
  * unlock functions are written to implenment a mutex lock using the
  * wait-wake, PI and TP futexes respectively. These functions serve as the
  * basis for measuring the locking throughput.
+ *
+ * Three sets of simple reader/writer lock and unlock functions are also
+ * implemented using the wait-wake and TP futexes as well as the Glibc
+ * rwlock respectively for performance measurement purpose. The TP futex
+ * writer lock/unlock functions are the same as that of the mutex functions.
+ * That is not the case for the wait-wake futexes.
  */
 
 #include <pthread.h>
@@ -21,11 +27,13 @@
 #include <subcmd/parse-options.h>
 #include <linux/compiler.h>
 #include <linux/kernel.h>
+#include <asm/byteorder.h>
 #include <errno.h>
 #include "bench.h"
 #include "futex.h"
 
 #include <err.h>
+#include <limits.h>
 #include <stdlib.h>
 #include <sys/time.h>
 
@@ -42,15 +50,25 @@
  */
 enum {
        STAT_OPS,       /* # of exclusive locking operations    */
+       STAT_SOPS,      /* # of shared locking operations       */
        STAT_LOCKS,     /* # of exclusive lock futex calls      */
        STAT_UNLOCKS,   /* # of exclusive unlock futex calls    */
        STAT_SLEEPS,    /* # of exclusive lock sleeps           */
+       STAT_SLOCKS,    /* # of shared lock futex calls         */
+       STAT_SUNLOCKS,  /* # of shared unlock futex calls       */
+       STAT_SSLEEPS,   /* # of shared lock sleeps              */
        STAT_EAGAINS,   /* # of EAGAIN errors                   */
        STAT_WAKEUPS,   /* # of wakeups (unlock return)         */
        STAT_HANDOFFS,  /* # of lock handoff (TP only)          */
        STAT_STEALS,    /* # of lock steals (TP only)           */
+       STAT_SLRETRIES, /* # of shared lock retries             */
+       STAT_SURETRIES, /* # of shared unlock retries           */
+       STAT_SALONES,   /* # of shared locking alone ops        */
+       STAT_SGROUPS,   /* # of shared locking group ops        */
        STAT_LOCKERRS,  /* # of exclusive lock errors           */
        STAT_UNLKERRS,  /* # of exclusive unlock errors         */
+       STAT_SLOCKERRS, /* # of shared lock errors              */
+       STAT_SUNLKERRS, /* # of shared unlock errors            */
        STAT_NUM        /* Total # of statistical count         */
 };
 
@@ -60,6 +78,8 @@ enum {
 enum {
        TIME_LOCK,      /* Total exclusive lock syscall time    */
        TIME_UNLK,      /* Total exclusive unlock syscall time  */
+       TIME_SLOCK,     /* Total shared lock syscall time       */
+       TIME_SUNLK,     /* Total shared unlock syscall time     */
        TIME_NUM,
 };
 
@@ -102,7 +122,11 @@ struct worker {
 static unsigned int threads_stopping;
 static struct stats throughput_stats;
 static lock_fn_t mutex_lock_fn;
+static lock_fn_t read_lock_fn;
+static lock_fn_t write_lock_fn;
 static unlock_fn_t mutex_unlock_fn;
+static unlock_fn_t read_unlock_fn;
+static unlock_fn_t write_unlock_fn;
 
 /*
  * Lock/unlock syscall time macro
@@ -150,6 +174,26 @@ static inline void stat_inc(int tid __maybe_unused, int 
item __maybe_unused)
 #endif
 
 /*
+ * For rwlock, the default is to have each thread acts as both reader and
+ * writer. The use of the -p option will force the use of separate threads
+ * for readers and writers. The numbers of reader and writer threads are
+ * determined by reader percentage and the total number of threads used.
+ * So the actual ratio of reader and writer operations may not be close
+ * to the given reader percentage.
+ */
+static bool xthread;
+static bool pwriter;                   /* Prefer writer flag */
+static int rthread_threshold = -1;     /* (tid < threshold) => reader */
+static unsigned int rpercent = 50;     /* Reader percentage */
+
+/*
+ * Glibc rwlock
+ */
+static pthread_rwlock_t __cacheline_aligned rwlock;
+static pthread_rwlockattr_t rwlock_attr;
+static bool rwlock_inited, rwlock_attr_inited;
+
+/*
  * The latency value within a lock critical section (load) and between locking
  * operations is in term of the number of cpu_relax() calls that are being
  * issued.
@@ -172,6 +216,27 @@ static inline void stat_inc(int tid __maybe_unused, int 
item __maybe_unused)
        NULL
 };
 
+static const struct option rwlock_options[] = {
+       OPT_INTEGER ('d', "locklat",    &locklat,  "Specify inter-locking 
latency (default = 1)"),
+       OPT_STRING  ('f', "ftype",      &ftype,    "type", "Specify futex type: 
WW, TP, GC, all (default)"),
+       OPT_INTEGER ('L', "loadlat",    &loadlat,  "Specify load latency 
(default = 1)"),
+       OPT_UINTEGER('R', "read-%",     &rpercent, "Specify reader percentage 
(default 50%)"),
+       OPT_UINTEGER('r', "runtime",    &nsecs,    "Specify runtime (in 
seconds, default = 10s)"),
+       OPT_BOOLEAN ('S', "shared",     &fshared,  "Use shared futexes instead 
of private ones"),
+       OPT_BOOLEAN ('T', "timestat",   &timestat, "Track lock/unlock syscall 
times"),
+       OPT_UINTEGER('t', "threads",    &nthreads, "Specify number of threads, 
default = # of CPUs"),
+       OPT_BOOLEAN ('v', "verbose",    &verbose,  "Verbose mode: display 
thread-level details"),
+       OPT_BOOLEAN ('W', "prefer-wr",  &pwriter,  "Prefer writers instead of 
readers"),
+       OPT_INTEGER ('w', "wait-ratio", &wratio,   "Specify <n>/1024 of load is 
1us sleep, default = 0"),
+       OPT_BOOLEAN ('x', "xthread",    &xthread,  "Use separate reader/writer 
threads"),
+       OPT_END()
+};
+
+static const char * const bench_futex_rwlock_usage[] = {
+       "perf bench futex rwlock <options>",
+       NULL
+};
+
 /**
  * futex_cmpxchg() - atomic compare and exchange
  * @uaddr:     The address of the futex to be modified
@@ -221,6 +286,17 @@ static inline int atomic_inc_return(futex_t *uaddr)
        return __sync_add_and_fetch(uaddr, 1);
 }
 
+/**
+ * atomic_add_return - atomically add a number & return the new value
+ * @uaddr:      The address of the futex to be added
+ * @val  :     The integer value to be added
+ * Return:     The new value
+ */
+static inline int atomic_add_return(futex_t *uaddr, int val)
+{
+       return  __sync_add_and_fetch(uaddr, val);
+}
+
 /**********************[ MUTEX lock/unlock functions ]*********************/
 
 /*
@@ -502,6 +578,498 @@ static void tp_mutex_unlock(futex_t *futex, int tid)
                stat_add(tid, STAT_WAKEUPS, ret);
 }
 
+/**********************[ RWLOCK lock/unlock functions ]********************/
+
+/*
+ * Wait-wake futex reader/writer lock/unlock functions
+ *
+ * This implementation is based on the reader-preferring futex eventcount
+ * rwlocks posted on http://locklessinc.com/articles/sleeping_rwlocks with
+ * some modification.
+ *
+ * It is assumed the passed-in futex have sufficient trailing space to
+ * be used by the bigger reader/writer lock structure.
+ */
+#ifdef __LITTLE_ENDIAN
+#define LSB(field)     unsigned char field
+#else
+#define LSB(field)     struct {                        \
+                               unsigned char __pad[3]; \
+                               unsigned char field;    \
+                       }
+#endif
+
+#define RW_WLOCKED     (1U << 0)
+#define RW_READER      (1U << 8)
+#define RW_EC_CONTEND  (1U << 0)
+#define RW_EC_INC      (1U << 8)
+
+struct rwlock {
+       /*
+        * Bits 0-7 : writer lock
+        * Bits 8-31: reader count
+        */
+       union {
+               futex_t val;
+               LSB(wlocked);
+       } lock;
+
+       /* Writer event count */
+       union {
+               futex_t val;
+               LSB(contend);
+       } write_ec;
+
+       /* Reader event count */
+       union {
+               futex_t val;
+               LSB(contend);
+       } read_ec;
+};
+
+static struct rwlock __cacheline_aligned rwfutex;
+
+/*
+ * Reader preferring rwlock functions
+ */
+static void ww_write_lock(futex_t *futex __maybe_unused, int tid)
+{
+       struct timespec stime, etime;
+       struct rwlock *rw = &rwfutex;
+
+       for (;;) {
+               futex_t ec = rw->write_ec.val | RW_EC_CONTEND;
+               int ret;
+
+               /* Set the write lock if there is no reader */
+               if (!rw->lock.val &&
+                  (futex_cmpxchg(&rw->lock.val, 0, RW_WLOCKED) == 0))
+                       return;
+
+               rw->write_ec.contend = 1;
+
+               if (timestat) {
+                       clock_gettime(CLOCK_REALTIME, &stime);
+                       ret = futex_wait(&rw->write_ec.val, ec, NULL, flags);
+                       clock_gettime(CLOCK_REALTIME, &etime);
+                       systime_add(tid, TIME_LOCK, &stime, &etime);
+               } else {
+                       ret = futex_wait(&rw->write_ec.val, ec, NULL, flags);
+               }
+
+               stat_inc(tid, STAT_LOCKS);
+               if (ret < 0) {
+                       if (errno == EAGAIN)
+                               stat_inc(tid, STAT_EAGAINS);
+                       else
+                               stat_inc(tid, STAT_LOCKERRS);
+               }
+
+               /* Other writers may exist */
+               rw->write_ec.contend = 1;
+       }
+}
+
+static void ww_write_unlock(futex_t *futex __maybe_unused, int tid)
+{
+       struct timespec stime, etime;
+       struct rwlock *rw = &rwfutex;
+       int ret;
+
+       rw->lock.wlocked = 0;   /* Unlock */
+
+       /* Wake all the readers */
+       atomic_add_return(&rw->read_ec.val, RW_EC_INC);
+       if (rw->read_ec.contend) {
+               rw->read_ec.contend = 0;
+               if (timestat) {
+                       clock_gettime(CLOCK_REALTIME, &stime);
+                       ret = futex_wake(&rw->read_ec.val, INT_MAX, flags);
+                       clock_gettime(CLOCK_REALTIME, &etime);
+                       systime_add(tid, TIME_UNLK, &stime, &etime);
+               } else {
+                       ret = futex_wake(&rw->read_ec.val, INT_MAX, flags);
+               }
+               stat_inc(tid, STAT_UNLOCKS);
+               if (ret < 0)
+                       stat_inc(tid, STAT_UNLKERRS);
+               else
+                       stat_add(tid, STAT_WAKEUPS, ret);
+               if (ret > 0)
+                       return;
+       }
+
+       /* Wake a writer */
+       atomic_add_return(&rw->write_ec.val, RW_EC_INC);
+       if (rw->write_ec.contend) {
+               rw->write_ec.contend = 0;
+               if (timestat) {
+                       clock_gettime(CLOCK_REALTIME, &stime);
+                       ret = futex_wake(&rw->write_ec.val, 1, flags);
+                       clock_gettime(CLOCK_REALTIME, &etime);
+                       systime_add(tid, TIME_UNLK, &stime, &etime);
+               } else {
+                       ret = futex_wake(&rw->write_ec.val, 1, flags);
+               }
+               stat_inc(tid, STAT_UNLOCKS);
+               if (ret < 0)
+                       stat_inc(tid, STAT_UNLKERRS);
+               else
+                       stat_add(tid, STAT_WAKEUPS, ret);
+       }
+}
+
+static void ww_read_lock(futex_t *futex __maybe_unused, int tid)
+{
+       struct timespec stime, etime;
+       struct rwlock *rw = &rwfutex;
+       futex_t ec = rw->read_ec.val, state;
+       int ret;
+
+       state = atomic_add_return(&rw->lock.val, RW_READER);
+
+       while (state & RW_WLOCKED) {
+               ec |= RW_EC_CONTEND;
+               rw->read_ec.contend = 1;
+
+               /* Sleep until no longer held by a writer */
+               if (timestat) {
+                       clock_gettime(CLOCK_REALTIME, &stime);
+                       ret = futex_wait(&rw->read_ec.val, ec, NULL, flags);
+                       clock_gettime(CLOCK_REALTIME, &etime);
+                       systime_add(tid, TIME_SLOCK, &stime, &etime);
+               } else {
+                       ret = futex_wait(&rw->read_ec.val, ec, NULL, flags);
+               }
+               stat_inc(tid, STAT_SLOCKS);
+               if (ret < 0) {
+                       if (errno == EAGAIN)
+                               stat_inc(tid, STAT_EAGAINS);
+                       else
+                               stat_inc(tid, STAT_SLOCKERRS);
+               }
+
+               ec = rw->read_ec.val;
+               barrier();
+               state = rw->lock.val;
+       }
+}
+
+static void ww_read_unlock(futex_t *futex __maybe_unused, int tid)
+{
+       struct timespec stime, etime;
+       struct rwlock *rw = &rwfutex;
+       futex_t state;
+       int ret;
+
+       /* Read unlock */
+       state = atomic_add_return(&rw->lock.val, -RW_READER);
+
+       /* Other readers there, don't do anything */
+       if (state >> 8)
+               return;
+
+       /* We may need to wake up a writer */
+       atomic_add_return(&rw->write_ec.val, RW_EC_INC);
+       if (rw->write_ec.contend) {
+               rw->write_ec.contend = 0;
+               if (timestat) {
+                       clock_gettime(CLOCK_REALTIME, &stime);
+                       ret = futex_wake(&rw->write_ec.val, 1, flags);
+                       clock_gettime(CLOCK_REALTIME, &etime);
+                       systime_add(tid, TIME_SUNLK, &stime, &etime);
+               } else {
+                       ret = futex_wake(&rw->write_ec.val, 1, flags);
+               }
+               stat_inc(tid, STAT_SUNLOCKS);
+               if (ret < 0)
+                       stat_inc(tid, STAT_SUNLKERRS);
+               else
+                       stat_add(tid, STAT_WAKEUPS, ret);
+       }
+}
+
+/*
+ * Writer perferring rwlock functions
+ */
+#define ww2_write_lock ww_write_lock
+#define ww2_read_unlock        ww_read_unlock
+
+static void ww2_write_unlock(futex_t *futex __maybe_unused, int tid)
+{
+       struct timespec stime, etime;
+       struct rwlock *rw = &rwfutex;
+       int ret;
+
+       rw->lock.wlocked = 0;   /* Unlock */
+
+       /* Wake a writer */
+       atomic_add_return(&rw->write_ec.val, RW_EC_INC);
+       if (rw->write_ec.contend) {
+               rw->write_ec.contend = 0;
+               if (timestat) {
+                       clock_gettime(CLOCK_REALTIME, &stime);
+                       ret = futex_wake(&rw->write_ec.val, 1, flags);
+                       clock_gettime(CLOCK_REALTIME, &etime);
+                       systime_add(tid, TIME_UNLK, &stime, &etime);
+               } else {
+                       ret = futex_wake(&rw->write_ec.val, 1, flags);
+               }
+               stat_inc(tid, STAT_UNLOCKS);
+               if (ret < 0)
+                       stat_inc(tid, STAT_UNLKERRS);
+               else
+                       stat_add(tid, STAT_WAKEUPS, ret);
+               if (ret > 0)
+                       return;
+       }
+
+       /* Wake all the readers */
+       atomic_add_return(&rw->read_ec.val, RW_EC_INC);
+       if (rw->read_ec.contend) {
+               rw->read_ec.contend = 0;
+               if (timestat) {
+                       clock_gettime(CLOCK_REALTIME, &stime);
+                       ret = futex_wake(&rw->read_ec.val, INT_MAX, flags);
+                       clock_gettime(CLOCK_REALTIME, &etime);
+                       systime_add(tid, TIME_UNLK, &stime, &etime);
+               } else {
+                       ret = futex_wake(&rw->read_ec.val, INT_MAX, flags);
+               }
+               stat_inc(tid, STAT_UNLOCKS);
+               if (ret < 0)
+                       stat_inc(tid, STAT_UNLKERRS);
+               else
+                       stat_add(tid, STAT_WAKEUPS, ret);
+       }
+}
+
+static void ww2_read_lock(futex_t *futex __maybe_unused, int tid)
+{
+       struct timespec stime, etime;
+       struct rwlock *rw = &rwfutex;
+
+       for (;;) {
+               futex_t ec = rw->read_ec.val | RW_EC_CONTEND;
+               futex_t state;
+               int ret;
+
+               if (!rw->write_ec.contend) {
+                       state = atomic_add_return(&rw->lock.val, RW_READER);
+
+                       if (!(state & RW_WLOCKED))
+                               return;
+
+                       /* Unlock */
+                       state = atomic_add_return(&rw->lock.val, -RW_READER);
+               } else {
+                       atomic_add_return(&rw->write_ec.val, RW_EC_INC);
+                       if (rw->write_ec.contend) {
+                               rw->write_ec.contend = 0;
+
+                               /*  Wake a writer, and then try again */
+                               if (timestat) {
+                                       clock_gettime(CLOCK_REALTIME, &stime);
+                                       ret = futex_wake(&rw->write_ec.val, 1,
+                                                        flags);
+                                       clock_gettime(CLOCK_REALTIME, &etime);
+                                       systime_add(tid, TIME_SUNLK, &stime,
+                                                   &etime);
+                               } else {
+                                       ret = futex_wake(&rw->write_ec.val, 1,
+                                                        flags);
+                               }
+                               stat_inc(tid, STAT_SUNLOCKS);
+                               if (ret < 0)
+                                       stat_inc(tid, STAT_SUNLKERRS);
+                               else
+                                       stat_add(tid, STAT_WAKEUPS, ret);
+                               continue;
+                       }
+               }
+
+               rw->read_ec.contend = 1;
+               if (rw->read_ec.val != ec)
+                       continue;
+
+               /* Sleep until no longer held by a writer */
+               if (timestat) {
+                       clock_gettime(CLOCK_REALTIME, &stime);
+                       ret = futex_wait(&rw->read_ec.val, ec, NULL, flags);
+                       clock_gettime(CLOCK_REALTIME, &etime);
+                       systime_add(tid, TIME_SLOCK, &stime, &etime);
+               } else {
+                       ret = futex_wait(&rw->read_ec.val, ec, NULL, flags);
+               }
+               stat_inc(tid, STAT_SLOCKS);
+               if (ret < 0) {
+                       if (errno == EAGAIN)
+                               stat_inc(tid, STAT_EAGAINS);
+                       else
+                               stat_inc(tid, STAT_SLOCKERRS);
+               }
+       }
+}
+
+/*
+ * TP futex reader/writer lock/unlock functions
+ */
+#define tp_write_lock          tp_mutex_lock
+#define tp_write_unlock                tp_mutex_unlock
+#define FUTEX_FLAGS_MASK       (3UL << 30)
+
+static void tp_read_lock(futex_t *futex, int tid)
+{
+       struct timespec stime, etime;
+       futex_t val;
+       int ret, retry = 0;
+
+       val = futex_cmpxchg(futex, 0, FUTEX_SHARED + 1);
+       if (!val)
+               return;
+
+       for (;;) {
+               futex_t old = val, new;
+
+               /*
+                * Try to increment the reader count only if
+                * 1) the FUTEX_SHARED bit is set; and
+                * 2) none of the flags bits or FUTEX_SHARED_UNLOCK is set.
+                */
+               if (!old)
+                       new = FUTEX_SHARED + 1;
+               else if ((old & FUTEX_SHARED) &&
+                       !(old & (FUTEX_FLAGS_MASK|FUTEX_SHARED_UNLOCK)))
+                       new = old + 1;
+               else
+                       break;
+               val = futex_cmpxchg(futex, old, new);
+               if (val == old)
+                       goto out;
+               retry++;
+       }
+
+       for (;;) {
+               if (timestat) {
+                       clock_gettime(CLOCK_REALTIME, &stime);
+                       ret = futex_lock_shared(futex, NULL, flags);
+                       clock_gettime(CLOCK_REALTIME, &etime);
+                       systime_add(tid, TIME_SLOCK, &stime, &etime);
+               } else {
+                       ret = futex_lock_shared(futex, NULL, flags);
+               }
+               stat_inc(tid, STAT_SLOCKS);
+               if (ret >= 0)
+                       break;
+               stat_inc(tid, STAT_SLOCKERRS);
+       }
+       /*
+        * Get # of sleeps & locking method
+        */
+       stat_add(tid, STAT_SSLEEPS, ret >> 16);
+       if (ret & 0x100)
+                stat_inc(tid, STAT_SALONES);   /* In alone mode */
+       if (ret & 0x200)
+                stat_inc(tid, STAT_SGROUPS);   /* In group mode */
+
+       ret &= 0xff;
+       if (!ret)
+               stat_inc(tid, STAT_STEALS);
+       else if (ret == 2)
+               stat_inc(tid, STAT_HANDOFFS);
+out:
+       if (unlikely(retry))
+               stat_add(tid, STAT_SLRETRIES, retry);
+}
+
+static void tp_read_unlock(futex_t *futex, int tid)
+{
+       struct timespec stime, etime;
+       futex_t old, val;
+       int ret, retry = 0;
+
+       val = atomic_dec_return(futex);
+       if (!(val & FUTEX_SHARED)) {
+               fprintf(stderr,
+                       "tp_read_unlock: Incorrect futex value = 0x%x\n", val);
+               exit(1);
+       }
+
+       for (;;) {
+               /*
+                * Return if not the last reader, not in shared locking
+                * mode or the unlock bit has been set.
+                */
+               if ((val & (FUTEX_SCNT_MASK|FUTEX_SHARED_UNLOCK)) ||
+                  !(val & FUTEX_SHARED))
+                       return;
+
+               if (val & ~FUTEX_SHARED_TID_MASK) {
+                       /*
+                        * Only one task that can set the FUTEX_SHARED_UNLOCK
+                        * bit will do the unlock.
+                        */
+                       old = futex_cmpxchg(futex, val,
+                                           val|FUTEX_SHARED_UNLOCK);
+                       if (old == val)
+                               break;
+               } else {
+                       /*
+                        * Try to clear the futex.
+                        */
+                       old = futex_cmpxchg(futex, val, 0);
+                       if (old == val)
+                               goto out;
+               }
+               val = old;
+               retry++;
+       }
+
+       if (timestat) {
+               clock_gettime(CLOCK_REALTIME, &stime);
+               ret = futex_unlock_shared(futex, flags);
+               clock_gettime(CLOCK_REALTIME, &etime);
+               systime_add(tid, TIME_SUNLK, &stime, &etime);
+       } else {
+               ret = futex_unlock_shared(futex, flags);
+       }
+       stat_inc(tid, STAT_SUNLOCKS);
+       if (ret < 0)
+               stat_inc(tid, STAT_SUNLKERRS);
+       else
+               stat_add(tid, STAT_WAKEUPS, ret);
+out:
+       if (unlikely(retry))
+               stat_add(tid, STAT_SURETRIES, retry);
+}
+
+/*
+ * Glibc read/write lock
+ */
+static void gc_write_lock(futex_t *futex __maybe_unused,
+                         int tid __maybe_unused)
+{
+       pthread_rwlock_wrlock(&rwlock);
+}
+
+static void gc_write_unlock(futex_t *futex __maybe_unused,
+                           int tid __maybe_unused)
+{
+       pthread_rwlock_unlock(&rwlock);
+}
+
+static void gc_read_lock(futex_t *futex __maybe_unused,
+                        int tid __maybe_unused)
+{
+       pthread_rwlock_rdlock(&rwlock);
+}
+
+static void gc_read_unlock(futex_t *futex __maybe_unused,
+                          int tid __maybe_unused)
+{
+       pthread_rwlock_unlock(&rwlock);
+}
+
 /**************************************************************************/
 
 /*
@@ -576,6 +1144,77 @@ static void *mutex_workerfn(void *arg)
        return NULL;
 }
 
+static void *rwlock_workerfn(void *arg)
+{
+       long tid = (long)arg;
+       struct worker *w = &worker[tid];
+       lock_fn_t rlock_fn = read_lock_fn;
+       lock_fn_t wlock_fn = write_lock_fn;
+       unlock_fn_t runlock_fn = read_unlock_fn;
+       unlock_fn_t wunlock_fn = write_unlock_fn;
+
+       thread_id = gettid();
+       counter = 0;
+
+       atomic_dec_return(&threads_starting);
+
+       /*
+        * Busy wait until asked to start
+        */
+       while (!worker_start)
+               cpu_relax();
+
+       if (rthread_threshold >= 0) {
+               if (tid < rthread_threshold) {
+                       do {
+                               rlock_fn(w->futex, tid);
+                               load(tid);
+                               runlock_fn(w->futex, tid);
+                               w->stats[STAT_SOPS]++;
+                               csdelay();
+                       } while (!done);
+               } else {
+                       do {
+                               wlock_fn(w->futex, tid);
+                               load(tid);
+                               wunlock_fn(w->futex, tid);
+                               w->stats[STAT_OPS]++;
+                               csdelay();
+                       } while (!done);
+               }
+               goto out;
+       }
+
+       while (!done) {
+               int rcnt = rpercent;
+               int wcnt = 100 - rcnt;
+
+               do {
+                       if (wcnt) {
+                               wlock_fn(w->futex, tid);
+                               load(tid);
+                               wunlock_fn(w->futex, tid);
+                               w->stats[STAT_OPS]++;
+                               wcnt--;
+                               csdelay();
+                       }
+                       if (rcnt) {
+                               rlock_fn(w->futex, tid);
+                               load(tid);
+                               runlock_fn(w->futex, tid);
+                               w->stats[STAT_SOPS]++;
+                               rcnt--;
+                               csdelay();
+                       }
+               }  while (!done && (rcnt + wcnt));
+       }
+out:
+       if (verbose)
+               printf("[thread %3ld (%d)] exited.\n", tid, thread_id);
+       atomic_inc_return(&threads_stopping);
+       return NULL;
+}
+
 static void create_threads(struct worker *w, pthread_attr_t *thread_attr,
                           void *(*workerfn)(void *arg), long tid)
 {
@@ -631,6 +1270,66 @@ static int futex_mutex_type(const char **ptype)
        return 0;
 }
 
+static int futex_rwlock_type(const char **ptype)
+{
+       const char *type = *ptype;
+
+       if (!strcasecmp(type, "WW")) {
+               *ptype = "WW";
+               pfutex = &rwfutex.lock.val;
+               if (pwriter) {
+                       read_lock_fn = ww2_read_lock;
+                       read_unlock_fn = ww2_read_unlock;
+                       write_lock_fn = ww2_write_lock;
+                       write_unlock_fn = ww2_write_unlock;
+               } else {
+                       read_lock_fn = ww_read_lock;
+                       read_unlock_fn = ww_read_unlock;
+                       write_lock_fn = ww_write_lock;
+                       write_unlock_fn = ww_write_unlock;
+               }
+       } else if (!strcasecmp(type, "TP")) {
+               *ptype = "TP";
+               read_lock_fn = tp_read_lock;
+               read_unlock_fn = tp_read_unlock;
+               write_lock_fn = tp_write_lock;
+               write_unlock_fn = tp_write_unlock;
+
+               /*
+                * Check if TP futex is supported.
+                */
+               futex_unlock(&global_futex, 0);
+               if (errno == ENOSYS) {
+                       fprintf(stderr,
+                           "\nTP futexes are not supported by the kernel!\n");
+                       return -1;
+               }
+       } else if (!strcasecmp(type, "GC")) {
+               pthread_rwlockattr_t *attr = NULL;
+
+               *ptype = "GC";
+               read_lock_fn = gc_read_lock;
+               read_unlock_fn = gc_read_unlock;
+               write_lock_fn = gc_write_lock;
+               write_unlock_fn = gc_write_unlock;
+               if (pwriter || fshared) {
+                       pthread_rwlockattr_init(&rwlock_attr);
+                       attr = &rwlock_attr;
+                       rwlock_attr_inited = true;
+                       if (fshared)
+                               pthread_rwlockattr_setpshared(attr, true);
+                       if (pwriter)
+                               pthread_rwlockattr_setkind_np(attr,
+                                 PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP);
+               }
+               pthread_rwlock_init(&rwlock, attr);
+               rwlock_inited = true;
+       } else {
+               return -1;
+       }
+       return 0;
+}
+
 static void futex_test_driver(const char *futex_type,
                              int (*proc_type)(const char **ptype),
                              void *(*workerfn)(void *arg))
@@ -647,15 +1346,25 @@ static void futex_test_driver(const char *futex_type,
         */
        const char *desc[STAT_NUM] = {
                [STAT_OPS]       = "Total exclusive locking ops",
+               [STAT_SOPS]      = "Total shared locking ops",
                [STAT_LOCKS]     = "Exclusive lock futex calls",
                [STAT_UNLOCKS]   = "Exclusive unlock futex calls",
                [STAT_SLEEPS]    = "Exclusive lock sleeps",
+               [STAT_SLOCKS]    = "Shared lock futex calls",
+               [STAT_SUNLOCKS]  = "Shared unlock futex calls",
+               [STAT_SSLEEPS]   = "Shared lock sleeps",
                [STAT_WAKEUPS]   = "Process wakeups",
                [STAT_EAGAINS]   = "EAGAIN lock errors",
                [STAT_HANDOFFS]  = "Lock handoffs",
                [STAT_STEALS]    = "Lock stealings",
+               [STAT_SLRETRIES] = "Shared lock retries",
+               [STAT_SURETRIES] = "Shared unlock retries",
+               [STAT_SALONES]   = "Shared lock ops (alone mode)",
+               [STAT_SGROUPS]   = "Shared lock ops (group mode)",
                [STAT_LOCKERRS]  = "\nExclusive lock errors",
                [STAT_UNLKERRS]  = "\nExclusive unlock errors",
+               [STAT_SLOCKERRS] = "\nShared lock errors",
+               [STAT_SUNLKERRS] = "\nShared unlock errors",
        };
 
        if (exit_now)
@@ -667,9 +1376,18 @@ static void futex_test_driver(const char *futex_type,
        }
 
        printf("\n=====================================\n");
-       printf("[PID %d]: %d threads doing %s futex lockings (load=%d) for %d 
secs.\n\n",
+       printf("[PID %d]: %d threads doing %s futex lockings (load=%d) for %d 
secs.\n",
               getpid(), nthreads, futex_type, loadlat, nsecs);
 
+       if (xthread) {
+               /*
+                * Compute numbers of reader and writer threads.
+                */
+               rthread_threshold = (rpercent * nthreads + 50)/100;
+               printf("\t\t{%d reader threads, %d writer threads}\n",
+                       rthread_threshold, nthreads - rthread_threshold);
+       }
+       printf("\n");
        init_stats(&throughput_stats);
 
        *pfutex = 0;
@@ -739,11 +1457,15 @@ static void futex_test_driver(const char *futex_type,
                /*
                 * Get a rounded estimate of the # of locking ops/sec.
                 */
-               u64 tp = (u64)worker[i].stats[STAT_OPS] * 1000000 / us;
+               u64 tp = (u64)(worker[i].stats[STAT_OPS] +
+                              worker[i].stats[STAT_SOPS]) * 1000000 / us;
 
                for (j = 0; j < STAT_NUM; j++)
                        total.stats[j] += worker[i].stats[j];
 
+               for (j = 0; j < TIME_NUM; j++)
+                       total.times[j] += worker[i].times[j];
+
                update_stats(&throughput_stats, tp);
                if (verbose)
                        printf("[thread %3d] futex: %p [ %'ld ops/sec ]\n",
@@ -767,21 +1489,41 @@ static void futex_test_driver(const char *futex_type,
                if (total.stats[STAT_UNLOCKS])
                        printf("Avg exclusive unlock syscall = %'ldns\n",
                            total.times[TIME_UNLK]/total.stats[STAT_UNLOCKS]);
+               if (total.stats[STAT_SLOCKS])
+                       printf("Avg shared lock syscall      = %'ldns\n",
+                           total.times[TIME_SLOCK]/total.stats[STAT_SLOCKS]);
+               if (total.stats[STAT_SUNLOCKS])
+                       printf("Avg shared unlock syscall    = %'ldns\n",
+                           total.times[TIME_SUNLK]/total.stats[STAT_SUNLOCKS]);
        }
 
        printf("\nPercentages:\n");
        if (total.stats[STAT_LOCKS])
                printf("Exclusive lock futex calls   = %.1f%%\n",
                        stat_percent(&total, STAT_LOCKS, STAT_OPS));
+       if (total.stats[STAT_SLOCKS])
+               printf("Shared lock futex calls      = %.1f%%\n",
+                       stat_percent(&total, STAT_SLOCKS, STAT_SOPS));
        if (total.stats[STAT_UNLOCKS])
                printf("Exclusive unlock futex calls = %.1f%%\n",
                        stat_percent(&total, STAT_UNLOCKS, STAT_OPS));
+       if (total.stats[STAT_SUNLOCKS])
+               printf("Shared unlock futex calls    = %.1f%%\n",
+                       stat_percent(&total, STAT_SUNLOCKS, STAT_SOPS));
        if (total.stats[STAT_EAGAINS])
                printf("EAGAIN lock errors           = %.1f%%\n",
-                       stat_percent(&total, STAT_EAGAINS, STAT_LOCKS));
+                       (double)total.stats[STAT_EAGAINS] * 100 /
+                       (total.stats[STAT_LOCKS] + total.stats[STAT_SLOCKS]));
        if (total.stats[STAT_WAKEUPS])
                printf("Process wakeups              = %.1f%%\n",
-                       stat_percent(&total, STAT_WAKEUPS, STAT_UNLOCKS));
+                       (double)total.stats[STAT_WAKEUPS] * 100 /
+                       (total.stats[STAT_UNLOCKS] +
+                        total.stats[STAT_SUNLOCKS]));
+       if (xthread)
+                printf("Reader operations            = %.1f%%\n",
+                       (double)total.stats[STAT_SOPS] * 100 /
+                       (total.stats[STAT_OPS] + total.stats[STAT_SOPS]));
+
 
        printf("\nPer-thread Locking Rates:\n");
        printf("Avg = %'d ops/sec (+- %.2f%%)\n", (int)(avg + 0.5),
@@ -789,28 +1531,55 @@ static void futex_test_driver(const char *futex_type,
        printf("Min = %'d ops/sec\n", (int)throughput_stats.min);
        printf("Max = %'d ops/sec\n", (int)throughput_stats.max);
 
+       /*
+        * Compute the averagge reader and writer locking operation rates
+        * with separate reader and writer threads.
+        */
+       if (xthread) {
+               u64 tp;
+
+               /* Reader stats */
+               memset(&throughput_stats, 0, sizeof(throughput_stats));
+               for (i = 0, tp = 0; i < rthread_threshold; i++) {
+                       tp = (u64)worker[i].stats[STAT_SOPS] * 1000000 / us;
+                       update_stats(&throughput_stats, tp);
+               }
+               avg    = avg_stats(&throughput_stats);
+               stddev = stddev_stats(&throughput_stats);
+               printf("\nReader avg = %'d ops/sec (+- %.2f%%)\n",
+                       (int)(avg + 0.5), rel_stddev_stats(stddev, avg));
+
+               /* Writer stats */
+               memset(&throughput_stats, 0, sizeof(throughput_stats));
+               for (tp = 0; i < (int)nthreads; i++) {
+                       tp = (u64)worker[i].stats[STAT_OPS] * 1000000 / us;
+                       update_stats(&throughput_stats, tp);
+               }
+               avg    = avg_stats(&throughput_stats);
+               stddev = stddev_stats(&throughput_stats);
+               printf("Writer avg = %'d ops/sec (+- %.2f%%)\n",
+                       (int)(avg + 0.5), rel_stddev_stats(stddev, avg));
+       }
+
        if (*pfutex != 0)
                printf("\nResidual futex value = 0x%x\n", *pfutex);
 
        /* Clear the workers area */
        memset(worker, 0, sizeof(*worker) * nthreads);
+
+       if (rwlock_inited)
+               pthread_rwlock_destroy(&rwlock);
+       if (rwlock_attr_inited)
+               pthread_rwlockattr_destroy(&rwlock_attr);
 }
 
-int bench_futex_mutex(int argc, const char **argv,
-                     const char *prefix __maybe_unused)
+static void bench_futex_common(struct sigaction *act)
 {
-       struct sigaction act;
-
-       argc = parse_options(argc, argv, mutex_options,
-                            bench_futex_mutex_usage, 0);
-       if (argc)
-               goto err;
-
        ncpus = sysconf(_SC_NPROCESSORS_ONLN);
 
-       sigfillset(&act.sa_mask);
-       act.sa_sigaction = toggle_done;
-       sigaction(SIGINT, &act, NULL);
+       sigfillset(&act->sa_mask);
+       act->sa_sigaction = toggle_done;
+       sigaction(SIGINT, act, NULL);
 
        if (!nthreads)
                nthreads = ncpus;
@@ -828,6 +1597,19 @@ int bench_futex_mutex(int argc, const char **argv,
 
        if (!fshared)
                flags = FUTEX_PRIVATE_FLAG;
+}
+
+int bench_futex_mutex(int argc, const char **argv,
+                     const char *prefix __maybe_unused)
+{
+       struct sigaction act;
+
+       argc = parse_options(argc, argv, mutex_options,
+                            bench_futex_mutex_usage, 0);
+       if (argc)
+               goto err;
+
+       bench_futex_common(&act);
 
        if (!ftype || !strcmp(ftype, "all")) {
                futex_test_driver("WW", futex_mutex_type, mutex_workerfn);
@@ -842,3 +1624,31 @@ int bench_futex_mutex(int argc, const char **argv,
        usage_with_options(bench_futex_mutex_usage, mutex_options);
        exit(EXIT_FAILURE);
 }
+
+int bench_futex_rwlock(int argc, const char **argv,
+                     const char *prefix __maybe_unused)
+{
+       struct sigaction act;
+
+       argc = parse_options(argc, argv, rwlock_options,
+                            bench_futex_rwlock_usage, 0);
+       if (argc)
+               goto err;
+
+       ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+
+       bench_futex_common(&act);
+
+       if (!ftype || !strcmp(ftype, "all")) {
+               futex_test_driver("WW", futex_rwlock_type, rwlock_workerfn);
+               futex_test_driver("TP", futex_rwlock_type, rwlock_workerfn);
+               futex_test_driver("GC", futex_rwlock_type, rwlock_workerfn);
+       } else {
+               futex_test_driver(ftype, futex_rwlock_type, rwlock_workerfn);
+       }
+       free(worker_alloc);
+       return 0;
+err:
+       usage_with_options(bench_futex_rwlock_usage, rwlock_options);
+       exit(EXIT_FAILURE);
+}
diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h
index be1eb7a..199cc77 100644
--- a/tools/perf/bench/futex.h
+++ b/tools/perf/bench/futex.h
@@ -88,8 +88,10 @@
 }
 
 #ifndef FUTEX_LOCK
-#define FUTEX_LOCK     0xff    /* Disable the use of TP futexes */
-#define FUTEX_UNLOCK   0xff
+#define FUTEX_LOCK             0xff    /* Disable the use of TP futexes */
+#define FUTEX_UNLOCK           0xff
+#define FUTEX_LOCK_SHARED      0xff
+#define FUTEX_UNLOCK_SHARED    0xff
 #endif /* FUTEX_LOCK */
 
 /**
@@ -110,6 +112,24 @@
        return futex(uaddr, FUTEX_UNLOCK, 0, NULL, NULL, 0, opflags);
 }
 
+/**
+ * futex_lock_shared() - shared locking of TP futex
+ */
+static inline int
+futex_lock_shared(u_int32_t *uaddr, struct timespec *timeout, int opflags)
+{
+       return futex(uaddr, FUTEX_LOCK_SHARED, 0, timeout, NULL, 0, opflags);
+}
+
+/**
+ * futex_unlock_shared() - shared unlocking of TP futex
+ */
+static inline int
+futex_unlock_shared(u_int32_t *uaddr, int opflags)
+{
+       return futex(uaddr, FUTEX_UNLOCK_SHARED, 0, NULL, NULL, 0, opflags);
+}
+
 #ifndef HAVE_PTHREAD_ATTR_SETAFFINITY_NP
 #include <pthread.h>
 static inline int pthread_attr_setaffinity_np(pthread_attr_t *attr,
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index bf4418d..424a014 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -64,6 +64,7 @@ struct bench {
        /* pi-futexes */
        { "lock-pi",    "Benchmark for futex lock_pi calls",            
bench_futex_lock_pi     },
        { "mutex",      "Benchmark for mutex locks using futexes",      
bench_futex_mutex       },
+       { "rwlock",     "Benchmark for rwlocks using futexes",          
bench_futex_rwlock      },
        { "all",        "Run all futex benchmarks",                     NULL    
                },
        { NULL,         NULL,                                           NULL    
                }
 };
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to