To better understand how the TP futexes are performing, it is useful to
return the internal status on the TP futexes. The FUTEX_LOCK futex(2)
syscall will now return a non-negative status code if no error happens. The
status code consists of the following 3 fields:

 1) Bits 00-07: code on how the lock is acquired.
 2) Bits 08-15: reserved
 3) Bits 16-30: how many times the task sleeps in the optimistic
    spinning loop.

By returning the TP status code, an external monitoring or tracking
program can have a macro view of how the TP futexes are performing.

Signed-off-by: Waiman Long <long...@redhat.com>
---
 kernel/futex.c | 43 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 32 insertions(+), 11 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index 711a2b4..3308cc3 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -3367,7 +3367,23 @@ void exit_robust_list(struct task_struct *curr)
  */
 #define TP_HANDOFF_TIMEOUT     5000000 /* 5ms  */
 
-/**
+/*
+ * The futex_lock() function returns the internal status of the TP futex.
+ * The status code consists of the following 3 fields:
+ * 1) bits 00-07: code on how the lock is acquired
+ *                0 - steals the lock
+ *                1 - top waiter (mutex owner) acquires the lock
+ *                2 - handed off the lock
+ * 2) bits 08-15: reserved
+ * 3) bits 16-30: how many times the task has slept or yielded to the
+ *               scheduler in futex_spin_on_owner().
+ */
+#define TP_LOCK_STOLEN         0
+#define TP_LOCK_ACQUIRED       1
+#define TP_LOCK_HANDOFF                2
+#define TP_STATUS_SLEEP(val, sleep)    ((val)|((sleep) << 16))
+
+/**
  * lookup_futex_state - Looking up the futex state structure.
  * @hb:                 hash bucket
  * @key:        futex key
@@ -3451,9 +3467,11 @@ static inline int put_futex_state_unlocked(struct futex_state *state)
  *   preserve the flag bits
  * endif
  *
- * Return: 1 if lock acquired;
+ * Return: TP_LOCK_ACQUIRED if lock acquired;
+ *        TP_LOCK_HANDOFF if lock was handed off;
  *        0 if lock acquisition failed;
  *        -EFAULT if an error happened.
+ *        *puval will contain the latest futex value when trylock fails.
  */
 static inline int futex_trylock(u32 __user *uaddr, const u32 vpid, u32 *puval,
                                const bool waiter)
@@ -3466,7 +3484,7 @@ static inline int futex_trylock(u32 __user *uaddr, const u32 vpid, u32 *puval,
        uval = *puval;
 
        if (waiter && (uval & FUTEX_TID_MASK) == vpid)
-               return 1;
+               return TP_LOCK_HANDOFF;
 
        if (uval & FUTEX_TID_MASK)
                return 0;       /* Trylock fails */
@@ -3477,7 +3495,7 @@ static inline int futex_trylock(u32 __user *uaddr, const u32 vpid, u32 *puval,
        if (unlikely(cmpxchg_futex_value(puval, uaddr, uval, vpid|flags)))
                return -EFAULT;
 
-       return *puval == uval;
+       return (*puval == uval) ? TP_LOCK_ACQUIRED : 0;
 }
 
 /**
@@ -3491,7 +3509,8 @@ static inline int futex_trylock(u32 __user *uaddr, const u32 vpid, u32 *puval,
  * of faulting in the futex word. This function should only be called from
  * within futex_spin_on_owner().
  *
- * Return: 1 if lock acquired;
+ * Return: TP_LOCK_ACQUIRED if lock acquired;
+ *        TP_LOCK_HANDOFF if lock was handed off;
  *        0 if lock acquisition failed;
  *        -EFAULT if an error happened.
  */
@@ -3552,7 +3571,7 @@ static inline int futex_set_waiters_bit(u32 __user *uaddr, u32 *puval)
  * unless the pid wraps around and the perceived owner is not the real owner.
  * To guard against this case, we will have to use the robust futex feature.
  *
- * Return: 0 if futex acquired, < 0 if an error happens.
+ * Return: TP status code if lock acquired, < 0 if an error happens.
  */
 static int futex_spin_on_owner(u32 __user *uaddr, const u32 vpid,
                               struct futex_state *state)
@@ -3562,6 +3581,7 @@ static int futex_spin_on_owner(u32 __user *uaddr, const u32 vpid,
        "\tLock is now acquired by pid %d!\n"
 
        int ret, loopcnt = 1;
+       int nsleep = 0;
        bool handoff_set = false;
        u32 uval;
        u32 owner_pid = 0;
@@ -3621,6 +3641,7 @@ static int futex_spin_on_owner(u32 __user *uaddr, const u32 vpid,
                if (need_resched()) {
                        __set_current_state(TASK_RUNNING);
                        schedule_preempt_disabled();
+                       nsleep++;
                        loopcnt = 0;
                        continue;
                }
@@ -3691,6 +3712,7 @@ static int futex_spin_on_owner(u32 __user *uaddr, const u32 vpid,
                 */
                if (!(uval & FUTEX_OWNER_DIED) && (uval & FUTEX_WAITERS)) {
                        schedule_preempt_disabled();
+                       nsleep++;
                        loopcnt = 0;
                }
                __set_current_state(TASK_RUNNING);
@@ -3717,7 +3739,7 @@ static int futex_spin_on_owner(u32 __user *uaddr, const u32 vpid,
        WRITE_ONCE(state->handoff_pid, 0);
 
        preempt_enable();
-       return ret;
+       return (ret < 0) ? ret : TP_STATUS_SLEEP(ret, nsleep);
 }
 
 /*
@@ -3731,8 +3753,7 @@ static int futex_spin_on_owner(u32 __user *uaddr, const u32 vpid,
  * This function is not inlined so that it can show up separately in perf
  * profile for performance analysis purpose.
  *
- * Return: 0   - lock acquired
- *        < 0 - an error happens
+ * Return: TP status code if lock acquired, < 0 if an error happens.
  */
 static noinline int futex_lock(u32 __user *uaddr, unsigned int flags)
 {
@@ -3747,7 +3768,7 @@ static noinline int futex_lock(u32 __user *uaddr, unsigned int flags)
         */
        ret = futex_trylock(uaddr, vpid, &uval, false);
        if (ret)
-               goto out;
+               return (ret < 0) ? ret : TP_LOCK_STOLEN;
 
        /*
         * Detect deadlocks.
@@ -3815,7 +3836,7 @@ static noinline int futex_lock(u32 __user *uaddr, unsigned int flags)
        put_futex_key(&key);
 
 out:
-       return (ret < 0) ? ret : 0;
+       return ret;
 }
 
 /*
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to