Work in progress, not for inclusion.

This patch modifies the RCU priority booster to explicitly sleep when
there are no RCU readers in need of priority boosting.  This should be
a power-consumption improvement over the one-second polling cycle in
the underlying RCU priority-boosting patch.

Signed-off-by: Paul E. McKenney <[EMAIL PROTECTED]>
---

 include/linux/rcupreempt.h |   15 ++++++
 kernel/rcupreempt.c        |  102 ++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 115 insertions(+), 2 deletions(-)

diff -urpNa -X dontdiff linux-2.6.22-G-boosttorture/include/linux/rcupreempt.h linux-2.6.22-H-boostsleep/include/linux/rcupreempt.h
--- linux-2.6.22-G-boosttorture/include/linux/rcupreempt.h      2007-08-24 11:24:59.000000000 -0700
+++ linux-2.6.22-H-boostsleep/include/linux/rcupreempt.h        2007-08-24 18:12:41.000000000 -0700
@@ -60,6 +60,21 @@ enum rcu_boost_state {
 
 #define N_RCU_BOOST_STATE (RCU_BOOST_INVALID + 1)
 
+/*
+ * RCU-booster state with respect to sleeping.  The RCU booster
+ * sleeps when no task has recently been seen sleeping in an RCU
+ * read-side critical section, and is awakened when a new sleeper
+ * appears.
+ *
+ * These values are used as array indexes (arrays are sized by
+ * N_RCU_BOOSTER_STATE below), so they must stay dense, start at
+ * zero, and keep RCU_BOOSTER_INVALID as the last entry.
+ */
+enum rcu_booster_state {
+       RCU_BOOSTER_ACTIVE = 0,   /* RCU booster actively scanning. */
+       RCU_BOOSTER_DROWSY = 1,   /* Idle scan seen; next idle scan sleeps. */
+       RCU_BOOSTER_SLEEPING = 2, /* RCU booster is asleep. */
+       RCU_BOOSTER_INVALID = 3,  /* Stand-in for out-of-range values. */
+};
+
+#define N_RCU_BOOSTER_STATE (RCU_BOOSTER_INVALID + 1)
+
 #endif /* #ifdef CONFIG_PREEMPT_RCU_BOOST */
 
 #define call_rcu_bh(head, rcu) call_rcu(head, rcu)
diff -urpNa -X dontdiff linux-2.6.22-G-boosttorture/kernel/rcupreempt.c linux-2.6.22-H-boostsleep/kernel/rcupreempt.c
--- linux-2.6.22-G-boosttorture/kernel/rcupreempt.c     2007-08-27 15:42:57.000000000 -0700
+++ linux-2.6.22-H-boostsleep/kernel/rcupreempt.c       2007-08-27 15:42:37.000000000 -0700
@@ -108,6 +108,7 @@ struct rcu_boost_dat {
        unsigned long rbs_unboosted;
 #ifdef CONFIG_PREEMPT_RCU_BOOST_STATS
        unsigned long rbs_stats[N_RCU_BOOST_DAT_EVENTS][N_RCU_BOOST_STATE];
+       unsigned long rbs_qw_stats[N_RCU_BOOSTER_STATE];
 #endif /* #ifdef CONFIG_PREEMPT_RCU_BOOST_STATS */
 };
 #define RCU_BOOST_ELEMENTS 4
@@ -115,6 +116,10 @@ struct rcu_boost_dat {
 static int rcu_boost_idx = -1; /* invalid value for early RCU use. */
 static DEFINE_PER_CPU(struct rcu_boost_dat, rcu_boost_dat[RCU_BOOST_ELEMENTS]);
 static struct task_struct *rcu_boost_task;
+/*
+ * Held by rcu_booster_try_sleep() and rcu_preempt_wake() while they
+ * examine and update rcu_booster_quiesce_state.
+ */
+static DEFINE_SPINLOCK(rcu_boost_quiesce_lock);
+/* Current sleep state of the RCU booster task. */
+static enum rcu_booster_state rcu_booster_quiesce_state = RCU_BOOSTER_ACTIVE;
+/*
+ * Counts of rcu_booster_try_sleep() calls.  First index: 1 if the
+ * preceding scan found tasks, 0 if not.  Second index: the booster
+ * state at the time of the call.
+ */
+static unsigned long rbs_qs_stats[2][N_RCU_BOOSTER_STATE];
+/*
+ * Wait queue on which the booster task blocks while in the
+ * RCU_BOOSTER_SLEEPING state.  Initialized at run time by rcu_booster().
+ */
+wait_queue_head_t rcu_booster_quiesce_wq;
 
 #ifdef CONFIG_PREEMPT_RCU_BOOST_STATS
 
@@ -171,6 +176,15 @@ static char *rcu_boost_state_error[] = {
         "?  ?",  /* unlock */
 };
 
+/*
+ * Labels for RCU booster state printout.  Indexed by enum
+ * rcu_booster_state, so the entries must stay in the same order as
+ * the enumerators, with the RCU_BOOSTER_INVALID label last.
+ */
+
+static char *rcu_booster_state_label[] = {
+       "Active",
+       "Drowsy",
+       "Sleeping",
+       "???",
+};
+
 /*
  * Print out RCU booster task statistics at the specified interval.
  */
@@ -221,6 +235,14 @@ static void rcu_boost_dat_stat_print(voi
                                                       
cpu)[i].rbs_stats[event][state];
                        }
                }
+       for (state = 0; state < N_RCU_BOOSTER_STATE; state++) {
+               sum.rbs_qw_stats[state] = 0;
+               for_each_possible_cpu(cpu)
+                       for (i = 0; i < RCU_BOOST_ELEMENTS; i++)
+                               sum.rbs_qw_stats[state] +=
+                                       per_cpu(rcu_boost_dat,
+                                               cpu)[i].rbs_qw_stats[state];
+       }
 
        /* Print them out! */
 
@@ -240,6 +262,24 @@ static void rcu_boost_dat_stat_print(voi
                       rcu_boost_state_event[event], buf);
        }
 
+       printk(KERN_INFO "RCU booster state: %s\n",
+              rcu_booster_quiesce_state >= 0 &&
+              rcu_booster_quiesce_state < N_RCU_BOOSTER_STATE
+               ? rcu_booster_state_label[rcu_booster_quiesce_state]
+               : "???");
+       i = 0;
+       for (state = 0; state < N_RCU_BOOSTER_STATE; state++)
+               i += sprintf(&buf[i], " %ld", rbs_qs_stats[0][state]);
+       printk(KERN_INFO "No tasks found: %s\n", buf);
+       i = 0;
+       for (state = 0; state < N_RCU_BOOSTER_STATE; state++)
+               i += sprintf(&buf[i], " %ld", rbs_qs_stats[1][state]);
+       printk(KERN_INFO "Tasks found: %s\n", buf);
+       i = 0;
+       for (state = 0; state < N_RCU_BOOSTER_STATE; state++)
+               i += sprintf(&buf[i], " %ld", sum.rbs_qw_stats[state]);
+       printk(KERN_INFO "Awaken opportunities: %s\n", buf);
+
        /* Go away and don't come back for awhile. */
 
        lastprint = xtime.tv_sec;
@@ -293,6 +333,8 @@ static void init_rcu_boost_early(void)
                                for (j = 0; j < N_RCU_BOOST_DAT_EVENTS; j++)
                                        for (k = 0; k < N_RCU_BOOST_STATE; k++)
                                                rbdp[i].rbs_stats[j][k] = 0;
+                               for (j = 0; j < N_RCU_BOOSTER_STATE; j++)
+                                       rbdp[i].rbs_qw_stats[j] = 0;
                        }
 #endif /* #ifdef CONFIG_PREEMPT_RCU_BOOST_STATS */
                }
@@ -378,10 +420,11 @@ static void rcu_unboost_prio(struct task
 /*
  * Boost all of the RCU-reader tasks on the specified list.
  */
-static void rcu_boost_one_reader_list(struct rcu_boost_dat *rbdp)
+static int rcu_boost_one_reader_list(struct rcu_boost_dat *rbdp)
 {
        LIST_HEAD(list);
        unsigned long flags;
+       int retval = 0;
        struct task_struct *taskp;
 
        /*
@@ -397,6 +440,7 @@ static void rcu_boost_one_reader_list(st
        list_splice_init(&rbdp->rbs_toboost, &list);
        list_splice_init(&rbdp->rbs_boosted, &list);
        while (!list_empty(&list)) {
+               retval = 1;
 
                /*
                 * Pause for a bit before boosting each task.
@@ -438,6 +482,36 @@ static void rcu_boost_one_reader_list(st
                list_add_tail(&taskp->rcub_entry, &rbdp->rbs_boosted);
        }
        spin_unlock_irqrestore(&rbdp->rbs_lock, flags);
+       return retval;
+}
+
+/*
+ * Advance the RCU booster's sleep state machine, sleeping if it is
+ * time to do so.  Called by the booster task with "yo" nonzero if any
+ * reader list scanned since the previous call was non-empty, and zero
+ * otherwise.
+ *
+ * A nonzero "yo" resets the state to RCU_BOOSTER_ACTIVE.  With "yo"
+ * zero, RCU_BOOSTER_ACTIVE advances to RCU_BOOSTER_DROWSY, and
+ * RCU_BOOSTER_DROWSY advances to RCU_BOOSTER_SLEEPING, at which point
+ * this function blocks until some other task sets the state back to
+ * RCU_BOOSTER_ACTIVE and wakes rcu_booster_quiesce_wq (as
+ * rcu_preempt_wake() does).  Any other state is reset to
+ * RCU_BOOSTER_ACTIVE.
+ */
+static void rcu_booster_try_sleep(int yo)
+{
+       spin_lock(&rcu_boost_quiesce_lock);
+
+       /*
+        * Clamp bogus values to RCU_BOOSTER_INVALID (not RCU_BOOST_INVALID,
+        * which belongs to enum rcu_boost_state) so that the statistics
+        * index below is guaranteed to lie within rbs_qs_stats[].
+        */
+       if (rcu_booster_quiesce_state < 0 ||
+           rcu_booster_quiesce_state >= N_RCU_BOOSTER_STATE)
+               rcu_booster_quiesce_state = RCU_BOOSTER_INVALID;
+       rbs_qs_stats[yo != 0][rcu_booster_quiesce_state]++;
+
+       if (yo != 0) {
+               rcu_booster_quiesce_state = RCU_BOOSTER_ACTIVE;
+       } else if (rcu_booster_quiesce_state == RCU_BOOSTER_ACTIVE) {
+               rcu_booster_quiesce_state = RCU_BOOSTER_DROWSY;
+       } else if (rcu_booster_quiesce_state == RCU_BOOSTER_DROWSY) {
+               rcu_booster_quiesce_state = RCU_BOOSTER_SLEEPING;
+
+               /*
+                * Drop the lock while asleep: the waker must acquire it
+                * in order to move the state back to RCU_BOOSTER_ACTIVE.
+                */
+               spin_unlock(&rcu_boost_quiesce_lock);
+               __wait_event(rcu_booster_quiesce_wq,
+                            rcu_booster_quiesce_state ==
+                            RCU_BOOSTER_ACTIVE);
+               spin_lock(&rcu_boost_quiesce_lock);
+       } else {
+               /* RCU_BOOSTER_SLEEPING or RCU_BOOSTER_INVALID: start over. */
+               rcu_booster_quiesce_state = RCU_BOOSTER_ACTIVE;
+       }
+       spin_unlock(&rcu_boost_quiesce_lock);
 }
 
 /*
@@ -448,15 +522,21 @@ static int rcu_booster(void *arg)
 {
        int cpu;
        struct sched_param sp = { .sched_priority = PREEMPT_RCU_BOOSTER_PRIO, };
+       int yo = 0;
 
        sched_setscheduler(current, SCHED_RR, &sp);
        current->flags |= PF_NOFREEZE;
+       init_waitqueue_head(&rcu_booster_quiesce_wq);
 
        do {
 
                /* Advance the lists of tasks. */
 
                rcu_boost_idx = (rcu_boost_idx + 1) % RCU_BOOST_ELEMENTS;
+               if (rcu_boost_idx == 0) {
+                       rcu_booster_try_sleep(yo);
+                       yo = 0;
+               }
                for_each_possible_cpu(cpu) {
 
                        /*
@@ -469,7 +549,7 @@ static int rcu_booster(void *arg)
                         * nothing.
                         */
 
-                       rcu_boost_one_reader_list(rcu_rbd_boosting(cpu));
+                       yo += rcu_boost_one_reader_list(rcu_rbd_boosting(cpu));
 
                        /*
                         * Large SMP systems may need to sleep sometimes
@@ -511,6 +591,23 @@ void init_rcu_boost_late(void)
 }
 
 /*
+ * Awaken the RCU priority booster if necessary.
+ *
+ * Forces the booster state to RCU_BOOSTER_ACTIVE, waking the booster
+ * task if the state was RCU_BOOSTER_SLEEPING.  When
+ * CONFIG_PREEMPT_RCU_BOOST_STATS is set, the state observed on entry
+ * is also counted in rbdp->rbs_qw_stats[].  The call site in
+ * __rcu_preempt_boost() invokes this with rbdp->rbs_lock held.
+ */
+static void rcu_preempt_wake(struct rcu_boost_dat *rbdp)
+{
+       spin_lock(&rcu_boost_quiesce_lock);
+       if (rcu_booster_quiesce_state < 0 ||
+           rcu_booster_quiesce_state >= N_RCU_BOOSTER_STATE)
+               rcu_booster_quiesce_state = RCU_BOOSTER_INVALID;
+#ifdef CONFIG_PREEMPT_RCU_BOOST_STATS
+       /* The rbs_qw_stats field exists only when stats are configured. */
+       rbdp->rbs_qw_stats[rcu_booster_quiesce_state]++;
+#endif /* #ifdef CONFIG_PREEMPT_RCU_BOOST_STATS */
+       if (rcu_booster_quiesce_state == RCU_BOOSTER_SLEEPING) {
+               /*
+                * Update the state before the wakeup: the sleeper in
+                * rcu_booster_try_sleep() rechecks it without holding
+                * rcu_boost_quiesce_lock.
+                */
+               rcu_booster_quiesce_state = RCU_BOOSTER_ACTIVE;
+               wake_up(&rcu_booster_quiesce_wq);
+       } else {
+               rcu_booster_quiesce_state = RCU_BOOSTER_ACTIVE;
+       }
+       spin_unlock(&rcu_boost_quiesce_lock);
+}
+
+/*
  * Update task's RCU-boost state to reflect blocking in RCU read-side
  * critical section, so that the RCU-boost task can find it in case it
  * later needs its priority boosted.
@@ -532,6 +629,7 @@ void __rcu_preempt_boost(void)
        }
        spin_lock(&rbdp->rbs_lock);
        rbdp->rbs_blocked++;
+       rcu_preempt_wake(rbdp);
 
        /*
         * Update state.  We hold the lock and aren't yet on the list,
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to