Author: mav
Date: Mon Jun  4 07:16:12 2012
New Revision: 236547
URL: http://svn.freebsd.org/changeset/base/236547

Log:
  MFC r234066:
  Microoptimize cpu_search().
  
  According to profiling, this makes cpu_search() take 6% of CPU time on
  hackbench, with its millions of context switches per second, instead of
  8% before.

Modified:
  stable/9/sys/kern/sched_ule.c
Directory Properties:
  stable/9/sys/   (props changed)

Modified: stable/9/sys/kern/sched_ule.c
==============================================================================
--- stable/9/sys/kern/sched_ule.c       Mon Jun  4 07:12:36 2012        (r236546)
+++ stable/9/sys/kern/sched_ule.c       Mon Jun  4 07:16:12 2012        (r236547)
@@ -615,32 +615,34 @@ cpu_search(const struct cpu_group *cg, s
        cpuset_t cpumask;
        struct cpu_group *child;
        struct tdq *tdq;
-       int cpu, i, hload, lload, load, total, rnd;
+       int cpu, i, hload, lload, load, total, rnd, *rndptr;
 
        total = 0;
        cpumask = cg->cg_mask;
        if (match & CPU_SEARCH_LOWEST) {
                lload = INT_MAX;
-               low->cs_load = INT_MAX;
                lgroup = *low;
        }
        if (match & CPU_SEARCH_HIGHEST) {
-               hload = -1;
-               high->cs_load = -1;
+               hload = INT_MIN;
                hgroup = *high;
        }
 
        /* Iterate through the child CPU groups and then remaining CPUs. */
-       for (i = 0, cpu = 0; i <= cg->cg_children; ) {
-               if (i >= cg->cg_children) {
-                       while (cpu <= mp_maxid && !CPU_ISSET(cpu, &cpumask))
-                               cpu++;
-                       if (cpu > mp_maxid)
+       for (i = cg->cg_children, cpu = mp_maxid; i >= 0; ) {
+               if (i == 0) {
+                       while (cpu >= 0 && !CPU_ISSET(cpu, &cpumask))
+                               cpu--;
+                       if (cpu < 0)
                                break;
                        child = NULL;
                } else
-                       child = &cg->cg_child[i];
+                       child = &cg->cg_child[i - 1];
 
+               if (match & CPU_SEARCH_LOWEST)
+                       lgroup.cs_cpu = -1;
+               if (match & CPU_SEARCH_HIGHEST)
+                       hgroup.cs_cpu = -1;
                if (child) {                    /* Handle child CPU group. */
                        CPU_NAND(&cpumask, &child->cg_mask);
                        switch (match) {
@@ -657,23 +659,23 @@ cpu_search(const struct cpu_group *cg, s
                } else {                        /* Handle child CPU. */
                        tdq = TDQ_CPU(cpu);
                        load = tdq->tdq_load * 256;
-                       rnd = DPCPU_SET(randomval,
-                           DPCPU_GET(randomval) * 69069 + 5) >> 26;
+                       rndptr = DPCPU_PTR(randomval);
+                       rnd = (*rndptr = *rndptr * 69069 + 5) >> 26;
                        if (match & CPU_SEARCH_LOWEST) {
                                if (cpu == low->cs_prefer)
                                        load -= 64;
                                /* If that CPU is allowed and get data. */
-                               if (CPU_ISSET(cpu, &lgroup.cs_mask) &&
-                                   tdq->tdq_lowpri > lgroup.cs_pri &&
-                                   tdq->tdq_load <= lgroup.cs_limit) {
+                               if (tdq->tdq_lowpri > lgroup.cs_pri &&
+                                   tdq->tdq_load <= lgroup.cs_limit &&
+                                   CPU_ISSET(cpu, &lgroup.cs_mask)) {
                                        lgroup.cs_cpu = cpu;
                                        lgroup.cs_load = load - rnd;
                                }
                        }
                        if (match & CPU_SEARCH_HIGHEST)
-                               if (CPU_ISSET(cpu, &hgroup.cs_mask) &&
-                                   tdq->tdq_load >= hgroup.cs_limit &&
-                                   tdq->tdq_transferable) {
+                               if (tdq->tdq_load >= hgroup.cs_limit &&
+                                   tdq->tdq_transferable &&
+                                   CPU_ISSET(cpu, &hgroup.cs_mask)) {
                                        hgroup.cs_cpu = cpu;
                                        hgroup.cs_load = load - rnd;
                                }
@@ -682,7 +684,7 @@ cpu_search(const struct cpu_group *cg, s
 
                /* We have info about child item. Compare it. */
                if (match & CPU_SEARCH_LOWEST) {
-                       if (lgroup.cs_load != INT_MAX &&
+                       if (lgroup.cs_cpu >= 0 &&
                            (load < lload ||
                             (load == lload && lgroup.cs_load < low->cs_load))) 
{
                                lload = load;
@@ -691,17 +693,19 @@ cpu_search(const struct cpu_group *cg, s
                        }
                }
                if (match & CPU_SEARCH_HIGHEST)
-                       if (hgroup.cs_load >= 0 &&
+                       if (hgroup.cs_cpu >= 0 &&
                            (load > hload ||
                             (load == hload && hgroup.cs_load > 
high->cs_load))) {
                                hload = load;
                                high->cs_cpu = hgroup.cs_cpu;
                                high->cs_load = hgroup.cs_load;
                        }
-               if (child)
-                       i++;
-               else
-                       cpu++;
+               if (child) {
+                       i--;
+                       if (i == 0 && CPU_EMPTY(&cpumask))
+                               break;
+               } else
+                       cpu--;
        }
        return (total);
 }
_______________________________________________
svn-src-stable-9@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-stable-9
To unsubscribe, send any mail to "svn-src-stable-9-unsubscr...@freebsd.org"

Reply via email to