This change reworks the heapify() operations. Previously they were done
by repeatedly swapping the item to fix with its parent/child, so that
the item moved along one level at a time. Now the parent/child chain is
pulled by one position at each step, and the item to fix is stored only
once, at its final position. On a non-trivial heapify(), this performs
roughly half as many stores as the swap-based approach.

This has been measured to achieve a speed-up of up to 10% for
cpudl_set() calls, using randomly generated workloads of 1K, 10K and
100K heap insertions and deletions (75% cpudl_set() calls with
is_valid=1 and 25% with is_valid=0) and randomly generated CPU IDs,
with up to 256 CPUs, as measured on an Intel Core2 Duo.

Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Juri Lelli <juri.le...@arm.com>
Cc: Luca Abeni <luca.ab...@unitn.it>
Reviewed-by: Luca Abeni <luca.ab...@unitn.it>
Reviewed-by: Juri Lelli <juri.le...@arm.com>
Signed-off-by: Tommaso Cucinotta <tommaso.cucino...@sssup.it>
---
 kernel/sched/cpudeadline.c | 66 +++++++++++++++++++++++++++++++---------------
 1 file changed, 45 insertions(+), 21 deletions(-)

diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index 0acb0d4..0ace75a 100644
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -31,48 +31,72 @@ static inline int right_child(int i)
        return (i << 1) + 2;
 }
 
-static void cpudl_exchange(struct cpudl *cp, int a, int b)
-{
-       int cpu_a = cp->elements[a].cpu, cpu_b = cp->elements[b].cpu;
-
-       swap(cp->elements[a].cpu, cp->elements[b].cpu);
-       swap(cp->elements[a].dl , cp->elements[b].dl );
-
-       swap(cp->elements[cpu_a].idx, cp->elements[cpu_b].idx);
-}
-
 static void cpudl_heapify_down(struct cpudl *cp, int idx)
 {
        int l, r, largest;
 
+       int orig_cpu = cp->elements[idx].cpu;
+       u64 orig_dl = cp->elements[idx].dl;
+
+       if (left_child(idx) >= cp->size)
+               return;
+
        /* adapted from lib/prio_heap.c */
        while(1) {
+               u64 largest_dl;
                l = left_child(idx);
                r = right_child(idx);
                largest = idx;
+               largest_dl = orig_dl;
 
-               if ((l < cp->size) && dl_time_before(cp->elements[idx].dl,
-                                                       cp->elements[l].dl))
+               if ((l < cp->size) && dl_time_before(orig_dl,
+                                               cp->elements[l].dl)) {
                        largest = l;
-               if ((r < cp->size) && dl_time_before(cp->elements[largest].dl,
-                                                       cp->elements[r].dl))
+                       largest_dl = cp->elements[l].dl;
+               }
+               if ((r < cp->size) && dl_time_before(largest_dl,
+                                               cp->elements[r].dl))
                        largest = r;
+
                if (largest == idx)
                        break;
 
-               /* Push idx down the heap one level and bump one up */
-               cpudl_exchange(cp, largest, idx);
+               /* pull largest child onto idx */
+               cp->elements[idx].cpu = cp->elements[largest].cpu;
+               cp->elements[idx].dl = cp->elements[largest].dl;
+               cp->elements[cp->elements[idx].cpu].idx = idx;
                idx = largest;
        }
+       /* actual push down of saved original values orig_* */
+       cp->elements[idx].cpu = orig_cpu;
+       cp->elements[idx].dl = orig_dl;
+       cp->elements[cp->elements[idx].cpu].idx = idx;
 }
 
 static void cpudl_heapify_up(struct cpudl *cp, int idx)
 {
-       while (idx > 0 && dl_time_before(cp->elements[parent(idx)].dl,
-                       cp->elements[idx].dl)) {
-               cpudl_exchange(cp, idx, parent(idx));
-               idx = parent(idx);
-       }
+       int p;
+
+       int orig_cpu = cp->elements[idx].cpu;
+       u64 orig_dl = cp->elements[idx].dl;
+
+       if (idx == 0)
+               return;
+
+       do {
+               p = parent(idx);
+               if (dl_time_before(orig_dl, cp->elements[p].dl))
+                       break;
+               /* pull parent onto idx */
+               cp->elements[idx].cpu = cp->elements[p].cpu;
+               cp->elements[idx].dl = cp->elements[p].dl;
+               cp->elements[cp->elements[idx].cpu].idx = idx;
+               idx = p;
+       } while (idx != 0);
+       /* actual push up of saved original values orig_* */
+       cp->elements[idx].cpu = orig_cpu;
+       cp->elements[idx].dl = orig_dl;
+       cp->elements[cp->elements[idx].cpu].idx = idx;
 }
 
 static void cpudl_heapify(struct cpudl *cp, int idx)
-- 
2.7.4

Reply via email to