The generic x >> s gives the following result:

  18:   35 25 ff e0     addic.  r9,r5,-32
  1c:   41 80 00 10     blt     2c <shift+0x14>
  20:   7c 64 4c 30     srw     r4,r3,r9
  24:   38 60 00 00     li      r3,0
...
  2c:   54 69 08 3c     rlwinm  r9,r3,1,0,30
  30:   21 45 00 1f     subfic  r10,r5,31
  34:   7c 84 2c 30     srw     r4,r4,r5
  38:   7d 29 50 30     slw     r9,r9,r10
  3c:   7c 63 2c 30     srw     r3,r3,r5
  40:   7d 24 23 78     or      r4,r9,r4

In our case the shift is always < 32. In addition, the upper 32 bits
of the result are likely to be zero. Let GCC know this; it also helps
optimise the calculations that follow.

With the patch, we get:
   0:   21 25 00 20     subfic  r9,r5,32
   4:   7c 69 48 30     slw     r9,r3,r9
   8:   7c 84 2c 30     srw     r4,r4,r5
   c:   7d 24 23 78     or      r4,r9,r4
  10:   7c 63 2c 30     srw     r3,r3,r5

Performance before the patch:
clock-gettime-realtime:    vdso: 1033 nsec/call

After the patch:
clock-gettime-realtime:    vdso: 941 nsec/call

Signed-off-by: Christophe Leroy <christophe.le...@c-s.fr>
---
 arch/powerpc/include/asm/vdso/gettimeofday.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h
index 74b6eef8fbe9..716a137ab166 100644
--- a/arch/powerpc/include/asm/vdso/gettimeofday.h
+++ b/arch/powerpc/include/asm/vdso/gettimeofday.h
@@ -95,6 +95,23 @@ static __always_inline u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 m
 }
 #define vdso_calc_delta vdso_calc_delta
 
+#ifndef __powerpc64__
+static __always_inline u64 vdso_shift_ns(u64 ns, unsigned long shift)
+{      /* Computes ns >> shift using two 32-bit halves instead of one 64-bit shift. */
+       u32 hi = ns >> 32;      /* upper 32 bits of ns */
+       u32 lo = ns;            /* lower 32 bits of ns (conversion truncates) */
+
+       lo = (lo >> shift) | (hi << (32 - shift));      /* low word: own bits plus bits carried down from hi. NOTE(review): shift == 0 makes this a 32-bit shift by 32 (undefined in C) - confirm callers pass 1..31 */
+       hi >>= shift;           /* high word after the shift */
+
+       if (likely(hi == 0))    /* hinted as the common case: result fits in 32 bits */
+               return lo;      /* returned as u64 with a zero upper half */
+
+       return ((u64)hi << 32) | lo;    /* otherwise recombine both halves into the 64-bit result */
+}
+#define vdso_shift_ns vdso_shift_ns
+#endif
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __ASM_VDSO_GETTIMEOFDAY_H */
-- 
2.13.3

Reply via email to