I know Torsten Duwe tried to add ticket spinlocks for powerpc a year
ago [1], but that work stalled due to the conflict between PPC_SPLPAR
and lockref, and we still haven't found a better way to handle it.
Instead of waiting forever for a perfect solution, can't we just use
the ticket spinlock for the !CONFIG_PPC_SPLPAR case?
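
For reference, the semantics here are the standard ticket lock, which
I sketch below in plain C using GCC atomic builtins. This is purely
illustrative (the type and function names are made up for this
sketch); the real patch implements the same thing with lwarx/stwcx.
plus the SMT priority nops ("or 1,1,1" / "or 2,2,2") while spinning:

    #include <stdint.h>

    typedef struct {
            uint16_t owner;         /* ticket currently being served */
            uint16_t next;          /* next ticket to hand out */
    } ticket_lock_t;

    static void ticket_lock(ticket_lock_t *lock)
    {
            /* Take a ticket: mine is the old value of next. */
            uint16_t t = __atomic_fetch_add(&lock->next, 1,
                                            __ATOMIC_RELAXED);

            /* FIFO wait until it is our turn. */
            while (__atomic_load_n(&lock->owner, __ATOMIC_ACQUIRE) != t)
                    ;       /* the asm lowers SMT priority here */
    }

    static int ticket_trylock(ticket_lock_t *lock)
    {
            uint16_t t = __atomic_load_n(&lock->owner, __ATOMIC_RELAXED);
            ticket_lock_t old = { .owner = t, .next = t };  /* unlocked */
            ticket_lock_t new = { .owner = t, .next = (uint16_t)(t + 1) };

            /* Succeed only if nobody holds or waits for the lock. */
            return __atomic_compare_exchange(lock, &old, &new, 0,
                                             __ATOMIC_ACQUIRE,
                                             __ATOMIC_RELAXED);
    }

    static void ticket_unlock(ticket_lock_t *lock)
    {
            /* Hand the lock to the next waiter in line. */
            __atomic_store_n(&lock->owner, lock->owner + 1,
                             __ATOMIC_RELEASE);
    }

The FIFO hand-off is the point: waiters take turns instead of all
racing for the lock word at once, which is where ticket locks
typically win over the old test-and-set lock.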

This is a very rough patch based on the arm64 code. I want to make
sure the approach is acceptable before going any further. So far it
has only passed a build and boot test on an fsl t4240rdb board. I ran
a simple performance benchmark by executing the following command ten
times before and after applying this patch:
        ./perf bench sched messaging

                                Before        After
    Averaged total time [sec]:  0.403         0.367

So we see a ~9% performance improvement ((0.403 - 0.367) / 0.403 is
roughly 8.9%). This patch depends on [2].

[1] https://lists.ozlabs.org/pipermail/linuxppc-dev/2014-February/115195.html
[2] http://patchwork.ozlabs.org/patch/447563/

Signed-off-by: Kevin Hao <haoke...@gmail.com>
---
 arch/powerpc/include/asm/spinlock.h       | 79 ++++++++++++++++++++++++++++++-
 arch/powerpc/include/asm/spinlock_types.h | 16 +++++++
 arch/powerpc/lib/locks.c                  |  2 +-
 3 files changed, 95 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index d303cdad2519..3faf2507abe9 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -54,6 +54,7 @@
 #define SYNC_IO
 #endif
 
+#ifdef CONFIG_PPC_SPLPAR
 static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
 {
        return lock.slock == 0;
@@ -89,6 +90,40 @@ static inline unsigned long __arch_spin_trylock(arch_spinlock_t *lock)
        return tmp;
 }
 
+#else
+static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
+{
+       return lock.owner == lock.next;
+}
+
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
+{
+       return !arch_spin_value_unlocked(READ_ONCE(*lock));
+}
+
+static inline unsigned long __arch_spin_trylock(arch_spinlock_t *lock)
+{
+       unsigned int tmp;
+       arch_spinlock_t lockval;
+
+       __asm__ __volatile__ (
+"1:    " PPC_LWARX(%0,0,%2,1) "\n\
+       rotlwi          %1,%0,16\n\
+       xor.            %1,%1,%0\n\
+       bne-            2f\n\
+       add             %0,%0,%3\n\
+       stwcx.          %0,0,%2\n\
+       bne-            1b\n"
+       PPC_ACQUIRE_BARRIER
+"2:"
+       : "=&r" (lockval), "=&r" (tmp)
+       : "r" (lock), "r" (1 << TICKET_SHIFT)
+       : "cr0", "memory");
+
+       return tmp;
+}
+#endif
+
 static inline int arch_spin_trylock(arch_spinlock_t *lock)
 {
        CLEAR_IO_SYNC;
@@ -120,6 +155,7 @@ extern void __rw_yield(arch_rwlock_t *lock);
 #define SHARED_PROCESSOR       0
 #endif
 
+#ifdef CONFIG_PPC_SPLPAR
 static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
        CLEAR_IO_SYNC;
@@ -155,16 +191,57 @@ void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
                local_irq_restore(flags_dis);
        }
 }
+#else
+#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
+
+static inline int arch_spin_is_contended(arch_spinlock_t *lock)
+{
+       arch_spinlock_t lockval = READ_ONCE(*lock);
+       return (lockval.next - lockval.owner) > 1;
+}
+#define arch_spin_is_contended arch_spin_is_contended
+
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+       unsigned int tmp;
+       arch_spinlock_t lockval;
+
+       CLEAR_IO_SYNC;
+       __asm__ __volatile__ (
+"1:    " PPC_LWARX(%0,0,%2,1) "\n\
+       add             %1,%0,%4\n\
+       stwcx.          %1,0,%2\n\
+       bne-            1b\n\
+       rotlwi          %1,%0,16\n\
+       cmpw            %1,%0\n\
+       beq             3f\n\
+       rlwinm          %0,%0,16,16,31\n\
+2:     or              1,1,1\n\
+       lhz             %1,0(%3)\n\
+       cmpw            %1,%0\n\
+       bne             2b\n\
+       or              2,2,2\n\
+3:"
+       PPC_ACQUIRE_BARRIER
+       : "=&r" (lockval), "=&r" (tmp)
+       : "r"(lock), "r" (&lock->owner), "r" (1 << TICKET_SHIFT)
+       : "cr0", "memory");
+}
+#endif
 
 static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
        SYNC_IO;
        __asm__ __volatile__("# arch_spin_unlock\n\t"
                                PPC_RELEASE_BARRIER: : :"memory");
+#ifdef CONFIG_PPC_SPLPAR
        lock->slock = 0;
+#else
+       lock->owner++;
+#endif
 }
 
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_SPLPAR
 extern void arch_spin_unlock_wait(arch_spinlock_t *lock);
 #else
 #define arch_spin_unlock_wait(lock) \
diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h
index 2351adc4fdc4..1af94f290363 100644
--- a/arch/powerpc/include/asm/spinlock_types.h
+++ b/arch/powerpc/include/asm/spinlock_types.h
@@ -5,11 +5,27 @@
 # error "please don't include this file directly"
 #endif
 
+#ifdef CONFIG_PPC_SPLPAR
 typedef struct {
        volatile unsigned int slock;
 } arch_spinlock_t;
 
 #define __ARCH_SPIN_LOCK_UNLOCKED      { 0 }
+#else
+#define TICKET_SHIFT   16
+
+typedef struct {
+#ifdef __BIG_ENDIAN__
+       u16 next;
+       u16 owner;
+#else
+       u16 owner;
+       u16 next;
+#endif
+} __aligned(4) arch_spinlock_t;
+
+#define __ARCH_SPIN_LOCK_UNLOCKED      { 0, 0 }
+#endif /* CONFIG_PPC_SPLPAR */
 
 typedef struct {
        volatile signed int lock;
diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
index 170a0346f756..fe3d21eeb10d 100644
--- a/arch/powerpc/lib/locks.c
+++ b/arch/powerpc/lib/locks.c
@@ -66,7 +66,6 @@ void __rw_yield(arch_rwlock_t *rw)
        plpar_hcall_norets(H_CONFER,
                get_hard_smp_processor_id(holder_cpu), yield_count);
 }
-#endif
 
 void arch_spin_unlock_wait(arch_spinlock_t *lock)
 {
@@ -83,3 +82,4 @@ void arch_spin_unlock_wait(arch_spinlock_t *lock)
 }
 
 EXPORT_SYMBOL(arch_spin_unlock_wait);
+#endif
-- 
2.1.0
