On Fri, May 16, 2025 at 01:06:54PM +0200, Gabriel Paubert wrote:
> 
> It won't work for big endian, nor for 32 bit obviously.

Good catch, I will restrict the Kconfig option to little-endian.
The accelerated crypto code which uses this is already restricted
to little-endian anyway.

The Kconfig option is also dependent on PPC64 so 32-bit shouldn't
be a problem.

> Besides that, in arch/powerpc/kernel/misc_32.S, you'll find a branchless
> version of these functions. It's for 64 bit shifts on 32 bit big-endian
> but it can easily be adapted to 128 bit shifts on 64 bit processors
> (swapping r3 and r4 depending on endianness).

Nice.  I've replaced the shift code with one based on misc_32.S.

> Several functions of kernel/misc_32.S should arguably be moved to lib/.

I'll leave that to someone else :)

Thanks,

---8<---
When optimising for size, gcc generates out-of-line calls for 128-bit
integer shifts.  Add these functions to avoid build errors.

Also restrict ARCH_SUPPORTS_INT128 to little-endian, since the only
user that prompted this (poly1305) supports little-endian only.

Fixes: c66d7ebbe2fa ("crypto: powerpc/poly1305 - Add SIMD fallback")
Reported-by: kernel test robot <l...@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202505152053.frkekjce-...@intel.com/
Suggested-by: Gabriel Paubert <paub...@iram.es>
Signed-off-by: Herbert Xu <herb...@gondor.apana.org.au>

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 651e0c32957a..7a7d39fa8b01 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -173,7 +173,7 @@ config PPC
        select ARCH_STACKWALK
        select ARCH_SUPPORTS_ATOMIC_RMW
        select ARCH_SUPPORTS_DEBUG_PAGEALLOC    if PPC_BOOK3S || PPC_8xx
-       select ARCH_SUPPORTS_INT128             if PPC64 && CC_HAS_INT128
+       select ARCH_SUPPORTS_INT128             if PPC64 && CC_HAS_INT128 && CPU_LITTLE_ENDIAN
        select ARCH_USE_BUILTIN_BSWAP
        select ARCH_USE_CMPXCHG_LOCKREF         if PPC64
        select ARCH_USE_MEMTEST
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 1cd74673cbf7..a41c071c1652 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -87,3 +87,5 @@ obj-$(CONFIG_CRC_T10DIF_ARCH) += crc-t10dif-powerpc.o
 crc-t10dif-powerpc-y := crc-t10dif-glue.o crct10dif-vpmsum_asm.o
 
 obj-$(CONFIG_PPC64) += $(obj64-y)
+
+obj-$(CONFIG_ARCH_SUPPORTS_INT128) += tishift.o
diff --git a/arch/powerpc/lib/tishift.S b/arch/powerpc/lib/tishift.S
new file mode 100644
index 000000000000..f63748b5e1c5
--- /dev/null
+++ b/arch/powerpc/lib/tishift.S
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 1995-1996 Gary Thomas (g...@linuxppc.org)
+ * Largely rewritten by Cort Dougan (c...@cs.nmt.edu)
+ * and Paul Mackerras.
+ * Copyright (c) 2025 Herbert Xu <herb...@gondor.apana.org.au>
+ */
+#include <asm/ppc_asm.h>
+#include <linux/export.h>
+
+_GLOBAL(__lshrti3)
+       subfic  r6,r5,64        # r6 = 64 - count
+       srd     r3,r3,r5        # LSW = count > 63 ? 0 : LSW >> count
+       addi    r7,r5,-64       # r7 = count - 64 (negative when count < 64)
+       sld     r6,r4,r6        # t1 = count > 63 ? 0 : MSW << (64-count)
+       srd     r7,r4,r7        # t2 = count < 64 ? 0 : MSW >> (count-64)
+       or      r3,r3,r6        # LSW |= t1
+       srd     r4,r4,r5        # MSW = MSW >> count
+       or      r3,r3,r7        # LSW |= t2
+       blr
+EXPORT_SYMBOL(__lshrti3)
+
+_GLOBAL(__ashrti3)
+       subfic  r6,r5,64        # r6 = 64 - count
+       srd     r3,r3,r5        # LSW = count > 63 ? 0 : LSW >> count
+       addi    r7,r5,-64       # r7 = count - 64 (negative when count < 64)
+       sld     r6,r4,r6        # t1 = count > 63 ? 0 : MSW << (64-count)
+       rlwinm  r8,r7,0,64      # t3 = (count < 64) ? 64 : 0
+       srad    r7,r4,r7        # t2 = MSW >> (count-64); sign bits if count < 64
+       or      r3,r3,r6        # LSW |= t1
+       sld     r7,r7,r8        # t2 = (count < 64) ? 0 : t2 (discard sign bits)
+       srad    r4,r4,r5        # MSW = MSW >> count (arithmetic)
+       or      r3,r3,r7        # LSW |= t2
+       blr
+EXPORT_SYMBOL(__ashrti3)
+
+_GLOBAL(__ashlti3)
+       subfic  r6,r5,64        # r6 = 64 - count
+       sld     r4,r4,r5        # MSW = count > 63 ? 0 : MSW << count
+       addi    r7,r5,-64       # r7 = count - 64 (negative when count < 64)
+       srd     r6,r3,r6        # t1 = count > 63 ? 0 : LSW >> (64-count)
+       sld     r7,r3,r7        # t2 = count < 64 ? 0 : LSW << (count-64)
+       or      r4,r4,r6        # MSW |= t1
+       sld     r3,r3,r5        # LSW = LSW << count
+       or      r4,r4,r7        # MSW |= t2
+       blr
+EXPORT_SYMBOL(__ashlti3)
-- 
Email: Herbert Xu <herb...@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

Reply via email to