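From: Milan Tripkovic <[email protected]>

Add an assembly implementation of memcmp() for RISC-V. When both buffers are word-aligned, the implementation compares one register-sized word per load; otherwise it compares byte by byte. On little-endian systems the ZBB rev8 instruction byte-swaps a mismatching pair of words so that a single unsigned comparison yields the sign of the result; non-ZBB systems fall back to the byte-wise assembly loop.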
Benchmark results (QEMU TCG, rv64):

Len    | Default | NoALIGN | ALIGN  | %NoALIGN | %ALIGN
-------|---------|---------|--------|----------|--------
1 B    | 20.3    | 21.5    | 20.9   | +5.9%    | +3.0%
7 B    | 88.9    | 96.9    | 155.7  | +9.0%    | +75.1%
8 B    | 89.6    | 110.5   | 176.2  | +23.3%   | +96.7%
16 B   | 134.4   | 172.8   | 334.8  | +28.6%   | +149.1%
31 B   | 163.5   | 211.9   | 606.2  | +29.6%   | +270.8%
64 B   | 203.8   | 240.3   | 968.6  | +17.9%   | +375.3%
127 B  | 224.6   | 269.0   | 1362.8 | +19.8%   | +506.8%
512 B  | 235.7   | 269.9   | 1913.7 | +14.5%   | +711.9%
1024 B | 256.8   | 283.5   | 2123.6 | +10.4%   | +726.9%
4096 B | 263.8   | 299.7   | 2290.4 | +13.6%   | +768.2%

Signed-off-by: Milan Tripkovic <[email protected]>
---
v5 changes:
- Optimized generic memcmp path with word-aligned loads.
- Implemented non-ZBB fallback to byte comparison per review.
- Link to v4: https://lore.kernel.org/all/[email protected]/

v2 changes:
- Added alignment checks for buffers to avoid expensive misaligned loads.
- Optimized the loop using end-pointers to reduce per-iteration overhead.
- Implemented word-aligned tail handling using ZBB shifts.
- Removed redundant pointer equality (a0 == a1) check.
- Retained BE support via #ifndef; ZBB rev8 is used for the LE fast-path.
- Link to v1: https://lore.kernel.org/all/[email protected]/ arch/riscv/include/asm/string.h | 2 + arch/riscv/lib/Makefile | 1 + arch/riscv/lib/memcmp.S | 134 ++++++++++++++++++++++++++++++++ arch/riscv/purgatory/Makefile | 5 +- 4 files changed, 141 insertions(+), 1 deletion(-) create mode 100644 arch/riscv/lib/memcmp.S diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h index 764ffe8f6..5c5299678 100644 --- a/arch/riscv/include/asm/string.h +++ b/arch/riscv/include/asm/string.h @@ -18,6 +18,8 @@ extern asmlinkage void *__memcpy(void *, const void *, size_t); #define __HAVE_ARCH_MEMMOVE extern asmlinkage void *memmove(void *, const void *, size_t); extern asmlinkage void *__memmove(void *, const void *, size_t); +#define __HAVE_ARCH_MEMCMP +extern asmlinkage int memcmp(const void *, const void *, size_t); #if !(defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) #define __HAVE_ARCH_STRCMP diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile index 6f767b2a3..b529e1be1 100644 --- a/arch/riscv/lib/Makefile +++ b/arch/riscv/lib/Makefile @@ -3,6 +3,7 @@ lib-y += delay.o lib-y += memcpy.o lib-y += memset.o lib-y += memmove.o +lib-y += memcmp.o ifeq ($(CONFIG_KASAN_GENERIC)$(CONFIG_KASAN_SW_TAGS),) lib-y += strcmp.o lib-y += strlen.o diff --git a/arch/riscv/lib/memcmp.S b/arch/riscv/lib/memcmp.S new file mode 100644 index 000000000..cb71cd5ae --- /dev/null +++ b/arch/riscv/lib/memcmp.S @@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+#include <asm/alternative-macros.h>
+#include <asm/hwcap.h>
+
+#if defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB)
+.option push
+.option arch,+zbb
+#endif
+
+/* int memcmp(const void *cs, const void *ct, size_t n) */
+SYM_FUNC_START(memcmp)
+/*
+ * Parameters
+ *	a0 - Pointer to first memory block (cs), also return value
+ *	a1 - Pointer to second memory block (ct)
+ *	a2 - Number of bytes to compare (n), only read at entry
+ *
+ * Returns
+ *	a0 - 0 if equal, positive if cs > ct, negative if cs < ct
+ *
+ * Clobbers
+ *	t0, t1, t2, t3, t4
+ *
+ * Internal register use
+ *	t3 - End of the first block (cs + n)
+ *	t4 - cs + n - SZREG while in the word loop, scratch afterwards
+ */
+	add	t3, a0, a2
+	/* Word loads are only used when both pointers are SZREG-aligned. */
+	or	t0, a0, a1
+	andi	t0, t0, (SZREG - 1)
+	bnez	t0, 5f
+
+	/*
+	 * Test n directly: computing cs + n - SZREG first would wrap for
+	 * cs + n < SZREG (e.g. cs == NULL, n == 0) and enter the word loop.
+	 */
+	li	t4, SZREG
+	bltu	a2, t4, 7f
+	addi	t4, t3, -SZREG
+
+	/* Compare one SZREG-sized word per iteration. */
+1:
+	REG_L	t1, 0(a0)
+	REG_L	t2, 0(a1)
+	bne	t1, t2, 2f
+	addi	a0, a0, SZREG
+	addi	a1, a1, SZREG
+	bleu	a0, t4, 1b
+
+	/* Fewer than SZREG bytes are left (possibly none). */
+7:
+#if defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB)
+	/* Systems without Zbb finish with the byte loop. */
+	__ALTERNATIVE_CFG("j 5f", "nop", 0, RISCV_ISA_EXT_ZBB,
+			  IS_ENABLED(CONFIG_RISCV_ISA_ZBB))
+
+	beq	a0, t3, 4f
+	/*
+	 * Load the whole aligned word holding the tail; the bytes past the
+	 * end of the buffers are shifted out before comparing.
+	 */
+	REG_L	t1, 0(a0)
+	REG_L	t2, 0(a1)
+
+	/* t0 = (SZREG - remaining bytes) * 8, the number of bits to drop */
+	sub	t0, t3, a0
+	li	t4, SZREG
+	sub	t0, t4, t0
+	slli	t0, t0, 3
+
+#ifndef CONFIG_CPU_BIG_ENDIAN
+	/* Move the lowest-addressed byte to the most significant position. */
+	rev8	t1, t1
+	rev8	t2, t2
+#endif
+	srl	t1, t1, t0
+	srl	t2, t2, t0
+
+	bne	t1, t2, 8f
+	li	a0, 0
+	ret
+#endif
+
+	/* Byte-wise comparison: unaligned buffers and non-Zbb fallback. */
+5:
+	beq	a0, t3, 4f
+6:
+	lbu	t1, 0(a0)
+	lbu	t2, 0(a1)
+	bne	t1, t2, 3f
+	addi	a0, a0, 1
+	addi	a1, a1, 1
+	bne	a0, t3, 6b
+
+4:
+	li	a0, 0
+	ret
+
+	/* A word differed in the word loop; a0 and a1 still point at it. */
+2:
+#ifndef CONFIG_CPU_BIG_ENDIAN
+#if defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB)
+	/* Systems without Zbb locate the differing byte in the byte loop. */
+	__ALTERNATIVE_CFG("j 5b", "nop", 0, RISCV_ISA_EXT_ZBB,
+			  IS_ENABLED(CONFIG_RISCV_ISA_ZBB))
+	rev8	t1, t1
+	rev8	t2, t2
+#else
+	j	5b
+#endif
+#endif
+	/* a0 = (t1 > t2) - (t1 < t2), comparing as unsigned words */
+8:
+	sltu	a0, t2, t1
+	sltu	t0, t1, t2
+	sub	a0, a0, t0
+	ret
+
+	/* A byte differed in the byte loop: return the byte difference. */
+3:
+	sub	a0, t1, t2
+	ret
+
+SYM_FUNC_END(memcmp)
+SYM_FUNC_ALIAS(__pi_memcmp, memcmp)
+EXPORT_SYMBOL(memcmp)
+
+#if defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB)
+.option pop
+#endif
diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile index b0358a78f..456929971 100644 --- a/arch/riscv/purgatory/Makefile +++ b/arch/riscv/purgatory/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -purgatory-y := purgatory.o sha256.o entry.o string.o ctype.o memcpy.o memset.o +purgatory-y := purgatory.o sha256.o entry.o string.o ctype.o memcpy.o memset.o memcmp.o ifeq ($(CONFIG_KASAN_GENERIC)$(CONFIG_KASAN_SW_TAGS),) purgatory-y += strcmp.o strlen.o strncmp.o strnlen.o strchr.o strrchr.o
endif @@ -41,6 +41,9 @@ $(obj)/strchr.o: $(srctree)/arch/riscv/lib/strchr.S FORCE $(obj)/strrchr.o: $(srctree)/arch/riscv/lib/strrchr.S FORCE $(call if_changed_rule,as_o_S) +$(obj)/memcmp.o: $(srctree)/arch/riscv/lib/memcmp.S FORCE + $(call if_changed_rule,as_o_S) + CFLAGS_sha256.o := -D__DISABLE_EXPORTS -D__NO_FORTIFY CFLAGS_string.o := -D__DISABLE_EXPORTS CFLAGS_ctype.o := -D__DISABLE_EXPORTS -- 2.43.0

