https://github.com/wangpc-pp updated https://github.com/llvm/llvm-project/pull/107548
>From f21cfcfc90330ee3856746b6315a81a00313b0e0 Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng...@bytedance.com>
Date: Fri, 6 Sep 2024 17:20:51 +0800
Subject: [PATCH 1/5] [𝘀𝗽𝗿] initial version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.6-beta.1
---
 .../Target/RISCV/RISCVTargetTransformInfo.cpp |  15 +
 .../Target/RISCV/RISCVTargetTransformInfo.h   |   3 +
 llvm/test/CodeGen/RISCV/memcmp.ll             | 932 ++++++++++++++++++
 3 files changed, 950 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/memcmp.ll

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index e809e15eacf696..ad532aadc83266 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -2113,3 +2113,18 @@ bool RISCVTTIImpl::shouldConsiderAddressTypePromotion(
   }
   return Considerable;
 }
+
+RISCVTTIImpl::TTI::MemCmpExpansionOptions
+RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
+  TTI::MemCmpExpansionOptions Options;
+  // FIXME: Vectors haven't been tested.
+  Options.AllowOverlappingLoads =
+      (ST->enableUnalignedScalarMem() || ST->enableUnalignedScalarMem());
+  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
+  Options.NumLoadsPerBlock = Options.MaxNumLoads;
+  if (ST->is64Bit())
+    Options.LoadSizes.push_back(8);
+  llvm::append_range(Options.LoadSizes, ArrayRef({4, 2, 1}));
+  Options.AllowedTailExpansions = {3, 5, 6};
+  return Options;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 763b89bfec0a66..ee9bed09df97f3 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -404,6 +404,9 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
   shouldConsiderAddressTypePromotion(const Instruction &I,
                                      bool &AllowPromotionWithoutCommonHeader);
   std::optional<unsigned> getMinPageSize() const { return 4096; }
+
+  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
+                                                    bool IsZeroCmp) const;
 };

 } // end namespace llvm
diff --git a/llvm/test/CodeGen/RISCV/memcmp.ll b/llvm/test/CodeGen/RISCV/memcmp.ll
new file mode 100644
index 00000000000000..652cd02e2c750a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/memcmp.ll
@@ -0,0 +1,932 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -O2 | FileCheck %s --check-prefix=CHECK-ALIGNED-RV32 +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -O2 | FileCheck %s --check-prefix=CHECK-ALIGNED-RV64 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+unaligned-scalar-mem -O2 \ +; RUN: | FileCheck %s --check-prefix=CHECK-UNALIGNED-RV32 +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+unaligned-scalar-mem -O2 \ +; RUN: | FileCheck %s --check-prefix=CHECK-UNALIGNED-RV64 + +declare i32 @bcmp(i8*, i8*, iXLen) nounwind readonly +declare i32 @memcmp(i8*, i8*, iXLen) nounwind readonly + +define i1 @bcmp_size_15(i8* %s1, i8* %s2) { +; CHECK-ALIGNED-RV32-LABEL: bcmp_size_15: +; CHECK-ALIGNED-RV32: # %bb.0: # %entry +; CHECK-ALIGNED-RV32-NEXT: lbu a2, 1(a0) +; CHECK-ALIGNED-RV32-NEXT: lbu a3, 0(a0) +; CHECK-ALIGNED-RV32-NEXT: lbu a4, 2(a0) +; CHECK-ALIGNED-RV32-NEXT: lbu a5, 3(a0) +; CHECK-ALIGNED-RV32-NEXT: slli a2, a2, 8 +;
CHECK-ALIGNED-RV32-NEXT: or a2, a2, a3 +; CHECK-ALIGNED-RV32-NEXT: slli a4, a4, 16 +; CHECK-ALIGNED-RV32-NEXT: slli a5, a5, 24 +; CHECK-ALIGNED-RV32-NEXT: or a4, a5, a4 +; CHECK-ALIGNED-RV32-NEXT: or a2, a4, a2 +; CHECK-ALIGNED-RV32-NEXT: lbu a3, 1(a1) +; CHECK-ALIGNED-RV32-NEXT: lbu a4, 0(a1) +; CHECK-ALIGNED-RV32-NEXT: lbu a5, 2(a1) +; CHECK-ALIGNED-RV32-NEXT: lbu a6, 3(a1) +; CHECK-ALIGNED-RV32-NEXT: slli a3, a3, 8 +; CHECK-ALIGNED-RV32-NEXT: or a3, a3, a4 +; CHECK-ALIGNED-RV32-NEXT: slli a5, a5, 16 +; CHECK-ALIGNED-RV32-NEXT: slli a6, a6, 24 +; CHECK-ALIGNED-RV32-NEXT: or a4, a6, a5 +; CHECK-ALIGNED-RV32-NEXT: or a3, a4, a3 +; CHECK-ALIGNED-RV32-NEXT: xor a2, a2, a3 +; CHECK-ALIGNED-RV32-NEXT: lbu a3, 5(a0) +; CHECK-ALIGNED-RV32-NEXT: lbu a4, 4(a0) +; CHECK-ALIGNED-RV32-NEXT: lbu a5, 6(a0) +; CHECK-ALIGNED-RV32-NEXT: lbu a6, 7(a0) +; CHECK-ALIGNED-RV32-NEXT: slli a3, a3, 8 +; CHECK-ALIGNED-RV32-NEXT: or a3, a3, a4 +; CHECK-ALIGNED-RV32-NEXT: slli a5, a5, 16 +; CHECK-ALIGNED-RV32-NEXT: slli a6, a6, 24 +; CHECK-ALIGNED-RV32-NEXT: or a4, a6, a5 +; CHECK-ALIGNED-RV32-NEXT: or a3, a4, a3 +; CHECK-ALIGNED-RV32-NEXT: lbu a4, 5(a1) +; CHECK-ALIGNED-RV32-NEXT: lbu a5, 4(a1) +; CHECK-ALIGNED-RV32-NEXT: lbu a6, 6(a1) +; CHECK-ALIGNED-RV32-NEXT: lbu a7, 7(a1) +; CHECK-ALIGNED-RV32-NEXT: slli a4, a4, 8 +; CHECK-ALIGNED-RV32-NEXT: or a4, a4, a5 +; CHECK-ALIGNED-RV32-NEXT: slli a6, a6, 16 +; CHECK-ALIGNED-RV32-NEXT: slli a7, a7, 24 +; CHECK-ALIGNED-RV32-NEXT: or a5, a7, a6 +; CHECK-ALIGNED-RV32-NEXT: or a4, a5, a4 +; CHECK-ALIGNED-RV32-NEXT: xor a3, a3, a4 +; CHECK-ALIGNED-RV32-NEXT: lbu a4, 9(a0) +; CHECK-ALIGNED-RV32-NEXT: lbu a5, 8(a0) +; CHECK-ALIGNED-RV32-NEXT: lbu a6, 10(a0) +; CHECK-ALIGNED-RV32-NEXT: lbu a7, 11(a0) +; CHECK-ALIGNED-RV32-NEXT: slli a4, a4, 8 +; CHECK-ALIGNED-RV32-NEXT: or a4, a4, a5 +; CHECK-ALIGNED-RV32-NEXT: slli a6, a6, 16 +; CHECK-ALIGNED-RV32-NEXT: slli a7, a7, 24 +; CHECK-ALIGNED-RV32-NEXT: or a5, a7, a6 +; CHECK-ALIGNED-RV32-NEXT: or a4, a5, a4 +; CHECK-ALIGNED-RV32-NEXT: lbu a5, 9(a1) +; CHECK-ALIGNED-RV32-NEXT: lbu a6, 8(a1) +; CHECK-ALIGNED-RV32-NEXT: lbu a7, 10(a1) +; CHECK-ALIGNED-RV32-NEXT: lbu t0, 11(a1) +; CHECK-ALIGNED-RV32-NEXT: slli a5, a5, 8 +; CHECK-ALIGNED-RV32-NEXT: or a5, a5, a6 +; CHECK-ALIGNED-RV32-NEXT: slli a7, a7, 16 +; CHECK-ALIGNED-RV32-NEXT: slli t0, t0, 24 +; CHECK-ALIGNED-RV32-NEXT: or a6, t0, a7 +; CHECK-ALIGNED-RV32-NEXT: lbu a7, 13(a0) +; CHECK-ALIGNED-RV32-NEXT: lbu t0, 12(a0) +; CHECK-ALIGNED-RV32-NEXT: or a5, a6, a5 +; CHECK-ALIGNED-RV32-NEXT: xor a4, a4, a5 +; CHECK-ALIGNED-RV32-NEXT: slli a7, a7, 8 +; CHECK-ALIGNED-RV32-NEXT: or a5, a7, t0 +; CHECK-ALIGNED-RV32-NEXT: lbu a6, 13(a1) +; CHECK-ALIGNED-RV32-NEXT: lbu a7, 12(a1) +; CHECK-ALIGNED-RV32-NEXT: lbu a0, 14(a0) +; CHECK-ALIGNED-RV32-NEXT: lbu a1, 14(a1) +; CHECK-ALIGNED-RV32-NEXT: slli a6, a6, 8 +; CHECK-ALIGNED-RV32-NEXT: or a6, a6, a7 +; CHECK-ALIGNED-RV32-NEXT: xor a5, a5, a6 +; CHECK-ALIGNED-RV32-NEXT: xor a0, a0, a1 +; CHECK-ALIGNED-RV32-NEXT: or a2, a2, a3 +; CHECK-ALIGNED-RV32-NEXT: or a4, a4, a5 +; CHECK-ALIGNED-RV32-NEXT: or a2, a2, a4 +; CHECK-ALIGNED-RV32-NEXT: or a0, a2, a0 +; CHECK-ALIGNED-RV32-NEXT: seqz a0, a0 +; CHECK-ALIGNED-RV32-NEXT: ret +; +; CHECK-ALIGNED-RV64-LABEL: bcmp_size_15: +; CHECK-ALIGNED-RV64: # %bb.0: # %entry +; CHECK-ALIGNED-RV64-NEXT: lbu a2, 1(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a3, 0(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a4, 2(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 3(a0) +; CHECK-ALIGNED-RV64-NEXT: slli a2, a2, 8 +; CHECK-ALIGNED-RV64-NEXT: or 
a2, a2, a3 +; CHECK-ALIGNED-RV64-NEXT: slli a4, a4, 16 +; CHECK-ALIGNED-RV64-NEXT: slli a5, a5, 24 +; CHECK-ALIGNED-RV64-NEXT: or a4, a5, a4 +; CHECK-ALIGNED-RV64-NEXT: or a2, a4, a2 +; CHECK-ALIGNED-RV64-NEXT: lbu a3, 5(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a4, 4(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 6(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 7(a0) +; CHECK-ALIGNED-RV64-NEXT: slli a3, a3, 8 +; CHECK-ALIGNED-RV64-NEXT: or a3, a3, a4 +; CHECK-ALIGNED-RV64-NEXT: slli a5, a5, 16 +; CHECK-ALIGNED-RV64-NEXT: slli a6, a6, 24 +; CHECK-ALIGNED-RV64-NEXT: or a4, a6, a5 +; CHECK-ALIGNED-RV64-NEXT: or a3, a4, a3 +; CHECK-ALIGNED-RV64-NEXT: slli a3, a3, 32 +; CHECK-ALIGNED-RV64-NEXT: or a2, a3, a2 +; CHECK-ALIGNED-RV64-NEXT: lbu a3, 1(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a4, 0(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 2(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 3(a1) +; CHECK-ALIGNED-RV64-NEXT: slli a3, a3, 8 +; CHECK-ALIGNED-RV64-NEXT: or a3, a3, a4 +; CHECK-ALIGNED-RV64-NEXT: slli a5, a5, 16 +; CHECK-ALIGNED-RV64-NEXT: slli a6, a6, 24 +; CHECK-ALIGNED-RV64-NEXT: or a4, a6, a5 +; CHECK-ALIGNED-RV64-NEXT: or a3, a4, a3 +; CHECK-ALIGNED-RV64-NEXT: lbu a4, 5(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 4(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 6(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a7, 7(a1) +; CHECK-ALIGNED-RV64-NEXT: slli a4, a4, 8 +; CHECK-ALIGNED-RV64-NEXT: or a4, a4, a5 +; CHECK-ALIGNED-RV64-NEXT: slli a6, a6, 16 +; CHECK-ALIGNED-RV64-NEXT: slli a7, a7, 24 +; CHECK-ALIGNED-RV64-NEXT: or a5, a7, a6 +; CHECK-ALIGNED-RV64-NEXT: or a4, a5, a4 +; CHECK-ALIGNED-RV64-NEXT: slli a4, a4, 32 +; CHECK-ALIGNED-RV64-NEXT: or a3, a4, a3 +; CHECK-ALIGNED-RV64-NEXT: xor a2, a2, a3 +; CHECK-ALIGNED-RV64-NEXT: lbu a3, 9(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a4, 8(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 10(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 11(a0) +; CHECK-ALIGNED-RV64-NEXT: slli a3, a3, 8 +; CHECK-ALIGNED-RV64-NEXT: or a3, a3, a4 +; CHECK-ALIGNED-RV64-NEXT: slli a5, a5, 16 +; CHECK-ALIGNED-RV64-NEXT: slli a6, a6, 24 +; CHECK-ALIGNED-RV64-NEXT: or a4, a6, a5 +; CHECK-ALIGNED-RV64-NEXT: or a3, a4, a3 +; CHECK-ALIGNED-RV64-NEXT: lbu a4, 9(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 8(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 10(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a7, 11(a1) +; CHECK-ALIGNED-RV64-NEXT: slli a4, a4, 8 +; CHECK-ALIGNED-RV64-NEXT: or a4, a4, a5 +; CHECK-ALIGNED-RV64-NEXT: slli a6, a6, 16 +; CHECK-ALIGNED-RV64-NEXT: slli a7, a7, 24 +; CHECK-ALIGNED-RV64-NEXT: or a5, a7, a6 +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 13(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a7, 12(a0) +; CHECK-ALIGNED-RV64-NEXT: or a4, a5, a4 +; CHECK-ALIGNED-RV64-NEXT: xor a3, a3, a4 +; CHECK-ALIGNED-RV64-NEXT: slli a6, a6, 8 +; CHECK-ALIGNED-RV64-NEXT: or a4, a6, a7 +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 13(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 12(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a0, 14(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a1, 14(a1) +; CHECK-ALIGNED-RV64-NEXT: slli a5, a5, 8 +; CHECK-ALIGNED-RV64-NEXT: or a5, a5, a6 +; CHECK-ALIGNED-RV64-NEXT: xor a4, a4, a5 +; CHECK-ALIGNED-RV64-NEXT: xor a0, a0, a1 +; CHECK-ALIGNED-RV64-NEXT: or a0, a4, a0 +; CHECK-ALIGNED-RV64-NEXT: or a0, a3, a0 +; CHECK-ALIGNED-RV64-NEXT: or a0, a2, a0 +; CHECK-ALIGNED-RV64-NEXT: seqz a0, a0 +; CHECK-ALIGNED-RV64-NEXT: ret +; +; CHECK-UNALIGNED-RV32-LABEL: bcmp_size_15: +; CHECK-UNALIGNED-RV32: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-NEXT: lw a4, 4(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw a5, 4(a1) +; 
CHECK-UNALIGNED-RV32-NEXT: lw a6, 8(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw a7, 8(a1) +; CHECK-UNALIGNED-RV32-NEXT: lw a0, 11(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw a1, 11(a1) +; CHECK-UNALIGNED-RV32-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-NEXT: xor a4, a4, a5 +; CHECK-UNALIGNED-RV32-NEXT: xor a3, a6, a7 +; CHECK-UNALIGNED-RV32-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-NEXT: or a2, a2, a4 +; CHECK-UNALIGNED-RV32-NEXT: or a0, a3, a0 +; CHECK-UNALIGNED-RV32-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-NEXT: seqz a0, a0 +; CHECK-UNALIGNED-RV32-NEXT: ret +; +; CHECK-UNALIGNED-RV64-LABEL: bcmp_size_15: +; CHECK-UNALIGNED-RV64: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-NEXT: ld a2, 0(a0) +; CHECK-UNALIGNED-RV64-NEXT: ld a3, 0(a1) +; CHECK-UNALIGNED-RV64-NEXT: ld a0, 7(a0) +; CHECK-UNALIGNED-RV64-NEXT: ld a1, 7(a1) +; CHECK-UNALIGNED-RV64-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-NEXT: seqz a0, a0 +; CHECK-UNALIGNED-RV64-NEXT: ret +entry: + %bcmp = call i32 @bcmp(i8* %s1, i8* %s2, iXLen 15) + %ret = icmp eq i32 %bcmp, 0 + ret i1 %ret +} + +define i1 @bcmp_size_31(i8* %s1, i8* %s2) { +; CHECK-ALIGNED-RV32-LABEL: bcmp_size_31: +; CHECK-ALIGNED-RV32: # %bb.0: # %entry +; CHECK-ALIGNED-RV32-NEXT: addi sp, sp, -16 +; CHECK-ALIGNED-RV32-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ALIGNED-RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK-ALIGNED-RV32-NEXT: .cfi_offset ra, -4 +; CHECK-ALIGNED-RV32-NEXT: li a2, 31 +; CHECK-ALIGNED-RV32-NEXT: call bcmp +; CHECK-ALIGNED-RV32-NEXT: seqz a0, a0 +; CHECK-ALIGNED-RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK-ALIGNED-RV32-NEXT: addi sp, sp, 16 +; CHECK-ALIGNED-RV32-NEXT: ret +; +; CHECK-ALIGNED-RV64-LABEL: bcmp_size_31: +; CHECK-ALIGNED-RV64: # %bb.0: # %entry +; CHECK-ALIGNED-RV64-NEXT: lbu a2, 1(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a3, 0(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a4, 2(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 3(a0) +; CHECK-ALIGNED-RV64-NEXT: slli a2, a2, 8 +; CHECK-ALIGNED-RV64-NEXT: or a2, a2, a3 +; CHECK-ALIGNED-RV64-NEXT: slli a4, a4, 16 +; CHECK-ALIGNED-RV64-NEXT: slli a5, a5, 24 +; CHECK-ALIGNED-RV64-NEXT: or a4, a5, a4 +; CHECK-ALIGNED-RV64-NEXT: or a2, a4, a2 +; CHECK-ALIGNED-RV64-NEXT: lbu a3, 5(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a4, 4(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 6(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 7(a0) +; CHECK-ALIGNED-RV64-NEXT: slli a3, a3, 8 +; CHECK-ALIGNED-RV64-NEXT: or a3, a3, a4 +; CHECK-ALIGNED-RV64-NEXT: slli a5, a5, 16 +; CHECK-ALIGNED-RV64-NEXT: slli a6, a6, 24 +; CHECK-ALIGNED-RV64-NEXT: or a4, a6, a5 +; CHECK-ALIGNED-RV64-NEXT: or a3, a4, a3 +; CHECK-ALIGNED-RV64-NEXT: slli a3, a3, 32 +; CHECK-ALIGNED-RV64-NEXT: or a2, a3, a2 +; CHECK-ALIGNED-RV64-NEXT: lbu a3, 1(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a4, 0(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 2(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 3(a1) +; CHECK-ALIGNED-RV64-NEXT: slli a3, a3, 8 +; CHECK-ALIGNED-RV64-NEXT: or a3, a3, a4 +; CHECK-ALIGNED-RV64-NEXT: slli a5, a5, 16 +; CHECK-ALIGNED-RV64-NEXT: slli a6, a6, 24 +; CHECK-ALIGNED-RV64-NEXT: or a4, a6, a5 +; CHECK-ALIGNED-RV64-NEXT: or a3, a4, a3 +; CHECK-ALIGNED-RV64-NEXT: lbu a4, 5(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 4(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 6(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a7, 7(a1) +; CHECK-ALIGNED-RV64-NEXT: slli a4, a4, 8 +; CHECK-ALIGNED-RV64-NEXT: or a4, a4, a5 +; CHECK-ALIGNED-RV64-NEXT: slli a6, a6, 16 +; CHECK-ALIGNED-RV64-NEXT: slli a7, a7, 24 +; CHECK-ALIGNED-RV64-NEXT: or 
a5, a7, a6 +; CHECK-ALIGNED-RV64-NEXT: or a4, a5, a4 +; CHECK-ALIGNED-RV64-NEXT: slli a4, a4, 32 +; CHECK-ALIGNED-RV64-NEXT: or a3, a4, a3 +; CHECK-ALIGNED-RV64-NEXT: xor a2, a2, a3 +; CHECK-ALIGNED-RV64-NEXT: lbu a3, 9(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a4, 8(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 10(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 11(a0) +; CHECK-ALIGNED-RV64-NEXT: slli a3, a3, 8 +; CHECK-ALIGNED-RV64-NEXT: or a3, a3, a4 +; CHECK-ALIGNED-RV64-NEXT: slli a5, a5, 16 +; CHECK-ALIGNED-RV64-NEXT: slli a6, a6, 24 +; CHECK-ALIGNED-RV64-NEXT: or a4, a6, a5 +; CHECK-ALIGNED-RV64-NEXT: or a3, a4, a3 +; CHECK-ALIGNED-RV64-NEXT: lbu a4, 13(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 12(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 14(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a7, 15(a0) +; CHECK-ALIGNED-RV64-NEXT: slli a4, a4, 8 +; CHECK-ALIGNED-RV64-NEXT: or a4, a4, a5 +; CHECK-ALIGNED-RV64-NEXT: slli a6, a6, 16 +; CHECK-ALIGNED-RV64-NEXT: slli a7, a7, 24 +; CHECK-ALIGNED-RV64-NEXT: or a5, a7, a6 +; CHECK-ALIGNED-RV64-NEXT: or a4, a5, a4 +; CHECK-ALIGNED-RV64-NEXT: slli a4, a4, 32 +; CHECK-ALIGNED-RV64-NEXT: or a3, a4, a3 +; CHECK-ALIGNED-RV64-NEXT: lbu a4, 9(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 8(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 10(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a7, 11(a1) +; CHECK-ALIGNED-RV64-NEXT: slli a4, a4, 8 +; CHECK-ALIGNED-RV64-NEXT: or a4, a4, a5 +; CHECK-ALIGNED-RV64-NEXT: slli a6, a6, 16 +; CHECK-ALIGNED-RV64-NEXT: slli a7, a7, 24 +; CHECK-ALIGNED-RV64-NEXT: or a5, a7, a6 +; CHECK-ALIGNED-RV64-NEXT: or a4, a5, a4 +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 13(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 12(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a7, 14(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu t0, 15(a1) +; CHECK-ALIGNED-RV64-NEXT: slli a5, a5, 8 +; CHECK-ALIGNED-RV64-NEXT: or a5, a5, a6 +; CHECK-ALIGNED-RV64-NEXT: slli a7, a7, 16 +; CHECK-ALIGNED-RV64-NEXT: slli t0, t0, 24 +; CHECK-ALIGNED-RV64-NEXT: or a6, t0, a7 +; CHECK-ALIGNED-RV64-NEXT: or a5, a6, a5 +; CHECK-ALIGNED-RV64-NEXT: slli a5, a5, 32 +; CHECK-ALIGNED-RV64-NEXT: or a4, a5, a4 +; CHECK-ALIGNED-RV64-NEXT: xor a3, a3, a4 +; CHECK-ALIGNED-RV64-NEXT: lbu a4, 17(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 16(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 18(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a7, 19(a0) +; CHECK-ALIGNED-RV64-NEXT: slli a4, a4, 8 +; CHECK-ALIGNED-RV64-NEXT: or a4, a4, a5 +; CHECK-ALIGNED-RV64-NEXT: slli a6, a6, 16 +; CHECK-ALIGNED-RV64-NEXT: slli a7, a7, 24 +; CHECK-ALIGNED-RV64-NEXT: or a5, a7, a6 +; CHECK-ALIGNED-RV64-NEXT: or a4, a5, a4 +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 21(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 20(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a7, 22(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu t0, 23(a0) +; CHECK-ALIGNED-RV64-NEXT: slli a5, a5, 8 +; CHECK-ALIGNED-RV64-NEXT: or a5, a5, a6 +; CHECK-ALIGNED-RV64-NEXT: slli a7, a7, 16 +; CHECK-ALIGNED-RV64-NEXT: slli t0, t0, 24 +; CHECK-ALIGNED-RV64-NEXT: or a6, t0, a7 +; CHECK-ALIGNED-RV64-NEXT: or a5, a6, a5 +; CHECK-ALIGNED-RV64-NEXT: slli a5, a5, 32 +; CHECK-ALIGNED-RV64-NEXT: or a4, a5, a4 +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 17(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 16(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a7, 18(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu t0, 19(a1) +; CHECK-ALIGNED-RV64-NEXT: slli a5, a5, 8 +; CHECK-ALIGNED-RV64-NEXT: or a5, a5, a6 +; CHECK-ALIGNED-RV64-NEXT: slli a7, a7, 16 +; CHECK-ALIGNED-RV64-NEXT: slli t0, t0, 24 +; CHECK-ALIGNED-RV64-NEXT: or a6, t0, a7 +; CHECK-ALIGNED-RV64-NEXT: or a5, a6, a5 +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 21(a1) +; CHECK-ALIGNED-RV64-NEXT: 
lbu a7, 20(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu t0, 22(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu t1, 23(a1) +; CHECK-ALIGNED-RV64-NEXT: slli a6, a6, 8 +; CHECK-ALIGNED-RV64-NEXT: or a6, a6, a7 +; CHECK-ALIGNED-RV64-NEXT: slli t0, t0, 16 +; CHECK-ALIGNED-RV64-NEXT: slli t1, t1, 24 +; CHECK-ALIGNED-RV64-NEXT: or a7, t1, t0 +; CHECK-ALIGNED-RV64-NEXT: or a6, a7, a6 +; CHECK-ALIGNED-RV64-NEXT: slli a6, a6, 32 +; CHECK-ALIGNED-RV64-NEXT: or a5, a6, a5 +; CHECK-ALIGNED-RV64-NEXT: xor a4, a4, a5 +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 25(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 24(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a7, 26(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu t0, 27(a0) +; CHECK-ALIGNED-RV64-NEXT: slli a5, a5, 8 +; CHECK-ALIGNED-RV64-NEXT: or a5, a5, a6 +; CHECK-ALIGNED-RV64-NEXT: slli a7, a7, 16 +; CHECK-ALIGNED-RV64-NEXT: slli t0, t0, 24 +; CHECK-ALIGNED-RV64-NEXT: or a6, t0, a7 +; CHECK-ALIGNED-RV64-NEXT: or a5, a6, a5 +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 25(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a7, 24(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu t0, 26(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu t1, 27(a1) +; CHECK-ALIGNED-RV64-NEXT: slli a6, a6, 8 +; CHECK-ALIGNED-RV64-NEXT: or a6, a6, a7 +; CHECK-ALIGNED-RV64-NEXT: slli t0, t0, 16 +; CHECK-ALIGNED-RV64-NEXT: slli t1, t1, 24 +; CHECK-ALIGNED-RV64-NEXT: or a7, t1, t0 +; CHECK-ALIGNED-RV64-NEXT: lbu t0, 29(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu t1, 28(a0) +; CHECK-ALIGNED-RV64-NEXT: or a6, a7, a6 +; CHECK-ALIGNED-RV64-NEXT: xor a5, a5, a6 +; CHECK-ALIGNED-RV64-NEXT: slli t0, t0, 8 +; CHECK-ALIGNED-RV64-NEXT: or a6, t0, t1 +; CHECK-ALIGNED-RV64-NEXT: lbu a7, 29(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu t0, 28(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a0, 30(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a1, 30(a1) +; CHECK-ALIGNED-RV64-NEXT: slli a7, a7, 8 +; CHECK-ALIGNED-RV64-NEXT: or a7, a7, t0 +; CHECK-ALIGNED-RV64-NEXT: xor a6, a6, a7 +; CHECK-ALIGNED-RV64-NEXT: xor a0, a0, a1 +; CHECK-ALIGNED-RV64-NEXT: or a2, a2, a3 +; CHECK-ALIGNED-RV64-NEXT: or a4, a4, a5 +; CHECK-ALIGNED-RV64-NEXT: or a0, a6, a0 +; CHECK-ALIGNED-RV64-NEXT: or a2, a2, a4 +; CHECK-ALIGNED-RV64-NEXT: or a0, a2, a0 +; CHECK-ALIGNED-RV64-NEXT: seqz a0, a0 +; CHECK-ALIGNED-RV64-NEXT: ret +; +; CHECK-UNALIGNED-RV32-LABEL: bcmp_size_31: +; CHECK-UNALIGNED-RV32: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-NEXT: lw a4, 4(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw a5, 4(a1) +; CHECK-UNALIGNED-RV32-NEXT: lw a6, 8(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw a7, 8(a1) +; CHECK-UNALIGNED-RV32-NEXT: lw t0, 12(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw t1, 12(a1) +; CHECK-UNALIGNED-RV32-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-NEXT: xor a4, a4, a5 +; CHECK-UNALIGNED-RV32-NEXT: xor a3, a6, a7 +; CHECK-UNALIGNED-RV32-NEXT: xor a5, t0, t1 +; CHECK-UNALIGNED-RV32-NEXT: lw a6, 16(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw a7, 16(a1) +; CHECK-UNALIGNED-RV32-NEXT: lw t0, 20(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw t1, 20(a1) +; CHECK-UNALIGNED-RV32-NEXT: lw t2, 24(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw t3, 24(a1) +; CHECK-UNALIGNED-RV32-NEXT: lw a0, 27(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw a1, 27(a1) +; CHECK-UNALIGNED-RV32-NEXT: xor a6, a6, a7 +; CHECK-UNALIGNED-RV32-NEXT: xor a7, t0, t1 +; CHECK-UNALIGNED-RV32-NEXT: xor t0, t2, t3 +; CHECK-UNALIGNED-RV32-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-NEXT: or a2, a2, a4 +; CHECK-UNALIGNED-RV32-NEXT: or a3, a3, a5 +; CHECK-UNALIGNED-RV32-NEXT: or a1, a6, a7 +; CHECK-UNALIGNED-RV32-NEXT: or a0, t0, a0 +; CHECK-UNALIGNED-RV32-NEXT: or a2, a2, a3 
+; CHECK-UNALIGNED-RV32-NEXT: or a0, a1, a0 +; CHECK-UNALIGNED-RV32-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-NEXT: seqz a0, a0 +; CHECK-UNALIGNED-RV32-NEXT: ret +; +; CHECK-UNALIGNED-RV64-LABEL: bcmp_size_31: +; CHECK-UNALIGNED-RV64: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-NEXT: ld a2, 0(a0) +; CHECK-UNALIGNED-RV64-NEXT: ld a3, 0(a1) +; CHECK-UNALIGNED-RV64-NEXT: ld a4, 8(a0) +; CHECK-UNALIGNED-RV64-NEXT: ld a5, 8(a1) +; CHECK-UNALIGNED-RV64-NEXT: ld a6, 16(a0) +; CHECK-UNALIGNED-RV64-NEXT: ld a7, 16(a1) +; CHECK-UNALIGNED-RV64-NEXT: ld a0, 23(a0) +; CHECK-UNALIGNED-RV64-NEXT: ld a1, 23(a1) +; CHECK-UNALIGNED-RV64-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-NEXT: xor a4, a4, a5 +; CHECK-UNALIGNED-RV64-NEXT: xor a3, a6, a7 +; CHECK-UNALIGNED-RV64-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-NEXT: or a2, a2, a4 +; CHECK-UNALIGNED-RV64-NEXT: or a0, a3, a0 +; CHECK-UNALIGNED-RV64-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-NEXT: seqz a0, a0 +; CHECK-UNALIGNED-RV64-NEXT: ret +entry: + %bcmp = call i32 @bcmp(i8* %s1, i8* %s2, iXLen 31) + %ret = icmp eq i32 %bcmp, 0 + ret i1 %ret +} + +define i1 @memcmp_size_15(i8* %s1, i8* %s2) { +; CHECK-ALIGNED-RV32-LABEL: memcmp_size_15: +; CHECK-ALIGNED-RV32: # %bb.0: # %entry +; CHECK-ALIGNED-RV32-NEXT: lbu a2, 1(a0) +; CHECK-ALIGNED-RV32-NEXT: lbu a3, 0(a0) +; CHECK-ALIGNED-RV32-NEXT: lbu a4, 2(a0) +; CHECK-ALIGNED-RV32-NEXT: lbu a5, 3(a0) +; CHECK-ALIGNED-RV32-NEXT: slli a2, a2, 8 +; CHECK-ALIGNED-RV32-NEXT: or a2, a2, a3 +; CHECK-ALIGNED-RV32-NEXT: slli a4, a4, 16 +; CHECK-ALIGNED-RV32-NEXT: slli a5, a5, 24 +; CHECK-ALIGNED-RV32-NEXT: or a4, a5, a4 +; CHECK-ALIGNED-RV32-NEXT: or a2, a4, a2 +; CHECK-ALIGNED-RV32-NEXT: lbu a3, 1(a1) +; CHECK-ALIGNED-RV32-NEXT: lbu a4, 0(a1) +; CHECK-ALIGNED-RV32-NEXT: lbu a5, 2(a1) +; CHECK-ALIGNED-RV32-NEXT: lbu a6, 3(a1) +; CHECK-ALIGNED-RV32-NEXT: slli a3, a3, 8 +; CHECK-ALIGNED-RV32-NEXT: or a3, a3, a4 +; CHECK-ALIGNED-RV32-NEXT: slli a5, a5, 16 +; CHECK-ALIGNED-RV32-NEXT: slli a6, a6, 24 +; CHECK-ALIGNED-RV32-NEXT: or a4, a6, a5 +; CHECK-ALIGNED-RV32-NEXT: or a3, a4, a3 +; CHECK-ALIGNED-RV32-NEXT: xor a2, a2, a3 +; CHECK-ALIGNED-RV32-NEXT: lbu a3, 5(a0) +; CHECK-ALIGNED-RV32-NEXT: lbu a4, 4(a0) +; CHECK-ALIGNED-RV32-NEXT: lbu a5, 6(a0) +; CHECK-ALIGNED-RV32-NEXT: lbu a6, 7(a0) +; CHECK-ALIGNED-RV32-NEXT: slli a3, a3, 8 +; CHECK-ALIGNED-RV32-NEXT: or a3, a3, a4 +; CHECK-ALIGNED-RV32-NEXT: slli a5, a5, 16 +; CHECK-ALIGNED-RV32-NEXT: slli a6, a6, 24 +; CHECK-ALIGNED-RV32-NEXT: or a4, a6, a5 +; CHECK-ALIGNED-RV32-NEXT: or a3, a4, a3 +; CHECK-ALIGNED-RV32-NEXT: lbu a4, 5(a1) +; CHECK-ALIGNED-RV32-NEXT: lbu a5, 4(a1) +; CHECK-ALIGNED-RV32-NEXT: lbu a6, 6(a1) +; CHECK-ALIGNED-RV32-NEXT: lbu a7, 7(a1) +; CHECK-ALIGNED-RV32-NEXT: slli a4, a4, 8 +; CHECK-ALIGNED-RV32-NEXT: or a4, a4, a5 +; CHECK-ALIGNED-RV32-NEXT: slli a6, a6, 16 +; CHECK-ALIGNED-RV32-NEXT: slli a7, a7, 24 +; CHECK-ALIGNED-RV32-NEXT: or a5, a7, a6 +; CHECK-ALIGNED-RV32-NEXT: or a4, a5, a4 +; CHECK-ALIGNED-RV32-NEXT: xor a3, a3, a4 +; CHECK-ALIGNED-RV32-NEXT: lbu a4, 9(a0) +; CHECK-ALIGNED-RV32-NEXT: lbu a5, 8(a0) +; CHECK-ALIGNED-RV32-NEXT: lbu a6, 10(a0) +; CHECK-ALIGNED-RV32-NEXT: lbu a7, 11(a0) +; CHECK-ALIGNED-RV32-NEXT: slli a4, a4, 8 +; CHECK-ALIGNED-RV32-NEXT: or a4, a4, a5 +; CHECK-ALIGNED-RV32-NEXT: slli a6, a6, 16 +; CHECK-ALIGNED-RV32-NEXT: slli a7, a7, 24 +; CHECK-ALIGNED-RV32-NEXT: or a5, a7, a6 +; CHECK-ALIGNED-RV32-NEXT: or a4, a5, a4 +; CHECK-ALIGNED-RV32-NEXT: lbu a5, 9(a1) +; CHECK-ALIGNED-RV32-NEXT: lbu a6, 8(a1) +; 
CHECK-ALIGNED-RV32-NEXT: lbu a7, 10(a1) +; CHECK-ALIGNED-RV32-NEXT: lbu t0, 11(a1) +; CHECK-ALIGNED-RV32-NEXT: slli a5, a5, 8 +; CHECK-ALIGNED-RV32-NEXT: or a5, a5, a6 +; CHECK-ALIGNED-RV32-NEXT: slli a7, a7, 16 +; CHECK-ALIGNED-RV32-NEXT: slli t0, t0, 24 +; CHECK-ALIGNED-RV32-NEXT: or a6, t0, a7 +; CHECK-ALIGNED-RV32-NEXT: lbu a7, 13(a0) +; CHECK-ALIGNED-RV32-NEXT: lbu t0, 12(a0) +; CHECK-ALIGNED-RV32-NEXT: or a5, a6, a5 +; CHECK-ALIGNED-RV32-NEXT: xor a4, a4, a5 +; CHECK-ALIGNED-RV32-NEXT: slli a7, a7, 8 +; CHECK-ALIGNED-RV32-NEXT: or a5, a7, t0 +; CHECK-ALIGNED-RV32-NEXT: lbu a6, 13(a1) +; CHECK-ALIGNED-RV32-NEXT: lbu a7, 12(a1) +; CHECK-ALIGNED-RV32-NEXT: lbu a0, 14(a0) +; CHECK-ALIGNED-RV32-NEXT: lbu a1, 14(a1) +; CHECK-ALIGNED-RV32-NEXT: slli a6, a6, 8 +; CHECK-ALIGNED-RV32-NEXT: or a6, a6, a7 +; CHECK-ALIGNED-RV32-NEXT: xor a5, a5, a6 +; CHECK-ALIGNED-RV32-NEXT: xor a0, a0, a1 +; CHECK-ALIGNED-RV32-NEXT: or a2, a2, a3 +; CHECK-ALIGNED-RV32-NEXT: or a4, a4, a5 +; CHECK-ALIGNED-RV32-NEXT: or a2, a2, a4 +; CHECK-ALIGNED-RV32-NEXT: or a0, a2, a0 +; CHECK-ALIGNED-RV32-NEXT: seqz a0, a0 +; CHECK-ALIGNED-RV32-NEXT: ret +; +; CHECK-ALIGNED-RV64-LABEL: memcmp_size_15: +; CHECK-ALIGNED-RV64: # %bb.0: # %entry +; CHECK-ALIGNED-RV64-NEXT: lbu a2, 1(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a3, 0(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a4, 2(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 3(a0) +; CHECK-ALIGNED-RV64-NEXT: slli a2, a2, 8 +; CHECK-ALIGNED-RV64-NEXT: or a2, a2, a3 +; CHECK-ALIGNED-RV64-NEXT: slli a4, a4, 16 +; CHECK-ALIGNED-RV64-NEXT: slli a5, a5, 24 +; CHECK-ALIGNED-RV64-NEXT: or a4, a5, a4 +; CHECK-ALIGNED-RV64-NEXT: or a2, a4, a2 +; CHECK-ALIGNED-RV64-NEXT: lbu a3, 5(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a4, 4(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 6(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 7(a0) +; CHECK-ALIGNED-RV64-NEXT: slli a3, a3, 8 +; CHECK-ALIGNED-RV64-NEXT: or a3, a3, a4 +; CHECK-ALIGNED-RV64-NEXT: slli a5, a5, 16 +; CHECK-ALIGNED-RV64-NEXT: slli a6, a6, 24 +; CHECK-ALIGNED-RV64-NEXT: or a4, a6, a5 +; CHECK-ALIGNED-RV64-NEXT: or a3, a4, a3 +; CHECK-ALIGNED-RV64-NEXT: slli a3, a3, 32 +; CHECK-ALIGNED-RV64-NEXT: or a2, a3, a2 +; CHECK-ALIGNED-RV64-NEXT: lbu a3, 1(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a4, 0(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 2(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 3(a1) +; CHECK-ALIGNED-RV64-NEXT: slli a3, a3, 8 +; CHECK-ALIGNED-RV64-NEXT: or a3, a3, a4 +; CHECK-ALIGNED-RV64-NEXT: slli a5, a5, 16 +; CHECK-ALIGNED-RV64-NEXT: slli a6, a6, 24 +; CHECK-ALIGNED-RV64-NEXT: or a4, a6, a5 +; CHECK-ALIGNED-RV64-NEXT: or a3, a4, a3 +; CHECK-ALIGNED-RV64-NEXT: lbu a4, 5(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 4(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 6(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a7, 7(a1) +; CHECK-ALIGNED-RV64-NEXT: slli a4, a4, 8 +; CHECK-ALIGNED-RV64-NEXT: or a4, a4, a5 +; CHECK-ALIGNED-RV64-NEXT: slli a6, a6, 16 +; CHECK-ALIGNED-RV64-NEXT: slli a7, a7, 24 +; CHECK-ALIGNED-RV64-NEXT: or a5, a7, a6 +; CHECK-ALIGNED-RV64-NEXT: or a4, a5, a4 +; CHECK-ALIGNED-RV64-NEXT: slli a4, a4, 32 +; CHECK-ALIGNED-RV64-NEXT: or a3, a4, a3 +; CHECK-ALIGNED-RV64-NEXT: xor a2, a2, a3 +; CHECK-ALIGNED-RV64-NEXT: lbu a3, 9(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a4, 8(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 10(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 11(a0) +; CHECK-ALIGNED-RV64-NEXT: slli a3, a3, 8 +; CHECK-ALIGNED-RV64-NEXT: or a3, a3, a4 +; CHECK-ALIGNED-RV64-NEXT: slli a5, a5, 16 +; CHECK-ALIGNED-RV64-NEXT: slli a6, a6, 24 +; CHECK-ALIGNED-RV64-NEXT: or a4, a6, a5 +; CHECK-ALIGNED-RV64-NEXT: 
or a3, a4, a3 +; CHECK-ALIGNED-RV64-NEXT: lbu a4, 9(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 8(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 10(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a7, 11(a1) +; CHECK-ALIGNED-RV64-NEXT: slli a4, a4, 8 +; CHECK-ALIGNED-RV64-NEXT: or a4, a4, a5 +; CHECK-ALIGNED-RV64-NEXT: slli a6, a6, 16 +; CHECK-ALIGNED-RV64-NEXT: slli a7, a7, 24 +; CHECK-ALIGNED-RV64-NEXT: or a5, a7, a6 +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 13(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a7, 12(a0) +; CHECK-ALIGNED-RV64-NEXT: or a4, a5, a4 +; CHECK-ALIGNED-RV64-NEXT: xor a3, a3, a4 +; CHECK-ALIGNED-RV64-NEXT: slli a6, a6, 8 +; CHECK-ALIGNED-RV64-NEXT: or a4, a6, a7 +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 13(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 12(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a0, 14(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a1, 14(a1) +; CHECK-ALIGNED-RV64-NEXT: slli a5, a5, 8 +; CHECK-ALIGNED-RV64-NEXT: or a5, a5, a6 +; CHECK-ALIGNED-RV64-NEXT: xor a4, a4, a5 +; CHECK-ALIGNED-RV64-NEXT: xor a0, a0, a1 +; CHECK-ALIGNED-RV64-NEXT: or a0, a4, a0 +; CHECK-ALIGNED-RV64-NEXT: or a0, a3, a0 +; CHECK-ALIGNED-RV64-NEXT: or a0, a2, a0 +; CHECK-ALIGNED-RV64-NEXT: seqz a0, a0 +; CHECK-ALIGNED-RV64-NEXT: ret +; +; CHECK-UNALIGNED-RV32-LABEL: memcmp_size_15: +; CHECK-UNALIGNED-RV32: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-NEXT: lw a4, 4(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw a5, 4(a1) +; CHECK-UNALIGNED-RV32-NEXT: lw a6, 8(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw a7, 8(a1) +; CHECK-UNALIGNED-RV32-NEXT: lw a0, 11(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw a1, 11(a1) +; CHECK-UNALIGNED-RV32-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-NEXT: xor a4, a4, a5 +; CHECK-UNALIGNED-RV32-NEXT: xor a3, a6, a7 +; CHECK-UNALIGNED-RV32-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-NEXT: or a2, a2, a4 +; CHECK-UNALIGNED-RV32-NEXT: or a0, a3, a0 +; CHECK-UNALIGNED-RV32-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-NEXT: seqz a0, a0 +; CHECK-UNALIGNED-RV32-NEXT: ret +; +; CHECK-UNALIGNED-RV64-LABEL: memcmp_size_15: +; CHECK-UNALIGNED-RV64: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-NEXT: ld a2, 0(a0) +; CHECK-UNALIGNED-RV64-NEXT: ld a3, 0(a1) +; CHECK-UNALIGNED-RV64-NEXT: ld a0, 7(a0) +; CHECK-UNALIGNED-RV64-NEXT: ld a1, 7(a1) +; CHECK-UNALIGNED-RV64-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-NEXT: seqz a0, a0 +; CHECK-UNALIGNED-RV64-NEXT: ret +entry: + %memcmp = call i32 @memcmp(i8* %s1, i8* %s2, iXLen 15) + %ret = icmp eq i32 %memcmp, 0 + ret i1 %ret +} + +define i1 @memcmp_size_31(i8* %s1, i8* %s2) { +; CHECK-ALIGNED-RV32-LABEL: memcmp_size_31: +; CHECK-ALIGNED-RV32: # %bb.0: # %entry +; CHECK-ALIGNED-RV32-NEXT: addi sp, sp, -16 +; CHECK-ALIGNED-RV32-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ALIGNED-RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK-ALIGNED-RV32-NEXT: .cfi_offset ra, -4 +; CHECK-ALIGNED-RV32-NEXT: li a2, 31 +; CHECK-ALIGNED-RV32-NEXT: call memcmp +; CHECK-ALIGNED-RV32-NEXT: seqz a0, a0 +; CHECK-ALIGNED-RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK-ALIGNED-RV32-NEXT: addi sp, sp, 16 +; CHECK-ALIGNED-RV32-NEXT: ret +; +; CHECK-ALIGNED-RV64-LABEL: memcmp_size_31: +; CHECK-ALIGNED-RV64: # %bb.0: # %entry +; CHECK-ALIGNED-RV64-NEXT: lbu a2, 1(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a3, 0(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a4, 2(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 3(a0) +; CHECK-ALIGNED-RV64-NEXT: slli a2, a2, 8 +; CHECK-ALIGNED-RV64-NEXT: or a2, a2, a3 
+; CHECK-ALIGNED-RV64-NEXT: slli a4, a4, 16 +; CHECK-ALIGNED-RV64-NEXT: slli a5, a5, 24 +; CHECK-ALIGNED-RV64-NEXT: or a4, a5, a4 +; CHECK-ALIGNED-RV64-NEXT: or a2, a4, a2 +; CHECK-ALIGNED-RV64-NEXT: lbu a3, 5(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a4, 4(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 6(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 7(a0) +; CHECK-ALIGNED-RV64-NEXT: slli a3, a3, 8 +; CHECK-ALIGNED-RV64-NEXT: or a3, a3, a4 +; CHECK-ALIGNED-RV64-NEXT: slli a5, a5, 16 +; CHECK-ALIGNED-RV64-NEXT: slli a6, a6, 24 +; CHECK-ALIGNED-RV64-NEXT: or a4, a6, a5 +; CHECK-ALIGNED-RV64-NEXT: or a3, a4, a3 +; CHECK-ALIGNED-RV64-NEXT: slli a3, a3, 32 +; CHECK-ALIGNED-RV64-NEXT: or a2, a3, a2 +; CHECK-ALIGNED-RV64-NEXT: lbu a3, 1(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a4, 0(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 2(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 3(a1) +; CHECK-ALIGNED-RV64-NEXT: slli a3, a3, 8 +; CHECK-ALIGNED-RV64-NEXT: or a3, a3, a4 +; CHECK-ALIGNED-RV64-NEXT: slli a5, a5, 16 +; CHECK-ALIGNED-RV64-NEXT: slli a6, a6, 24 +; CHECK-ALIGNED-RV64-NEXT: or a4, a6, a5 +; CHECK-ALIGNED-RV64-NEXT: or a3, a4, a3 +; CHECK-ALIGNED-RV64-NEXT: lbu a4, 5(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 4(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 6(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a7, 7(a1) +; CHECK-ALIGNED-RV64-NEXT: slli a4, a4, 8 +; CHECK-ALIGNED-RV64-NEXT: or a4, a4, a5 +; CHECK-ALIGNED-RV64-NEXT: slli a6, a6, 16 +; CHECK-ALIGNED-RV64-NEXT: slli a7, a7, 24 +; CHECK-ALIGNED-RV64-NEXT: or a5, a7, a6 +; CHECK-ALIGNED-RV64-NEXT: or a4, a5, a4 +; CHECK-ALIGNED-RV64-NEXT: slli a4, a4, 32 +; CHECK-ALIGNED-RV64-NEXT: or a3, a4, a3 +; CHECK-ALIGNED-RV64-NEXT: xor a2, a2, a3 +; CHECK-ALIGNED-RV64-NEXT: lbu a3, 9(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a4, 8(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 10(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 11(a0) +; CHECK-ALIGNED-RV64-NEXT: slli a3, a3, 8 +; CHECK-ALIGNED-RV64-NEXT: or a3, a3, a4 +; CHECK-ALIGNED-RV64-NEXT: slli a5, a5, 16 +; CHECK-ALIGNED-RV64-NEXT: slli a6, a6, 24 +; CHECK-ALIGNED-RV64-NEXT: or a4, a6, a5 +; CHECK-ALIGNED-RV64-NEXT: or a3, a4, a3 +; CHECK-ALIGNED-RV64-NEXT: lbu a4, 13(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 12(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 14(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a7, 15(a0) +; CHECK-ALIGNED-RV64-NEXT: slli a4, a4, 8 +; CHECK-ALIGNED-RV64-NEXT: or a4, a4, a5 +; CHECK-ALIGNED-RV64-NEXT: slli a6, a6, 16 +; CHECK-ALIGNED-RV64-NEXT: slli a7, a7, 24 +; CHECK-ALIGNED-RV64-NEXT: or a5, a7, a6 +; CHECK-ALIGNED-RV64-NEXT: or a4, a5, a4 +; CHECK-ALIGNED-RV64-NEXT: slli a4, a4, 32 +; CHECK-ALIGNED-RV64-NEXT: or a3, a4, a3 +; CHECK-ALIGNED-RV64-NEXT: lbu a4, 9(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 8(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 10(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a7, 11(a1) +; CHECK-ALIGNED-RV64-NEXT: slli a4, a4, 8 +; CHECK-ALIGNED-RV64-NEXT: or a4, a4, a5 +; CHECK-ALIGNED-RV64-NEXT: slli a6, a6, 16 +; CHECK-ALIGNED-RV64-NEXT: slli a7, a7, 24 +; CHECK-ALIGNED-RV64-NEXT: or a5, a7, a6 +; CHECK-ALIGNED-RV64-NEXT: or a4, a5, a4 +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 13(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 12(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a7, 14(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu t0, 15(a1) +; CHECK-ALIGNED-RV64-NEXT: slli a5, a5, 8 +; CHECK-ALIGNED-RV64-NEXT: or a5, a5, a6 +; CHECK-ALIGNED-RV64-NEXT: slli a7, a7, 16 +; CHECK-ALIGNED-RV64-NEXT: slli t0, t0, 24 +; CHECK-ALIGNED-RV64-NEXT: or a6, t0, a7 +; CHECK-ALIGNED-RV64-NEXT: or a5, a6, a5 +; CHECK-ALIGNED-RV64-NEXT: slli a5, a5, 32 +; CHECK-ALIGNED-RV64-NEXT: or a4, a5, a4 +; 
CHECK-ALIGNED-RV64-NEXT: xor a3, a3, a4 +; CHECK-ALIGNED-RV64-NEXT: lbu a4, 17(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 16(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 18(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a7, 19(a0) +; CHECK-ALIGNED-RV64-NEXT: slli a4, a4, 8 +; CHECK-ALIGNED-RV64-NEXT: or a4, a4, a5 +; CHECK-ALIGNED-RV64-NEXT: slli a6, a6, 16 +; CHECK-ALIGNED-RV64-NEXT: slli a7, a7, 24 +; CHECK-ALIGNED-RV64-NEXT: or a5, a7, a6 +; CHECK-ALIGNED-RV64-NEXT: or a4, a5, a4 +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 21(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 20(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a7, 22(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu t0, 23(a0) +; CHECK-ALIGNED-RV64-NEXT: slli a5, a5, 8 +; CHECK-ALIGNED-RV64-NEXT: or a5, a5, a6 +; CHECK-ALIGNED-RV64-NEXT: slli a7, a7, 16 +; CHECK-ALIGNED-RV64-NEXT: slli t0, t0, 24 +; CHECK-ALIGNED-RV64-NEXT: or a6, t0, a7 +; CHECK-ALIGNED-RV64-NEXT: or a5, a6, a5 +; CHECK-ALIGNED-RV64-NEXT: slli a5, a5, 32 +; CHECK-ALIGNED-RV64-NEXT: or a4, a5, a4 +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 17(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 16(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a7, 18(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu t0, 19(a1) +; CHECK-ALIGNED-RV64-NEXT: slli a5, a5, 8 +; CHECK-ALIGNED-RV64-NEXT: or a5, a5, a6 +; CHECK-ALIGNED-RV64-NEXT: slli a7, a7, 16 +; CHECK-ALIGNED-RV64-NEXT: slli t0, t0, 24 +; CHECK-ALIGNED-RV64-NEXT: or a6, t0, a7 +; CHECK-ALIGNED-RV64-NEXT: or a5, a6, a5 +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 21(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a7, 20(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu t0, 22(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu t1, 23(a1) +; CHECK-ALIGNED-RV64-NEXT: slli a6, a6, 8 +; CHECK-ALIGNED-RV64-NEXT: or a6, a6, a7 +; CHECK-ALIGNED-RV64-NEXT: slli t0, t0, 16 +; CHECK-ALIGNED-RV64-NEXT: slli t1, t1, 24 +; CHECK-ALIGNED-RV64-NEXT: or a7, t1, t0 +; CHECK-ALIGNED-RV64-NEXT: or a6, a7, a6 +; CHECK-ALIGNED-RV64-NEXT: slli a6, a6, 32 +; CHECK-ALIGNED-RV64-NEXT: or a5, a6, a5 +; CHECK-ALIGNED-RV64-NEXT: xor a4, a4, a5 +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 25(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 24(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a7, 26(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu t0, 27(a0) +; CHECK-ALIGNED-RV64-NEXT: slli a5, a5, 8 +; CHECK-ALIGNED-RV64-NEXT: or a5, a5, a6 +; CHECK-ALIGNED-RV64-NEXT: slli a7, a7, 16 +; CHECK-ALIGNED-RV64-NEXT: slli t0, t0, 24 +; CHECK-ALIGNED-RV64-NEXT: or a6, t0, a7 +; CHECK-ALIGNED-RV64-NEXT: or a5, a6, a5 +; CHECK-ALIGNED-RV64-NEXT: lbu a6, 25(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a7, 24(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu t0, 26(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu t1, 27(a1) +; CHECK-ALIGNED-RV64-NEXT: slli a6, a6, 8 +; CHECK-ALIGNED-RV64-NEXT: or a6, a6, a7 +; CHECK-ALIGNED-RV64-NEXT: slli t0, t0, 16 +; CHECK-ALIGNED-RV64-NEXT: slli t1, t1, 24 +; CHECK-ALIGNED-RV64-NEXT: or a7, t1, t0 +; CHECK-ALIGNED-RV64-NEXT: lbu t0, 29(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu t1, 28(a0) +; CHECK-ALIGNED-RV64-NEXT: or a6, a7, a6 +; CHECK-ALIGNED-RV64-NEXT: xor a5, a5, a6 +; CHECK-ALIGNED-RV64-NEXT: slli t0, t0, 8 +; CHECK-ALIGNED-RV64-NEXT: or a6, t0, t1 +; CHECK-ALIGNED-RV64-NEXT: lbu a7, 29(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu t0, 28(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a0, 30(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a1, 30(a1) +; CHECK-ALIGNED-RV64-NEXT: slli a7, a7, 8 +; CHECK-ALIGNED-RV64-NEXT: or a7, a7, t0 +; CHECK-ALIGNED-RV64-NEXT: xor a6, a6, a7 +; CHECK-ALIGNED-RV64-NEXT: xor a0, a0, a1 +; CHECK-ALIGNED-RV64-NEXT: or a2, a2, a3 +; CHECK-ALIGNED-RV64-NEXT: or a4, a4, a5 +; CHECK-ALIGNED-RV64-NEXT: or a0, a6, a0 +; CHECK-ALIGNED-RV64-NEXT: or a2, a2, a4 
+; CHECK-ALIGNED-RV64-NEXT: or a0, a2, a0 +; CHECK-ALIGNED-RV64-NEXT: seqz a0, a0 +; CHECK-ALIGNED-RV64-NEXT: ret +; +; CHECK-UNALIGNED-RV32-LABEL: memcmp_size_31: +; CHECK-UNALIGNED-RV32: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-NEXT: lw a4, 4(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw a5, 4(a1) +; CHECK-UNALIGNED-RV32-NEXT: lw a6, 8(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw a7, 8(a1) +; CHECK-UNALIGNED-RV32-NEXT: lw t0, 12(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw t1, 12(a1) +; CHECK-UNALIGNED-RV32-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-NEXT: xor a4, a4, a5 +; CHECK-UNALIGNED-RV32-NEXT: xor a3, a6, a7 +; CHECK-UNALIGNED-RV32-NEXT: xor a5, t0, t1 +; CHECK-UNALIGNED-RV32-NEXT: lw a6, 16(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw a7, 16(a1) +; CHECK-UNALIGNED-RV32-NEXT: lw t0, 20(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw t1, 20(a1) +; CHECK-UNALIGNED-RV32-NEXT: lw t2, 24(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw t3, 24(a1) +; CHECK-UNALIGNED-RV32-NEXT: lw a0, 27(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw a1, 27(a1) +; CHECK-UNALIGNED-RV32-NEXT: xor a6, a6, a7 +; CHECK-UNALIGNED-RV32-NEXT: xor a7, t0, t1 +; CHECK-UNALIGNED-RV32-NEXT: xor t0, t2, t3 +; CHECK-UNALIGNED-RV32-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-NEXT: or a2, a2, a4 +; CHECK-UNALIGNED-RV32-NEXT: or a3, a3, a5 +; CHECK-UNALIGNED-RV32-NEXT: or a1, a6, a7 +; CHECK-UNALIGNED-RV32-NEXT: or a0, t0, a0 +; CHECK-UNALIGNED-RV32-NEXT: or a2, a2, a3 +; CHECK-UNALIGNED-RV32-NEXT: or a0, a1, a0 +; CHECK-UNALIGNED-RV32-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-NEXT: seqz a0, a0 +; CHECK-UNALIGNED-RV32-NEXT: ret +; +; CHECK-UNALIGNED-RV64-LABEL: memcmp_size_31: +; CHECK-UNALIGNED-RV64: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-NEXT: ld a2, 0(a0) +; CHECK-UNALIGNED-RV64-NEXT: ld a3, 0(a1) +; CHECK-UNALIGNED-RV64-NEXT: ld a4, 8(a0) +; CHECK-UNALIGNED-RV64-NEXT: ld a5, 8(a1) +; CHECK-UNALIGNED-RV64-NEXT: ld a6, 16(a0) +; CHECK-UNALIGNED-RV64-NEXT: ld a7, 16(a1) +; CHECK-UNALIGNED-RV64-NEXT: ld a0, 23(a0) +; CHECK-UNALIGNED-RV64-NEXT: ld a1, 23(a1) +; CHECK-UNALIGNED-RV64-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-NEXT: xor a4, a4, a5 +; CHECK-UNALIGNED-RV64-NEXT: xor a3, a6, a7 +; CHECK-UNALIGNED-RV64-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-NEXT: or a2, a2, a4 +; CHECK-UNALIGNED-RV64-NEXT: or a0, a3, a0 +; CHECK-UNALIGNED-RV64-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-NEXT: seqz a0, a0 +; CHECK-UNALIGNED-RV64-NEXT: ret +entry: + %memcmp = call i32 @memcmp(i8* %s1, i8* %s2, iXLen 31) + %ret = icmp eq i32 %memcmp, 0 + ret i1 %ret +}

>From 2caea13ab7795c32476a7102028b9ac0a3ebf9b6 Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng...@bytedance.com>
Date: Fri, 6 Sep 2024 18:07:49 +0800
Subject: [PATCH 2/5] Fix copy-paste mistake

Created using spr 1.3.6-beta.1
---
 llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index ad532aadc83266..ae5ebf16dc6da0 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -2119,7 +2119,7 @@ RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
   TTI::MemCmpExpansionOptions Options;
   // FIXME: Vectors haven't been tested.
   Options.AllowOverlappingLoads =
-      (ST->enableUnalignedScalarMem() || ST->enableUnalignedScalarMem());
+      (ST->enableUnalignedScalarMem() || ST->enableUnalignedVectorMem());
   Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
   Options.NumLoadsPerBlock = Options.MaxNumLoads;
   if (ST->is64Bit())

>From a96e1aaf9a4cbe8e8dd09f4f4b1260b5c63541df Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng...@bytedance.com>
Date: Mon, 9 Sep 2024 21:44:22 +0800
Subject: [PATCH 3/5] Don't add 5/6 to AllowedTailExpansions for RV32

Created using spr 1.3.6-beta.1
---
 llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index ae5ebf16dc6da0..f2fe52bb939e07 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -2125,6 +2125,8 @@ RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
   if (ST->is64Bit())
     Options.LoadSizes.push_back(8);
   llvm::append_range(Options.LoadSizes, ArrayRef({4, 2, 1}));
-  Options.AllowedTailExpansions = {3, 5, 6};
+  Options.AllowedTailExpansions = {3};
+  if (ST->is64Bit())
+    llvm::append_range(Options.AllowedTailExpansions, ArrayRef{5, 6});
   return Options;
 }

>From 86b823d7f70d847b140af48cd1c0c7a461897d77 Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng...@bytedance.com>
Date: Mon, 9 Sep 2024 21:52:13 +0800
Subject: [PATCH 4/5] Remove AllowedTailExpansions

Created using spr 1.3.6-beta.1
---
 llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index f2fe52bb939e07..2ec4483f072d5a 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -2125,8 +2125,5 @@ RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
   if (ST->is64Bit())
     Options.LoadSizes.push_back(8);
   llvm::append_range(Options.LoadSizes, ArrayRef({4, 2, 1}));
-  Options.AllowedTailExpansions = {3};
-  if (ST->is64Bit())
-    llvm::append_range(Options.AllowedTailExpansions, ArrayRef{5, 6});
   return Options;
 }

>From e709f8d43874d8370ec39a29e8e05e6a4f612da0 Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng...@bytedance.com>
Date: Fri, 13 Sep 2024 13:13:09 +0800
Subject: [PATCH 5/5] Explicitly set the LoadSizes

Created using spr 1.3.6-beta.1
---
 llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 2ec4483f072d5a..9bc262c8a06a33 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -2123,7 +2123,8 @@ RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
   Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
   Options.NumLoadsPerBlock = Options.MaxNumLoads;
   if (ST->is64Bit())
-    Options.LoadSizes.push_back(8);
-  llvm::append_range(Options.LoadSizes, ArrayRef({4, 2, 1}));
+    Options.LoadSizes = {8, 4, 2, 1};
+  else
+    Options.LoadSizes = {4, 2, 1};
   return Options;
 }
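For readers following the series, the net result is easier to see in one place. Below is a sketch of RISCVTTIImpl::enableMemCmpExpansion as it stands after patch 5, assuming no unrelated changes to the function land in between; the lines are taken from the diffs above, and the inline comments are annotations added here, not part of the committed code:

  RISCVTTIImpl::TTI::MemCmpExpansionOptions
  RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
    TTI::MemCmpExpansionOptions Options;
    // FIXME: Vectors haven't been tested.
    // Patch 2: overlapping loads need fast misaligned access; the initial
    // version tested enableUnalignedScalarMem() twice by copy-paste.
    Options.AllowOverlappingLoads =
        (ST->enableUnalignedScalarMem() || ST->enableUnalignedVectorMem());
    Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
    Options.NumLoadsPerBlock = Options.MaxNumLoads;
    // Patch 5: spell out the legal scalar load widths per XLEN instead of
    // appending to the list; patches 3 and 4 first restricted and then
    // removed the AllowedTailExpansions setting.
    if (ST->is64Bit())
      Options.LoadSizes = {8, 4, 2, 1};
    else
      Options.LoadSizes = {4, 2, 1};
    return Options;
  }

The test checks above show what these options buy: with AllowOverlappingLoads set, the 15-byte bcmp in the CHECK-UNALIGNED-RV64 output becomes two 8-byte loads per buffer at offsets 0 and 7 (the second load deliberately overlaps the first by one byte), combined with xor/or and reduced to a single seqz instead of a library call.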