https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/98196
>From b46d892a43b034dd515987f37441fc842710dc62 Mon Sep 17 00:00:00 2001 From: Haowei Wu <hao...@google.com> Date: Wed, 31 Jul 2024 12:12:26 +0100 Subject: [PATCH] Revert "[C++20] [Modules] Always emit the inline builtins" This reverts commit ca8a4111f796fe8533e0af95557875b15becff06. This patch broke clang test on Windows x64 msvc --- clang/lib/CodeGen/CodeGenModule.cpp | 10 +- clang/test/Modules/inline-builtins.cppm | 34 --- .../Target/AArch64/AArch64FrameLowering.cpp | 3 +- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 257 ++++++++++++++++-- llvm/lib/Target/AArch64/AArch64InstrInfo.h | 5 +- .../CodeGen/AArch64/sched-postidxalias.mir | 63 +++++ 6 files changed, 314 insertions(+), 58 deletions(-) delete mode 100644 clang/test/Modules/inline-builtins.cppm create mode 100644 llvm/test/CodeGen/AArch64/sched-postidxalias.mir diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 5a575c535b505..344a0e538f22a 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -4022,11 +4022,6 @@ bool CodeGenModule::shouldEmitFunction(GlobalDecl GD) { return true; const auto *F = cast<FunctionDecl>(GD.getDecl()); - // Inline builtins declaration must be emitted. They often are fortified - // functions. - if (F->isInlineBuiltinDeclaration()) - return true; - if (CodeGenOpts.OptimizationLevel == 0 && !F->hasAttr<AlwaysInlineAttr>()) return false; @@ -4072,6 +4067,11 @@ bool CodeGenModule::shouldEmitFunction(GlobalDecl GD) { } } + // Inline builtins declaration must be emitted. They often are fortified + // functions. + if (F->isInlineBuiltinDeclaration()) + return true; + // PR9614. Avoid cases where the source code is lying to us. An available // externally function should have an equivalent function somewhere else, // but a function that calls itself through asm label/`__builtin_` trickery is diff --git a/clang/test/Modules/inline-builtins.cppm b/clang/test/Modules/inline-builtins.cppm deleted file mode 100644 index 8f41ae2659513..0000000000000 --- a/clang/test/Modules/inline-builtins.cppm +++ /dev/null @@ -1,34 +0,0 @@ -// RUN: rm -rf %t -// RUN: split-file %s %t -// RUN: cd %t -// -// RUN: %clang_cc1 -std=c++20 -O3 %t/a.cppm -emit-module-interface -o %t/a.pcm -// RUN: %clang_cc1 -std=c++20 -O3 %t/test.cc -fmodule-file=a=%t/a.pcm \ -// RUN: -emit-llvm -o - | FileCheck %t/test.cc - -//--- memmove.h -typedef long unsigned int size_t; -extern "C" void *memmove (void *__dest, const void *__src, size_t __n) - throw () __attribute__ ((__nonnull__ (1, 2))); -extern "C" __inline __attribute__ ((__always_inline__)) __attribute__ ((__gnu_inline__)) void * - memmove (void *__dest, const void *__src, size_t __len) throw () -{ - return __builtin_memmove(__dest, __src, __len); -} - -//--- a.cppm -module; -#include "memmove.h" -export module a; -export using ::memmove; - -//--- test.cc -import a; - -void test() { - int a, b; - unsigned c = 0; - memmove(&a, &b, c); -} - -// CHECK-NOT: memmove diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index bd530903bb664..f28511c74dcd6 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -1487,7 +1487,8 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec( // If the first store isn't right where we want SP then we can't fold the // update in so create a normal arithmetic instruction instead. if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 || - CSStackSizeInc < MinOffset || CSStackSizeInc > MaxOffset) { + CSStackSizeInc < MinOffset * (int64_t)Scale.getFixedValue() || + CSStackSizeInc > MaxOffset * (int64_t)Scale.getFixedValue()) { // If we are destroying the frame, make sure we add the increment after the // last frame operation. if (FrameFlag == MachineInstr::FrameDestroy) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 6cd9a1a817086..0320c6e78ff9c 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -3487,6 +3487,229 @@ MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI, "Function must not be called with an addressing mode it can't handle"); } +/// Return true if the opcode is a post-index ld/st instruction, which really +/// loads from base+0. +static bool isPostIndexLdStOpcode(unsigned Opcode) { + switch (Opcode) { + default: + return false; + case AArch64::LD1Fourv16b_POST: + case AArch64::LD1Fourv1d_POST: + case AArch64::LD1Fourv2d_POST: + case AArch64::LD1Fourv2s_POST: + case AArch64::LD1Fourv4h_POST: + case AArch64::LD1Fourv4s_POST: + case AArch64::LD1Fourv8b_POST: + case AArch64::LD1Fourv8h_POST: + case AArch64::LD1Onev16b_POST: + case AArch64::LD1Onev1d_POST: + case AArch64::LD1Onev2d_POST: + case AArch64::LD1Onev2s_POST: + case AArch64::LD1Onev4h_POST: + case AArch64::LD1Onev4s_POST: + case AArch64::LD1Onev8b_POST: + case AArch64::LD1Onev8h_POST: + case AArch64::LD1Rv16b_POST: + case AArch64::LD1Rv1d_POST: + case AArch64::LD1Rv2d_POST: + case AArch64::LD1Rv2s_POST: + case AArch64::LD1Rv4h_POST: + case AArch64::LD1Rv4s_POST: + case AArch64::LD1Rv8b_POST: + case AArch64::LD1Rv8h_POST: + case AArch64::LD1Threev16b_POST: + case AArch64::LD1Threev1d_POST: + case AArch64::LD1Threev2d_POST: + case AArch64::LD1Threev2s_POST: + case AArch64::LD1Threev4h_POST: + case AArch64::LD1Threev4s_POST: + case AArch64::LD1Threev8b_POST: + case AArch64::LD1Threev8h_POST: + case AArch64::LD1Twov16b_POST: + case AArch64::LD1Twov1d_POST: + case AArch64::LD1Twov2d_POST: + case AArch64::LD1Twov2s_POST: + case AArch64::LD1Twov4h_POST: + case AArch64::LD1Twov4s_POST: + case AArch64::LD1Twov8b_POST: + case AArch64::LD1Twov8h_POST: + case AArch64::LD1i16_POST: + case AArch64::LD1i32_POST: + case AArch64::LD1i64_POST: + case AArch64::LD1i8_POST: + case AArch64::LD2Rv16b_POST: + case AArch64::LD2Rv1d_POST: + case AArch64::LD2Rv2d_POST: + case AArch64::LD2Rv2s_POST: + case AArch64::LD2Rv4h_POST: + case AArch64::LD2Rv4s_POST: + case AArch64::LD2Rv8b_POST: + case AArch64::LD2Rv8h_POST: + case AArch64::LD2Twov16b_POST: + case AArch64::LD2Twov2d_POST: + case AArch64::LD2Twov2s_POST: + case AArch64::LD2Twov4h_POST: + case AArch64::LD2Twov4s_POST: + case AArch64::LD2Twov8b_POST: + case AArch64::LD2Twov8h_POST: + case AArch64::LD2i16_POST: + case AArch64::LD2i32_POST: + case AArch64::LD2i64_POST: + case AArch64::LD2i8_POST: + case AArch64::LD3Rv16b_POST: + case AArch64::LD3Rv1d_POST: + case AArch64::LD3Rv2d_POST: + case AArch64::LD3Rv2s_POST: + case AArch64::LD3Rv4h_POST: + case AArch64::LD3Rv4s_POST: + case AArch64::LD3Rv8b_POST: + case AArch64::LD3Rv8h_POST: + case AArch64::LD3Threev16b_POST: + case AArch64::LD3Threev2d_POST: + case AArch64::LD3Threev2s_POST: + case AArch64::LD3Threev4h_POST: + case AArch64::LD3Threev4s_POST: + case AArch64::LD3Threev8b_POST: + case AArch64::LD3Threev8h_POST: + case AArch64::LD3i16_POST: + case AArch64::LD3i32_POST: + case AArch64::LD3i64_POST: + case AArch64::LD3i8_POST: + case AArch64::LD4Fourv16b_POST: + case AArch64::LD4Fourv2d_POST: + case AArch64::LD4Fourv2s_POST: + case AArch64::LD4Fourv4h_POST: + case AArch64::LD4Fourv4s_POST: + case AArch64::LD4Fourv8b_POST: + case AArch64::LD4Fourv8h_POST: + case AArch64::LD4Rv16b_POST: + case AArch64::LD4Rv1d_POST: + case AArch64::LD4Rv2d_POST: + case AArch64::LD4Rv2s_POST: + case AArch64::LD4Rv4h_POST: + case AArch64::LD4Rv4s_POST: + case AArch64::LD4Rv8b_POST: + case AArch64::LD4Rv8h_POST: + case AArch64::LD4i16_POST: + case AArch64::LD4i32_POST: + case AArch64::LD4i64_POST: + case AArch64::LD4i8_POST: + case AArch64::LDAPRWpost: + case AArch64::LDAPRXpost: + case AArch64::LDIAPPWpost: + case AArch64::LDIAPPXpost: + case AArch64::LDPDpost: + case AArch64::LDPQpost: + case AArch64::LDPSWpost: + case AArch64::LDPSpost: + case AArch64::LDPWpost: + case AArch64::LDPXpost: + case AArch64::LDRBBpost: + case AArch64::LDRBpost: + case AArch64::LDRDpost: + case AArch64::LDRHHpost: + case AArch64::LDRHpost: + case AArch64::LDRQpost: + case AArch64::LDRSBWpost: + case AArch64::LDRSBXpost: + case AArch64::LDRSHWpost: + case AArch64::LDRSHXpost: + case AArch64::LDRSWpost: + case AArch64::LDRSpost: + case AArch64::LDRWpost: + case AArch64::LDRXpost: + case AArch64::ST1Fourv16b_POST: + case AArch64::ST1Fourv1d_POST: + case AArch64::ST1Fourv2d_POST: + case AArch64::ST1Fourv2s_POST: + case AArch64::ST1Fourv4h_POST: + case AArch64::ST1Fourv4s_POST: + case AArch64::ST1Fourv8b_POST: + case AArch64::ST1Fourv8h_POST: + case AArch64::ST1Onev16b_POST: + case AArch64::ST1Onev1d_POST: + case AArch64::ST1Onev2d_POST: + case AArch64::ST1Onev2s_POST: + case AArch64::ST1Onev4h_POST: + case AArch64::ST1Onev4s_POST: + case AArch64::ST1Onev8b_POST: + case AArch64::ST1Onev8h_POST: + case AArch64::ST1Threev16b_POST: + case AArch64::ST1Threev1d_POST: + case AArch64::ST1Threev2d_POST: + case AArch64::ST1Threev2s_POST: + case AArch64::ST1Threev4h_POST: + case AArch64::ST1Threev4s_POST: + case AArch64::ST1Threev8b_POST: + case AArch64::ST1Threev8h_POST: + case AArch64::ST1Twov16b_POST: + case AArch64::ST1Twov1d_POST: + case AArch64::ST1Twov2d_POST: + case AArch64::ST1Twov2s_POST: + case AArch64::ST1Twov4h_POST: + case AArch64::ST1Twov4s_POST: + case AArch64::ST1Twov8b_POST: + case AArch64::ST1Twov8h_POST: + case AArch64::ST1i16_POST: + case AArch64::ST1i32_POST: + case AArch64::ST1i64_POST: + case AArch64::ST1i8_POST: + case AArch64::ST2GPostIndex: + case AArch64::ST2Twov16b_POST: + case AArch64::ST2Twov2d_POST: + case AArch64::ST2Twov2s_POST: + case AArch64::ST2Twov4h_POST: + case AArch64::ST2Twov4s_POST: + case AArch64::ST2Twov8b_POST: + case AArch64::ST2Twov8h_POST: + case AArch64::ST2i16_POST: + case AArch64::ST2i32_POST: + case AArch64::ST2i64_POST: + case AArch64::ST2i8_POST: + case AArch64::ST3Threev16b_POST: + case AArch64::ST3Threev2d_POST: + case AArch64::ST3Threev2s_POST: + case AArch64::ST3Threev4h_POST: + case AArch64::ST3Threev4s_POST: + case AArch64::ST3Threev8b_POST: + case AArch64::ST3Threev8h_POST: + case AArch64::ST3i16_POST: + case AArch64::ST3i32_POST: + case AArch64::ST3i64_POST: + case AArch64::ST3i8_POST: + case AArch64::ST4Fourv16b_POST: + case AArch64::ST4Fourv2d_POST: + case AArch64::ST4Fourv2s_POST: + case AArch64::ST4Fourv4h_POST: + case AArch64::ST4Fourv4s_POST: + case AArch64::ST4Fourv8b_POST: + case AArch64::ST4Fourv8h_POST: + case AArch64::ST4i16_POST: + case AArch64::ST4i32_POST: + case AArch64::ST4i64_POST: + case AArch64::ST4i8_POST: + case AArch64::STGPostIndex: + case AArch64::STGPpost: + case AArch64::STPDpost: + case AArch64::STPQpost: + case AArch64::STPSpost: + case AArch64::STPWpost: + case AArch64::STPXpost: + case AArch64::STRBBpost: + case AArch64::STRBpost: + case AArch64::STRDpost: + case AArch64::STRHHpost: + case AArch64::STRHpost: + case AArch64::STRQpost: + case AArch64::STRSpost: + case AArch64::STRWpost: + case AArch64::STRXpost: + case AArch64::STZ2GPostIndex: + case AArch64::STZGPostIndex: + return true; + } +} + bool AArch64InstrInfo::getMemOperandWithOffsetWidth( const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, TypeSize &Width, @@ -3518,8 +3741,11 @@ bool AArch64InstrInfo::getMemOperandWithOffsetWidth( // Compute the offset. Offset is calculated as the immediate operand // multiplied by the scaling factor. Unscaled instructions have scaling factor - // set to 1. - if (LdSt.getNumExplicitOperands() == 3) { + // set to 1. Postindex are a special case which have an offset of 0. + if (isPostIndexLdStOpcode(LdSt.getOpcode())) { + BaseOp = &LdSt.getOperand(2); + Offset = 0; + } else if (LdSt.getNumExplicitOperands() == 3) { BaseOp = &LdSt.getOperand(1); Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue(); } else { @@ -3529,10 +3755,7 @@ bool AArch64InstrInfo::getMemOperandWithOffsetWidth( } OffsetIsScalable = Scale.isScalable(); - if (!BaseOp->isReg() && !BaseOp->isFI()) - return false; - - return true; + return BaseOp->isReg() || BaseOp->isFI(); } MachineOperand & @@ -3622,8 +3845,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale, break; case AArch64::STRWpost: case AArch64::LDRWpost: - Scale = TypeSize::getFixed(4); - Width = TypeSize::getFixed(32); + Scale = TypeSize::getFixed(1); + Width = TypeSize::getFixed(4); MinOffset = -256; MaxOffset = 255; break; @@ -3696,7 +3919,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale, case AArch64::STPQi: case AArch64::STNPQi: Scale = TypeSize::getFixed(16); - Width = TypeSize::getFixed(32); + Width = TypeSize::getFixed(16 * 2); MinOffset = -64; MaxOffset = 63; break; @@ -3709,7 +3932,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale, case AArch64::STNPXi: case AArch64::STNPDi: Scale = TypeSize::getFixed(8); - Width = TypeSize::getFixed(16); + Width = TypeSize::getFixed(8 * 2); MinOffset = -64; MaxOffset = 63; break; @@ -3722,7 +3945,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale, case AArch64::STNPWi: case AArch64::STNPSi: Scale = TypeSize::getFixed(4); - Width = TypeSize::getFixed(8); + Width = TypeSize::getFixed(4 * 2); MinOffset = -64; MaxOffset = 63; break; @@ -3730,18 +3953,18 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale, case AArch64::STPQpre: case AArch64::LDPQpost: Scale = TypeSize::getFixed(16); - Width = TypeSize::getFixed(16); - MinOffset = -1024; - MaxOffset = 1008; + Width = TypeSize::getFixed(16 * 2); + MinOffset = -64; + MaxOffset = 63; break; case AArch64::STPXpre: case AArch64::LDPXpost: case AArch64::STPDpre: case AArch64::LDPDpost: Scale = TypeSize::getFixed(8); - Width = TypeSize::getFixed(8); - MinOffset = -512; - MaxOffset = 504; + Width = TypeSize::getFixed(8 * 2); + MinOffset = -64; + MaxOffset = 63; break; case AArch64::StoreSwiftAsyncContext: // Store is an STRXui, but there might be an ADDXri in the expansion too. diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 7e5ac423f09ce..24a500aa190ab 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -317,7 +317,10 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo { /// Returns true if opcode \p Opc is a memory operation. If it is, set /// \p Scale, \p Width, \p MinOffset, and \p MaxOffset accordingly. /// - /// For unscaled instructions, \p Scale is set to 1. + /// For unscaled instructions, \p Scale is set to 1. All values are in bytes. + /// MinOffset/MaxOffset are the un-scaled limits of the immediate in the + /// instruction, the actual offset limit is [MinOffset*Scale, + /// MaxOffset*Scale]. static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, TypeSize &Width, int64_t &MinOffset, int64_t &MaxOffset); diff --git a/llvm/test/CodeGen/AArch64/sched-postidxalias.mir b/llvm/test/CodeGen/AArch64/sched-postidxalias.mir new file mode 100644 index 0000000000000..98ee0fa21b2dd --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sched-postidxalias.mir @@ -0,0 +1,63 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=aarch64 -mcpu=cortex-a55 -run-pass=machine-scheduler -debug-only=machine-scheduler -o - %s 2>&1 | FileCheck %s +# REQUIRES: asserts + +# Both the accesses should have an offset of 0 +# CHECK: Num BaseOps: 1, Offset: 0, OffsetIsScalable: 0, Width: LocationSize::precise(4) +# CHECK: Num BaseOps: 1, Offset: 0, OffsetIsScalable: 0, Width: LocationSize::precise(4) + +--- | + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32" + target triple = "aarch64" + + define ptr @post(ptr %p, i32 %d1, i32 %d2) { + entry: + %d3 = mul i32 %d1, %d2 + %q = getelementptr i64, ptr %p, i64 3 + %r = getelementptr i64, ptr %p, i64 3 + store i32 %d3, ptr %p, align 8 + %0 = load i32, ptr %r, align 8 + store i32 %d1, ptr %p, align 8 + %add.ptr = getelementptr inbounds i8, ptr %p, i64 24 + ret ptr %add.ptr + } + +... +--- +name: post +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64common, preferred-register: '' } + - { id: 1, class: gpr32, preferred-register: '' } + - { id: 2, class: gpr32, preferred-register: '' } + - { id: 3, class: gpr32, preferred-register: '' } + - { id: 4, class: gpr64common, preferred-register: '' } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } + - { reg: '$w2', virtual-reg: '%2' } +body: | + bb.0.entry: + liveins: $x0, $w1, $w2 + + ; CHECK-LABEL: name: post + ; CHECK: liveins: $x0, $w1, $w2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32 = MADDWrrr [[COPY1]], [[COPY]], $wzr + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64common = COPY $x0 + ; CHECK-NEXT: STRWui [[MADDWrrr]], [[COPY2]], 0 :: (store (s32) into %ir.p, align 8) + ; CHECK-NEXT: early-clobber [[COPY2]]:gpr64common = STRWpost [[COPY1]], [[COPY2]], 24 :: (store (s32) into %ir.p, align 8) + ; CHECK-NEXT: $x0 = COPY [[COPY2]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %2:gpr32 = COPY $w2 + %1:gpr32 = COPY $w1 + %4:gpr64common = COPY $x0 + %3:gpr32 = MADDWrrr %1, %2, $wzr + STRWui %3, %4, 0 :: (store (s32) into %ir.p, align 8) + early-clobber %4:gpr64common = STRWpost %1, %4, 24 :: (store (s32) into %ir.p, align 8) + $x0 = COPY %4 + RET_ReallyLR implicit $x0 + +... _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits