[llvm-branch-commits] [llvm] [SelectionDAG][X86] Widen <2 x T> vector types for atomic load (PR #120598)
@@ -146,6 +146,47 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind { ret <1 x i64> %ret } +define <2 x i8> @atomic_vec2_i8(ptr %x) { +; CHECK3-LABEL: atomic_vec2_i8: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzwl (%rdi), %eax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec2_i8: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movw (%rdi), %ax +; CHECK0-NEXT:retq + %ret = load atomic <2 x i8>, ptr %x acquire, align 4 + ret <2 x i8> %ret +} + +define <2 x i16> @atomic_vec2_i16(ptr %x) { +; CHECK-LABEL: atomic_vec2_i16: +; CHECK: ## %bb.0: +; CHECK-NEXT:movl (%rdi), %eax +; CHECK-NEXT:retq + %ret = load atomic <2 x i16>, ptr %x acquire, align 4 + ret <2 x i16> %ret +} arsenm wrote: Test 2 x half, 2 x bfloat, 4 x 16-bit elts https://github.com/llvm/llvm-project/pull/120598 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SelectionDAG][X86] Widen <2 x T> vector types for atomic load (PR #120598)
@@ -1129,8 +1130,9 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { /// resulting wider type. It takes: /// LdChain: list of chains for the load to be generated. /// Ld: load to widen - SDValue GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, - LoadSDNode *LD); + template <typename T> arsenm wrote: This shouldn't need a template argument? https://github.com/llvm/llvm-project/pull/120598 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
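One plausible reading of this comment, as a sketch rather than code from the PR: both `LoadSDNode` and `AtomicSDNode` derive from `MemSDNode`, which already exposes the chain, base pointer, memory VT, and memory operand that the widening logic uses, so the shared base class could stand in for the template parameter:

```c++
// Sketch only, assuming the function body sticks to the MemSDNode interface
// (getChain/getBasePtr/getMemoryVT/getMemOperand); the PR itself kept a
// template. The IsAtomic flag mirrors the signature in the patch.
SDValue GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, MemSDNode *LD,
                            bool IsAtomic = false);
```

With that signature the explicit `IsAtomic` flag could even be dropped in favor of `isa<AtomicSDNode>(LD)` inside the function.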
[llvm-branch-commits] [llvm] [SelectionDAG][X86] Widen <2 x T> vector types for atomic load (PR #120598)
@@ -5901,6 +5904,30 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { N->getOperand(1), N->getOperand(2)); } +SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N) { + SmallVector<SDValue, 16> LdChain; // Chain for the series of load + SDValue Result = GenWidenVectorLoads(LdChain, N, true /*IsAtomic*/); arsenm wrote: ```suggestion SDValue Result = GenWidenVectorLoads(LdChain, N, /*IsAtomic=*/true); ``` https://github.com/llvm/llvm-project/pull/120598 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SelectionDAG][X86] Widen <2 x T> vector types for atomic load (PR #120598)
@@ -52242,6 +52243,81 @@ static SDValue combineConstantPoolLoads(SDNode *N, const SDLoc &dl, return SDValue(); } +static MVT getScalarTypeFromVectorType(MVT VT) { arsenm wrote: This is just getIntegerVT(DAG.getContext(), VT.getSizeInBits()) https://github.com/llvm/llvm-project/pull/120598 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
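A minimal sketch of that suggestion (not the committed code; it assumes a `SelectionDAG &DAG` is in scope at the call site, as it is in the combine this helper serves):

```c++
// Integer type with the same total width as the vector, e.g. v2i16 -> i32,
// replacing the hand-written per-type table above.
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
// Staying within MVT would also work: MVT::getIntegerVT(VT.getSizeInBits()).
```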
[llvm-branch-commits] [llvm] [SelectionDAG][X86] Widen <2 x T> vector types for atomic load (PR #120598)
@@ -5901,6 +5904,30 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { N->getOperand(1), N->getOperand(2)); } +SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N) { + SmallVector<SDValue, 16> LdChain; // Chain for the series of load + SDValue Result = GenWidenVectorLoads(LdChain, N, true /*IsAtomic*/); + + if (Result) { +// If we generate a single load, we can use that for the chain. Otherwise, +// build a factor node to remember the multiple loads are independent and +// chain to that. +SDValue NewChain; +if (LdChain.size() == 1) + NewChain = LdChain[0]; +else + NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, LdChain); arsenm wrote: I think getNode will handle the 1 entry case for you https://github.com/llvm/llvm-project/pull/120598 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
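Concretely, `SelectionDAG::getNode` already folds a one-operand `ISD::TokenFactor` to the operand itself, so the branch can be dropped. A minimal sketch of the simplification (my reading of the comment, not the PR's code):

```c++
// getNode returns LdChain[0] directly when LdChain has a single entry,
// making the explicit size() == 1 special case redundant.
SDValue NewChain =
    DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, LdChain);
ReplaceValueWith(SDValue(N, 1), NewChain);
```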
[llvm-branch-commits] [llvm] [SelectionDAG][X86] Widen <2 x T> vector types for atomic load (PR #120598)
@@ -52242,6 +52243,81 @@ static SDValue combineConstantPoolLoads(SDNode *N, const SDLoc &dl, return SDValue(); } +static MVT getScalarTypeFromVectorType(MVT VT) { + if (VT == MVT::v2i8) +return MVT::i16; + if (VT == MVT::v2i16 || VT == MVT::v2f16 || VT == MVT::v2bf16) +return MVT::i32; + if (VT == MVT::v2i32 || VT == MVT::v2f32) +return MVT::i64; + LLVM_DEBUG(dbgs() << VT << '\n'); + llvm_unreachable("Invalid VT for scalar type translation"); +} + +static SDValue combineAtomicLoad(SDNode *N, SelectionDAG &DAG, arsenm wrote: I don't think any of this custom handling in the target should be necessary https://github.com/llvm/llvm-project/pull/120598 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] d198e1d - Revert "[compiler-rt][rtsan] fopencookie support. (#120864)"
Author: gulfemsavrun Date: 2025-01-02T18:22:29-08:00 New Revision: d198e1d914e8bb9522651da345e5aa8ba6917fad URL: https://github.com/llvm/llvm-project/commit/d198e1d914e8bb9522651da345e5aa8ba6917fad DIFF: https://github.com/llvm/llvm-project/commit/d198e1d914e8bb9522651da345e5aa8ba6917fad.diff LOG: Revert "[compiler-rt][rtsan] fopencookie support. (#120864)" This reverts commit 4b577830033066cfd1b2acf4fcf39950678b27bd. Added: Modified: compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp Removed: diff --git a/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp b/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp index 072923ab35ae0d..4e51f464b57304 100644 --- a/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp +++ b/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp @@ -297,12 +297,6 @@ INTERCEPTOR(FILE *, fdopen, int fd, const char *mode) { return REAL(fdopen)(fd, mode); } -INTERCEPTOR(FILE *, fopencookie, void *cookie, const char *mode, -cookie_io_functions_t funcs) { - __rtsan_notify_intercepted_call("fopencookie"); - return REAL(fopencookie)(cookie, mode, funcs); -} - #if SANITIZER_INTERCEPT_OPEN_MEMSTREAM INTERCEPTOR(FILE *, open_memstream, char **buf, size_t *size) { __rtsan_notify_intercepted_call("open_memstream"); @@ -978,7 +972,6 @@ void __rtsan::InitializeInterceptors() { INTERCEPT_FUNCTION(fputs); INTERCEPT_FUNCTION(fdopen); INTERCEPT_FUNCTION(freopen); - INTERCEPT_FUNCTION(fopencookie); RTSAN_MAYBE_INTERCEPT_OPEN_MEMSTREAM; RTSAN_MAYBE_INTERCEPT_FMEMOPEN; INTERCEPT_FUNCTION(lseek); diff --git a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp index c9c4d7fc4e99ee..b052dd859dcdf6 100644 --- a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp +++ b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp @@ -353,29 +353,6 @@ TEST_F(RtsanFileTest, FopenDiesWhenRealtime) { ExpectNonRealtimeSurvival(Func); } -TEST_F(RtsanFileTest, FopenCookieDieWhenRealtime) { - FILE *f = fopen(GetTemporaryFilePath(), "w"); - EXPECT_THAT(f, Ne(nullptr)); - struct fholder { -FILE *fp; -size_t read; - } fh = {f, 0}; - auto CookieRead = [this](void *cookie, char *buf, size_t size) { -fholder *p = reinterpret_cast(cookie); -p->read = fread(static_cast(buf), 1, size, p->fp); -EXPECT_NE(0, p->read); - }; - cookie_io_functions_t funcs = {(cookie_read_function_t *)&CookieRead, nullptr, - nullptr, nullptr}; - auto Func = [&fh, &funcs]() { -FILE *f = fopencookie(&fh, "w", funcs); -EXPECT_THAT(f, Ne(nullptr)); - }; - - ExpectRealtimeDeath(Func, "fopencookie"); - ExpectNonRealtimeSurvival(Func); -} - #if SANITIZER_INTERCEPT_OPEN_MEMSTREAM TEST_F(RtsanFileTest, OpenMemstreamDiesWhenRealtime) { char *buffer; ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [DataLayout][LangRef] Split non-integral and unstable pointer properties (PR #105735)
https://github.com/arichardson updated https://github.com/llvm/llvm-project/pull/105735 >From e4bd1181d160b8728e7d4158417a83e183bd1709 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Thu, 22 Aug 2024 14:36:04 -0700 Subject: [PATCH 1/3] fix indentation in langref Created using spr 1.3.6-beta.1 --- llvm/docs/LangRef.rst | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 200224c78be004..1a59fba65815cc 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -3103,19 +3103,19 @@ as follows: ``A`` Specifies the address space of objects created by '``alloca``'. Defaults to the default address space of 0. -``p[][n]::[:][:]`` +``p[][]::[:][:]`` This specifies the *size* of a pointer and its and \erred alignments for address space ``n``. is optional and defaults to . The fourth parameter is the size of the index that used for address calculation, which must be less than or equal to the pointer size. If not specified, the default index size is equal to the pointer size. All sizes -are in bits. The address space, ``n``, is optional, and if not specified, -denotes the default address space 0. The value of ``n`` must be -in the range [1,2^24). +are in bits. The , is optional, and if not specified, +denotes the default address space 0. The value of must +be in the range [1,2^24). The optional are used to specify properties of pointers in this -address space: the character ``u`` marks pointers as having an unstable -representation and ```n`` marks pointers as non-integral (i.e. having +address space: the character ``u`` marks pointers as having an unstable +representation and ``n`` marks pointers as non-integral (i.e. having additional metadata). See :ref:`Non-Integral Pointer Types `. ``i:[:]`` >From db97145d3a653f2999b5935f9b1cb4550230689d Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Fri, 25 Oct 2024 12:51:11 -0700 Subject: [PATCH 2/3] include feedback Created using spr 1.3.6-beta.1 --- llvm/docs/LangRef.rst | 30 +- llvm/include/llvm/IR/DataLayout.h | 8 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index c137318af678b6..3c3d0e0b4ab8ee 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -659,7 +659,7 @@ LLVM IR optionally allows the frontend to denote pointers in certain address spaces as "non-integral" or "unstable" (or both "non-integral" and "unstable") via the :ref:`datalayout string`. -These exact implications of these properties are target-specific, but the +The exact implications of these properties are target-specific, but the following IR semantics and restrictions to optimization passes apply: Unstable pointer representation @@ -668,7 +668,7 @@ Unstable pointer representation Pointers in this address space have an *unspecified* bitwise representation (i.e. not backed by a fixed integer). The bitwise pattern of such pointers is allowed to change in a target-specific way. For example, this could be a pointer -type used for with copying garbage collection where the garbage collector could +type used with copying garbage collection where the garbage collector could update the pointer at any time in the collection sweep. ``inttoptr`` and ``ptrtoint`` instructions have the same semantics as for @@ -705,10 +705,10 @@ representation of the pointer. 
Non-integral pointer representation ^^^ -Pointers are not represented as an address, but may instead include +Pointers are not represented as just an address, but may instead include additional metadata such as bounds information or a temporal identifier. Examples include AMDGPU buffer descriptors with a 128-bit fat pointer and a -32-bit offset or CHERI capabilities that contain bounds, permissions and an +32-bit offset, or CHERI capabilities that contain bounds, permissions and an out-of-band validity bit. In general, these pointers cannot be re-created from just an integer value. @@ -716,23 +716,25 @@ In most cases pointers with a non-integral representation behave exactly the same as an integral pointer, the only difference is that it is not possible to create a pointer just from an address. -"Non-integral" pointers also impose restrictions on the optimizer, but in -general these are less restrictive than for "unstable" pointers. The main +"Non-integral" pointers also impose restrictions on transformation passes, but +in general these are less restrictive than for "unstable" pointers. The main difference compared to integral pointers is that ``inttoptr`` instructions should not be inserted by passes as they may not be able to create a valid pointer. This property also means that ``inttoptr(ptrtoint(x))`` cannot be folded to ``x`` as the ``
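The restriction described in this hunk can be made concrete with a short sketch. This is a hypothetical illustration, not code from the PR: `DataLayout::isNonIntegralPointerType` is the existing (pre-split) query, and the fold it guards is a generic `inttoptr(ptrtoint x)` simplification:

```c++
// Hypothetical guard in an inttoptr(ptrtoint X) -> X fold: a plain integer
// cannot carry the pointer's extra metadata (bounds, validity bit, ...),
// so for non-integral pointers the fold must be skipped.
if (DL.isNonIntegralPointerType(cast<PointerType>(X->getType())))
  return nullptr; // keep the cast pair; do not reconstruct the pointer
```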
[llvm-branch-commits] [llvm] [DataLayout][LangRef] Split non-integral and unstable pointer properties (PR #105735)
https://github.com/arichardson edited https://github.com/llvm/llvm-project/pull/105735 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld][WebAssembly] Replace config-> with ctx.arg. (PR #119835)
https://github.com/MaskRay updated https://github.com/llvm/llvm-project/pull/119835 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][cuda] Convert cuf.sync_descriptor to runtime call (PR #121524)
https://github.com/wangzpgi approved this pull request. https://github.com/llvm/llvm-project/pull/121524 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LoongArch] Avoid scheduling relaxable code sequence and attach relax relocs (PR #121330)
@@ -187,18 +187,23 @@ bool LoongArchPreRAExpandPseudo::expandPcalau12iInstPair( MachineInstr &MI = *MBBI; DebugLoc DL = MI.getDebugLoc(); + const auto &STI = MF->getSubtarget<LoongArchSubtarget>(); + bool EnableRelax = STI.hasFeature(LoongArch::FeatureRelax); + Register DestReg = MI.getOperand(0).getReg(); Register ScratchReg = MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass); MachineOperand &Symbol = MI.getOperand(1); BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCALAU12I), ScratchReg) - .addDisp(Symbol, 0, FlagsHi); + .addDisp(Symbol, 0, + EnableRelax ? LoongArchII::addRelaxFlag(FlagsHi) : FlagsHi); heiher wrote: ```c++ EnableRelax ? LoongArchII::addRelaxFlag(FlagsHi) : FlagsHi ``` -> ```c++ LoongArchII::encodeFlags(FlagsHi, EnableRelax) ``` ```c++ static inline unsigned encodeFlags(unsigned Flags, bool Relax) { return Flags | (Relax ? MO_RELAX : 0); } ``` https://github.com/llvm/llvm-project/pull/121330 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LoongArch] Avoid scheduling relaxable code sequence and attach relax relocs (PR #121330)
https://github.com/heiher approved this pull request. LGTM. Thanks. https://github.com/llvm/llvm-project/pull/121330 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LoongArch] Avoid scheduling relaxable code sequence and attach relax relocs (PR #121330)
https://github.com/heiher edited https://github.com/llvm/llvm-project/pull/121330 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [RISCV] Don't create BuildPairF64 or SplitF64 nodes without D or Zdinx. (#116159) (PR #121175)
https://github.com/lenary approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/121175 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][rocdl] Add AMDGPU-specific `cf.assert` lowering (PR #121067)
https://github.com/krzysz00 approved this pull request. 1. Approved 2. Wrt OpenCL ... I hope legalization didn't get broken, but in the OpenCL flow, printf should lower to ... `printf()`, which the compiler will handle. Or at least that's my recollection of how that goes from staring at the AMDGPU backend ~a year ago. https://github.com/llvm/llvm-project/pull/121067 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AtomicExpand] Add bitcasts when expanding load atomic vector (PR #120716)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120716 >From dc2032fe981e47d958ed4bf9d931e11e95ff430b Mon Sep 17 00:00:00 2001 From: jofrn Date: Fri, 20 Dec 2024 06:14:28 -0500 Subject: [PATCH] [AtomicExpand] Add bitcasts when expanding load atomic vector AtomicExpand fails for aligned `load atomic ` because it does not find a compatible library call. This change adds appropriate bitcasts so that the call can be lowered. commit-id:f430c1af --- llvm/lib/CodeGen/AtomicExpandPass.cpp | 22 +++-- llvm/test/CodeGen/ARM/atomic-load-store.ll | 54 ++ llvm/test/CodeGen/X86/atomic-load-store.ll | 15 ++ 3 files changed, 88 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index a75fa688d87a8d..34d043c534da27 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -2060,9 +2060,25 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall( I->replaceAllUsesWith(V); } else if (HasResult) { Value *V; -if (UseSizedLibcall) - V = Builder.CreateBitOrPointerCast(Result, I->getType()); -else { +if (UseSizedLibcall) { + // Add bitcasts from Result's T scalar type to I's <2 x T/2> vector type + if (I->getType()->getScalarType()->isIntOrPtrTy() && I->getType()->isVectorTy() && + !Result->getType()->isVectorTy()) { +TypeSize Size = Result->getType()->getPrimitiveSizeInBits(); +assert((unsigned)Size % 2 == 0); +unsigned HalfSize = (unsigned)Size / 2; +Value *Lo = Builder.CreateTrunc(Result, IntegerType::get(Ctx, HalfSize)); +Value *RS = Builder.CreateLShr(Result, ConstantInt::get(IntegerType::get(Ctx, Size), HalfSize)); +Value *Hi = Builder.CreateTrunc(RS, IntegerType::get(Ctx, HalfSize)); +Value *Vec = Builder.CreateInsertElement( +VectorType::get(IntegerType::get(Ctx, HalfSize), cast(I->getType())->getElementCount()), +Lo, ConstantInt::get(IntegerType::get(Ctx, 32), 0)); +Vec = Builder.CreateInsertElement(Vec, Hi, ConstantInt::get(IntegerType::get(Ctx, 32), 1)); +V = Builder.CreateBitOrPointerCast(Vec, I->getType()); + } + else +V = Builder.CreateBitOrPointerCast(Result, I->getType()); +} else { V = Builder.CreateAlignedLoad(I->getType(), AllocaResult, AllocaAlignment); Builder.CreateLifetimeEnd(AllocaResult, SizeVal64); diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll index 560dfde356c29d..e4ca564255f684 100644 --- a/llvm/test/CodeGen/ARM/atomic-load-store.ll +++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll @@ -983,3 +983,57 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) { store atomic double %val1, ptr %ptr seq_cst, align 8 ret void } + +define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 { +; ARM-LABEL: atomic_vec1_ptr: +; ARM: @ %bb.0: +; ARM-NEXT:ldr r0, [r0] +; ARM-NEXT:dmb ish +; ARM-NEXT:bx lr +; +; ARMOPTNONE-LABEL: atomic_vec1_ptr: +; ARMOPTNONE: @ %bb.0: +; ARMOPTNONE-NEXT:ldr r0, [r0] +; ARMOPTNONE-NEXT:dmb ish +; ARMOPTNONE-NEXT:bx lr +; +; THUMBTWO-LABEL: atomic_vec1_ptr: +; THUMBTWO: @ %bb.0: +; THUMBTWO-NEXT:ldr r0, [r0] +; THUMBTWO-NEXT:dmb ish +; THUMBTWO-NEXT:bx lr +; +; THUMBONE-LABEL: atomic_vec1_ptr: +; THUMBONE: @ %bb.0: +; THUMBONE-NEXT:push {r7, lr} +; THUMBONE-NEXT:movs r1, #0 +; THUMBONE-NEXT:mov r2, r1 +; THUMBONE-NEXT:bl __sync_val_compare_and_swap_4 +; THUMBONE-NEXT:pop {r7, pc} +; +; ARMV4-LABEL: atomic_vec1_ptr: +; ARMV4: @ %bb.0: +; ARMV4-NEXT:push {r11, lr} +; ARMV4-NEXT:mov r1, #2 +; ARMV4-NEXT:bl __atomic_load_4 +; ARMV4-NEXT:mov r1, #255 +; ARMV4-NEXT:orr r1, r1, #65280 +; 
ARMV4-NEXT:and r0, r0, r1 +; ARMV4-NEXT:pop {r11, lr} +; ARMV4-NEXT:mov pc, lr +; +; ARMV6-LABEL: atomic_vec1_ptr: +; ARMV6: @ %bb.0: +; ARMV6-NEXT:ldr r0, [r0] +; ARMV6-NEXT:mov r1, #0 +; ARMV6-NEXT:mcr p15, #0, r1, c7, c10, #5 +; ARMV6-NEXT:bx lr +; +; THUMBM-LABEL: atomic_vec1_ptr: +; THUMBM: @ %bb.0: +; THUMBM-NEXT:ldr r0, [r0] +; THUMBM-NEXT:dmb sy +; THUMBM-NEXT:bx lr + %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 + ret <1 x ptr> %ret +} diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 34e541c029e1b2..2e103b261fb9d7 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -354,6 +354,21 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { ret <2 x i32> %ret } +define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec2_ptr_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:pushq %rax +; CHECK-NEXT:movl $2, %esi +; CHECK-NEXT:callq ___atom
[llvm-branch-commits] [llvm] [X86] load atomic vector tests for combine (PR #120640)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120640 >From e11194d684cc7d71024d034389b3ffcdc18d854e Mon Sep 17 00:00:00 2001 From: jofrn Date: Thu, 19 Dec 2024 16:25:55 -0500 Subject: [PATCH] [X86] load atomic vector tests for combine Vector types of 2 elements that don't require widening are lowered via the prior commit's combine method, which is also needed to avoid a vector move. This change adds the tests that depend strictly on combineAtomicLoad so that SelectionDAG can translate vectors with type bfloat,half. commit-id:3a045357 --- llvm/test/CodeGen/X86/atomic-load-store.ll | 18 ++ 1 file changed, 18 insertions(+) diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 7b2e5e60eca20e..34e541c029e1b2 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -187,6 +187,24 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) { ret <2 x float> %ret } +define <2 x half> @atomic_vec2_half(ptr %x) { +; CHECK-LABEL: atomic_vec2_half: +; CHECK: ## %bb.0: +; CHECK-NEXT:movl (%rdi), %eax +; CHECK-NEXT:retq + %ret = load atomic <2 x half>, ptr %x acquire, align 4 + ret <2 x half> %ret +} + +define <2 x bfloat> @atomic_vec2_bfloat(ptr %x) { +; CHECK-LABEL: atomic_vec2_bfloat: +; CHECK: ## %bb.0: +; CHECK-NEXT:movl (%rdi), %eax +; CHECK-NEXT:retq + %ret = load atomic <2 x bfloat>, ptr %x acquire, align 4 + ret <2 x bfloat> %ret +} + define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind { ; CHECK3-LABEL: atomic_vec1_ptr: ; CHECK3: ## %bb.0: ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AtomicExpand] Add bitcasts when expanding load atomic vector (PR #120716)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120716 >From 45e70355f6101f3b5f6a39248310d1e0a756754f Mon Sep 17 00:00:00 2001 From: jofrn Date: Fri, 20 Dec 2024 06:14:28 -0500 Subject: [PATCH] [AtomicExpand] Add bitcasts when expanding load atomic vector AtomicExpand fails for aligned `load atomic ` because it does not find a compatible library call. This change adds appropriate bitcasts so that the call can be lowered. commit-id:f430c1af --- llvm/lib/CodeGen/AtomicExpandPass.cpp | 25 +-- llvm/test/CodeGen/ARM/atomic-load-store.ll | 52 ++ llvm/test/CodeGen/X86/atomic-load-store.ll | 15 +++ 3 files changed, 89 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index a75fa688d87a8d..490cc9aa4c3c80 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -2060,9 +2060,28 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall( I->replaceAllUsesWith(V); } else if (HasResult) { Value *V; -if (UseSizedLibcall) - V = Builder.CreateBitOrPointerCast(Result, I->getType()); -else { +if (UseSizedLibcall) { + // Add bitcasts from Result's T scalar type to I's <2 x T/2> vector type + if (I->getType()->getScalarType()->isIntOrPtrTy() && + I->getType()->isVectorTy() && !Result->getType()->isVectorTy()) { +TypeSize Size = Result->getType()->getPrimitiveSizeInBits(); +assert((unsigned)Size % 2 == 0); +unsigned HalfSize = (unsigned)Size / 2; +Value *Lo = +Builder.CreateTrunc(Result, IntegerType::get(Ctx, HalfSize)); +Value *RS = Builder.CreateLShr( +Result, ConstantInt::get(IntegerType::get(Ctx, Size), HalfSize)); +Value *Hi = Builder.CreateTrunc(RS, IntegerType::get(Ctx, HalfSize)); +Value *Vec = Builder.CreateInsertElement( +VectorType::get(IntegerType::get(Ctx, HalfSize), +cast(I->getType())->getElementCount()), +Lo, ConstantInt::get(IntegerType::get(Ctx, 32), 0)); +Vec = Builder.CreateInsertElement( +Vec, Hi, ConstantInt::get(IntegerType::get(Ctx, 32), 1)); +V = Builder.CreateBitOrPointerCast(Vec, I->getType()); + } else +V = Builder.CreateBitOrPointerCast(Result, I->getType()); +} else { V = Builder.CreateAlignedLoad(I->getType(), AllocaResult, AllocaAlignment); Builder.CreateLifetimeEnd(AllocaResult, SizeVal64); diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll index 560dfde356c29d..5e300cf01fef50 100644 --- a/llvm/test/CodeGen/ARM/atomic-load-store.ll +++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll @@ -983,3 +983,55 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) { store atomic double %val1, ptr %ptr seq_cst, align 8 ret void } + +define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 { +; ARM-LABEL: atomic_vec1_ptr: +; ARM: @ %bb.0: +; ARM-NEXT:ldr r0, [r0] +; ARM-NEXT:dmb ish +; ARM-NEXT:bx lr +; +; ARMOPTNONE-LABEL: atomic_vec1_ptr: +; ARMOPTNONE: @ %bb.0: +; ARMOPTNONE-NEXT:ldr r0, [r0] +; ARMOPTNONE-NEXT:dmb ish +; ARMOPTNONE-NEXT:bx lr +; +; THUMBTWO-LABEL: atomic_vec1_ptr: +; THUMBTWO: @ %bb.0: +; THUMBTWO-NEXT:ldr r0, [r0] +; THUMBTWO-NEXT:dmb ish +; THUMBTWO-NEXT:bx lr +; +; THUMBONE-LABEL: atomic_vec1_ptr: +; THUMBONE: @ %bb.0: +; THUMBONE-NEXT:push {r7, lr} +; THUMBONE-NEXT:movs r1, #0 +; THUMBONE-NEXT:mov r2, r1 +; THUMBONE-NEXT:bl __sync_val_compare_and_swap_4 +; THUMBONE-NEXT:pop {r7, pc} +; +; ARMV4-LABEL: atomic_vec1_ptr: +; ARMV4: @ %bb.0: +; ARMV4-NEXT:push {r11, lr} +; ARMV4-NEXT:mov r1, #2 +; ARMV4-NEXT:bl __atomic_load_4 +; ARMV4-NEXT:mov r0, #0 +; ARMV4-NEXT:pop {r11, lr} +; ARMV4-NEXT:mov 
pc, lr +; +; ARMV6-LABEL: atomic_vec1_ptr: +; ARMV6: @ %bb.0: +; ARMV6-NEXT:ldr r0, [r0] +; ARMV6-NEXT:mov r1, #0 +; ARMV6-NEXT:mcr p15, #0, r1, c7, c10, #5 +; ARMV6-NEXT:bx lr +; +; THUMBM-LABEL: atomic_vec1_ptr: +; THUMBM: @ %bb.0: +; THUMBM-NEXT:ldr r0, [r0] +; THUMBM-NEXT:dmb sy +; THUMBM-NEXT:bx lr + %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 + ret <1 x ptr> %ret +} diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 34e541c029e1b2..2e103b261fb9d7 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -354,6 +354,21 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { ret <2 x i32> %ret } +define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec2_ptr_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:pushq %rax +; CHECK-NEXT:movl $2, %esi +; CHECK-NEXT:callq ___atomic_load_16
[llvm-branch-commits] [llvm] [X86] Manage atomic load of fp -> int promotion in DAG (PR #120386)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120386 >From 315a7446d236fff0254de14a9a7bda3bb3d7a9d4 Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:38:23 -0500 Subject: [PATCH] [X86] Manage atomic load of fp -> int promotion in DAG When lowering atomic <1 x T> vector types with floats, selection can fail since this pattern is unsupported. To support this, floats can be casted to an integer type of the same size. commit-id:f9d761c5 --- llvm/lib/Target/X86/X86ISelLowering.cpp| 4 +++ llvm/test/CodeGen/X86/atomic-load-store.ll | 37 ++ 2 files changed, 41 insertions(+) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a0514e93d6598b..fda93a2eb38745 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2608,6 +2608,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(Op, MVT::f32, Promote); } + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f16, MVT::i16); + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f32, MVT::i32); + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f64, MVT::i64); + // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine({ISD::VECTOR_SHUFFLE, ISD::SCALAR_TO_VECTOR, diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index d23cfb89f9fc87..6efcbb80c0ce6d 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -145,3 +145,40 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind { %ret = load atomic <1 x i64>, ptr %x acquire, align 8 ret <1 x i64> %ret } + +define <1 x half> @atomic_vec1_half(ptr %x) { +; CHECK3-LABEL: atomic_vec1_half: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzwl (%rdi), %eax +; CHECK3-NEXT:pinsrw $0, %eax, %xmm0 +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_half: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movw (%rdi), %cx +; CHECK0-NEXT:## implicit-def: $eax +; CHECK0-NEXT:movw %cx, %ax +; CHECK0-NEXT:## implicit-def: $xmm0 +; CHECK0-NEXT:pinsrw $0, %eax, %xmm0 +; CHECK0-NEXT:retq + %ret = load atomic <1 x half>, ptr %x acquire, align 2 + ret <1 x half> %ret +} + +define <1 x float> @atomic_vec1_float(ptr %x) { +; CHECK-LABEL: atomic_vec1_float: +; CHECK: ## %bb.0: +; CHECK-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT:retq + %ret = load atomic <1 x float>, ptr %x acquire, align 4 + ret <1 x float> %ret +} + +define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec1_double_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT:retq + %ret = load atomic <1 x double>, ptr %x acquire, align 8 + ret <1 x double> %ret +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SelectionDAG][X86] Widen <2 x T> vector types for atomic load (PR #120598)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120598 >From 3a82883ef143c15cd3f213b83d3bda492c0e0d9c Mon Sep 17 00:00:00 2001 From: jofrn Date: Thu, 19 Dec 2024 11:19:39 -0500 Subject: [PATCH] [SelectionDAG][X86] Widen <2 x T> vector types for atomic load Vector types of 2 elements must be widened. This change does this for vector types of atomic load in SelectionDAG so that it can translate aligned vectors of >1 size. Also, it combines the v2 type into its equivalent scalar type so as to avoid a move to vector. commit-id:2894ccd1 --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 6 +- .../SelectionDAG/LegalizeVectorTypes.cpp | 39 - llvm/lib/Target/X86/X86ISelLowering.cpp | 79 ++- llvm/test/CodeGen/X86/atomic-load-store.ll| 41 ++ 4 files changed, 159 insertions(+), 6 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index b81c9f87cb27d7..3b3dddc44e3682 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -1046,6 +1046,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N); SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N); SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); + SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue WidenVecRes_LOAD(SDNode* N); SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N); SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N); @@ -1129,8 +1130,9 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { /// resulting wider type. It takes: /// LdChain: list of chains for the load to be generated. /// Ld: load to widen - SDValue GenWidenVectorLoads(SmallVectorImpl &LdChain, - LoadSDNode *LD); + template + SDValue GenWidenVectorLoads(SmallVectorImpl &LdChain, T *LD, + bool IsAtomic = false); /// Helper function to generate a set of extension loads to load a vector with /// a resulting wider type. It takes: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index c85e4ba2cfa5a7..7c4caa96244b8b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -4515,6 +4515,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: +Res = WidenVecRes_ATOMIC_LOAD(cast(N)); +break; case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; case ISD::STEP_VECTOR: case ISD::SPLAT_VECTOR: @@ -5901,6 +5904,30 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { N->getOperand(1), N->getOperand(2)); } +SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N) { + SmallVector LdChain; // Chain for the series of load + SDValue Result = GenWidenVectorLoads(LdChain, N, true /*IsAtomic*/); + + if (Result) { +// If we generate a single load, we can use that for the chain. Otherwise, +// build a factor node to remember the multiple loads are independent and +// chain to that. +SDValue NewChain; +if (LdChain.size() == 1) + NewChain = LdChain[0]; +else + NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, LdChain); + +// Modified the chain - switch anything that used the old chain to use +// the new one. 
+ReplaceValueWith(SDValue(N, 1), NewChain); + +return Result; + } + + report_fatal_error("Unable to widen atomic vector load"); +} + SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { LoadSDNode *LD = cast(N); ISD::LoadExtType ExtType = LD->getExtensionType(); @@ -7699,8 +7726,9 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp); } +template SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl &LdChain, - LoadSDNode *LD) { + T *LD, bool IsAtomic) { // The strategy assumes that we can efficiently load power-of-two widths. // The routine chops the vector into the largest vector loads with the same // element type or scalar loads and then recombines it to the widen vector @@ -7757,8 +7785,13 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl &LdChain, } while (TypeSize::isKnownGT(RemainingWidth, NewVTWidth)); } - SDValue LdOp = DAG.getLoad(*FirstVT, dl, Chain, BasePtr, LD->getPointerInfo(), - LD->getOriginalAlign(), MMOFlags, AAInfo); + SDValue LdOp; + if (IsAtomic) +LdOp = DAG.getAtomic(ISD::A
[llvm-branch-commits] [llvm] [SelectionDAG] Legalize <1 x T> vector types for atomic load (PR #120385)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120385 >From a3f70e19cbf29b162c63b37c3062e84fce2d7e4f Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:37:17 -0500 Subject: [PATCH] [SelectionDAG] Legalize <1 x T> vector types for atomic load `load atomic <1 x T>` is not valid. This change legalizes vector types of atomic load via scalarization in SelectionDAG so that it can, for example, translate from `v1i32` to `i32`. commit-id:5c36cc8c --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 15 +++ llvm/test/CodeGen/X86/atomic-load-store.ll| 121 +- 3 files changed, 135 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 571a710cc92a34..b81c9f87cb27d7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -861,6 +861,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue ScalarizeVecRes_ExpOp(SDNode *N); SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); + SDValue ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); SDValue ScalarizeVecRes_VSELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 107454a92e356c..c85e4ba2cfa5a7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -60,6 +60,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break; case ISD::FPOWI: R = ScalarizeVecRes_ExpOp(N); break; case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: +R = ScalarizeVecRes_ATOMIC_LOAD(cast(N)); +break; case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast(N));break; case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break; @@ -451,6 +454,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { return Op; } +SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) { + SDValue Result = DAG.getAtomic( + ISD::ATOMIC_LOAD, SDLoc(N), N->getMemoryVT().getVectorElementType(), + N->getValueType(0).getVectorElementType(), N->getChain(), N->getBasePtr(), + N->getMemOperand()); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); + return Result; +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { assert(N->isUnindexed() && "Indexed vector load?"); diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 5bce4401f7bdb0..d23cfb89f9fc87 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK3 +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s --check-prefixes=CHECK,CHECK0 define void @test1(ptr %ptr, i32 %val1) { ; CHECK-LABEL: test1: @@ -28,3 +28,120 @@ define i32 @test3(ptr %ptr) { %val = load atomic i32, ptr %ptr seq_cst, align 4 ret i32 %val } + +define <1 x i32> @atomic_vec1_i32(ptr %x) { +; CHECK-LABEL: atomic_vec1_i32: +; CHECK: ## %bb.0: +; CHECK-NEXT:movl (%rdi), %eax +; CHECK-NEXT:retq + %ret = load atomic <1 x i32>, ptr %x acquire, align 4 + ret <1 x i32> %ret +} + +define <1 x i8> @atomic_vec1_i8(ptr %x) { +; CHECK3-LABEL: atomic_vec1_i8: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzbl (%rdi), %eax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i8: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movb (%rdi), %al +; CHECK0-NEXT:retq + %ret = load atomic <1 x i8>, ptr %x acquire, align 1 + ret <1 x i8> %ret +} + +define <1 x i16> @atomic_vec1_i16(ptr %x) { +; CHECK3-LABEL: atomic_vec1_i16: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzwl (%rdi), %eax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i16: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movw (%rdi), %ax +; CHECK0-NEXT:retq + %ret = load atomic <1 x i16>, ptr %x acquire, align 2 + ret <1 x i16> %ret +} + +define <1
[llvm-branch-commits] [llvm] [X86] Add atomic vector tests for unaligned >1 sizes. (PR #120387)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120387 >From e1008c74c7109e937099c8fc33354e8d986f8d99 Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:40:32 -0500 Subject: [PATCH] [X86] Add atomic vector tests for unaligned >1 sizes. Unaligned atomic vectors with size >1 are lowered to calls. Adding their tests separately here. commit-id:a06a5cc6 --- llvm/test/CodeGen/X86/atomic-load-store.ll | 253 + 1 file changed, 253 insertions(+) diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 6efcbb80c0ce6d..39e9fdfa5e62b0 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -146,6 +146,34 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind { ret <1 x i64> %ret } +define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_ptr: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movq (%rsp), %rax +; CHECK3-NEXT:popq %rcx +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_ptr: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq (%rsp), %rax +; CHECK0-NEXT:popq %rcx +; CHECK0-NEXT:retq + %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 + ret <1 x ptr> %ret +} + define <1 x half> @atomic_vec1_half(ptr %x) { ; CHECK3-LABEL: atomic_vec1_half: ; CHECK3: ## %bb.0: @@ -182,3 +210,228 @@ define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind { %ret = load atomic <1 x double>, ptr %x acquire, align 8 ret <1 x double> %ret } + +define <1 x i64> @atomic_vec1_i64(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_i64: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movq (%rsp), %rax +; CHECK3-NEXT:popq %rcx +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i64: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq (%rsp), %rax +; CHECK0-NEXT:popq %rcx +; CHECK0-NEXT:retq + %ret = load atomic <1 x i64>, ptr %x acquire, align 4 + ret <1 x i64> %ret +} + +define <1 x double> @atomic_vec1_double(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_double: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK3-NEXT:popq %rax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_double: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK0-NEXT:popq %rax +; CHECK0-NEXT:retq + %ret = load atomic <1 x double>, ptr %x acquire, align 4 + ret <1 x double> %ret +} + +define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec2_i32: +; CHECK3: ## 
%bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK3-NEXT:popq %rax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec2_i32: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq {{.*#+}} xmm0 = mem[0],zero +; CHECK0-NEXT:popq %rax +; CHECK0-NEXT:retq + %ret = load atomic <2 x i32>, ptr %x acquire, align 4 + ret <2 x i32> %ret +} + +define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec4_float_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:pushq %rax +; CHECK-NEXT:movl $2, %esi +; CHECK-NEXT:callq ___atomic_load_16 +; CHECK-NEXT:movq %rdx, %xmm1 +; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm
[llvm-branch-commits] [llvm] [SelectionDAG][X86] Widen <2 x T> vector types for atomic load (PR #120598)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120598 >From 3a82883ef143c15cd3f213b83d3bda492c0e0d9c Mon Sep 17 00:00:00 2001 From: jofrn Date: Thu, 19 Dec 2024 11:19:39 -0500 Subject: [PATCH] [SelectionDAG][X86] Widen <2 x T> vector types for atomic load Vector types of 2 elements must be widened. This change does this for vector types of atomic load in SelectionDAG so that it can translate aligned vectors of >1 size. Also, it combines the v2 type into its equivalent scalar type so as to avoid a move to vector. commit-id:2894ccd1 --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 6 +- .../SelectionDAG/LegalizeVectorTypes.cpp | 39 - llvm/lib/Target/X86/X86ISelLowering.cpp | 79 ++- llvm/test/CodeGen/X86/atomic-load-store.ll| 41 ++ 4 files changed, 159 insertions(+), 6 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index b81c9f87cb27d7..3b3dddc44e3682 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -1046,6 +1046,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N); SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N); SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); + SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue WidenVecRes_LOAD(SDNode* N); SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N); SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N); @@ -1129,8 +1130,9 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { /// resulting wider type. It takes: /// LdChain: list of chains for the load to be generated. /// Ld: load to widen - SDValue GenWidenVectorLoads(SmallVectorImpl &LdChain, - LoadSDNode *LD); + template + SDValue GenWidenVectorLoads(SmallVectorImpl &LdChain, T *LD, + bool IsAtomic = false); /// Helper function to generate a set of extension loads to load a vector with /// a resulting wider type. It takes: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index c85e4ba2cfa5a7..7c4caa96244b8b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -4515,6 +4515,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: +Res = WidenVecRes_ATOMIC_LOAD(cast(N)); +break; case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; case ISD::STEP_VECTOR: case ISD::SPLAT_VECTOR: @@ -5901,6 +5904,30 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { N->getOperand(1), N->getOperand(2)); } +SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N) { + SmallVector LdChain; // Chain for the series of load + SDValue Result = GenWidenVectorLoads(LdChain, N, true /*IsAtomic*/); + + if (Result) { +// If we generate a single load, we can use that for the chain. Otherwise, +// build a factor node to remember the multiple loads are independent and +// chain to that. +SDValue NewChain; +if (LdChain.size() == 1) + NewChain = LdChain[0]; +else + NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, LdChain); + +// Modified the chain - switch anything that used the old chain to use +// the new one. 
+ReplaceValueWith(SDValue(N, 1), NewChain); + +return Result; + } + + report_fatal_error("Unable to widen atomic vector load"); +} + SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { LoadSDNode *LD = cast(N); ISD::LoadExtType ExtType = LD->getExtensionType(); @@ -7699,8 +7726,9 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp); } +template SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl &LdChain, - LoadSDNode *LD) { + T *LD, bool IsAtomic) { // The strategy assumes that we can efficiently load power-of-two widths. // The routine chops the vector into the largest vector loads with the same // element type or scalar loads and then recombines it to the widen vector @@ -7757,8 +7785,13 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl &LdChain, } while (TypeSize::isKnownGT(RemainingWidth, NewVTWidth)); } - SDValue LdOp = DAG.getLoad(*FirstVT, dl, Chain, BasePtr, LD->getPointerInfo(), - LD->getOriginalAlign(), MMOFlags, AAInfo); + SDValue LdOp; + if (IsAtomic) +LdOp = DAG.getAtomic(ISD::A
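For context, a minimal IR reproducer for the aligned case the commit message describes — the v2 load is widened and then folded into its equivalent scalar atomic load, so no GPR-to-vector move is emitted. This is an illustrative sketch, not a test from the patch; expected x86-64 codegen is roughly a single `movq (%rdi), %rax`.

```llvm
define <2 x i32> @atomic_vec2_i32_align8(ptr %x) {
  ; aligned to the full vector width, so one 64-bit atomic load suffices
  %ret = load atomic <2 x i32>, ptr %x acquire, align 8
  ret <2 x i32> %ret
}
```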
[llvm-branch-commits] [llvm] [AtomicExpand] Add bitcasts when expanding load atomic vector (PR #120716)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120716 >From 45e70355f6101f3b5f6a39248310d1e0a756754f Mon Sep 17 00:00:00 2001 From: jofrn Date: Fri, 20 Dec 2024 06:14:28 -0500 Subject: [PATCH] [AtomicExpand] Add bitcasts when expanding load atomic vector AtomicExpand fails for aligned `load atomic ` because it does not find a compatible library call. This change adds appropriate bitcasts so that the call can be lowered. commit-id:f430c1af --- llvm/lib/CodeGen/AtomicExpandPass.cpp | 25 +-- llvm/test/CodeGen/ARM/atomic-load-store.ll | 52 ++ llvm/test/CodeGen/X86/atomic-load-store.ll | 15 +++ 3 files changed, 89 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index a75fa688d87a8d..490cc9aa4c3c80 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -2060,9 +2060,28 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall( I->replaceAllUsesWith(V); } else if (HasResult) { Value *V; -if (UseSizedLibcall) - V = Builder.CreateBitOrPointerCast(Result, I->getType()); -else { +if (UseSizedLibcall) { + // Add bitcasts from Result's T scalar type to I's <2 x T/2> vector type + if (I->getType()->getScalarType()->isIntOrPtrTy() && + I->getType()->isVectorTy() && !Result->getType()->isVectorTy()) { +TypeSize Size = Result->getType()->getPrimitiveSizeInBits(); +assert((unsigned)Size % 2 == 0); +unsigned HalfSize = (unsigned)Size / 2; +Value *Lo = +Builder.CreateTrunc(Result, IntegerType::get(Ctx, HalfSize)); +Value *RS = Builder.CreateLShr( +Result, ConstantInt::get(IntegerType::get(Ctx, Size), HalfSize)); +Value *Hi = Builder.CreateTrunc(RS, IntegerType::get(Ctx, HalfSize)); +Value *Vec = Builder.CreateInsertElement( +VectorType::get(IntegerType::get(Ctx, HalfSize), +cast(I->getType())->getElementCount()), +Lo, ConstantInt::get(IntegerType::get(Ctx, 32), 0)); +Vec = Builder.CreateInsertElement( +Vec, Hi, ConstantInt::get(IntegerType::get(Ctx, 32), 1)); +V = Builder.CreateBitOrPointerCast(Vec, I->getType()); + } else +V = Builder.CreateBitOrPointerCast(Result, I->getType()); +} else { V = Builder.CreateAlignedLoad(I->getType(), AllocaResult, AllocaAlignment); Builder.CreateLifetimeEnd(AllocaResult, SizeVal64); diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll index 560dfde356c29d..5e300cf01fef50 100644 --- a/llvm/test/CodeGen/ARM/atomic-load-store.ll +++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll @@ -983,3 +983,55 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) { store atomic double %val1, ptr %ptr seq_cst, align 8 ret void } + +define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 { +; ARM-LABEL: atomic_vec1_ptr: +; ARM: @ %bb.0: +; ARM-NEXT:ldr r0, [r0] +; ARM-NEXT:dmb ish +; ARM-NEXT:bx lr +; +; ARMOPTNONE-LABEL: atomic_vec1_ptr: +; ARMOPTNONE: @ %bb.0: +; ARMOPTNONE-NEXT:ldr r0, [r0] +; ARMOPTNONE-NEXT:dmb ish +; ARMOPTNONE-NEXT:bx lr +; +; THUMBTWO-LABEL: atomic_vec1_ptr: +; THUMBTWO: @ %bb.0: +; THUMBTWO-NEXT:ldr r0, [r0] +; THUMBTWO-NEXT:dmb ish +; THUMBTWO-NEXT:bx lr +; +; THUMBONE-LABEL: atomic_vec1_ptr: +; THUMBONE: @ %bb.0: +; THUMBONE-NEXT:push {r7, lr} +; THUMBONE-NEXT:movs r1, #0 +; THUMBONE-NEXT:mov r2, r1 +; THUMBONE-NEXT:bl __sync_val_compare_and_swap_4 +; THUMBONE-NEXT:pop {r7, pc} +; +; ARMV4-LABEL: atomic_vec1_ptr: +; ARMV4: @ %bb.0: +; ARMV4-NEXT:push {r11, lr} +; ARMV4-NEXT:mov r1, #2 +; ARMV4-NEXT:bl __atomic_load_4 +; ARMV4-NEXT:mov r0, #0 +; ARMV4-NEXT:pop {r11, lr} +; ARMV4-NEXT:mov 
pc, lr +; +; ARMV6-LABEL: atomic_vec1_ptr: +; ARMV6: @ %bb.0: +; ARMV6-NEXT:ldr r0, [r0] +; ARMV6-NEXT:mov r1, #0 +; ARMV6-NEXT:mcr p15, #0, r1, c7, c10, #5 +; ARMV6-NEXT:bx lr +; +; THUMBM-LABEL: atomic_vec1_ptr: +; THUMBM: @ %bb.0: +; THUMBM-NEXT:ldr r0, [r0] +; THUMBM-NEXT:dmb sy +; THUMBM-NEXT:bx lr + %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 + ret <1 x ptr> %ret +} diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 34e541c029e1b2..2e103b261fb9d7 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -354,6 +354,21 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { ret <2 x i32> %ret } +define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec2_ptr_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:pushq %rax +; CHECK-NEXT:movl $2, %esi +; CHECK-NEXT:callq ___atomic_load_16
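At the IR level, the sized-libcall path this patch fixes corresponds roughly to the sequence below — a sketch assuming a `<2 x i32>` load served by the 8-byte sized libcall, little-endian element order, and illustrative names; the pass now emits the equivalent trunc/lshr/insertelement chain shown in the C++ above.

```llvm
declare i64 @__atomic_load_8(ptr, i32)

define <2 x i32> @expanded_sketch(ptr %p) {
  %r  = call i64 @__atomic_load_8(ptr %p, i32 2) ; 2 == acquire
  %lo = trunc i64 %r to i32                      ; low half -> element 0
  %sh = lshr i64 %r, 32
  %hi = trunc i64 %sh to i32                     ; high half -> element 1
  %v0 = insertelement <2 x i32> poison, i32 %lo, i32 0
  %v1 = insertelement <2 x i32> %v0, i32 %hi, i32 1
  ret <2 x i32> %v1
}
```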
[llvm-branch-commits] [llvm] [SelectionDAG] Legalize <1 x T> vector types for atomic load (PR #120385)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120385 >From a3f70e19cbf29b162c63b37c3062e84fce2d7e4f Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:37:17 -0500 Subject: [PATCH] [SelectionDAG] Legalize <1 x T> vector types for atomic load `load atomic <1 x T>` is not valid. This change legalizes vector types of atomic load via scalarization in SelectionDAG so that it can, for example, translate from `v1i32` to `i32`. commit-id:5c36cc8c --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 15 +++ llvm/test/CodeGen/X86/atomic-load-store.ll| 121 +- 3 files changed, 135 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 571a710cc92a34..b81c9f87cb27d7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -861,6 +861,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue ScalarizeVecRes_ExpOp(SDNode *N); SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); + SDValue ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); SDValue ScalarizeVecRes_VSELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 107454a92e356c..c85e4ba2cfa5a7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -60,6 +60,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break; case ISD::FPOWI: R = ScalarizeVecRes_ExpOp(N); break; case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: +R = ScalarizeVecRes_ATOMIC_LOAD(cast(N)); +break; case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast(N));break; case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break; @@ -451,6 +454,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { return Op; } +SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) { + SDValue Result = DAG.getAtomic( + ISD::ATOMIC_LOAD, SDLoc(N), N->getMemoryVT().getVectorElementType(), + N->getValueType(0).getVectorElementType(), N->getChain(), N->getBasePtr(), + N->getMemOperand()); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); + return Result; +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { assert(N->isUnindexed() && "Indexed vector load?"); diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 5bce4401f7bdb0..d23cfb89f9fc87 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK3 +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s --check-prefixes=CHECK,CHECK0 define void @test1(ptr %ptr, i32 %val1) { ; CHECK-LABEL: test1: @@ -28,3 +28,120 @@ define i32 @test3(ptr %ptr) { %val = load atomic i32, ptr %ptr seq_cst, align 4 ret i32 %val } + +define <1 x i32> @atomic_vec1_i32(ptr %x) { +; CHECK-LABEL: atomic_vec1_i32: +; CHECK: ## %bb.0: +; CHECK-NEXT:movl (%rdi), %eax +; CHECK-NEXT:retq + %ret = load atomic <1 x i32>, ptr %x acquire, align 4 + ret <1 x i32> %ret +} + +define <1 x i8> @atomic_vec1_i8(ptr %x) { +; CHECK3-LABEL: atomic_vec1_i8: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzbl (%rdi), %eax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i8: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movb (%rdi), %al +; CHECK0-NEXT:retq + %ret = load atomic <1 x i8>, ptr %x acquire, align 1 + ret <1 x i8> %ret +} + +define <1 x i16> @atomic_vec1_i16(ptr %x) { +; CHECK3-LABEL: atomic_vec1_i16: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzwl (%rdi), %eax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i16: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movw (%rdi), %ax +; CHECK0-NEXT:retq + %ret = load atomic <1 x i16>, ptr %x acquire, align 2 + ret <1 x i16> %ret +} + +define <1
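Conceptually the scalarization just retypes the node; expressed back in IR, the `<1 x i32>` load becomes its scalar equivalent (illustrative only — the rewrite happens on the DAG, not in IR):

```llvm
define <1 x i32> @scalarized_sketch(ptr %x) {
  %s = load atomic i32, ptr %x acquire, align 4 ; what the v1i32 node becomes
  %v = bitcast i32 %s to <1 x i32>
  ret <1 x i32> %v
}
```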
[llvm-branch-commits] [llvm] [X86] Manage atomic load of fp -> int promotion in DAG (PR #120386)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120386 >From 315a7446d236fff0254de14a9a7bda3bb3d7a9d4 Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:38:23 -0500 Subject: [PATCH] [X86] Manage atomic load of fp -> int promotion in DAG When lowering atomic <1 x T> vector types with floats, selection can fail since this pattern is unsupported. To support this, floats can be casted to an integer type of the same size. commit-id:f9d761c5 --- llvm/lib/Target/X86/X86ISelLowering.cpp| 4 +++ llvm/test/CodeGen/X86/atomic-load-store.ll | 37 ++ 2 files changed, 41 insertions(+) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a0514e93d6598b..fda93a2eb38745 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2608,6 +2608,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(Op, MVT::f32, Promote); } + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f16, MVT::i16); + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f32, MVT::i32); + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f64, MVT::i64); + // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine({ISD::VECTOR_SHUFFLE, ISD::SCALAR_TO_VECTOR, diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index d23cfb89f9fc87..6efcbb80c0ce6d 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -145,3 +145,40 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind { %ret = load atomic <1 x i64>, ptr %x acquire, align 8 ret <1 x i64> %ret } + +define <1 x half> @atomic_vec1_half(ptr %x) { +; CHECK3-LABEL: atomic_vec1_half: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzwl (%rdi), %eax +; CHECK3-NEXT:pinsrw $0, %eax, %xmm0 +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_half: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movw (%rdi), %cx +; CHECK0-NEXT:## implicit-def: $eax +; CHECK0-NEXT:movw %cx, %ax +; CHECK0-NEXT:## implicit-def: $xmm0 +; CHECK0-NEXT:pinsrw $0, %eax, %xmm0 +; CHECK0-NEXT:retq + %ret = load atomic <1 x half>, ptr %x acquire, align 2 + ret <1 x half> %ret +} + +define <1 x float> @atomic_vec1_float(ptr %x) { +; CHECK-LABEL: atomic_vec1_float: +; CHECK: ## %bb.0: +; CHECK-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT:retq + %ret = load atomic <1 x float>, ptr %x acquire, align 4 + ret <1 x float> %ret +} + +define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec1_double_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT:retq + %ret = load atomic <1 x double>, ptr %x acquire, align 8 + ret <1 x double> %ret +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
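The `setOperationPromotedToType` calls make the legalizer perform the atomic load in the same-width integer type and bitcast back. Conceptually (illustrative IR, not part of the patch):

```llvm
define float @promoted_f32_sketch(ptr %p) {
  %i = load atomic i32, ptr %p acquire, align 4 ; load performed as i32
  %f = bitcast i32 %i to float                  ; then reinterpreted
  ret float %f
}
```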
[llvm-branch-commits] [llvm] [X86] load atomic vector tests for combine (PR #120640)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120640 >From e11194d684cc7d71024d034389b3ffcdc18d854e Mon Sep 17 00:00:00 2001 From: jofrn Date: Thu, 19 Dec 2024 16:25:55 -0500 Subject: [PATCH] [X86] load atomic vector tests for combine Vector types of 2 elements that don't require widening are lowered via the prior commit's combine method, which is also needed to avoid a vector move. This change adds the tests that depend strictly on combineAtomicLoad so that SelectionDAG can translate vectors with type bfloat,half. commit-id:3a045357 --- llvm/test/CodeGen/X86/atomic-load-store.ll | 18 ++ 1 file changed, 18 insertions(+) diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 7b2e5e60eca20e..34e541c029e1b2 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -187,6 +187,24 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) { ret <2 x float> %ret } +define <2 x half> @atomic_vec2_half(ptr %x) { +; CHECK-LABEL: atomic_vec2_half: +; CHECK: ## %bb.0: +; CHECK-NEXT:movl (%rdi), %eax +; CHECK-NEXT:retq + %ret = load atomic <2 x half>, ptr %x acquire, align 4 + ret <2 x half> %ret +} + +define <2 x bfloat> @atomic_vec2_bfloat(ptr %x) { +; CHECK-LABEL: atomic_vec2_bfloat: +; CHECK: ## %bb.0: +; CHECK-NEXT:movl (%rdi), %eax +; CHECK-NEXT:retq + %ret = load atomic <2 x bfloat>, ptr %x acquire, align 4 + ret <2 x bfloat> %ret +} + define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind { ; CHECK3-LABEL: atomic_vec1_ptr: ; CHECK3: ## %bb.0: ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
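Restating what these checks verify: `<2 x half>` and `<2 x bfloat>` are 32-bit quantities, so after the combine they lower to one plain `movl` with no vector move. A hypothetical scalar equivalent of what the combine produces:

```llvm
define <2 x half> @combined_sketch(ptr %x) {
  %i = load atomic i32, ptr %x acquire, align 4
  %v = bitcast i32 %i to <2 x half>
  ret <2 x half> %v
}
```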
[llvm-branch-commits] [llvm] [X86] Add atomic vector tests for unaligned >1 sizes. (PR #120387)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120387 >From e1008c74c7109e937099c8fc33354e8d986f8d99 Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:40:32 -0500 Subject: [PATCH] [X86] Add atomic vector tests for unaligned >1 sizes. Unaligned atomic vectors with size >1 are lowered to calls. Adding their tests separately here. commit-id:a06a5cc6 --- llvm/test/CodeGen/X86/atomic-load-store.ll | 253 + 1 file changed, 253 insertions(+) diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 6efcbb80c0ce6d..39e9fdfa5e62b0 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -146,6 +146,34 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind { ret <1 x i64> %ret } +define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_ptr: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movq (%rsp), %rax +; CHECK3-NEXT:popq %rcx +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_ptr: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq (%rsp), %rax +; CHECK0-NEXT:popq %rcx +; CHECK0-NEXT:retq + %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 + ret <1 x ptr> %ret +} + define <1 x half> @atomic_vec1_half(ptr %x) { ; CHECK3-LABEL: atomic_vec1_half: ; CHECK3: ## %bb.0: @@ -182,3 +210,228 @@ define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind { %ret = load atomic <1 x double>, ptr %x acquire, align 8 ret <1 x double> %ret } + +define <1 x i64> @atomic_vec1_i64(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_i64: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movq (%rsp), %rax +; CHECK3-NEXT:popq %rcx +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i64: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq (%rsp), %rax +; CHECK0-NEXT:popq %rcx +; CHECK0-NEXT:retq + %ret = load atomic <1 x i64>, ptr %x acquire, align 4 + ret <1 x i64> %ret +} + +define <1 x double> @atomic_vec1_double(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_double: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK3-NEXT:popq %rax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_double: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK0-NEXT:popq %rax +; CHECK0-NEXT:retq + %ret = load atomic <1 x double>, ptr %x acquire, align 4 + ret <1 x double> %ret +} + +define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec2_i32: +; CHECK3: ## 
%bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK3-NEXT:popq %rax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec2_i32: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq {{.*#+}} xmm0 = mem[0],zero +; CHECK0-NEXT:popq %rax +; CHECK0-NEXT:retq + %ret = load atomic <2 x i32>, ptr %x acquire, align 4 + ret <2 x i32> %ret +} + +define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec4_float_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:pushq %rax +; CHECK-NEXT:movl $2, %esi +; CHECK-NEXT:callq ___atomic_load_16 +; CHECK-NEXT:movq %rdx, %xmm1 +; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm
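For readers decoding the `__atomic_load` call sites in these checks: libatomic's generic entry point is `void __atomic_load(size_t size, void *src, void *ret, int memorder)`, so in the x86-64 SysV sequences above `%edi = 8` is the size, `%rsi` the source object, `%rdx = %rsp` the stack temporary receiving the value, and `%ecx = 2` the ordering (acquire). A C-level sketch of what the lowered code is morally doing (illustrative):

```c
typedef struct { int v[2]; } v2i32; /* stand-in for <2 x i32> */

v2i32 load_v2i32(const v2i32 *x) {
  v2i32 tmp;
  /* generic builtin form: the compiler passes sizeof(*x) and, for
     under-aligned objects, emits the libatomic __atomic_load call */
  __atomic_load(x, &tmp, __ATOMIC_ACQUIRE);
  return tmp;
}
```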
[llvm-branch-commits] [flang] [flang][cuda] Convert cuf.sync_descriptor to runtime call (PR #121524)
https://github.com/clementval created https://github.com/llvm/llvm-project/pull/121524 Convert the op to a new entry point in the runtime `CUFSyncGlobalDescriptor` >From 822f3740a56b689c50aa5c983910e2115da0e62c Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Thu, 2 Jan 2025 13:58:14 -0800 Subject: [PATCH] [flang][cuda] Convert cuf.sync_descriptor to runtime call --- flang/include/flang/Runtime/CUDA/descriptor.h | 4 ++ .../Optimizer/Transforms/CUFOpConversion.cpp | 42 ++- flang/runtime/CUDA/descriptor.cpp | 7 flang/test/Fir/CUDA/cuda-sync-desc.mlir | 20 + 4 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 flang/test/Fir/CUDA/cuda-sync-desc.mlir diff --git a/flang/include/flang/Runtime/CUDA/descriptor.h b/flang/include/flang/Runtime/CUDA/descriptor.h index 55878aaac57fb3..0ee7feca10e44c 100644 --- a/flang/include/flang/Runtime/CUDA/descriptor.h +++ b/flang/include/flang/Runtime/CUDA/descriptor.h @@ -33,6 +33,10 @@ void *RTDECL(CUFGetDeviceAddress)( void RTDECL(CUFDescriptorSync)(Descriptor *dst, const Descriptor *src, const char *sourceFile = nullptr, int sourceLine = 0); +/// Get the device address of registered with the \p hostPtr and sync them. +void RTDECL(CUFSyncGlobalDescriptor)( +void *hostPtr, const char *sourceFile = nullptr, int sourceLine = 0); + } // extern "C" } // namespace Fortran::runtime::cuda diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp index fb0ef246546444..f08f9e412b8857 100644 --- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp +++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp @@ -788,6 +788,45 @@ struct CUFLaunchOpConversion const mlir::SymbolTable &symTab; }; +struct CUFSyncDescriptorOpConversion +: public mlir::OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + CUFSyncDescriptorOpConversion(mlir::MLIRContext *context, +const mlir::SymbolTable &symTab) + : OpRewritePattern(context), symTab{symTab} {} + + mlir::LogicalResult + matchAndRewrite(cuf::SyncDescriptorOp op, + mlir::PatternRewriter &rewriter) const override { +auto mod = op->getParentOfType(); +fir::FirOpBuilder builder(rewriter, mod); +mlir::Location loc = op.getLoc(); + +auto globalOp = mod.lookupSymbol(op.getGlobalName()); +if (!globalOp) + return mlir::failure(); + +auto hostAddr = builder.create( +loc, fir::ReferenceType::get(globalOp.getType()), op.getGlobalName()); +mlir::func::FuncOp callee = +fir::runtime::getRuntimeFunc(loc, + builder); +auto fTy = callee.getFunctionType(); +mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc); +mlir::Value sourceLine = +fir::factory::locationToLineNo(builder, loc, fTy.getInput(2)); +llvm::SmallVector args{fir::runtime::createArguments( +builder, loc, fTy, hostAddr, sourceFile, sourceLine)}; +builder.create(loc, callee, args); +op.erase(); +return mlir::success(); + } + +private: + const mlir::SymbolTable &symTab; +}; + class CUFOpConversion : public fir::impl::CUFOpConversionBase { public: void runOnOperation() override { @@ -851,7 +890,8 @@ void cuf::populateCUFToFIRConversionPatterns( CUFFreeOpConversion>(patterns.getContext()); patterns.insert(patterns.getContext(), symtab, &dl, &converter); - patterns.insert(patterns.getContext(), symtab); + patterns.insert( + patterns.getContext(), symtab); } void cuf::populateFIRCUFConversionPatterns(const mlir::SymbolTable &symtab, diff --git a/flang/runtime/CUDA/descriptor.cpp b/flang/runtime/CUDA/descriptor.cpp index 391c47e84241d4..947eeb66aa3d6c 100644 --- 
a/flang/runtime/CUDA/descriptor.cpp +++ b/flang/runtime/CUDA/descriptor.cpp @@ -46,6 +46,13 @@ void RTDEF(CUFDescriptorSync)(Descriptor *dst, const Descriptor *src, (void *)dst, (const void *)src, count, cudaMemcpyHostToDevice)); } +void RTDEF(CUFSyncGlobalDescriptor)( +void *hostPtr, const char *sourceFile, int sourceLine) { + void *devAddr{RTNAME(CUFGetDeviceAddress)(hostPtr, sourceFile, sourceLine)}; + RTNAME(CUFDescriptorSync) + ((Descriptor *)devAddr, (Descriptor *)hostPtr, sourceFile, sourceLine); +} + RT_EXT_API_GROUP_END } } // namespace Fortran::runtime::cuda diff --git a/flang/test/Fir/CUDA/cuda-sync-desc.mlir b/flang/test/Fir/CUDA/cuda-sync-desc.mlir new file mode 100644 index 00..20b317f34a7f26 --- /dev/null +++ b/flang/test/Fir/CUDA/cuda-sync-desc.mlir @@ -0,0 +1,20 @@ +// RUN: fir-opt --cuf-convert %s | FileCheck %s + +module attributes {dlti.dl_spec = #dlti.dl_spec : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, i1 = dense<8> : vector<2
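A hedged sketch of the user-level pattern this runtime entry serves (names illustrative; assumes the usual CUDA Fortran behavior where reallocating a device module variable on the host leaves the device-side copy of its descriptor stale):

```fortran
module m
  real, device, allocatable :: a(:)  ! global with a device-visible descriptor
end module m

subroutine grow
  use m
  allocate(a(100))  ! host descriptor changes; the compiler emits
                    ! cuf.sync_descriptor for @_QMmEa, which now lowers to
                    ! a CUFSyncGlobalDescriptor call refreshing the device copy
end subroutine grow
```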
[llvm-branch-commits] [llvm] 1557818 - Revert "[llvm-(min-)tblgen] Avoid redundant source compilation (#114494)"
Author: Michael Kruse Date: 2025-01-02T23:26:12+01:00 New Revision: 155781842fcf9899db39a3e80236a16dc5da13f3 URL: https://github.com/llvm/llvm-project/commit/155781842fcf9899db39a3e80236a16dc5da13f3 DIFF: https://github.com/llvm/llvm-project/commit/155781842fcf9899db39a3e80236a16dc5da13f3.diff LOG: Revert "[llvm-(min-)tblgen] Avoid redundant source compilation (#114494)" This reverts commit f6cb56902c6dcafede21eb6662910b6ff661fc0f. Added: llvm/utils/TableGen/ARMTargetDefEmitter.cpp llvm/utils/TableGen/Attributes.cpp llvm/utils/TableGen/DirectiveEmitter.cpp llvm/utils/TableGen/IntrinsicEmitter.cpp llvm/utils/TableGen/RISCVTargetDefEmitter.cpp llvm/utils/TableGen/TableGen.cpp llvm/utils/TableGen/VTEmitter.cpp Modified: llvm/utils/TableGen/Basic/CMakeLists.txt llvm/utils/TableGen/CMakeLists.txt Removed: llvm/utils/TableGen/Basic/ARMTargetDefEmitter.cpp llvm/utils/TableGen/Basic/Attributes.cpp llvm/utils/TableGen/Basic/DirectiveEmitter.cpp llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp llvm/utils/TableGen/Basic/RISCVTargetDefEmitter.cpp llvm/utils/TableGen/Basic/TableGen.cpp llvm/utils/TableGen/Basic/TableGen.h llvm/utils/TableGen/Basic/VTEmitter.cpp llvm/utils/TableGen/llvm-min-tblgen.cpp llvm/utils/TableGen/llvm-tblgen.cpp diff --git a/llvm/utils/TableGen/Basic/ARMTargetDefEmitter.cpp b/llvm/utils/TableGen/ARMTargetDefEmitter.cpp similarity index 100% rename from llvm/utils/TableGen/Basic/ARMTargetDefEmitter.cpp rename to llvm/utils/TableGen/ARMTargetDefEmitter.cpp diff --git a/llvm/utils/TableGen/Basic/Attributes.cpp b/llvm/utils/TableGen/Attributes.cpp similarity index 100% rename from llvm/utils/TableGen/Basic/Attributes.cpp rename to llvm/utils/TableGen/Attributes.cpp diff --git a/llvm/utils/TableGen/Basic/CMakeLists.txt b/llvm/utils/TableGen/Basic/CMakeLists.txt index b058fba78eb05a..41d737e8d418e2 100644 --- a/llvm/utils/TableGen/Basic/CMakeLists.txt +++ b/llvm/utils/TableGen/Basic/CMakeLists.txt @@ -9,15 +9,8 @@ set(LLVM_LINK_COMPONENTS ) add_llvm_library(LLVMTableGenBasic OBJECT EXCLUDE_FROM_ALL DISABLE_LLVM_LINK_LLVM_DYLIB - ARMTargetDefEmitter.cpp - Attributes.cpp CodeGenIntrinsics.cpp - DirectiveEmitter.cpp - IntrinsicEmitter.cpp - RISCVTargetDefEmitter.cpp SDNodeProperties.cpp - TableGen.cpp - VTEmitter.cpp ) # Users may include its headers as "Basic/*.h" diff --git a/llvm/utils/TableGen/Basic/TableGen.h b/llvm/utils/TableGen/Basic/TableGen.h deleted file mode 100644 index 630aea62fcf902..00 --- a/llvm/utils/TableGen/Basic/TableGen.h +++ /dev/null @@ -1,13 +0,0 @@ -//===- TableGen.h -===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===--===// -// -// Shared entry point for llvm-tblgen and llvm-min-tblgen. -// -//===--===// - -int tblgen_main(int argc, char **argv); diff --git a/llvm/utils/TableGen/CMakeLists.txt b/llvm/utils/TableGen/CMakeLists.txt index e4b686803c9769..ba1e4aa01b48d6 100644 --- a/llvm/utils/TableGen/CMakeLists.txt +++ b/llvm/utils/TableGen/CMakeLists.txt @@ -5,17 +5,20 @@ add_subdirectory(Basic) # code needed by the backends. add_subdirectory(Common) +set(LLVM_LINK_COMPONENTS Support) + # llvm-min-tablegen only contains a subset of backends necessary to # build llvm/include. It must not depend on TableGenCommon, as # TableGenCommon depends on this already to generate things such as # ValueType definitions. 
-# Sources included in both, llvm-min-tblgen and llvm-tblgen, must be included -# into LLVMTableGenBasic to avoid redundant compilation and problems with build -# caches. -# At least one source file must be included directly to avoid CMake problems. -# E.g. CMake derives which linker to use from the types of sources added. add_tablegen(llvm-min-tblgen LLVM_HEADERS - llvm-min-tblgen.cpp + TableGen.cpp + ARMTargetDefEmitter.cpp + Attributes.cpp + DirectiveEmitter.cpp + IntrinsicEmitter.cpp + RISCVTargetDefEmitter.cpp + VTEmitter.cpp $ PARTIAL_SOURCES_INTENDED @@ -29,8 +32,10 @@ set(LLVM_LINK_COMPONENTS add_tablegen(llvm-tblgen LLVM DESTINATION "${LLVM_TOOLS_INSTALL_DIR}" EXPORT LLVM + ARMTargetDefEmitter.cpp AsmMatcherEmitter.cpp AsmWriterEmitter.cpp + Attributes.cpp CallingConvEmitter.cpp CodeEmitterGen.cpp CodeGenMapTable.cpp @@ -43,6 +48,7 @@ add_tablegen(llvm-tblgen LLVM DecoderEmitter.cpp DFAEmitter.cpp DFAPacketizerEmitter.cpp + DirectiveEmitter.cpp DisassemblerEmitter.cpp DXILEmitter.cpp ExegesisEmitter.cpp @@ -51,15 +57,18 @@ ad
[llvm-branch-commits] [llvm] Spiller: Detach legacy pass and supply analyses instead (PR #119181)
https://github.com/arsenm approved this pull request. https://github.com/llvm/llvm-project/pull/119181 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld][WebAssembly] Replace config-> with ctx.arg. (PR #119835)
https://github.com/sbc100 approved this pull request. https://github.com/llvm/llvm-project/pull/119835 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [Flang-RT] Build libflang_rt.so (PR #120213)
https://github.com/Meinersbur ready_for_review https://github.com/llvm/llvm-project/pull/120213 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][OpenMP] Allow utility constructs in specification part (PR #121509)
llvmbot wrote: @llvm/pr-subscribers-flang-parser @llvm/pr-subscribers-flang-fir-hlfir Author: Krzysztof Parzyszek (kparzysz) Changes Allow utility constructs (error and nothing) to appear in the specification part as well as the execution part. The exception is "ERROR AT(EXECUTION)" which should only be in the execution part. In case of ambiguity (the boundary between the specification and the execution part), utility constructs will be parsed as belonging to the specification part. In such cases move them to the execution part in the OpenMP canonicalization code. --- Patch is 23.70 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/121509.diff 10 Files Affected: - (modified) flang/include/flang/Parser/parse-tree.h (+1-1) - (modified) flang/lib/Lower/OpenMP/OpenMP.cpp (+4) - (modified) flang/lib/Parser/openmp-parsers.cpp (+3-1) - (modified) flang/lib/Parser/unparse.cpp (+42-59) - (modified) flang/lib/Semantics/canonicalize-omp.cpp (+162) - (modified) flang/lib/Semantics/check-omp-structure.cpp (+18-1) - (modified) flang/lib/Semantics/check-omp-structure.h (+6-2) - (modified) flang/test/Parser/OpenMP/error-unparse.f90 (+11-7) - (modified) flang/test/Parser/OpenMP/nothing.f90 (+100) - (added) flang/test/Semantics/OpenMP/error.f90 (+8) ``diff diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 9df7c6d5e39c31..b693e001e5e4b4 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -4342,7 +4342,7 @@ struct OpenMPDeclarativeConstruct { std::variant + OpenMPRequiresConstruct, OpenMPUtilityConstruct> u; }; diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index fe6d82125a9e01..0a84162291573a 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -2586,6 +2586,10 @@ static void genOMPDispatch(lower::AbstractConverter &converter, //===--===// // OpenMPDeclarativeConstruct visitors //===--===// +static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, + lower::pft::Evaluation &eval, + const parser::OpenMPUtilityConstruct &); static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index 0a0a29002de27c..75bb64d06ed0fe 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -1090,7 +1090,9 @@ TYPE_PARSER(startOmpLine >> construct( Parser{}) || construct( -Parser{})) / +Parser{}) || +construct( +Parser{})) / endOmpLine)) // Block Construct diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index 4fe57f3e348d35..58820476c51bc1 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2631,81 +2631,64 @@ class UnparseVisitor { } } void Unparse(const OpenMPDeclareReductionConstruct &x) { +BeginOpenMP(); +Word("!$OMP DECLARE REDUCTION "); Put("("); Walk(std::get(x.t)), Put(" : "); Walk(std::get>(x.t), ","), Put(" : "); Walk(std::get(x.t)); Put(")"); Walk(std::get>(x.t)); +EndOpenMP(); } - bool Pre(const OpenMPDeclarativeConstruct &x) { + + void Unparse(const OpenMPDeclareMapperConstruct &z) { BeginOpenMP(); -Word("!$OMP "); -return common::visit( -common::visitors{ -[&](const OpenMPDeclarativeAllocate &z) { - Word("ALLOCATE ("); - Walk(std::get(z.t)); - Put(")"); - Walk(std::get(z.t)); - Put("\n"); - EndOpenMP(); - return false; -}, -[&](const 
OpenMPDeclareMapperConstruct &z) { - Word("DECLARE MAPPER ("); - const auto &spec{std::get(z.t)}; - if (auto mapname{std::get>(spec.t)}) { -Walk(mapname); -Put(":"); - } - Walk(std::get(spec.t)); - Put("::"); - Walk(std::get(spec.t)); - Put(")"); +Word("!$OMP DECLARE MAPPER ("); +const auto &spec{std::get(z.t)}; +if (auto mapname{std::get>(spec.t)}) { + Walk(mapname); + Put(":"); +} +Walk(std::get(spec.t)); +Put("::"); +Walk(std::get(spec.t)); +Put(")"); - Walk(std::get(z.t)); - Put("\n"); - return false; -}, -[&](const OpenMPDeclareReductionConstruct &) { - Word("DECLARE REDUCTION "); - retur
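An illustrative Fortran fragment covering the placements the patch now accepts — and the one it still rejects — per the commit message:

```fortran
subroutine f(x)
  implicit none
  integer :: x
  !$omp nothing                                  ! specification part: allowed
  !$omp error at(compilation) severity(warning)  ! specification part: allowed
  x = x + 1                                      ! execution part begins
  !$omp error at(execution) severity(fatal)      ! execution part only
end subroutine f
```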
[llvm-branch-commits] [flang] [flang][OpenMP] Allow utility constructs in specification part (PR #121509)
llvmbot wrote: @llvm/pr-subscribers-flang-openmp Author: Krzysztof Parzyszek (kparzysz) Changes Allow utility constructs (error and nothing) to appear in the specification part as well as the execution part. The exception is "ERROR AT(EXECUTION)" which should only be in the execution part. In case of ambiguity (the boundary between the specification and the execution part), utility constructs will be parsed as belonging to the specification part. In such cases move them to the execution part in the OpenMP canonicalization code. --- Patch is 23.70 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/121509.diff 10 Files Affected: - (modified) flang/include/flang/Parser/parse-tree.h (+1-1) - (modified) flang/lib/Lower/OpenMP/OpenMP.cpp (+4) - (modified) flang/lib/Parser/openmp-parsers.cpp (+3-1) - (modified) flang/lib/Parser/unparse.cpp (+42-59) - (modified) flang/lib/Semantics/canonicalize-omp.cpp (+162) - (modified) flang/lib/Semantics/check-omp-structure.cpp (+18-1) - (modified) flang/lib/Semantics/check-omp-structure.h (+6-2) - (modified) flang/test/Parser/OpenMP/error-unparse.f90 (+11-7) - (modified) flang/test/Parser/OpenMP/nothing.f90 (+100) - (added) flang/test/Semantics/OpenMP/error.f90 (+8) ``diff diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 9df7c6d5e39c31..b693e001e5e4b4 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -4342,7 +4342,7 @@ struct OpenMPDeclarativeConstruct { std::variant + OpenMPRequiresConstruct, OpenMPUtilityConstruct> u; }; diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index fe6d82125a9e01..0a84162291573a 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -2586,6 +2586,10 @@ static void genOMPDispatch(lower::AbstractConverter &converter, //===--===// // OpenMPDeclarativeConstruct visitors //===--===// +static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, + lower::pft::Evaluation &eval, + const parser::OpenMPUtilityConstruct &); static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index 0a0a29002de27c..75bb64d06ed0fe 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -1090,7 +1090,9 @@ TYPE_PARSER(startOmpLine >> construct( Parser{}) || construct( -Parser{})) / +Parser{}) || +construct( +Parser{})) / endOmpLine)) // Block Construct diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index 4fe57f3e348d35..58820476c51bc1 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2631,81 +2631,64 @@ class UnparseVisitor { } } void Unparse(const OpenMPDeclareReductionConstruct &x) { +BeginOpenMP(); +Word("!$OMP DECLARE REDUCTION "); Put("("); Walk(std::get(x.t)), Put(" : "); Walk(std::get>(x.t), ","), Put(" : "); Walk(std::get(x.t)); Put(")"); Walk(std::get>(x.t)); +EndOpenMP(); } - bool Pre(const OpenMPDeclarativeConstruct &x) { + + void Unparse(const OpenMPDeclareMapperConstruct &z) { BeginOpenMP(); -Word("!$OMP "); -return common::visit( -common::visitors{ -[&](const OpenMPDeclarativeAllocate &z) { - Word("ALLOCATE ("); - Walk(std::get(z.t)); - Put(")"); - Walk(std::get(z.t)); - Put("\n"); - EndOpenMP(); - return false; -}, -[&](const OpenMPDeclareMapperConstruct &z) { - 
Word("DECLARE MAPPER ("); - const auto &spec{std::get(z.t)}; - if (auto mapname{std::get>(spec.t)}) { -Walk(mapname); -Put(":"); - } - Walk(std::get(spec.t)); - Put("::"); - Walk(std::get(spec.t)); - Put(")"); +Word("!$OMP DECLARE MAPPER ("); +const auto &spec{std::get(z.t)}; +if (auto mapname{std::get>(spec.t)}) { + Walk(mapname); + Put(":"); +} +Walk(std::get(spec.t)); +Put("::"); +Walk(std::get(spec.t)); +Put(")"); - Walk(std::get(z.t)); - Put("\n"); - return false; -}, -[&](const OpenMPDeclareReductionConstruct &) { - Word("DECLARE REDUCTION "); - return true; -}, -
[llvm-branch-commits] [flang] [flang][OpenMP] Allow utility constructs in specification part (PR #121509)
llvmbot wrote: @llvm/pr-subscribers-flang-semantics Author: Krzysztof Parzyszek (kparzysz) Changes Allow utility constructs (error and nothing) to appear in the specification part as well as the execution part. The exception is "ERROR AT(EXECUTION)" which should only be in the execution part. In case of ambiguity (the boundary between the specification and the execution part), utility constructs will be parsed as belonging to the specification part. In such cases move them to the execution part in the OpenMP canonicalization code. --- Patch is 23.70 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/121509.diff 10 Files Affected: - (modified) flang/include/flang/Parser/parse-tree.h (+1-1) - (modified) flang/lib/Lower/OpenMP/OpenMP.cpp (+4) - (modified) flang/lib/Parser/openmp-parsers.cpp (+3-1) - (modified) flang/lib/Parser/unparse.cpp (+42-59) - (modified) flang/lib/Semantics/canonicalize-omp.cpp (+162) - (modified) flang/lib/Semantics/check-omp-structure.cpp (+18-1) - (modified) flang/lib/Semantics/check-omp-structure.h (+6-2) - (modified) flang/test/Parser/OpenMP/error-unparse.f90 (+11-7) - (modified) flang/test/Parser/OpenMP/nothing.f90 (+100) - (added) flang/test/Semantics/OpenMP/error.f90 (+8) ``diff diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 9df7c6d5e39c31..b693e001e5e4b4 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -4342,7 +4342,7 @@ struct OpenMPDeclarativeConstruct { std::variant + OpenMPRequiresConstruct, OpenMPUtilityConstruct> u; }; diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index fe6d82125a9e01..0a84162291573a 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -2586,6 +2586,10 @@ static void genOMPDispatch(lower::AbstractConverter &converter, //===--===// // OpenMPDeclarativeConstruct visitors //===--===// +static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, + lower::pft::Evaluation &eval, + const parser::OpenMPUtilityConstruct &); static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index 0a0a29002de27c..75bb64d06ed0fe 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -1090,7 +1090,9 @@ TYPE_PARSER(startOmpLine >> construct( Parser{}) || construct( -Parser{})) / +Parser{}) || +construct( +Parser{})) / endOmpLine)) // Block Construct diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index 4fe57f3e348d35..58820476c51bc1 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2631,81 +2631,64 @@ class UnparseVisitor { } } void Unparse(const OpenMPDeclareReductionConstruct &x) { +BeginOpenMP(); +Word("!$OMP DECLARE REDUCTION "); Put("("); Walk(std::get(x.t)), Put(" : "); Walk(std::get>(x.t), ","), Put(" : "); Walk(std::get(x.t)); Put(")"); Walk(std::get>(x.t)); +EndOpenMP(); } - bool Pre(const OpenMPDeclarativeConstruct &x) { + + void Unparse(const OpenMPDeclareMapperConstruct &z) { BeginOpenMP(); -Word("!$OMP "); -return common::visit( -common::visitors{ -[&](const OpenMPDeclarativeAllocate &z) { - Word("ALLOCATE ("); - Walk(std::get(z.t)); - Put(")"); - Walk(std::get(z.t)); - Put("\n"); - EndOpenMP(); - return false; -}, -[&](const OpenMPDeclareMapperConstruct &z) { - 
Word("DECLARE MAPPER ("); - const auto &spec{std::get(z.t)}; - if (auto mapname{std::get>(spec.t)}) { -Walk(mapname); -Put(":"); - } - Walk(std::get(spec.t)); - Put("::"); - Walk(std::get(spec.t)); - Put(")"); +Word("!$OMP DECLARE MAPPER ("); +const auto &spec{std::get(z.t)}; +if (auto mapname{std::get>(spec.t)}) { + Walk(mapname); + Put(":"); +} +Walk(std::get(spec.t)); +Put("::"); +Walk(std::get(spec.t)); +Put(")"); - Walk(std::get(z.t)); - Put("\n"); - return false; -}, -[&](const OpenMPDeclareReductionConstruct &) { - Word("DECLARE REDUCTION "); - return true; -}, -
[llvm-branch-commits] [flang] [flang][OpenMP] Allow utility constructs in specification part (PR #121509)
https://github.com/kparzysz created https://github.com/llvm/llvm-project/pull/121509 Allow utility constructs (error and nothing) to appear in the specification part as well as the execution part. The exception is "ERROR AT(EXECUTION)" which should only be in the execution part. In case of ambiguity (the boundary between the specification and the execution part), utility constructs will be parsed as belonging to the specification part. In such cases move them to the execution part in the OpenMP canonicalization code. >From 0e21e1df3ef8f51f34b6dabc095a9691be2619b5 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Sat, 21 Dec 2024 11:18:47 -0600 Subject: [PATCH] [flang][OpenMP] Allow utility constructs in specification part Allow utility constructs (error and nothing) to appear in the specification part as well as the execution part. The exception is "ERROR AT(EXECUTION)" which should only be in the execution part. In case of ambiguity (the boundary between the specification and the execution part), utility constructs will be parsed as belonging to the specification part. In such cases move them to the execution part in the OpenMP canonicalization code. --- flang/include/flang/Parser/parse-tree.h | 2 +- flang/lib/Lower/OpenMP/OpenMP.cpp | 4 + flang/lib/Parser/openmp-parsers.cpp | 4 +- flang/lib/Parser/unparse.cpp| 101 +--- flang/lib/Semantics/canonicalize-omp.cpp| 162 flang/lib/Semantics/check-omp-structure.cpp | 19 ++- flang/lib/Semantics/check-omp-structure.h | 8 +- flang/test/Parser/OpenMP/error-unparse.f90 | 18 ++- flang/test/Parser/OpenMP/nothing.f90| 100 flang/test/Semantics/OpenMP/error.f90 | 8 + 10 files changed, 355 insertions(+), 71 deletions(-) create mode 100644 flang/test/Semantics/OpenMP/error.f90 diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 9df7c6d5e39c31..b693e001e5e4b4 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -4342,7 +4342,7 @@ struct OpenMPDeclarativeConstruct { std::variant + OpenMPRequiresConstruct, OpenMPUtilityConstruct> u; }; diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index fe6d82125a9e01..0a84162291573a 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -2586,6 +2586,10 @@ static void genOMPDispatch(lower::AbstractConverter &converter, //===--===// // OpenMPDeclarativeConstruct visitors //===--===// +static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, + lower::pft::Evaluation &eval, + const parser::OpenMPUtilityConstruct &); static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index 0a0a29002de27c..75bb64d06ed0fe 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -1090,7 +1090,9 @@ TYPE_PARSER(startOmpLine >> construct( Parser{}) || construct( -Parser{})) / +Parser{}) || +construct( +Parser{})) / endOmpLine)) // Block Construct diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index 4fe57f3e348d35..58820476c51bc1 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2631,81 +2631,64 @@ class UnparseVisitor { } } void Unparse(const OpenMPDeclareReductionConstruct &x) { +BeginOpenMP(); +Word("!$OMP DECLARE REDUCTION "); Put("("); Walk(std::get(x.t)), Put(" : "); Walk(std::get>(x.t), ","), Put(" : 
"); Walk(std::get(x.t)); Put(")"); Walk(std::get>(x.t)); +EndOpenMP(); } - bool Pre(const OpenMPDeclarativeConstruct &x) { + + void Unparse(const OpenMPDeclareMapperConstruct &z) { BeginOpenMP(); -Word("!$OMP "); -return common::visit( -common::visitors{ -[&](const OpenMPDeclarativeAllocate &z) { - Word("ALLOCATE ("); - Walk(std::get(z.t)); - Put(")"); - Walk(std::get(z.t)); - Put("\n"); - EndOpenMP(); - return false; -}, -[&](const OpenMPDeclareMapperConstruct &z) { - Word("DECLARE MAPPER ("); - const auto &spec{std::get(z.t)}; - if (auto mapname{std::get>(spec.t)}) { -Walk(mapname); -Put(":"); - } - Walk(std::get(spec.t)); - Put("::"); -
[llvm-branch-commits] [llvm] release/19.x: [RISCV] Don't create BuildPairF64 or SplitF64 nodes without D or Zdinx. (#116159) (PR #121175)
topperc wrote: Looks like the lit test didn't cherry-pick cleanly. https://github.com/llvm/llvm-project/pull/121175 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [RISCV] Don't create BuildPairF64 or SplitF64 nodes without D or Zdinx. (#116159) (PR #121175)
https://github.com/topperc closed https://github.com/llvm/llvm-project/pull/121175 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [RISCV] Don't create BuildPairF64 or SplitF64 nodes without D or Zdinx. (#116159) (PR #121175)
topperc wrote: Going to manually cherry-pick to fix the test checks. https://github.com/llvm/llvm-project/pull/121175 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [RISCV] Don't create BuildPairF64 or SplitF64 nodes without D or Zdinx. (#116159) (PR #121501)
llvmbot wrote: @llvm/pr-subscribers-backend-risc-v Author: Craig Topper (topperc) Changes The fix in ReplaceNodeResults is the only one really required for the known crash. I couldn't hit the case in LowerOperation because that requires (f64 (bitcast i64)), but the result type is softened before the input so we don't get a chance to legalize the input. The change to the setOperationAction call was an observation that a i64<->vector cast should not be custom legalized on RV32. The custom code already calls isTypeLegal on the scalar type. --- Full diff: https://github.com/llvm/llvm-project/pull/121501.diff 2 Files Affected: - (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+7-4) - (added) llvm/test/CodeGen/RISCV/rvv/rv32-zve-bitcast-crash.ll (+22) ``diff diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 823fb428472ef3..badbb425997447 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1396,8 +1396,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, } // Custom-legalize bitcasts from fixed-length vectors to scalar types. - setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64}, - Custom); + setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32}, Custom); + if (Subtarget.is64Bit()) +setOperationAction(ISD::BITCAST, MVT::i64, Custom); if (Subtarget.hasStdExtZfhminOrZhinxmin()) setOperationAction(ISD::BITCAST, MVT::f16, Custom); if (Subtarget.hasStdExtFOrZfinx()) @@ -6317,7 +6318,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0); return FPConv; } -if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32) { +if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit() && +Subtarget.hasStdExtDOrZdinx()) { SDValue Lo, Hi; std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32); SDValue RetReg = @@ -12616,7 +12618,8 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0); Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv)); -} else if (VT == MVT::i64 && Op0VT == MVT::f64 && XLenVT == MVT::i32) { +} else if (VT == MVT::i64 && Op0VT == MVT::f64 && !Subtarget.is64Bit() && + Subtarget.hasStdExtDOrZdinx()) { SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), Op0); SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-zve-bitcast-crash.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-zve-bitcast-crash.ll new file mode 100644 index 00..d6612c9d025afa --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/rv32-zve-bitcast-crash.ll @@ -0,0 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc < %s -mtriple=riscv32 -mattr=+zve32f,+zvl128b | FileCheck %s + +; This bitcast previously incorrectly produce a SplitF64 node. 
+ +define i64 @foo(double %x) { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT:addi sp, sp, -16 +; CHECK-NEXT:.cfi_def_cfa_offset 16 +; CHECK-NEXT:sw ra, 12(sp) # 4-byte Folded Spill +; CHECK-NEXT:.cfi_offset ra, -4 +; CHECK-NEXT:lui a3, 261888 +; CHECK-NEXT:li a2, 0 +; CHECK-NEXT:call __adddf3 +; CHECK-NEXT:lw ra, 12(sp) # 4-byte Folded Reload +; CHECK-NEXT:addi sp, sp, 16 +; CHECK-NEXT:ret + %a = fadd double %x, 1.0 + %b = bitcast double %a to i64 + ret i64 %b +} `` https://github.com/llvm/llvm-project/pull/121501 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
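To make the guard explicit, here is the LowerOperation hunk again with added commentary (the code lines are from the diff above; the comments are not):

```cpp
// Previously this path was taken for (f64 (bitcast i64)) on any RV32 target:
//   if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32) {
// On a target without D or Zdinx, f64 is not a legal type, so the
// BuildPairF64/SplitF64 nodes built on this path could not be selected.
// The new condition only forms them when 64-bit FP registers actually
// exist; otherwise the bitcast falls back to generic soft-float
// legalization.
if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit() &&
    Subtarget.hasStdExtDOrZdinx()) {
  SDValue Lo, Hi;
  std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
  // ... build the f64 result from the two i32 halves ...
}
```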
[llvm-branch-commits] [llvm] release/19.x: [RISCV] Don't create BuildPairF64 or SplitF64 nodes without D or Zdinx. (#116159) (PR #121501)
https://github.com/topperc created https://github.com/llvm/llvm-project/pull/121501 The fix in ReplaceNodeResults is the only one really required for the known crash. I couldn't hit the case in LowerOperation because that requires (f64 (bitcast i64)), but the result type is softened before the input so we don't get a chance to legalize the input. The change to the setOperationAction call was an observation that a i64<->vector cast should not be custom legalized on RV32. The custom code already calls isTypeLegal on the scalar type. >From 62597275e39caa6f558db9053c80cde47ead503f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 14 Nov 2024 09:54:33 -0800 Subject: [PATCH] [RISCV] Don't create BuildPairF64 or SplitF64 nodes without D or Zdinx. (#116159) The fix in ReplaceNodeResults is the only one really required for the known crash. I couldn't hit the case in LowerOperation because that requires (f64 (bitcast i64)), but the result type is softened before the input so we don't get a chance to legalize the input. The change to the setOperationAction call was an observation that a i64<->vector cast should not be custom legalized on RV32. The custom code already calls isTypeLegal on the scalar type. --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 11 ++ .../RISCV/rvv/rv32-zve-bitcast-crash.ll | 22 +++ 2 files changed, 29 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/rvv/rv32-zve-bitcast-crash.ll diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 823fb428472ef3..badbb425997447 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1396,8 +1396,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, } // Custom-legalize bitcasts from fixed-length vectors to scalar types. 
- setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64}, - Custom); + setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32}, Custom); + if (Subtarget.is64Bit()) +setOperationAction(ISD::BITCAST, MVT::i64, Custom); if (Subtarget.hasStdExtZfhminOrZhinxmin()) setOperationAction(ISD::BITCAST, MVT::f16, Custom); if (Subtarget.hasStdExtFOrZfinx()) @@ -6317,7 +6318,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0); return FPConv; } -if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32) { +if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit() && +Subtarget.hasStdExtDOrZdinx()) { SDValue Lo, Hi; std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32); SDValue RetReg = @@ -12616,7 +12618,8 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0); Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv)); -} else if (VT == MVT::i64 && Op0VT == MVT::f64 && XLenVT == MVT::i32) { +} else if (VT == MVT::i64 && Op0VT == MVT::f64 && !Subtarget.is64Bit() && + Subtarget.hasStdExtDOrZdinx()) { SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), Op0); SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-zve-bitcast-crash.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-zve-bitcast-crash.ll new file mode 100644 index 00..d6612c9d025afa --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/rv32-zve-bitcast-crash.ll @@ -0,0 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc < %s -mtriple=riscv32 -mattr=+zve32f,+zvl128b | FileCheck %s + +; This bitcast previously incorrectly produce a SplitF64 node. + +define i64 @foo(double %x) { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT:addi sp, sp, -16 +; CHECK-NEXT:.cfi_def_cfa_offset 16 +; CHECK-NEXT:sw ra, 12(sp) # 4-byte Folded Spill +; CHECK-NEXT:.cfi_offset ra, -4 +; CHECK-NEXT:lui a3, 261888 +; CHECK-NEXT:li a2, 0 +; CHECK-NEXT:call __adddf3 +; CHECK-NEXT:lw ra, 12(sp) # 4-byte Folded Reload +; CHECK-NEXT:addi sp, sp, 16 +; CHECK-NEXT:ret + %a = fadd double %x, 1.0 + %b = bitcast double %a to i64 + ret i64 %b +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [RISCV] Don't create BuildPairF64 or SplitF64 nodes without D or Zdinx. (#116159) (PR #121501)
https://github.com/topperc milestoned https://github.com/llvm/llvm-project/pull/121501 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [RISCV] Don't create BuildPairF64 or SplitF64 nodes without D or Zdinx. (#116159) (PR #121501)
https://github.com/topperc edited https://github.com/llvm/llvm-project/pull/121501 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [RISCV] Don't create BuildPairF64 or SplitF64 nodes without D or Zdinx. (#116159) (PR #121501)
https://github.com/lenary approved this pull request. LGTM. I guess this would have been easier had the original test had `nounwind`, but that's life. I don't think we should change it on the branch. https://github.com/llvm/llvm-project/pull/121501 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [Support] Recycler: Implement move constructor (PR #120555)
https://github.com/dwblaikie approved this pull request. https://github.com/llvm/llvm-project/pull/120555 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][OpenMP] Allow utility constructs in specification part (PR #121509)
https://github.com/kparzysz updated https://github.com/llvm/llvm-project/pull/121509 >From 0e21e1df3ef8f51f34b6dabc095a9691be2619b5 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Sat, 21 Dec 2024 11:18:47 -0600 Subject: [PATCH 1/2] [flang][OpenMP] Allow utility constructs in specification part Allow utility constructs (error and nothing) to appear in the specification part as well as the execution part. The exception is "ERROR AT(EXECUTION)" which should only be in the execution part. In case of ambiguity (the boundary between the specification and the execution part), utility constructs will be parsed as belonging to the specification part. In such cases move them to the execution part in the OpenMP canonicalization code. --- flang/include/flang/Parser/parse-tree.h | 2 +- flang/lib/Lower/OpenMP/OpenMP.cpp | 4 + flang/lib/Parser/openmp-parsers.cpp | 4 +- flang/lib/Parser/unparse.cpp| 101 +--- flang/lib/Semantics/canonicalize-omp.cpp| 162 flang/lib/Semantics/check-omp-structure.cpp | 19 ++- flang/lib/Semantics/check-omp-structure.h | 8 +- flang/test/Parser/OpenMP/error-unparse.f90 | 18 ++- flang/test/Parser/OpenMP/nothing.f90| 100 flang/test/Semantics/OpenMP/error.f90 | 8 + 10 files changed, 355 insertions(+), 71 deletions(-) create mode 100644 flang/test/Semantics/OpenMP/error.f90 diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 9df7c6d5e39c31..b693e001e5e4b4 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -4342,7 +4342,7 @@ struct OpenMPDeclarativeConstruct { std::variant + OpenMPRequiresConstruct, OpenMPUtilityConstruct> u; }; diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index fe6d82125a9e01..0a84162291573a 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -2586,6 +2586,10 @@ static void genOMPDispatch(lower::AbstractConverter &converter, //===--===// // OpenMPDeclarativeConstruct visitors //===--===// +static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, + lower::pft::Evaluation &eval, + const parser::OpenMPUtilityConstruct &); static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index 0a0a29002de27c..75bb64d06ed0fe 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -1090,7 +1090,9 @@ TYPE_PARSER(startOmpLine >> construct( Parser{}) || construct( -Parser{})) / +Parser{}) || +construct( +Parser{})) / endOmpLine)) // Block Construct diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index 4fe57f3e348d35..58820476c51bc1 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2631,81 +2631,64 @@ class UnparseVisitor { } } void Unparse(const OpenMPDeclareReductionConstruct &x) { +BeginOpenMP(); +Word("!$OMP DECLARE REDUCTION "); Put("("); Walk(std::get(x.t)), Put(" : "); Walk(std::get>(x.t), ","), Put(" : "); Walk(std::get(x.t)); Put(")"); Walk(std::get>(x.t)); +EndOpenMP(); } - bool Pre(const OpenMPDeclarativeConstruct &x) { + + void Unparse(const OpenMPDeclareMapperConstruct &z) { BeginOpenMP(); -Word("!$OMP "); -return common::visit( -common::visitors{ -[&](const OpenMPDeclarativeAllocate &z) { - Word("ALLOCATE ("); - Walk(std::get(z.t)); - Put(")"); - Walk(std::get(z.t)); - Put("\n"); - EndOpenMP(); - return false; -}, -[&](const 
OpenMPDeclareMapperConstruct &z) { - Word("DECLARE MAPPER ("); - const auto &spec{std::get(z.t)}; - if (auto mapname{std::get>(spec.t)}) { -Walk(mapname); -Put(":"); - } - Walk(std::get(spec.t)); - Put("::"); - Walk(std::get(spec.t)); - Put(")"); +Word("!$OMP DECLARE MAPPER ("); +const auto &spec{std::get(z.t)}; +if (auto mapname{std::get>(spec.t)}) { + Walk(mapname); + Put(":"); +} +Walk(std::get(spec.t)); +Put("::"); +Walk(std::get(spec.t)); +Put(")"); - Walk(std::get(z.t)); - Put("\n"); - return false; -}, -[&](co
[llvm-branch-commits] [flang] [flang][OpenMP] Allow utility constructs in specification part (PR #121509)
github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning: You can test this locally with the following command: ``bash git-clang-format --diff 7efde1db717ee0f58564d919810b681170b7fdcb dd75856e006930b91267c1c93346df9b5880d4b5 --extensions cpp,h -- flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp flang/include/flang/Parser/parse-tree.h flang/lib/Lower/OpenMP/OpenMP.cpp flang/lib/Parser/openmp-parsers.cpp flang/lib/Parser/unparse.cpp flang/lib/Semantics/canonicalize-omp.cpp flang/lib/Semantics/check-omp-structure.cpp flang/lib/Semantics/check-omp-structure.h `` View the diff from clang-format here. ``diff diff --git a/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp b/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp index 38db1c0f6c..bcc327c1db 100644 --- a/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp +++ b/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp @@ -107,14 +107,16 @@ std::string OpenMPCounterVisitor::getName(const OmpWrapperType &w) { } std::string OpenMPCounterVisitor::getName(const OpenMPDeclarativeConstruct &c) { return std::visit(Fortran::common::visitors{ - [&](const OpenMPUtilityConstruct &o) -> std::string { -const CharBlock &source{o.source}; -return normalize_construct_name(source.ToString()); - }, - [&](const auto &o) -> std::string { -const CharBlock &source{std::get(o.t).source}; -return normalize_construct_name(source.ToString()); - },}, +[&](const OpenMPUtilityConstruct &o) -> std::string { + const CharBlock &source{o.source}; + return normalize_construct_name(source.ToString()); +}, +[&](const auto &o) -> std::string { + const CharBlock &source{ + std::get(o.t).source}; + return normalize_construct_name(source.ToString()); +}, +}, c.u); } std::string OpenMPCounterVisitor::getName(const OpenMPConstruct &c) { `` https://github.com/llvm/llvm-project/pull/121509 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][OpenMP] Allow utility constructs in specification part (PR #121509)
https://github.com/kparzysz updated https://github.com/llvm/llvm-project/pull/121509 >From 0e21e1df3ef8f51f34b6dabc095a9691be2619b5 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Sat, 21 Dec 2024 11:18:47 -0600 Subject: [PATCH 1/3] [flang][OpenMP] Allow utility constructs in specification part Allow utility constructs (error and nothing) to appear in the specification part as well as the execution part. The exception is "ERROR AT(EXECUTION)" which should only be in the execution part. In case of ambiguity (the boundary between the specification and the execution part), utility constructs will be parsed as belonging to the specification part. In such cases move them to the execution part in the OpenMP canonicalization code. --- flang/include/flang/Parser/parse-tree.h | 2 +- flang/lib/Lower/OpenMP/OpenMP.cpp | 4 + flang/lib/Parser/openmp-parsers.cpp | 4 +- flang/lib/Parser/unparse.cpp| 101 +--- flang/lib/Semantics/canonicalize-omp.cpp| 162 flang/lib/Semantics/check-omp-structure.cpp | 19 ++- flang/lib/Semantics/check-omp-structure.h | 8 +- flang/test/Parser/OpenMP/error-unparse.f90 | 18 ++- flang/test/Parser/OpenMP/nothing.f90| 100 flang/test/Semantics/OpenMP/error.f90 | 8 + 10 files changed, 355 insertions(+), 71 deletions(-) create mode 100644 flang/test/Semantics/OpenMP/error.f90 diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 9df7c6d5e39c31..b693e001e5e4b4 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -4342,7 +4342,7 @@ struct OpenMPDeclarativeConstruct { std::variant + OpenMPRequiresConstruct, OpenMPUtilityConstruct> u; }; diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index fe6d82125a9e01..0a84162291573a 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -2586,6 +2586,10 @@ static void genOMPDispatch(lower::AbstractConverter &converter, //===--===// // OpenMPDeclarativeConstruct visitors //===--===// +static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, + lower::pft::Evaluation &eval, + const parser::OpenMPUtilityConstruct &); static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index 0a0a29002de27c..75bb64d06ed0fe 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -1090,7 +1090,9 @@ TYPE_PARSER(startOmpLine >> construct( Parser{}) || construct( -Parser{})) / +Parser{}) || +construct( +Parser{})) / endOmpLine)) // Block Construct diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index 4fe57f3e348d35..58820476c51bc1 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2631,81 +2631,64 @@ class UnparseVisitor { } } void Unparse(const OpenMPDeclareReductionConstruct &x) { +BeginOpenMP(); +Word("!$OMP DECLARE REDUCTION "); Put("("); Walk(std::get(x.t)), Put(" : "); Walk(std::get>(x.t), ","), Put(" : "); Walk(std::get(x.t)); Put(")"); Walk(std::get>(x.t)); +EndOpenMP(); } - bool Pre(const OpenMPDeclarativeConstruct &x) { + + void Unparse(const OpenMPDeclareMapperConstruct &z) { BeginOpenMP(); -Word("!$OMP "); -return common::visit( -common::visitors{ -[&](const OpenMPDeclarativeAllocate &z) { - Word("ALLOCATE ("); - Walk(std::get(z.t)); - Put(")"); - Walk(std::get(z.t)); - Put("\n"); - EndOpenMP(); - return false; -}, -[&](const 
OpenMPDeclareMapperConstruct &z) { - Word("DECLARE MAPPER ("); - const auto &spec{std::get(z.t)}; - if (auto mapname{std::get>(spec.t)}) { -Walk(mapname); -Put(":"); - } - Walk(std::get(spec.t)); - Put("::"); - Walk(std::get(spec.t)); - Put(")"); +Word("!$OMP DECLARE MAPPER ("); +const auto &spec{std::get(z.t)}; +if (auto mapname{std::get>(spec.t)}) { + Walk(mapname); + Put(":"); +} +Walk(std::get(spec.t)); +Put("::"); +Walk(std::get(spec.t)); +Put(")"); - Walk(std::get(z.t)); - Put("\n"); - return false; -}, -[&](co
[llvm-branch-commits] [llvm] [X86] load atomic vector tests for combine (PR #120640)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120640 >From 13ea377347ae290aa2093f783e56e2721156a399 Mon Sep 17 00:00:00 2001 From: jofrn Date: Thu, 19 Dec 2024 16:25:55 -0500 Subject: [PATCH] [X86] load atomic vector tests for combine Vector types of 2 elements that don't require widening are lowered via the prior commit's combine method, which is also needed to avoid a vector move. This change adds the tests that depend strictly on combineAtomicLoad so that SelectionDAG can translate vectors with type bfloat,half. commit-id:3a045357 --- llvm/test/CodeGen/X86/atomic-load-store.ll | 18 ++ 1 file changed, 18 insertions(+) diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 7b2e5e60eca20e..34e541c029e1b2 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -187,6 +187,24 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) { ret <2 x float> %ret } +define <2 x half> @atomic_vec2_half(ptr %x) { +; CHECK-LABEL: atomic_vec2_half: +; CHECK: ## %bb.0: +; CHECK-NEXT:movl (%rdi), %eax +; CHECK-NEXT:retq + %ret = load atomic <2 x half>, ptr %x acquire, align 4 + ret <2 x half> %ret +} + +define <2 x bfloat> @atomic_vec2_bfloat(ptr %x) { +; CHECK-LABEL: atomic_vec2_bfloat: +; CHECK: ## %bb.0: +; CHECK-NEXT:movl (%rdi), %eax +; CHECK-NEXT:retq + %ret = load atomic <2 x bfloat>, ptr %x acquire, align 4 + ret <2 x bfloat> %ret +} + define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind { ; CHECK3-LABEL: atomic_vec1_ptr: ; CHECK3: ## %bb.0: ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
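The combineAtomicLoad these tests depend on is not part of this test-only change. For orientation, a rough sketch of the shape such a DAG combine takes; the real X86 hook differs in detail, and every name here is illustrative:

```cpp
// Sketch only: rewrite an atomic load of a small fixed vector (for example
// <2 x half>) as an integer atomic load of the same width plus a bitcast,
// so the value stays in a scalar register and no vector move is emitted.
static SDValue combineAtomicLoadSketch(AtomicSDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  if (!VT.isFixedLengthVector() || VT.getFixedSizeInBits() > 64)
    return SDValue();
  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getFixedSizeInBits());
  SDLoc DL(N);
  // Same memory location and ordering; only the value type changes.
  SDValue IntLd = DAG.getAtomic(ISD::ATOMIC_LOAD, DL, IntVT, IntVT,
                                N->getChain(), N->getBasePtr(),
                                N->getMemOperand());
  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), IntLd.getValue(1));
  return DAG.getNode(ISD::BITCAST, DL, VT, IntLd);
}
```

Something of this shape is what lets the <2 x half> and <2 x bfloat> tests above lower to a single movl.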
[llvm-branch-commits] [llvm] [X86] Add atomic vector tests for unaligned >1 sizes. (PR #120387)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120387 >From 64e813bf4f6e8546abe99a6864cc1f360288fbab Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:40:32 -0500 Subject: [PATCH] [X86] Add atomic vector tests for unaligned >1 sizes. Unaligned atomic vectors with size >1 are lowered to calls. Adding their tests separately here. commit-id:a06a5cc6 --- llvm/test/CodeGen/X86/atomic-load-store.ll | 253 + 1 file changed, 253 insertions(+) diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 6efcbb80c0ce6d..39e9fdfa5e62b0 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -146,6 +146,34 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind { ret <1 x i64> %ret } +define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_ptr: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movq (%rsp), %rax +; CHECK3-NEXT:popq %rcx +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_ptr: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq (%rsp), %rax +; CHECK0-NEXT:popq %rcx +; CHECK0-NEXT:retq + %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 + ret <1 x ptr> %ret +} + define <1 x half> @atomic_vec1_half(ptr %x) { ; CHECK3-LABEL: atomic_vec1_half: ; CHECK3: ## %bb.0: @@ -182,3 +210,228 @@ define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind { %ret = load atomic <1 x double>, ptr %x acquire, align 8 ret <1 x double> %ret } + +define <1 x i64> @atomic_vec1_i64(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_i64: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movq (%rsp), %rax +; CHECK3-NEXT:popq %rcx +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i64: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq (%rsp), %rax +; CHECK0-NEXT:popq %rcx +; CHECK0-NEXT:retq + %ret = load atomic <1 x i64>, ptr %x acquire, align 4 + ret <1 x i64> %ret +} + +define <1 x double> @atomic_vec1_double(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_double: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK3-NEXT:popq %rax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_double: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK0-NEXT:popq %rax +; CHECK0-NEXT:retq + %ret = load atomic <1 x double>, ptr %x acquire, align 4 + ret <1 x double> %ret +} + +define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec2_i32: +; CHECK3: ## 
%bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK3-NEXT:popq %rax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec2_i32: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq {{.*#+}} xmm0 = mem[0],zero +; CHECK0-NEXT:popq %rax +; CHECK0-NEXT:retq + %ret = load atomic <2 x i32>, ptr %x acquire, align 4 + ret <2 x i32> %ret +} + +define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec4_float_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:pushq %rax +; CHECK-NEXT:movl $2, %esi +; CHECK-NEXT:callq ___atomic_load_16 +; CHECK-NEXT:movq %rdx, %xmm1 +; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm
[llvm-branch-commits] [llvm] [AtomicExpand] Add bitcasts when expanding load atomic vector (PR #120716)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120716 >From 61ec1efbacdfd94bddd82b36da5acd92318486ff Mon Sep 17 00:00:00 2001 From: jofrn Date: Fri, 20 Dec 2024 06:14:28 -0500 Subject: [PATCH] [AtomicExpand] Add bitcasts when expanding load atomic vector AtomicExpand fails for aligned `load atomic ` because it does not find a compatible library call. This change adds appropriate bitcasts so that the call can be lowered. commit-id:f430c1af --- llvm/lib/CodeGen/AtomicExpandPass.cpp | 22 +++-- llvm/test/CodeGen/ARM/atomic-load-store.ll | 54 ++ llvm/test/CodeGen/X86/atomic-load-store.ll | 15 ++ 3 files changed, 88 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index a75fa688d87a8d..aa97fc8131e83b 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -2060,9 +2060,25 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall( I->replaceAllUsesWith(V); } else if (HasResult) { Value *V; -if (UseSizedLibcall) - V = Builder.CreateBitOrPointerCast(Result, I->getType()); -else { +if (UseSizedLibcall) { + if (I->getType()->getScalarType()->isIntOrPtrTy() && I->getType()->isVectorTy() && + Result->getType()->isVectorTy() != I->getType()->isVectorTy()) { +TypeSize Size = Result->getType()->getPrimitiveSizeInBits(); +assert((unsigned)Size % 2 == 0); +unsigned HalfSize = (unsigned)Size / 2; +Value *Lo = Builder.CreateTrunc(Result, IntegerType::get(Ctx, HalfSize)); +Value *RS = Builder.CreateLShr(Result, ConstantInt::get(IntegerType::get(Ctx, Size), HalfSize)); +Value *Hi = Builder.CreateTrunc(RS, IntegerType::get(Ctx, HalfSize)); +Value *Vec = Builder.CreateInsertElement( +VectorType::get(IntegerType::get(Ctx, HalfSize), cast(I->getType())->getElementCount()), +Lo, ConstantInt::get(IntegerType::get(Ctx, 32), 0)); +if (cast(I->getType())->getElementCount().isVector()) + Vec = Builder.CreateInsertElement(Vec, Hi, ConstantInt::get(IntegerType::get(Ctx, 32), 1)); +V = Builder.CreateBitOrPointerCast(Vec, I->getType()); + } + else +V = Builder.CreateBitOrPointerCast(Result, I->getType()); +} else { V = Builder.CreateAlignedLoad(I->getType(), AllocaResult, AllocaAlignment); Builder.CreateLifetimeEnd(AllocaResult, SizeVal64); diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll index 560dfde356c29d..e4ca564255f684 100644 --- a/llvm/test/CodeGen/ARM/atomic-load-store.ll +++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll @@ -983,3 +983,57 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) { store atomic double %val1, ptr %ptr seq_cst, align 8 ret void } + +define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 { +; ARM-LABEL: atomic_vec1_ptr: +; ARM: @ %bb.0: +; ARM-NEXT:ldr r0, [r0] +; ARM-NEXT:dmb ish +; ARM-NEXT:bx lr +; +; ARMOPTNONE-LABEL: atomic_vec1_ptr: +; ARMOPTNONE: @ %bb.0: +; ARMOPTNONE-NEXT:ldr r0, [r0] +; ARMOPTNONE-NEXT:dmb ish +; ARMOPTNONE-NEXT:bx lr +; +; THUMBTWO-LABEL: atomic_vec1_ptr: +; THUMBTWO: @ %bb.0: +; THUMBTWO-NEXT:ldr r0, [r0] +; THUMBTWO-NEXT:dmb ish +; THUMBTWO-NEXT:bx lr +; +; THUMBONE-LABEL: atomic_vec1_ptr: +; THUMBONE: @ %bb.0: +; THUMBONE-NEXT:push {r7, lr} +; THUMBONE-NEXT:movs r1, #0 +; THUMBONE-NEXT:mov r2, r1 +; THUMBONE-NEXT:bl __sync_val_compare_and_swap_4 +; THUMBONE-NEXT:pop {r7, pc} +; +; ARMV4-LABEL: atomic_vec1_ptr: +; ARMV4: @ %bb.0: +; ARMV4-NEXT:push {r11, lr} +; ARMV4-NEXT:mov r1, #2 +; ARMV4-NEXT:bl __atomic_load_4 +; ARMV4-NEXT:mov r1, #255 +; ARMV4-NEXT:orr r1, r1, #65280 +; 
ARMV4-NEXT:and r0, r0, r1 +; ARMV4-NEXT:pop {r11, lr} +; ARMV4-NEXT:mov pc, lr +; +; ARMV6-LABEL: atomic_vec1_ptr: +; ARMV6: @ %bb.0: +; ARMV6-NEXT:ldr r0, [r0] +; ARMV6-NEXT:mov r1, #0 +; ARMV6-NEXT:mcr p15, #0, r1, c7, c10, #5 +; ARMV6-NEXT:bx lr +; +; THUMBM-LABEL: atomic_vec1_ptr: +; THUMBM: @ %bb.0: +; THUMBM-NEXT:ldr r0, [r0] +; THUMBM-NEXT:dmb sy +; THUMBM-NEXT:bx lr + %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 + ret <1 x ptr> %ret +} diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 34e541c029e1b2..2e103b261fb9d7 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -354,6 +354,21 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { ret <2 x i32> %ret } +define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec2_ptr_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:pushq %rax +; CHECK-NEXT:movl $2, %esi +; CHECK-NEXT:
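The vector reconstruction in the pass is just an integer-to-lanes split. The same arithmetic in standalone C++, to make the trunc/lshr/insertelement sequence easy to sanity-check (assuming the little-endian lane order the IR split implies):

```cpp
#include <array>
#include <cstdint>

// Sketch: reassemble the i64 returned by a sized libcall such as
// __atomic_load_8 into two 32-bit lanes, mirroring the IR the pass emits.
std::array<std::uint32_t, 2> splitToLanes(std::uint64_t result) {
  std::uint32_t lo = static_cast<std::uint32_t>(result);       // trunc
  std::uint32_t hi = static_cast<std::uint32_t>(result >> 32); // lshr + trunc
  return {lo, hi};                                             // two insertelements
}
```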
[llvm-branch-commits] [llvm] [SelectionDAG][X86] Widen <2 x T> vector types for atomic load (PR #120598)
https://github.com/jofrn edited https://github.com/llvm/llvm-project/pull/120598 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [X86] Manage atomic load of fp -> int promotion in DAG (PR #120386)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120386 >From 4911c029665a09c33553dd2a90c473f5e789a58b Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:38:23 -0500 Subject: [PATCH] [X86] Manage atomic load of fp -> int promotion in DAG When lowering atomic <1 x T> vector types with floats, selection can fail since this pattern is unsupported. To support this, floats can be casted to an integer type of the same size. commit-id:f9d761c5 --- llvm/lib/Target/X86/X86ISelLowering.cpp| 4 +++ llvm/test/CodeGen/X86/atomic-load-store.ll | 37 ++ 2 files changed, 41 insertions(+) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a0514e93d6598b..fda93a2eb38745 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2608,6 +2608,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(Op, MVT::f32, Promote); } + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f16, MVT::i16); + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f32, MVT::i32); + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f64, MVT::i64); + // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine({ISD::VECTOR_SHUFFLE, ISD::SCALAR_TO_VECTOR, diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index d23cfb89f9fc87..6efcbb80c0ce6d 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -145,3 +145,40 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind { %ret = load atomic <1 x i64>, ptr %x acquire, align 8 ret <1 x i64> %ret } + +define <1 x half> @atomic_vec1_half(ptr %x) { +; CHECK3-LABEL: atomic_vec1_half: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzwl (%rdi), %eax +; CHECK3-NEXT:pinsrw $0, %eax, %xmm0 +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_half: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movw (%rdi), %cx +; CHECK0-NEXT:## implicit-def: $eax +; CHECK0-NEXT:movw %cx, %ax +; CHECK0-NEXT:## implicit-def: $xmm0 +; CHECK0-NEXT:pinsrw $0, %eax, %xmm0 +; CHECK0-NEXT:retq + %ret = load atomic <1 x half>, ptr %x acquire, align 2 + ret <1 x half> %ret +} + +define <1 x float> @atomic_vec1_float(ptr %x) { +; CHECK-LABEL: atomic_vec1_float: +; CHECK: ## %bb.0: +; CHECK-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT:retq + %ret = load atomic <1 x float>, ptr %x acquire, align 4 + ret <1 x float> %ret +} + +define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec1_double_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT:retq + %ret = load atomic <1 x double>, ptr %x acquire, align 8 + ret <1 x double> %ret +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
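The promotion added here is the DAG-level version of a familiar source idiom: perform the atomic access at a same-width integer type, then reinterpret the bits as the float type. A C++20 sketch of that idiom (not the SelectionDAG code):

```cpp
#include <atomic>
#include <bit>
#include <cstdint>

// Sketch: the analogue of promoting ATOMIC_LOAD f64 to i64. The atomic
// load happens on the integer object; bit_cast reinterprets the result.
double atomicLoadDouble(std::uint64_t *bitsAddr) {
  std::uint64_t bits = std::atomic_ref<std::uint64_t>(*bitsAddr)
                           .load(std::memory_order_acquire);
  return std::bit_cast<double>(bits);
}
```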
[llvm-branch-commits] [llvm] [SelectionDAG][X86] Widen <2 x T> vector types for atomic load (PR #120598)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120598 >From 2c51f72fe2a0a35e3fd01ff2ff989d64a57b2731 Mon Sep 17 00:00:00 2001 From: jofrn Date: Thu, 19 Dec 2024 11:19:39 -0500 Subject: [PATCH] [SelectionDAG][X86] Widen <2 x T> vector types for atomic load Vector types of 2 elements must be widened. This change does so for the vector types of atomic loads in SelectionDAG so that it can translate aligned vectors with more than one element. It also combines the v2 type into its equivalent scalar type to avoid a move into a vector register. commit-id:2894ccd1 --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 6 +- .../SelectionDAG/LegalizeVectorTypes.cpp | 39 - llvm/lib/Target/X86/X86ISelLowering.cpp | 79 ++- llvm/test/CodeGen/X86/atomic-load-store.ll| 41 ++ 4 files changed, 159 insertions(+), 6 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index b81c9f87cb27d7..3b3dddc44e3682 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -1046,6 +1046,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N); SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N); SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); + SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue WidenVecRes_LOAD(SDNode* N); SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N); SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N); @@ -1129,8 +1130,9 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { /// resulting wider type. It takes: /// LdChain: list of chains for the load to be generated. /// Ld: load to widen - SDValue GenWidenVectorLoads(SmallVectorImpl &LdChain, - LoadSDNode *LD); + template + SDValue GenWidenVectorLoads(SmallVectorImpl &LdChain, T *LD, + bool IsAtomic = false); /// Helper function to generate a set of extension loads to load a vector with /// a resulting wider type. It takes: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index c85e4ba2cfa5a7..7c4caa96244b8b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -4515,6 +4515,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: +Res = WidenVecRes_ATOMIC_LOAD(cast(N)); +break; case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; case ISD::STEP_VECTOR: case ISD::SPLAT_VECTOR: @@ -5901,6 +5904,30 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { N->getOperand(1), N->getOperand(2)); } +SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N) { + SmallVector LdChain; // Chain for the series of load + SDValue Result = GenWidenVectorLoads(LdChain, N, true /*IsAtomic*/); + + if (Result) { +// If we generate a single load, we can use that for the chain. Otherwise, +// build a factor node to remember the multiple loads are independent and +// chain to that. +SDValue NewChain; +if (LdChain.size() == 1) + NewChain = LdChain[0]; +else + NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, LdChain); + +// Modified the chain - switch anything that used the old chain to use +// the new one. 
+ReplaceValueWith(SDValue(N, 1), NewChain); + +return Result; + } + + report_fatal_error("Unable to widen atomic vector load"); +} + SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { LoadSDNode *LD = cast(N); ISD::LoadExtType ExtType = LD->getExtensionType(); @@ -7699,8 +7726,9 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp); } +template SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl &LdChain, - LoadSDNode *LD) { + T *LD, bool IsAtomic) { // The strategy assumes that we can efficiently load power-of-two widths. // The routine chops the vector into the largest vector loads with the same // element type or scalar loads and then recombines it to the widen vector @@ -7757,8 +7785,13 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl &LdChain, } while (TypeSize::isKnownGT(RemainingWidth, NewVTWidth)); } - SDValue LdOp = DAG.getLoad(*FirstVT, dl, Chain, BasePtr, LD->getPointerInfo(), - LD->getOriginalAlign(), MMOFlags, AAInfo); + SDValue LdOp; + if (IsAtomic) +LdOp = DAG.getAtomic(ISD::A
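To make the `IsAtomic` branch of the widening path concrete, here is a hedged C++ sketch of the shape it takes — the helper name is invented and the real `GenWidenVectorLoads` handles far more cases, but the key invariant holds: atomicity forbids the multi-load splitting loop the non-atomic path can use, so a single full-width integer atomic load is emitted, its chain recorded, and the result padded out to the legalizer's wider vector type.

```cpp
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Sketch only: widen an atomic vector load (e.g. v2i32 -> v4i32) without
// splitting the memory access.
static SDValue widenAtomicLoadSketch(AtomicSDNode *LD, EVT WidenVT,
                                     SelectionDAG &DAG,
                                     SmallVectorImpl<SDValue> &LdChain) {
  SDLoc dl(LD);
  EVT LdVT = LD->getMemoryVT(); // original narrow type, e.g. v2i32
  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), LdVT.getSizeInBits());

  // One atomic load of the full original width...
  SDValue LdOp = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, IntVT, IntVT,
                               LD->getChain(), LD->getBasePtr(),
                               LD->getMemOperand());
  LdChain.push_back(LdOp.getValue(1)); // single load => single chain entry

  // ...reinterpreted as the narrow vector, then padded to the wide type.
  // Filling the extra lanes with undef is the conventional choice here.
  SDValue Vec = DAG.getBitcast(LdVT, LdOp);
  return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WidenVT, DAG.getUNDEF(WidenVT),
                     Vec, DAG.getVectorIdxConstant(0, dl));
}
```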
[llvm-branch-commits] [llvm] [SelectionDAG] Legalize <1 x T> vector types for atomic load (PR #120385)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120385 >From d75063f62509c33053f6271c4ab34667ad97c232 Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:37:17 -0500 Subject: [PATCH] [SelectionDAG] Legalize <1 x T> vector types for atomic load `load atomic <1 x T>` is not valid. This change legalizes vector types of atomic load via scalarization in SelectionDAG so that it can, for example, translate from `v1i32` to `i32`. commit-id:5c36cc8c --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 15 +++ llvm/test/CodeGen/X86/atomic-load-store.ll| 121 +- 3 files changed, 135 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 571a710cc92a34..b81c9f87cb27d7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -861,6 +861,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue ScalarizeVecRes_ExpOp(SDNode *N); SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); + SDValue ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); SDValue ScalarizeVecRes_VSELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 107454a92e356c..c85e4ba2cfa5a7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -60,6 +60,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break; case ISD::FPOWI: R = ScalarizeVecRes_ExpOp(N); break; case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: +R = ScalarizeVecRes_ATOMIC_LOAD(cast(N)); +break; case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast(N));break; case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break; @@ -451,6 +454,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { return Op; } +SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) { + SDValue Result = DAG.getAtomic( + ISD::ATOMIC_LOAD, SDLoc(N), N->getMemoryVT().getVectorElementType(), + N->getValueType(0).getVectorElementType(), N->getChain(), N->getBasePtr(), + N->getMemOperand()); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); + return Result; +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { assert(N->isUnindexed() && "Indexed vector load?"); diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 5bce4401f7bdb0..d23cfb89f9fc87 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK3 +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s --check-prefixes=CHECK,CHECK0 define void @test1(ptr %ptr, i32 %val1) { ; CHECK-LABEL: test1: @@ -28,3 +28,120 @@ define i32 @test3(ptr %ptr) { %val = load atomic i32, ptr %ptr seq_cst, align 4 ret i32 %val } + +define <1 x i32> @atomic_vec1_i32(ptr %x) { +; CHECK-LABEL: atomic_vec1_i32: +; CHECK: ## %bb.0: +; CHECK-NEXT:movl (%rdi), %eax +; CHECK-NEXT:retq + %ret = load atomic <1 x i32>, ptr %x acquire, align 4 + ret <1 x i32> %ret +} + +define <1 x i8> @atomic_vec1_i8(ptr %x) { +; CHECK3-LABEL: atomic_vec1_i8: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzbl (%rdi), %eax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i8: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movb (%rdi), %al +; CHECK0-NEXT:retq + %ret = load atomic <1 x i8>, ptr %x acquire, align 1 + ret <1 x i8> %ret +} + +define <1 x i16> @atomic_vec1_i16(ptr %x) { +; CHECK3-LABEL: atomic_vec1_i16: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzwl (%rdi), %eax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i16: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movw (%rdi), %ax +; CHECK0-NEXT:retq + %ret = load atomic <1 x i16>, ptr %x acquire, align 2 + ret <1 x i16> %ret +} + +define <1
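For context on why `<1 x T>` reaches `ScalarizeVecRes_ATOMIC_LOAD` at all: the type legalizer classifies single-element vectors as `TypeScalarizeVector`, so every node producing one — including ATOMIC_LOAD after this patch — needs a scalarize hook. A small illustrative query using the existing `TargetLowering` API (a sketch for orientation, not code from the patch; the function name is invented):

```cpp
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

// Illustration: how the legalizer classifies v1i32. getTypeAction and
// getTypeToTransformTo are existing TargetLoweringBase queries.
static void explainV1I32(const TargetLowering &TLI, LLVMContext &Ctx) {
  EVT V1I32 = EVT::getVectorVT(Ctx, MVT::i32, 1);
  if (TLI.getTypeAction(Ctx, V1I32) == TargetLowering::TypeScalarizeVector) {
    // ScalarizeVecRes_ATOMIC_LOAD emits the ATOMIC_LOAD directly in this
    // scalar type, which is why the tests compile to a plain movl.
    EVT Scalar = TLI.getTypeToTransformTo(Ctx, V1I32); // i32
    (void)Scalar;
  }
}
```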
[llvm-branch-commits] [llvm] [X86] Manage atomic load of fp -> int promotion in DAG (PR #120386)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120386 >From 4911c029665a09c33553dd2a90c473f5e789a58b Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:38:23 -0500 Subject: [PATCH] [X86] Manage atomic load of fp -> int promotion in DAG When lowering atomic <1 x T> vector types with floats, selection can fail since this pattern is unsupported. To support it, floats can be cast to an integer type of the same size. commit-id:f9d761c5 --- llvm/lib/Target/X86/X86ISelLowering.cpp| 4 +++ llvm/test/CodeGen/X86/atomic-load-store.ll | 37 ++ 2 files changed, 41 insertions(+) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a0514e93d6598b..fda93a2eb38745 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2608,6 +2608,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(Op, MVT::f32, Promote); } + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f16, MVT::i16); + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f32, MVT::i32); + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f64, MVT::i64); + // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine({ISD::VECTOR_SHUFFLE, ISD::SCALAR_TO_VECTOR, diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index d23cfb89f9fc87..6efcbb80c0ce6d 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -145,3 +145,40 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind { %ret = load atomic <1 x i64>, ptr %x acquire, align 8 ret <1 x i64> %ret } + +define <1 x half> @atomic_vec1_half(ptr %x) { +; CHECK3-LABEL: atomic_vec1_half: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzwl (%rdi), %eax +; CHECK3-NEXT:pinsrw $0, %eax, %xmm0 +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_half: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movw (%rdi), %cx +; CHECK0-NEXT:## implicit-def: $eax +; CHECK0-NEXT:movw %cx, %ax +; CHECK0-NEXT:## implicit-def: $xmm0 +; CHECK0-NEXT:pinsrw $0, %eax, %xmm0 +; CHECK0-NEXT:retq + %ret = load atomic <1 x half>, ptr %x acquire, align 2 + ret <1 x half> %ret +} + +define <1 x float> @atomic_vec1_float(ptr %x) { +; CHECK-LABEL: atomic_vec1_float: +; CHECK: ## %bb.0: +; CHECK-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT:retq + %ret = load atomic <1 x float>, ptr %x acquire, align 4 + ret <1 x float> %ret +} + +define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec1_double_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT:retq + %ret = load atomic <1 x double>, ptr %x acquire, align 8 + ret <1 x double> %ret +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
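The three `setOperationPromotedToType` calls in the diff above follow one width-matching pattern: each atomic FP load is re-issued by the legalizer as a same-width integer load plus a bitcast, which is why `atomic_vec1_float` above compiles to a single scalar move. A hedged sketch of the equivalent loop form follows — `promote` is a stand-in parameter, since the real `setOperationPromotedToType` is a protected `TargetLoweringBase` member callable only from the target's constructor, and the patch itself spells the three cases out explicitly (arguably the clearer choice):

```cpp
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
using namespace llvm;

// Loop form of the three promotions added above: f16->i16, f32->i32,
// f64->i64, each pairing a float type with the integer type of its width.
template <typename PromoteFn>
static void promoteAtomicFPLoads(PromoteFn promote) {
  for (MVT FPVT : {MVT::f16, MVT::f32, MVT::f64}) {
    MVT IntVT = MVT::getIntegerVT(FPVT.getFixedSizeInBits());
    promote(ISD::ATOMIC_LOAD, FPVT, IntVT);
  }
}
```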
[llvm-branch-commits] [llvm] [X86] load atomic vector tests for combine (PR #120640)
https://github.com/jofrn edited https://github.com/llvm/llvm-project/pull/120640 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits