https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120598
>From 36161dfc06ed32668b4b2812fd5943f703cfddb6 Mon Sep 17 00:00:00 2001 From: jofrn <jofer...@amd.com> Date: Thu, 19 Dec 2024 11:19:39 -0500 Subject: [PATCH] [SelectionDAG][X86] Widen <2 x T> vector types for atomic load Vector types of 2 elements must be widened. This change does this for vector types of atomic load in SelectionDAG so that it can translate aligned vectors of >1 size. Also, it also adds Pats to remove an extra MOV. commit-id:2894ccd1 --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 6 ++- .../SelectionDAG/LegalizeVectorTypes.cpp | 39 +++++++++++++-- llvm/lib/Target/X86/X86InstrCompiler.td | 7 +++ llvm/test/CodeGen/X86/atomic-load-store.ll | 49 +++++++++++++++++++ llvm/test/CodeGen/X86/atomic-unordered.ll | 3 +- 5 files changed, 97 insertions(+), 7 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index b81c9f87cb27d7..3b3dddc44e3682 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -1046,6 +1046,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N); SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N); SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); + SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue WidenVecRes_LOAD(SDNode* N); SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N); SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N); @@ -1129,8 +1130,9 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { /// resulting wider type. It takes: /// LdChain: list of chains for the load to be generated. /// Ld: load to widen - SDValue GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, - LoadSDNode *LD); + template <typename T> + SDValue GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, T *LD, + bool IsAtomic = false); /// Helper function to generate a set of extension loads to load a vector with /// a resulting wider type. It takes: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 0c21ede7b0dbb4..bc0a3a4589b941 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -4517,6 +4517,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: + Res = WidenVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N)); + break; case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; case ISD::STEP_VECTOR: case ISD::SPLAT_VECTOR: @@ -5903,6 +5906,30 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { N->getOperand(1), N->getOperand(2)); } +SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N) { + SmallVector<SDValue, 16> LdChain; // Chain for the series of load + SDValue Result = GenWidenVectorLoads(LdChain, N, true /*IsAtomic*/); + + if (Result) { + // If we generate a single load, we can use that for the chain. Otherwise, + // build a factor node to remember the multiple loads are independent and + // chain to that. + SDValue NewChain; + if (LdChain.size() == 1) + NewChain = LdChain[0]; + else + NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, LdChain); + + // Modified the chain - switch anything that used the old chain to use + // the new one. + ReplaceValueWith(SDValue(N, 1), NewChain); + + return Result; + } + + report_fatal_error("Unable to widen atomic vector load"); +} + SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { LoadSDNode *LD = cast<LoadSDNode>(N); ISD::LoadExtType ExtType = LD->getExtensionType(); @@ -7701,8 +7728,9 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp); } +template <typename T> SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, - LoadSDNode *LD) { + T *LD, bool IsAtomic) { // The strategy assumes that we can efficiently load power-of-two widths. // The routine chops the vector into the largest vector loads with the same // element type or scalar loads and then recombines it to the widen vector @@ -7759,8 +7787,13 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, } while (TypeSize::isKnownGT(RemainingWidth, NewVTWidth)); } - SDValue LdOp = DAG.getLoad(*FirstVT, dl, Chain, BasePtr, LD->getPointerInfo(), - LD->getOriginalAlign(), MMOFlags, AAInfo); + SDValue LdOp; + if (IsAtomic) + LdOp = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, *FirstVT, *FirstVT, Chain, + BasePtr, LD->getMemOperand()); + else + LdOp = DAG.getLoad(*FirstVT, dl, Chain, BasePtr, LD->getPointerInfo(), + LD->getOriginalAlign(), MMOFlags, AAInfo); LdChain.push_back(LdOp.getValue(1)); // Check if we can load the element with one instruction. diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index 7d4c5c0e10e492..7d8845b91fd1ed 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -1198,6 +1198,13 @@ def : Pat<(i16 (atomic_load_16 addr:$src)), (MOV16rm addr:$src)>; def : Pat<(i32 (atomic_load_32 addr:$src)), (MOV32rm addr:$src)>; def : Pat<(i64 (atomic_load_64 addr:$src)), (MOV64rm addr:$src)>; +def : Pat<(v4i32 (scalar_to_vector (i32 (anyext (i16 (atomic_load_16 addr:$src)))))), + (MOVDI2PDIrm addr:$src)>; // load atomic <2 x i8> +def : Pat<(v4i32 (scalar_to_vector (i32 (atomic_load_32 addr:$src)))), + (MOVDI2PDIrm addr:$src)>; // load atomic <2 x i16> +def : Pat<(v2i64 (scalar_to_vector (i64 (atomic_load_64 addr:$src)))), + (MOV64toPQIrm addr:$src)>; // load atomic <2 x i32,float> + // Floating point loads/stores. def : Pat<(atomic_store_32 (i32 (bitconvert (f32 FR32:$src))), addr:$dst), (MOVSSmr addr:$dst, FR32:$src)>, Requires<[UseSSE1]>; diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 39e9fdfa5e62b0..6c2a7e1d68c382 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -146,6 +146,55 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind { ret <1 x i64> %ret } +define <2 x i8> @atomic_vec2_i8(ptr %x) { +; CHECK3-LABEL: atomic_vec2_i8: +; CHECK3: ## %bb.0: +; CHECK3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK3-NEXT: retq +; +; CHECK0-LABEL: atomic_vec2_i8: +; CHECK0: ## %bb.0: +; CHECK0-NEXT: movw (%rdi), %cx +; CHECK0-NEXT: ## implicit-def: $eax +; CHECK0-NEXT: movw %cx, %ax +; CHECK0-NEXT: movd %eax, %xmm0 +; CHECK0-NEXT: retq + %ret = load atomic <2 x i8>, ptr %x acquire, align 4 + ret <2 x i8> %ret +} + +define <2 x i16> @atomic_vec2_i16(ptr %x) { +; CHECK3-LABEL: atomic_vec2_i16: +; CHECK3: ## %bb.0: +; CHECK3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK3-NEXT: retq +; +; CHECK0-LABEL: atomic_vec2_i16: +; CHECK0: ## %bb.0: +; CHECK0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK0-NEXT: retq + %ret = load atomic <2 x i16>, ptr %x acquire, align 4 + ret <2 x i16> %ret +} + +define <2 x i32> @atomic_vec2_i32_align(ptr %x) { +; CHECK-LABEL: atomic_vec2_i32_align: +; CHECK: ## %bb.0: +; CHECK-NEXT: movq (%rdi), %xmm0 +; CHECK-NEXT: retq + %ret = load atomic <2 x i32>, ptr %x acquire, align 8 + ret <2 x i32> %ret +} + +define <2 x float> @atomic_vec2_float_align(ptr %x) { +; CHECK-LABEL: atomic_vec2_float_align: +; CHECK: ## %bb.0: +; CHECK-NEXT: movq (%rdi), %xmm0 +; CHECK-NEXT: retq + %ret = load atomic <2 x float>, ptr %x acquire, align 8 + ret <2 x float> %ret +} + define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind { ; CHECK3-LABEL: atomic_vec1_ptr: ; CHECK3: ## %bb.0: diff --git a/llvm/test/CodeGen/X86/atomic-unordered.ll b/llvm/test/CodeGen/X86/atomic-unordered.ll index 3fb994cdb751a3..e2803d206c6c57 100644 --- a/llvm/test/CodeGen/X86/atomic-unordered.ll +++ b/llvm/test/CodeGen/X86/atomic-unordered.ll @@ -2275,8 +2275,7 @@ define i64 @load_i16_anyext_i64(ptr %ptr) { ; ; CHECK-O3-LABEL: load_i16_anyext_i64: ; CHECK-O3: # %bb.0: -; CHECK-O3-NEXT: movzwl (%rdi), %eax -; CHECK-O3-NEXT: vmovd %eax, %xmm0 +; CHECK-O3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-O3-NEXT: vmovq %xmm0, %rax ; CHECK-O3-NEXT: retq %v = load atomic i16, ptr %ptr unordered, align 8 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits