domada updated this revision to Diff 466766. domada edited the summary of this revision. domada added a comment.
1. Modified generation of align assumptions. OMPIRBuilder generates now only assumptions calls. The arguments of the assumption calls are generated by Clang. 2. Added integration test to prove that Clang and OMPIRBuilder support aligned clause 3. Simplification of unit tests -> applying review remarks CHANGES SINCE LAST ACTION https://reviews.llvm.org/D133578/new/ https://reviews.llvm.org/D133578 Files: clang/lib/CodeGen/CGStmtOpenMP.cpp clang/test/OpenMP/irbuilder_simd_aligned.cpp llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
Index: mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp =================================================================== --- mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -996,8 +996,9 @@ if (llvm::Optional<uint64_t> safelenVar = loop.getSafelen()) safelen = builder.getInt64(safelenVar.value()); + llvm::DenseMap<llvm::Value *, llvm::Value *> alignedVars; ompBuilder->applySimd( - loopInfo, + loopInfo, alignedVars, loop.getIfExpr() ? moduleTranslation.lookupValue(loop.getIfExpr()) : nullptr, llvm::omp::OrderKind::OMP_ORDER_unknown, simdlen, safelen); Index: llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp =================================================================== --- llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -1767,11 +1767,12 @@ TEST_F(OpenMPIRBuilderTest, ApplySimd) { OpenMPIRBuilder OMPBuilder(*M); - + DenseMap<Value *, Value *> AlignedVars; CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); // Simd-ize the loop. - OMPBuilder.applySimd(CLI, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown, + OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr, + OrderKind::OMP_ORDER_unknown, /* Simdlen */ nullptr, /* Safelen */ nullptr); @@ -1798,13 +1799,76 @@ })); } -TEST_F(OpenMPIRBuilderTest, ApplySimdlen) { +TEST_F(OpenMPIRBuilderTest, ApplySimdCustomAligned) { OpenMPIRBuilder OMPBuilder(*M); + IRBuilder<> Builder(BB); + const int AlignmentValue = 32; + AllocaInst *Alloc1 = + Builder.CreateAlloca(Builder.getInt8PtrTy(), Builder.getInt64(1)); + LoadInst *Load1 = Builder.CreateLoad(Alloc1->getAllocatedType(), Alloc1); + DenseMap<Value *, Value *> AlignedVars; + AlignedVars.insert({Load1, Builder.getInt64(AlignmentValue)}); + + CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); + + // Simd-ize the loop. + OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr, + OrderKind::OMP_ORDER_unknown, + /* Simdlen */ nullptr, + /* Safelen */ nullptr); + + OMPBuilder.finalize(); + EXPECT_FALSE(verifyModule(*M, &errs())); + + PassBuilder PB; + FunctionAnalysisManager FAM; + PB.registerFunctionAnalyses(FAM); + LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F); + + const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops(); + EXPECT_EQ(TopLvl.size(), 1u); + + Loop *L = TopLvl.front(); + EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses")); + EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable")); + + // Check for llvm.access.group metadata attached to the printf + // function in the loop body. + BasicBlock *LoopBody = CLI->getBody(); + EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) { + return I.getMetadata("llvm.access.group") != nullptr; + })); + // Check if number of assumption instructions is equal to number of aligned + // variables + BasicBlock *LoopPreheader = CLI->getPreheader(); + size_t NumAssummptionCallsInPreheader = count_if( + *LoopPreheader, [](Instruction &I) { return isa<AssumeInst>(I); }); + EXPECT_EQ(NumAssummptionCallsInPreheader, AlignedVars.size()); + + // Check if variables are correctly aligned + for (Instruction &Instr : *LoopPreheader) { + if (!isa<AssumeInst>(Instr)) + continue; + AssumeInst *AssumeInstruction = cast<AssumeInst>(&Instr); + if (AssumeInstruction->getNumTotalBundleOperands()) { + auto Bundle = AssumeInstruction->getOperandBundleAt(0); + if (Bundle.getTagName() == "align") { + EXPECT_TRUE(isa<ConstantInt>(Bundle.Inputs[1])); + auto ConstIntVal = dyn_cast<ConstantInt>(Bundle.Inputs[1]); + EXPECT_EQ(ConstIntVal->getSExtValue(), AlignmentValue); + } + } + } +} +TEST_F(OpenMPIRBuilderTest, ApplySimdlen) { + OpenMPIRBuilder OMPBuilder(*M); + DenseMap<Value *, Value *> AlignedVars; CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); // Simd-ize the loop. - OMPBuilder.applySimd(CLI, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown, + OMPBuilder.applySimd(CLI, AlignedVars, + /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown, ConstantInt::get(Type::getInt32Ty(Ctx), 3), /* Safelen */ nullptr); @@ -1834,12 +1898,13 @@ TEST_F(OpenMPIRBuilderTest, ApplySafelenOrderConcurrent) { OpenMPIRBuilder OMPBuilder(*M); + DenseMap<Value *, Value *> AlignedVars; CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); // Simd-ize the loop. OMPBuilder.applySimd( - CLI, /* IfCond */ nullptr, OrderKind::OMP_ORDER_concurrent, + CLI, AlignedVars, /* IfCond */ nullptr, OrderKind::OMP_ORDER_concurrent, /* Simdlen */ nullptr, ConstantInt::get(Type::getInt32Ty(Ctx), 3)); OMPBuilder.finalize(); @@ -1870,13 +1935,13 @@ TEST_F(OpenMPIRBuilderTest, ApplySafelen) { OpenMPIRBuilder OMPBuilder(*M); + DenseMap<Value *, Value *> AlignedVars; CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); - // Simd-ize the loop. - OMPBuilder.applySimd(CLI, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown, - /* Simdlen */ nullptr, - ConstantInt::get(Type::getInt32Ty(Ctx), 3)); + OMPBuilder.applySimd( + CLI, AlignedVars, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown, + /* Simdlen */ nullptr, ConstantInt::get(Type::getInt32Ty(Ctx), 3)); OMPBuilder.finalize(); EXPECT_FALSE(verifyModule(*M, &errs())); @@ -1904,11 +1969,12 @@ TEST_F(OpenMPIRBuilderTest, ApplySimdlenSafelen) { OpenMPIRBuilder OMPBuilder(*M); + DenseMap<Value *, Value *> AlignedVars; CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); - // Simd-ize the loop. - OMPBuilder.applySimd(CLI, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown, + OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr, + OrderKind::OMP_ORDER_unknown, ConstantInt::get(Type::getInt32Ty(Ctx), 2), ConstantInt::get(Type::getInt32Ty(Ctx), 3)); @@ -1939,6 +2005,7 @@ TEST_F(OpenMPIRBuilderTest, ApplySimdLoopIf) { OpenMPIRBuilder OMPBuilder(*M); IRBuilder<> Builder(BB); + DenseMap<Value *, Value *> AlignedVars; AllocaInst *Alloc1 = Builder.CreateAlloca(Builder.getInt32Ty()); AllocaInst *Alloc2 = Builder.CreateAlloca(Builder.getInt32Ty()); @@ -1953,7 +2020,7 @@ CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); // Simd-ize the loop with if condition - OMPBuilder.applySimd(CLI, IfCmp, OrderKind::OMP_ORDER_unknown, + OMPBuilder.applySimd(CLI, AlignedVars, IfCmp, OrderKind::OMP_ORDER_unknown, ConstantInt::get(Type::getInt32Ty(Ctx), 3), /* Safelen */ nullptr); Index: llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp =================================================================== --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -3005,9 +3005,10 @@ Builder.CreateBr(NewBlocks.front()); } -void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop, Value *IfCond, - OrderKind Order, ConstantInt *Simdlen, - ConstantInt *Safelen) { +void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop, + DenseMap<Value *, Value *> AlignedVars, + Value *IfCond, OrderKind Order, + ConstantInt *Simdlen, ConstantInt *Safelen) { LLVMContext &Ctx = Builder.getContext(); Function *F = CanonicalLoop->getFunction(); @@ -3025,6 +3026,17 @@ LoopInfo &&LI = LIA.run(*F, FAM); Loop *L = LI.getLoopFor(CanonicalLoop->getHeader()); + if (AlignedVars.size()) { + InsertPointTy IP = Builder.saveIP(); + Builder.SetInsertPoint(CanonicalLoop->getPreheader()->getTerminator()); + for (auto &AlignedItem : AlignedVars) { + Value *AlignedPtr = AlignedItem.first; + Value *Alignment = AlignedItem.second; + Builder.CreateAlignmentAssumption(F->getParent()->getDataLayout(), + AlignedPtr, Alignment); + } + Builder.restoreIP(IP); + } if (IfCond) { ValueToValueMapTy VMap; Index: llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h =================================================================== --- llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -617,13 +617,17 @@ /// to the cloned loop. The cloned loop is executed when ifCond is evaluated /// to false. /// - /// \param Loop The loop to simd-ize. - /// \param IfCond The value which corresponds to the if clause condition. - /// \param Order The enum to map order clause - /// \param Simdlen The Simdlen length to apply to the simd loop. - /// \param Safelen The Safelen length to apply to the simd loop. - void applySimd(CanonicalLoopInfo *Loop, Value *IfCond, omp::OrderKind Order, - ConstantInt *Simdlen, ConstantInt *Safelen); + /// \param Loop The loop to simd-ize. + /// \param AlignedVars The map of the variables which need to aligned with. + /// \param IfCond The value which corresponds to the if clause + /// condition. + /// \param Order The enum to map order clause. + /// \param Simdlen The Simdlen length to apply to the simd loop. + /// \param Safelen The Safelen length to apply to the simd loop. + void applySimd(CanonicalLoopInfo *Loop, + llvm::DenseMap<llvm::Value *, llvm::Value *> AlignedVars, + Value *IfCond, omp::OrderKind Order, ConstantInt *Simdlen, + ConstantInt *Safelen); /// Generator for '#omp flush' /// Index: clang/test/OpenMP/irbuilder_simd_aligned.cpp =================================================================== --- /dev/null +++ clang/test/OpenMP/irbuilder_simd_aligned.cpp @@ -0,0 +1,178 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals +// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// expected-no-diagnostics + +struct S { + int a, b; +}; + +struct P { + int a, b; +}; + +// +#define N 32 + +// CHECK-LABEL: @_Z6simplePfS_Pi( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK-NEXT: [[P:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: [[D:%.*]] = alloca [32 x i32], align 16 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[PP:%.*]] = alloca [[STRUCT_P:%.*]], align 4 +// CHECK-NEXT: [[I1:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED2:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED15:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED16:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR17:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store float* [[A:%.*]], float** [[A_ADDR]], align 8 +// CHECK-NEXT: store float* [[B:%.*]], float** [[B_ADDR]], align 8 +// CHECK-NEXT: store i32* [[C:%.*]], i32** [[C_ADDR]], align 8 +// CHECK-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK-NEXT: br label [[FOR_COND:%.*]] +// CHECK: for.cond: +// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 32 +// CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK: for.body: +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP2]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i32], [32 x i32]* [[D]], i64 0, i64 [[IDXPROM]] +// CHECK-NEXT: store i32 [[TMP1]], i32* [[ARRAYIDX]], align 4 +// CHECK-NEXT: br label [[FOR_INC:%.*]] +// CHECK: for.inc: +// CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK-NEXT: store i32 [[INC]], i32* [[I]], align 4 +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK: for.end: +// CHECK-NEXT: [[TMP4:%.*]] = load float*, float** [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[P]], align 8 +// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [32 x i32], [32 x i32]* [[D]], i64 0, i64 0 +// CHECK-NEXT: store i32 3, i32* [[I1]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-NEXT: store i32* [[I1]], i32** [[TMP6]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED2]], i32 0, i32 0 +// CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[I1]], align 4 +// CHECK-NEXT: store i32 [[TMP8]], i32* [[TMP7]], align 4 +// CHECK-NEXT: call void @__captured_stmt(i32* [[DOTCOUNT_ADDR]], %struct.anon* [[AGG_CAPTURED]]) +// CHECK-NEXT: [[DOTCOUNT:%.*]] = load i32, i32* [[DOTCOUNT_ADDR]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]] +// CHECK: omp_loop.preheader: +// CHECK-COUNT-3: call void @llvm.assume(i1 true) [ "align" +// CHECK-NEXT: br label [[OMP_LOOP_HEADER:%.*]] +// CHECK: omp_loop.header: +// CHECK-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ] +// CHECK-NEXT: br label [[OMP_LOOP_COND:%.*]] +// CHECK: omp_loop.cond: +// CHECK-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[DOTCOUNT]] +// CHECK-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]] +// CHECK: omp_loop.body: +// CHECK-NEXT: call void @__captured_stmt.1(i32* [[I1]], i32 [[OMP_LOOP_IV]], %struct.anon.0* [[AGG_CAPTURED2]]), !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK-NEXT: [[TMP9:%.*]] = load float*, float** [[B_ADDR]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[I1]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP9]], i64 [[IDXPROM3]] +// CHECK-NEXT: [[TMP11:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i32 0, i32 0 +// CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[A5]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to float +// CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP11]], [[CONV]] +// CHECK-NEXT: [[TMP13:%.*]] = load %struct.S*, %struct.S** [[P]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: [[A6:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TMP13]], i32 0, i32 0 +// CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[A6]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: [[CONV7:%.*]] = sitofp i32 [[TMP14]] to float +// CHECK-NEXT: [[ADD8:%.*]] = fadd float [[ADD]], [[CONV7]] +// CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[I1]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [32 x i32], [32 x i32]* [[D]], i64 0, i64 [[IDXPROM9]] +// CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: [[CONV11:%.*]] = sitofp i32 [[TMP16]] to float +// CHECK-NEXT: [[ADD12:%.*]] = fadd float [[ADD8]], [[CONV11]] +// CHECK-NEXT: [[TMP17:%.*]] = load float*, float** [[A_ADDR]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* [[I1]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM13]] +// CHECK-NEXT: store float [[ADD12]], float* [[ARRAYIDX14]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: br label [[OMP_LOOP_INC]] +// CHECK: omp_loop.inc: +// CHECK-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1 +// CHECK-NEXT: br label [[OMP_LOOP_HEADER]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK: omp_loop.exit: +// CHECK-NEXT: br label [[OMP_LOOP_AFTER:%.*]] +// CHECK: omp_loop.after: +// CHECK-NEXT: store i32 3, i32* [[J]], align 4 +// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED15]], i32 0, i32 0 +// CHECK-NEXT: store i32* [[J]], i32** [[TMP19]], align 8 +// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED16]], i32 0, i32 0 +// CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[J]], align 4 +// CHECK-NEXT: store i32 [[TMP21]], i32* [[TMP20]], align 4 +// CHECK-NEXT: call void @__captured_stmt.2(i32* [[DOTCOUNT_ADDR17]], %struct.anon.1* [[AGG_CAPTURED15]]) +// CHECK-NEXT: [[DOTCOUNT18:%.*]] = load i32, i32* [[DOTCOUNT_ADDR17]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER19:%.*]] +// CHECK: omp_loop.preheader19: +// CHECK-NEXT: br label [[OMP_LOOP_HEADER20:%.*]] +// CHECK: omp_loop.header20: +// CHECK-NEXT: [[OMP_LOOP_IV26:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER19]] ], [ [[OMP_LOOP_NEXT28:%.*]], [[OMP_LOOP_INC23:%.*]] ] +// CHECK-NEXT: br label [[OMP_LOOP_COND21:%.*]] +// CHECK: omp_loop.cond21: +// CHECK-NEXT: [[OMP_LOOP_CMP27:%.*]] = icmp ult i32 [[OMP_LOOP_IV26]], [[DOTCOUNT18]] +// CHECK-NEXT: br i1 [[OMP_LOOP_CMP27]], label [[OMP_LOOP_BODY22:%.*]], label [[OMP_LOOP_EXIT24:%.*]] +// CHECK: omp_loop.body22: +// CHECK-NEXT: call void @__captured_stmt.3(i32* [[J]], i32 [[OMP_LOOP_IV26]], %struct.anon.2* [[AGG_CAPTURED16]]), !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK-NEXT: [[A29:%.*]] = getelementptr inbounds [[STRUCT_P]], %struct.P* [[PP]], i32 0, i32 0 +// CHECK-NEXT: [[TMP22:%.*]] = load i32, i32* [[A29]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK-NEXT: [[TMP23:%.*]] = load i32*, i32** [[C_ADDR]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK-NEXT: [[TMP24:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK-NEXT: [[IDXPROM30:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i64 [[IDXPROM30]] +// CHECK-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX31]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK-NEXT: br label [[OMP_LOOP_INC23]] +// CHECK: omp_loop.inc23: +// CHECK-NEXT: [[OMP_LOOP_NEXT28]] = add nuw i32 [[OMP_LOOP_IV26]], 1 +// CHECK-NEXT: br label [[OMP_LOOP_HEADER20]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK: omp_loop.exit24: +// CHECK-NEXT: br label [[OMP_LOOP_AFTER25:%.*]] +// CHECK: omp_loop.after25: +// CHECK-NEXT: ret void +// +void simple(float *a, float *b, int *c) { + S s, *p; + int D[N]; + for (int i = 0; i <N; ++i) + D[i] = i; + P pp; +#pragma omp simd aligned (a:128) aligned(p:64) aligned(D) + for (int i = 3; i < N; i += 5) { + a[i] = b[i] + s.a + p->a + D[i]; + } + +#pragma omp simd + for (int j = 3; j < N; j += 5) { + c[j] = pp.a; + } +} +//. +// CHECK: attributes #0 = { mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// CHECK: attributes #1 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// CHECK: attributes #2 = { inaccessiblememonly nocallback nofree nosync nounwind willreturn } +//. +// CHECK: !0 = !{i32 1, !"wchar_size", i32 4} +// CHECK: !1 = !{i32 7, !"openmp", i32 50} +// CHECK: !3 = distinct !{!3, !4} +// CHECK: !4 = !{!"llvm.loop.mustprogress"} +// CHECK: !5 = distinct !{} +// CHECK: !6 = distinct !{!6, !7, !8} +// CHECK: !7 = !{!"llvm.loop.parallel_accesses", !5} +// CHECK: !8 = !{!"llvm.loop.vectorize.enable", i1 true} +// CHECK: !9 = distinct !{} +// CHECK: !10 = distinct !{!10, !11, !8} +// CHECK: !11 = !{!"llvm.loop.parallel_accesses", !9} +//. Index: clang/lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- clang/lib/CodeGen/CGStmtOpenMP.cpp +++ clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -2602,7 +2602,7 @@ for (OMPClause *C : S.clauses()) { // Currently only order, simdlen and safelen clauses are supported if (!(isa<OMPSimdlenClause>(C) || isa<OMPSafelenClause>(C) || - isa<OMPOrderClause>(C))) + isa<OMPOrderClause>(C) || isa<OMPAlignedClause>(C))) return false; } @@ -2628,6 +2628,36 @@ } return true; } +static llvm::DenseMap<llvm::Value *, llvm::Value *> +GetAlignedMapping(const OMPSimdDirective &S, CodeGenFunction &CGF) { + llvm::DenseMap<llvm::Value *, llvm::Value *> AlignedVars; + for (const auto *Clause : S.getClausesOfKind<OMPAlignedClause>()) { + llvm::APInt ClauseAlignment(64, 0); + if (const Expr *AlignmentExpr = Clause->getAlignment()) { + auto *AlignmentCI = + cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr)); + ClauseAlignment = AlignmentCI->getValue(); + } + for (const Expr *E : Clause->varlists()) { + llvm::APInt Alignment(ClauseAlignment); + if (Alignment == 0) { + // OpenMP [2.8.1, Description] + // If no optional parameter is specified, implementation-defined default + // alignments for SIMD instructions on the target platforms are assumed. + Alignment = + CGF.getContext() + .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign( + E->getType()->getPointeeType())) + .getQuantity(); + } + assert((Alignment == 0 || Alignment.isPowerOf2()) && + "alignment is not power of 2"); + llvm::Value *PtrValue = CGF.EmitScalarExpr(E); + AlignedVars[PtrValue] = CGF.Builder.getInt64(Alignment.getSExtValue()); + } + } + return AlignedVars; +} void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { bool UseOMPIRBuilder = @@ -2637,6 +2667,8 @@ PrePostActionTy &) { // Use the OpenMPIRBuilder if enabled. if (UseOMPIRBuilder) { + llvm::DenseMap<llvm::Value *, llvm::Value *> AlignedVars = + GetAlignedMapping(S, CGF); // Emit the associated statement and get its loop representation. const Stmt *Inner = S.getRawStmt(); llvm::CanonicalLoopInfo *CLI = @@ -2669,7 +2701,8 @@ } // Add simd metadata to the collapsed loop. Do not generate // another loop for if clause. Support for if clause is done earlier. - OMPBuilder.applySimd(CLI, /*IfCond*/ nullptr, Order, Simdlen, Safelen); + OMPBuilder.applySimd(CLI, AlignedVars, + /*IfCond*/ nullptr, Order, Simdlen, Safelen); return; } };
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits