domada updated this revision to Diff 466774.
domada marked an inline comment as done.
domada added a comment.

1. Update description of AlignedVars argument


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D133578/new/

https://reviews.llvm.org/D133578

Files:
  clang/lib/CodeGen/CGStmtOpenMP.cpp
  clang/test/OpenMP/irbuilder_simd_aligned.cpp
  llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
  llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
  llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
  mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Index: mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
===================================================================
--- mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -996,8 +996,9 @@
   if (llvm::Optional<uint64_t> safelenVar = loop.getSafelen())
     safelen = builder.getInt64(safelenVar.value());
 
+  llvm::DenseMap<llvm::Value *, llvm::Value *> alignedVars;
   ompBuilder->applySimd(
-      loopInfo,
+      loopInfo, alignedVars,
       loop.getIfExpr() ? moduleTranslation.lookupValue(loop.getIfExpr())
                        : nullptr,
       llvm::omp::OrderKind::OMP_ORDER_unknown, simdlen, safelen);
Index: llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
===================================================================
--- llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -1767,11 +1767,12 @@
 
 TEST_F(OpenMPIRBuilderTest, ApplySimd) {
   OpenMPIRBuilder OMPBuilder(*M);
-
+  DenseMap<Value *, Value *> AlignedVars;
   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
 
   // Simd-ize the loop.
-  OMPBuilder.applySimd(CLI, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown,
+  OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr,
+                       OrderKind::OMP_ORDER_unknown,
                        /* Simdlen */ nullptr,
                        /* Safelen */ nullptr);
 
@@ -1798,13 +1799,76 @@
   }));
 }
 
-TEST_F(OpenMPIRBuilderTest, ApplySimdlen) {
+TEST_F(OpenMPIRBuilderTest, ApplySimdCustomAligned) {
   OpenMPIRBuilder OMPBuilder(*M);
+  IRBuilder<> Builder(BB);
+  const int AlignmentValue = 32;
+  AllocaInst *Alloc1 =
+      Builder.CreateAlloca(Builder.getInt8PtrTy(), Builder.getInt64(1));
+  LoadInst *Load1 = Builder.CreateLoad(Alloc1->getAllocatedType(), Alloc1);
+  DenseMap<Value *, Value *> AlignedVars;
+  AlignedVars.insert({Load1, Builder.getInt64(AlignmentValue)});
+
+  CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
+
+  // Simd-ize the loop.
+  OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr,
+                       OrderKind::OMP_ORDER_unknown,
+                       /* Simdlen */ nullptr,
+                       /* Safelen */ nullptr);
+
+  OMPBuilder.finalize();
+  EXPECT_FALSE(verifyModule(*M, &errs()));
+
+  PassBuilder PB;
+  FunctionAnalysisManager FAM;
+  PB.registerFunctionAnalyses(FAM);
+  LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
+
+  const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
+  EXPECT_EQ(TopLvl.size(), 1u);
+
+  Loop *L = TopLvl.front();
+  EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
+  EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
+
+  // Check for llvm.access.group metadata attached to the printf
+  // function in the loop body.
+  BasicBlock *LoopBody = CLI->getBody();
+  EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) {
+    return I.getMetadata("llvm.access.group") != nullptr;
+  }));
 
+  // Check if number of assumption instructions is equal to number of aligned
+  // variables
+  BasicBlock *LoopPreheader = CLI->getPreheader();
+  size_t NumAssummptionCallsInPreheader = count_if(
+      *LoopPreheader, [](Instruction &I) { return isa<AssumeInst>(I); });
+  EXPECT_EQ(NumAssummptionCallsInPreheader, AlignedVars.size());
+
+  // Check if variables are correctly aligned
+  for (Instruction &Instr : *LoopPreheader) {
+    if (!isa<AssumeInst>(Instr))
+      continue;
+    AssumeInst *AssumeInstruction = cast<AssumeInst>(&Instr);
+    if (AssumeInstruction->getNumTotalBundleOperands()) {
+      auto Bundle = AssumeInstruction->getOperandBundleAt(0);
+      if (Bundle.getTagName() == "align") {
+        EXPECT_TRUE(isa<ConstantInt>(Bundle.Inputs[1]));
+        auto ConstIntVal = dyn_cast<ConstantInt>(Bundle.Inputs[1]);
+        EXPECT_EQ(ConstIntVal->getSExtValue(), AlignmentValue);
+      }
+    }
+  }
+}
+TEST_F(OpenMPIRBuilderTest, ApplySimdlen) {
+  OpenMPIRBuilder OMPBuilder(*M);
+  DenseMap<Value *, Value *> AlignedVars;
   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
 
   // Simd-ize the loop.
-  OMPBuilder.applySimd(CLI, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown,
+  OMPBuilder.applySimd(CLI, AlignedVars,
+                       /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown,
                        ConstantInt::get(Type::getInt32Ty(Ctx), 3),
                        /* Safelen */ nullptr);
 
@@ -1834,12 +1898,13 @@
 
 TEST_F(OpenMPIRBuilderTest, ApplySafelenOrderConcurrent) {
   OpenMPIRBuilder OMPBuilder(*M);
+  DenseMap<Value *, Value *> AlignedVars;
 
   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
 
   // Simd-ize the loop.
   OMPBuilder.applySimd(
-      CLI, /* IfCond */ nullptr, OrderKind::OMP_ORDER_concurrent,
+      CLI, AlignedVars, /* IfCond */ nullptr, OrderKind::OMP_ORDER_concurrent,
       /* Simdlen */ nullptr, ConstantInt::get(Type::getInt32Ty(Ctx), 3));
 
   OMPBuilder.finalize();
@@ -1870,13 +1935,13 @@
 
 TEST_F(OpenMPIRBuilderTest, ApplySafelen) {
   OpenMPIRBuilder OMPBuilder(*M);
+  DenseMap<Value *, Value *> AlignedVars;
 
   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
 
-  // Simd-ize the loop.
-  OMPBuilder.applySimd(CLI, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown,
-                       /* Simdlen */ nullptr,
-                       ConstantInt::get(Type::getInt32Ty(Ctx), 3));
+  OMPBuilder.applySimd(
+      CLI, AlignedVars, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown,
+      /* Simdlen */ nullptr, ConstantInt::get(Type::getInt32Ty(Ctx), 3));
 
   OMPBuilder.finalize();
   EXPECT_FALSE(verifyModule(*M, &errs()));
@@ -1904,11 +1969,12 @@
 
 TEST_F(OpenMPIRBuilderTest, ApplySimdlenSafelen) {
   OpenMPIRBuilder OMPBuilder(*M);
+  DenseMap<Value *, Value *> AlignedVars;
 
   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
 
-  // Simd-ize the loop.
-  OMPBuilder.applySimd(CLI, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown,
+  OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr,
+                       OrderKind::OMP_ORDER_unknown,
                        ConstantInt::get(Type::getInt32Ty(Ctx), 2),
                        ConstantInt::get(Type::getInt32Ty(Ctx), 3));
 
@@ -1939,6 +2005,7 @@
 TEST_F(OpenMPIRBuilderTest, ApplySimdLoopIf) {
   OpenMPIRBuilder OMPBuilder(*M);
   IRBuilder<> Builder(BB);
+  DenseMap<Value *, Value *> AlignedVars;
   AllocaInst *Alloc1 = Builder.CreateAlloca(Builder.getInt32Ty());
   AllocaInst *Alloc2 = Builder.CreateAlloca(Builder.getInt32Ty());
 
@@ -1953,7 +2020,7 @@
   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
 
   // Simd-ize the loop with if condition
-  OMPBuilder.applySimd(CLI, IfCmp, OrderKind::OMP_ORDER_unknown,
+  OMPBuilder.applySimd(CLI, AlignedVars, IfCmp, OrderKind::OMP_ORDER_unknown,
                        ConstantInt::get(Type::getInt32Ty(Ctx), 3),
                        /* Safelen */ nullptr);
 
Index: llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
===================================================================
--- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -3005,9 +3005,10 @@
   Builder.CreateBr(NewBlocks.front());
 }
 
-void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop, Value *IfCond,
-                                OrderKind Order, ConstantInt *Simdlen,
-                                ConstantInt *Safelen) {
+void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop,
+                                DenseMap<Value *, Value *> AlignedVars,
+                                Value *IfCond, OrderKind Order,
+                                ConstantInt *Simdlen, ConstantInt *Safelen) {
   LLVMContext &Ctx = Builder.getContext();
 
   Function *F = CanonicalLoop->getFunction();
@@ -3025,6 +3026,17 @@
   LoopInfo &&LI = LIA.run(*F, FAM);
 
   Loop *L = LI.getLoopFor(CanonicalLoop->getHeader());
+  if (AlignedVars.size()) {
+    InsertPointTy IP = Builder.saveIP();
+    Builder.SetInsertPoint(CanonicalLoop->getPreheader()->getTerminator());
+    for (auto &AlignedItem : AlignedVars) {
+      Value *AlignedPtr = AlignedItem.first;
+      Value *Alignment = AlignedItem.second;
+      Builder.CreateAlignmentAssumption(F->getParent()->getDataLayout(),
+                                        AlignedPtr, Alignment);
+    }
+    Builder.restoreIP(IP);
+  }
 
   if (IfCond) {
     ValueToValueMapTy VMap;
Index: llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
===================================================================
--- llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -617,13 +617,18 @@
   /// to the cloned loop. The cloned loop is executed when ifCond is evaluated
   /// to false.
   ///
-  /// \param Loop    The loop to simd-ize.
-  /// \param IfCond  The value which corresponds to the if clause condition.
-  /// \param Order   The enum to map order clause
-  /// \param Simdlen The Simdlen length to apply to the simd loop.
-  /// \param Safelen The Safelen length to apply to the simd loop.
-  void applySimd(CanonicalLoopInfo *Loop, Value *IfCond, omp::OrderKind Order,
-                 ConstantInt *Simdlen, ConstantInt *Safelen);
+  /// \param Loop        The loop to simd-ize.
+  /// \param AlignedVars The map which containts pairs of the pointer
+  ///                    and its corresponding alignment.
+  /// \param IfCond      The value which corresponds to the if clause
+  ///                    condition.
+  /// \param Order       The enum to map order clause.
+  /// \param Simdlen     The Simdlen length to apply to the simd loop.
+  /// \param Safelen     The Safelen length to apply to the simd loop.
+  void applySimd(CanonicalLoopInfo *Loop,
+                 llvm::DenseMap<llvm::Value *, llvm::Value *> AlignedVars,
+                 Value *IfCond, omp::OrderKind Order, ConstantInt *Simdlen,
+                 ConstantInt *Safelen);
 
   /// Generator for '#omp flush'
   ///
Index: clang/test/OpenMP/irbuilder_simd_aligned.cpp
===================================================================
--- /dev/null
+++ clang/test/OpenMP/irbuilder_simd_aligned.cpp
@@ -0,0 +1,178 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals
+// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+
+struct S {
+  int a, b;
+};
+
+struct P {
+  int a, b;
+};
+
+//
+#define N 32
+
+// CHECK-LABEL: @_Z6simplePfS_Pi(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca float*, align 8
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca float*, align 8
+// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca i32*, align 8
+// CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 4
+// CHECK-NEXT:    [[P:%.*]] = alloca %struct.S*, align 8
+// CHECK-NEXT:    [[D:%.*]] = alloca [32 x i32], align 16
+// CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[PP:%.*]] = alloca [[STRUCT_P:%.*]], align 4
+// CHECK-NEXT:    [[I1:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8
+// CHECK-NEXT:    [[AGG_CAPTURED2:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4
+// CHECK-NEXT:    [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[J:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[AGG_CAPTURED15:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8
+// CHECK-NEXT:    [[AGG_CAPTURED16:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4
+// CHECK-NEXT:    [[DOTCOUNT_ADDR17:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store float* [[A:%.*]], float** [[A_ADDR]], align 8
+// CHECK-NEXT:    store float* [[B:%.*]], float** [[B_ADDR]], align 8
+// CHECK-NEXT:    store i32* [[C:%.*]], i32** [[C_ADDR]], align 8
+// CHECK-NEXT:    store i32 0, i32* [[I]], align 4
+// CHECK-NEXT:    br label [[FOR_COND:%.*]]
+// CHECK:       for.cond:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[I]], align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], 32
+// CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// CHECK:       for.body:
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[I]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[I]], align 4
+// CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP2]] to i64
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i32], [32 x i32]* [[D]], i64 0, i64 [[IDXPROM]]
+// CHECK-NEXT:    store i32 [[TMP1]], i32* [[ARRAYIDX]], align 4
+// CHECK-NEXT:    br label [[FOR_INC:%.*]]
+// CHECK:       for.inc:
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[I]], align 4
+// CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP3]], 1
+// CHECK-NEXT:    store i32 [[INC]], i32* [[I]], align 4
+// CHECK-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
+// CHECK:       for.end:
+// CHECK-NEXT:    [[TMP4:%.*]] = load float*, float** [[A_ADDR]], align 8
+// CHECK-NEXT:    [[TMP5:%.*]] = load %struct.S*, %struct.S** [[P]], align 8
+// CHECK-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [32 x i32], [32 x i32]* [[D]], i64 0, i64 0
+// CHECK-NEXT:    store i32 3, i32* [[I1]], align 4
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0
+// CHECK-NEXT:    store i32* [[I1]], i32** [[TMP6]], align 8
+// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED2]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[I1]], align 4
+// CHECK-NEXT:    store i32 [[TMP8]], i32* [[TMP7]], align 4
+// CHECK-NEXT:    call void @__captured_stmt(i32* [[DOTCOUNT_ADDR]], %struct.anon* [[AGG_CAPTURED]])
+// CHECK-NEXT:    [[DOTCOUNT:%.*]] = load i32, i32* [[DOTCOUNT_ADDR]], align 4
+// CHECK-NEXT:    br label [[OMP_LOOP_PREHEADER:%.*]]
+// CHECK:       omp_loop.preheader:
+// CHECK-COUNT-3: call void @llvm.assume(i1 true) [ "align"
+// CHECK-NEXT:    br label [[OMP_LOOP_HEADER:%.*]]
+// CHECK:       omp_loop.header:
+// CHECK-NEXT:    [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ]
+// CHECK-NEXT:    br label [[OMP_LOOP_COND:%.*]]
+// CHECK:       omp_loop.cond:
+// CHECK-NEXT:    [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[DOTCOUNT]]
+// CHECK-NEXT:    br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]]
+// CHECK:       omp_loop.body:
+// CHECK-NEXT:    call void @__captured_stmt.1(i32* [[I1]], i32 [[OMP_LOOP_IV]], %struct.anon.0* [[AGG_CAPTURED2]]), !llvm.access.group [[ACC_GRP5:![0-9]+]]
+// CHECK-NEXT:    [[TMP9:%.*]] = load float*, float** [[B_ADDR]], align 8, !llvm.access.group [[ACC_GRP5]]
+// CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* [[I1]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK-NEXT:    [[IDXPROM3:%.*]] = sext i32 [[TMP10]] to i64
+// CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP9]], i64 [[IDXPROM3]]
+// CHECK-NEXT:    [[TMP11:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK-NEXT:    [[A5:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP12:%.*]] = load i32, i32* [[A5]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP12]] to float
+// CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP11]], [[CONV]]
+// CHECK-NEXT:    [[TMP13:%.*]] = load %struct.S*, %struct.S** [[P]], align 8, !llvm.access.group [[ACC_GRP5]]
+// CHECK-NEXT:    [[A6:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TMP13]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP14:%.*]] = load i32, i32* [[A6]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK-NEXT:    [[CONV7:%.*]] = sitofp i32 [[TMP14]] to float
+// CHECK-NEXT:    [[ADD8:%.*]] = fadd float [[ADD]], [[CONV7]]
+// CHECK-NEXT:    [[TMP15:%.*]] = load i32, i32* [[I1]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK-NEXT:    [[IDXPROM9:%.*]] = sext i32 [[TMP15]] to i64
+// CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds [32 x i32], [32 x i32]* [[D]], i64 0, i64 [[IDXPROM9]]
+// CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK-NEXT:    [[CONV11:%.*]] = sitofp i32 [[TMP16]] to float
+// CHECK-NEXT:    [[ADD12:%.*]] = fadd float [[ADD8]], [[CONV11]]
+// CHECK-NEXT:    [[TMP17:%.*]] = load float*, float** [[A_ADDR]], align 8, !llvm.access.group [[ACC_GRP5]]
+// CHECK-NEXT:    [[TMP18:%.*]] = load i32, i32* [[I1]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK-NEXT:    [[IDXPROM13:%.*]] = sext i32 [[TMP18]] to i64
+// CHECK-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM13]]
+// CHECK-NEXT:    store float [[ADD12]], float* [[ARRAYIDX14]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK-NEXT:    br label [[OMP_LOOP_INC]]
+// CHECK:       omp_loop.inc:
+// CHECK-NEXT:    [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1
+// CHECK-NEXT:    br label [[OMP_LOOP_HEADER]], !llvm.loop [[LOOP6:![0-9]+]]
+// CHECK:       omp_loop.exit:
+// CHECK-NEXT:    br label [[OMP_LOOP_AFTER:%.*]]
+// CHECK:       omp_loop.after:
+// CHECK-NEXT:    store i32 3, i32* [[J]], align 4
+// CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED15]], i32 0, i32 0
+// CHECK-NEXT:    store i32* [[J]], i32** [[TMP19]], align 8
+// CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED16]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP21:%.*]] = load i32, i32* [[J]], align 4
+// CHECK-NEXT:    store i32 [[TMP21]], i32* [[TMP20]], align 4
+// CHECK-NEXT:    call void @__captured_stmt.2(i32* [[DOTCOUNT_ADDR17]], %struct.anon.1* [[AGG_CAPTURED15]])
+// CHECK-NEXT:    [[DOTCOUNT18:%.*]] = load i32, i32* [[DOTCOUNT_ADDR17]], align 4
+// CHECK-NEXT:    br label [[OMP_LOOP_PREHEADER19:%.*]]
+// CHECK:       omp_loop.preheader19:
+// CHECK-NEXT:    br label [[OMP_LOOP_HEADER20:%.*]]
+// CHECK:       omp_loop.header20:
+// CHECK-NEXT:    [[OMP_LOOP_IV26:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER19]] ], [ [[OMP_LOOP_NEXT28:%.*]], [[OMP_LOOP_INC23:%.*]] ]
+// CHECK-NEXT:    br label [[OMP_LOOP_COND21:%.*]]
+// CHECK:       omp_loop.cond21:
+// CHECK-NEXT:    [[OMP_LOOP_CMP27:%.*]] = icmp ult i32 [[OMP_LOOP_IV26]], [[DOTCOUNT18]]
+// CHECK-NEXT:    br i1 [[OMP_LOOP_CMP27]], label [[OMP_LOOP_BODY22:%.*]], label [[OMP_LOOP_EXIT24:%.*]]
+// CHECK:       omp_loop.body22:
+// CHECK-NEXT:    call void @__captured_stmt.3(i32* [[J]], i32 [[OMP_LOOP_IV26]], %struct.anon.2* [[AGG_CAPTURED16]]), !llvm.access.group [[ACC_GRP9:![0-9]+]]
+// CHECK-NEXT:    [[A29:%.*]] = getelementptr inbounds [[STRUCT_P]], %struct.P* [[PP]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP22:%.*]] = load i32, i32* [[A29]], align 4, !llvm.access.group [[ACC_GRP9]]
+// CHECK-NEXT:    [[TMP23:%.*]] = load i32*, i32** [[C_ADDR]], align 8, !llvm.access.group [[ACC_GRP9]]
+// CHECK-NEXT:    [[TMP24:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group [[ACC_GRP9]]
+// CHECK-NEXT:    [[IDXPROM30:%.*]] = sext i32 [[TMP24]] to i64
+// CHECK-NEXT:    [[ARRAYIDX31:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i64 [[IDXPROM30]]
+// CHECK-NEXT:    store i32 [[TMP22]], i32* [[ARRAYIDX31]], align 4, !llvm.access.group [[ACC_GRP9]]
+// CHECK-NEXT:    br label [[OMP_LOOP_INC23]]
+// CHECK:       omp_loop.inc23:
+// CHECK-NEXT:    [[OMP_LOOP_NEXT28]] = add nuw i32 [[OMP_LOOP_IV26]], 1
+// CHECK-NEXT:    br label [[OMP_LOOP_HEADER20]], !llvm.loop [[LOOP10:![0-9]+]]
+// CHECK:       omp_loop.exit24:
+// CHECK-NEXT:    br label [[OMP_LOOP_AFTER25:%.*]]
+// CHECK:       omp_loop.after25:
+// CHECK-NEXT:    ret void
+//
+void simple(float *a, float *b, int *c) {
+  S s, *p;
+  int D[N];
+  for (int i = 0; i <N; ++i)
+    D[i] = i;
+  P pp;
+#pragma omp simd aligned (a:128) aligned(p:64) aligned(D)
+  for (int i = 3; i < N; i += 5) {
+    a[i] = b[i] + s.a + p->a + D[i];
+  }
+
+#pragma omp simd
+  for (int j = 3; j < N; j += 5) {
+    c[j] = pp.a;
+  }
+}
+//.
+// CHECK: attributes #0 = { mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
+// CHECK: attributes #1 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
+// CHECK: attributes #2 = { inaccessiblememonly nocallback nofree nosync nounwind willreturn }
+//.
+// CHECK: !0 = !{i32 1, !"wchar_size", i32 4}
+// CHECK: !1 = !{i32 7, !"openmp", i32 50}
+// CHECK: !3 = distinct !{!3, !4}
+// CHECK: !4 = !{!"llvm.loop.mustprogress"}
+// CHECK: !5 = distinct !{}
+// CHECK: !6 = distinct !{!6, !7, !8}
+// CHECK: !7 = !{!"llvm.loop.parallel_accesses", !5}
+// CHECK: !8 = !{!"llvm.loop.vectorize.enable", i1 true}
+// CHECK: !9 = distinct !{}
+// CHECK: !10 = distinct !{!10, !11, !8}
+// CHECK: !11 = !{!"llvm.loop.parallel_accesses", !9}
+//.
Index: clang/lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -2602,7 +2602,7 @@
   for (OMPClause *C : S.clauses()) {
     // Currently only order, simdlen and safelen clauses are supported
     if (!(isa<OMPSimdlenClause>(C) || isa<OMPSafelenClause>(C) ||
-          isa<OMPOrderClause>(C)))
+          isa<OMPOrderClause>(C) || isa<OMPAlignedClause>(C)))
       return false;
   }
 
@@ -2628,6 +2628,36 @@
   }
   return true;
 }
+static llvm::DenseMap<llvm::Value *, llvm::Value *>
+GetAlignedMapping(const OMPSimdDirective &S, CodeGenFunction &CGF) {
+  llvm::DenseMap<llvm::Value *, llvm::Value *> AlignedVars;
+  for (const auto *Clause : S.getClausesOfKind<OMPAlignedClause>()) {
+    llvm::APInt ClauseAlignment(64, 0);
+    if (const Expr *AlignmentExpr = Clause->getAlignment()) {
+      auto *AlignmentCI =
+          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
+      ClauseAlignment = AlignmentCI->getValue();
+    }
+    for (const Expr *E : Clause->varlists()) {
+      llvm::APInt Alignment(ClauseAlignment);
+      if (Alignment == 0) {
+        // OpenMP [2.8.1, Description]
+        // If no optional parameter is specified, implementation-defined default
+        // alignments for SIMD instructions on the target platforms are assumed.
+        Alignment =
+            CGF.getContext()
+                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
+                    E->getType()->getPointeeType()))
+                .getQuantity();
+      }
+      assert((Alignment == 0 || Alignment.isPowerOf2()) &&
+             "alignment is not power of 2");
+      llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
+      AlignedVars[PtrValue] = CGF.Builder.getInt64(Alignment.getSExtValue());
+    }
+  }
+  return AlignedVars;
+}
 
 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
   bool UseOMPIRBuilder =
@@ -2637,6 +2667,8 @@
                                                           PrePostActionTy &) {
       // Use the OpenMPIRBuilder if enabled.
       if (UseOMPIRBuilder) {
+        llvm::DenseMap<llvm::Value *, llvm::Value *> AlignedVars =
+            GetAlignedMapping(S, CGF);
         // Emit the associated statement and get its loop representation.
         const Stmt *Inner = S.getRawStmt();
         llvm::CanonicalLoopInfo *CLI =
@@ -2669,7 +2701,8 @@
         }
         // Add simd metadata to the collapsed loop. Do not generate
         // another loop for if clause. Support for if clause is done earlier.
-        OMPBuilder.applySimd(CLI, /*IfCond*/ nullptr, Order, Simdlen, Safelen);
+        OMPBuilder.applySimd(CLI, AlignedVars,
+                             /*IfCond*/ nullptr, Order, Simdlen, Safelen);
         return;
       }
     };
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to