https://github.com/jtb20 updated https://github.com/llvm/llvm-project/pull/148947
>From c30dcc39a2b14eb0aef51e5f8bb34b7338e587d0 Mon Sep 17 00:00:00 2001 From: Julian Brown <julian.br...@amd.com> Date: Thu, 19 Jun 2025 07:47:38 -0500 Subject: [PATCH] [OpenMP] Don't emit redundant zero-sized mapping nodes for overlapped structs The handling of overlapped structure mapping in CGOpenMPRuntime.cpp can lead to redundant zero-sized mapping nodes at runtime. This patch fixes it using a combination of approaches: trivially adjacent struct members won't have a mapping node created between them, and for more complicated cases (inheritance) the physical layout of the struct/class is used to make sure that elements aren't missed. I've introduced a new class to track the state whilst iterating over the struct. This reduces a bit of redundancy in the code (accumulating CombinedInfo both during and after the loop), which I think is a bit neater. Before: omptarget --> Entry 0: Base=0x00007fff8d483830, Begin=0x00007fff8d483830, Size=48, Type=0x20, Name=unknown omptarget --> Entry 1: Base=0x00007fff8d483830, Begin=0x00007fff8d483830, Size=0, Type=0x1000000000003, Name=unknown omptarget --> Entry 2: Base=0x00007fff8d483830, Begin=0x00007fff8d483834, Size=0, Type=0x1000000000003, Name=unknown omptarget --> Entry 3: Base=0x00007fff8d483830, Begin=0x00007fff8d483838, Size=0, Type=0x1000000000003, Name=unknown omptarget --> Entry 4: Base=0x00007fff8d483830, Begin=0x00007fff8d48383c, Size=20, Type=0x1000000000003, Name=unknown omptarget --> Entry 5: Base=0x00007fff8d483830, Begin=0x00007fff8d483854, Size=0, Type=0x1000000000003, Name=unknown omptarget --> Entry 6: Base=0x00007fff8d483830, Begin=0x00007fff8d483858, Size=0, Type=0x1000000000003, Name=unknown omptarget --> Entry 7: Base=0x00007fff8d483830, Begin=0x00007fff8d48385c, Size=4, Type=0x1000000000003, Name=unknown omptarget --> Entry 8: Base=0x00007fff8d483830, Begin=0x00007fff8d483830, Size=4, Type=0x1000000000003, Name=unknown omptarget --> Entry 9: Base=0x00007fff8d483830, Begin=0x00007fff8d483834, Size=4, Type=0x1000000000003, Name=unknown omptarget --> Entry 10: Base=0x00007fff8d483830, Begin=0x00007fff8d483838, Size=4, Type=0x1000000000003, Name=unknown omptarget --> Entry 11: Base=0x00007fff8d483840, Begin=0x00005e7665275130, Size=32, Type=0x1000000000013, Name=unknown omptarget --> Entry 12: Base=0x00007fff8d483830, Begin=0x00007fff8d483850, Size=4, Type=0x1000000000003, Name=unknown omptarget --> Entry 13: Base=0x00007fff8d483830, Begin=0x00007fff8d483854, Size=4, Type=0x1000000000003, Name=unknown omptarget --> Entry 14: Base=0x00007fff8d483830, Begin=0x00007fff8d483858, Size=4, Type=0x1000000000003, Name=unknown After: omptarget --> Entry 0: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f562e0, Size=48, Type=0x20, Name=unknown omptarget --> Entry 1: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f562ec, Size=20, Type=0x1000000000003, Name=unknown omptarget --> Entry 2: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f5630c, Size=4, Type=0x1000000000003, Name=unknown omptarget --> Entry 3: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f562e0, Size=4, Type=0x1000000000003, Name=unknown omptarget --> Entry 4: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f562e4, Size=4, Type=0x1000000000003, Name=unknown omptarget --> Entry 5: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f562e8, Size=4, Type=0x1000000000003, Name=unknown omptarget --> Entry 6: Base=0x00007fffd0f562f0, Begin=0x000058b6013fb130, Size=32, Type=0x1000000000013, Name=unknown omptarget --> Entry 7: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f56300, Size=4, Type=0x1000000000003, Name=unknown omptarget --> Entry 8: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f56304, Size=4, Type=0x1000000000003, Name=unknown omptarget --> Entry 9: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f56308, Size=4, Type=0x1000000000003, Name=unknown For code: #include <cstdlib> #include <cstdio> struct S { int x; int y; int z; int *p1; int *p2; }; struct T : public S { int a; int b; int c; }; int main() { T v; v.p1 = (int*) calloc(8, sizeof(int)); v.p2 = (int*) calloc(8, sizeof(int)); #pragma omp target map(tofrom: v, v.x, v.y, v.z, v.p1[:8], v.a, v.b, v.c) { v.x++; v.y += 2; v.z += 3; v.p1[0] += 4; v.a += 7; v.b += 5; v.c += 6; } return 0; } --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 157 ++++++++++++++------ clang/test/OpenMP/copy-gaps-1.cpp | 52 +++++++ clang/test/OpenMP/copy-gaps-2.cpp | 52 +++++++ clang/test/OpenMP/copy-gaps-3.cpp | 46 ++++++ clang/test/OpenMP/copy-gaps-4.cpp | 48 ++++++ clang/test/OpenMP/copy-gaps-5.cpp | 50 +++++++ clang/test/OpenMP/copy-gaps-6.cpp | 87 +++++++++++ clang/test/OpenMP/target_map_codegen_35.cpp | 29 +--- 8 files changed, 449 insertions(+), 72 deletions(-) create mode 100644 clang/test/OpenMP/copy-gaps-1.cpp create mode 100644 clang/test/OpenMP/copy-gaps-2.cpp create mode 100644 clang/test/OpenMP/copy-gaps-3.cpp create mode 100644 clang/test/OpenMP/copy-gaps-4.cpp create mode 100644 clang/test/OpenMP/copy-gaps-5.cpp create mode 100644 clang/test/OpenMP/copy-gaps-6.cpp diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index ce2dd4d76368a..f1698a0bec373 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -7080,6 +7080,110 @@ class MappableExprsHandler { return ConstLength.getSExtValue() != 1; } + /// A helper class to copy structures with overlapped elements, i.e. those + /// which have mappings of both "s" and "s.mem". Consecutive elements that + /// are not explicitly copied have mapping nodes synthesized for them, + /// taking care to avoid generating zero-sized copies. + class CopyOverlappedEntryGaps { + CodeGenFunction &CGF; + MapCombinedInfoTy &CombinedInfo; + OpenMPOffloadMappingFlags Flags = OpenMPOffloadMappingFlags::OMP_MAP_NONE; + const ValueDecl *MapDecl = nullptr; + const Expr *MapExpr = nullptr; + Address BP = Address::invalid(); + bool IsNonContiguous = false; + uint64_t DimSize = 0; + // These elements track the position as the struct is iterated over + // (in order of increasing element address). + const RecordDecl *LastParent = nullptr; + uint64_t Cursor = 0; + unsigned LastIndex = -1u; + Address LB = Address::invalid(); + + public: + CopyOverlappedEntryGaps(CodeGenFunction &CGF, + MapCombinedInfoTy &CombinedInfo, + OpenMPOffloadMappingFlags Flags, + const ValueDecl *MapDecl, const Expr *MapExpr, + Address BP, Address LB, bool IsNonContiguous, + uint64_t DimSize) + : CGF(CGF), CombinedInfo(CombinedInfo), Flags(Flags), MapDecl(MapDecl), + MapExpr(MapExpr), BP(BP), LB(LB), IsNonContiguous(IsNonContiguous), + DimSize(DimSize) {} + + void processField( + const OMPClauseMappableExprCommon::MappableComponent &MC, + const FieldDecl *FD, + llvm::function_ref<LValue(CodeGenFunction &, const MemberExpr *)> + EmitMemberExprBase) { + const RecordDecl *RD = FD->getParent(); + const ASTRecordLayout &RL = CGF.getContext().getASTRecordLayout(RD); + uint64_t FieldOffset = RL.getFieldOffset(FD->getFieldIndex()); + uint64_t FieldSize = + CGF.getContext().getTypeSize(FD->getType().getCanonicalType()); + Address ComponentLB = Address::invalid(); + + if (FD->getType()->isLValueReferenceType()) { + const auto *ME = cast<MemberExpr>(MC.getAssociatedExpression()); + LValue BaseLVal = EmitMemberExprBase(CGF, ME); + ComponentLB = + CGF.EmitLValueForFieldInitialization(BaseLVal, FD).getAddress(); + } else { + ComponentLB = + CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()).getAddress(); + } + + if (!LastParent) + LastParent = RD; + if (FD->getParent() == LastParent) { + if (FD->getFieldIndex() != LastIndex + 1) + copyUntilField(FD, ComponentLB); + } else { + LastParent = FD->getParent(); + if (((int64_t)FieldOffset - (int64_t)Cursor) > 0) + copyUntilField(FD, ComponentLB); + } + Cursor = FieldOffset + FieldSize; + LastIndex = FD->getFieldIndex(); + LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); + } + + void copyUntilField(const FieldDecl *FD, Address ComponentLB) { + llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF); + llvm::Value *LBPtr = LB.emitRawPointer(CGF); + llvm::Value *Size = + CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr, LBPtr); + copySizedChunk(LBPtr, Size); + } + + void copyUntilEnd(Address HB) { + if (LastParent) { + const ASTRecordLayout &RL = + CGF.getContext().getASTRecordLayout(LastParent); + if ((uint64_t)CGF.getContext().toBits(RL.getSize()) <= Cursor) + return; + } + llvm::Value *LBPtr = LB.emitRawPointer(CGF); + llvm::Value *Size = CGF.Builder.CreatePtrDiff( + CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF), + LBPtr); + copySizedChunk(LBPtr, Size); + } + + void copySizedChunk(llvm::Value *Base, llvm::Value *Size) { + CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); + CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF)); + CombinedInfo.DevicePtrDecls.push_back(nullptr); + CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); + CombinedInfo.Pointers.push_back(Base); + CombinedInfo.Sizes.push_back( + CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); + CombinedInfo.Types.push_back(Flags); + CombinedInfo.Mappers.push_back(nullptr); + CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize : 1); + } + }; + /// Generate the base pointers, section pointers, sizes, map type bits, and /// user-defined mappers (all included in \a CombinedInfo) for the provided /// map type, map or motion modifiers, and expression components. @@ -7570,63 +7674,22 @@ class MappableExprsHandler { getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, /*AddPtrFlag=*/false, /*AddIsTargetParamFlag=*/false, IsNonContiguous); - llvm::Value *Size = nullptr; + CopyOverlappedEntryGaps CopyGaps(CGF, CombinedInfo, Flags, MapDecl, + MapExpr, BP, LB, IsNonContiguous, + DimSize); // Do bitcopy of all non-overlapped structure elements. for (OMPClauseMappableExprCommon::MappableExprComponentListRef Component : OverlappedElements) { - Address ComponentLB = Address::invalid(); for (const OMPClauseMappableExprCommon::MappableComponent &MC : Component) { if (const ValueDecl *VD = MC.getAssociatedDeclaration()) { - const auto *FD = dyn_cast<FieldDecl>(VD); - if (FD && FD->getType()->isLValueReferenceType()) { - const auto *ME = - cast<MemberExpr>(MC.getAssociatedExpression()); - LValue BaseLVal = EmitMemberExprBase(CGF, ME); - ComponentLB = - CGF.EmitLValueForFieldInitialization(BaseLVal, FD) - .getAddress(); - } else { - ComponentLB = - CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) - .getAddress(); + if (const auto *FD = dyn_cast<FieldDecl>(VD)) { + CopyGaps.processField(MC, FD, EmitMemberExprBase); } - llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF); - llvm::Value *LBPtr = LB.emitRawPointer(CGF); - Size = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr, - LBPtr); - break; } } - assert(Size && "Failed to determine structure size"); - CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); - CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF)); - CombinedInfo.DevicePtrDecls.push_back(nullptr); - CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); - CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF)); - CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( - Size, CGF.Int64Ty, /*isSigned=*/true)); - CombinedInfo.Types.push_back(Flags); - CombinedInfo.Mappers.push_back(nullptr); - CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize - : 1); - LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); } - CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); - CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF)); - CombinedInfo.DevicePtrDecls.push_back(nullptr); - CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); - CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF)); - llvm::Value *LBPtr = LB.emitRawPointer(CGF); - Size = CGF.Builder.CreatePtrDiff( - CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF), - LBPtr); - CombinedInfo.Sizes.push_back( - CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); - CombinedInfo.Types.push_back(Flags); - CombinedInfo.Mappers.push_back(nullptr); - CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize - : 1); + CopyGaps.copyUntilEnd(HB); break; } llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); diff --git a/clang/test/OpenMP/copy-gaps-1.cpp b/clang/test/OpenMP/copy-gaps-1.cpp new file mode 100644 index 0000000000000..3d4fae352eed6 --- /dev/null +++ b/clang/test/OpenMP/copy-gaps-1.cpp @@ -0,0 +1,52 @@ +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp -emit-llvm %s -o - | FileCheck %s +// expected-no-diagnostics + +struct S { + int x; + int y; + int z; + int *p1; + int *p2; +}; + +struct T : public S { + int a; + int b; + int c; +}; + +int main() { + T v; + +#pragma omp target map(tofrom: v, v.x, v.y, v.z, v.p1[:8], v.a, v.b, v.c) + { + v.x++; + v.y += 2; + v.z += 3; + v.p1[0] += 4; + v.a += 7; + v.b += 5; + v.c += 6; + } + + return 0; +} + +// CHECK: [[CSTSZ:@.+]] = private {{.*}}constant [10 x i64] [i64 0, i64 0, i64 0, i64 4, i64 4, i64 4, i64 32, i64 4, i64 4, i64 4] +// CHECK: [[CSTTY:@.+]] = private {{.*}}constant [10 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000013]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]]] + +// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]]) +// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4 +// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8 +// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [10 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0 + +// Check for filling of four non-constant size elements here: the whole struct +// size, the (padded) region covering p1 & p2, and the padding at the end of +// struct T. + +// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [10 x i64], ptr [[SIZES]], i32 0, i32 0 +// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8 +// CHECK-DAG: [[P1P2:%.+]] = getelementptr inbounds [10 x i64], ptr [[SIZES]], i32 0, i32 1 +// CHECK-DAG: store i64 %{{.+}}, ptr [[P1P2]], align 8 +// CHECK-DAG: [[PAD:%.+]] = getelementptr inbounds [10 x i64], ptr [[SIZES]], i32 0, i32 2 +// CHECK-DAG: store i64 %{{.+}}, ptr [[PAD]], align 8 diff --git a/clang/test/OpenMP/copy-gaps-2.cpp b/clang/test/OpenMP/copy-gaps-2.cpp new file mode 100644 index 0000000000000..5bf603a3d9edb --- /dev/null +++ b/clang/test/OpenMP/copy-gaps-2.cpp @@ -0,0 +1,52 @@ +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp -emit-llvm %s -o - | FileCheck %s +// expected-no-diagnostics + +struct S { + int x; + int y; + int z; +}; + +struct M : public S { + int mid; +}; + +struct T : public M { + int a; + int b; + int c; +}; + +int main() { + T v; + +#pragma omp target map(tofrom: v, v.y, v.z, v.a) + { + v.y++; + v.z += 2; + v.a += 3; + v.mid += 5; + } + + return 0; +} + +// CHECK: [[CSTSZ:@.+]] = private {{.*}}constant [7 x i64] [i64 0, i64 0, i64 0, i64 0, i64 4, i64 4, i64 4] +// CHECK: [[CSTTY:@.+]] = private {{.*}}constant [7 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]]] + +// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]]) +// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4 +// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8 +// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [7 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0 + +// Fill four non-constant size elements here: the whole struct size, the region +// covering v.x, the region covering v.mid and the region covering v.b and v.c. + +// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 0 +// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8 +// CHECK-DAG: [[X:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 1 +// CHECK-DAG: store i64 %{{.+}}, ptr [[X]], align 8 +// CHECK-DAG: [[MID:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 2 +// CHECK-DAG: store i64 %{{.+}}, ptr [[MID]], align 8 +// CHECK-DAG: [[BC:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 3 +// CHECK-DAG: store i64 %{{.+}}, ptr [[BC]], align 8 diff --git a/clang/test/OpenMP/copy-gaps-3.cpp b/clang/test/OpenMP/copy-gaps-3.cpp new file mode 100644 index 0000000000000..5febb181ca1c5 --- /dev/null +++ b/clang/test/OpenMP/copy-gaps-3.cpp @@ -0,0 +1,46 @@ +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp -emit-llvm %s -o - | FileCheck %s +// expected-no-diagnostics + +struct S { + int x; + int y; + int z; +}; + +struct T : public S { + int a; + int b; + int c; +}; + +int main() { + T v; + + // This one should have no gap between v.z & v.a. +#pragma omp target map(tofrom: v, v.y, v.z, v.a) + { + v.y++; + v.z += 2; + v.a += 3; + } + + return 0; +} + +// CHECK: [[CSTSZ:@.+]] = private {{.*}}constant [6 x i64] [i64 0, i64 0, i64 0, i64 4, i64 4, i64 4] +// CHECK: [[CSTTY:@.+]] = private {{.*}}constant [6 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]]] + +// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]]) +// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4 +// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8 +// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [6 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0 + +// Fill three non-constant size elements here: the whole struct size, the region +// covering v.x, and the region covering v.b and v.c. + +// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [6 x i64], ptr [[SIZES]], i32 0, i32 0 +// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8 +// CHECK-DAG: [[X:%.+]] = getelementptr inbounds [6 x i64], ptr [[SIZES]], i32 0, i32 1 +// CHECK-DAG: store i64 %{{.+}}, ptr [[X]], align 8 +// CHECK-DAG: [[BC:%.+]] = getelementptr inbounds [6 x i64], ptr [[SIZES]], i32 0, i32 2 +// CHECK-DAG: store i64 %{{.+}}, ptr [[BC]], align 8 diff --git a/clang/test/OpenMP/copy-gaps-4.cpp b/clang/test/OpenMP/copy-gaps-4.cpp new file mode 100644 index 0000000000000..7060fe3ea2a01 --- /dev/null +++ b/clang/test/OpenMP/copy-gaps-4.cpp @@ -0,0 +1,48 @@ +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp -emit-llvm %s -o - | FileCheck %s +// expected-no-diagnostics + +struct S { + int x; + int y; + char z; // Hidden padding after here... +}; + +struct T : public S { + int a; + int b; + int c; +}; + +int main() { + T v; + +#pragma omp target map(tofrom: v, v.y, v.z, v.a) + { + v.y++; + v.z += 2; + v.a += 3; + } + + return 0; +} + +// CHECK: [[CSTSZ:@.+]] = private {{.*}}constant [7 x i64] [i64 0, i64 0, i64 0, i64 0, i64 4, i64 1, i64 4] +// CHECK: [[CSTTY:@.+]] = private {{.*}}constant [7 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]]] + +// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]]) +// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4 +// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8 +// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [7 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0 + +// Fill four non-constant size elements here: the whole struct size, the region +// covering v.x, the region covering padding after v.z and the region covering +// v.b and v.c. + +// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 0 +// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8 +// CHECK-DAG: [[X:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 1 +// CHECK-DAG: store i64 %{{.+}}, ptr [[X]], align 8 +// CHECK-DAG: [[PAD:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 2 +// CHECK-DAG: store i64 %{{.+}}, ptr [[PAD]], align 8 +// CHECK-DAG: [[BC:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 3 +// CHECK-DAG: store i64 %{{.+}}, ptr [[BC]], align 8 diff --git a/clang/test/OpenMP/copy-gaps-5.cpp b/clang/test/OpenMP/copy-gaps-5.cpp new file mode 100644 index 0000000000000..fae675dc2f505 --- /dev/null +++ b/clang/test/OpenMP/copy-gaps-5.cpp @@ -0,0 +1,50 @@ +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp -emit-llvm %s -o - | FileCheck %s +// expected-no-diagnostics + +template<typename C> +struct S { + C x; + C y; + char z; // Hidden padding after here... +}; + +template<typename C> +struct T : public S<C> { + C a; + C b; + C c; +}; + +int main() { + T<int> v; + +#pragma omp target map(tofrom: v, v.y, v.z, v.a) + { + v.y++; + v.z += 2; + v.a += 3; + } + + return 0; +} + +// CHECK: [[CSTSZ:@.+]] = private {{.*}}constant [7 x i64] [i64 0, i64 0, i64 0, i64 0, i64 4, i64 1, i64 4] +// CHECK: [[CSTTY:@.+]] = private {{.*}}constant [7 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]]] + +// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]]) +// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4 +// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8 +// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [7 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0 + +// Fill four non-constant size elements here: the whole struct size, the region +// covering v.x, the region covering padding after v.z and the region covering +// v.b and v.c. + +// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 0 +// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8 +// CHECK-DAG: [[X:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 1 +// CHECK-DAG: store i64 %{{.+}}, ptr [[X]], align 8 +// CHECK-DAG: [[PAD:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 2 +// CHECK-DAG: store i64 %{{.+}}, ptr [[PAD]], align 8 +// CHECK-DAG: [[BC:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 3 +// CHECK-DAG: store i64 %{{.+}}, ptr [[BC]], align 8 diff --git a/clang/test/OpenMP/copy-gaps-6.cpp b/clang/test/OpenMP/copy-gaps-6.cpp new file mode 100644 index 0000000000000..9c62fde1c3762 --- /dev/null +++ b/clang/test/OpenMP/copy-gaps-6.cpp @@ -0,0 +1,87 @@ +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp -emit-llvm %s -o - | FileCheck %s +// expected-no-diagnostics + +struct S { + int x; + int *arr; + int y; + int z; +}; + +int main() { + S v; + +#pragma omp target map(tofrom: v, v.x, v.z) + { + v.x++; + v.y += 2; + v.z += 3; + } + +#pragma omp target map(tofrom: v, v.x, v.arr[:1]) + { + v.x++; + v.y += 2; + v.arr[0] += 2; + v.z += 4; + } + +#pragma omp target map(tofrom: v, v.arr[:1]) + { + v.x++; + v.y += 2; + v.arr[0] += 2; + v.z += 4; + } + + return 0; +} + +// CHECK: [[CSTSZ0:@.+]] = private {{.*}}constant [4 x i64] [i64 0, i64 0, i64 4, i64 4] +// CHECK: [[CSTTY0:@.+]] = private {{.*}}constant [4 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]]] + +// CHECK: [[CSTSZ1:@.+]] = private {{.*}}constant [4 x i64] [i64 0, i64 0, i64 4, i64 4] +// CHECK: [[CSTTY1:@.+]] = private {{.*}}constant [4 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000013]]] + +// CHECK: [[CSTSZ2:@.+]] = private {{.*}}constant [3 x i64] [i64 0, i64 24, i64 4] +// CHECK: [[CSTTY2:@.+]] = private {{.*}}constant [3 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000013]]] + +// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]]) +// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4 +// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8 +// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [4 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0 + +// Fill two non-constant size elements here: the whole struct size, and the +// region covering v.arr and v.y. + +// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [4 x i64], ptr [[SIZES]], i32 0, i32 0 +// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8 +// CHECK-DAG: [[ARRY:%.+]] = getelementptr inbounds [4 x i64], ptr [[SIZES]], i32 0, i32 1 +// CHECK-DAG: store i64 %{{.+}}, ptr [[ARRY]], align 8 + +// CHECK: call void + +// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]]) +// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4 +// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8 +// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [4 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0 + +// Fill two non-constant size elements here: the whole struct size, and the +// region covering v.arr, v.y and v.z. + +// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [4 x i64], ptr [[SIZES]], i32 0, i32 0 +// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8 +// CHECK-DAG: [[ARRYZ:%.+]] = getelementptr inbounds [4 x i64], ptr [[SIZES]], i32 0, i32 1 +// CHECK-DAG: store i64 %{{.+}}, ptr [[ARRYZ]], align 8 + +// CHECK: call void + +// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]]) +// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4 +// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8 +// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [3 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0 + +// Fill one non-constant size element here: the whole struct size. + +// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [3 x i64], ptr [[SIZES]], i32 0, i32 0 +// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8 diff --git a/clang/test/OpenMP/target_map_codegen_35.cpp b/clang/test/OpenMP/target_map_codegen_35.cpp index afa09656f70d1..c4fc49cb6e218 100644 --- a/clang/test/OpenMP/target_map_codegen_35.cpp +++ b/clang/test/OpenMP/target_map_codegen_35.cpp @@ -27,11 +27,11 @@ class S { void foo(); }; -// CK35-DAG: [[SIZE_TO:@.+]] = private {{.*}}constant [4 x i64] [i64 0, i64 0, i64 0, i64 8] +// CK35-DAG: [[SIZE_TO:@.+]] = private {{.*}}constant [3 x i64] [i64 0, i64 0, i64 8] // TARGET_PARAM = 0x20 // MEMBER_OF_1 | TO = 0x1000000000001 // MEMBER_OF_1 | PTR_AND_OBJ | TO = 0x1000000000011 -// CK35-DAG: [[MTYPE_TO:@.+]] = {{.+}}constant [4 x i64] [i64 [[#0x20]], i64 [[#0x1000000000001]], i64 [[#0x1000000000001]], i64 [[#0x1000000000011]]] +// CK35-DAG: [[MTYPE_TO:@.+]] = {{.+}}constant [3 x i64] [i64 [[#0x20]], i64 [[#0x1000000000001]], i64 [[#0x1000000000011]]] // CK35-DAG: [[SIZE_FROM:@.+]] = private {{.*}}constant [2 x i64] [i64 0, i64 8] // TARGET_PARAM = 0x20 // MEMBER_OF_1 | PTR_AND_OBJ | FROM = 0x1000000000012 @@ -86,35 +86,14 @@ void ref_map() { // CK35-DAG: [[B_BEGIN_INTPTR]] = ptrtoint ptr [[B_BEGIN_VOID:%.+]] to i64 // CK35-DAG: [[B_ADDR:%.+]] = getelementptr inbounds nuw %class.S, ptr [[S_ADDR]], i32 0, i32 1 - // pass MEMBER_OF_1 | TO {&s, &s.b+1, ((ptr)(&s+1)-(ptr)(&s.b+1))} to copy the data of remainder of s. + // pass MEMBER_OF_1 | PTR_AND_OBJ | TO {&s, &s.b, 8|4} to copy the data of s.b. // CK35-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2 // CK35-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2 - // CK35-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 2 // CK35-DAG: store ptr [[S_ADDR]], ptr [[BP2]], - // CK35-DAG: store ptr [[B_END:%.+]], ptr [[P2]], - // CK35-DAG: store i64 [[REM_SIZE:%.+]], ptr [[S2]], - - // CK35-DAG: [[B_END]] = getelementptr ptr, ptr [[B_ADDR]], i{{.+}} 1 - - // CK35-DAG: [[REM_SIZE]] = sdiv exact i64 [[SZ:%.+]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) - // CK35-DAG: [[SZ]] = sub i64 [[S_END_INTPTR:%.+]], [[B_END_INTPTR:%.+]] - // CK35-DAG: [[B_END_INTPTR]] = ptrtoint ptr [[B_END_VOID:%.+]] to i64 - // CK35-DAG: [[S_END_INTPTR]] = ptrtoint ptr [[S_END_VOID:%.+]] to i64 - // CK35-DAG: [[S_END_VOID]] = getelementptr i8, ptr [[S_LAST:%.+]], i{{.+}} 1 - // CK35-64-DAG: [[S_LAST]] = getelementptr i8, ptr [[S_VOIDPTR:%.+]], i64 15 - // CK35-32-DAG: [[S_LAST]] = getelementptr i8, ptr [[S_VOIDPTR:%.+]], i32 7 - - // pass MEMBER_OF_1 | PTR_AND_OBJ | TO {&s, &s.b, 8|4} to copy the data of s.b. - - // CK35-DAG: [[BP3:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 3 - // CK35-DAG: [[P3:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 3 - - - // CK35-DAG: store ptr [[S_ADDR]], ptr [[BP3]], - // CK35-DAG: store ptr [[B_ADDR:%.+]], ptr [[P3]], + // CK35-DAG: store ptr [[B_ADDR:%.+]], ptr [[P2]], // CK35-DAG: [[B_ADDR]] = load ptr, ptr [[B_REF:%.+]], align {{[0-9]+}}, !nonnull !{{[0-9]+}}, !align !{{[0-9]+}} // CK35-DAG: [[B_REF]] = getelementptr inbounds nuw %class.S, ptr [[S_ADDR]], i32 0, i32 1 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits