https://github.com/doru1004 updated https://github.com/llvm/llvm-project/pull/75642
>From e0e1f5e7bb2f95f2568b5dd647b883f4740bcafd Mon Sep 17 00:00:00 2001 From: Doru Bercea <doru.ber...@amd.com> Date: Fri, 15 Dec 2023 10:22:38 -0500 Subject: [PATCH] Fix mapping of structs to device. --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 146 +++++++++++---- clang/test/OpenMP/map_struct_ordering.cpp | 172 ++++++++++++++++++ .../struct_mapping_with_pointers.cpp | 114 ++++++++++++ 3 files changed, 399 insertions(+), 33 deletions(-) create mode 100644 clang/test/OpenMP/map_struct_ordering.cpp create mode 100644 openmp/libomptarget/test/offloading/struct_mapping_with_pointers.cpp diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 7f7e6f53066644..350e7108b8d5a7 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -6811,8 +6811,10 @@ class MappableExprsHandler { OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, ArrayRef<OpenMPMotionModifierKind> MotionModifiers, OMPClauseMappableExprCommon::MappableExprComponentListRef Components, - MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct, - bool IsFirstComponentList, bool IsImplicit, + MapCombinedInfoTy &CombinedInfo, + MapCombinedInfoTy &StructBaseCombinedInfo, + StructRangeInfoTy &PartialStruct, bool IsFirstComponentList, + bool IsImplicit, bool GenerateAllInfoForClauses, const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false, const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr, ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> @@ -7098,6 +7100,25 @@ class MappableExprsHandler { bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous; bool IsPrevMemberReference = false; + // We need to check if we will be encountering any MEs. If we do not + // encounter any ME expression it means we will be mapping the whole struct. + // In that case we need to skip adding an entry for the struct to the + // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo + // list only when generating all info for clauses. + bool IsMappingWholeStruct = true; + if (!GenerateAllInfoForClauses) { + IsMappingWholeStruct = false; + } else { + for (auto TempI = I; TempI != CE; ++TempI) { + const MemberExpr *PossibleME = + dyn_cast<MemberExpr>(TempI->getAssociatedExpression()); + if (PossibleME) { + IsMappingWholeStruct = false; + break; + } + } + } + for (; I != CE; ++I) { // If the current component is member of a struct (parent struct) mark it. if (!EncounteredME) { @@ -7317,21 +7338,41 @@ class MappableExprsHandler { break; } llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); + // Skip adding an entry in the CurInfo of this combined entry if the + // whole struct is currently being mapped. The struct needs to be added + // in the first position before any data internal to the struct is being + // mapped. if (!IsMemberPointerOrAddr || (Next == CE && MapType != OMPC_MAP_unknown)) { - CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); - CombinedInfo.BasePointers.push_back(BP.getPointer()); - CombinedInfo.DevicePtrDecls.push_back(nullptr); - CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); - CombinedInfo.Pointers.push_back(LB.getPointer()); - CombinedInfo.Sizes.push_back( - CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); - CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize - : 1); + if (!IsMappingWholeStruct) { + CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); + CombinedInfo.BasePointers.push_back(BP.getPointer()); + CombinedInfo.DevicePtrDecls.push_back(nullptr); + CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); + CombinedInfo.Pointers.push_back(LB.getPointer()); + CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( + Size, CGF.Int64Ty, /*isSigned=*/true)); + CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize + : 1); + } else { + StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); + StructBaseCombinedInfo.BasePointers.push_back(BP.getPointer()); + StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr); + StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); + StructBaseCombinedInfo.Pointers.push_back(LB.getPointer()); + StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( + Size, CGF.Int64Ty, /*isSigned=*/true)); + StructBaseCombinedInfo.NonContigInfo.Dims.push_back( + IsNonContiguous ? DimSize : 1); + } // If Mapper is valid, the last component inherits the mapper. bool HasMapper = Mapper && Next == CE; - CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr); + if (!IsMappingWholeStruct) + CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr); + else + StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper + : nullptr); // We need to add a pointer flag for each map that comes from the // same expression except for the first one. We also need to signal @@ -7363,7 +7404,10 @@ class MappableExprsHandler { } } - CombinedInfo.Types.push_back(Flags); + if (!IsMappingWholeStruct) + CombinedInfo.Types.push_back(Flags); + else + StructBaseCombinedInfo.Types.push_back(Flags); } // If we have encountered a member expression so far, keep track of the @@ -7954,8 +7998,10 @@ class MappableExprsHandler { for (const auto &Data : Info) { StructRangeInfoTy PartialStruct; - // Temporary generated information. + // Current struct information: MapCombinedInfoTy CurInfo; + // Current struct base information: + MapCombinedInfoTy StructBaseCurInfo; const Decl *D = Data.first; const ValueDecl *VD = cast_or_null<ValueDecl>(D); for (const auto &M : Data.second) { @@ -7965,29 +8011,53 @@ class MappableExprsHandler { // Remember the current base pointer index. unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size(); + unsigned StructBasePointersIdx = + StructBaseCurInfo.BasePointers.size(); CurInfo.NonContigInfo.IsNonContiguous = L.Components.back().isNonContiguous(); generateInfoForComponentList( L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, - CurInfo, PartialStruct, /*IsFirstComponentList=*/false, - L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef); + CurInfo, StructBaseCurInfo, PartialStruct, + /*IsFirstComponentList=*/false, L.IsImplicit, + /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD, + L.VarRef); - // If this entry relates with a device pointer, set the relevant + // If this entry relates to a device pointer, set the relevant // declaration and add the 'return pointer' flag. if (L.ReturnDevicePointer) { - assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx && + // Check whether a value was added to either CurInfo or + // StructBaseCurInfo and error if no value was added to either of + // them: + assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() || + StructBasePointersIdx < + StructBaseCurInfo.BasePointers.size()) && "Unexpected number of mapped base pointers."); + // Choose a base pointer index which is always valid: const ValueDecl *RelevantVD = L.Components.back().getAssociatedDeclaration(); assert(RelevantVD && "No relevant declaration related with device pointer??"); - CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD; - CurInfo.DevicePointers[CurrentBasePointersIdx] = - L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer; - CurInfo.Types[CurrentBasePointersIdx] |= - OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM; + // If StructBaseCurInfo has been updated this iteration then work on the + // first new entry added to it i.e. make sure that when multiple values are added to any of the lists, the + // first value added is being modified by the assignments below (not the last value added). + if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) { + StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] = + RelevantVD; + StructBaseCurInfo.DevicePointers[StructBasePointersIdx] = + L.ForDeviceAddr ? DeviceInfoTy::Address + : DeviceInfoTy::Pointer; + StructBaseCurInfo.Types[StructBasePointersIdx] |= + OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM; + } else { + CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD; + CurInfo.DevicePointers[CurrentBasePointersIdx] = + L.ForDeviceAddr ? DeviceInfoTy::Address + : DeviceInfoTy::Pointer; + CurInfo.Types[CurrentBasePointersIdx] |= + OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM; + } } } } @@ -8034,17 +8104,24 @@ class MappableExprsHandler { CurInfo.Mappers.push_back(nullptr); } } + + // Unify entries in one list making sure the struct mapping precedes the + // individual fields: + MapCombinedInfoTy UnionCurInfo; + UnionCurInfo.append(StructBaseCurInfo); + UnionCurInfo.append(CurInfo); + // If there is an entry in PartialStruct it means we have a struct with // individual members mapped. Emit an extra combined entry. if (PartialStruct.Base.isValid()) { - CurInfo.NonContigInfo.Dims.push_back(0); - emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, + UnionCurInfo.NonContigInfo.Dims.push_back(0); + // Emit a combined entry: + emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct, /*IsMapThis*/ !VD, OMPBuilder, VD); } - // We need to append the results of this capture to what we already - // have. - CombinedInfo.append(CurInfo); + // We need to append the results of this capture to what we already have. + CombinedInfo.append(UnionCurInfo); } // Append data for use_device_ptr clauses. CombinedInfo.append(UseDeviceDataCombinedInfo); @@ -8554,6 +8631,7 @@ class MappableExprsHandler { // Associated with a capture, because the mapping flags depend on it. // Go through all of the elements with the overlapped elements. bool IsFirstComponentList = true; + MapCombinedInfoTy StructBaseCombinedInfo; for (const auto &Pair : OverlappedData) { const MapData &L = *Pair.getFirst(); OMPClauseMappableExprCommon::MappableExprComponentListRef Components; @@ -8568,7 +8646,8 @@ class MappableExprsHandler { OverlappedComponents = Pair.getSecond(); generateInfoForComponentList( MapType, MapModifiers, std::nullopt, Components, CombinedInfo, - PartialStruct, IsFirstComponentList, IsImplicit, Mapper, + StructBaseCombinedInfo, PartialStruct, IsFirstComponentList, + IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper, /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents); IsFirstComponentList = false; } @@ -8584,10 +8663,11 @@ class MappableExprsHandler { L; auto It = OverlappedData.find(&L); if (It == OverlappedData.end()) - generateInfoForComponentList(MapType, MapModifiers, std::nullopt, - Components, CombinedInfo, PartialStruct, - IsFirstComponentList, IsImplicit, Mapper, - /*ForDeviceAddr=*/false, VD, VarRef); + generateInfoForComponentList( + MapType, MapModifiers, std::nullopt, Components, CombinedInfo, + StructBaseCombinedInfo, PartialStruct, IsFirstComponentList, + IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper, + /*ForDeviceAddr=*/false, VD, VarRef); IsFirstComponentList = false; } } diff --git a/clang/test/OpenMP/map_struct_ordering.cpp b/clang/test/OpenMP/map_struct_ordering.cpp new file mode 100644 index 00000000000000..035b39b5b12ab4 --- /dev/null +++ b/clang/test/OpenMP/map_struct_ordering.cpp @@ -0,0 +1,172 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" --prefix-filecheck-ir-name _ --version 4 + +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-mapping | FileCheck %s --check-prefix=CHECK + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +struct Descriptor { + int *datum; + long int x; + int xi; + long int arr[1][30]; +}; + +int map_struct() { + Descriptor dat = Descriptor(); + dat.xi = 3; + dat.arr[0][0] = 1; + + #pragma omp target enter data map(to: dat.datum[:10]) map(to: dat) + + #pragma omp target + { + dat.xi = 4; + dat.datum[dat.arr[0][0]] = dat.xi; + } + + #pragma omp target exit data map(from: dat) + + return dat.xi; +} + +#endif +// CHECK-LABEL: define dso_local noundef signext i32 @_Z10map_structv( +// CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DAT:%.*]] = alloca [[STRUCT_DESCRIPTOR:%.*]], align 8 +// CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8 +// CHECK-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8 +// CHECK-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8 +// CHECK-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [3 x i64], align 8 +// CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [1 x ptr], align 8 +// CHECK-NEXT: [[DOTOFFLOAD_PTRS5:%.*]] = alloca [1 x ptr], align 8 +// CHECK-NEXT: [[DOTOFFLOAD_MAPPERS6:%.*]] = alloca [1 x ptr], align 8 +// CHECK-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS7:%.*]] = alloca [1 x ptr], align 8 +// CHECK-NEXT: [[DOTOFFLOAD_PTRS8:%.*]] = alloca [1 x ptr], align 8 +// CHECK-NEXT: [[DOTOFFLOAD_MAPPERS9:%.*]] = alloca [1 x ptr], align 8 +// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[DAT]], i8 0, i64 264, i1 false) +// CHECK-NEXT: [[XI:%.*]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], ptr [[DAT]], i32 0, i32 2 +// CHECK-NEXT: store i32 3, ptr [[XI]], align 8 +// CHECK-NEXT: [[ARR:%.*]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], ptr [[DAT]], i32 0, i32 3 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1 x [30 x i64]], ptr [[ARR]], i64 0, i64 0 +// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [30 x i64], ptr [[ARRAYIDX]], i64 0, i64 0 +// CHECK-NEXT: store i64 1, ptr [[ARRAYIDX1]], align 8 +// CHECK-NEXT: [[DATUM:%.*]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], ptr [[DAT]], i32 0, i32 0 +// CHECK-NEXT: [[DATUM2:%.*]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], ptr [[DAT]], i32 0, i32 0 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DATUM2]], align 8 +// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 0 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_DESCRIPTOR]], ptr [[DAT]], i32 1 +// CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64 +// CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[DAT]] to i64 +// CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]] +// CHECK-NEXT: [[TMP5:%.*]] = sdiv exact i64 [[TMP4]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes, i64 24, i1 false) +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[DAT]], ptr [[TMP6]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[DAT]], ptr [[TMP7]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK-NEXT: store i64 [[TMP5]], ptr [[TMP8]], align 8 +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK-NEXT: store ptr null, ptr [[TMP9]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK-NEXT: store ptr [[DAT]], ptr [[TMP10]], align 8 +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK-NEXT: store ptr [[DAT]], ptr [[TMP11]], align 8 +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[DATUM]], ptr [[TMP13]], align 8 +// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[ARRAYIDX3]], ptr [[TMP14]], align 8 +// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK-NEXT: store ptr null, ptr [[TMP15]], align 8 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK-NEXT: call void @__tgt_target_data_begin_mapper(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 3, ptr [[TMP16]], ptr [[TMP17]], ptr [[TMP18]], ptr @.offload_maptypes, ptr null, ptr null) +// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[DAT]], ptr [[TMP19]], align 8 +// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS5]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[DAT]], ptr [[TMP20]], align 8 +// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 0 +// CHECK-NEXT: store ptr null, ptr [[TMP21]], align 8 +// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0 +// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS5]], i32 0, i32 0 +// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK-NEXT: store i32 2, ptr [[TMP24]], align 4 +// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK-NEXT: store i32 1, ptr [[TMP25]], align 4 +// CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[TMP22]], ptr [[TMP26]], align 8 +// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK-NEXT: store ptr [[TMP23]], ptr [[TMP27]], align 8 +// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK-NEXT: store ptr @.offload_sizes.1, ptr [[TMP28]], align 8 +// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP29]], align 8 +// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK-NEXT: store ptr null, ptr [[TMP30]], align 8 +// CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK-NEXT: store ptr null, ptr [[TMP31]], align 8 +// CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK-NEXT: store i64 0, ptr [[TMP32]], align 8 +// CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK-NEXT: store i64 0, ptr [[TMP33]], align 8 +// CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP34]], align 4 +// CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP35]], align 4 +// CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK-NEXT: store i32 0, ptr [[TMP36]], align 4 +// CHECK-NEXT: [[TMP37:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z10map_structv_l23.region_id, ptr [[KERNEL_ARGS]]) +// CHECK-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK-NEXT: br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK: omp_offload.failed: +// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z10map_structv_l23(ptr [[DAT]]) #[[ATTR3:[0-9]+]] +// CHECK-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK: omp_offload.cont: +// CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[DAT]], ptr [[TMP39]], align 8 +// CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[DAT]], ptr [[TMP40]], align 8 +// CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 0 +// CHECK-NEXT: store ptr null, ptr [[TMP41]], align 8 +// CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 +// CHECK-NEXT: call void @__tgt_target_data_end_mapper(ptr @[[GLOB1]], i64 -1, i32 1, ptr [[TMP42]], ptr [[TMP43]], ptr @.offload_sizes.3, ptr @.offload_maptypes.4, ptr null, ptr null) +// CHECK-NEXT: [[XI10:%.*]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], ptr [[DAT]], i32 0, i32 2 +// CHECK-NEXT: [[TMP44:%.*]] = load i32, ptr [[XI10]], align 8 +// CHECK-NEXT: ret i32 [[TMP44]] +// +// +// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z10map_structv_l23( +// CHECK-SAME: ptr noundef nonnull align 8 dereferenceable(264) [[DAT:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DAT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[DAT]], ptr [[DAT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DAT_ADDR]], align 8 +// CHECK-NEXT: [[XI:%.*]] = getelementptr inbounds [[STRUCT_DESCRIPTOR:%.*]], ptr [[TMP0]], i32 0, i32 2 +// CHECK-NEXT: store i32 4, ptr [[XI]], align 8 +// CHECK-NEXT: [[XI1:%.*]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], ptr [[TMP0]], i32 0, i32 2 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[XI1]], align 8 +// CHECK-NEXT: [[DATUM:%.*]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DATUM]], align 8 +// CHECK-NEXT: [[ARR:%.*]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], ptr [[TMP0]], i32 0, i32 3 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1 x [30 x i64]], ptr [[ARR]], i64 0, i64 0 +// CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [30 x i64], ptr [[ARRAYIDX]], i64 0, i64 0 +// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[ARRAYIDX2]], align 8 +// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP3]] +// CHECK-NEXT: store i32 [[TMP1]], ptr [[ARRAYIDX3]], align 4 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define internal void @.omp_offloading.requires_reg( +// CHECK-SAME: ) #[[ATTR5:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK-NEXT: ret void +// diff --git a/openmp/libomptarget/test/offloading/struct_mapping_with_pointers.cpp b/openmp/libomptarget/test/offloading/struct_mapping_with_pointers.cpp new file mode 100644 index 00000000000000..cecafe4c584168 --- /dev/null +++ b/openmp/libomptarget/test/offloading/struct_mapping_with_pointers.cpp @@ -0,0 +1,114 @@ +// clang-format off +// RUN: %libomptarget-compilexx-generic && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-generic 2>&1 | %fcheck-generic +// clang-format on + +#include <stdio.h> +#include <stdlib.h> + +struct Descriptor { + int *datum; + long int x; + int *more_datum; + int xi; + int val_datum, val_more_datum; + long int arr[1][30]; + int val_arr; +}; + +int main() { + Descriptor dat = Descriptor(); + dat.datum = (int *)malloc(sizeof(int) * 10); + dat.more_datum = (int *)malloc(sizeof(int) * 20); + dat.xi = 3; + dat.arr[0][0] = 1; + + dat.datum[7] = 7; + dat.more_datum[17] = 17; + + /// The struct is mapped with type 0x0 when the pointer fields are mapped. + /// The struct is also map explicitely by the user. The second mapping by + /// the user must not overwrite the mapping set up for the pointer fields + /// when mapping the struct happens after the mapping of the pointers. + + // clang-format off + // CHECK: omptarget --> Entry 0: Base=[[DAT_HST_PTR_BASE:0x.*]], Begin=[[DAT_HST_PTR_BASE]], Size=288, Type=0x0, Name=unknown + // CHECK: omptarget --> Entry 1: Base=[[DAT_HST_PTR_BASE]], Begin=[[DAT_HST_PTR_BASE]], Size=288, Type=0x1000000000001, Name=unknown + // CHECK: omptarget --> Entry 2: Base=[[DAT_HST_PTR_BASE]], Begin=[[DATUM_HST_PTR_BASE:0x.*]], Size=40, Type=0x1000000000011, Name=unknown + // CHECK: omptarget --> Entry 3: Base=[[MORE_DATUM_HST_PTR_BASE:0x.*]], Begin=[[MORE_DATUM_HST_PTR_BEGIN:0x.*]], Size=80, Type=0x1000000000011, Name=unknown + // clang-format on + + /// The struct will be mapped in the same order as the above entries. + + /// First argument is the struct itself and it will be mapped once. + + // clang-format off + // CHECK: omptarget --> Looking up mapping(HstPtrBegin=[[DAT_HST_PTR_BASE]], Size=288)... + // CHECK: PluginInterface --> MemoryManagerTy::allocate: size 288 with host pointer [[DAT_HST_PTR_BASE]]. + // CHECK: omptarget --> Creating new map entry with HstPtrBase=[[DAT_HST_PTR_BASE]], HstPtrBegin=[[DAT_HST_PTR_BASE]], TgtAllocBegin=[[DAT_DEVICE_PTR_BASE:0x.*]], TgtPtrBegin=[[DAT_DEVICE_PTR_BASE]], Size=288, DynRefCount=1, HoldRefCount=0, Name=unknown + // CHECK: omptarget --> Moving 288 bytes (hst:[[DAT_HST_PTR_BASE]]) -> (tgt:[[DAT_DEVICE_PTR_BASE]]) + // clang-format on + + /// Second argument is dat.datum: + // clang-format off + // CHECK: omptarget --> Looking up mapping(HstPtrBegin=[[DATUM_HST_PTR_BASE]], Size=40)... + // CHECK: PluginInterface --> MemoryManagerTy::allocate: size 40 with host pointer [[DATUM_HST_PTR_BASE]]. + // CHECK: omptarget --> Creating new map entry with HstPtrBase=[[DATUM_HST_PTR_BASE]], HstPtrBegin=[[DATUM_HST_PTR_BASE]], TgtAllocBegin=[[DATUM_DEVICE_PTR_BASE:0x.*]], TgtPtrBegin=[[DATUM_DEVICE_PTR_BASE]], Size=40, DynRefCount=1, HoldRefCount=0, Name=unknown + // CHECK: omptarget --> Moving 40 bytes (hst:[[DATUM_HST_PTR_BASE]]) -> (tgt:[[DATUM_DEVICE_PTR_BASE]]) + // clang-format on + + /// Third argument is dat.more_datum: + // clang-format off + // CHECK: omptarget --> Looking up mapping(HstPtrBegin=[[MORE_DATUM_HST_PTR_BEGIN]], Size=80)... + // CHECK: PluginInterface --> MemoryManagerTy::allocate: size 80 with host pointer [[MORE_DATUM_HST_PTR_BEGIN]]. + // CHECK: omptarget --> Creating new map entry with HstPtrBase=[[MORE_DATUM_HST_PTR_BEGIN]], HstPtrBegin=[[MORE_DATUM_HST_PTR_BEGIN]], TgtAllocBegin=[[MORE_DATUM_DEVICE_PTR_BEGIN:0x.*]], TgtPtrBegin=[[MORE_DATUM_DEVICE_PTR_BEGIN]], Size=80, DynRefCount=1, HoldRefCount=0, Name=unknown + // CHECK: omptarget --> Moving 80 bytes (hst:[[MORE_DATUM_HST_PTR_BEGIN]]) -> (tgt:[[MORE_DATUM_DEVICE_PTR_BEGIN]]) + // clang-format on + +#pragma omp target enter data map(to : dat.datum[ : 10]) \ + map(to : dat.more_datum[ : 20]) map(to : dat) + + /// Checks induced by having a target region: + // clang-format off + // CHECK: omptarget --> Entry 0: Base=[[DAT_HST_PTR_BASE]], Begin=[[DAT_HST_PTR_BASE]], Size=288, Type=0x223, Name=unknown + // CHECK: omptarget --> Mapping exists (implicit) with HstPtrBegin=[[DAT_HST_PTR_BASE]], TgtPtrBegin=[[DAT_DEVICE_PTR_BASE]], Size=288, DynRefCount=2 (incremented), HoldRefCount=0, Name=unknown + // CHECK: omptarget --> Obtained target argument [[DAT_DEVICE_PTR_BASE]] from host pointer [[DAT_HST_PTR_BASE]] + // clang-format on + +#pragma omp target + { + dat.xi = 4; + dat.datum[7]++; + dat.more_datum[17]++; + dat.val_datum = dat.datum[7]; + dat.val_more_datum = dat.more_datum[17]; + dat.datum[dat.arr[0][0]] = dat.xi; + dat.val_arr = dat.datum[dat.arr[0][0]]; + } + + /// Post-target region checks: + // clang-format off + // CHECK: omptarget --> Mapping exists with HstPtrBegin=[[DAT_HST_PTR_BASE]], TgtPtrBegin=[[DAT_DEVICE_PTR_BASE]], Size=288, DynRefCount=1 (decremented), HoldRefCount=0 + // clang-format on + +#pragma omp target exit data map(from : dat) + + /// Target data end checks: + // clang-format off + // CHECK: omptarget --> Mapping exists with HstPtrBegin=[[DAT_HST_PTR_BASE]], TgtPtrBegin=[[DAT_DEVICE_PTR_BASE]], Size=288, DynRefCount=0 (decremented, delayed deletion), HoldRefCount=0 + // CHECK: omptarget --> Moving 288 bytes (tgt:[[DAT_DEVICE_PTR_BASE]]) -> (hst:[[DAT_HST_PTR_BASE]]) + // clang-format on + + // CHECK: dat.xi = 4 + // CHECK: dat.val_datum = 8 + // CHECK: dat.val_more_datum = 18 + // CHECK: dat.datum[dat.arr[0][0]] = 0 + // CHECK: dat.val_arr = 4 + + printf("dat.xi = %d\n", dat.xi); + printf("dat.val_datum = %d\n", dat.val_datum); + printf("dat.val_more_datum = %d\n", dat.val_more_datum); + printf("dat.datum[dat.arr[0][0]] = %d\n", dat.datum[dat.arr[0][0]]); + printf("dat.val_arr = %d\n", dat.val_arr); + + return 0; +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits