[clang] [llvm] [OpenMP] Fix stack corruption due to argument mismatch (PR #96386)
https://github.com/sushgokh updated https://github.com/llvm/llvm-project/pull/96386 >From 7c6e2e6b0b7e55d98148386f314e779c55385f24 Mon Sep 17 00:00:00 2001 From: sgokhale Date: Sat, 22 Jun 2024 17:16:24 +0530 Subject: [PATCH] [OpenMP] Fix stack corruption due to argument mismatch While lowering (#pragma omp target update from), clang's generated .omp_task_entry. is setting up 9 arguments while calling __tgt_target_data_update_nowait_mapper. At the same time, in __tgt_target_data_update_nowait_mapper, call to targetData() is converted to a sibcall assuming it has the argument count listed in the signature. AARCH64 asm sequence for this is as follows (removed unrelated insns): .omp_task_entry..108: sub sp, sp, #32 stp x29, x30, sp, #16 // 16-byte Folded Spill add x29, sp, #16 str x8, sp, #8. // stack canary str xzr, [sp] bl __tgt_target_data_update_nowait_mapper __tgt_target_data_update_nowait_mapper: sub sp, sp, #32 stp x29, x30, sp, #16 // 16-byte Folded Spill add x29, sp, #16 str x8, sp, #8 // stack canary // Sibcall argument setup adrp x8, :got:_Z16targetDataUpdateP7ident_tR8DeviceTyiPPvS4_PlS5_S4_S4_R11AsyncInfoTyb ldr x8, [x8, :got_lo12:_Z16targetDataUpdateP7ident_tR8DeviceTyiPPvS4_PlS5_S4_S4_R11AsyncInfoTyb] stp x9, x8, x29, #16 adrp x8, .L.str.8 add x8, x8, :lo12:.L.str.8 str x8, x29, #32. <==. This is the insn that erases $fp ldp x29, x30, sp, #16 // 16-byte Folded Reload add sp, sp, #32 // Sibcall b ZL10targetDataI22TaskAsyncInfoWrapperTyEvP7ident_tliPPvS4_PlS5_S4_S4_PFiS2_R8DeviceTyiS4_S4_S5_S5_S4_S4_R11AsyncInfoTybEPKcSD On AArch64, call to __tgt_target_data_update_nowait_mapper in .omp_task_entry. sets up only single space on stack and this results in ovewriting $fp and subsequent stack corruption. This issue can be credited to discrepancy of __tgt_target_data_update_nowait_mapper signature in openmp/libomptarget/include/omptarget.h taking 13 arguments while clang/lib/CodeGen/CGOpenMPRuntime.cpp and llvm/include/llvm/Frontend/OpenMP/OMPKinds.def taking only 9 arguments. This patch modifies __tgt_target_data_update_nowait_mapper signature to match .omp_task_entry usage(and other 2 files mentioned above). Co-authored-by: Kugan Vivekanandarajah --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 23 +++--- clang/test/OpenMP/declare_mapper_codegen.cpp | 6 ++-- .../test/OpenMP/target_enter_data_codegen.cpp | 2 +- .../test/OpenMP/target_exit_data_codegen.cpp | 2 +- clang/test/OpenMP/target_update_codegen.cpp | 2 +- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 30 --- llvm/test/Transforms/OpenMP/add_attributes.ll | 24 +++ 7 files changed, 50 insertions(+), 39 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index f6d12d46cfc07..5acfce6604053 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -30,6 +30,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SmallBitVector.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/IR/Constants.h" @@ -10332,16 +10333,12 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( // Source location for the ident struct llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); -llvm::Value *OffloadingArgs[] = { -RTLoc, -DeviceID, -PointerNum, -InputInfo.BasePointersArray.emitRawPointer(CGF), -InputInfo.PointersArray.emitRawPointer(CGF), -InputInfo.SizesArray.emitRawPointer(CGF), -MapTypesArray, -MapNamesArray, -InputInfo.MappersArray.emitRawPointer(CGF)}; +SmallVector OffloadingArgs( +{RTLoc, DeviceID, PointerNum, + InputInfo.BasePointersArray.emitRawPointer(CGF), + InputInfo.PointersArray.emitRawPointer(CGF), + InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray, + InputInfo.MappersArray.emitRawPointer(CGF)}); // Select the right runtime function call for each standalone // directive. @@ -10430,6 +10427,12 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( llvm_unreachable("Unexpected standalone target data directive."); break; } +if (HasNowait) { + OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty)); + OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy)); + OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty)); + OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy)); +} CGF.EmitRuntimeCall( OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn), OffloadingArgs); diff --git a/clang/test/OpenMP/declare_mapper_codegen.cpp b/clang/test/OpenMP/declare_mapper_codegen.cpp index 647e2a0
[clang] [llvm] [OpenMP] Fix stack corruption due to argument mismatch (PR #96386)
https://github.com/sushgokh updated https://github.com/llvm/llvm-project/pull/96386 >From b376f84d9f7debffdb8815628952188c6516dafd Mon Sep 17 00:00:00 2001 From: sgokhale Date: Sat, 22 Jun 2024 17:16:24 +0530 Subject: [PATCH] [OpenMP] Fix stack corruption due to argument mismatch While lowering (#pragma omp target update from), clang's generated .omp_task_entry. is setting up 9 arguments while calling __tgt_target_data_update_nowait_mapper. At the same time, in __tgt_target_data_update_nowait_mapper, call to targetData() is converted to a sibcall assuming it has the argument count listed in the signature. AARCH64 asm sequence for this is as follows (removed unrelated insns): .omp_task_entry..108: sub sp, sp, #32 stp x29, x30, sp, #16 // 16-byte Folded Spill add x29, sp, #16 str x8, sp, #8. // stack canary str xzr, [sp] bl __tgt_target_data_update_nowait_mapper __tgt_target_data_update_nowait_mapper: sub sp, sp, #32 stp x29, x30, sp, #16 // 16-byte Folded Spill add x29, sp, #16 str x8, sp, #8 // stack canary // Sibcall argument setup adrp x8, :got:_Z16targetDataUpdateP7ident_tR8DeviceTyiPPvS4_PlS5_S4_S4_R11AsyncInfoTyb ldr x8, [x8, :got_lo12:_Z16targetDataUpdateP7ident_tR8DeviceTyiPPvS4_PlS5_S4_S4_R11AsyncInfoTyb] stp x9, x8, x29, #16 adrp x8, .L.str.8 add x8, x8, :lo12:.L.str.8 str x8, x29, #32. <==. This is the insn that erases $fp ldp x29, x30, sp, #16 // 16-byte Folded Reload add sp, sp, #32 // Sibcall b ZL10targetDataI22TaskAsyncInfoWrapperTyEvP7ident_tliPPvS4_PlS5_S4_S4_PFiS2_R8DeviceTyiS4_S4_S5_S5_S4_S4_R11AsyncInfoTybEPKcSD On AArch64, call to __tgt_target_data_update_nowait_mapper in .omp_task_entry. sets up only single space on stack and this results in ovewriting $fp and subsequent stack corruption. This issue can be credited to discrepancy of __tgt_target_data_update_nowait_mapper signature in openmp/libomptarget/include/omptarget.h taking 13 arguments while clang/lib/CodeGen/CGOpenMPRuntime.cpp and llvm/include/llvm/Frontend/OpenMP/OMPKinds.def taking only 9 arguments. This patch modifies __tgt_target_data_update_nowait_mapper signature to match .omp_task_entry usage(and other 2 files mentioned above). Co-authored-by: Kugan Vivekanandarajah --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 23 +++--- clang/test/OpenMP/declare_mapper_codegen.cpp | 6 ++-- .../test/OpenMP/target_enter_data_codegen.cpp | 2 +- .../test/OpenMP/target_exit_data_codegen.cpp | 2 +- clang/test/OpenMP/target_update_codegen.cpp | 2 +- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 30 --- llvm/test/Transforms/OpenMP/add_attributes.ll | 24 +++ 7 files changed, 50 insertions(+), 39 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index b47b521edd32c..f79e8f5f01a56 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -30,6 +30,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SmallBitVector.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/IR/Constants.h" @@ -10341,16 +10342,12 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( // Source location for the ident struct llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); -llvm::Value *OffloadingArgs[] = { -RTLoc, -DeviceID, -PointerNum, -InputInfo.BasePointersArray.emitRawPointer(CGF), -InputInfo.PointersArray.emitRawPointer(CGF), -InputInfo.SizesArray.emitRawPointer(CGF), -MapTypesArray, -MapNamesArray, -InputInfo.MappersArray.emitRawPointer(CGF)}; +SmallVector OffloadingArgs( +{RTLoc, DeviceID, PointerNum, + InputInfo.BasePointersArray.emitRawPointer(CGF), + InputInfo.PointersArray.emitRawPointer(CGF), + InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray, + InputInfo.MappersArray.emitRawPointer(CGF)}); // Select the right runtime function call for each standalone // directive. @@ -10439,6 +10436,12 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( llvm_unreachable("Unexpected standalone target data directive."); break; } +if (HasNowait) { + OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty)); + OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy)); + OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty)); + OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy)); +} CGF.EmitRuntimeCall( OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn), OffloadingArgs); diff --git a/clang/test/OpenMP/declare_mapper_codegen.cpp b/clang/test/OpenMP/declare_mapper_codegen.cpp index 647e2a0
[clang] [llvm] [OpenMP] Fix stack corruption due to argument mismatch (PR #96386)
https://github.com/sushgokh closed https://github.com/llvm/llvm-project/pull/96386 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [OpenMP] Fix stack corruption due to argument mismatch (PR #96386)
https://github.com/sushgokh updated https://github.com/llvm/llvm-project/pull/96386 >From af4dc96c25f32b477337cedaeb0a696f75840ac0 Mon Sep 17 00:00:00 2001 From: sgokhale Date: Sat, 22 Jun 2024 17:16:24 +0530 Subject: [PATCH] [OpenMP] Fix stack corruption due to argument mismatch While lowering (#pragma omp target update from), clang's generated .omp_task_entry. is setting up 9 arguments while calling __tgt_target_data_update_nowait_mapper. At the same time, in __tgt_target_data_update_nowait_mapper, call to targetData() is converted to a sibcall assuming it has the argument count listed in the signature. AARCH64 asm sequence for this is as follows (removed unrelated insns): .omp_task_entry..108: sub sp, sp, #32 stp x29, x30, sp, #16 // 16-byte Folded Spill add x29, sp, #16 str x8, sp, #8. // stack canary str xzr, [sp] bl __tgt_target_data_update_nowait_mapper __tgt_target_data_update_nowait_mapper: sub sp, sp, #32 stp x29, x30, sp, #16 // 16-byte Folded Spill add x29, sp, #16 str x8, sp, #8 // stack canary // Sibcall argument setup adrp x8, :got:_Z16targetDataUpdateP7ident_tR8DeviceTyiPPvS4_PlS5_S4_S4_R11AsyncInfoTyb ldr x8, [x8, :got_lo12:_Z16targetDataUpdateP7ident_tR8DeviceTyiPPvS4_PlS5_S4_S4_R11AsyncInfoTyb] stp x9, x8, x29, #16 adrp x8, .L.str.8 add x8, x8, :lo12:.L.str.8 str x8, x29, #32. <==. This is the insn that erases $fp ldp x29, x30, sp, #16 // 16-byte Folded Reload add sp, sp, #32 // Sibcall b ZL10targetDataI22TaskAsyncInfoWrapperTyEvP7ident_tliPPvS4_PlS5_S4_S4_PFiS2_R8DeviceTyiS4_S4_S5_S5_S4_S4_R11AsyncInfoTybEPKcSD On AArch64, call to __tgt_target_data_update_nowait_mapper in .omp_task_entry. sets up only single space on stack and this results in ovewriting $fp and subsequent stack corruption. This issue can be credited to discrepancy of __tgt_target_data_update_nowait_mapper signature in openmp/libomptarget/include/omptarget.h taking 13 arguments while clang/lib/CodeGen/CGOpenMPRuntime.cpp and llvm/include/llvm/Frontend/OpenMP/OMPKinds.def taking only 9 arguments. This patch modifies __tgt_target_data_update_nowait_mapper signature to match .omp_task_entry usage(and other 2 files mentioned above). Co-authored-by: Kugan Vivekanandarajah --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 28 +++-- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 30 --- 2 files changed, 44 insertions(+), 14 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index f6d12d46cfc07..fc3ad533666ca 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -10343,6 +10343,23 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( MapNamesArray, InputInfo.MappersArray.emitRawPointer(CGF)}; +// Nowait calls have header declarations that take 13 arguments. Hence, the +// divergence from the OffloadingArgs definition. +llvm::Value *NowaitOffloadingArgs[] = { +RTLoc, +DeviceID, +PointerNum, +InputInfo.BasePointersArray.emitRawPointer(CGF), +InputInfo.PointersArray.emitRawPointer(CGF), +InputInfo.SizesArray.emitRawPointer(CGF), +MapTypesArray, +MapNamesArray, +InputInfo.MappersArray.emitRawPointer(CGF), +llvm::Constant::getNullValue(CGF.Int32Ty), +llvm::Constant::getNullValue(CGF.VoidPtrTy), +llvm::Constant::getNullValue(CGF.Int32Ty), +llvm::Constant::getNullValue(CGF.VoidPtrTy)}; + // Select the right runtime function call for each standalone // directive. const bool HasNowait = D.hasClausesOfKind(); @@ -10430,9 +10447,14 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( llvm_unreachable("Unexpected standalone target data directive."); break; } -CGF.EmitRuntimeCall( -OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn), -OffloadingArgs); +if (HasNowait) + CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn), + NowaitOffloadingArgs); +else + CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn), + OffloadingArgs); }; auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index fe09bb8177c28..ebd928470109a 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -438,19 +438,22 @@ __OMP_RTL(__tgt_target_kernel_nowait, false, Int32, IdentPtr, Int64, Int32, Int32, VoidPtr, KernelArgsPtr, Int32, VoidPtr, Int32, VoidPtr) __OMP_RTL(__tgt_target_data_begin_mapper, false, Void, IdentPtr, Int64, Int32, VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr)