[clang] [llvm] [OpenMP] Fix stack corruption due to argument mismatch (PR #96386)

2024-07-02 Thread Sushant Gokhale via cfe-commits

https://github.com/sushgokh updated 
https://github.com/llvm/llvm-project/pull/96386

>From 7c6e2e6b0b7e55d98148386f314e779c55385f24 Mon Sep 17 00:00:00 2001
From: sgokhale 
Date: Sat, 22 Jun 2024 17:16:24 +0530
Subject: [PATCH] [OpenMP] Fix stack corruption due to argument mismatch

While lowering (#pragma omp target update from), clang's generated
.omp_task_entry. is setting up 9 arguments while calling
__tgt_target_data_update_nowait_mapper.

At the same time, in __tgt_target_data_update_nowait_mapper, call to
targetData() is converted to a sibcall assuming
it has the argument count listed in the signature.

AARCH64 asm sequence for this is as follows (removed unrelated insns):

.omp_task_entry..108:
  sub   sp, sp, #32
  stp   x29, x30, sp, #16   // 16-byte Folded Spill
  add   x29, sp, #16
  str   x8, sp, #8. // stack canary
  str   xzr, [sp]
  bl   __tgt_target_data_update_nowait_mapper

__tgt_target_data_update_nowait_mapper:
  sub   sp, sp, #32
  stp   x29, x30, sp, #16   // 16-byte Folded Spill
  add   x29, sp, #16
  str   x8, sp, #8 // stack canary
  // Sibcall argument setup
  adrp  x8, 
:got:_Z16targetDataUpdateP7ident_tR8DeviceTyiPPvS4_PlS5_S4_S4_R11AsyncInfoTyb
  ldr   x8, [x8, 
:got_lo12:_Z16targetDataUpdateP7ident_tR8DeviceTyiPPvS4_PlS5_S4_S4_R11AsyncInfoTyb]
  stp   x9, x8, x29, #16
  adrp  x8, .L.str.8
  add   x8, x8, :lo12:.L.str.8
  str   x8, x29, #32. <==. This is the insn that erases $fp

  ldp   x29, x30, sp, #16   // 16-byte Folded Reload
  add   sp, sp, #32
  // Sibcall
  b
ZL10targetDataI22TaskAsyncInfoWrapperTyEvP7ident_tliPPvS4_PlS5_S4_S4_PFiS2_R8DeviceTyiS4_S4_S5_S5_S4_S4_R11AsyncInfoTybEPKcSD

On AArch64, call to __tgt_target_data_update_nowait_mapper in .omp_task_entry.
sets up only single space on stack and this results in ovewriting $fp
and subsequent stack corruption. This issue can be credited to discrepancy of
__tgt_target_data_update_nowait_mapper signature in
openmp/libomptarget/include/omptarget.h taking 13 arguments while
clang/lib/CodeGen/CGOpenMPRuntime.cpp and
llvm/include/llvm/Frontend/OpenMP/OMPKinds.def taking only 9 arguments.

This patch modifies __tgt_target_data_update_nowait_mapper signature
to match .omp_task_entry usage(and other 2 files mentioned above).

Co-authored-by: Kugan Vivekanandarajah 
---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp | 23 +++---
 clang/test/OpenMP/declare_mapper_codegen.cpp  |  6 ++--
 .../test/OpenMP/target_enter_data_codegen.cpp |  2 +-
 .../test/OpenMP/target_exit_data_codegen.cpp  |  2 +-
 clang/test/OpenMP/target_update_codegen.cpp   |  2 +-
 .../include/llvm/Frontend/OpenMP/OMPKinds.def | 30 ---
 llvm/test/Transforms/OpenMP/add_attributes.ll | 24 +++
 7 files changed, 50 insertions(+), 39 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp 
b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index f6d12d46cfc07..5acfce6604053 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -30,6 +30,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SetOperations.h"
 #include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Bitcode/BitcodeReader.h"
 #include "llvm/IR/Constants.h"
@@ -10332,16 +10333,12 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
 // Source location for the ident struct
 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
 
-llvm::Value *OffloadingArgs[] = {
-RTLoc,
-DeviceID,
-PointerNum,
-InputInfo.BasePointersArray.emitRawPointer(CGF),
-InputInfo.PointersArray.emitRawPointer(CGF),
-InputInfo.SizesArray.emitRawPointer(CGF),
-MapTypesArray,
-MapNamesArray,
-InputInfo.MappersArray.emitRawPointer(CGF)};
+SmallVector OffloadingArgs(
+{RTLoc, DeviceID, PointerNum,
+ InputInfo.BasePointersArray.emitRawPointer(CGF),
+ InputInfo.PointersArray.emitRawPointer(CGF),
+ InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, 
MapNamesArray,
+ InputInfo.MappersArray.emitRawPointer(CGF)});
 
 // Select the right runtime function call for each standalone
 // directive.
@@ -10430,6 +10427,12 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
   llvm_unreachable("Unexpected standalone target data directive.");
   break;
 }
+if (HasNowait) {
+  OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
+  OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
+  OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
+  OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
+}
 CGF.EmitRuntimeCall(
 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
 OffloadingArgs);
diff --git a/clang/test/OpenMP/declare_mapper_codegen.cpp 
b/clang/test/OpenMP/declare_mapper_codegen.cpp
index 647e2a0

[clang] [llvm] [OpenMP] Fix stack corruption due to argument mismatch (PR #96386)

2024-07-03 Thread Sushant Gokhale via cfe-commits

https://github.com/sushgokh updated 
https://github.com/llvm/llvm-project/pull/96386

>From b376f84d9f7debffdb8815628952188c6516dafd Mon Sep 17 00:00:00 2001
From: sgokhale 
Date: Sat, 22 Jun 2024 17:16:24 +0530
Subject: [PATCH] [OpenMP] Fix stack corruption due to argument mismatch

While lowering (#pragma omp target update from), clang's generated
.omp_task_entry. is setting up 9 arguments while calling
__tgt_target_data_update_nowait_mapper.

At the same time, in __tgt_target_data_update_nowait_mapper, call to
targetData() is converted to a sibcall assuming
it has the argument count listed in the signature.

AARCH64 asm sequence for this is as follows (removed unrelated insns):

.omp_task_entry..108:
  sub   sp, sp, #32
  stp   x29, x30, sp, #16   // 16-byte Folded Spill
  add   x29, sp, #16
  str   x8, sp, #8. // stack canary
  str   xzr, [sp]
  bl   __tgt_target_data_update_nowait_mapper

__tgt_target_data_update_nowait_mapper:
  sub   sp, sp, #32
  stp   x29, x30, sp, #16   // 16-byte Folded Spill
  add   x29, sp, #16
  str   x8, sp, #8 // stack canary
  // Sibcall argument setup
  adrp  x8, 
:got:_Z16targetDataUpdateP7ident_tR8DeviceTyiPPvS4_PlS5_S4_S4_R11AsyncInfoTyb
  ldr   x8, [x8, 
:got_lo12:_Z16targetDataUpdateP7ident_tR8DeviceTyiPPvS4_PlS5_S4_S4_R11AsyncInfoTyb]
  stp   x9, x8, x29, #16
  adrp  x8, .L.str.8
  add   x8, x8, :lo12:.L.str.8
  str   x8, x29, #32. <==. This is the insn that erases $fp

  ldp   x29, x30, sp, #16   // 16-byte Folded Reload
  add   sp, sp, #32
  // Sibcall
  b
ZL10targetDataI22TaskAsyncInfoWrapperTyEvP7ident_tliPPvS4_PlS5_S4_S4_PFiS2_R8DeviceTyiS4_S4_S5_S5_S4_S4_R11AsyncInfoTybEPKcSD

On AArch64, call to __tgt_target_data_update_nowait_mapper in .omp_task_entry.
sets up only single space on stack and this results in ovewriting $fp
and subsequent stack corruption. This issue can be credited to discrepancy of
__tgt_target_data_update_nowait_mapper signature in
openmp/libomptarget/include/omptarget.h taking 13 arguments while
clang/lib/CodeGen/CGOpenMPRuntime.cpp and
llvm/include/llvm/Frontend/OpenMP/OMPKinds.def taking only 9 arguments.

This patch modifies __tgt_target_data_update_nowait_mapper signature
to match .omp_task_entry usage(and other 2 files mentioned above).

Co-authored-by: Kugan Vivekanandarajah 
---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp | 23 +++---
 clang/test/OpenMP/declare_mapper_codegen.cpp  |  6 ++--
 .../test/OpenMP/target_enter_data_codegen.cpp |  2 +-
 .../test/OpenMP/target_exit_data_codegen.cpp  |  2 +-
 clang/test/OpenMP/target_update_codegen.cpp   |  2 +-
 .../include/llvm/Frontend/OpenMP/OMPKinds.def | 30 ---
 llvm/test/Transforms/OpenMP/add_attributes.ll | 24 +++
 7 files changed, 50 insertions(+), 39 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp 
b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index b47b521edd32c..f79e8f5f01a56 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -30,6 +30,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SetOperations.h"
 #include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Bitcode/BitcodeReader.h"
 #include "llvm/IR/Constants.h"
@@ -10341,16 +10342,12 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
 // Source location for the ident struct
 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
 
-llvm::Value *OffloadingArgs[] = {
-RTLoc,
-DeviceID,
-PointerNum,
-InputInfo.BasePointersArray.emitRawPointer(CGF),
-InputInfo.PointersArray.emitRawPointer(CGF),
-InputInfo.SizesArray.emitRawPointer(CGF),
-MapTypesArray,
-MapNamesArray,
-InputInfo.MappersArray.emitRawPointer(CGF)};
+SmallVector OffloadingArgs(
+{RTLoc, DeviceID, PointerNum,
+ InputInfo.BasePointersArray.emitRawPointer(CGF),
+ InputInfo.PointersArray.emitRawPointer(CGF),
+ InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, 
MapNamesArray,
+ InputInfo.MappersArray.emitRawPointer(CGF)});
 
 // Select the right runtime function call for each standalone
 // directive.
@@ -10439,6 +10436,12 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
   llvm_unreachable("Unexpected standalone target data directive.");
   break;
 }
+if (HasNowait) {
+  OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
+  OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
+  OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
+  OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
+}
 CGF.EmitRuntimeCall(
 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
 OffloadingArgs);
diff --git a/clang/test/OpenMP/declare_mapper_codegen.cpp 
b/clang/test/OpenMP/declare_mapper_codegen.cpp
index 647e2a0

[clang] [llvm] [OpenMP] Fix stack corruption due to argument mismatch (PR #96386)

2024-07-04 Thread Sushant Gokhale via cfe-commits

https://github.com/sushgokh closed 
https://github.com/llvm/llvm-project/pull/96386
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [OpenMP] Fix stack corruption due to argument mismatch (PR #96386)

2024-06-27 Thread Sushant Gokhale via cfe-commits

https://github.com/sushgokh updated 
https://github.com/llvm/llvm-project/pull/96386

>From af4dc96c25f32b477337cedaeb0a696f75840ac0 Mon Sep 17 00:00:00 2001
From: sgokhale 
Date: Sat, 22 Jun 2024 17:16:24 +0530
Subject: [PATCH] [OpenMP] Fix stack corruption due to argument mismatch

While lowering (#pragma omp target update from), clang's generated
.omp_task_entry. is setting up 9 arguments while calling
__tgt_target_data_update_nowait_mapper.

At the same time, in __tgt_target_data_update_nowait_mapper, call to
targetData() is converted to a sibcall assuming
it has the argument count listed in the signature.

AARCH64 asm sequence for this is as follows (removed unrelated insns):

.omp_task_entry..108:
  sub   sp, sp, #32
  stp   x29, x30, sp, #16   // 16-byte Folded Spill
  add   x29, sp, #16
  str   x8, sp, #8. // stack canary
  str   xzr, [sp]
  bl   __tgt_target_data_update_nowait_mapper

__tgt_target_data_update_nowait_mapper:
  sub   sp, sp, #32
  stp   x29, x30, sp, #16   // 16-byte Folded Spill
  add   x29, sp, #16
  str   x8, sp, #8 // stack canary
  // Sibcall argument setup
  adrp  x8, 
:got:_Z16targetDataUpdateP7ident_tR8DeviceTyiPPvS4_PlS5_S4_S4_R11AsyncInfoTyb
  ldr   x8, [x8, 
:got_lo12:_Z16targetDataUpdateP7ident_tR8DeviceTyiPPvS4_PlS5_S4_S4_R11AsyncInfoTyb]
  stp   x9, x8, x29, #16
  adrp  x8, .L.str.8
  add   x8, x8, :lo12:.L.str.8
  str   x8, x29, #32. <==. This is the insn that erases $fp

  ldp   x29, x30, sp, #16   // 16-byte Folded Reload
  add   sp, sp, #32
  // Sibcall
  b
ZL10targetDataI22TaskAsyncInfoWrapperTyEvP7ident_tliPPvS4_PlS5_S4_S4_PFiS2_R8DeviceTyiS4_S4_S5_S5_S4_S4_R11AsyncInfoTybEPKcSD

On AArch64, call to __tgt_target_data_update_nowait_mapper in .omp_task_entry.
sets up only single space on stack and this results in ovewriting $fp
and subsequent stack corruption. This issue can be credited to discrepancy of
__tgt_target_data_update_nowait_mapper signature in
openmp/libomptarget/include/omptarget.h taking 13 arguments while
clang/lib/CodeGen/CGOpenMPRuntime.cpp and
llvm/include/llvm/Frontend/OpenMP/OMPKinds.def taking only 9 arguments.

This patch modifies __tgt_target_data_update_nowait_mapper signature
to match .omp_task_entry usage(and other 2 files mentioned above).

Co-authored-by: Kugan Vivekanandarajah 
---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp | 28 +++--
 .../include/llvm/Frontend/OpenMP/OMPKinds.def | 30 ---
 2 files changed, 44 insertions(+), 14 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp 
b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index f6d12d46cfc07..fc3ad533666ca 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -10343,6 +10343,23 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
 MapNamesArray,
 InputInfo.MappersArray.emitRawPointer(CGF)};
 
+// Nowait calls have header declarations that take 13 arguments. Hence, the
+// divergence from the OffloadingArgs definition.
+llvm::Value *NowaitOffloadingArgs[] = {
+RTLoc,
+DeviceID,
+PointerNum,
+InputInfo.BasePointersArray.emitRawPointer(CGF),
+InputInfo.PointersArray.emitRawPointer(CGF),
+InputInfo.SizesArray.emitRawPointer(CGF),
+MapTypesArray,
+MapNamesArray,
+InputInfo.MappersArray.emitRawPointer(CGF),
+llvm::Constant::getNullValue(CGF.Int32Ty),
+llvm::Constant::getNullValue(CGF.VoidPtrTy),
+llvm::Constant::getNullValue(CGF.Int32Ty),
+llvm::Constant::getNullValue(CGF.VoidPtrTy)};
+
 // Select the right runtime function call for each standalone
 // directive.
 const bool HasNowait = D.hasClausesOfKind();
@@ -10430,9 +10447,14 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
   llvm_unreachable("Unexpected standalone target data directive.");
   break;
 }
-CGF.EmitRuntimeCall(
-OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
-OffloadingArgs);
+if (HasNowait)
+  CGF.EmitRuntimeCall(
+  OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
+  NowaitOffloadingArgs);
+else
+  CGF.EmitRuntimeCall(
+  OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
+  OffloadingArgs);
   };
 
   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def 
b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index fe09bb8177c28..ebd928470109a 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -438,19 +438,22 @@ __OMP_RTL(__tgt_target_kernel_nowait, false, Int32, 
IdentPtr, Int64, Int32,
   Int32, VoidPtr, KernelArgsPtr, Int32, VoidPtr, Int32, VoidPtr)
 __OMP_RTL(__tgt_target_data_begin_mapper, false, Void, IdentPtr, Int64, Int32, 
VoidPtrPtr,
   VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr)