[PATCH] D58623: [AMDGPU] Allow using integral non-type template parameters

2019-02-26 Thread Michael Liao via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rC354909: [AMDGPU] Allow using integral non-type template 
parameters (authored by hliao, committed by ).

Changed prior to commit:
  https://reviews.llvm.org/D58623?vs=188172&id=188416#toc

Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58623/new/

https://reviews.llvm.org/D58623

Files:
  include/clang/Basic/Attr.td
  include/clang/Sema/Sema.h
  lib/CodeGen/TargetInfo.cpp
  lib/Sema/SemaDeclAttr.cpp
  lib/Sema/SemaTemplateInstantiateDecl.cpp
  test/SemaCUDA/amdgpu-attrs.cu
  test/SemaOpenCL/amdgpu-attrs.cl

Index: include/clang/Sema/Sema.h
===
--- include/clang/Sema/Sema.h
+++ include/clang/Sema/Sema.h
@@ -8674,6 +8674,16 @@
   void AddXConsumedAttr(Decl *D, SourceRange SR, unsigned SpellingIndex,
 RetainOwnershipKind K, bool IsTemplateInstantiation);
 
+  /// addAMDGPUFlatWorkGroupSizeAttr - Adds an amdgpu_flat_work_group_size
+  /// attribute to a particular declaration.
+  void addAMDGPUFlatWorkGroupSizeAttr(SourceRange AttrRange, Decl *D, Expr *Min,
+  Expr *Max, unsigned SpellingListIndex);
+
+  /// addAMDGPUWavesPerEUAttr - Adds an amdgpu_waves_per_eu attribute to a
+  /// particular declaration.
+  void addAMDGPUWavesPerEUAttr(SourceRange AttrRange, Decl *D, Expr *Min,
+   Expr *Max, unsigned SpellingListIndex);
+
   bool checkNSReturnsRetainedReturnType(SourceLocation loc, QualType type);
 
   //======//
Index: include/clang/Basic/Attr.td
===
--- include/clang/Basic/Attr.td
+++ include/clang/Basic/Attr.td
@@ -1484,14 +1484,14 @@
 
 def AMDGPUFlatWorkGroupSize : InheritableAttr {
   let Spellings = [Clang<"amdgpu_flat_work_group_size", 0>];
-  let Args = [UnsignedArgument<"Min">, UnsignedArgument<"Max">];
+  let Args = [ExprArgument<"Min">, ExprArgument<"Max">];
   let Documentation = [AMDGPUFlatWorkGroupSizeDocs];
   let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">;
 }
 
 def AMDGPUWavesPerEU : InheritableAttr {
   let Spellings = [Clang<"amdgpu_waves_per_eu", 0>];
-  let Args = [UnsignedArgument<"Min">, UnsignedArgument<"Max", 1>];
+  let Args = [ExprArgument<"Min">, ExprArgument<"Max", 1>];
   let Documentation = [AMDGPUWavesPerEUDocs];
   let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">;
 }
Index: test/SemaOpenCL/amdgpu-attrs.cl
===
--- test/SemaOpenCL/amdgpu-attrs.cl
+++ test/SemaOpenCL/amdgpu-attrs.cl
@@ -27,12 +27,12 @@
 __attribute__((amdgpu_num_sgpr(32))) void func_num_sgpr_32() {} // expected-error {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
 __attribute__((amdgpu_num_vgpr(64))) void func_num_vgpr_64() {} // expected-error {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
 
-__attribute__((amdgpu_flat_work_group_size("ABC", "ABC"))) kernel void kernel_flat_work_group_size_ABC_ABC() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires an integer constant}}
-__attribute__((amdgpu_flat_work_group_size(32, "ABC"))) kernel void kernel_flat_work_group_size_32_ABC() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires an integer constant}}
-__attribute__((amdgpu_flat_work_group_size("ABC", 64))) kernel void kernel_flat_work_group_size_ABC_64() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires an integer constant}}
-__attribute__((amdgpu_waves_per_eu("ABC"))) kernel void kernel_waves_per_eu_ABC() {} // expected-error {{'amdgpu_waves_per_eu' attribute requires an integer constant}}
-__attribute__((amdgpu_waves_per_eu(2, "ABC"))) kernel void kernel_waves_per_eu_2_ABC() {} // expected-error {{'amdgpu_waves_per_eu' attribute requires an integer constant}}
-__attribute__((amdgpu_waves_per_eu("ABC", 4))) kernel void kernel_waves_per_eu_ABC_4() {} // expected-error {{'amdgpu_waves_per_eu' attribute requires an integer constant}}
+__attribute__((amdgpu_flat_work_group_size("ABC", "ABC"))) kernel void kernel_flat_work_group_size_ABC_ABC() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires parameter 0 to be an integer constant}}
+__attribute__((amdgpu_flat_work_group_size(32, "ABC"))) kernel void kernel_flat_work_group_size_32_ABC() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires parameter 1 to be an integer constant}}
+__attribute__((amdgpu_flat_work_group_size("ABC", 64))) kernel void kernel_flat_work_group_size_ABC_64() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires parameter 0 to be an integer constant}}
+__attribute__((amdgpu_waves_per_eu("ABC"))) kernel void kernel_waves_per_eu_ABC() {} // expected-error {{'amdgpu_waves_per_eu' attrib

[PATCH] D58992: [CUDA][HIP][DebugInfo] Skip reference device function

2019-03-05 Thread Michael Liao via Phabricator via cfe-commits
hliao created this revision.
Herald added subscribers: cfe-commits, jdoerfert, aprantl.
Herald added a project: clang.

- A device function could be used as a non-type template parameter in a
global/host function template. However, we should not try to retrieve that
device function and reference it in the host-side debug info, as it is only
valid on the device side.
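
As a rough illustration (hypothetical names; the test added below has the same
shape and assumes the usual CUDA attribute macros from Inputs/cuda.h):

  __device__ void dev_fn();                                // device-only function
  template <void (*F)()> __global__ void kern() { F(); }   // F is a non-type template parameter
  __host__ void launch() { kern<dev_fn><<<1, 1>>>(); }     // host code instantiates kern<dev_fn>

When emitting host-side debug info for the instantiation `kern<dev_fn>`, the
address of `dev_fn` is meaningless on the host, so the template value parameter
is emitted as null (see the CHECK line in the test).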


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D58992

Files:
  clang/lib/CodeGen/CGDebugInfo.cpp
  clang/test/CodeGenCUDA/debug-info-template.cu


Index: clang/test/CodeGenCUDA/debug-info-template.cu
===
--- /dev/null
+++ clang/test/CodeGenCUDA/debug-info-template.cu
@@ -0,0 +1,11 @@
+// RUN: %clang_cc1 %s --std=c++11 -triple x86_64-unknown-linux -emit-llvm -o - 
-debug-info-kind=limited -dwarf-version=2 -debugger-tuning=gdb | FileCheck %s
+
+#include "Inputs/cuda.h"
+
+__device__ void f();
+template __global__ void t() { F(); }
+__host__ void g() { t<<<1,1>>>(); }
+
+// Ensure the value of device-function (as value template parameter) is
+// null.
+// CHECK: !DITemplateValueParameter(name: "F", type: !34, value: null)
Index: clang/lib/CodeGen/CGDebugInfo.cpp
===
--- clang/lib/CodeGen/CGDebugInfo.cpp
+++ clang/lib/CodeGen/CGDebugInfo.cpp
@@ -1725,31 +1725,37 @@
   QualType T = TA.getParamTypeForDecl().getDesugaredType(CGM.getContext());
   llvm::DIType *TTy = getOrCreateType(T, Unit);
   llvm::Constant *V = nullptr;
-  const CXXMethodDecl *MD;
-  // Variable pointer template parameters have a value that is the address
-  // of the variable.
-  if (const auto *VD = dyn_cast(D))
-V = CGM.GetAddrOfGlobalVar(VD);
-  // Member function pointers have special support for building them, 
though
-  // this is currently unsupported in LLVM CodeGen.
-  else if ((MD = dyn_cast(D)) && MD->isInstance())
-V = CGM.getCXXABI().EmitMemberFunctionPointer(MD);
-  else if (const auto *FD = dyn_cast(D))
-V = CGM.GetAddrOfFunction(FD);
-  // Member data pointers have special handling too to compute the fixed
-  // offset within the object.
-  else if (const auto *MPT = dyn_cast(T.getTypePtr())) {
-// These five lines (& possibly the above member function pointer
-// handling) might be able to be refactored to use similar code in
-// CodeGenModule::getMemberPointerConstant
-uint64_t fieldOffset = CGM.getContext().getFieldOffset(D);
-CharUnits chars =
-CGM.getContext().toCharUnitsFromBits((int64_t)fieldOffset);
-V = CGM.getCXXABI().EmitMemberDataPointer(MPT, chars);
+  // Skip retrieve the value if that template parameter has cuda device
+  // attribute, i.e. that value is not available at the host side.
+  if (!CGM.getLangOpts().CUDA || CGM.getLangOpts().CUDAIsDevice ||
+  !D->hasAttr()) {
+const CXXMethodDecl *MD;
+// Variable pointer template parameters have a value that is the 
address
+// of the variable.
+if (const auto *VD = dyn_cast(D))
+  V = CGM.GetAddrOfGlobalVar(VD);
+// Member function pointers have special support for building them,
+// though this is currently unsupported in LLVM CodeGen.
+else if ((MD = dyn_cast(D)) && MD->isInstance())
+  V = CGM.getCXXABI().EmitMemberFunctionPointer(MD);
+else if (const auto *FD = dyn_cast(D))
+  V = CGM.GetAddrOfFunction(FD);
+// Member data pointers have special handling too to compute the fixed
+// offset within the object.
+else if (const auto *MPT =
+ dyn_cast(T.getTypePtr())) {
+  // These five lines (& possibly the above member function pointer
+  // handling) might be able to be refactored to use similar code in
+  // CodeGenModule::getMemberPointerConstant
+  uint64_t fieldOffset = CGM.getContext().getFieldOffset(D);
+  CharUnits chars =
+  CGM.getContext().toCharUnitsFromBits((int64_t)fieldOffset);
+  V = CGM.getCXXABI().EmitMemberDataPointer(MPT, chars);
+}
+V = V->stripPointerCasts();
   }
   TemplateParams.push_back(DBuilder.createTemplateValueParameter(
-  TheCU, Name, TTy,
-  cast_or_null(V->stripPointerCasts(;
+  TheCU, Name, TTy, cast_or_null(V)));
 } break;
 case TemplateArgument::NullPtr: {
   QualType T = TA.getNullPtrType();



[PATCH] D58992: [CUDA][HIP][DebugInfo] Skip reference device function

2019-03-06 Thread Michael Liao via Phabricator via cfe-commits
hliao updated this revision to Diff 189488.
hliao added a comment.

Make the test more robust against potential metadata identifier changes.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58992/new/

https://reviews.llvm.org/D58992

Files:
  clang/lib/CodeGen/CGDebugInfo.cpp
  clang/test/CodeGenCUDA/debug-info-template.cu


Index: clang/test/CodeGenCUDA/debug-info-template.cu
===
--- /dev/null
+++ clang/test/CodeGenCUDA/debug-info-template.cu
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 %s --std=c++11 -triple x86_64-unknown-linux -emit-llvm -o - 
-debug-info-kind=limited -dwarf-version=2 -debugger-tuning=gdb | FileCheck %s
+
+#include "Inputs/cuda.h"
+
+__device__ void f();
+template __global__ void t() { F(); }
+__host__ void g() { t<<<1,1>>>(); }
+
+// Ensure the value of device-function (as value template parameter) is null.
+// CHECK: !DITemplateValueParameter(name: "F", type: !{{[0-9]+}}, value: null)
Index: clang/lib/CodeGen/CGDebugInfo.cpp
===
--- clang/lib/CodeGen/CGDebugInfo.cpp
+++ clang/lib/CodeGen/CGDebugInfo.cpp
@@ -1725,31 +1725,37 @@
   QualType T = TA.getParamTypeForDecl().getDesugaredType(CGM.getContext());
   llvm::DIType *TTy = getOrCreateType(T, Unit);
   llvm::Constant *V = nullptr;
-  const CXXMethodDecl *MD;
-  // Variable pointer template parameters have a value that is the address
-  // of the variable.
-  if (const auto *VD = dyn_cast(D))
-V = CGM.GetAddrOfGlobalVar(VD);
-  // Member function pointers have special support for building them, 
though
-  // this is currently unsupported in LLVM CodeGen.
-  else if ((MD = dyn_cast(D)) && MD->isInstance())
-V = CGM.getCXXABI().EmitMemberFunctionPointer(MD);
-  else if (const auto *FD = dyn_cast(D))
-V = CGM.GetAddrOfFunction(FD);
-  // Member data pointers have special handling too to compute the fixed
-  // offset within the object.
-  else if (const auto *MPT = dyn_cast(T.getTypePtr())) {
-// These five lines (& possibly the above member function pointer
-// handling) might be able to be refactored to use similar code in
-// CodeGenModule::getMemberPointerConstant
-uint64_t fieldOffset = CGM.getContext().getFieldOffset(D);
-CharUnits chars =
-CGM.getContext().toCharUnitsFromBits((int64_t)fieldOffset);
-V = CGM.getCXXABI().EmitMemberDataPointer(MPT, chars);
+  // Skip retrieve the value if that template parameter has cuda device
+  // attribute, i.e. that value is not available at the host side.
+  if (!CGM.getLangOpts().CUDA || CGM.getLangOpts().CUDAIsDevice ||
+  !D->hasAttr()) {
+const CXXMethodDecl *MD;
+// Variable pointer template parameters have a value that is the 
address
+// of the variable.
+if (const auto *VD = dyn_cast(D))
+  V = CGM.GetAddrOfGlobalVar(VD);
+// Member function pointers have special support for building them,
+// though this is currently unsupported in LLVM CodeGen.
+else if ((MD = dyn_cast(D)) && MD->isInstance())
+  V = CGM.getCXXABI().EmitMemberFunctionPointer(MD);
+else if (const auto *FD = dyn_cast(D))
+  V = CGM.GetAddrOfFunction(FD);
+// Member data pointers have special handling too to compute the fixed
+// offset within the object.
+else if (const auto *MPT =
+ dyn_cast(T.getTypePtr())) {
+  // These five lines (& possibly the above member function pointer
+  // handling) might be able to be refactored to use similar code in
+  // CodeGenModule::getMemberPointerConstant
+  uint64_t fieldOffset = CGM.getContext().getFieldOffset(D);
+  CharUnits chars =
+  CGM.getContext().toCharUnitsFromBits((int64_t)fieldOffset);
+  V = CGM.getCXXABI().EmitMemberDataPointer(MPT, chars);
+}
+V = V->stripPointerCasts();
   }
   TemplateParams.push_back(DBuilder.createTemplateValueParameter(
-  TheCU, Name, TTy,
-  cast_or_null(V->stripPointerCasts(;
+  TheCU, Name, TTy, cast_or_null(V)));
 } break;
 case TemplateArgument::NullPtr: {
   QualType T = TA.getNullPtrType();



[PATCH] D58992: [CUDA][HIP][DebugInfo] Skip reference device function

2019-03-06 Thread Michael Liao via Phabricator via cfe-commits
hliao marked an inline comment as done.
hliao added inline comments.



Comment at: clang/lib/CodeGen/CGDebugInfo.cpp:1755
+}
+V = V->stripPointerCasts();
   }

aprantl wrote:
> This wasn't there before... why is this necessary?
That's from the original line 1752.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58992/new/

https://reviews.llvm.org/D58992



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D58992: [CUDA][HIP][DebugInfo] Skip reference device function

2019-03-06 Thread Michael Liao via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL31: [CUDA][HIP][DebugInfo] Skip reference device 
function (authored by hliao, committed by ).
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Changed prior to commit:
  https://reviews.llvm.org/D58992?vs=189488&id=189570#toc

Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58992/new/

https://reviews.llvm.org/D58992

Files:
  cfe/trunk/lib/CodeGen/CGDebugInfo.cpp
  cfe/trunk/test/CodeGenCUDA/debug-info-template.cu


Index: cfe/trunk/lib/CodeGen/CGDebugInfo.cpp
===
--- cfe/trunk/lib/CodeGen/CGDebugInfo.cpp
+++ cfe/trunk/lib/CodeGen/CGDebugInfo.cpp
@@ -1725,31 +1725,37 @@
   QualType T = TA.getParamTypeForDecl().getDesugaredType(CGM.getContext());
   llvm::DIType *TTy = getOrCreateType(T, Unit);
   llvm::Constant *V = nullptr;
-  const CXXMethodDecl *MD;
-  // Variable pointer template parameters have a value that is the address
-  // of the variable.
-  if (const auto *VD = dyn_cast(D))
-V = CGM.GetAddrOfGlobalVar(VD);
-  // Member function pointers have special support for building them, 
though
-  // this is currently unsupported in LLVM CodeGen.
-  else if ((MD = dyn_cast(D)) && MD->isInstance())
-V = CGM.getCXXABI().EmitMemberFunctionPointer(MD);
-  else if (const auto *FD = dyn_cast(D))
-V = CGM.GetAddrOfFunction(FD);
-  // Member data pointers have special handling too to compute the fixed
-  // offset within the object.
-  else if (const auto *MPT = dyn_cast(T.getTypePtr())) {
-// These five lines (& possibly the above member function pointer
-// handling) might be able to be refactored to use similar code in
-// CodeGenModule::getMemberPointerConstant
-uint64_t fieldOffset = CGM.getContext().getFieldOffset(D);
-CharUnits chars =
-CGM.getContext().toCharUnitsFromBits((int64_t)fieldOffset);
-V = CGM.getCXXABI().EmitMemberDataPointer(MPT, chars);
+  // Skip retrieve the value if that template parameter has cuda device
+  // attribute, i.e. that value is not available at the host side.
+  if (!CGM.getLangOpts().CUDA || CGM.getLangOpts().CUDAIsDevice ||
+  !D->hasAttr()) {
+const CXXMethodDecl *MD;
+// Variable pointer template parameters have a value that is the 
address
+// of the variable.
+if (const auto *VD = dyn_cast(D))
+  V = CGM.GetAddrOfGlobalVar(VD);
+// Member function pointers have special support for building them,
+// though this is currently unsupported in LLVM CodeGen.
+else if ((MD = dyn_cast(D)) && MD->isInstance())
+  V = CGM.getCXXABI().EmitMemberFunctionPointer(MD);
+else if (const auto *FD = dyn_cast(D))
+  V = CGM.GetAddrOfFunction(FD);
+// Member data pointers have special handling too to compute the fixed
+// offset within the object.
+else if (const auto *MPT =
+ dyn_cast(T.getTypePtr())) {
+  // These five lines (& possibly the above member function pointer
+  // handling) might be able to be refactored to use similar code in
+  // CodeGenModule::getMemberPointerConstant
+  uint64_t fieldOffset = CGM.getContext().getFieldOffset(D);
+  CharUnits chars =
+  CGM.getContext().toCharUnitsFromBits((int64_t)fieldOffset);
+  V = CGM.getCXXABI().EmitMemberDataPointer(MPT, chars);
+}
+V = V->stripPointerCasts();
   }
   TemplateParams.push_back(DBuilder.createTemplateValueParameter(
-  TheCU, Name, TTy,
-  cast_or_null(V->stripPointerCasts(;
+  TheCU, Name, TTy, cast_or_null(V)));
 } break;
 case TemplateArgument::NullPtr: {
   QualType T = TA.getNullPtrType();
Index: cfe/trunk/test/CodeGenCUDA/debug-info-template.cu
===
--- cfe/trunk/test/CodeGenCUDA/debug-info-template.cu
+++ cfe/trunk/test/CodeGenCUDA/debug-info-template.cu
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 %s --std=c++11 -triple x86_64-unknown-linux -emit-llvm -o - 
-debug-info-kind=limited -dwarf-version=2 -debugger-tuning=gdb | FileCheck %s
+
+#include "Inputs/cuda.h"
+
+__device__ void f();
+template __global__ void t() { F(); }
+__host__ void g() { t<<<1,1>>>(); }
+
+// Ensure the value of device-function (as value template parameter) is null.
+// CHECK: !DITemplateValueParameter(name: "F", type: !{{[0-9]+}}, value: null)



[PATCH] D59900: [Sema] Fix a crash when nonnull checking

2019-03-27 Thread Michael Liao via Phabricator via cfe-commits
hliao created this revision.
hliao added reviewers: rjmccall, tra, yaxunl.
Herald added subscribers: cfe-commits, kristof.beyls, javed.absar.
Herald added a project: clang.

- Non-null checking is triggered during prototype substitution from a template
instantiation if an expression in `decltype` contains nullptr checking. Skip
non-null checking in that case, as the prototype is not finalized yet. Also,
the nullptr checking in `decltype` is only used for type inspection instead of
codegen, so ignoring it is harmless.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D59900

Files:
  clang/lib/Sema/SemaChecking.cpp
  clang/test/SemaTemplate/decltype.cpp


Index: clang/test/SemaTemplate/decltype.cpp
===
--- /dev/null
+++ clang/test/SemaTemplate/decltype.cpp
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify %s
+// no crash & no diag
+
+// expected-no-diagnostics
+template 
+auto foo(T x) -> decltype((x == nullptr), *x) {
+  return *x;
+}
+
+void bar() {
+  foo(new int);
+}
Index: clang/lib/Sema/SemaChecking.cpp
===
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -11592,6 +11592,9 @@
   }
 
   if (const auto *FD = dyn_cast(PV->getDeclContext())) {
+// Skip function template not specialized yet.
+if (FD->getTemplatedKind() == FunctionDecl::TK_FunctionTemplate)
+  return;
 auto ParamIter = llvm::find(FD->parameters(), PV);
 assert(ParamIter != FD->param_end());
 unsigned ParamNo = std::distance(FD->param_begin(), ParamIter);


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59900: [Sema] Fix a crash when nonnull checking

2019-03-28 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

Just explaining what the issue is, in the hope that it helps reviewers get this
fix in quickly.

- In `Sema::DiagnoseAlwaysNonNullPointer`, we issue warnings if a `nonnull`
parameter is used in null checking. It needs the function declaration to check
parameter attributes. That usually works but fails if that function decl is not
ready yet.
- In template instantiation, we first create the function prototype, followed
by instantiating the function body. When the function prototype is being
formed, we may create binary or other expressions for semantic checking. But in
that phase, i.e. `Sema::SubstFunctionDeclType`, we don't have a fully
specialized function prototype yet to check the parameter number against, and
we run into the assertion @ line 11596.

A minimal reproducer is sketched below. Hope that helps to get the picture of
this fix. Thanks.
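
For reference, the crashing pattern (essentially the test case added by this
patch) is:

  template <typename T>
  auto foo(T x) -> decltype((x == nullptr), *x) {  // nullptr check inside decltype
    return *x;
  }

  void bar() { foo(new int); }  // deducing foo<int *> used to hit the assertion

While the return type is being substituted, `x == nullptr` reaches
`DiagnoseAlwaysNonNullPointer` with a declaration that is still the
uninstantiated function template, so looking up the parameter index asserts;
the fix simply returns early for `TK_FunctionTemplate`.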


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59900/new/

https://reviews.llvm.org/D59900



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59900: [Sema] Fix a crash when nonnull checking

2019-03-28 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

I just searched bugzilla and, fortunately, found that this issue was reported
2+ years ago @ https://bugs.llvm.org/show_bug.cgi?id=30559. I will revise the
test case to PR30559 and move it into test/SemaCXX/nonnull.cpp.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59900/new/

https://reviews.llvm.org/D59900



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59900: [Sema] Fix a crash when nonnull checking

2019-03-28 Thread Michael Liao via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL357236: [Sema] Fix a crash when nonnull checking (authored 
by hliao, committed by ).
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Changed prior to commit:
  https://reviews.llvm.org/D59900?vs=192514&id=192776#toc

Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59900/new/

https://reviews.llvm.org/D59900

Files:
  cfe/trunk/lib/Sema/SemaChecking.cpp
  cfe/trunk/test/SemaCXX/pr30559.cpp


Index: cfe/trunk/test/SemaCXX/pr30559.cpp
===
--- cfe/trunk/test/SemaCXX/pr30559.cpp
+++ cfe/trunk/test/SemaCXX/pr30559.cpp
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -std=c++11 -fsyntax-only %s
+
+template < bool, class > struct A {};
+template < class, int > void f () {};
+template < class T, int >
+decltype (f < T, 1 >) f (T t, typename A < t == 0, int >::type) {};
+
+struct B {};
+
+int main ()
+{
+  f < B, 0 >;
+  return 0;
+}
+
+template 
+auto foo(T x) -> decltype((x == nullptr), *x) {
+  return *x;
+}
+
+void bar() {
+  foo(new int);
+}
Index: cfe/trunk/lib/Sema/SemaChecking.cpp
===
--- cfe/trunk/lib/Sema/SemaChecking.cpp
+++ cfe/trunk/lib/Sema/SemaChecking.cpp
@@ -11592,6 +11592,9 @@
   }
 
   if (const auto *FD = dyn_cast(PV->getDeclContext())) {
+// Skip function template not specialized yet.
+if (FD->getTemplatedKind() == FunctionDecl::TK_FunctionTemplate)
+  return;
 auto ParamIter = llvm::find(FD->parameters(), PV);
 assert(ParamIter != FD->param_end());
 unsigned ParamNo = std::distance(FD->param_begin(), ParamIter);


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D61194: [HIP] Fix visibility of `__constant__` variables.

2019-04-26 Thread Michael Liao via Phabricator via cfe-commits
hliao created this revision.
hliao added a reviewer: yaxunl.
Herald added subscribers: cfe-commits, nhaehnle, jvesely.
Herald added a project: clang.

- `__constant__` variables should not be `hidden` as the linker may turn them 
into `LOCAL` symbols.
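
Concretely (mirroring the test added below), for amdgcn with `-fvisibility
hidden` the variables are now emitted with protected visibility instead:

  __constant__ int c;  // @c = protected addrspace(4) externally_initialized global
  __device__   int g;  // @g = protected addrspace(1) externally_initialized global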


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D61194

Files:
  clang/lib/CodeGen/TargetInfo.cpp
  clang/test/CodeGenCUDA/amdgpu-visibility.cu


Index: clang/test/CodeGenCUDA/amdgpu-visibility.cu
===
--- /dev/null
+++ clang/test/CodeGenCUDA/amdgpu-visibility.cu
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -fapply-global-visibility-to-externs -fvisibility default -emit-llvm -o - %s | FileCheck --check-prefix=CHECK-DEFAULT %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -fapply-global-visibility-to-externs -fvisibility protected -emit-llvm -o - %s | FileCheck --check-prefix=CHECK-PROTECTED %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -fapply-global-visibility-to-externs -fvisibility hidden -emit-llvm -o - %s | FileCheck --check-prefix=CHECK-HIDDEN %s
+
+#include "Inputs/cuda.h"
+
+// CHECK-DEFAULT: @c = addrspace(4) externally_initialized global
+// CHECK-DEFAULT: @g = addrspace(1) externally_initialized global
+// CHECK-PROTECTED: @c = protected addrspace(4) externally_initialized global
+// CHECK-PROTECTED: @g = protected addrspace(1) externally_initialized global
+// CHECK-HIDDEN: @c = protected addrspace(4) externally_initialized global
+// CHECK-HIDDEN: @g = protected addrspace(1) externally_initialized global
+__constant__ int c;
+__device__ int g;
+
+// CHECK-DEFAULT: define amdgpu_kernel void @_Z3foov()
+// CHECK-PROTECTED: define protected amdgpu_kernel void @_Z3foov()
+// CHECK-HIDDEN: define protected amdgpu_kernel void @_Z3foov()
+__global__ void foo() {
+  g = c;
+}
Index: clang/lib/CodeGen/TargetInfo.cpp
===
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -7847,7 +7847,8 @@
 
   return D->hasAttr() ||
  (isa(D) && D->hasAttr()) ||
- (isa(D) && D->hasAttr());
+ (isa(D) &&
+  (D->hasAttr() || D->hasAttr()));
 }
 
 void AMDGPUTargetCodeGenInfo::setTargetAttributes(
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D61194: [HIP] Fix visibility of `__constant__` variables.

2019-04-26 Thread Michael Liao via Phabricator via cfe-commits
hliao marked an inline comment as done.
hliao added inline comments.



Comment at: clang/lib/CodeGen/TargetInfo.cpp:7851
+ (isa(D) &&
+  (D->hasAttr() || D->hasAttr()));
 }

yaxunl wrote:
> is format right?
Yeah, it's the format after clang-format. It seems that clang-format tries to
apply the same indent for the LHS & RHS of a binary operator. With the leading
parenthesis, the RHS on a new line is indented with one more space.
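
For illustration, the resulting alignment looks roughly like this (a sketch
that fills back in the attribute names stripped by the mail archive, assuming
OpenCLKernelAttr/CUDAGlobalAttr/CUDADeviceAttr/CUDAConstantAttr):

  return D->hasAttr<OpenCLKernelAttr>() ||
         (isa<FunctionDecl>(D) && D->hasAttr<CUDAGlobalAttr>()) ||
         (isa<VarDecl>(D) &&
          // the RHS of the inner && is aligned one column right of its '('
          (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()));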


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D61194/new/

https://reviews.llvm.org/D61194



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D61194: [HIP] Fix visibility of `__constant__` variables.

2019-04-26 Thread Michael Liao via Phabricator via cfe-commits
hliao marked an inline comment as done.
hliao added inline comments.



Comment at: clang/test/CodeGenCUDA/amdgpu-visibility.cu:1
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device 
-fapply-global-visibility-to-externs -fvisibility default -emit-llvm -o - %s | 
FileCheck --check-prefix=CHECK-DEFAULT %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device 
-fapply-global-visibility-to-externs -fvisibility protected -emit-llvm -o - %s 
| FileCheck --check-prefix=CHECK-PROTECTED %s

yaxunl wrote:
> do we need -fapply-global-visibility-to-externs?
That's so far the option applied for both AMDGPU & HIP (targeting non-MS) by
default in the clang frontend. Check AMDGPUToolChain::addClangTargetOptions and
HIPToolChain::addClangTargetOptions.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D61194/new/

https://reviews.llvm.org/D61194



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D61194: [HIP] Fix visibility of `__constant__` variables.

2019-04-26 Thread Michael Liao via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rC359344: [HIP] Fix visibility of `__constant__` variables. 
(authored by hliao, committed by ).

Changed prior to commit:
  https://reviews.llvm.org/D61194?vs=196857&id=196902#toc

Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D61194/new/

https://reviews.llvm.org/D61194

Files:
  lib/CodeGen/TargetInfo.cpp
  test/CodeGenCUDA/amdgpu-visibility.cu


Index: test/CodeGenCUDA/amdgpu-visibility.cu
===
--- test/CodeGenCUDA/amdgpu-visibility.cu
+++ test/CodeGenCUDA/amdgpu-visibility.cu
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -fapply-global-visibility-to-externs -fvisibility default -emit-llvm -o - %s | FileCheck --check-prefix=CHECK-DEFAULT %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -fapply-global-visibility-to-externs -fvisibility protected -emit-llvm -o - %s | FileCheck --check-prefix=CHECK-PROTECTED %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -fapply-global-visibility-to-externs -fvisibility hidden -emit-llvm -o - %s | FileCheck --check-prefix=CHECK-HIDDEN %s
+
+#include "Inputs/cuda.h"
+
+// CHECK-DEFAULT: @c = addrspace(4) externally_initialized global
+// CHECK-DEFAULT: @g = addrspace(1) externally_initialized global
+// CHECK-PROTECTED: @c = protected addrspace(4) externally_initialized global
+// CHECK-PROTECTED: @g = protected addrspace(1) externally_initialized global
+// CHECK-HIDDEN: @c = protected addrspace(4) externally_initialized global
+// CHECK-HIDDEN: @g = protected addrspace(1) externally_initialized global
+__constant__ int c;
+__device__ int g;
+
+// CHECK-DEFAULT: define amdgpu_kernel void @_Z3foov()
+// CHECK-PROTECTED: define protected amdgpu_kernel void @_Z3foov()
+// CHECK-HIDDEN: define protected amdgpu_kernel void @_Z3foov()
+__global__ void foo() {
+  g = c;
+}
Index: lib/CodeGen/TargetInfo.cpp
===
--- lib/CodeGen/TargetInfo.cpp
+++ lib/CodeGen/TargetInfo.cpp
@@ -7847,7 +7847,8 @@
 
   return D->hasAttr() ||
  (isa(D) && D->hasAttr()) ||
- (isa(D) && D->hasAttr());
+ (isa(D) &&
+  (D->hasAttr() || D->hasAttr()));
 }
 
 void AMDGPUTargetCodeGenInfo::setTargetAttributes(
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D61396: [hip] Fix ambiguity from `>>>` of CUDA.

2019-05-01 Thread Michael Liao via Phabricator via cfe-commits
hliao created this revision.
hliao added reviewers: tra, yaxunl.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

- For template arguments ending with `>>>`, we should cease the lookahead and
treat them as a type-id first, so that deduction can work properly (a sketch of
the kind of declaration this enables is below).
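
In CUDA mode `>>>` is lexed as a single kernel-launch token, so without this
change the tentative parser does not treat it as closing a template argument
list. A sketch of the affected pattern (hypothetical names; the added test
exercises the same shape):

  template <typename T> struct S {};

  void foo() {
    S<S<S<int>>> s;  // the trailing `>>>` must close three template argument
                     // lists here, not start a kernel-call configuration
  }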


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D61396

Files:
  clang/lib/Parse/ParseTentative.cpp
  clang/test/Parser/cuda-kernel-call-c++11.cu


Index: clang/test/Parser/cuda-kernel-call-c++11.cu
===
--- clang/test/Parser/cuda-kernel-call-c++11.cu
+++ clang/test/Parser/cuda-kernel-call-c++11.cu
@@ -3,6 +3,8 @@
 template struct S {};
 template void f();
 
+template struct S {};
+
 
 void foo(void) {
   // In C++11 mode, all of these are expected to parse correctly, and the CUDA
@@ -21,4 +23,6 @@
 
   (void)(&f>>==0);
   (void)(&f>>==0);
+
+  S>> s6;
 }
Index: clang/lib/Parse/ParseTentative.cpp
===
--- clang/lib/Parse/ParseTentative.cpp
+++ clang/lib/Parse/ParseTentative.cpp
@@ -593,7 +593,8 @@
  (Tok.is(tok::greatergreater) ||
   (Tok.is(tok::ellipsis) &&
NextToken().isOneOf(tok::greater, tok::greatergreater,
-   tok::comma)) {
+   tok::comma ||
+(getLangOpts().CUDA && Tok.is(tok::greatergreatergreater {
   TPR = TPResult::True;
   isAmbiguous = true;
 


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D61396: [hip] Fix ambiguity from `>>>` of CUDA.

2019-05-01 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

In D61396#1486706 , @tra wrote:

> LGTM, but I've added @rsmith who is way more familiar with this code.


sure, no rush, let's wait for comments from @rsmith


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D61396/new/

https://reviews.llvm.org/D61396



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D61396: [hip] Fix ambiguity from `>>>` of CUDA.

2019-05-02 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

@rsmith do you have the chance to review this simple fix?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D61396/new/

https://reviews.llvm.org/D61396



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D61458: [hip] Relax CUDA call restriction within `decltype` context.

2019-05-02 Thread Michael Liao via Phabricator via cfe-commits
hliao created this revision.
hliao added reviewers: tra, rjmccall, yaxunl.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

- Within `decltype`, expressions are only type-inspected. The restriction on 
CUDA calls should be relaxed.
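
A sketch of the motivating pattern (hypothetical names; the test below has the
same shape) -- a device function that only inspects the type of a host-only
call:

  int host_fn(float);              // host-only function, no __device__ overload

  __device__ float bar(float x) {
    decltype(host_fn(x)) y = x;    // host_fn is never codegen'ed here; only its
    return y + 3.f;                // return type (int) is queried
  }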


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D61458

Files:
  clang/include/clang/Sema/Sema.h
  clang/test/CodeGenCUDA/function-overload.cu


Index: clang/test/CodeGenCUDA/function-overload.cu
===
--- clang/test/CodeGenCUDA/function-overload.cu
+++ clang/test/CodeGenCUDA/function-overload.cu
@@ -8,6 +8,8 @@
 // RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-HOST %s
 // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -emit-llvm -o - %s \
 // RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-DEVICE %s
+// RUN: %clang_cc1 -std=c++11 -DCHECK_DECLTYPE -triple amdgcn -fcuda-is-device -emit-llvm -o - %s \
+// RUN: | FileCheck -check-prefix=CHECK-DECLTYPE %s
 
 #include "Inputs/cuda.h"
 
@@ -53,3 +55,14 @@
 // CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_hdD2Ev(
 // CHECK-BOTH: store i32 32,
 // CHECK-BOTH: ret void
+
+#if defined(CHECK_DECLTYPE)
+int foo(float);
+// CHECK-DECLTYPE-LABEL: @_Z3barf
+// CHECK-DECLTYPE: fptosi
+// CHECK-DECLTYPE: sitofp
+__device__ float bar(float x) {
+  decltype(foo(x)) y = x;
+  return y + 3.f;
+}
+#endif
Index: clang/include/clang/Sema/Sema.h
===
--- clang/include/clang/Sema/Sema.h
+++ clang/include/clang/Sema/Sema.h
@@ -10406,6 +10406,17 @@
   /// semantically correct CUDA programs, but only if they're never codegen'ed.
   bool IsAllowedCUDACall(const FunctionDecl *Caller,
  const FunctionDecl *Callee) {
+auto InDecltypeExpr = [this]() {
+  auto I =
+  std::find_if(ExprEvalContexts.rbegin(), ExprEvalContexts.rend(),
+   [](const ExpressionEvaluationContextRecord &C) {
+ return C.ExprContext ==
+ExpressionEvaluationContextRecord::EK_Decltype;
+   });
+  return I != ExprEvalContexts.rend();
+};
+if (InDecltypeExpr())
+  return true;
 return IdentifyCUDAPreference(Caller, Callee) != CFP_Never;
   }
 
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D61458: [hip] Relax CUDA call restriction within `decltype` context.

2019-05-02 Thread Michael Liao via Phabricator via cfe-commits
hliao updated this revision to Diff 197860.
hliao added a comment.

simplify the logic using `llvm::any_of`.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D61458/new/

https://reviews.llvm.org/D61458

Files:
  clang/include/clang/Sema/Sema.h
  clang/test/CodeGenCUDA/function-overload.cu


Index: clang/test/CodeGenCUDA/function-overload.cu
===
--- clang/test/CodeGenCUDA/function-overload.cu
+++ clang/test/CodeGenCUDA/function-overload.cu
@@ -8,6 +8,8 @@
 // RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-HOST %s
 // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -emit-llvm -o - %s \
 // RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-DEVICE %s
+// RUN: %clang_cc1 -std=c++11 -DCHECK_DECLTYPE -triple amdgcn -fcuda-is-device -emit-llvm -o - %s \
+// RUN: | FileCheck -check-prefix=CHECK-DECLTYPE %s
 
 #include "Inputs/cuda.h"
 
@@ -53,3 +55,14 @@
 // CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_hdD2Ev(
 // CHECK-BOTH: store i32 32,
 // CHECK-BOTH: ret void
+
+#if defined(CHECK_DECLTYPE)
+int foo(float);
+// CHECK-DECLTYPE-LABEL: @_Z3barf
+// CHECK-DECLTYPE: fptosi
+// CHECK-DECLTYPE: sitofp
+__device__ float bar(float x) {
+  decltype(foo(x)) y = x;
+  return y + 3.f;
+}
+#endif
Index: clang/include/clang/Sema/Sema.h
===
--- clang/include/clang/Sema/Sema.h
+++ clang/include/clang/Sema/Sema.h
@@ -10406,6 +10406,12 @@
   /// semantically correct CUDA programs, but only if they're never codegen'ed.
   bool IsAllowedCUDACall(const FunctionDecl *Caller,
  const FunctionDecl *Callee) {
+if (llvm::any_of(ExprEvalContexts,
+ [](const ExpressionEvaluationContextRecord &C) {
+   return C.ExprContext ==
+  ExpressionEvaluationContextRecord::EK_Decltype;
+ }))
+  return true;
 return IdentifyCUDAPreference(Caller, Callee) != CFP_Never;
   }
 
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D61458: [hip] Relax CUDA call restriction within `decltype` context.

2019-05-02 Thread Michael Liao via Phabricator via cfe-commits
hliao marked an inline comment as done.
hliao added a comment.

In D61458#1488523 , @tra wrote:

> Perhaps we should allow this in all unevaluated contexts? 
>  I.e. `int s = sizeof(foo(x));` should also work.


Good point. Do we have a dedicated context for sizeof? That would make the
checking easier.




Comment at: clang/include/clang/Sema/Sema.h:10411
+  auto I =
+  std::find_if(ExprEvalContexts.rbegin(), ExprEvalContexts.rend(),
+   [](const ExpressionEvaluationContextRecord &C) {

tra wrote:
> I think you want `return llvm::any_of(ExprEvalContexts, ...)` here and you 
> can fold it directly into `if()` below.
yeah, that's much simpler, I will make the change.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D61458/new/

https://reviews.llvm.org/D61458



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D61458: [hip] Relax CUDA call restriction within `decltype` context.

2019-05-03 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

In D61458#1488970 , @jlebar wrote:

> Here's one for you:
>
>   __host__ float bar();
>   __device__ int bar();
>   __host__ __device__ auto foo() -> decltype(bar()) {}
>
>
> What is the return type of `foo`?  :)
>
> I don't believe the right answer is, "float when compiling for host, int when 
> compiling for device."
>
> I'd be happy if we said this was an error, so long as it's well-defined what 
> exactly we're disallowing.  But I bet @rsmith can come up with substantially 
> more evil testcases than this.


This patch is introduced to allow functions or function templates from the
`std` library to be used in device functions. By allowing different-side
candidates in a context that only cares about type inspection, we have a new
issue, as there is something extra beyond the regular C++ overload-resolution
rules: we need an extra policy to figure out which one is the best candidate by
considering CUDA attributes. Taking the case you proposed, we may consider the
following order to choose an overload candidate, e.g.

  SAME-SIDE (with the same CUDA attribute)
  NATIVE (without any CUDA attribute)
  WRONG-SIDE (with the opposite CUDA attribute)

or just

  SAME-SIDE
  NATIVE

Is that a reasonable change?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D61458/new/

https://reviews.llvm.org/D61458



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D61458: [hip] Relax CUDA call restriction within `decltype` context.

2019-05-03 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

In D61458#1488981 , @rjmccall wrote:

> In D61458#1488972 , @hfinkel wrote:
>
> > In D61458#1488970 , @jlebar wrote:
> >
> > > Here's one for you:
> > >
> > >   __host__ float bar();
> > >   __device__ int bar();
> > >   __host__ __device__ auto foo() -> decltype(bar()) {}
> > >
> > >
> > > What is the return type of `foo`?  :)
> > >
> > > I don't believe the right answer is, "float when compiling for host, int 
> > > when compiling for device."
> >
> >
> > So, actually, I wonder if that's not the right answer. We generally allow 
> > different overloads to have different return types.
>
>
> Only if they also differ in some other way.  C++ does not (generally) have 
> return-type-based overloading.  The two functions described would even mangle 
> the same way if CUDA didn't include host/device in the mangling.
>
> (Function templates can differ only by return type, but if both return types 
> successfully instantiate for a given set of (possibly inferred) template 
> arguments then the templates can only be distinguished when taking their 
> address, not when calling.)
>
> I think I've said before that adding this kind of overloading is not a good 
> idea, but since it's apparently already there, you should consult the 
> specification (or at least existing practice) to figure out what you're 
> supposed to do.


BTW, I just checked similar stuff with nvcc, with more than one candidate. It
accepts the following code

  float bar(); // This line could be replaced by appending `__host__` or
               // `__device__`; all of them are accepted.
  __host__ __device__ auto foo() -> decltype(bar()) {}

However, if there is more than one candidate differing in the return type
(with or without a CUDA attribute difference), it raises the error

  foo.cu(4): error: cannot overload functions distinguished by return type alone

It seems to me that that's also an acceptable policy to handle the issue after
we allow different-side candidates in a type-only context.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D61458/new/

https://reviews.llvm.org/D61458



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D61458: [hip] Relax CUDA call restriction within `decltype` context.

2019-05-03 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

In D61458#1488970 , @jlebar wrote:

> Here's one for you:
>
>   __host__ float bar();
>   __device__ int bar();
>   __host__ __device__ auto foo() -> decltype(bar()) {}
>
>
> What is the return type of `foo`?  :)
>
> I don't believe the right answer is, "float when compiling for host, int when 
> compiling for device."
>
> I'd be happy if we said this was an error, so long as it's well-defined what 
> exactly we're disallowing.  But I bet @rsmith can come up with substantially 
> more evil testcases than this.


As of CUDA 10, that's not acceptable, as we are declaring two functions that
differ only in the return type. It seems CUDA attributes do not contribute to
the function signature. Clang is quite different here.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D61458/new/

https://reviews.llvm.org/D61458



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D61458: [hip] Relax CUDA call restriction within `decltype` context.

2019-05-03 Thread Michael Liao via Phabricator via cfe-commits
hliao marked an inline comment as done.
hliao added inline comments.



Comment at: clang/include/clang/Sema/Sema.h:10407-10409
   bool IsAllowedCUDACall(const FunctionDecl *Caller,
  const FunctionDecl *Callee) {
+if (llvm::any_of(ExprEvalContexts,

tra wrote:
> One more thing. The idea of this function is that we're checking if the 
> `Caller` is allowed to call the `Callee`.
> However here, you're checking the current context, which may not necessarily 
> be the same as the caller's. I.e. someone could potentially call it way after 
> the context is gone.
> 
> Currently all uses of this function obtain the caller from `CurContext`, but 
> if we start relying on other properties of the current context other than the 
> caller function, then we may neet to pass the context explicitly, or only 
> pass the Callee and check if it's callable from the current context.
> 
> ```
> 
As the expression within `decltype` may be quite complicated, the idea here is
to relax that rule within the `decltype` context, not only for a particular
pair of caller/callee.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D61458/new/

https://reviews.llvm.org/D61458



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D61396: [hip] Fix ambiguity from `>>>` of CUDA.

2019-05-03 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

ping


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D61396/new/

https://reviews.llvm.org/D61396



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D61396: [hip] Fix ambiguity from `>>>` of CUDA.

2019-05-06 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

@rsmith Do you have the chance to review this patch?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D61396/new/

https://reviews.llvm.org/D61396



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D61396: [hip] Fix ambiguity from `>>>` of CUDA.

2019-05-07 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

@rsmith Do you have the chance to review this patch?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D61396/new/

https://reviews.llvm.org/D61396



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D61396: [hip] Fix ambiguity from `>>>` of CUDA.

2019-05-07 Thread Michael Liao via Phabricator via cfe-commits
hliao updated this revision to Diff 198529.
hliao added a comment.

Revised following the reviewer's suggestion.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D61396/new/

https://reviews.llvm.org/D61396

Files:
  clang/lib/Parse/ParseTentative.cpp
  clang/test/Parser/cuda-kernel-call-c++11.cu


Index: clang/test/Parser/cuda-kernel-call-c++11.cu
===
--- clang/test/Parser/cuda-kernel-call-c++11.cu
+++ clang/test/Parser/cuda-kernel-call-c++11.cu
@@ -3,6 +3,8 @@
 template struct S {};
 template void f();
 
+template struct S {};
+
 
 void foo(void) {
   // In C++11 mode, all of these are expected to parse correctly, and the CUDA
@@ -21,4 +23,6 @@
 
   (void)(&f>>==0);
   (void)(&f>>==0);
+
+  S>> s6;
 }
Index: clang/lib/Parse/ParseTentative.cpp
===
--- clang/lib/Parse/ParseTentative.cpp
+++ clang/lib/Parse/ParseTentative.cpp
@@ -590,9 +590,11 @@
 } else if (Context == TypeIdAsTemplateArgument &&
(Tok.isOneOf(tok::greater, tok::comma) ||
 (getLangOpts().CPlusPlus11 &&
- (Tok.is(tok::greatergreater) ||
+ (Tok.isOneOf(tok::greatergreater,
+  tok::greatergreatergreater) ||
   (Tok.is(tok::ellipsis) &&
NextToken().isOneOf(tok::greater, tok::greatergreater,
+   tok::greatergreatergreater,
tok::comma)) {
   TPR = TPResult::True;
   isAmbiguous = true;


Index: clang/test/Parser/cuda-kernel-call-c++11.cu
===
--- clang/test/Parser/cuda-kernel-call-c++11.cu
+++ clang/test/Parser/cuda-kernel-call-c++11.cu
@@ -3,6 +3,8 @@
 template struct S {};
 template void f();
 
+template struct S {};
+
 
 void foo(void) {
   // In C++11 mode, all of these are expected to parse correctly, and the CUDA
@@ -21,4 +23,6 @@
 
   (void)(&f>>==0);
   (void)(&f>>==0);
+
+  S>> s6;
 }
Index: clang/lib/Parse/ParseTentative.cpp
===
--- clang/lib/Parse/ParseTentative.cpp
+++ clang/lib/Parse/ParseTentative.cpp
@@ -590,9 +590,11 @@
 } else if (Context == TypeIdAsTemplateArgument &&
(Tok.isOneOf(tok::greater, tok::comma) ||
 (getLangOpts().CPlusPlus11 &&
- (Tok.is(tok::greatergreater) ||
+ (Tok.isOneOf(tok::greatergreater,
+  tok::greatergreatergreater) ||
   (Tok.is(tok::ellipsis) &&
NextToken().isOneOf(tok::greater, tok::greatergreater,
+   tok::greatergreatergreater,
tok::comma)) {
   TPR = TPResult::True;
   isAmbiguous = true;
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D61396: [hip] Fix ambiguity from `>>>` of CUDA.

2019-05-07 Thread Michael Liao via Phabricator via cfe-commits
hliao marked an inline comment as done.
hliao added a comment.

done


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D61396/new/

https://reviews.llvm.org/D61396



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D61396: [hip] Fix ambiguity from `>>>` of CUDA.

2019-05-07 Thread Michael Liao via Phabricator via cfe-commits
hliao updated this revision to Diff 198545.
hliao added a comment.

Add one unrealistic case for testing purposes only.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D61396/new/

https://reviews.llvm.org/D61396

Files:
  clang/lib/Parse/ParseTentative.cpp
  clang/test/Parser/cuda-kernel-call-c++11.cu


Index: clang/test/Parser/cuda-kernel-call-c++11.cu
===
--- clang/test/Parser/cuda-kernel-call-c++11.cu
+++ clang/test/Parser/cuda-kernel-call-c++11.cu
@@ -3,6 +3,10 @@
 template struct S {};
 template void f();
 
+template struct S {};
+
+template struct V {};
+template struct V {};
 
 void foo(void) {
   // In C++11 mode, all of these are expected to parse correctly, and the CUDA
@@ -21,4 +25,11 @@
 
   (void)(&f>>==0);
   (void)(&f>>==0);
+
+  S>> s6;
+}
+
+template
+void bar(T... args) {
+  S>> s7;
 }
Index: clang/lib/Parse/ParseTentative.cpp
===
--- clang/lib/Parse/ParseTentative.cpp
+++ clang/lib/Parse/ParseTentative.cpp
@@ -590,9 +590,11 @@
 } else if (Context == TypeIdAsTemplateArgument &&
(Tok.isOneOf(tok::greater, tok::comma) ||
 (getLangOpts().CPlusPlus11 &&
- (Tok.is(tok::greatergreater) ||
+ (Tok.isOneOf(tok::greatergreater,
+  tok::greatergreatergreater) ||
   (Tok.is(tok::ellipsis) &&
NextToken().isOneOf(tok::greater, tok::greatergreater,
+   tok::greatergreatergreater,
tok::comma)) {
   TPR = TPResult::True;
   isAmbiguous = true;


Index: clang/test/Parser/cuda-kernel-call-c++11.cu
===
--- clang/test/Parser/cuda-kernel-call-c++11.cu
+++ clang/test/Parser/cuda-kernel-call-c++11.cu
@@ -3,6 +3,10 @@
 template struct S {};
 template void f();
 
+template struct S {};
+
+template struct V {};
+template struct V {};
 
 void foo(void) {
   // In C++11 mode, all of these are expected to parse correctly, and the CUDA
@@ -21,4 +25,11 @@
 
   (void)(&f>>==0);
   (void)(&f>>==0);
+
+  S>> s6;
+}
+
+template
+void bar(T... args) {
+  S>> s7;
 }
Index: clang/lib/Parse/ParseTentative.cpp
===
--- clang/lib/Parse/ParseTentative.cpp
+++ clang/lib/Parse/ParseTentative.cpp
@@ -590,9 +590,11 @@
 } else if (Context == TypeIdAsTemplateArgument &&
(Tok.isOneOf(tok::greater, tok::comma) ||
 (getLangOpts().CPlusPlus11 &&
- (Tok.is(tok::greatergreater) ||
+ (Tok.isOneOf(tok::greatergreater,
+  tok::greatergreatergreater) ||
   (Tok.is(tok::ellipsis) &&
NextToken().isOneOf(tok::greater, tok::greatergreater,
+   tok::greatergreatergreater,
tok::comma)) {
   TPR = TPResult::True;
   isAmbiguous = true;
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D61396: [hip] Fix ambiguity from `>>>` of CUDA.

2019-05-07 Thread Michael Liao via Phabricator via cfe-commits
hliao marked an inline comment as done.
hliao added a comment.

done


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D61396/new/

https://reviews.llvm.org/D61396



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D61396: [hip] Fix ambiguity from `>>>` of CUDA.

2019-05-07 Thread Michael Liao via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL360214: [hip] Fix ambiguity from `>>>` of CUDA. 
(authored by hliao, committed by ).
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Changed prior to commit:
  https://reviews.llvm.org/D61396?vs=198545&id=198569#toc

Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D61396/new/

https://reviews.llvm.org/D61396

Files:
  cfe/trunk/lib/Parse/ParseTentative.cpp
  cfe/trunk/test/Parser/cuda-kernel-call-c++11.cu


Index: cfe/trunk/lib/Parse/ParseTentative.cpp
===
--- cfe/trunk/lib/Parse/ParseTentative.cpp
+++ cfe/trunk/lib/Parse/ParseTentative.cpp
@@ -590,9 +590,11 @@
 } else if (Context == TypeIdAsTemplateArgument &&
(Tok.isOneOf(tok::greater, tok::comma) ||
 (getLangOpts().CPlusPlus11 &&
- (Tok.is(tok::greatergreater) ||
+ (Tok.isOneOf(tok::greatergreater,
+  tok::greatergreatergreater) ||
   (Tok.is(tok::ellipsis) &&
NextToken().isOneOf(tok::greater, tok::greatergreater,
+   tok::greatergreatergreater,
tok::comma)) {
   TPR = TPResult::True;
   isAmbiguous = true;
Index: cfe/trunk/test/Parser/cuda-kernel-call-c++11.cu
===
--- cfe/trunk/test/Parser/cuda-kernel-call-c++11.cu
+++ cfe/trunk/test/Parser/cuda-kernel-call-c++11.cu
@@ -3,6 +3,10 @@
 template struct S {};
 template void f();
 
+template struct S {};
+
+template struct V {};
+template struct V {};
 
 void foo(void) {
   // In C++11 mode, all of these are expected to parse correctly, and the CUDA
@@ -21,4 +25,11 @@
 
   (void)(&f>>==0);
   (void)(&f>>==0);
+
+  S>> s6;
+}
+
+template
+void bar(T... args) {
+  S>> s7;
 }


Index: cfe/trunk/lib/Parse/ParseTentative.cpp
===
--- cfe/trunk/lib/Parse/ParseTentative.cpp
+++ cfe/trunk/lib/Parse/ParseTentative.cpp
@@ -590,9 +590,11 @@
 } else if (Context == TypeIdAsTemplateArgument &&
(Tok.isOneOf(tok::greater, tok::comma) ||
 (getLangOpts().CPlusPlus11 &&
- (Tok.is(tok::greatergreater) ||
+ (Tok.isOneOf(tok::greatergreater,
+  tok::greatergreatergreater) ||
   (Tok.is(tok::ellipsis) &&
NextToken().isOneOf(tok::greater, tok::greatergreater,
+   tok::greatergreatergreater,
tok::comma)) {
   TPR = TPResult::True;
   isAmbiguous = true;
Index: cfe/trunk/test/Parser/cuda-kernel-call-c++11.cu
===
--- cfe/trunk/test/Parser/cuda-kernel-call-c++11.cu
+++ cfe/trunk/test/Parser/cuda-kernel-call-c++11.cu
@@ -3,6 +3,10 @@
 template struct S {};
 template void f();
 
+template struct S {};
+
+template struct V {};
+template struct V {};
 
 void foo(void) {
   // In C++11 mode, all of these are expected to parse correctly, and the CUDA
@@ -21,4 +25,11 @@
 
   (void)(&f>>==0);
   (void)(&f>>==0);
+
+  S>> s6;
+}
+
+template
+void bar(T... args) {
+  S>> s7;
 }
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D58509: [CodeGen] Fix string literal address space casting.

2019-02-21 Thread Michael Liao via Phabricator via cfe-commits
hliao created this revision.
hliao added a reviewer: yaxunl.
Herald added subscribers: cfe-commits, jvesely.
Herald added a project: clang.

- If a string literal is reused directly, we need to add the necessary address space cast if the target requires it.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D58509

Files:
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/test/CodeGenCXX/amdgcn-string-literal.cpp


Index: clang/test/CodeGenCXX/amdgcn-string-literal.cpp
===
--- clang/test/CodeGenCXX/amdgcn-string-literal.cpp
+++ clang/test/CodeGenCXX/amdgcn-string-literal.cpp
@@ -14,7 +14,7 @@
 // CHECK-LABEL: define void @_Z1fv()
 void f() {
   const char* l_str = "l_str";
-  
+
   // CHECK: call void @llvm.memcpy.p0i8.p4i8.i64
   char l_array[] = "l_array";
 
@@ -26,3 +26,9 @@
   const char* p = g_str;
   g(p);
 }
+
+// CHECK-LABEL: define void @_Z1ev
+void e() {
+  g("string literal");
+  g("string literal");
+}
Index: clang/lib/CodeGen/CodeGenModule.cpp
===
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -4522,7 +4522,8 @@
 if (auto GV = *Entry) {
   if (Alignment.getQuantity() > GV->getAlignment())
 GV->setAlignment(Alignment.getQuantity());
-  return ConstantAddress(GV, Alignment);
+  return ConstantAddress(castStringLiteralToDefaultAddressSpace(*this, GV),
+ Alignment);
 }
   }
 
@@ -4584,7 +4585,8 @@
 if (auto GV = *Entry) {
   if (Alignment.getQuantity() > GV->getAlignment())
 GV->setAlignment(Alignment.getQuantity());
-  return ConstantAddress(GV, Alignment);
+  return ConstantAddress(castStringLiteralToDefaultAddressSpace(*this, GV),
+ Alignment);
 }
   }
 


Index: clang/test/CodeGenCXX/amdgcn-string-literal.cpp
===
--- clang/test/CodeGenCXX/amdgcn-string-literal.cpp
+++ clang/test/CodeGenCXX/amdgcn-string-literal.cpp
@@ -14,7 +14,7 @@
 // CHECK-LABEL: define void @_Z1fv()
 void f() {
   const char* l_str = "l_str";
-  
+
   // CHECK: call void @llvm.memcpy.p0i8.p4i8.i64
   char l_array[] = "l_array";
 
@@ -26,3 +26,9 @@
   const char* p = g_str;
   g(p);
 }
+
+// CHECK-LABEL: define void @_Z1ev
+void e() {
+  g("string literal");
+  g("string literal");
+}
Index: clang/lib/CodeGen/CodeGenModule.cpp
===
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -4522,7 +4522,8 @@
 if (auto GV = *Entry) {
   if (Alignment.getQuantity() > GV->getAlignment())
 GV->setAlignment(Alignment.getQuantity());
-  return ConstantAddress(GV, Alignment);
+  return ConstantAddress(castStringLiteralToDefaultAddressSpace(*this, GV),
+ Alignment);
 }
   }
 
@@ -4584,7 +4585,8 @@
 if (auto GV = *Entry) {
   if (Alignment.getQuantity() > GV->getAlignment())
 GV->setAlignment(Alignment.getQuantity());
-  return ConstantAddress(GV, Alignment);
+  return ConstantAddress(castStringLiteralToDefaultAddressSpace(*this, GV),
+ Alignment);
 }
   }
 
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D58509: [CodeGen] Fix string literal address space casting.

2019-02-21 Thread Michael Liao via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rC354610: [CodeGen] Fix string literal address space casting. 
(authored by hliao, committed by ).
Herald added a subscriber: ebevhan.

Changed prior to commit:
  https://reviews.llvm.org/D58509?vs=187797&id=187834#toc

Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58509/new/

https://reviews.llvm.org/D58509

Files:
  lib/CodeGen/CodeGenModule.cpp
  test/CodeGenCXX/amdgcn-string-literal.cpp


Index: test/CodeGenCXX/amdgcn-string-literal.cpp
===
--- test/CodeGenCXX/amdgcn-string-literal.cpp
+++ test/CodeGenCXX/amdgcn-string-literal.cpp
@@ -14,7 +14,7 @@
 // CHECK-LABEL: define void @_Z1fv()
 void f() {
   const char* l_str = "l_str";
-  
+
   // CHECK: call void @llvm.memcpy.p0i8.p4i8.i64
   char l_array[] = "l_array";
 
@@ -26,3 +26,9 @@
   const char* p = g_str;
   g(p);
 }
+
+// CHECK-LABEL: define void @_Z1ev
+void e() {
+  g("string literal");
+  g("string literal");
+}
Index: lib/CodeGen/CodeGenModule.cpp
===
--- lib/CodeGen/CodeGenModule.cpp
+++ lib/CodeGen/CodeGenModule.cpp
@@ -4522,7 +4522,8 @@
 if (auto GV = *Entry) {
   if (Alignment.getQuantity() > GV->getAlignment())
 GV->setAlignment(Alignment.getQuantity());
-  return ConstantAddress(GV, Alignment);
+  return ConstantAddress(castStringLiteralToDefaultAddressSpace(*this, GV),
+ Alignment);
 }
   }
 
@@ -4584,7 +4585,8 @@
 if (auto GV = *Entry) {
   if (Alignment.getQuantity() > GV->getAlignment())
 GV->setAlignment(Alignment.getQuantity());
-  return ConstantAddress(GV, Alignment);
+  return ConstantAddress(castStringLiteralToDefaultAddressSpace(*this, GV),
+ Alignment);
 }
   }
 


Index: test/CodeGenCXX/amdgcn-string-literal.cpp
===
--- test/CodeGenCXX/amdgcn-string-literal.cpp
+++ test/CodeGenCXX/amdgcn-string-literal.cpp
@@ -14,7 +14,7 @@
 // CHECK-LABEL: define void @_Z1fv()
 void f() {
   const char* l_str = "l_str";
-  
+
   // CHECK: call void @llvm.memcpy.p0i8.p4i8.i64
   char l_array[] = "l_array";
 
@@ -26,3 +26,9 @@
   const char* p = g_str;
   g(p);
 }
+
+// CHECK-LABEL: define void @_Z1ev
+void e() {
+  g("string literal");
+  g("string literal");
+}
Index: lib/CodeGen/CodeGenModule.cpp
===
--- lib/CodeGen/CodeGenModule.cpp
+++ lib/CodeGen/CodeGenModule.cpp
@@ -4522,7 +4522,8 @@
 if (auto GV = *Entry) {
   if (Alignment.getQuantity() > GV->getAlignment())
 GV->setAlignment(Alignment.getQuantity());
-  return ConstantAddress(GV, Alignment);
+  return ConstantAddress(castStringLiteralToDefaultAddressSpace(*this, GV),
+ Alignment);
 }
   }
 
@@ -4584,7 +4585,8 @@
 if (auto GV = *Entry) {
   if (Alignment.getQuantity() > GV->getAlignment())
 GV->setAlignment(Alignment.getQuantity());
-  return ConstantAddress(GV, Alignment);
+  return ConstantAddress(castStringLiteralToDefaultAddressSpace(*this, GV),
+ Alignment);
 }
   }
 
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D58623: [AMDGPU] Allow using integral non-type template parameters

2019-02-25 Thread Michael Liao via Phabricator via cfe-commits
hliao created this revision.
hliao added reviewers: kzhuravl, yaxunl.
Herald added subscribers: cfe-commits, jdoerfert, t-tye, tpr, dstuttard, 
nhaehnle, wdng, jvesely.
Herald added a project: clang.

- Allow using integral non-type template parameters in the following attributes (a usage sketch follows):

  __attribute__((amdgpu_flat_work_group_size(<min>, <max>)))
  __attribute__((amdgpu_waves_per_eu(<min>[, <max>])))
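
A hedged usage sketch (kernel and parameter names are invented, not part of the patch):

  template <unsigned MinWG, unsigned MaxWG, unsigned MinWaves, unsigned MaxWaves>
  __attribute__((amdgpu_flat_work_group_size(MinWG, MaxWG)))
  __attribute__((amdgpu_waves_per_eu(MinWaves, MaxWaves)))
  __global__ void scale(float *p, float s) {
    p[0] *= s;
  }

  // A particular instantiation then fixes the bounds, e.g.:
  //   scale<64, 256, 1, 4><<<grid, block>>>(p, 2.f);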


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D58623

Files:
  clang/include/clang/Basic/Attr.td
  clang/include/clang/Sema/Sema.h
  clang/lib/CodeGen/TargetInfo.cpp
  clang/lib/Sema/SemaDeclAttr.cpp
  clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
  clang/test/SemaCUDA/amdgpu-attrs.cu
  clang/test/SemaOpenCL/amdgpu-attrs.cl

Index: clang/test/SemaOpenCL/amdgpu-attrs.cl
===
--- clang/test/SemaOpenCL/amdgpu-attrs.cl
+++ clang/test/SemaOpenCL/amdgpu-attrs.cl
@@ -27,12 +27,12 @@
 __attribute__((amdgpu_num_sgpr(32))) void func_num_sgpr_32() {} // expected-error {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
 __attribute__((amdgpu_num_vgpr(64))) void func_num_vgpr_64() {} // expected-error {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
 
-__attribute__((amdgpu_flat_work_group_size("ABC", "ABC"))) kernel void kernel_flat_work_group_size_ABC_ABC() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires an integer constant}}
-__attribute__((amdgpu_flat_work_group_size(32, "ABC"))) kernel void kernel_flat_work_group_size_32_ABC() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires an integer constant}}
-__attribute__((amdgpu_flat_work_group_size("ABC", 64))) kernel void kernel_flat_work_group_size_ABC_64() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires an integer constant}}
-__attribute__((amdgpu_waves_per_eu("ABC"))) kernel void kernel_waves_per_eu_ABC() {} // expected-error {{'amdgpu_waves_per_eu' attribute requires an integer constant}}
-__attribute__((amdgpu_waves_per_eu(2, "ABC"))) kernel void kernel_waves_per_eu_2_ABC() {} // expected-error {{'amdgpu_waves_per_eu' attribute requires an integer constant}}
-__attribute__((amdgpu_waves_per_eu("ABC", 4))) kernel void kernel_waves_per_eu_ABC_4() {} // expected-error {{'amdgpu_waves_per_eu' attribute requires an integer constant}}
+__attribute__((amdgpu_flat_work_group_size("ABC", "ABC"))) kernel void kernel_flat_work_group_size_ABC_ABC() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires parameter 0 to be an integer constant}}
+__attribute__((amdgpu_flat_work_group_size(32, "ABC"))) kernel void kernel_flat_work_group_size_32_ABC() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires parameter 1 to be an integer constant}}
+__attribute__((amdgpu_flat_work_group_size("ABC", 64))) kernel void kernel_flat_work_group_size_ABC_64() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires parameter 0 to be an integer constant}}
+__attribute__((amdgpu_waves_per_eu("ABC"))) kernel void kernel_waves_per_eu_ABC() {} // expected-error {{'amdgpu_waves_per_eu' attribute requires parameter 0 to be an integer constant}}
+__attribute__((amdgpu_waves_per_eu(2, "ABC"))) kernel void kernel_waves_per_eu_2_ABC() {} // expected-error {{'amdgpu_waves_per_eu' attribute requires parameter 1 to be an integer constant}}
+__attribute__((amdgpu_waves_per_eu("ABC", 4))) kernel void kernel_waves_per_eu_ABC_4() {} // expected-error {{'amdgpu_waves_per_eu' attribute requires parameter 0 to be an integer constant}}
 __attribute__((amdgpu_num_sgpr("ABC"))) kernel void kernel_num_sgpr_ABC() {} // expected-error {{'amdgpu_num_sgpr' attribute requires an integer constant}}
 __attribute__((amdgpu_num_vgpr("ABC"))) kernel void kernel_num_vgpr_ABC() {} // expected-error {{'amdgpu_num_vgpr' attribute requires an integer constant}}
 
Index: clang/test/SemaCUDA/amdgpu-attrs.cu
===
--- clang/test/SemaCUDA/amdgpu-attrs.cu
+++ clang/test/SemaCUDA/amdgpu-attrs.cu
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify %s
 #include "Inputs/cuda.h"
 
 
@@ -78,3 +78,119 @@
 // expected-error@+2{{attribute 'intel_reqd_sub_group_size' can only be applied to an OpenCL kernel function}}
 __attribute__((intel_reqd_sub_group_size(64)))
 __global__ void intel_reqd_sub_group_size_64() {}
+
+// expected-error@+1{{'amdgpu_flat_work_group_size' attribute requires parameter 0 to be an integer constant}}
+__attribute__((amdgpu_flat_work_group_size("32", 64)))
+__global__ void non_int_min_flat_work_group_size_32_64() {}
+// expected-error@+1{{'amdgpu_flat_work_group_size' attribute requires parameter 1 to be an integer constant}}
+__attribute__((amdgpu_flat_work_group_size(32, "64")))
+__global__ void non_int_max_flat_work_group_size_32_64() {}
+
+int nc_min = 32, nc_max = 64;
+// expected-error@+1{{'amdgpu_

[PATCH] D58623: [AMDGPU] Allow using integral non-type template parameters

2019-02-25 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

Non-type template parameters are used in these attributes in one of our major workloads. In addition, this change also revises the constexpr support by allowing lvalues. The diagnostic message is refined too, pointing out which parameter violates the integer-constant requirement. Previous tests are revised, and more tests are added to cover the cases fixed by this change.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58623/new/

https://reviews.llvm.org/D58623



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D58627: [git] Add top-level .gitignore

2019-02-25 Thread Michael Liao via Phabricator via cfe-commits
hliao created this revision.
hliao added reviewers: kzhuravl, yaxunl.
hliao added a project: clang.
Herald added subscribers: llvm-commits, hiraditya.
Herald added a project: LLVM.

[D56411 ] Temporary solution fixing a CUDA template issue

- Templates with an overloadable kernel function as the template function need the CheckCUDACall check revised.

[SelectionDAG] Harden the checking of RegClass when adding operand

- If the operand index is out of range, expect nullptr to be returned.

[AMDGPU] Allow using integral non-type template parameters

- Allow using integral non-type template parameters in the following attributes:

  __attribute__((amdgpu_flat_work_group_size(<min>, <max>)))
  __attribute__((amdgpu_waves_per_eu(<min>[, <max>])))


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D58627

Files:
  .gitignore
  clang/include/clang/Basic/Attr.td
  clang/include/clang/Sema/Sema.h
  clang/lib/CodeGen/TargetInfo.cpp
  clang/lib/Sema/SemaCUDA.cpp
  clang/lib/Sema/SemaDeclAttr.cpp
  clang/lib/Sema/SemaTemplate.cpp
  clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
  clang/test/SemaCUDA/amdgpu-attrs.cu
  clang/test/SemaCUDA/kernel-template-with-func-arg.cu
  clang/test/SemaOpenCL/amdgpu-attrs.cl
  llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp

Index: llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
===
--- llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -398,8 +398,9 @@
 const TargetRegisterClass *OpRC =
 TLI->isTypeLegal(OpVT) ? TLI->getRegClassFor(OpVT) : nullptr;
 const TargetRegisterClass *IIRC =
-II ? TRI->getAllocatableClass(TII->getRegClass(*II, IIOpNum, TRI, *MF))
-   : nullptr;
+II ? TII->getRegClass(*II, IIOpNum, TRI, *MF) : nullptr;
+assert(!II || IIOpNum < II->getNumOperands() || !IIRC);
+IIRC = TRI->getAllocatableClass(IIRC);
 
 if (OpRC && IIRC && OpRC != IIRC &&
 TargetRegisterInfo::isVirtualRegister(VReg)) {
Index: clang/test/SemaOpenCL/amdgpu-attrs.cl
===
--- clang/test/SemaOpenCL/amdgpu-attrs.cl
+++ clang/test/SemaOpenCL/amdgpu-attrs.cl
@@ -27,12 +27,12 @@
 __attribute__((amdgpu_num_sgpr(32))) void func_num_sgpr_32() {} // expected-error {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
 __attribute__((amdgpu_num_vgpr(64))) void func_num_vgpr_64() {} // expected-error {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
 
-__attribute__((amdgpu_flat_work_group_size("ABC", "ABC"))) kernel void kernel_flat_work_group_size_ABC_ABC() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires an integer constant}}
-__attribute__((amdgpu_flat_work_group_size(32, "ABC"))) kernel void kernel_flat_work_group_size_32_ABC() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires an integer constant}}
-__attribute__((amdgpu_flat_work_group_size("ABC", 64))) kernel void kernel_flat_work_group_size_ABC_64() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires an integer constant}}
-__attribute__((amdgpu_waves_per_eu("ABC"))) kernel void kernel_waves_per_eu_ABC() {} // expected-error {{'amdgpu_waves_per_eu' attribute requires an integer constant}}
-__attribute__((amdgpu_waves_per_eu(2, "ABC"))) kernel void kernel_waves_per_eu_2_ABC() {} // expected-error {{'amdgpu_waves_per_eu' attribute requires an integer constant}}
-__attribute__((amdgpu_waves_per_eu("ABC", 4))) kernel void kernel_waves_per_eu_ABC_4() {} // expected-error {{'amdgpu_waves_per_eu' attribute requires an integer constant}}
+__attribute__((amdgpu_flat_work_group_size("ABC", "ABC"))) kernel void kernel_flat_work_group_size_ABC_ABC() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires parameter 0 to be an integer constant}}
+__attribute__((amdgpu_flat_work_group_size(32, "ABC"))) kernel void kernel_flat_work_group_size_32_ABC() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires parameter 1 to be an integer constant}}
+__attribute__((amdgpu_flat_work_group_size("ABC", 64))) kernel void kernel_flat_work_group_size_ABC_64() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires parameter 0 to be an integer constant}}
+__attribute__((amdgpu_waves_per_eu("ABC"))) kernel void kernel_waves_per_eu_ABC() {} // expected-error {{'amdgpu_waves_per_eu' attribute requires parameter 0 to be an integer constant}}
+__attribute__((amdgpu_waves_per_eu(2, "ABC"))) kernel void kernel_waves_per_eu_2_ABC() {} // expected-error {{'amdgpu_waves_per_eu' attribute requires parameter 1 to be an integer constant}}
+__attribute__((amdgpu_waves_per_eu("ABC", 4))) kernel void kernel_waves_per_eu_ABC_4() {} // expected-error {{'amdgpu_waves_per_eu' attribute requires parameter 0 to be an integer constant}}
 __attribute__((amdgpu_num_sgpr("ABC"))) kernel void kernel_num_sgpr_ABC() {} // expect

[PATCH] D58627: [git] Add top-level .gitignore

2019-02-25 Thread Michael Liao via Phabricator via cfe-commits
hliao abandoned this revision.
hliao added a comment.

Created by mistake; Ctrl-C was not fast enough to stop it.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58627/new/

https://reviews.llvm.org/D58627



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D62971: [HIP] Remove the assertion on match between host/device names.

2019-06-06 Thread Michael Liao via Phabricator via cfe-commits
hliao created this revision.
hliao added a reviewer: yaxunl.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

- Under different ABIs, that assertion is obviously too strong. Even under the same ABI, once there are unnamed types not required to follow the ODR, host- and device-side mangling may still produce different names. As the host- and device-side compilations always observe the same AST, we can still associate the correct pairs even with different names, i.e., we don't use (mangled) names to link host- and device-side globals. There's no need for this assertion.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D62971

Files:
  clang/lib/CodeGen/CGCUDANV.cpp
  clang/test/CodeGenCUDA/unnamed-types.cu


Index: clang/test/CodeGenCUDA/unnamed-types.cu
===
--- /dev/null
+++ clang/test/CodeGenCUDA/unnamed-types.cu
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-linux-gnu -emit-llvm %s -o 
- | FileCheck %s
+
+#include "Inputs/cuda.h"
+
+// CHECK: @0 = private unnamed_addr constant [40 x i8] 
c"_Z2k0IZZ2f1PfENK3$_0clES0_EUlfE_EvS0_T_\00"
+
+template 
+__global__ void k0(float *p, F f) {
+  p[0] = f(p[0]);
+}
+
+void f0(float *p) {
+  [](float *p) {
+*p = 1.f;
+  }(p);
+}
+
+void f1(float *p) {
+  [](float *p) {
+k0<<<1,1>>>(p, [] __device__ (float x) { return x + 1.f; });
+  }(p);
+}
+// CHECK: @__hip_register_globals
+// CHECK: 
__hipRegisterFunction{{.*}}_Z2k0IZZ2f1PfENK3$_1clES0_EUlfE_EvS0_T_{{.*}}@0
Index: clang/lib/CodeGen/CGCUDANV.cpp
===
--- clang/lib/CodeGen/CGCUDANV.cpp
+++ clang/lib/CodeGen/CGCUDANV.cpp
@@ -217,11 +217,6 @@
 
 void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF,
  FunctionArgList &Args) {
-  assert(getDeviceSideName(CGF.CurFuncDecl) == CGF.CurFn->getName() ||
- getDeviceSideName(CGF.CurFuncDecl) + ".stub" == CGF.CurFn->getName() 
||
- CGF.CGM.getContext().getTargetInfo().getCXXABI() !=
- CGF.CGM.getContext().getAuxTargetInfo()->getCXXABI());
-
   EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl});
   if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(),
  CudaFeature::CUDA_USES_NEW_LAUNCH))


Index: clang/test/CodeGenCUDA/unnamed-types.cu
===
--- /dev/null
+++ clang/test/CodeGenCUDA/unnamed-types.cu
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
+
+#include "Inputs/cuda.h"
+
+// CHECK: @0 = private unnamed_addr constant [40 x i8] c"_Z2k0IZZ2f1PfENK3$_0clES0_EUlfE_EvS0_T_\00"
+
+template 
+__global__ void k0(float *p, F f) {
+  p[0] = f(p[0]);
+}
+
+void f0(float *p) {
+  [](float *p) {
+*p = 1.f;
+  }(p);
+}
+
+void f1(float *p) {
+  [](float *p) {
+k0<<<1,1>>>(p, [] __device__ (float x) { return x + 1.f; });
+  }(p);
+}
+// CHECK: @__hip_register_globals
+// CHECK: __hipRegisterFunction{{.*}}_Z2k0IZZ2f1PfENK3$_1clES0_EUlfE_EvS0_T_{{.*}}@0
Index: clang/lib/CodeGen/CGCUDANV.cpp
===
--- clang/lib/CodeGen/CGCUDANV.cpp
+++ clang/lib/CodeGen/CGCUDANV.cpp
@@ -217,11 +217,6 @@
 
 void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF,
  FunctionArgList &Args) {
-  assert(getDeviceSideName(CGF.CurFuncDecl) == CGF.CurFn->getName() ||
- getDeviceSideName(CGF.CurFuncDecl) + ".stub" == CGF.CurFn->getName() ||
- CGF.CGM.getContext().getTargetInfo().getCXXABI() !=
- CGF.CGM.getContext().getAuxTargetInfo()->getCXXABI());
-
   EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl});
   if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(),
  CudaFeature::CUDA_USES_NEW_LAUNCH))
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D62971: [HIP] Remove the assertion on match between host/device names.

2019-06-06 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

Added a little explanation of the test case and of what the issue is.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D62971/new/

https://reviews.llvm.org/D62971



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D62971: [HIP] Remove the assertion on match between host/device names.

2019-06-06 Thread Michael Liao via Phabricator via cfe-commits
hliao marked 2 inline comments as done.
hliao added inline comments.



Comment at: clang/test/CodeGenCUDA/unnamed-types.cu:5
+
+// CHECK: @0 = private unnamed_addr constant [40 x i8] 
c"_Z2k0IZZ2f1PfENK3$_0clES0_EUlfE_EvS0_T_\00"
+

This is the device-side mangled name; notice that `$_0` refers to the unnamed closure in `f1`.



Comment at: clang/test/CodeGenCUDA/unnamed-types.cu:24
+// CHECK: @__hip_register_globals
+// CHECK: 
__hipRegisterFunction{{.*}}_Z2k0IZZ2f1PfENK3$_1clES0_EUlfE_EvS0_T_{{.*}}@0

This is the registration of the host-side stub function against the device-side function name, which is defined in `@0`. Notice that the host-side stub function has `$_1`, which refers to the closure in `f1`, as there is another (host-only) closure in `f0`.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D62971/new/

https://reviews.llvm.org/D62971



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D62971: [HIP] Remove the assertion on match between host/device names.

2019-06-06 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

I take this back. I fabricated a case where the anonymous type IDs in the device-side name mismatch between the host compilation and the device compilation.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D62971/new/

https://reviews.llvm.org/D62971



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D63020: [HIP] Fix visibility for 'extern' device variables.

2019-06-07 Thread Michael Liao via Phabricator via cfe-commits
hliao created this revision.
hliao added a reviewer: yaxunl.
Herald added subscribers: cfe-commits, nhaehnle, jvesely.
Herald added a project: clang.

- Fix a bug where the step that sets target-specific attributes on a variable declaration could be missed.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D63020

Files:
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/test/CodeGenCUDA/amdgpu-visibility.cu


Index: clang/test/CodeGenCUDA/amdgpu-visibility.cu
===
--- clang/test/CodeGenCUDA/amdgpu-visibility.cu
+++ clang/test/CodeGenCUDA/amdgpu-visibility.cu
@@ -13,6 +13,16 @@
 __constant__ int c;
 __device__ int g;
 
+// CHECK-DEFAULT: @e = external addrspace(1) global
+// CHECK-PROTECTED: @e = external protected addrspace(1) global
+// CHECK-HIDDEN: @e = external protected addrspace(1) global
+extern __device__ int e;
+
+// dummy one to hold reference to `e`.
+__device__ int f() {
+  return e;
+}
+
 // CHECK-DEFAULT: define amdgpu_kernel void @_Z3foov()
 // CHECK-PROTECTED: define protected amdgpu_kernel void @_Z3foov()
 // CHECK-HIDDEN: define protected amdgpu_kernel void @_Z3foov()
Index: clang/lib/CodeGen/CodeGenModule.cpp
===
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -3464,6 +3464,9 @@
 }
   }
 
+  if (GV->isDeclaration())
+getTargetCodeGenInfo().setTargetAttributes(D, GV, *this);
+
   LangAS ExpectedAS =
   D ? D->getType().getAddressSpace()
 : (LangOpts.OpenCL ? LangAS::opencl_global : LangAS::Default);
@@ -3473,9 +3476,6 @@
 return getTargetCodeGenInfo().performAddrSpaceCast(*this, GV, AddrSpace,
ExpectedAS, Ty);
 
-  if (GV->isDeclaration())
-getTargetCodeGenInfo().setTargetAttributes(D, GV, *this);
-
   return GV;
 }
 


Index: clang/test/CodeGenCUDA/amdgpu-visibility.cu
===
--- clang/test/CodeGenCUDA/amdgpu-visibility.cu
+++ clang/test/CodeGenCUDA/amdgpu-visibility.cu
@@ -13,6 +13,16 @@
 __constant__ int c;
 __device__ int g;
 
+// CHECK-DEFAULT: @e = external addrspace(1) global
+// CHECK-PROTECTED: @e = external protected addrspace(1) global
+// CHECK-HIDDEN: @e = external protected addrspace(1) global
+extern __device__ int e;
+
+// dummy one to hold reference to `e`.
+__device__ int f() {
+  return e;
+}
+
 // CHECK-DEFAULT: define amdgpu_kernel void @_Z3foov()
 // CHECK-PROTECTED: define protected amdgpu_kernel void @_Z3foov()
 // CHECK-HIDDEN: define protected amdgpu_kernel void @_Z3foov()
Index: clang/lib/CodeGen/CodeGenModule.cpp
===
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -3464,6 +3464,9 @@
 }
   }
 
+  if (GV->isDeclaration())
+getTargetCodeGenInfo().setTargetAttributes(D, GV, *this);
+
   LangAS ExpectedAS =
   D ? D->getType().getAddressSpace()
 : (LangOpts.OpenCL ? LangAS::opencl_global : LangAS::Default);
@@ -3473,9 +3476,6 @@
 return getTargetCodeGenInfo().performAddrSpaceCast(*this, GV, AddrSpace,
ExpectedAS, Ty);
 
-  if (GV->isDeclaration())
-getTargetCodeGenInfo().setTargetAttributes(D, GV, *this);
-
   return GV;
 }
 
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D62244: [AMDGPU] Enable the implicit arguments for HIP (CLANG)

2019-06-10 Thread Michael Liao via Phabricator via cfe-commits
hliao added inline comments.



Comment at: test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu:7
+
+// CHECK-DAG: attributes #0 = { noinline nounwind optnone 
"amdgpu-implicitarg-num-bytes"="48"

For a single check, you don't need CHECK-DAG.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D62244/new/

https://reviews.llvm.org/D62244



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D63143: [HIP] Enforce ODR rule for lambda in HIP/CUDA.

2019-06-11 Thread Michael Liao via Phabricator via cfe-commits
hliao created this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D63143

Files:
  clang/include/clang/Basic/LangOptions.def
  clang/include/clang/Driver/CC1Options.td
  clang/include/clang/Sema/Sema.h
  clang/lib/Frontend/CompilerInvocation.cpp
  clang/lib/Sema/SemaLambda.cpp
  clang/test/CodeGenCUDA/unnamed-types.cu

Index: clang/test/CodeGenCUDA/unnamed-types.cu
===
--- /dev/null
+++ clang/test/CodeGenCUDA/unnamed-types.cu
@@ -0,0 +1,34 @@
+// RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-linux-gnu -fcuda-force-lambda-odr -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST
+// RUN: %clang_cc1 -std=c++11 -x hip -triple amdgcn-amd-amdhsa -fcuda-force-lambda-odr -fcuda-is-device -emit-llvm %s -o - | FileCheck %s --check-prefix=DEVICE
+
+#include "Inputs/cuda.h"
+
+// HOST: @0 = private unnamed_addr constant [43 x i8] c"_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_\00", align 1
+
+__device__ float d0(float x) {
+  return [](float x) { return x + 2.f; }(x);
+}
+
+__device__ float d1(float x) {
+  return [](float x) { return x * 2.f; }(x);
+}
+
+// DEVICE: amdgpu_kernel void @_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_(
+template 
+__global__ void k0(float *p, F f) {
+  p[0] = f(p[0]) + d0(p[1]) + d1(p[2]);
+}
+
+void f0(float *p) {
+  [](float *p) {
+*p = 1.f;
+  }(p);
+}
+
+void f1(float *p) {
+  [](float *p) {
+k0<<<1,1>>>(p, [] __device__ (float x) { return x + 1.f; });
+  }(p);
+}
+// HOST: @__hip_register_globals
+// HOST: __hipRegisterFunction{{.*}}@_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0
Index: clang/lib/Sema/SemaLambda.cpp
===
--- clang/lib/Sema/SemaLambda.cpp
+++ clang/lib/Sema/SemaLambda.cpp
@@ -272,9 +272,8 @@
   return false;
 }
 
-MangleNumberingContext *
-Sema::getCurrentMangleNumberContext(const DeclContext *DC,
-Decl *&ManglingContextDecl) {
+MangleNumberingContext *Sema::getCurrentMangleNumberContext(
+const DeclContext *DC, Decl *&ManglingContextDecl, bool SkpNoODRChk) {
   // Compute the context for allocating mangling numbers in the current
   // expression, if the ABI requires them.
   ManglingContextDecl = ExprEvalContexts.back().ManglingContextDecl;
@@ -322,7 +321,8 @@
   case Normal: {
 //  -- the bodies of non-exported nonspecialized template functions
 //  -- the bodies of inline functions
-if ((IsInNonspecializedTemplate &&
+if (SkpNoODRChk ||
+(IsInNonspecializedTemplate &&
  !(ManglingContextDecl && isa(ManglingContextDecl))) ||
 isInInlineFunction(CurContext)) {
   ManglingContextDecl = nullptr;
@@ -337,7 +337,7 @@
 
   case StaticDataMember:
 //  -- the initializers of nonspecialized static members of template classes
-if (!IsInNonspecializedTemplate) {
+if (!SkpNoODRChk && !IsInNonspecializedTemplate) {
   ManglingContextDecl = nullptr;
   return nullptr;
 }
@@ -441,9 +441,9 @@
 Class->setLambdaMangling(Mangling->first, Mangling->second);
   } else {
 Decl *ManglingContextDecl;
-if (MangleNumberingContext *MCtx =
-getCurrentMangleNumberContext(Class->getDeclContext(),
-  ManglingContextDecl)) {
+if (MangleNumberingContext *MCtx = getCurrentMangleNumberContext(
+Class->getDeclContext(), ManglingContextDecl,
+getLangOpts().CUDAForceLambdaODR)) {
   unsigned ManglingNumber = MCtx->getManglingNumber(Method);
   Class->setLambdaMangling(ManglingNumber, ManglingContextDecl);
 }
Index: clang/lib/Frontend/CompilerInvocation.cpp
===
--- clang/lib/Frontend/CompilerInvocation.cpp
+++ clang/lib/Frontend/CompilerInvocation.cpp
@@ -2431,6 +2431,9 @@
   if (Args.hasArg(OPT_fno_cuda_host_device_constexpr))
 Opts.CUDAHostDeviceConstexpr = 0;
 
+  if (Args.hasArg(OPT_fcuda_force_lambda_odr))
+Opts.CUDAForceLambdaODR = 1;
+
   if (Opts.CUDAIsDevice && Args.hasArg(OPT_fcuda_approx_transcendentals))
 Opts.CUDADeviceApproxTranscendentals = 1;
 
Index: clang/include/clang/Sema/Sema.h
===
--- clang/include/clang/Sema/Sema.h
+++ clang/include/clang/Sema/Sema.h
@@ -1090,10 +1090,10 @@
   /// block literal.
   /// \param[out] ManglingContextDecl - Returns the ManglingContextDecl
   /// associated with the context, if relevant.
-  MangleNumberingContext *getCurrentMangleNumberContext(
-const DeclContext *DC,
-Decl *&ManglingContextDecl);
-
+  MangleNumberingContext *
+  getCurrentMangleNumberContext(const DeclContext *DC,
+Decl *&ManglingContextDecl,
+bool SkpNoODRChk = false);
 
   /// SpecialMemberOverloadResult - The overloading result for a s

[PATCH] D63143: [HIP] Enforce ODR rule for lambda in HIP/CUDA.

2019-06-11 Thread Michael Liao via Phabricator via cfe-commits
hliao updated this revision to Diff 204152.
hliao added a comment.

Add a comment on the motivation for this patch, as well as reviewers.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63143/new/

https://reviews.llvm.org/D63143

Files:
  clang/include/clang/Basic/LangOptions.def
  clang/include/clang/Driver/CC1Options.td
  clang/include/clang/Sema/Sema.h
  clang/lib/Frontend/CompilerInvocation.cpp
  clang/lib/Sema/SemaLambda.cpp
  clang/test/CodeGenCUDA/unnamed-types.cu

Index: clang/test/CodeGenCUDA/unnamed-types.cu
===
--- /dev/null
+++ clang/test/CodeGenCUDA/unnamed-types.cu
@@ -0,0 +1,34 @@
+// RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-linux-gnu -fcuda-force-lambda-odr -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST
+// RUN: %clang_cc1 -std=c++11 -x hip -triple amdgcn-amd-amdhsa -fcuda-force-lambda-odr -fcuda-is-device -emit-llvm %s -o - | FileCheck %s --check-prefix=DEVICE
+
+#include "Inputs/cuda.h"
+
+// HOST: @0 = private unnamed_addr constant [43 x i8] c"_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_\00", align 1
+
+__device__ float d0(float x) {
+  return [](float x) { return x + 2.f; }(x);
+}
+
+__device__ float d1(float x) {
+  return [](float x) { return x * 2.f; }(x);
+}
+
+// DEVICE: amdgpu_kernel void @_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_(
+template 
+__global__ void k0(float *p, F f) {
+  p[0] = f(p[0]) + d0(p[1]) + d1(p[2]);
+}
+
+void f0(float *p) {
+  [](float *p) {
+*p = 1.f;
+  }(p);
+}
+
+void f1(float *p) {
+  [](float *p) {
+k0<<<1,1>>>(p, [] __device__ (float x) { return x + 1.f; });
+  }(p);
+}
+// HOST: @__hip_register_globals
+// HOST: __hipRegisterFunction{{.*}}@_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0
Index: clang/lib/Sema/SemaLambda.cpp
===
--- clang/lib/Sema/SemaLambda.cpp
+++ clang/lib/Sema/SemaLambda.cpp
@@ -272,9 +272,8 @@
   return false;
 }
 
-MangleNumberingContext *
-Sema::getCurrentMangleNumberContext(const DeclContext *DC,
-Decl *&ManglingContextDecl) {
+MangleNumberingContext *Sema::getCurrentMangleNumberContext(
+const DeclContext *DC, Decl *&ManglingContextDecl, bool SkpNoODRChk) {
   // Compute the context for allocating mangling numbers in the current
   // expression, if the ABI requires them.
   ManglingContextDecl = ExprEvalContexts.back().ManglingContextDecl;
@@ -322,7 +321,8 @@
   case Normal: {
 //  -- the bodies of non-exported nonspecialized template functions
 //  -- the bodies of inline functions
-if ((IsInNonspecializedTemplate &&
+if (SkpNoODRChk ||
+(IsInNonspecializedTemplate &&
  !(ManglingContextDecl && isa(ManglingContextDecl))) ||
 isInInlineFunction(CurContext)) {
   ManglingContextDecl = nullptr;
@@ -337,7 +337,7 @@
 
   case StaticDataMember:
 //  -- the initializers of nonspecialized static members of template classes
-if (!IsInNonspecializedTemplate) {
+if (!SkpNoODRChk && !IsInNonspecializedTemplate) {
   ManglingContextDecl = nullptr;
   return nullptr;
 }
@@ -441,9 +441,9 @@
 Class->setLambdaMangling(Mangling->first, Mangling->second);
   } else {
 Decl *ManglingContextDecl;
-if (MangleNumberingContext *MCtx =
-getCurrentMangleNumberContext(Class->getDeclContext(),
-  ManglingContextDecl)) {
+if (MangleNumberingContext *MCtx = getCurrentMangleNumberContext(
+Class->getDeclContext(), ManglingContextDecl,
+getLangOpts().CUDAForceLambdaODR)) {
   unsigned ManglingNumber = MCtx->getManglingNumber(Method);
   Class->setLambdaMangling(ManglingNumber, ManglingContextDecl);
 }
Index: clang/lib/Frontend/CompilerInvocation.cpp
===
--- clang/lib/Frontend/CompilerInvocation.cpp
+++ clang/lib/Frontend/CompilerInvocation.cpp
@@ -2431,6 +2431,9 @@
   if (Args.hasArg(OPT_fno_cuda_host_device_constexpr))
 Opts.CUDAHostDeviceConstexpr = 0;
 
+  if (Args.hasArg(OPT_fcuda_force_lambda_odr))
+Opts.CUDAForceLambdaODR = 1;
+
   if (Opts.CUDAIsDevice && Args.hasArg(OPT_fcuda_approx_transcendentals))
 Opts.CUDADeviceApproxTranscendentals = 1;
 
Index: clang/include/clang/Sema/Sema.h
===
--- clang/include/clang/Sema/Sema.h
+++ clang/include/clang/Sema/Sema.h
@@ -1090,10 +1090,10 @@
   /// block literal.
   /// \param[out] ManglingContextDecl - Returns the ManglingContextDecl
   /// associated with the context, if relevant.
-  MangleNumberingContext *getCurrentMangleNumberContext(
-const DeclContext *DC,
-Decl *&ManglingContextDecl);
-
+  MangleNumberingContext *
+  getCurrentMangleNumberContext(const DeclContext *DC,
+Decl *&ManglingContextDecl,
+  

[PATCH] D63164: [HIP] Add option to force lambda naming following ODR in HIP/CUDA.

2019-06-11 Thread Michael Liao via Phabricator via cfe-commits
hliao created this revision.
hliao added reviewers: tra, yaxunl.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

- Clang follows its own numbering scheme for lambdas that don't need to follow the ODR. That scheme assigns a unique ID within the TU scope; the ID won't be unique or consistent across TUs.
- In CUDA/HIP, a lambda with `__device__` or `__host__ __device__` (or an extended lambda) may be used in a `__global__` template function instantiation. If that lambda cannot be named following the ODR, the device compilation may produce a device kernel name that mismatches the host compilation's, due to the anonymous type ID assignment mentioned above (see the sketch below).
- In this patch, a new language option, `-fcuda-force-lambda-odr`, is introduced to force ODR-style naming for lambdas so that all lambdas are named consistently across TUs, including in the device compilation. This fixes the assertion checking device kernel names and ensures that name-based resolution can find the correct device binaries from the device name generated in the host compilation.
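
A simplified sketch of the mismatch (an illustration only, not the committed test): a host-only lambda earlier in the TU consumes an anonymous ID during host compilation but is never numbered during device compilation, so the `__device__` lambda that ends up in the kernel's mangled name gets a different ID on the two sides.

  template <typename F>
  __global__ void kern(float *p, F f) { p[0] = f(p[0]); }

  void f0(float *p) {
    [](float *q) { *q = 1.f; }(p);  // host-only closure, numbered only on the host side
  }

  void f1(float *p) {
    // The host compilation mangles this closure as $_1 but the device
    // compilation as $_0, so the instantiated kernel names disagree
    // unless lambdas are numbered ODR-style.
    kern<<<1, 1>>>(p, [] __device__ (float x) { return x + 1.f; });
  }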


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D63164

Files:
  clang/include/clang/Basic/LangOptions.def
  clang/include/clang/Driver/CC1Options.td
  clang/include/clang/Sema/Sema.h
  clang/lib/Frontend/CompilerInvocation.cpp
  clang/lib/Sema/SemaLambda.cpp
  clang/test/CodeGenCUDA/unnamed-types.cu

Index: clang/test/CodeGenCUDA/unnamed-types.cu
===
--- /dev/null
+++ clang/test/CodeGenCUDA/unnamed-types.cu
@@ -0,0 +1,34 @@
+// RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-linux-gnu -fcuda-force-lambda-odr -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST
+// RUN: %clang_cc1 -std=c++11 -x hip -triple amdgcn-amd-amdhsa -fcuda-force-lambda-odr -fcuda-is-device -emit-llvm %s -o - | FileCheck %s --check-prefix=DEVICE
+
+#include "Inputs/cuda.h"
+
+// HOST: @0 = private unnamed_addr constant [43 x i8] c"_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_\00", align 1
+
+__device__ float d0(float x) {
+  return [](float x) { return x + 2.f; }(x);
+}
+
+__device__ float d1(float x) {
+  return [](float x) { return x * 2.f; }(x);
+}
+
+// DEVICE: amdgpu_kernel void @_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_(
+template 
+__global__ void k0(float *p, F f) {
+  p[0] = f(p[0]) + d0(p[1]) + d1(p[2]);
+}
+
+void f0(float *p) {
+  [](float *p) {
+*p = 1.f;
+  }(p);
+}
+
+void f1(float *p) {
+  [](float *p) {
+k0<<<1,1>>>(p, [] __device__ (float x) { return x + 1.f; });
+  }(p);
+}
+// HOST: @__hip_register_globals
+// HOST: __hipRegisterFunction{{.*}}@_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0
Index: clang/lib/Sema/SemaLambda.cpp
===
--- clang/lib/Sema/SemaLambda.cpp
+++ clang/lib/Sema/SemaLambda.cpp
@@ -272,9 +272,8 @@
   return false;
 }
 
-MangleNumberingContext *
-Sema::getCurrentMangleNumberContext(const DeclContext *DC,
-Decl *&ManglingContextDecl) {
+MangleNumberingContext *Sema::getCurrentMangleNumberContext(
+const DeclContext *DC, Decl *&ManglingContextDecl, bool SkpNoODRChk) {
   // Compute the context for allocating mangling numbers in the current
   // expression, if the ABI requires them.
   ManglingContextDecl = ExprEvalContexts.back().ManglingContextDecl;
@@ -322,7 +321,8 @@
   case Normal: {
 //  -- the bodies of non-exported nonspecialized template functions
 //  -- the bodies of inline functions
-if ((IsInNonspecializedTemplate &&
+if (SkpNoODRChk ||
+(IsInNonspecializedTemplate &&
  !(ManglingContextDecl && isa(ManglingContextDecl))) ||
 isInInlineFunction(CurContext)) {
   ManglingContextDecl = nullptr;
@@ -337,7 +337,7 @@
 
   case StaticDataMember:
 //  -- the initializers of nonspecialized static members of template classes
-if (!IsInNonspecializedTemplate) {
+if (!SkpNoODRChk && !IsInNonspecializedTemplate) {
   ManglingContextDecl = nullptr;
   return nullptr;
 }
@@ -441,9 +441,9 @@
 Class->setLambdaMangling(Mangling->first, Mangling->second);
   } else {
 Decl *ManglingContextDecl;
-if (MangleNumberingContext *MCtx =
-getCurrentMangleNumberContext(Class->getDeclContext(),
-  ManglingContextDecl)) {
+if (MangleNumberingContext *MCtx = getCurrentMangleNumberContext(
+Class->getDeclContext(), ManglingContextDecl,
+getLangOpts().CUDAForceLambdaODR)) {
   unsigned ManglingNumber = MCtx->getManglingNumber(Method);
   Class->setLambdaMangling(ManglingNumber, ManglingContextDecl);
 }
Index: clang/lib/Frontend/CompilerInvocation.cpp
===
--- clang/lib/Frontend/CompilerInvocation.cpp
+++ clang/lib/Frontend/CompilerInvocation.cpp
@@ -2431,6 +2431,9 @@
   if (Args.hasArg(OPT_fno_cuda_host_device_constexpr))
 Op

[PATCH] D63164: [HIP] Add option to force lambda naming following ODR in HIP/CUDA.

2019-06-11 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

In D63164#1538968 , @tra wrote:

> So, in short, what you're saying is that lambda type may leak into the 
> mangled name of a `__global__` function and we need to ensure that the 
> mangled name is identical for both host and device, hence the need for 
> consistent naming of lambdas.
>
> If that's the case, shouldn't it be enabled for CUDA/HIP by default? While 
> it's not frequently used ATM, it is something we do want to work correctly 
> all the time. The failure to do so results in weird runtime failures that 
> would be hard to debug for end-users.
>
> @rsmith -- are there any downsides having this enabled all the time?


Yeah, we should ensure consistent naming by default. But I want to hear more suggestions and comments before making that option the default. To be more specific, that option forces all lambda naming to follow the ODR. For non-`__device__` lambdas, even though there is no code-quality change, we do add overhead to the compiler itself for the additional records, though that should be negligible. A potential solution is to record the ODR context for parent lambdas and re-number them if an inner lambda is found to be a `__device__` one.
However, I do like the straightforward and extremely simple solution of this patch, forcing all lambda naming to follow the ODR: there is no code-quality change and only a potentially slight FE overhead. What are your thoughts?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63164/new/

https://reviews.llvm.org/D63164



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D63164: [HIP] Add option to force lambda naming following ODR in HIP/CUDA.

2019-06-12 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

Ping for comments, as one of our HIP-based workloads is blocked by this issue.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63164/new/

https://reviews.llvm.org/D63164



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D63164: [HIP] Add option to force lambda naming following ODR in HIP/CUDA.

2019-06-13 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

In D63164#1542361 , @rsmith wrote:

> I think this is the wrong way to handle this issue. We need to give lambdas a 
> mangling if they occur in functions for which there can be definitions in 
> multiple translation units. In regular C++ code, that's inline functions and 
> function template specializations, so that's what we're currently checking 
> for. CUDA adds more cases (in particular, `__host__ __device__` functions, 
> plus anything else that can be emitted for multiple targets), so we should 
> additionally check for those cases when determining whether to number 
> lambdas. I don't see any need for a flag to control this behavior.


I agree that this is a temporary solution to fix the issue. But the really tricky part is that, once we find a `__device__` lambda, we need to ensure all the enclosing scopes are named following the ODR as well, just as in the case illustrated in the test case. In fact, it is the outer lambda (neither annotated with `__device__` nor within an inline function) that is not named per the ODR. The tricky issue is that, so far, we don't maintain a context that lets us add the mangling back once we find that an inner lambda needs to follow the ODR. We would have to add that before we could do this on demand. I was working on that, but it would take more review effort.
That's also the motivation for adding an option to guard this behavior in this change.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63164/new/

https://reviews.llvm.org/D63164



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D63164: [HIP] Add option to force lambda naming following ODR in HIP/CUDA.

2019-06-14 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

PING


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63164/new/

https://reviews.llvm.org/D63164



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D63335: [HIP] Change kernel stub name again

2019-06-14 Thread Michael Liao via Phabricator via cfe-commits
hliao created this revision.
hliao added reviewers: yaxunl, tra.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

- Prefix kernel stubs with `__device_stub__` to avoid potential symbol-name conflicts in the debugger.
- Revise the interface to derive the stub name and simplify the assertion on it.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D63335

Files:
  clang/lib/CodeGen/CGCUDANV.cpp
  clang/lib/CodeGen/CGCUDARuntime.h
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/test/CodeGenCUDA/kernel-stub-name.cu


Index: clang/test/CodeGenCUDA/kernel-stub-name.cu
===
--- clang/test/CodeGenCUDA/kernel-stub-name.cu
+++ clang/test/CodeGenCUDA/kernel-stub-name.cu
@@ -10,7 +10,7 @@
 __global__ void kernelfunc() {}
 
 // CHECK-LABEL: define{{.*}}@_Z8hostfuncv()
-// CHECK: call void @[[STUB:_Z10kernelfuncIiEvv.stub]]()
+// CHECK: call void @[[STUB:__device_stub___Z10kernelfuncIiEvv]]()
 void hostfunc(void) { kernelfunc<<<1, 1>>>(); }
 
 // CHECK: define{{.*}}@[[STUB]]
Index: clang/lib/CodeGen/CodeGenModule.cpp
===
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -1088,13 +1088,10 @@
   const auto *ND = cast(GD.getDecl());
   std::string MangledName = getMangledNameImpl(*this, GD, ND);
 
-  // Postfix kernel stub names with .stub to differentiate them from kernel
-  // names in device binaries. This is to facilitate the debugger to find
-  // the correct symbols for kernels in the device binary.
+  // Derive the kernel stub from CUDA runtime.
   if (auto *FD = dyn_cast(GD.getDecl()))
-if (getLangOpts().HIP && !getLangOpts().CUDAIsDevice &&
-FD->hasAttr())
-  MangledName = MangledName + ".stub";
+if (!getLangOpts().CUDAIsDevice && FD->hasAttr())
+  MangledName = getCUDARuntime().getDeviceStubName(MangledName);
 
   auto Result = Manglings.insert(std::make_pair(MangledName, GD));
   return MangledDeclNames[CanonicalGD] = Result.first->first();
Index: clang/lib/CodeGen/CGCUDARuntime.h
===
--- clang/lib/CodeGen/CGCUDARuntime.h
+++ clang/lib/CodeGen/CGCUDARuntime.h
@@ -15,6 +15,8 @@
 #ifndef LLVM_CLANG_LIB_CODEGEN_CGCUDARUNTIME_H
 #define LLVM_CLANG_LIB_CODEGEN_CGCUDARUNTIME_H
 
+#include "llvm/ADT/StringRef.h"
+
 namespace llvm {
 class Function;
 class GlobalVariable;
@@ -63,6 +65,9 @@
   /// Returns a module cleanup function or nullptr if it's not needed.
   /// Must be called after ModuleCtorFunction
   virtual llvm::Function *makeModuleDtorFunction() = 0;
+
+  /// Construct and return the stub name of a kernel.
+  virtual std::string getDeviceStubName(llvm::StringRef Name) const = 0;
 };
 
 /// Creates an instance of a CUDA runtime class.
Index: clang/lib/CodeGen/CGCUDANV.cpp
===
--- clang/lib/CodeGen/CGCUDANV.cpp
+++ clang/lib/CodeGen/CGCUDANV.cpp
@@ -132,6 +132,8 @@
   llvm::Function *makeModuleCtorFunction() override;
   /// Creates module destructor function
   llvm::Function *makeModuleDtorFunction() override;
+  /// Construct and return the stub name of a kernel.
+  std::string getDeviceStubName(llvm::StringRef Name) const override;
 };
 
 }
@@ -217,10 +219,11 @@
 
 void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF,
  FunctionArgList &Args) {
-  assert(getDeviceSideName(CGF.CurFuncDecl) == CGF.CurFn->getName() ||
- getDeviceSideName(CGF.CurFuncDecl) + ".stub" == CGF.CurFn->getName() 
||
- CGF.CGM.getContext().getTargetInfo().getCXXABI() !=
- CGF.CGM.getContext().getAuxTargetInfo()->getCXXABI());
+  assert((CGF.CGM.getContext().getAuxTargetInfo() &&
+  (CGF.CGM.getContext().getAuxTargetInfo()->getCXXABI() !=
+   CGF.CGM.getContext().getTargetInfo().getCXXABI())) ||
+ getDeviceStubName(getDeviceSideName(CGF.CurFuncDecl)) ==
+ CGF.CurFn->getName());
 
   EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl});
   if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(),
@@ -780,6 +783,12 @@
   return ModuleDtorFunc;
 }
 
+std::string CGNVCUDARuntime::getDeviceStubName(llvm::StringRef Name) const {
+  if (!CGM.getLangOpts().HIP)
+return Name;
+  return std::move(("__device_stub__" + Name).str());
+}
+
 CGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) {
   return new CGNVCUDARuntime(CGM);
 }



[PATCH] D63335: [HIP] Change kernel stub name again

2019-06-14 Thread Michael Liao via Phabricator via cfe-commits
hliao marked an inline comment as done.
hliao added a comment.

It was requested by the debugger folks. They don't want the host-side stub name to be
able to match the device-side kernel function name, and the previous scheme cannot
prevent that.




Comment at: clang/lib/CodeGen/CGCUDANV.cpp:222-226
+  assert((CGF.CGM.getContext().getAuxTargetInfo() &&
+  (CGF.CGM.getContext().getAuxTargetInfo()->getCXXABI() !=
+   CGF.CGM.getContext().getTargetInfo().getCXXABI())) ||
+ getDeviceStubName(getDeviceSideName(CGF.CurFuncDecl)) ==
+ CGF.CurFn->getName());

tra wrote:
> I'm not sure I understand what exactly this assertion checks.
> The condition appears to be true is host/device ABIs are different OR the 
> name of the current function is the same as the (possibly mangled) 
> device-side name + __device_stub_ prefix.
> 
> While the first part makes sense, I'm not sure I understand the name 
> comparison part.
> Could you tell me more and, maybe, add a comment explaining what's going on 
> here.
The second part ensures that, under the same ABI, the kernel stub name derived from
the device-side name mangling is the same as the stub name generated from the host
side; CGF.CurFn->getName() is the mangled name from the host compilation.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63335/new/

https://reviews.llvm.org/D63335



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D63335: [HIP] Change kernel stub name again

2019-06-14 Thread Michael Liao via Phabricator via cfe-commits
hliao marked an inline comment as done.
hliao added inline comments.



Comment at: clang/lib/CodeGen/CGCUDANV.cpp:222-226
+  assert((CGF.CGM.getContext().getAuxTargetInfo() &&
+  (CGF.CGM.getContext().getAuxTargetInfo()->getCXXABI() !=
+   CGF.CGM.getContext().getTargetInfo().getCXXABI())) ||
+ getDeviceStubName(getDeviceSideName(CGF.CurFuncDecl)) ==
+ CGF.CurFn->getName());

hliao wrote:
> tra wrote:
> > I'm not sure I understand what exactly this assertion checks.
> > The condition appears to be true is host/device ABIs are different OR the 
> > name of the current function is the same as the (possibly mangled) 
> > device-side name + __device_stub_ prefix.
> > 
> > While the first part makes sense, I'm not sure I understand the name 
> > comparison part.
> > Could you tell me more and, maybe, add a comment explaining what's going on 
> > here.
> The second is to ensure, if, under the same ABI, kernel stub name derived 
> from device-side name mangling should be the same the sub name generated from 
> host-side, CGF.CurFn->getName() is the mangled named from host compilation
The previous assertion expression achieved the same goal: if the ABIs are the same,
the stub name derived from the device side should match the stub name from the
host-side compilation. As we now add a dedicated interface to derive the stub name,
we can simplify the comparison to a single one.
Also, we put the cheap condition check first (a common practice) to reduce the
overhead of the string comparison.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63335/new/

https://reviews.llvm.org/D63335



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D63335: [HIP] Change kernel stub name again

2019-06-14 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

In D63335#1543854 , @tra wrote:

> In D63335#1543845 , @hliao wrote:
>
> > it's requested from debugger people. they don't want to the host-side stub 
> > could match the device-side kernel function name. the previous scheme 
> > cannot prevent that.
>
>
> I understand that you want a different name for the stub. My question is why 
> the ".stub" suffix was not sufficient and how does having a prefix instead 
> helps? Making the name un-demangleable is undesirable, IMO. There should be a 
> good reason to justify it.


It's based on what the debugger folks told me: with ".stub", the debugger could still
match the stub against the original device kernel, even though it would find both of
them. They want to match the original one only and leave the stub one intentionally
unmatched.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63335/new/

https://reviews.llvm.org/D63335



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D63335: [HIP] Change kernel stub name again

2019-06-14 Thread Michael Liao via Phabricator via cfe-commits
hliao marked an inline comment as done.
hliao added inline comments.



Comment at: clang/lib/CodeGen/CGCUDANV.cpp:789
+return Name;
+  return std::move(("__device_stub__" + Name).str());
+}

tra wrote:
> I suspect `return "__device_stub__" + Name;` would do. StringRef will convert 
> to std::string and copy elision should avoid unnecessary copy.
"__device__stub__" + Name results in Twine, where not copy is generated. Only 
the final str() converts Twine into std::string involving copies. Otherwise, 
there's one copy from Name to std::string and another copy by std::string 
operator+, right?
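
For reference, a minimal side-by-side sketch of the two forms being discussed (the
function names below are made up for illustration):

  #include "llvm/ADT/StringRef.h"
  #include "llvm/ADT/Twine.h"
  #include <string>

  // Concatenates lazily in a llvm::Twine and materializes the result once.
  std::string stubNameViaTwine(llvm::StringRef Name) {
    return ("__device_stub__" + Name).str();
  }

  // Goes through std::string instead: Name.str() copies Name into a std::string
  // first, then std::string's operator+ builds the final concatenated string.
  std::string stubNameViaString(llvm::StringRef Name) {
    return "__device_stub__" + Name.str();
  }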


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63335/new/

https://reviews.llvm.org/D63335



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D63335: [HIP] Change kernel stub name again

2019-06-14 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

In D63335#1543854 , @tra wrote:

> In D63335#1543845 , @hliao wrote:
>
> > it's requested from debugger people. they don't want to the host-side stub 
> > could match the device-side kernel function name. the previous scheme 
> > cannot prevent that.
>
>
> I understand that you want a different name for the stub. My question is why 
> the ".stub" suffix was not sufficient and how does having a prefix instead 
> helps? Making the name un-demangleable is undesirable, IMO. There should be a 
> good reason to justify it.


Yeah, I understand that an un-demangleable name causes lots of frustration. But,
based on what I learned, CUDA generates a similar thing, e.g.
`__device_stub__Z15transformKernelPfiif` is the stub function generated by CUDA 10.1.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63335/new/

https://reviews.llvm.org/D63335



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D63335: [HIP] Change kernel stub name again

2019-06-14 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

In D63335#1543854 , @tra wrote:

> In D63335#1543845 , @hliao wrote:
>
> > it's requested from debugger people. they don't want to the host-side stub 
> > could match the device-side kernel function name. the previous scheme 
> > cannot prevent that.
>
>
> I understand that you want a different name for the stub. My question is why 
> the ".stub" suffix was not sufficient and how does having a prefix instead 
> helps? Making the name un-demangleable is undesirable, IMO. There should be a 
> good reason to justify it.


Is it OK for us to mangle `__device_stub__` as a nested name into the original one,
say, prepending `_ZN15__device_stub__E`, so that we have
`_ZN15__device_stub__E10kernelfuncIiEvv`

and

$ c++filt _ZN15__device_stub__E10kernelfuncIiEvv
__device_stub__(kernelfunc, void, void)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63335/new/

https://reviews.llvm.org/D63335



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D63335: [HIP] Change kernel stub name again

2019-06-14 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

In D63335#1544311 , @tra wrote:

> In D63335#1544019 , @hliao wrote:
>
> > In D63335#1543854 , @tra wrote:
> >
> > > In D63335#1543845 , @hliao wrote:
> > >
> > > > it's requested from debugger people. they don't want to the host-side 
> > > > stub could match the device-side kernel function name. the previous 
> > > > scheme cannot prevent that.
> > >
> > >
> > > I understand that you want a different name for the stub. My question is 
> > > why the ".stub" suffix was not sufficient and how does having a prefix 
> > > instead helps? Making the name un-demangleable is undesirable, IMO. There 
> > > should be a good reason to justify it.
> >
> >
> > it's based on debugger people told me, with ".stub", the debugger still 
> > could find it match the original device kernel even though it could find 
> > both of them. But, they want to match the original one only and leave the 
> > stub one intentionally unmatched.
>
>
> Sorry, I still don't think I understand the reasons for this change. The stub 
> and the kernel do have a different name now. I don't quite get it why the 
> debugger can differentiate the names when they differ by prefix, but can't 
> when they differ by suffix. It sounds like an attempt to work around a 
> problem somewhere else.
>
> Could you talk to the folks requesting the change and get more details on 
> what exactly we need to do here and, more importantly, why.


But after demangling, the debugger could still match both, as they are almost
identical except for the final variants, like `clone`. The debugger will set all
locations matching that specified kernel name.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63335/new/

https://reviews.llvm.org/D63335



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D63335: [HIP] Change kernel stub name again

2019-06-14 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

In D63335#1544320 , @tra wrote:

> In D63335#1544315 , @hliao wrote:
>
> > > Sorry, I still don't think I understand the reasons for this change. The 
> > > stub and the kernel do have a different name now. I don't quite get it 
> > > why the debugger can differentiate the names when they differ by prefix, 
> > > but can't when they differ by suffix. It sounds like an attempt to work 
> > > around a problem somewhere else.
> > > 
> > > Could you talk to the folks requesting the change and get more details on 
> > > what exactly we need to do here and, more importantly, why.
> >
> > But, after unmangling, debugger still could match both as they are almost 
> > identical excep the final variants, like `clone`. The debugger will set all 
> > locations matching that specified kernel name.
>
>
> OK, so the real issue is that demangled name looks identical to debugger.
>  One way to deal with that is to , essentially, break mangling in compiler.
>  Another would be to teach debugger how to distinguish the stub from the 
> kernel using additional information likely available to debugger (i.e. 
> mangled name or the location of the symbol -- is it in the host binary or in 
> the GPU binary).
>
> I would argue that breaking mangling is not the best choice here. 
>  I think debugger does have sufficient information to deal with this and that 
> would be the right place to deal with the issue.


Em, I did push for the latter as well, :(. OK, I will simplify the patch so that it
doesn't change any functionality but only moves the calculation of the device stub
name into a common interface. That way, a vendor could adjust it internally with a
minimal change. OK?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63335/new/

https://reviews.llvm.org/D63335



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D63335: [HIP] Change kernel stub name again

2019-06-14 Thread Michael Liao via Phabricator via cfe-commits
hliao updated this revision to Diff 204856.
hliao added a comment.

Just revise the interface for device kernel stubbing.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63335/new/

https://reviews.llvm.org/D63335

Files:
  clang/lib/CodeGen/CGCUDANV.cpp
  clang/lib/CodeGen/CGCUDARuntime.h
  clang/lib/CodeGen/CodeGenModule.cpp


Index: clang/lib/CodeGen/CodeGenModule.cpp
===
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -1088,13 +1088,10 @@
   const auto *ND = cast<NamedDecl>(GD.getDecl());
   std::string MangledName = getMangledNameImpl(*this, GD, ND);
 
-  // Postfix kernel stub names with .stub to differentiate them from kernel
-  // names in device binaries. This is to facilitate the debugger to find
-  // the correct symbols for kernels in the device binary.
+  // Derive the kernel stub from CUDA runtime.
   if (auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
-if (getLangOpts().HIP && !getLangOpts().CUDAIsDevice &&
-FD->hasAttr<CUDAGlobalAttr>())
-  MangledName = MangledName + ".stub";
+if (!getLangOpts().CUDAIsDevice && FD->hasAttr<CUDAGlobalAttr>())
+  MangledName = getCUDARuntime().getDeviceStubName(MangledName);
 
   auto Result = Manglings.insert(std::make_pair(MangledName, GD));
   return MangledDeclNames[CanonicalGD] = Result.first->first();
Index: clang/lib/CodeGen/CGCUDARuntime.h
===
--- clang/lib/CodeGen/CGCUDARuntime.h
+++ clang/lib/CodeGen/CGCUDARuntime.h
@@ -15,6 +15,8 @@
 #ifndef LLVM_CLANG_LIB_CODEGEN_CGCUDARUNTIME_H
 #define LLVM_CLANG_LIB_CODEGEN_CGCUDARUNTIME_H
 
+#include "llvm/ADT/StringRef.h"
+
 namespace llvm {
 class Function;
 class GlobalVariable;
@@ -63,6 +65,9 @@
   /// Returns a module cleanup function or nullptr if it's not needed.
   /// Must be called after ModuleCtorFunction
   virtual llvm::Function *makeModuleDtorFunction() = 0;
+
+  /// Construct and return the stub name of a kernel.
+  virtual std::string getDeviceStubName(llvm::StringRef Name) const = 0;
 };
 
 /// Creates an instance of a CUDA runtime class.
Index: clang/lib/CodeGen/CGCUDANV.cpp
===
--- clang/lib/CodeGen/CGCUDANV.cpp
+++ clang/lib/CodeGen/CGCUDANV.cpp
@@ -132,6 +132,8 @@
   llvm::Function *makeModuleCtorFunction() override;
   /// Creates module destructor function
   llvm::Function *makeModuleDtorFunction() override;
+  /// Construct and return the stub name of a kernel.
+  std::string getDeviceStubName(llvm::StringRef Name) const override;
 };
 
 }
@@ -217,10 +219,20 @@
 
 void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF,
  FunctionArgList &Args) {
-  assert(getDeviceSideName(CGF.CurFuncDecl) == CGF.CurFn->getName() ||
- getDeviceSideName(CGF.CurFuncDecl) + ".stub" == CGF.CurFn->getName() 
||
- CGF.CGM.getContext().getTargetInfo().getCXXABI() !=
- CGF.CGM.getContext().getAuxTargetInfo()->getCXXABI());
+  // Ensure either we have different ABIs between host and device compilations,
+  // says host compilation following MSVC ABI but device compilation follows
+  // Itanium C++ ABI or, if they follow the same ABI, kernel names after
+  // mangling should be same after name stubbing. The later checking is very
+  // important as the device kernel name being mangled in host-compilation is
+  // used to resolve the device binaries to be executed. Inconsistent naming
+  // result in undefined behavior. Even though we cannot check that naming
+  // directly between host- and device-compilations, the host- and
+  // device-mangling in host compilation could help catch certain ones.
+  assert((CGF.CGM.getContext().getAuxTargetInfo() &&
+  (CGF.CGM.getContext().getAuxTargetInfo()->getCXXABI() !=
+   CGF.CGM.getContext().getTargetInfo().getCXXABI())) ||
+ getDeviceStubName(getDeviceSideName(CGF.CurFuncDecl)) ==
+ CGF.CurFn->getName());
 
   EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl});
   if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(),
@@ -780,6 +792,12 @@
   return ModuleDtorFunc;
 }
 
+std::string CGNVCUDARuntime::getDeviceStubName(llvm::StringRef Name) const {
+  if (!CGM.getLangOpts().HIP)
+return Name;
+  return std::move((Name + ".stub").str());
+}
+
 CGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) {
   return new CGNVCUDARuntime(CGM);
 }



[PATCH] D63335: [HIP] Add the interface deriving the stub name of device kernels.

2019-06-14 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

In D63335#1544428 , @tra wrote:

> In D63335#1544324 , @hliao wrote:
>
> > > I think debugger does have sufficient information to deal with this and 
> > > that would be the right place to deal with the issue.
> >
> > em, I did push the later as well, :(. OK, I will simplify the patch to 
> > change any functionality but move the calculation of device name into a 
> > common interface. So that, vendor could adjust that internally with minimal 
> > change. OK?
>
>
> :-( Sorry about that. I realize how frustrating that can be.
>
> Perhaps it's worth trying once more. You can argue that this change will have 
> trouble being upstreamed without a good technical explanation why it must be 
> done in the compiler. Perhaps they do have compelling reasons why it's hard 
> to do in the debugger, but without specific details from their end it appears 
> indistinguishable from a (possibly misguided) quick fix. It may help if you 
> could get the debugger folks to chime in directly on the review.


Shall we review the code refactoring first, so that that change could then be just a
single-line change? Yes, I could post it later and pull in the necessary
stakeholders.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63335/new/

https://reviews.llvm.org/D63335



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D63335: [HIP] Add the interface deriving the stub name of device kernels.

2019-06-17 Thread Michael Liao via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL363553: [HIP] Add the interface deriving the stub name of 
device kernels. (authored by hliao, committed by ).
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Changed prior to commit:
  https://reviews.llvm.org/D63335?vs=204856&id=205051#toc

Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63335/new/

https://reviews.llvm.org/D63335

Files:
  cfe/trunk/lib/CodeGen/CGCUDANV.cpp
  cfe/trunk/lib/CodeGen/CGCUDARuntime.h
  cfe/trunk/lib/CodeGen/CodeGenModule.cpp


Index: cfe/trunk/lib/CodeGen/CGCUDARuntime.h
===
--- cfe/trunk/lib/CodeGen/CGCUDARuntime.h
+++ cfe/trunk/lib/CodeGen/CGCUDARuntime.h
@@ -15,6 +15,8 @@
 #ifndef LLVM_CLANG_LIB_CODEGEN_CGCUDARUNTIME_H
 #define LLVM_CLANG_LIB_CODEGEN_CGCUDARUNTIME_H
 
+#include "llvm/ADT/StringRef.h"
+
 namespace llvm {
 class Function;
 class GlobalVariable;
@@ -63,6 +65,9 @@
   /// Returns a module cleanup function or nullptr if it's not needed.
   /// Must be called after ModuleCtorFunction
   virtual llvm::Function *makeModuleDtorFunction() = 0;
+
+  /// Construct and return the stub name of a kernel.
+  virtual std::string getDeviceStubName(llvm::StringRef Name) const = 0;
 };
 
 /// Creates an instance of a CUDA runtime class.
Index: cfe/trunk/lib/CodeGen/CodeGenModule.cpp
===
--- cfe/trunk/lib/CodeGen/CodeGenModule.cpp
+++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp
@@ -1088,13 +1088,11 @@
   const auto *ND = cast<NamedDecl>(GD.getDecl());
   std::string MangledName = getMangledNameImpl(*this, GD, ND);
 
-  // Postfix kernel stub names with .stub to differentiate them from kernel
-  // names in device binaries. This is to facilitate the debugger to find
-  // the correct symbols for kernels in the device binary.
+  // Adjust kernel stub mangling as we may need to be able to differentiate
+  // them from the kernel itself (e.g., for HIP).
   if (auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
-if (getLangOpts().HIP && !getLangOpts().CUDAIsDevice &&
-FD->hasAttr<CUDAGlobalAttr>())
-  MangledName = MangledName + ".stub";
+if (!getLangOpts().CUDAIsDevice && FD->hasAttr<CUDAGlobalAttr>())
+  MangledName = getCUDARuntime().getDeviceStubName(MangledName);
 
   auto Result = Manglings.insert(std::make_pair(MangledName, GD));
   return MangledDeclNames[CanonicalGD] = Result.first->first();
Index: cfe/trunk/lib/CodeGen/CGCUDANV.cpp
===
--- cfe/trunk/lib/CodeGen/CGCUDANV.cpp
+++ cfe/trunk/lib/CodeGen/CGCUDANV.cpp
@@ -132,6 +132,8 @@
   llvm::Function *makeModuleCtorFunction() override;
   /// Creates module destructor function
   llvm::Function *makeModuleDtorFunction() override;
+  /// Construct and return the stub name of a kernel.
+  std::string getDeviceStubName(llvm::StringRef Name) const override;
 };
 
 }
@@ -217,10 +219,20 @@
 
 void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF,
  FunctionArgList &Args) {
-  assert(getDeviceSideName(CGF.CurFuncDecl) == CGF.CurFn->getName() ||
- getDeviceSideName(CGF.CurFuncDecl) + ".stub" == CGF.CurFn->getName() 
||
- CGF.CGM.getContext().getTargetInfo().getCXXABI() !=
- CGF.CGM.getContext().getAuxTargetInfo()->getCXXABI());
+  // Ensure either we have different ABIs between host and device compilations,
+  // says host compilation following MSVC ABI but device compilation follows
+  // Itanium C++ ABI or, if they follow the same ABI, kernel names after
+  // mangling should be the same after name stubbing. The later checking is
+  // very important as the device kernel name being mangled in host-compilation
+  // is used to resolve the device binaries to be executed. Inconsistent naming
+  // result in undefined behavior. Even though we cannot check that naming
+  // directly between host- and device-compilations, the host- and
+  // device-mangling in host compilation could help catching certain ones.
+  assert((CGF.CGM.getContext().getAuxTargetInfo() &&
+  (CGF.CGM.getContext().getAuxTargetInfo()->getCXXABI() !=
+   CGF.CGM.getContext().getTargetInfo().getCXXABI())) ||
+ getDeviceStubName(getDeviceSideName(CGF.CurFuncDecl)) ==
+ CGF.CurFn->getName());
 
   EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl});
   if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(),
@@ -780,6 +792,12 @@
   return ModuleDtorFunc;
 }
 
+std::string CGNVCUDARuntime::getDeviceStubName(llvm::StringRef Name) const {
+  if (!CGM.getLangOpts().HIP)
+return Name;
+  return std::move((Name + ".stub").str());
+}
+
 CGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) {
   return new CGNVCUDARuntime(CGM);
 }



[PATCH] D63164: [HIP] Add option to force lambda naming following ODR in HIP/CUDA.

2019-06-17 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

Ping again. I'm not sure whether my explanation gives enough detail on why this patch
was created.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63164/new/

https://reviews.llvm.org/D63164



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D62603: [CUDA][HIP] Skip setting `externally_initialized` for static device variables.

2019-05-29 Thread Michael Liao via Phabricator via cfe-commits
hliao created this revision.
hliao added a reviewer: yaxunl.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

- By declaring device variables as `static`, we assume they won't be 
addressable from the host side. Thus, no `externally_initialized` is required.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D62603

Files:
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/test/CodeGenCUDA/device-var-init.cu


Index: clang/test/CodeGenCUDA/device-var-init.cu
===
--- clang/test/CodeGenCUDA/device-var-init.cu
+++ clang/test/CodeGenCUDA/device-var-init.cu
@@ -33,6 +33,16 @@
 // DEVICE: @d_v_i = addrspace(1) externally_initialized global i32 1,
 // HOST:   @d_v_i = internal global i32 undef,
 
+// For `static` device variables, assume they won't be addressed from the host
+// side.
+static __device__ int d_s_v_i = 1;
+// DEVICE: @_ZL7d_s_v_i = internal addrspace(1) global i32 1,
+
+// Dummy function to keep static variables referenced.
+__device__ int foo() {
+  return d_s_v_i;
+}
+
 // trivial constructor -- allowed
 __device__ T d_t;
 // DEVICE: @d_t = addrspace(1) externally_initialized global %struct.T 
zeroinitializer
Index: clang/lib/CodeGen/CodeGenModule.cpp
===
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -3869,7 +3869,8 @@
   // / cudaMemcpyToSymbol() / cudaMemcpyFromSymbol())."
   if (GV && LangOpts.CUDA) {
 if (LangOpts.CUDAIsDevice) {
-  if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>())
+  if (Linkage != llvm::GlobalValue::InternalLinkage &&
+  (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()))
 GV->setExternallyInitialized(true);
 } else {
   // Host-side shadows of external declarations of device-side


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D62603: [CUDA][HIP] Skip setting `externally_initialized` for static device variables.

2019-05-29 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

Thanks, but that `static __device__` variable won't have a shadow on the host side anymore.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D62603/new/

https://reviews.llvm.org/D62603



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D62603: [CUDA][HIP] Skip setting `externally_initialized` for static device variables.

2019-05-29 Thread Michael Liao via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rC361994: [CUDA][HIP] Skip setting `externally_initialized` 
for static device variables. (authored by hliao, committed by ).

Changed prior to commit:
  https://reviews.llvm.org/D62603?vs=201938&id=201975#toc

Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D62603/new/

https://reviews.llvm.org/D62603

Files:
  lib/CodeGen/CodeGenModule.cpp
  test/CodeGenCUDA/device-var-init.cu


Index: lib/CodeGen/CodeGenModule.cpp
===
--- lib/CodeGen/CodeGenModule.cpp
+++ lib/CodeGen/CodeGenModule.cpp
@@ -3869,7 +3869,8 @@
   // / cudaMemcpyToSymbol() / cudaMemcpyFromSymbol())."
   if (GV && LangOpts.CUDA) {
 if (LangOpts.CUDAIsDevice) {
-  if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>())
+  if (Linkage != llvm::GlobalValue::InternalLinkage &&
+  (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()))
 GV->setExternallyInitialized(true);
 } else {
   // Host-side shadows of external declarations of device-side
Index: test/CodeGenCUDA/device-var-init.cu
===
--- test/CodeGenCUDA/device-var-init.cu
+++ test/CodeGenCUDA/device-var-init.cu
@@ -33,6 +33,16 @@
 // DEVICE: @d_v_i = addrspace(1) externally_initialized global i32 1,
 // HOST:   @d_v_i = internal global i32 undef,
 
+// For `static` device variables, assume they won't be addressed from the host
+// side.
+static __device__ int d_s_v_i = 1;
+// DEVICE: @_ZL7d_s_v_i = internal addrspace(1) global i32 1,
+
+// Dummy function to keep static variables referenced.
+__device__ int foo() {
+  return d_s_v_i;
+}
+
 // trivial constructor -- allowed
 __device__ T d_t;
 // DEVICE: @d_t = addrspace(1) externally_initialized global %struct.T 
zeroinitializer


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D62603: [CUDA][HIP] Skip setting `externally_initialized` for static device variables.

2019-05-29 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

In D62603#1521503 , @tra wrote:

> In D62603#1521484 , @hliao wrote:
>
> > thanks, but that `static __device__` variable won't have shadow in host 
> > anymore.
>
>
> Why not? Your change only changes whether `externally_initialized` is applied 
> to the variable during device-side compilation. It does not change what 
> happens on the host side. 
>  AFAICT, it will still be generated on the host side and the host side should 
> still be able to take its address.
>  NVCC also allows that: https://godbolt.org/z/t78RvM




In D62603#1521507 , @tra wrote:

> Note for the future -- it would be great if we could finish discussing the 
> patch before landing it. 
>  I would still like to see the host-side test.


Sorry, I will follow that rule. Yes, the patch only changes the device side. On the
host side, even though that variable is declared `static` as well, there is no
reference to it, so clang simply skips emitting it.
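
A small sketch of what I mean, following the test case in the patch:

  static __device__ int d_s_v_i = 1;

  // The only reference is from device code, so in the host compilation the
  // variable is unused and clang does not emit a host-side shadow for it.
  __device__ int foo() {
    return d_s_v_i;
  }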


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D62603/new/

https://reviews.llvm.org/D62603



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D62603: [CUDA][HIP] Skip setting `externally_initialized` for static device variables.

2019-05-29 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

In D62603#1521503 , @tra wrote:

> In D62603#1521484 , @hliao wrote:
>
> > thanks, but that `static __device__` variable won't have shadow in host 
> > anymore.
>
>
> Why not? Your change only changes whether `externally_initialized` is applied 
> to the variable during device-side compilation. It does not change what 
> happens on the host side. 
>  AFAICT, it will still be generated on the host side and the host side should 
> still be able to take its address.
>  NVCC also allows that: https://godbolt.org/z/t78RvM


BTW, the code posted looks quite weird to me. How could the code make sense by
returning a pointer to a device variable? Or a pointer to the shadow host variable?


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D62603/new/

https://reviews.llvm.org/D62603



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D62603: [CUDA][HIP] Skip setting `externally_initialized` for static device variables.

2019-05-29 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

In D62603#1521788 , @tra wrote:

> >> NVCC also allows that: https://godbolt.org/z/t78RvM
> > 
> > BTW, that code posted looks quite weird to me, how the code could make 
> > sense by return a pointer of device variable? or a pointer of shadow host 
> > variable?
>
> Magic. :-)
>  More practical example would be something like this:
>
>   __device__ int array[10];
>  
>   __host__ func() {
> cudaMemset(array, 0, sizeof(array));
>   }
>
>
> cudaMemset is a host function and it needs to use something that exists on 
> the host side as the first argument.
>  In order to deal with this, compiler:
>
> - creates uninitialized `int array[10]` on the host side. This allows ising 
> sizeof(array) on the host size.
> - registers its address/size with CUDA runtime. This allows passing address 
> of host-side shadow array to various CUDA runtime routines. The runtime knows 
> what it has on device side and maps shadow's address to the real device 
> address. This way CUDA runtime functions can make static device-side data 
> accessible without having to explicitly figure out their device-side address.


That assumes the variable is not declared with `static`. That's also the motivation
of this patch.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D62603/new/

https://reviews.llvm.org/D62603



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D62603: [CUDA][HIP] Skip setting `externally_initialized` for static device variables.

2019-05-29 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

In D62603#1521832 , @tra wrote:

> In D62603#1521792 , @hliao wrote:
>
> > that should assume that variable is not declared with `static`. that's also 
> > the motivation of this patch.
>
>
> cppreference defines internal linkage as 'The name can be referred to from 
> all scopes in the current translation unit.'
>  The current translation unit in CUDA context gets a bit murky. On one hand 
> host and device are compiled separately, and may conceivably be considered 
> separate TUs. On the other hand, the fact that we mix host and device code in 
> the same source file implies tight coupling and the users do expect them to 
> be treated as if all host and device code in the source file is in the same 
> TU. E.g. you may have a kernel in an anonymous namespace yet you do want to 
> be able to launch it from the host side.
>
> I think `static __device__` globals would fall into the same category -- 
> nominally they should not be visible outside of device-side object file, but 
> in practice we do need to make them visible from the host side of the same TU.


That's true if there's a reference on the host side. E.g., if I change the `foo`
function to be both __host__ and __device__, the host-side shadow is generated (with
an `undef` initializer, as expected).
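
A sketch of that variation (the HOST line is a hypothetical CHECK, written in the
style of device-var-init.cu):

  static __device__ int d_s_v_i = 1;
  // DEVICE: @_ZL7d_s_v_i = internal addrspace(1) global i32 1,
  // HOST:   @_ZL7d_s_v_i = internal global i32 undef,

  // Marking foo as __host__ __device__ adds a host-side reference, so the host
  // compilation now emits the shadow (with an undef initializer).
  __host__ __device__ int foo() {
    return d_s_v_i;
  }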


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D62603/new/

https://reviews.llvm.org/D62603



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D67730: [CUDA][HIP] Fix typo in `BestViableFunction`

2019-09-18 Thread Michael Liao via Phabricator via cfe-commits
hliao created this revision.
hliao added a reviewer: tra.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

- Consider only viable candidates when checking for SameSide candidates (a condensed
version of the affected scenario is sketched below).
- Replace erasing with clearing the viable flag to reduce data moving/copying.
- Add one test and revise another, as the new diagnostic messages are more relevant
than the previous ones.
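
Roughly, the situation the new test exercises (a condensed sketch of the test added
in this patch):

  struct C1 { int m; };
  struct C2 { int *m; };
  __device__ int operator-(const C1 &x, const C1 &y) { return x.m - y.m; }
  int operator-(const C2 &x, const C2 &y) { return x.m - y.m; }

  template <class T>
  constexpr int constexpr_overload(const T &x, const T &y) { // implicitly __host__ __device__
    return x - y;
  }

  int test_constexpr_overload(C2 x, C2 y) {
    // During device compilation, the device-side operator- for C1 is same-side
    // but not viable here; counting it used to prune the viable host-side
    // operator- for C2 as wrong-side and break overload resolution.
    return constexpr_overload(x, y);
  }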


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D67730

Files:
  clang/lib/Sema/SemaOverload.cpp
  clang/test/SemaCUDA/function-overload.cu
  clang/test/SemaCUDA/implicit-member-target-collision-cxx11.cu


Index: clang/test/SemaCUDA/implicit-member-target-collision-cxx11.cu
===
--- clang/test/SemaCUDA/implicit-member-target-collision-cxx11.cu
+++ clang/test/SemaCUDA/implicit-member-target-collision-cxx11.cu
@@ -74,11 +74,13 @@
 struct C4_with_collision : A4_with_host_copy_ctor, B4_with_device_copy_ctor {
 };
 
-// expected-note@-3 {{copy constructor of 'C4_with_collision' is implicitly 
deleted because base class 'B4_with_device_copy_ctor' has no copy constructor}}
+// expected-note@-3 {{candidate constructor (the implicit copy constructor) 
not viable: call to invalid function from __host__ function}}
+// expected-note@-4 {{implicit copy constructor inferred target collision: 
call to both __host__ and __device__ members}}
+// expected-note@-5 {{candidate constructor (the implicit default constructor) 
not viable: requires 0 arguments, but 1 was provided}}
 
 void hostfoo4() {
   C4_with_collision c;
-  C4_with_collision c2 = c; // expected-error {{call to implicitly-deleted 
copy constructor of 'C4_with_collision'}}
+  C4_with_collision c2 = c; // expected-error {{no matching constructor for 
initialization of 'C4_with_collision'}}
 }
 
 
//--
Index: clang/test/SemaCUDA/function-overload.cu
===
--- clang/test/SemaCUDA/function-overload.cu
+++ clang/test/SemaCUDA/function-overload.cu
@@ -1,8 +1,8 @@
 // REQUIRES: x86-registered-target
 // REQUIRES: nvptx-registered-target
 
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s
-// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device 
-verify %s
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fsyntax-only 
-verify %s
+// RUN: %clang_cc1 -std=c++11 -triple nvptx64-nvidia-cuda -fsyntax-only 
-fcuda-is-device -verify %s
 
 #include "Inputs/cuda.h"
 
@@ -402,3 +402,20 @@
 __device__ void test_device_template_overload() {
   template_overload(1); // OK. Attribute-based overloading picks __device__ 
variant.
 }
+
+// Two irrelevant classes with `operator-` defined. One of them is device only.
+struct C1 { int m; };
+struct C2 { int *m; };
+__device__
+int operator-(const C1 &x, const C1 &y) { return x.m - y.m; }
+int operator-(const C2 &x, const C2 &y) { return x.m - y.m; }
+
+template 
+constexpr int constexpr_overload(const T &x, const T &y) {
+  return x - y;
+}
+
+// Verify that function overloading doesn't prune candidate wrongly.
+int test_constexpr_overload(C2 x, C2 y) {
+  return constexpr_overload(x, y);
+}
Index: clang/lib/Sema/SemaOverload.cpp
===
--- clang/lib/Sema/SemaOverload.cpp
+++ clang/lib/Sema/SemaOverload.cpp
@@ -9422,17 +9422,19 @@
 const FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
 bool ContainsSameSideCandidate =
 llvm::any_of(Candidates, [&](OverloadCandidate *Cand) {
-  return Cand->Function &&
+  // Consider viable function only.
+  return Cand->Viable && Cand->Function &&
  S.IdentifyCUDAPreference(Caller, Cand->Function) ==
  Sema::CFP_SameSide;
 });
 if (ContainsSameSideCandidate) {
-  auto IsWrongSideCandidate = [&](OverloadCandidate *Cand) {
-return Cand->Function &&
-   S.IdentifyCUDAPreference(Caller, Cand->Function) ==
-   Sema::CFP_WrongSide;
-  };
-  llvm::erase_if(Candidates, IsWrongSideCandidate);
+  // Clear viable flag for WrongSide varible candidates.
+  llvm::for_each(Candidates, [&](OverloadCandidate *Cand) {
+if (Cand->Viable && Cand->Function &&
+S.IdentifyCUDAPreference(Caller, Cand->Function) ==
+Sema::CFP_WrongSide)
+  Cand->Viable = false;
+  });
 }
   }
 



[PATCH] D67730: [CUDA][HIP] Fix typo in `BestViableFunction`

2019-09-19 Thread Michael Liao via Phabricator via cfe-commits
hliao marked 3 inline comments as done.
hliao added a comment.

Committed in r372318, with the test case revised following the suggestion.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D67730/new/

https://reviews.llvm.org/D67730



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D67730: [CUDA][HIP] Fix typo in `BestViableFunction`

2019-09-19 Thread Michael Liao via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL372318: [CUDA][HIP] Fix typo in `BestViableFunction` 
(authored by hliao, committed by ).
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Changed prior to commit:
  https://reviews.llvm.org/D67730?vs=220738&id=220854#toc

Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D67730/new/

https://reviews.llvm.org/D67730

Files:
  cfe/trunk/lib/Sema/SemaOverload.cpp
  cfe/trunk/test/SemaCUDA/function-overload.cu
  cfe/trunk/test/SemaCUDA/implicit-member-target-collision-cxx11.cu


Index: cfe/trunk/test/SemaCUDA/function-overload.cu
===
--- cfe/trunk/test/SemaCUDA/function-overload.cu
+++ cfe/trunk/test/SemaCUDA/function-overload.cu
@@ -402,3 +402,20 @@
 __device__ void test_device_template_overload() {
   template_overload(1); // OK. Attribute-based overloading picks __device__ 
variant.
 }
+
+// Two classes with `operator-` defined. One of them is device only.
+struct C1;
+struct C2;
+__device__
+int operator-(const C1 &x, const C1 &y);
+int operator-(const C2 &x, const C2 &y);
+
+template 
+__host__ __device__ int constexpr_overload(const T &x, const T &y) {
+  return x - y;
+}
+
+// Verify that function overloading doesn't prune candidate wrongly.
+int test_constexpr_overload(C2 &x, C2 &y) {
+  return constexpr_overload(x, y);
+}
Index: cfe/trunk/test/SemaCUDA/implicit-member-target-collision-cxx11.cu
===
--- cfe/trunk/test/SemaCUDA/implicit-member-target-collision-cxx11.cu
+++ cfe/trunk/test/SemaCUDA/implicit-member-target-collision-cxx11.cu
@@ -74,11 +74,13 @@
 struct C4_with_collision : A4_with_host_copy_ctor, B4_with_device_copy_ctor {
 };
 
-// expected-note@-3 {{copy constructor of 'C4_with_collision' is implicitly 
deleted because base class 'B4_with_device_copy_ctor' has no copy constructor}}
+// expected-note@-3 {{candidate constructor (the implicit copy constructor) 
not viable: call to invalid function from __host__ function}}
+// expected-note@-4 {{implicit copy constructor inferred target collision: 
call to both __host__ and __device__ members}}
+// expected-note@-5 {{candidate constructor (the implicit default constructor) 
not viable: requires 0 arguments, but 1 was provided}}
 
 void hostfoo4() {
   C4_with_collision c;
-  C4_with_collision c2 = c; // expected-error {{call to implicitly-deleted 
copy constructor of 'C4_with_collision'}}
+  C4_with_collision c2 = c; // expected-error {{no matching constructor for 
initialization of 'C4_with_collision'}}
 }
 
 
//--
Index: cfe/trunk/lib/Sema/SemaOverload.cpp
===
--- cfe/trunk/lib/Sema/SemaOverload.cpp
+++ cfe/trunk/lib/Sema/SemaOverload.cpp
@@ -9422,17 +9422,19 @@
 const FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
 bool ContainsSameSideCandidate =
 llvm::any_of(Candidates, [&](OverloadCandidate *Cand) {
-  return Cand->Function &&
+  // Consider viable function only.
+  return Cand->Viable && Cand->Function &&
  S.IdentifyCUDAPreference(Caller, Cand->Function) ==
  Sema::CFP_SameSide;
 });
 if (ContainsSameSideCandidate) {
-  auto IsWrongSideCandidate = [&](OverloadCandidate *Cand) {
-return Cand->Function &&
-   S.IdentifyCUDAPreference(Caller, Cand->Function) ==
-   Sema::CFP_WrongSide;
-  };
-  llvm::erase_if(Candidates, IsWrongSideCandidate);
+  // Clear viable flag for WrongSide varible candidates.
+  llvm::for_each(Candidates, [&](OverloadCandidate *Cand) {
+if (Cand->Viable && Cand->Function &&
+S.IdentifyCUDAPreference(Caller, Cand->Function) ==
+Sema::CFP_WrongSide)
+  Cand->Viable = false;
+  });
 }
   }
 



[PATCH] D67924: [Sema] Fix the atomic expr rebuilding order.

2019-09-23 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

The current BuildAtomicExpr expects the arguments to be in the API order instead of
the AST order. If RebuildAtomicExpr uses the same BuildAtomicExpr, it needs to ensure
the arguments are in the API order; otherwise, arguments (especially the memory-order
one) will be misplaced.
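
As an illustration with the builtin from the test (a sketch of the two orders):

  void store(int *arr) {
    // Builtin call (API) order for __atomic_store_n: (ptr, value, order).
    __atomic_store_n(arr, 0, 5);
    // AtomicExpr keeps its sub-expressions in AST order: (ptr, order, value),
    // i.e. {arr, 5, 0}, as the updated CHECK lines in atomic-expr.cpp show.
    // Rebuilding through BuildAtomicExpr without rearranging therefore feeds
    // the value where the memory order is expected.
  }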


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D67924/new/

https://reviews.llvm.org/D67924



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D67924: [Sema] Fix the atomic expr rebuilding order.

2019-09-23 Thread Michael Liao via Phabricator via cfe-commits
hliao created this revision.
hliao added a reviewer: erichkeane.
Herald added subscribers: cfe-commits, jfb.
Herald added a project: clang.
hliao added a comment.

The current BuildAtomicExpr expects the arguments to be in the API order instead of
the AST order. If RebuildAtomicExpr uses the same BuildAtomicExpr, it needs to ensure
the arguments are in the API order; otherwise, arguments (especially the memory-order
one) will be misplaced.


- Rearrange the atomic expr order to the API order when rebuilding atomic expr 
during template instantiation.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D67924

Files:
  clang/include/clang/Sema/Sema.h
  clang/lib/Sema/SemaChecking.cpp
  clang/lib/Sema/TreeTransform.h
  clang/test/AST/atomic-expr.cpp

Index: clang/test/AST/atomic-expr.cpp
===
--- clang/test/AST/atomic-expr.cpp
+++ clang/test/AST/atomic-expr.cpp
@@ -3,7 +3,7 @@
 template
 void pr43370() {
   int arr[2];
-  __atomic_store_n(arr, 0, 0);
+  __atomic_store_n(arr, 0, 5);
 }
 void useage(){
   pr43370();
@@ -13,7 +13,13 @@
 // CHECK: AtomicExpr
 // CHECK-NEXT: ImplicitCastExpr
 // CHECK-SAME: 
+// CHECK-NEXT: DeclRefExpr 0x{{[0-9a-f]+}} <{{[^:]+}}:20> 'int [2]' lvalue Var 0x{{[0-9a-f]+}} 'arr' 'int [2]'
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:28> 'int' 5
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:25> 'int' 0
 // CHECK:FunctionDecl 0x{{[0-9a-f]+}}  line:4:6 used pr43370
 // CHECK: AtomicExpr
 // CHECK-NEXT: ImplicitCastExpr
 // CHECK-SAME: 
+// CHECK-NEXT: DeclRefExpr 0x{{[0-9a-f]+}} <{{[^:]+}}:20> 'int [2]' lvalue Var 0x{{[0-9a-f]+}} 'arr' 'int [2]'
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:28> 'int' 5
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:25> 'int' 0
Index: clang/lib/Sema/TreeTransform.h
===
--- clang/lib/Sema/TreeTransform.h
+++ clang/lib/Sema/TreeTransform.h
@@ -3315,7 +3315,8 @@
 // Use this for all of the locations, since we don't know the difference
 // between the call and the expr at this point.
 SourceRange Range{BuiltinLoc, RParenLoc};
-return getSema().BuildAtomicExpr(Range, Range, RParenLoc, SubExprs, Op);
+return getSema().BuildAtomicExpr(Range, Range, RParenLoc, SubExprs, Op,
+ true);
   }
 
 private:
Index: clang/lib/Sema/SemaChecking.cpp
===
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -4473,7 +4473,8 @@
 
 ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
  SourceLocation RParenLoc, MultiExprArg Args,
- AtomicExpr::AtomicOp Op) {
+ AtomicExpr::AtomicOp Op,
+ bool NeedRearrangeArgs) {
   // All the non-OpenCL operations take one of the following forms.
   // The OpenCL operations take the __c11 forms with one extra argument for
   // synchronization scope.
@@ -4754,19 +4755,56 @@
 IsPassedByAddress = true;
   }
 
+  SmallVector ReArgs;
+  if (NeedRearrangeArgs) {
+ReArgs.push_back(Args[0]);
+switch (Form) {
+case Init:
+case Load:
+  ReArgs.push_back(Args[1]); // Val1/Order
+  break;
+case LoadCopy:
+case Copy:
+case Arithmetic:
+case Xchg:
+  ReArgs.push_back(Args[2]); // Val1
+  ReArgs.push_back(Args[1]); // Order
+  break;
+case GNUXchg:
+  ReArgs.push_back(Args[2]); // Val1
+  ReArgs.push_back(Args[3]); // Val2
+  ReArgs.push_back(Args[1]); // Order
+  break;
+case C11CmpXchg:
+  ReArgs.push_back(Args[2]); // Val1
+  ReArgs.push_back(Args[4]); // Val2
+  ReArgs.push_back(Args[1]); // Order
+  ReArgs.push_back(Args[3]); // OrderFail
+  break;
+case GNUCmpXchg:
+  ReArgs.push_back(Args[2]); // Val1
+  ReArgs.push_back(Args[4]); // Val2
+  ReArgs.push_back(Args[5]); // Weak
+  ReArgs.push_back(Args[1]); // Order
+  ReArgs.push_back(Args[3]); // OrderFail
+  break;
+}
+  } else
+ReArgs.append(Args.begin(), Args.end());
+
   // The first argument's non-CV pointer type is used to deduce the type of
   // subsequent arguments, except for:
   //  - weak flag (always converted to bool)
   //  - memory order (always converted to int)
   //  - scope  (always converted to int)
-  for (unsigned i = 0; i != Args.size(); ++i) {
+  for (unsigned i = 0; i != ReArgs.size(); ++i) {
 QualType Ty;
 if (i < NumVals[Form] + 1) {
   switch (i) {
   case 0:
 // The first argument is always a pointer. It has a fixed type.
 // It is always dereferenced, a nullptr is undefined.
-CheckNonNullArgument(*this, Args[i], ExprRange.getBegin());
+CheckNonNullArgument(*this, ReArgs[i], ExprRange.getBegin());
  

[PATCH] D67924: [Sema] Fix the atomic expr rebuilding order.

2019-09-23 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

In D67924#1679409 , @erichkeane wrote:

> Yikes, good catch!
>
> Would we be better off instead to just modify how the other switch loads the 
> value?  Presumably something like, "if (NeedsRearrangeArgs) 
> SubExprs.append(Args.begin(), Args.end()); else /*the switch*/.


The loop starting at L4762 checks the "value" arguments assuming the API order as
well. That's why the arguments are rearranged, so that the value-checking logic also
checks the correct arguments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D67924/new/

https://reviews.llvm.org/D67924



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D67924: [Sema] Fix the atomic expr rebuilding order.

2019-09-23 Thread Michael Liao via Phabricator via cfe-commits
hliao updated this revision to Diff 221365.
hliao added a comment.

Add parameter name for that default argument.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D67924/new/

https://reviews.llvm.org/D67924

Files:
  clang/include/clang/Sema/Sema.h
  clang/lib/Sema/SemaChecking.cpp
  clang/lib/Sema/TreeTransform.h
  clang/test/AST/atomic-expr.cpp

Index: clang/test/AST/atomic-expr.cpp
===
--- clang/test/AST/atomic-expr.cpp
+++ clang/test/AST/atomic-expr.cpp
@@ -3,7 +3,7 @@
 template
 void pr43370() {
   int arr[2];
-  __atomic_store_n(arr, 0, 0);
+  __atomic_store_n(arr, 0, 5);
 }
 void useage(){
   pr43370();
@@ -13,7 +13,13 @@
 // CHECK: AtomicExpr
 // CHECK-NEXT: ImplicitCastExpr
 // CHECK-SAME: 
+// CHECK-NEXT: DeclRefExpr 0x{{[0-9a-f]+}} <{{[^:]+}}:20> 'int [2]' lvalue Var 0x{{[0-9a-f]+}} 'arr' 'int [2]'
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:28> 'int' 5
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:25> 'int' 0
 // CHECK:FunctionDecl 0x{{[0-9a-f]+}}  line:4:6 used pr43370
 // CHECK: AtomicExpr
 // CHECK-NEXT: ImplicitCastExpr
 // CHECK-SAME: 
+// CHECK-NEXT: DeclRefExpr 0x{{[0-9a-f]+}} <{{[^:]+}}:20> 'int [2]' lvalue Var 0x{{[0-9a-f]+}} 'arr' 'int [2]'
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:28> 'int' 5
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:25> 'int' 0
Index: clang/lib/Sema/TreeTransform.h
===
--- clang/lib/Sema/TreeTransform.h
+++ clang/lib/Sema/TreeTransform.h
@@ -3315,7 +3315,8 @@
 // Use this for all of the locations, since we don't know the difference
 // between the call and the expr at this point.
 SourceRange Range{BuiltinLoc, RParenLoc};
-return getSema().BuildAtomicExpr(Range, Range, RParenLoc, SubExprs, Op);
+return getSema().BuildAtomicExpr(Range, Range, RParenLoc, SubExprs, Op,
+ /*NeedRearrangeArgs*/ true);
   }
 
 private:
Index: clang/lib/Sema/SemaChecking.cpp
===
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -4473,7 +4473,8 @@
 
 ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
  SourceLocation RParenLoc, MultiExprArg Args,
- AtomicExpr::AtomicOp Op) {
+ AtomicExpr::AtomicOp Op,
+ bool NeedRearrangeArgs) {
   // All the non-OpenCL operations take one of the following forms.
   // The OpenCL operations take the __c11 forms with one extra argument for
   // synchronization scope.
@@ -4754,19 +4755,56 @@
 IsPassedByAddress = true;
   }
 
+  SmallVector ReArgs;
+  if (NeedRearrangeArgs) {
+ReArgs.push_back(Args[0]);
+switch (Form) {
+case Init:
+case Load:
+  ReArgs.push_back(Args[1]); // Val1/Order
+  break;
+case LoadCopy:
+case Copy:
+case Arithmetic:
+case Xchg:
+  ReArgs.push_back(Args[2]); // Val1
+  ReArgs.push_back(Args[1]); // Order
+  break;
+case GNUXchg:
+  ReArgs.push_back(Args[2]); // Val1
+  ReArgs.push_back(Args[3]); // Val2
+  ReArgs.push_back(Args[1]); // Order
+  break;
+case C11CmpXchg:
+  ReArgs.push_back(Args[2]); // Val1
+  ReArgs.push_back(Args[4]); // Val2
+  ReArgs.push_back(Args[1]); // Order
+  ReArgs.push_back(Args[3]); // OrderFail
+  break;
+case GNUCmpXchg:
+  ReArgs.push_back(Args[2]); // Val1
+  ReArgs.push_back(Args[4]); // Val2
+  ReArgs.push_back(Args[5]); // Weak
+  ReArgs.push_back(Args[1]); // Order
+  ReArgs.push_back(Args[3]); // OrderFail
+  break;
+}
+  } else
+ReArgs.append(Args.begin(), Args.end());
+
   // The first argument's non-CV pointer type is used to deduce the type of
   // subsequent arguments, except for:
   //  - weak flag (always converted to bool)
   //  - memory order (always converted to int)
   //  - scope  (always converted to int)
-  for (unsigned i = 0; i != Args.size(); ++i) {
+  for (unsigned i = 0; i != ReArgs.size(); ++i) {
 QualType Ty;
 if (i < NumVals[Form] + 1) {
   switch (i) {
   case 0:
 // The first argument is always a pointer. It has a fixed type.
 // It is always dereferenced, a nullptr is undefined.
-CheckNonNullArgument(*this, Args[i], ExprRange.getBegin());
+CheckNonNullArgument(*this, ReArgs[i], ExprRange.getBegin());
 // Nothing else to do: we already know all we want about this pointer.
 continue;
   case 1:
@@ -4780,12 +4818,12 @@
 else if (Form == Copy || Form == Xchg) {
   if (IsPassedByAddress)
 // The value pointer is always dereferenced, a nullptr is undefined.
-CheckNonNullArgument(*this, Args[i], ExprRange.getBegin()

[PATCH] D67924: [Sema] Fix the atomic expr rebuilding order.

2019-09-23 Thread Michael Liao via Phabricator via cfe-commits
hliao updated this revision to Diff 221369.
hliao added a comment.

revised


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D67924/new/

https://reviews.llvm.org/D67924

Files:
  clang/include/clang/Sema/Sema.h
  clang/lib/Sema/SemaChecking.cpp
  clang/lib/Sema/TreeTransform.h
  clang/test/AST/atomic-expr.cpp

Index: clang/test/AST/atomic-expr.cpp
===
--- clang/test/AST/atomic-expr.cpp
+++ clang/test/AST/atomic-expr.cpp
@@ -3,7 +3,7 @@
 template
 void pr43370() {
   int arr[2];
-  __atomic_store_n(arr, 0, 0);
+  __atomic_store_n(arr, 0, 5);
 }
 void useage(){
   pr43370();
@@ -13,7 +13,13 @@
 // CHECK: AtomicExpr
 // CHECK-NEXT: ImplicitCastExpr
 // CHECK-SAME: 
+// CHECK-NEXT: DeclRefExpr 0x{{[0-9a-f]+}} <{{[^:]+}}:20> 'int [2]' lvalue Var 0x{{[0-9a-f]+}} 'arr' 'int [2]'
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:28> 'int' 5
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:25> 'int' 0
 // CHECK:FunctionDecl 0x{{[0-9a-f]+}}  line:4:6 used pr43370
 // CHECK: AtomicExpr
 // CHECK-NEXT: ImplicitCastExpr
 // CHECK-SAME: 
+// CHECK-NEXT: DeclRefExpr 0x{{[0-9a-f]+}} <{{[^:]+}}:20> 'int [2]' lvalue Var 0x{{[0-9a-f]+}} 'arr' 'int [2]'
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:28> 'int' 5
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:25> 'int' 0
Index: clang/lib/Sema/TreeTransform.h
===
--- clang/lib/Sema/TreeTransform.h
+++ clang/lib/Sema/TreeTransform.h
@@ -3308,14 +3308,15 @@
   ///
   /// By default, performs semantic analysis to build the new expression.
   /// Subclasses may override this routine to provide different behavior.
-  ExprResult RebuildAtomicExpr(SourceLocation BuiltinLoc,
-   MultiExprArg SubExprs,
+  ExprResult RebuildAtomicExpr(SourceLocation BuiltinLoc, MultiExprArg SubExprs,
AtomicExpr::AtomicOp Op,
SourceLocation RParenLoc) {
 // Use this for all of the locations, since we don't know the difference
 // between the call and the expr at this point.
 SourceRange Range{BuiltinLoc, RParenLoc};
-return getSema().BuildAtomicExpr(Range, Range, RParenLoc, SubExprs, Op);
+return getSema().BuildAtomicExpr(
+Range, Range, RParenLoc, SubExprs, Op,
+/*AtomicArgumentOrder*/ Sema::AtomicArgumentOrder::AST);
   }
 
 private:
Index: clang/lib/Sema/SemaChecking.cpp
===
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -4473,7 +4473,8 @@
 
 ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
  SourceLocation RParenLoc, MultiExprArg Args,
- AtomicExpr::AtomicOp Op) {
+ AtomicExpr::AtomicOp Op,
+ AtomicArgumentOrder ArgOrder) {
   // All the non-OpenCL operations take one of the following forms.
   // The OpenCL operations take the __c11 forms with one extra argument for
   // synchronization scope.
@@ -4754,19 +4755,56 @@
 IsPassedByAddress = true;
   }
 
+  SmallVector APIOrderedArgs;
+  if (ArgOrder == Sema::AtomicArgumentOrder::AST) {
+APIOrderedArgs.push_back(Args[0]);
+switch (Form) {
+case Init:
+case Load:
+  APIOrderedArgs.push_back(Args[1]); // Val1/Order
+  break;
+case LoadCopy:
+case Copy:
+case Arithmetic:
+case Xchg:
+  APIOrderedArgs.push_back(Args[2]); // Val1
+  APIOrderedArgs.push_back(Args[1]); // Order
+  break;
+case GNUXchg:
+  APIOrderedArgs.push_back(Args[2]); // Val1
+  APIOrderedArgs.push_back(Args[3]); // Val2
+  APIOrderedArgs.push_back(Args[1]); // Order
+  break;
+case C11CmpXchg:
+  APIOrderedArgs.push_back(Args[2]); // Val1
+  APIOrderedArgs.push_back(Args[4]); // Val2
+  APIOrderedArgs.push_back(Args[1]); // Order
+  APIOrderedArgs.push_back(Args[3]); // OrderFail
+  break;
+case GNUCmpXchg:
+  APIOrderedArgs.push_back(Args[2]); // Val1
+  APIOrderedArgs.push_back(Args[4]); // Val2
+  APIOrderedArgs.push_back(Args[5]); // Weak
+  APIOrderedArgs.push_back(Args[1]); // Order
+  APIOrderedArgs.push_back(Args[3]); // OrderFail
+  break;
+}
+  } else
+APIOrderedArgs.append(Args.begin(), Args.end());
+
   // The first argument's non-CV pointer type is used to deduce the type of
   // subsequent arguments, except for:
   //  - weak flag (always converted to bool)
   //  - memory order (always converted to int)
   //  - scope  (always converted to int)
-  for (unsigned i = 0; i != Args.size(); ++i) {
+  for (unsigned i = 0; i != APIOrderedArgs.size(); ++i) {
 QualType Ty;
 if (i < NumVals[Form] + 1) {
   switch (i) {
   case 0:
 // The first ar

[PATCH] D67924: [Sema] Fix the atomic expr rebuilding order.

2019-09-23 Thread Michael Liao via Phabricator via cfe-commits
hliao updated this revision to Diff 221374.
hliao added a comment.

add test case for compare_exchange.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D67924/new/

https://reviews.llvm.org/D67924

Files:
  clang/include/clang/Sema/Sema.h
  clang/lib/Sema/SemaChecking.cpp
  clang/lib/Sema/TreeTransform.h
  clang/test/AST/atomic-expr.cpp

Index: clang/test/AST/atomic-expr.cpp
===
--- clang/test/AST/atomic-expr.cpp
+++ clang/test/AST/atomic-expr.cpp
@@ -3,17 +3,58 @@
 template
 void pr43370() {
   int arr[2];
-  __atomic_store_n(arr, 0, 0);
+  __atomic_store_n(arr, 0, 5);
 }
+
+template
+void foo() {
+  int arr[2];
+  (void)__atomic_compare_exchange_n(arr, arr, 1, 0, 3, 4);
+}
+
 void useage(){
   pr43370();
+  foo();
 }
 
 // CHECK:FunctionTemplateDecl 0x{{[0-9a-f]+}} <{{[^,]+}}, line:7:1> line:4:6 pr43370
 // CHECK: AtomicExpr
 // CHECK-NEXT: ImplicitCastExpr
 // CHECK-SAME: 
+// CHECK-NEXT: DeclRefExpr 0x{{[0-9a-f]+}} <{{[^:]+}}:20> 'int [2]' lvalue Var 0x{{[0-9a-f]+}} 'arr' 'int [2]'
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:28> 'int' 5
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:25> 'int' 0
 // CHECK:FunctionDecl 0x{{[0-9a-f]+}}  line:4:6 used pr43370
 // CHECK: AtomicExpr
 // CHECK-NEXT: ImplicitCastExpr
 // CHECK-SAME: 
+// CHECK-NEXT: DeclRefExpr 0x{{[0-9a-f]+}} <{{[^:]+}}:20> 'int [2]' lvalue Var 0x{{[0-9a-f]+}} 'arr' 'int [2]'
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:28> 'int' 5
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:25> 'int' 0
+
+// CHECK:FunctionTemplateDecl 0x{{[0-9a-f]+}}  line:10:6 foo
+// CHECK: AtomicExpr
+// CHECK-NEXT: ImplicitCastExpr
+// CHECK-SAME: 
+// CHECK-NEXT: DeclRefExpr 0x{{[0-9a-f]+}} <{{[^:]+}}:37> 'int [2]' lvalue Var 0x{{[0-9a-f]+}} 'arr' 'int [2]'
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:53> 'int' 3
+// CHECK-NEXT: ImplicitCastExpr
+// CHECK-SAME: 
+// CHECK-NEXT: DeclRefExpr 0x{{[0-9a-f]+}} <{{[^:]+}}:42> 'int [2]' lvalue Var 0x{{[0-9a-f]+}} 'arr' 'int [2]'
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:56> 'int' 4
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:47> 'int' 1
+// CHECK-NEXT: ImplicitCastExpr
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:50> 'int' 0
+// CHECK:FunctionDecl 0x{{[0-9a-f]+}}  line:10:6 used foo
+// CHECK: AtomicExpr
+// CHECK-NEXT: ImplicitCastExpr
+// CHECK-SAME: 
+// CHECK-NEXT: DeclRefExpr 0x{{[0-9a-f]+}} <{{[^:]+}}:37> 'int [2]' lvalue Var 0x{{[0-9a-f]+}} 'arr' 'int [2]'
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:53> 'int' 3
+// CHECK-NEXT: ImplicitCastExpr
+// CHECK-SAME: 
+// CHECK-NEXT: DeclRefExpr 0x{{[0-9a-f]+}} <{{[^:]+}}:42> 'int [2]' lvalue Var 0x{{[0-9a-f]+}} 'arr' 'int [2]'
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:56> 'int' 4
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:47> 'int' 1
+// CHECK-NEXT: ImplicitCastExpr
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:50> 'int' 0
Index: clang/lib/Sema/TreeTransform.h
===
--- clang/lib/Sema/TreeTransform.h
+++ clang/lib/Sema/TreeTransform.h
@@ -3308,14 +3308,15 @@
   ///
   /// By default, performs semantic analysis to build the new expression.
   /// Subclasses may override this routine to provide different behavior.
-  ExprResult RebuildAtomicExpr(SourceLocation BuiltinLoc,
-   MultiExprArg SubExprs,
+  ExprResult RebuildAtomicExpr(SourceLocation BuiltinLoc, MultiExprArg SubExprs,
AtomicExpr::AtomicOp Op,
SourceLocation RParenLoc) {
 // Use this for all of the locations, since we don't know the difference
 // between the call and the expr at this point.
 SourceRange Range{BuiltinLoc, RParenLoc};
-return getSema().BuildAtomicExpr(Range, Range, RParenLoc, SubExprs, Op);
+return getSema().BuildAtomicExpr(
+Range, Range, RParenLoc, SubExprs, Op,
+/*AtomicArgumentOrder*/ Sema::AtomicArgumentOrder::AST);
   }
 
 private:
Index: clang/lib/Sema/SemaChecking.cpp
===
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -4473,7 +4473,8 @@
 
 ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
  SourceLocation RParenLoc, MultiExprArg Args,
- AtomicExpr::AtomicOp Op) {
+ AtomicExpr::AtomicOp Op,
+ AtomicArgumentOrder ArgOrder) {
   // All the non-OpenCL operations take one of the following forms.
   // The OpenCL operations take the __c11 forms with one extra argument for
   // synchronization scope.
@@ -4754,19 +4755,56 @@
 IsPassedByAddress = true;
   }
 
+  SmallVector APIOrderedArgs;
+  if (Ar

[PATCH] D67924: [Sema] Fix the atomic expr rebuilding order.

2019-09-23 Thread Michael Liao via Phabricator via cfe-commits
hliao updated this revision to Diff 221375.
hliao added a comment.

update


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D67924/new/

https://reviews.llvm.org/D67924

Files:
  clang/include/clang/Sema/Sema.h
  clang/lib/Sema/SemaChecking.cpp
  clang/lib/Sema/TreeTransform.h
  clang/test/AST/atomic-expr.cpp

Index: clang/test/AST/atomic-expr.cpp
===
--- clang/test/AST/atomic-expr.cpp
+++ clang/test/AST/atomic-expr.cpp
@@ -3,17 +3,58 @@
 template
 void pr43370() {
   int arr[2];
-  __atomic_store_n(arr, 0, 0);
+  __atomic_store_n(arr, 0, 5);
 }
+
+template
+void foo() {
+  int arr[2];
+  (void)__atomic_compare_exchange_n(arr, arr, 1, 0, 3, 4);
+}
+
 void useage(){
   pr43370();
+  foo();
 }
 
 // CHECK:FunctionTemplateDecl 0x{{[0-9a-f]+}} <{{[^,]+}}, line:7:1> line:4:6 pr43370
 // CHECK: AtomicExpr
 // CHECK-NEXT: ImplicitCastExpr
 // CHECK-SAME: 
+// CHECK-NEXT: DeclRefExpr 0x{{[0-9a-f]+}} <{{[^:]+}}:20> 'int [2]' lvalue Var 0x{{[0-9a-f]+}} 'arr' 'int [2]'
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:28> 'int' 5
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:25> 'int' 0
 // CHECK:FunctionDecl 0x{{[0-9a-f]+}}  line:4:6 used pr43370
 // CHECK: AtomicExpr
 // CHECK-NEXT: ImplicitCastExpr
 // CHECK-SAME: 
+// CHECK-NEXT: DeclRefExpr 0x{{[0-9a-f]+}} <{{[^:]+}}:20> 'int [2]' lvalue Var 0x{{[0-9a-f]+}} 'arr' 'int [2]'
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:28> 'int' 5
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:25> 'int' 0
+
+// CHECK:FunctionTemplateDecl 0x{{[0-9a-f]+}}  line:10:6 foo
+// CHECK: AtomicExpr
+// CHECK-NEXT: ImplicitCastExpr
+// CHECK-SAME: 
+// CHECK-NEXT: DeclRefExpr 0x{{[0-9a-f]+}} <{{[^:]+}}:37> 'int [2]' lvalue Var 0x{{[0-9a-f]+}} 'arr' 'int [2]'
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:53> 'int' 3
+// CHECK-NEXT: ImplicitCastExpr
+// CHECK-SAME: 
+// CHECK-NEXT: DeclRefExpr 0x{{[0-9a-f]+}} <{{[^:]+}}:42> 'int [2]' lvalue Var 0x{{[0-9a-f]+}} 'arr' 'int [2]'
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:56> 'int' 4
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:47> 'int' 1
+// CHECK-NEXT: ImplicitCastExpr
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:50> 'int' 0
+// CHECK:FunctionDecl 0x{{[0-9a-f]+}}  line:10:6 used foo
+// CHECK: AtomicExpr
+// CHECK-NEXT: ImplicitCastExpr
+// CHECK-SAME: 
+// CHECK-NEXT: DeclRefExpr 0x{{[0-9a-f]+}} <{{[^:]+}}:37> 'int [2]' lvalue Var 0x{{[0-9a-f]+}} 'arr' 'int [2]'
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:53> 'int' 3
+// CHECK-NEXT: ImplicitCastExpr
+// CHECK-SAME: 
+// CHECK-NEXT: DeclRefExpr 0x{{[0-9a-f]+}} <{{[^:]+}}:42> 'int [2]' lvalue Var 0x{{[0-9a-f]+}} 'arr' 'int [2]'
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:56> 'int' 4
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:47> 'int' 1
+// CHECK-NEXT: ImplicitCastExpr
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:50> 'int' 0
Index: clang/lib/Sema/TreeTransform.h
===
--- clang/lib/Sema/TreeTransform.h
+++ clang/lib/Sema/TreeTransform.h
@@ -3308,14 +3308,14 @@
   ///
   /// By default, performs semantic analysis to build the new expression.
   /// Subclasses may override this routine to provide different behavior.
-  ExprResult RebuildAtomicExpr(SourceLocation BuiltinLoc,
-   MultiExprArg SubExprs,
+  ExprResult RebuildAtomicExpr(SourceLocation BuiltinLoc, MultiExprArg SubExprs,
AtomicExpr::AtomicOp Op,
SourceLocation RParenLoc) {
 // Use this for all of the locations, since we don't know the difference
 // between the call and the expr at this point.
 SourceRange Range{BuiltinLoc, RParenLoc};
-return getSema().BuildAtomicExpr(Range, Range, RParenLoc, SubExprs, Op);
+return getSema().BuildAtomicExpr(Range, Range, RParenLoc, SubExprs, Op,
+ Sema::AtomicArgumentOrder::AST);
   }
 
 private:
Index: clang/lib/Sema/SemaChecking.cpp
===
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -4473,7 +4473,8 @@
 
 ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
  SourceLocation RParenLoc, MultiExprArg Args,
- AtomicExpr::AtomicOp Op) {
+ AtomicExpr::AtomicOp Op,
+ AtomicArgumentOrder ArgOrder) {
   // All the non-OpenCL operations take one of the following forms.
   // The OpenCL operations take the __c11 forms with one extra argument for
   // synchronization scope.
@@ -4754,19 +4755,56 @@
 IsPassedByAddress = true;
   }
 
+  SmallVector APIOrderedArgs;
+  if (ArgOrder == Sema::AtomicArgumentOrde

[PATCH] D67924: [Sema] Fix the atomic expr rebuilding order.

2019-09-23 Thread Michael Liao via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL372640: [Sema] Fix the atomic expr rebuilding order. 
(authored by hliao, committed by ).
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Changed prior to commit:
  https://reviews.llvm.org/D67924?vs=221375&id=221378#toc

Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D67924/new/

https://reviews.llvm.org/D67924

Files:
  cfe/trunk/include/clang/Sema/Sema.h
  cfe/trunk/lib/Sema/SemaChecking.cpp
  cfe/trunk/lib/Sema/TreeTransform.h
  cfe/trunk/test/AST/atomic-expr.cpp

Index: cfe/trunk/include/clang/Sema/Sema.h
===
--- cfe/trunk/include/clang/Sema/Sema.h
+++ cfe/trunk/include/clang/Sema/Sema.h
@@ -4637,9 +4637,12 @@
MultiExprArg ArgExprs, SourceLocation RParenLoc,
Expr *ExecConfig = nullptr,
bool IsExecConfig = false);
-  ExprResult BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
- SourceLocation RParenLoc, MultiExprArg Args,
- AtomicExpr::AtomicOp Op);
+  enum class AtomicArgumentOrder { API, AST };
+  ExprResult
+  BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
+  SourceLocation RParenLoc, MultiExprArg Args,
+  AtomicExpr::AtomicOp Op,
+  AtomicArgumentOrder ArgOrder = AtomicArgumentOrder::API);
   ExprResult
   BuildResolvedCallExpr(Expr *Fn, NamedDecl *NDecl, SourceLocation LParenLoc,
 ArrayRef Arg, SourceLocation RParenLoc,
Index: cfe/trunk/test/AST/atomic-expr.cpp
===
--- cfe/trunk/test/AST/atomic-expr.cpp
+++ cfe/trunk/test/AST/atomic-expr.cpp
@@ -3,17 +3,58 @@
 template
 void pr43370() {
   int arr[2];
-  __atomic_store_n(arr, 0, 0);
+  __atomic_store_n(arr, 0, 5);
 }
+
+template
+void foo() {
+  int arr[2];
+  (void)__atomic_compare_exchange_n(arr, arr, 1, 0, 3, 4);
+}
+
 void useage(){
   pr43370();
+  foo();
 }
 
 // CHECK:FunctionTemplateDecl 0x{{[0-9a-f]+}} <{{[^,]+}}, line:7:1> line:4:6 pr43370
 // CHECK: AtomicExpr
 // CHECK-NEXT: ImplicitCastExpr
 // CHECK-SAME: 
+// CHECK-NEXT: DeclRefExpr 0x{{[0-9a-f]+}} <{{[^:]+}}:20> 'int [2]' lvalue Var 0x{{[0-9a-f]+}} 'arr' 'int [2]'
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:28> 'int' 5
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:25> 'int' 0
 // CHECK:FunctionDecl 0x{{[0-9a-f]+}}  line:4:6 used pr43370
 // CHECK: AtomicExpr
 // CHECK-NEXT: ImplicitCastExpr
 // CHECK-SAME: 
+// CHECK-NEXT: DeclRefExpr 0x{{[0-9a-f]+}} <{{[^:]+}}:20> 'int [2]' lvalue Var 0x{{[0-9a-f]+}} 'arr' 'int [2]'
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:28> 'int' 5
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:25> 'int' 0
+
+// CHECK:FunctionTemplateDecl 0x{{[0-9a-f]+}}  line:10:6 foo
+// CHECK: AtomicExpr
+// CHECK-NEXT: ImplicitCastExpr
+// CHECK-SAME: 
+// CHECK-NEXT: DeclRefExpr 0x{{[0-9a-f]+}} <{{[^:]+}}:37> 'int [2]' lvalue Var 0x{{[0-9a-f]+}} 'arr' 'int [2]'
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:53> 'int' 3
+// CHECK-NEXT: ImplicitCastExpr
+// CHECK-SAME: 
+// CHECK-NEXT: DeclRefExpr 0x{{[0-9a-f]+}} <{{[^:]+}}:42> 'int [2]' lvalue Var 0x{{[0-9a-f]+}} 'arr' 'int [2]'
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:56> 'int' 4
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:47> 'int' 1
+// CHECK-NEXT: ImplicitCastExpr
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:50> 'int' 0
+// CHECK:FunctionDecl 0x{{[0-9a-f]+}}  line:10:6 used foo
+// CHECK: AtomicExpr
+// CHECK-NEXT: ImplicitCastExpr
+// CHECK-SAME: 
+// CHECK-NEXT: DeclRefExpr 0x{{[0-9a-f]+}} <{{[^:]+}}:37> 'int [2]' lvalue Var 0x{{[0-9a-f]+}} 'arr' 'int [2]'
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:53> 'int' 3
+// CHECK-NEXT: ImplicitCastExpr
+// CHECK-SAME: 
+// CHECK-NEXT: DeclRefExpr 0x{{[0-9a-f]+}} <{{[^:]+}}:42> 'int [2]' lvalue Var 0x{{[0-9a-f]+}} 'arr' 'int [2]'
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:56> 'int' 4
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:47> 'int' 1
+// CHECK-NEXT: ImplicitCastExpr
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-f]+}} <{{[^:]+}}:50> 'int' 0
Index: cfe/trunk/lib/Sema/SemaChecking.cpp
===
--- cfe/trunk/lib/Sema/SemaChecking.cpp
+++ cfe/trunk/lib/Sema/SemaChecking.cpp
@@ -4473,7 +4473,8 @@
 
 ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
  SourceLocation RParenLoc, MultiExprArg Args,
- AtomicExpr::AtomicOp Op) {
+ AtomicExpr::AtomicOp Op,
+ AtomicArgumentOrder ArgOrder) {
   // All the non-OpenCL operations 

[PATCH] D68030: [CUDA][HIP] Initial kernel return type relaxing.

2019-09-25 Thread Michael Liao via Phabricator via cfe-commits
hliao created this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.
hliao abandoned this revision.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D68030

Files:
  clang/lib/Sema/SemaDeclAttr.cpp
  clang/lib/Sema/SemaExpr.cpp
  clang/lib/Sema/SemaStmt.cpp
  clang/lib/Sema/SemaTemplateDeduction.cpp


Index: clang/lib/Sema/SemaTemplateDeduction.cpp
===
--- clang/lib/Sema/SemaTemplateDeduction.cpp
+++ clang/lib/Sema/SemaTemplateDeduction.cpp
@@ -3093,6 +3093,13 @@
   Function->getTypeSpecStartLoc(), Function->getDeclName());
 if (ResultType.isNull() || Trap.hasErrorOccurred())
   return TDK_SubstitutionFailure;
+// CUDA: Kernel function must have 'void' return type.
+if (getLangOpts().CUDA)
+  if (Function->hasAttr<CUDAGlobalAttr>() && !ResultType->isVoidType()) {
+Diag(Function->getLocation(), diag::err_kern_type_not_void_return)
+<< Function->getType() << Function->getSourceRange();
+return TDK_SubstitutionFailure;
+  }
   }
 
   // Instantiate the types of each of the function parameters given the
Index: clang/lib/Sema/SemaStmt.cpp
===
--- clang/lib/Sema/SemaStmt.cpp
+++ clang/lib/Sema/SemaStmt.cpp
@@ -3500,6 +3500,14 @@
   return true;
   }
 
+  // CUDA: Kernel function must have 'void' return type.
+  if (getLangOpts().CUDA)
+if (FD->hasAttr<CUDAGlobalAttr>() && !Deduced->isVoidType()) {
+  Diag(FD->getLocation(), diag::err_kern_type_not_void_return)
+  << FD->getType() << FD->getSourceRange();
+  return true;
+}
+
   //  If a function with a declared return type that contains a placeholder type
   //  has multiple return statements, the return type is deduced for each return
   //  statement. [...] if the type deduced is not the same in each deduction,
Index: clang/lib/Sema/SemaExpr.cpp
===
--- clang/lib/Sema/SemaExpr.cpp
+++ clang/lib/Sema/SemaExpr.cpp
@@ -5891,7 +5891,9 @@
 << FDecl << Fn->getSourceRange());
 
   // CUDA: Kernel function must have 'void' return type
-  if (!FuncT->getReturnType()->isVoidType())
+  if (!FuncT->getReturnType()->isVoidType() &&
+  !FuncT->getReturnType()->getAs<AutoType>() &&
+  !FuncT->getReturnType()->isInstantiationDependentType())
 return ExprError(Diag(LParenLoc, diag::err_kern_type_not_void_return)
 << Fn->getType() << Fn->getSourceRange());
 } else {
Index: clang/lib/Sema/SemaDeclAttr.cpp
===
--- clang/lib/Sema/SemaDeclAttr.cpp
+++ clang/lib/Sema/SemaDeclAttr.cpp
@@ -4223,7 +4223,9 @@
 return;
   }
  const auto *FD = cast<FunctionDecl>(D);
-  if (!FD->getReturnType()->isVoidType()) {
+  if (!FD->getReturnType()->isVoidType() &&
+  !FD->getReturnType()->getAs<AutoType>() &&
+  !FD->getReturnType()->isInstantiationDependentType()) {
 SourceRange RTRange = FD->getReturnTypeSourceRange();
 S.Diag(FD->getTypeSpecStartLoc(), diag::err_kern_type_not_void_return)
 << FD->getType()


Index: clang/lib/Sema/SemaTemplateDeduction.cpp
===
--- clang/lib/Sema/SemaTemplateDeduction.cpp
+++ clang/lib/Sema/SemaTemplateDeduction.cpp
@@ -3093,6 +3093,13 @@
   Function->getTypeSpecStartLoc(), Function->getDeclName());
 if (ResultType.isNull() || Trap.hasErrorOccurred())
   return TDK_SubstitutionFailure;
+// CUDA: Kernel function must have 'void' return type.
+if (getLangOpts().CUDA)
+  if (Function->hasAttr<CUDAGlobalAttr>() && !ResultType->isVoidType()) {
+Diag(Function->getLocation(), diag::err_kern_type_not_void_return)
+<< Function->getType() << Function->getSourceRange();
+return TDK_SubstitutionFailure;
+  }
   }
 
   // Instantiate the types of each of the function parameters given the
Index: clang/lib/Sema/SemaStmt.cpp
===
--- clang/lib/Sema/SemaStmt.cpp
+++ clang/lib/Sema/SemaStmt.cpp
@@ -3500,6 +3500,14 @@
   return true;
   }
 
+  // CUDA: Kernel function must have 'void' return type.
+  if (getLangOpts().CUDA)
+if (FD->hasAttr<CUDAGlobalAttr>() && !Deduced->isVoidType()) {
+  Diag(FD->getLocation(), diag::err_kern_type_not_void_return)
+  << FD->getType() << FD->getSourceRange();
+  return true;
+}
+
   //  If a function with a declared return type that contains a placeholder type
   //  has multiple return statements, the return type is deduced for each return
   //  statement. [...] if the type deduced is not the same in each deduction,
Index: clang/lib/Sema/SemaExpr.cpp
===
--- clang/lib/Sema/SemaExpr.cpp
+++ clang/lib/Sema/SemaExpr.cpp
@@ -5891,7 +5891,9 @@
 << FDecl << Fn->getSource

[PATCH] D68031: [CUDA][HIP] Enable kernel function return type deduction.

2019-09-25 Thread Michael Liao via Phabricator via cfe-commits
hliao created this revision.
hliao added reviewers: tra, jlebar.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

- Even though only `void` is still accepted as the deduced return type, 
enabling deduction/instantiation on the return type allows more consistent 
coding.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D68031

Files:
  clang/lib/Sema/SemaDeclAttr.cpp
  clang/lib/Sema/SemaExpr.cpp
  clang/lib/Sema/SemaStmt.cpp
  clang/lib/Sema/SemaTemplateDeduction.cpp
  clang/test/SemaCUDA/autoret-global.cu

Index: clang/test/SemaCUDA/autoret-global.cu
===
--- /dev/null
+++ clang/test/SemaCUDA/autoret-global.cu
@@ -0,0 +1,44 @@
+// RUN: %clang_cc1 -std=c++14 -fsyntax-only -verify %s
+
+#include "Inputs/cuda.h"
+
+template 
+__global__ T foo() {
+  // expected-note@-1 {{kernel function type 'T ()' must have void return type}}
+}
+
+void f0() {
+  foo<<<0, 0>>>();
+  foo<<<0, 0>>>();
+  // expected-error@-1 {{no matching function for call to 'foo'}}
+}
+
+__global__ auto f1() {
+}
+
+__global__ auto f2(int x) {
+  return x + 1;
+  // expected-error@-2 {{kernel function type 'auto (int)' must have void return type}}
+}
+
+template  struct enable_if { typedef T type; };
+template  struct enable_if {};
+
+template 
+__global__
+auto bar() -> typename enable_if::type {
+  // expected-note@-1 {{requirement '3 == 1' was not satisfied [with N = 3]}}
+}
+
+template 
+__global__
+auto bar() -> typename enable_if::type {
+  // expected-note@-1 {{requirement '3 == 2' was not satisfied [with N = 3]}}
+}
+
+void f3() {
+  bar<1><<<0, 0>>>();
+  bar<2><<<0, 0>>>();
+  bar<3><<<0, 0>>>();
+  // expected-error@-1 {{no matching function for call to 'bar'}}
+}
Index: clang/lib/Sema/SemaTemplateDeduction.cpp
===
--- clang/lib/Sema/SemaTemplateDeduction.cpp
+++ clang/lib/Sema/SemaTemplateDeduction.cpp
@@ -3093,6 +3093,13 @@
   Function->getTypeSpecStartLoc(), Function->getDeclName());
 if (ResultType.isNull() || Trap.hasErrorOccurred())
   return TDK_SubstitutionFailure;
+// CUDA: Kernel function must have 'void' return type.
+if (getLangOpts().CUDA)
+  if (Function->hasAttr<CUDAGlobalAttr>() && !ResultType->isVoidType()) {
+Diag(Function->getLocation(), diag::err_kern_type_not_void_return)
+<< Function->getType() << Function->getSourceRange();
+return TDK_SubstitutionFailure;
+  }
   }
 
   // Instantiate the types of each of the function parameters given the
Index: clang/lib/Sema/SemaStmt.cpp
===
--- clang/lib/Sema/SemaStmt.cpp
+++ clang/lib/Sema/SemaStmt.cpp
@@ -3500,6 +3500,14 @@
   return true;
   }
 
+  // CUDA: Kernel function must have 'void' return type.
+  if (getLangOpts().CUDA)
+if (FD->hasAttr<CUDAGlobalAttr>() && !Deduced->isVoidType()) {
+  Diag(FD->getLocation(), diag::err_kern_type_not_void_return)
+  << FD->getType() << FD->getSourceRange();
+  return true;
+}
+
   //  If a function with a declared return type that contains a placeholder type
   //  has multiple return statements, the return type is deduced for each return
   //  statement. [...] if the type deduced is not the same in each deduction,
Index: clang/lib/Sema/SemaExpr.cpp
===
--- clang/lib/Sema/SemaExpr.cpp
+++ clang/lib/Sema/SemaExpr.cpp
@@ -5891,7 +5891,9 @@
 << FDecl << Fn->getSourceRange());
 
   // CUDA: Kernel function must have 'void' return type
-  if (!FuncT->getReturnType()->isVoidType())
+  if (!FuncT->getReturnType()->isVoidType() &&
+  !FuncT->getReturnType()->getAs<AutoType>() &&
+  !FuncT->getReturnType()->isInstantiationDependentType())
 return ExprError(Diag(LParenLoc, diag::err_kern_type_not_void_return)
 << Fn->getType() << Fn->getSourceRange());
 } else {
Index: clang/lib/Sema/SemaDeclAttr.cpp
===
--- clang/lib/Sema/SemaDeclAttr.cpp
+++ clang/lib/Sema/SemaDeclAttr.cpp
@@ -4223,7 +4223,9 @@
 return;
   }
  const auto *FD = cast<FunctionDecl>(D);
-  if (!FD->getReturnType()->isVoidType()) {
+  if (!FD->getReturnType()->isVoidType() &&
+  !FD->getReturnType()->getAs<AutoType>() &&
+  !FD->getReturnType()->isInstantiationDependentType()) {
 SourceRange RTRange = FD->getReturnTypeSourceRange();
 S.Diag(FD->getTypeSpecStartLoc(), diag::err_kern_type_not_void_return)
 << FD->getType()
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D68031: [CUDA][HIP] Enable kernel function return type deduction.

2019-09-25 Thread Michael Liao via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG24337db61666: [CUDA][HIP] Enable kernel function return type 
deduction. (authored by hliao).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D68031/new/

https://reviews.llvm.org/D68031

Files:
  clang/lib/Sema/SemaDeclAttr.cpp
  clang/lib/Sema/SemaExpr.cpp
  clang/lib/Sema/SemaStmt.cpp
  clang/lib/Sema/SemaTemplateDeduction.cpp
  clang/test/SemaCUDA/autoret-global.cu

Index: clang/test/SemaCUDA/autoret-global.cu
===
--- /dev/null
+++ clang/test/SemaCUDA/autoret-global.cu
@@ -0,0 +1,44 @@
+// RUN: %clang_cc1 -std=c++14 -fsyntax-only -verify %s
+
+#include "Inputs/cuda.h"
+
+template 
+__global__ T foo() {
+  // expected-note@-1 {{kernel function type 'T ()' must have void return type}}
+}
+
+void f0() {
+  foo<<<0, 0>>>();
+  foo<<<0, 0>>>();
+  // expected-error@-1 {{no matching function for call to 'foo'}}
+}
+
+__global__ auto f1() {
+}
+
+__global__ auto f2(int x) {
+  return x + 1;
+  // expected-error@-2 {{kernel function type 'auto (int)' must have void return type}}
+}
+
+template  struct enable_if { typedef T type; };
+template  struct enable_if {};
+
+template 
+__global__
+auto bar() -> typename enable_if::type {
+  // expected-note@-1 {{requirement '3 == 1' was not satisfied [with N = 3]}}
+}
+
+template 
+__global__
+auto bar() -> typename enable_if::type {
+  // expected-note@-1 {{requirement '3 == 2' was not satisfied [with N = 3]}}
+}
+
+void f3() {
+  bar<1><<<0, 0>>>();
+  bar<2><<<0, 0>>>();
+  bar<3><<<0, 0>>>();
+  // expected-error@-1 {{no matching function for call to 'bar'}}
+}
Index: clang/lib/Sema/SemaTemplateDeduction.cpp
===
--- clang/lib/Sema/SemaTemplateDeduction.cpp
+++ clang/lib/Sema/SemaTemplateDeduction.cpp
@@ -3093,6 +3093,13 @@
   Function->getTypeSpecStartLoc(), Function->getDeclName());
 if (ResultType.isNull() || Trap.hasErrorOccurred())
   return TDK_SubstitutionFailure;
+// CUDA: Kernel function must have 'void' return type.
+if (getLangOpts().CUDA)
+  if (Function->hasAttr<CUDAGlobalAttr>() && !ResultType->isVoidType()) {
+Diag(Function->getLocation(), diag::err_kern_type_not_void_return)
+<< Function->getType() << Function->getSourceRange();
+return TDK_SubstitutionFailure;
+  }
   }
 
   // Instantiate the types of each of the function parameters given the
Index: clang/lib/Sema/SemaStmt.cpp
===
--- clang/lib/Sema/SemaStmt.cpp
+++ clang/lib/Sema/SemaStmt.cpp
@@ -3500,6 +3500,14 @@
   return true;
   }
 
+  // CUDA: Kernel function must have 'void' return type.
+  if (getLangOpts().CUDA)
+if (FD->hasAttr<CUDAGlobalAttr>() && !Deduced->isVoidType()) {
+  Diag(FD->getLocation(), diag::err_kern_type_not_void_return)
+  << FD->getType() << FD->getSourceRange();
+  return true;
+}
+
   //  If a function with a declared return type that contains a placeholder type
   //  has multiple return statements, the return type is deduced for each return
   //  statement. [...] if the type deduced is not the same in each deduction,
Index: clang/lib/Sema/SemaExpr.cpp
===
--- clang/lib/Sema/SemaExpr.cpp
+++ clang/lib/Sema/SemaExpr.cpp
@@ -5891,7 +5891,9 @@
 << FDecl << Fn->getSourceRange());
 
   // CUDA: Kernel function must have 'void' return type
-  if (!FuncT->getReturnType()->isVoidType())
+  if (!FuncT->getReturnType()->isVoidType() &&
+  !FuncT->getReturnType()->getAs<AutoType>() &&
+  !FuncT->getReturnType()->isInstantiationDependentType())
 return ExprError(Diag(LParenLoc, diag::err_kern_type_not_void_return)
 << Fn->getType() << Fn->getSourceRange());
 } else {
Index: clang/lib/Sema/SemaDeclAttr.cpp
===
--- clang/lib/Sema/SemaDeclAttr.cpp
+++ clang/lib/Sema/SemaDeclAttr.cpp
@@ -4223,7 +4223,9 @@
 return;
   }
  const auto *FD = cast<FunctionDecl>(D);
-  if (!FD->getReturnType()->isVoidType()) {
+  if (!FD->getReturnType()->isVoidType() &&
+  !FD->getReturnType()->getAs<AutoType>() &&
+  !FD->getReturnType()->isInstantiationDependentType()) {
 SourceRange RTRange = FD->getReturnTypeSourceRange();
 S.Diag(FD->getTypeSpecStartLoc(), diag::err_kern_type_not_void_return)
 << FD->getType()
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D68031: [CUDA][HIP] Enable kernel function return type deduction.

2019-09-25 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

In D68031#1682822 , @tra wrote:

> Nice.  I'd mention in the commit message that NVCC does not support deduced 
> return type for kernel functions.


Just tried with NVCC from CUDA 10: except that `auto`-based return type 
deduction is not supported, type deduction in a template is supported. The 
following test code passes compilation with NVCC:

  #include 
  
  template 
  __global__ T foo() {
  }
  
  void f0() {
foo<<<0, 0>>>();
  #if 0
foo<<<0, 0>>>();
  #endif
  }
  
  template  struct enable_if { typedef T type; };
  template  struct enable_if {};
  
  template 
  __global__
  auto bar() -> typename enable_if::type {
  }
  
  template 
  __global__
  auto bar() -> typename enable_if::type {
  }
  
  void f3() {
bar<1><<<0, 0>>>();
bar<2><<<0, 0>>>();
  #if 0
bar<3><<<0, 0>>>();
  #endif
  }

`s/#if 0/#if 1/` also shows that NVCC reports the error at the correct 
position, but the message, IMHO, is misleading compared to the one from clang.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D68031/new/

https://reviews.llvm.org/D68031



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D68157: [X86][ABI] Keep empty class argument passing by value compatible with GCC.

2019-09-27 Thread Michael Liao via Phabricator via cfe-commits
hliao created this revision.
hliao added a reviewer: craig.topper.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D68157

Files:
  clang/lib/CodeGen/TargetInfo.cpp
  clang/test/CodeGen/empty-class.cpp


Index: clang/test/CodeGen/empty-class.cpp
===
--- /dev/null
+++ clang/test/CodeGen/empty-class.cpp
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple=x86_64 | FileCheck --check-prefix=X64 %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple=i686 | FileCheck --check-prefix=X32 %s
+
+class Empty {};
+
+void bar(Empty *);
+
+// X64-LABEL: _Z3foo5Empty
+// X64-SAME: %class.Empty* byval(%class.Empty) align 8 %e
+// X64-NOT: alloca
+// X32-LABEL: _Z3foo5Empty
+// X32-SAME: %class.Empty* byval(%class.Empty) align 4 %e
+// X32-NOT: alloca
+void foo(Empty e) {
+  bar(&e);
+}
Index: clang/lib/CodeGen/TargetInfo.cpp
===
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -1670,8 +1670,12 @@
   return getIndirectResult(Ty, true, State);
 
 // Ignore empty structs/unions on non-Windows.
-if (!IsWin32StructABI && isEmptyRecord(getContext(), Ty, true))
+if (!IsWin32StructABI && isEmptyRecord(getContext(), Ty, true)) {
+  // For compatibility with GCC, treat it like a struct with a single char.
+  if (getContext().getLangOpts().CPlusPlus)
+return getIndirectResult(Ty, /*ByVal=*/true, State);
   return ABIArgInfo::getIgnore();
+}
 
 llvm::LLVMContext &LLVMContext = getVMContext();
 llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext);
@@ -2799,6 +2803,16 @@
 if (RD->hasFlexibleArrayMember())
   return;
 
+// According to the resolution to A-5 in
+// https://itanium-cxx-abi.github.io/cxx-abi/cxx-open.html, empty class by
+// value should be treated as a struct containing a single character.
+// That's aligned with record layout in AST and compatible with gcc.
+// Check https://godbolt.org/z/D1u2MV for the difference in codegen.
+if (getContext().getLangOpts().CPlusPlus)
+  if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
+if (CXXRD->isEmpty())
+  return;
+
 const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
 
 // Reset Lo class, this will be recomputed.
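
As an aside for readers of the thread, the layout fact behind the comment above
can be checked with a tiny standalone program (not part of the patch; it relies
only on the guarantee that a complete object has nonzero size, so an empty
class occupies one byte just like a struct holding a single char):

  #include <cstdio>

  class Empty {};
  struct OneChar { char c; };

  int main() {
    // Both types have size 1; that is why the patch classifies an empty class
    // like a single-char struct instead of ignoring it.
    static_assert(sizeof(Empty) == 1, "an empty class still occupies one byte");
    std::printf("sizeof(Empty)=%zu, sizeof(OneChar)=%zu\n", sizeof(Empty),
                sizeof(OneChar));
    return 0;
  }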


Index: clang/test/CodeGen/empty-class.cpp
===
--- /dev/null
+++ clang/test/CodeGen/empty-class.cpp
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple=x86_64 | FileCheck --check-prefix=X64 %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple=i686 | FileCheck --check-prefix=X32 %s
+
+class Empty {};
+
+void bar(Empty *);
+
+// X64-LABEL: _Z3foo5Empty
+// X64-SAME: %class.Empty* byval(%class.Empty) align 8 %e
+// X64-NOT: alloca
+// X32-LABEL: _Z3foo5Empty
+// X32-SAME: %class.Empty* byval(%class.Empty) align 4 %e
+// X32-NOT: alloca
+void foo(Empty e) {
+  bar(&e);
+}
Index: clang/lib/CodeGen/TargetInfo.cpp
===
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -1670,8 +1670,12 @@
   return getIndirectResult(Ty, true, State);
 
 // Ignore empty structs/unions on non-Windows.
-if (!IsWin32StructABI && isEmptyRecord(getContext(), Ty, true))
+if (!IsWin32StructABI && isEmptyRecord(getContext(), Ty, true)) {
+  // For compatibility with GCC, treat it like a struct with a single char.
+  if (getContext().getLangOpts().CPlusPlus)
+return getIndirectResult(Ty, /*ByVal=*/true, State);
   return ABIArgInfo::getIgnore();
+}
 
 llvm::LLVMContext &LLVMContext = getVMContext();
 llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext);
@@ -2799,6 +2803,16 @@
 if (RD->hasFlexibleArrayMember())
   return;
 
+// According to the resolution to A-5 in
+// https://itanium-cxx-abi.github.io/cxx-abi/cxx-open.html, empty class by
+// value should be treated as a struct containing a single character.
+// That's aligned with record layout in AST and compatible with gcc.
+// Check https://godbolt.org/z/D1u2MV for the difference in codegen.
+if (getContext().getLangOpts().CPlusPlus)
+  if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
+if (CXXRD->isEmpty())
+  return;
+
 const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
 
 // Reset Lo class, this will be recomputed.
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D68157: [X86][ABI] Keep empty class argument passing by value compatible with GCC.

2019-09-30 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

ping for review


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D68157/new/

https://reviews.llvm.org/D68157



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D68157: [X86][ABI] Keep empty class argument passing by value compatible with GCC.

2019-10-01 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

PING


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D68157/new/

https://reviews.llvm.org/D68157



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D68300: [HIP] Add option -fno-link-builtin-bitcode to disable linking device lib

2019-10-03 Thread Michael Liao via Phabricator via cfe-commits
hliao added inline comments.



Comment at: include/clang/Driver/Options.td:606
+def flink_builtin_bitcode : Flag<["-"], "flink-builtin-bitcode">,
+  Flags<[CC1Option]>, HelpText<"Link builtin bitcode for HIP device compilation.">;
+def fno_link_builtin_bitcode : Flag<["-"], "fno-link-builtin-bitcode">;

tra wrote:
> yaxunl wrote:
> > ashi1 wrote:
> > > Since this is a more generic approach, we won't need to specify HIP ?
> > this patch only implemented this option for HIP. If it is used for other 
> > languages, this help text should be updated.
> Hmm. Cuda currently uses `-nocudalib` for essentially the same purpose (Sort 
> of like `-nostdlib`, but for CUDA). Perhaps we should consolidate all these 
> into `-nogpulib` and alias `-nocudalib` to it. 
How about the other relevant options, such as replacing `--cuda-device-only` 
with `--gpu-device-only` or `--hip-device-only`, to avoid confusion with CUDA?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D68300/new/

https://reviews.llvm.org/D68300



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D68394: [HIP] Enable specifying different default gpu arch for HIP/CUDA.

2019-10-03 Thread Michael Liao via Phabricator via cfe-commits
hliao created this revision.
hliao added reviewers: tra, yaxunl.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D68394

Files:
  clang/lib/Driver/Driver.cpp
  clang/test/Driver/hip-default-gpu-arch.hip


Index: clang/test/Driver/hip-default-gpu-arch.hip
===
--- /dev/null
+++ clang/test/Driver/hip-default-gpu-arch.hip
@@ -0,0 +1,7 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang -### -x hip -c %s 2>&1 | FileCheck %s
+
+// CHECK: {{.*}}clang{{.*}}"-target-cpu" "gfx600"
Index: clang/lib/Driver/Driver.cpp
===
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -2329,6 +2329,10 @@
 
 /// Flag for -fgpu-rdc.
 bool Relocatable = false;
+
+/// Default GPU architecture if there's no one specified.
+CudaArch DefaultCudaArch = CudaArch::UNKNOWN;
+
   public:
 CudaActionBuilderBase(Compilation &C, DerivedArgList &Args,
   const Driver::InputList &Inputs,
@@ -2518,7 +2522,7 @@
   // supported GPUs.  sm_20 code should work correctly, if
   // suboptimally, on all newer GPUs.
   if (GpuArchList.empty())
-GpuArchList.push_back(CudaArch::SM_20);
+GpuArchList.push_back(DefaultCudaArch);
 
   return Error;
 }
@@ -2530,7 +2534,9 @@
   public:
 CudaActionBuilder(Compilation &C, DerivedArgList &Args,
   const Driver::InputList &Inputs)
-: CudaActionBuilderBase(C, Args, Inputs, Action::OFK_Cuda) {}
+: CudaActionBuilderBase(C, Args, Inputs, Action::OFK_Cuda) {
+  DefaultCudaArch = CudaArch::SM_20;
+}
 
 ActionBuilderReturnCode
 getDeviceDependences(OffloadAction::DeviceDependences &DA,
@@ -2645,7 +2651,9 @@
   public:
 HIPActionBuilder(Compilation &C, DerivedArgList &Args,
  const Driver::InputList &Inputs)
-: CudaActionBuilderBase(C, Args, Inputs, Action::OFK_HIP) {}
+: CudaActionBuilderBase(C, Args, Inputs, Action::OFK_HIP) {
+  DefaultCudaArch = CudaArch::GFX600;
+}
 
 bool canUseBundlerUnbundler() const override { return true; }
 


Index: clang/test/Driver/hip-default-gpu-arch.hip
===
--- /dev/null
+++ clang/test/Driver/hip-default-gpu-arch.hip
@@ -0,0 +1,7 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang -### -x hip -c %s 2>&1 | FileCheck %s
+
+// CHECK: {{.*}}clang{{.*}}"-target-cpu" "gfx600"
Index: clang/lib/Driver/Driver.cpp
===
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -2329,6 +2329,10 @@
 
 /// Flag for -fgpu-rdc.
 bool Relocatable = false;
+
+/// Default GPU architecture if there's no one specified.
+CudaArch DefaultCudaArch = CudaArch::UNKNOWN;
+
   public:
 CudaActionBuilderBase(Compilation &C, DerivedArgList &Args,
   const Driver::InputList &Inputs,
@@ -2518,7 +2522,7 @@
   // supported GPUs.  sm_20 code should work correctly, if
   // suboptimally, on all newer GPUs.
   if (GpuArchList.empty())
-GpuArchList.push_back(CudaArch::SM_20);
+GpuArchList.push_back(DefaultCudaArch);
 
   return Error;
 }
@@ -2530,7 +2534,9 @@
   public:
 CudaActionBuilder(Compilation &C, DerivedArgList &Args,
   const Driver::InputList &Inputs)
-: CudaActionBuilderBase(C, Args, Inputs, Action::OFK_Cuda) {}
+: CudaActionBuilderBase(C, Args, Inputs, Action::OFK_Cuda) {
+  DefaultCudaArch = CudaArch::SM_20;
+}
 
 ActionBuilderReturnCode
 getDeviceDependences(OffloadAction::DeviceDependences &DA,
@@ -2645,7 +2651,9 @@
   public:
 HIPActionBuilder(Compilation &C, DerivedArgList &Args,
  const Driver::InputList &Inputs)
-: CudaActionBuilderBase(C, Args, Inputs, Action::OFK_HIP) {}
+: CudaActionBuilderBase(C, Args, Inputs, Action::OFK_HIP) {
+  DefaultCudaArch = CudaArch::GFX600;
+}
 
 bool canUseBundlerUnbundler() const override { return true; }
 
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D68394: [HIP] Enable specifying different default gpu arch for HIP/CUDA.

2019-10-03 Thread Michael Liao via Phabricator via cfe-commits
hliao marked an inline comment as done.
hliao added inline comments.



Comment at: clang/lib/Driver/Driver.cpp:2655
+: CudaActionBuilderBase(C, Args, Inputs, Action::OFK_HIP) {
+  DefaultCudaArch = CudaArch::GFX600;
+}

Sam, could you let me know which default arch we should reasonably use here?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D68394/new/

https://reviews.llvm.org/D68394



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D68394: [HIP] Enable specifying different default gpu arch for HIP/CUDA.

2019-10-03 Thread Michael Liao via Phabricator via cfe-commits
hliao marked an inline comment as done.
hliao added inline comments.



Comment at: clang/lib/Driver/Driver.cpp:2538
+: CudaActionBuilderBase(C, Args, Inputs, Action::OFK_Cuda) {
+  DefaultCudaArch = CudaArch::SM_20;
+}

tra wrote:
> This technically depends on the CUDA version.
> We do have CUDA version info in `clang/lib/Driver/ToolChains/Cuda.h`
> The default for NVCC has been sm_30 since CUDA-9.0.  In fact sm_20 is not 
> supported at all by CUDA-9.0+ at all , so we should bump the default to sm_30 
> for those versions.
> 
Unfortunately, when the action builder is running, CUDA has not been detected 
yet. I will probably revise the detection logic to update CUDA's default GPU 
arch after successful detection.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D68394/new/

https://reviews.llvm.org/D68394



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D68394: [HIP] Enable specifying different default gpu arch for HIP/CUDA.

2019-10-03 Thread Michael Liao via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL373634: [HIP] Enable specifying different default gpu arch 
for HIP/CUDA. (authored by hliao, committed by ).
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Changed prior to commit:
  https://reviews.llvm.org/D68394?vs=223015&id=223055#toc

Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D68394/new/

https://reviews.llvm.org/D68394

Files:
  cfe/trunk/lib/Driver/Driver.cpp
  cfe/trunk/test/Driver/hip-default-gpu-arch.hip


Index: cfe/trunk/lib/Driver/Driver.cpp
===
--- cfe/trunk/lib/Driver/Driver.cpp
+++ cfe/trunk/lib/Driver/Driver.cpp
@@ -2329,6 +2329,10 @@
 
 /// Flag for -fgpu-rdc.
 bool Relocatable = false;
+
+/// Default GPU architecture if there's no one specified.
+CudaArch DefaultCudaArch = CudaArch::UNKNOWN;
+
   public:
 CudaActionBuilderBase(Compilation &C, DerivedArgList &Args,
   const Driver::InputList &Inputs,
@@ -2518,7 +2522,7 @@
   // supported GPUs.  sm_20 code should work correctly, if
   // suboptimally, on all newer GPUs.
   if (GpuArchList.empty())
-GpuArchList.push_back(CudaArch::SM_20);
+GpuArchList.push_back(DefaultCudaArch);
 
   return Error;
 }
@@ -2530,7 +2534,9 @@
   public:
 CudaActionBuilder(Compilation &C, DerivedArgList &Args,
   const Driver::InputList &Inputs)
-: CudaActionBuilderBase(C, Args, Inputs, Action::OFK_Cuda) {}
+: CudaActionBuilderBase(C, Args, Inputs, Action::OFK_Cuda) {
+  DefaultCudaArch = CudaArch::SM_20;
+}
 
 ActionBuilderReturnCode
 getDeviceDependences(OffloadAction::DeviceDependences &DA,
@@ -2645,7 +2651,9 @@
   public:
 HIPActionBuilder(Compilation &C, DerivedArgList &Args,
  const Driver::InputList &Inputs)
-: CudaActionBuilderBase(C, Args, Inputs, Action::OFK_HIP) {}
+: CudaActionBuilderBase(C, Args, Inputs, Action::OFK_HIP) {
+  DefaultCudaArch = CudaArch::GFX803;
+}
 
 bool canUseBundlerUnbundler() const override { return true; }
 
Index: cfe/trunk/test/Driver/hip-default-gpu-arch.hip
===
--- cfe/trunk/test/Driver/hip-default-gpu-arch.hip
+++ cfe/trunk/test/Driver/hip-default-gpu-arch.hip
@@ -0,0 +1,7 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang -### -x hip -c %s 2>&1 | FileCheck %s
+
+// CHECK: {{.*}}clang{{.*}}"-target-cpu" "gfx803"


Index: cfe/trunk/lib/Driver/Driver.cpp
===
--- cfe/trunk/lib/Driver/Driver.cpp
+++ cfe/trunk/lib/Driver/Driver.cpp
@@ -2329,6 +2329,10 @@
 
 /// Flag for -fgpu-rdc.
 bool Relocatable = false;
+
+/// Default GPU architecture if there's no one specified.
+CudaArch DefaultCudaArch = CudaArch::UNKNOWN;
+
   public:
 CudaActionBuilderBase(Compilation &C, DerivedArgList &Args,
   const Driver::InputList &Inputs,
@@ -2518,7 +2522,7 @@
   // supported GPUs.  sm_20 code should work correctly, if
   // suboptimally, on all newer GPUs.
   if (GpuArchList.empty())
-GpuArchList.push_back(CudaArch::SM_20);
+GpuArchList.push_back(DefaultCudaArch);
 
   return Error;
 }
@@ -2530,7 +2534,9 @@
   public:
 CudaActionBuilder(Compilation &C, DerivedArgList &Args,
   const Driver::InputList &Inputs)
-: CudaActionBuilderBase(C, Args, Inputs, Action::OFK_Cuda) {}
+: CudaActionBuilderBase(C, Args, Inputs, Action::OFK_Cuda) {
+  DefaultCudaArch = CudaArch::SM_20;
+}
 
 ActionBuilderReturnCode
 getDeviceDependences(OffloadAction::DeviceDependences &DA,
@@ -2645,7 +2651,9 @@
   public:
 HIPActionBuilder(Compilation &C, DerivedArgList &Args,
  const Driver::InputList &Inputs)
-: CudaActionBuilderBase(C, Args, Inputs, Action::OFK_HIP) {}
+: CudaActionBuilderBase(C, Args, Inputs, Action::OFK_HIP) {
+  DefaultCudaArch = CudaArch::GFX803;
+}
 
 bool canUseBundlerUnbundler() const override { return true; }
 
Index: cfe/trunk/test/Driver/hip-default-gpu-arch.hip
===
--- cfe/trunk/test/Driver/hip-default-gpu-arch.hip
+++ cfe/trunk/test/Driver/hip-default-gpu-arch.hip
@@ -0,0 +1,7 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang -### -x hip -c %s 2>&1 | FileCheck %s
+
+// CHECK: {{.*}}clang{{.*}}"-target-cpu" "gfx803"
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D68578: [HIP] Fix device stub name

2019-10-07 Thread Michael Liao via Phabricator via cfe-commits
hliao added inline comments.



Comment at: lib/CodeGen/CGCUDANV.cpp:235
CGF.CGM.getContext().getTargetInfo().getCXXABI())) ||
+ CGF.getLangOpts().HIP ||
  getDeviceStubName(getDeviceSideName(CGF.CurFuncDecl)) ==

Keeping the original assertion in HIP is still valuable, to catch 
naming-mismatch issues for unnamed types.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D68578/new/

https://reviews.llvm.org/D68578



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D68587: [hip] Assume host-only compilation if the final phase is ahead of `backend`.

2019-10-07 Thread Michael Liao via Phabricator via cfe-commits
hliao created this revision.
hliao added reviewers: tra, yaxunl.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

- There are several scenarios where the compilation needs to stop before 
`backend`, such as `-E` and `-fsyntax-only`, and even more if developers want 
to diagnose outputs from different phases. In these cases, the offload bundler 
is not yet required, and is not valid to run, as the output from the 
device-side compilation is not ready yet. As a result, it is assumed that, if 
the final phase is ahead of `backend`, these compilations are host-only. If 
developers need the corresponding outputs for those phases from the 
device-side compilation, `--cuda-device-only` needs to be specified.
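
To make the phase condition concrete, here is a reduced sketch (assumed, 
simplified names that only mirror the real `clang::driver::phases` ordering; 
this is not the driver code itself) of which final phases are treated as 
host-only by this change:

  #include <cstdio>

  namespace phases {
  enum ID { Preprocess, Precompile, Compile, Backend, Assemble, Link };
  }

  static bool hostOnly(phases::ID FinalPhase, bool CompileDeviceOnly) {
    // Device actions are skipped whenever the compilation stops before the
    // backend phase and device-only compilation was not requested.
    return !CompileDeviceOnly && FinalPhase < phases::Backend;
  }

  int main() {
    std::printf("-E            -> host-only: %d\n",
                hostOnly(phases::Preprocess, false));
    std::printf("-fsyntax-only -> host-only: %d\n",
                hostOnly(phases::Compile, false));
    std::printf("-c            -> host-only: %d\n",
                hostOnly(phases::Assemble, false));
    return 0;
  }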


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D68587

Files:
  clang/lib/Driver/Driver.cpp
  clang/test/Driver/hip-pre-backend-phases.hip


Index: clang/test/Driver/hip-pre-backend-phases.hip
===
--- /dev/null
+++ clang/test/Driver/hip-pre-backend-phases.hip
@@ -0,0 +1,11 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang -### -x hip -nogpulib -target x86_64 -M %s 2>&1 | FileCheck %s
+// RUN: %clang -### -x hip -nogpulib -target x86_64 -E %s 2>&1 | FileCheck %s
+// RUN: %clang -### -x hip -nogpulib -target x86_64 -fsyntax-only %s 2>&1 | FileCheck %s
+
+// CHECK-NOT: clang{{.*}}" "-cc1" {{.*}} "-fcuda-is-device"
+// CHECK: clang{{.*}}" "-cc1" "-triple" "x86_64"
+// CHECK-NOT: clang-offload-bundler"
Index: clang/lib/Driver/Driver.cpp
===
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -2661,6 +2661,13 @@
 getDeviceDependences(OffloadAction::DeviceDependences &DA,
  phases::ID CurPhase, phases::ID FinalPhase,
  PhasesTy &Phases) override {
+  // If the final phase won't be able to generate the output bundling both
+  // device and host objects, it assumes such usage are host only unless
+  // device only compilation option is specified.
+  if (!CompileDeviceOnly && FinalPhase < phases::Backend) {
+CudaDeviceActions.clear();
+return ABRT_Inactive;
+  }
   // amdgcn does not support linking of object files, therefore we skip
   // backend and assemble phases to output LLVM IR. Except for generating
   // non-relocatable device coee, where we generate fat binary for device


Index: clang/test/Driver/hip-pre-backend-phases.hip
===
--- /dev/null
+++ clang/test/Driver/hip-pre-backend-phases.hip
@@ -0,0 +1,11 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang -### -x hip -nogpulib -target x86_64 -M %s 2>&1 | FileCheck %s
+// RUN: %clang -### -x hip -nogpulib -target x86_64 -E %s 2>&1 | FileCheck %s
+// RUN: %clang -### -x hip -nogpulib -target x86_64 -fsyntax-only %s 2>&1 | FileCheck %s
+
+// CHECK-NOT: clang{{.*}}" "-cc1" {{.*}} "-fcuda-is-device"
+// CHECK: clang{{.*}}" "-cc1" "-triple" "x86_64"
+// CHECK-NOT: clang-offload-bundler"
Index: clang/lib/Driver/Driver.cpp
===
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -2661,6 +2661,13 @@
 getDeviceDependences(OffloadAction::DeviceDependences &DA,
  phases::ID CurPhase, phases::ID FinalPhase,
  PhasesTy &Phases) override {
+  // If the final phase won't be able to generate the output bundling both
+  // device and host objects, it assumes such usage are host only unless
+  // device only compilation option is specified.
+  if (!CompileDeviceOnly && FinalPhase < phases::Backend) {
+CudaDeviceActions.clear();
+return ABRT_Inactive;
+  }
   // amdgcn does not support linking of object files, therefore we skip
   // backend and assemble phases to output LLVM IR. Except for generating
   // non-relocatable device coee, where we generate fat binary for device
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D68587: [hip] Assume host-only compilation if the final phase is ahead of `backend`.

2019-10-07 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

In D68587#1698055 , @tra wrote:

> I'm fine with this for -E/-M,
>
> I would argue that with `-fsyntax-only` we want to know whether our source 
> code, which is common for all sub-compilations, has syntactic errors. 
>  The way we compile HIP & CUDA sources, some of the errors will only be 
> reported on one side of the compilation. 
>  So, in order to make sure there are no syntax errors, we need to perform 
> *all* sub-compilations with `-fsyntax-only`.
>
> E.g. it would be rather surprising to see the compilation succeeding with 
> `-fsyntax-only`, but then fail with a syntax error somewhere on the device 
> side during a real compilation.


For most compilation tools, a single input and a single output are expected.
Without assuming that `-fsyntax-only` alone means host-only compilation, we run
syntax checking at least twice. The result may be misleading, and there are
clang-based tools (like clang-tidy) that may have no straightforward way to
remain runnable. To check device-side compilation syntax, we are still able to
ask for that explicitly by specifying `--cuda-device-only`.
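
For illustration (this invocation is not part of the patch and foo.hip is a
made-up file name), the device-side check would then be requested as:

  clang -x hip -nogpulib --cuda-device-only -fsyntax-only foo.hip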


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D68587/new/

https://reviews.llvm.org/D68587



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D68587: [hip] Assume host-only compilation if the final phase is ahead of `backend`.

2019-10-07 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

In D68587#1698247, @tra wrote:

> In D68587#1698102, @hliao wrote:
>
> > For most compilation tools, a single input and a single output are expected.
> > Without assuming that `-fsyntax-only` alone means host-only compilation, we
> > run syntax checking at least twice.
>
>
> I believe the driver will not run subsequent jobs if one of the device 
> compilations fails. You may see duplicate warnings from multiple stages, but 
> overall the error handling works in a fairly predictable way now.


It still runs and gives the same error (if that error applies to both sides) at
least twice if you just specify `-fsyntax-only` or `-E`. That won't happen for
the regular compilation option (`-c`) due to the additional device dependencies
added.
The error itself should, in fact, be clear enough; the most confusing part is
the diagnostic messages and suggestions from clang, since host- and device-side
compilations are quite different, and the error message from one side may be
mixed with the normal output of the other.

> 
> 
>> The result may be misleading
> 
> Potentially repeated warnings are still *correct*, while omitting an error is 
> not, IMO.  I believe that did come up before and we had to make some changes 
> to the driver to keep host compilation working even when device-side 
> compilations produce no output.
> 
> To think of it, I'm starting to doubt that this patch is an improvement for 
> `-M` either. You will get the dependencies for the host, but they are not 
> necessarily the same as the dependencies for the device-side compilation. 
> Producing a partial list of dependencies will be potentially incorrect. IMO 
> we do need dependencies info from all sub-compilations.

Even without this patch, `-M`, or more specifically `-MD`, is already broken: we
run the dependency generation action twice, once for each side, and the later
run overwrites the earlier *.d file. We need special handling of `-M` to match
nvcc.
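
For illustration of that overwrite (foo.hip is a made-up file name; this simply
restates the behavior described above), with

  clang -x hip -nogpulib -c -MD foo.hip

both the host and the device sub-compilation are asked to write foo.d, so the
job that runs last silently overwrites the other's dependency file.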

> Perhaps we should limit the scope of this patch to -E only for now?

I just found that nvcc's `-E` returns the output of the device-side compilation
for the first GPU arch specified. In any case, whether to match that behavior is
a separate question.

> 
> 
>> and there are clang-based tools (like clang-tidy) that may have no
>> straightforward way to remain runnable.
> 
> Tooling does get surprised by the multiple jobs created by CUDA compilation. 
>  The work around is to pass `--cuda-host-only`. Passing an extra flag is 
> usually not a showstopper (at least it was not in cases I had to deal with at 
> work and we have fair number of clang-derived tools). Usually in order to get 
> correct CUDA compilation in this scenario you will also need to tell the tool 
> where to find CUDA's headers, so the mechanism for passing additional options 
> already exists.

But some tools, like clang-tidy, may find it difficult to insert that option
properly. Say `clang-tidy -p`, which is supposed to read the compilation command
database generated by cmake or other meta-build systems and perform additional
checks on the sources: adding that option may inevitably make such tools
CUDA/HIP aware.
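
For illustration only (file and directory names are made up): without a
compilation database the extra flag can usually be appended after `--`, e.g.

  clang-tidy foo.hip -- -x hip -nogpulib --cuda-host-only

but with `clang-tidy -p build/` the flag would have to be injected into the
generated compile_commands.json, which is exactly the kind of CUDA/HIP awareness
mentioned above.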

> If that's still a problem, then we should change the tooling infrastructure 
> to use host-only compilation for HIP and CUDA by default.

That's an option I was looking into as well. But, generally speaking, we need a
clear definition of the expected output for options like `-M`, `-MD`, `-E`,
`-fsyntax-only`, `-S -emit-llvm`, and even `-S` (for HIP only).

>> To check device-side compilation syntax, we are still able to explicitly ask 
>> that by specifying `--cuda-device-only`.
> 
> Yes, that could be done. However, as I've mentioned above the same argument 
> applies to tooling & `--cuda-host-only`, so there's no advantage here. IMO 
> the most common use case should be the default, so it's the clang itself 
> which should remain working correctly without having to add extra flags.
> 
> Also, this patch makes it impossible to run -fsyntax-only on *all* 
> sub-compilations at once. I will have to run `clang -fsyntax-only` multiple 
> times -- once per host and once per each device.

We do have another option, `--cuda-compile-host-device`, to explicitly ask for
both host- and device-side compilations.
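
For example (an illustrative command, not part of this patch; foo.hip is a
placeholder), all sub-compilations can still be checked in one run with:

  clang -x hip -nogpulib --cuda-compile-host-device -fsyntax-only foo.hip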

> I do want to check all sub-compilations with `-fsyntax-only` on a regular 
> basis (e.g. when running creduce on a cuda source), so having to do that for 
> each sub-compilation separately does not look like an improvement to me.




Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D68587/new/

https://reviews.llvm.org/D68587



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D68652: [driver][hip] Skip bundler if host action is nothing.

2019-10-08 Thread Michael Liao via Phabricator via cfe-commits
hliao created this revision.
hliao added reviewers: sfantao, tra, yaxunl.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D68652

Files:
  clang/lib/Driver/Driver.cpp
  clang/test/Driver/hip-syntax-only.hip


Index: clang/test/Driver/hip-syntax-only.hip
===
--- /dev/null
+++ clang/test/Driver/hip-syntax-only.hip
@@ -0,0 +1,9 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang -### -x hip -nogpulib -target x86_64 -fsyntax-only %s 2>&1 | FileCheck %s
+
+// CHECK-DAG: clang{{.*}}" "-cc1" {{.*}} "-fcuda-is-device"
+// CHECK-DAG: clang{{.*}}" "-cc1" "-triple" "x86_64"
+// CHECK-NOT: clang-offload-bundler"
Index: clang/lib/Driver/Driver.cpp
===
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -3105,7 +3105,8 @@
     // the resulting list. Otherwise, just append the device actions. For
     // device only compilation, HostAction is a null pointer, therefore only do
     // this when HostAction is not a null pointer.
-    if (CanUseBundler && HostAction && !OffloadAL.empty()) {
+    if (CanUseBundler && HostAction &&
+        HostAction->getType() != types::TY_Nothing && !OffloadAL.empty()) {
       // Add the host action to the list in order to create the bundling action.
       OffloadAL.push_back(HostAction);
 
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D68652: [driver][hip] Skip bundler if host action is nothing.

2019-10-08 Thread Michael Liao via Phabricator via cfe-commits
hliao marked an inline comment as done.
hliao added inline comments.



Comment at: clang/test/Driver/hip-syntax-only.hip:7
+
+// CHECK-DAG: clang{{.*}}" "-cc1" {{.*}} "-fcuda-is-device"
+// CHECK-DAG: clang{{.*}}" "-cc1" "-triple" "x86_64"

tra wrote:
> I'd include `-target ` and a comment describing that we're making sure 
> that both host and device compilations are still executed.
Won't `-fcuda-is-device` be sufficient? That's an option specific to device-side
compilation.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D68652/new/

https://reviews.llvm.org/D68652



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D68652: [driver][hip] Skip bundler if host action is nothing.

2019-10-08 Thread Michael Liao via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG6334a59454ef: [driver][hip] Skip bundler if host action is 
nothing. (authored by hliao).

Changed prior to commit:
  https://reviews.llvm.org/D68652?vs=223890&id=223905#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D68652/new/

https://reviews.llvm.org/D68652

Files:
  clang/lib/Driver/Driver.cpp
  clang/test/Driver/hip-syntax-only.hip


Index: clang/test/Driver/hip-syntax-only.hip
===
--- /dev/null
+++ clang/test/Driver/hip-syntax-only.hip
@@ -0,0 +1,11 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang -### -x hip -nogpulib -target x86_64 -fsyntax-only %s 2>&1 | FileCheck %s
+
+// Check that there are commands for both host- and device-side compilations.
+//
+// CHECK-DAG: clang{{.*}}" "-cc1" {{.*}} "-fcuda-is-device"
+// CHECK-DAG: clang{{.*}}" "-cc1" "-triple" "x86_64"
+// CHECK-NOT: clang-offload-bundler"
Index: clang/lib/Driver/Driver.cpp
===
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -3108,7 +3108,8 @@
     // the resulting list. Otherwise, just append the device actions. For
     // device only compilation, HostAction is a null pointer, therefore only do
     // this when HostAction is not a null pointer.
-    if (CanUseBundler && HostAction && !OffloadAL.empty()) {
+    if (CanUseBundler && HostAction &&
+        HostAction->getType() != types::TY_Nothing && !OffloadAL.empty()) {
       // Add the host action to the list in order to create the bundling action.
       OffloadAL.push_back(HostAction);
 
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

