[PATCH] D55483: Introduce the callback attribute and emit !callback metadata

2019-01-11 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 181420.
jdoerfert marked 21 inline comments as done.
jdoerfert added a comment.

Style changes, clang-format, documentation improvements


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D55483/new/

https://reviews.llvm.org/D55483

Files:
  include/clang/AST/ASTContext.h
  include/clang/Basic/Attr.td
  include/clang/Basic/AttrDocs.td
  include/clang/Basic/Builtins.def
  include/clang/Basic/Builtins.h
  include/clang/Basic/DiagnosticSemaKinds.td
  lib/AST/ASTContext.cpp
  lib/Basic/Builtins.cpp
  lib/CodeGen/CGOpenMPRuntime.cpp
  lib/CodeGen/CodeGenModule.cpp
  lib/Sema/SemaDecl.cpp
  lib/Sema/SemaDeclAttr.cpp
  test/Analysis/retain-release.m
  test/CodeGen/attr-callback.c
  test/CodeGen/callback_annotated.c
  test/CodeGen/callback_openmp.c
  test/CodeGen/callback_pthread_create.c
  test/CodeGenCXX/attr-callback.cpp
  test/Misc/pragma-attribute-supported-attributes-list.test
  test/OpenMP/parallel_codegen.cpp
  test/Sema/attr-callback-broken.c
  test/Sema/attr-callback.c
  test/SemaCXX/attr-callback-broken.cpp
  test/SemaCXX/attr-callback.cpp
  utils/TableGen/ClangAttrEmitter.cpp

Index: utils/TableGen/ClangAttrEmitter.cpp
===
--- utils/TableGen/ClangAttrEmitter.cpp
+++ utils/TableGen/ClangAttrEmitter.cpp
@@ -775,6 +775,11 @@
 }
   };
 
+  struct VariadicParamOrParamIdxArgument : public VariadicArgument {
+VariadicParamOrParamIdxArgument(const Record &Arg, StringRef Attr)
+: VariadicArgument(Arg, Attr, "int") {}
+  };
+
   // Unique the enums, but maintain the original declaration ordering.
   std::vector
   uniqueEnumsInOrder(const std::vector &enums) {
@@ -1283,6 +1288,8 @@
 Ptr = llvm::make_unique(Arg, Attr);
   else if (ArgName == "VariadicParamIdxArgument")
 Ptr = llvm::make_unique(Arg, Attr);
+  else if (ArgName == "VariadicParamOrParamIdxArgument")
+Ptr = llvm::make_unique(Arg, Attr);
   else if (ArgName == "ParamIdxArgument")
 Ptr = llvm::make_unique(Arg, Attr, "ParamIdx");
   else if (ArgName == "VariadicIdentifierArgument")
@@ -2116,6 +2123,7 @@
  llvm::StringSwitch(
  Arg->getSuperClasses().back().first->getName())
  .Case("VariadicIdentifierArgument", true)
+ .Case("VariadicParamOrParamIdxArgument", true)
  .Default(false);
 }
 
Index: test/SemaCXX/attr-callback.cpp
===
--- /dev/null
+++ test/SemaCXX/attr-callback.cpp
@@ -0,0 +1,67 @@
+// RUN: %clang_cc1 %s -verify -fsyntax-only
+
+// expected-no-diagnostics
+
+class C_in_class {
+#include "../Sema/attr-callback.c"
+};
+
+struct Base {
+
+  void no_args_1(void (*callback)(void));
+  __attribute__((callback(1))) void no_args_2(void (*callback)(void));
+  __attribute__((callback(callback))) void no_args_3(void (*callback)(void)) {}
+
+  __attribute__((callback(1, 0))) virtual void
+  this_tr(void (*callback)(Base *));
+
+  __attribute__((callback(1, __this, __, __this))) virtual void
+  this_unknown_this(void (*callback)(Base *, Base *, Base *));
+
+  __attribute__((callback(1))) virtual void
+  virtual_1(void (*callback)(void));
+
+  __attribute__((callback(callback))) virtual void
+  virtual_2(void (*callback)(void));
+
+  __attribute__((callback(1))) virtual void
+  virtual_3(void (*callback)(void));
+};
+
+__attribute__((callback(1))) void
+Base::no_args_1(void (*callback)(void)) {
+}
+
+void Base::no_args_2(void (*callback)(void)) {
+}
+
+struct Derived_1 : public Base {
+
+  __attribute__((callback(1, 0))) virtual void
+  this_tr(void (*callback)(Base *)) override;
+
+  __attribute__((callback(1))) virtual void
+  virtual_1(void (*callback)(void)) override {}
+
+  virtual void
+  virtual_3(void (*callback)(void)) override {}
+};
+
+struct Derived_2 : public Base {
+
+  __attribute__((callback(callback))) virtual void
+  virtual_1(void (*callback)(void)) override;
+
+  virtual void
+  virtual_2(void (*callback)(void)) override;
+
+  virtual void
+  virtual_3(void (*callback)(void)) override;
+};
+
+void Derived_2::virtual_1(void (*callback)(void)) {}
+
+__attribute__((callback(1))) void
+Derived_2::virtual_2(void (*callback)(void)) {}
+
+void Derived_2::virtual_3(void (*callback)(void)) {}
Index: test/SemaCXX/attr-callback-broken.cpp
===
--- /dev/null
+++ test/SemaCXX/attr-callback-broken.cpp
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 %s -verify -fsyntax-only
+
+class C_in_class {
+#define HAS_THIS
+#include "../Sema/attr-callback-broken.c"
+#undef HAS_THIS
+};
Index: test/Sema/attr-callback.c
===
--- /dev/null
+++ test/Sema/attr-callback.c
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 %s -verify -fsyntax-only
+
+// expected-no-diagnostics
+
+__attribute__((callback(1))) void no_args(void (*callback)(void));
+__attribute__((callback(1, 2, 3))) void arg

[PATCH] D55483: Introduce the callback attribute and emit !callback metadata

2019-01-11 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added a comment.

Updated according to your comments.




Comment at: lib/Sema/SemaDeclAttr.cpp:3481
+  llvm::StringMap NameIdxMapping;
+  NameIdxMapping["__"] = -1;
+

aaron.ballman wrote:
> This doesn't match the documentation -- I assume you switched from `?` to 
> `__` because `__` at least parses as a valid identifier, whereas `?` would 
> require extra parsing support? If so, that's fine by me.
Yes, `__` and `__this` were chosen because they work without lexer/parser 
changes and are in the implementation namespace. I forgot to update the 
documentation though. Will be fixed.



Comment at: lib/Sema/SemaDeclAttr.cpp:3483
+
+  NameIdxMapping["__this"] = 0;
+

aaron.ballman wrote:
> This doesn't match the documentation either, but I'm less clear on why the 
> double underscores are used.
If you use `this`, the lexer will generate the special "this" token. That one 
is checked explicitly to be only used inside of non-static class methods. If 
you have an idea how to avoid this check or make it consider uses in the 
attribute as OK, please let me know.



Comment at: lib/Sema/SemaDeclAttr.cpp:3492
+  SmallVector EncodingIndices;
+  for (unsigned u = 0, e = AL.getNumArgs(); u < e; u++) {
+

aaron.ballman wrote:
> Identifiers don't match the usual naming conventions.  Prefer `++U` as well.
OK.


> Prefer ++U as well.

Out of curiosity, why?



Comment at: lib/Sema/SemaDeclAttr.cpp:3493
+  for (unsigned u = 0, e = AL.getNumArgs(); u < e; u++) {
+
+SourceRange SR;

aaron.ballman wrote:
> Spurious newline
That was intentional but OK.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D55483/new/

https://reviews.llvm.org/D55483



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D55483: Introduce the callback attribute and emit !callback metadata

2019-01-14 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 181622.
jdoerfert marked 2 inline comments as done.
jdoerfert added a comment.

Small fixes, allow "this" inside "callbacks"


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D55483/new/

https://reviews.llvm.org/D55483

Files:
  include/clang/AST/ASTContext.h
  include/clang/Basic/Attr.td
  include/clang/Basic/AttrDocs.td
  include/clang/Basic/Builtins.def
  include/clang/Basic/Builtins.h
  include/clang/Basic/DiagnosticSemaKinds.td
  lib/AST/ASTContext.cpp
  lib/Basic/Builtins.cpp
  lib/CodeGen/CGOpenMPRuntime.cpp
  lib/CodeGen/CodeGenModule.cpp
  lib/Parse/ParseDecl.cpp
  lib/Sema/SemaDecl.cpp
  lib/Sema/SemaDeclAttr.cpp
  test/Analysis/retain-release.m
  test/CodeGen/attr-callback.c
  test/CodeGen/callback_annotated.c
  test/CodeGen/callback_openmp.c
  test/CodeGen/callback_pthread_create.c
  test/CodeGenCXX/attr-callback.cpp
  test/Misc/pragma-attribute-supported-attributes-list.test
  test/OpenMP/parallel_codegen.cpp
  test/Sema/attr-callback-broken.c
  test/Sema/attr-callback.c
  test/SemaCXX/attr-callback-broken.cpp
  test/SemaCXX/attr-callback.cpp
  utils/TableGen/ClangAttrEmitter.cpp

Index: utils/TableGen/ClangAttrEmitter.cpp
===
--- utils/TableGen/ClangAttrEmitter.cpp
+++ utils/TableGen/ClangAttrEmitter.cpp
@@ -775,6 +775,11 @@
 }
   };
 
+  struct VariadicParamOrParamIdxArgument : public VariadicArgument {
+VariadicParamOrParamIdxArgument(const Record &Arg, StringRef Attr)
+: VariadicArgument(Arg, Attr, "int") {}
+  };
+
   // Unique the enums, but maintain the original declaration ordering.
   std::vector
   uniqueEnumsInOrder(const std::vector &enums) {
@@ -1283,6 +1288,8 @@
 Ptr = llvm::make_unique(Arg, Attr);
   else if (ArgName == "VariadicParamIdxArgument")
 Ptr = llvm::make_unique(Arg, Attr);
+  else if (ArgName == "VariadicParamOrParamIdxArgument")
+Ptr = llvm::make_unique(Arg, Attr);
   else if (ArgName == "ParamIdxArgument")
 Ptr = llvm::make_unique(Arg, Attr, "ParamIdx");
   else if (ArgName == "VariadicIdentifierArgument")
@@ -2116,6 +2123,7 @@
  llvm::StringSwitch(
  Arg->getSuperClasses().back().first->getName())
  .Case("VariadicIdentifierArgument", true)
+ .Case("VariadicParamOrParamIdxArgument", true)
  .Default(false);
 }
 
Index: test/SemaCXX/attr-callback.cpp
===
--- /dev/null
+++ test/SemaCXX/attr-callback.cpp
@@ -0,0 +1,67 @@
+// RUN: %clang_cc1 %s -verify -fsyntax-only
+
+// expected-no-diagnostics
+
+class C_in_class {
+#include "../Sema/attr-callback.c"
+};
+
+struct Base {
+
+  void no_args_1(void (*callback)(void));
+  __attribute__((callback(1))) void no_args_2(void (*callback)(void));
+  __attribute__((callback(callback))) void no_args_3(void (*callback)(void)) {}
+
+  __attribute__((callback(1, 0))) virtual void
+  this_tr(void (*callback)(Base *));
+
+  __attribute__((callback(1, this, __, this))) virtual void
+  this_unknown_this(void (*callback)(Base *, Base *, Base *));
+
+  __attribute__((callback(1))) virtual void
+  virtual_1(void (*callback)(void));
+
+  __attribute__((callback(callback))) virtual void
+  virtual_2(void (*callback)(void));
+
+  __attribute__((callback(1))) virtual void
+  virtual_3(void (*callback)(void));
+};
+
+__attribute__((callback(1))) void
+Base::no_args_1(void (*callback)(void)) {
+}
+
+void Base::no_args_2(void (*callback)(void)) {
+}
+
+struct Derived_1 : public Base {
+
+  __attribute__((callback(1, 0))) virtual void
+  this_tr(void (*callback)(Base *)) override;
+
+  __attribute__((callback(1))) virtual void
+  virtual_1(void (*callback)(void)) override {}
+
+  virtual void
+  virtual_3(void (*callback)(void)) override {}
+};
+
+struct Derived_2 : public Base {
+
+  __attribute__((callback(callback))) virtual void
+  virtual_1(void (*callback)(void)) override;
+
+  virtual void
+  virtual_2(void (*callback)(void)) override;
+
+  virtual void
+  virtual_3(void (*callback)(void)) override;
+};
+
+void Derived_2::virtual_1(void (*callback)(void)) {}
+
+__attribute__((callback(1))) void
+Derived_2::virtual_2(void (*callback)(void)) {}
+
+void Derived_2::virtual_3(void (*callback)(void)) {}
Index: test/SemaCXX/attr-callback-broken.cpp
===
--- /dev/null
+++ test/SemaCXX/attr-callback-broken.cpp
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 %s -verify -fsyntax-only
+
+class C_in_class {
+#define HAS_THIS
+#include "../Sema/attr-callback-broken.c"
+#undef HAS_THIS
+};
Index: test/Sema/attr-callback.c
===
--- /dev/null
+++ test/Sema/attr-callback.c
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 %s -verify -fsyntax-only
+
+// expected-no-diagnostics
+
+__attribute__((callback(1))) void no_args(void (*callback)(void));
+__attribute__((callback(1, 2, 3))

[PATCH] D55483: Introduce the callback attribute and emit !callback metadata

2019-01-14 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 181631.
jdoerfert added a comment.

Generalize the treatment of "kw_this" as "kw_ident"


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D55483/new/

https://reviews.llvm.org/D55483

Files:
  include/clang/AST/ASTContext.h
  include/clang/Basic/Attr.td
  include/clang/Basic/AttrDocs.td
  include/clang/Basic/Builtins.def
  include/clang/Basic/Builtins.h
  include/clang/Basic/DiagnosticSemaKinds.td
  lib/AST/ASTContext.cpp
  lib/Basic/Builtins.cpp
  lib/CodeGen/CGOpenMPRuntime.cpp
  lib/CodeGen/CodeGenModule.cpp
  lib/Parse/ParseDecl.cpp
  lib/Sema/SemaDecl.cpp
  lib/Sema/SemaDeclAttr.cpp
  test/Analysis/retain-release.m
  test/CodeGen/attr-callback.c
  test/CodeGen/callback_annotated.c
  test/CodeGen/callback_openmp.c
  test/CodeGen/callback_pthread_create.c
  test/CodeGenCXX/attr-callback.cpp
  test/Misc/pragma-attribute-supported-attributes-list.test
  test/OpenMP/parallel_codegen.cpp
  test/Sema/attr-callback-broken.c
  test/Sema/attr-callback.c
  test/SemaCXX/attr-callback-broken.cpp
  test/SemaCXX/attr-callback.cpp
  utils/TableGen/ClangAttrEmitter.cpp

Index: utils/TableGen/ClangAttrEmitter.cpp
===
--- utils/TableGen/ClangAttrEmitter.cpp
+++ utils/TableGen/ClangAttrEmitter.cpp
@@ -775,6 +775,11 @@
 }
   };
 
+  struct VariadicParamOrParamIdxArgument : public VariadicArgument {
+VariadicParamOrParamIdxArgument(const Record &Arg, StringRef Attr)
+: VariadicArgument(Arg, Attr, "int") {}
+  };
+
   // Unique the enums, but maintain the original declaration ordering.
   std::vector
   uniqueEnumsInOrder(const std::vector &enums) {
@@ -1283,6 +1288,8 @@
 Ptr = llvm::make_unique(Arg, Attr);
   else if (ArgName == "VariadicParamIdxArgument")
 Ptr = llvm::make_unique(Arg, Attr);
+  else if (ArgName == "VariadicParamOrParamIdxArgument")
+Ptr = llvm::make_unique(Arg, Attr);
   else if (ArgName == "ParamIdxArgument")
 Ptr = llvm::make_unique(Arg, Attr, "ParamIdx");
   else if (ArgName == "VariadicIdentifierArgument")
@@ -2116,6 +2123,7 @@
  llvm::StringSwitch(
  Arg->getSuperClasses().back().first->getName())
  .Case("VariadicIdentifierArgument", true)
+ .Case("VariadicParamOrParamIdxArgument", true)
  .Default(false);
 }
 
@@ -2158,6 +2166,34 @@
   OS << "#endif // CLANG_ATTR_IDENTIFIER_ARG_LIST\n\n";
 }
 
+static bool keywordThisIsaIdentifierInArgument(Record *Arg) {
+  return !Arg->getSuperClasses().empty() &&
+ llvm::StringSwitch(
+ Arg->getSuperClasses().back().first->getName())
+ .Case("VariadicParamOrParamIdxArgument", true)
+ .Default(false);
+}
+
+static void emitClangAttrThisIsaIdentifierArgList(RecordKeeper &Records,
+  raw_ostream &OS) {
+  OS << "#if defined(CLANG_ATTR_THIS_ISA_IDENTIFIER_ARG_LIST)\n";
+  std::vector Attrs = Records.getAllDerivedDefinitions("Attr");
+  for (const auto *A : Attrs) {
+// Determine whether the first argument is a variadic identifier.
+std::vector Args = A->getValueAsListOfDefs("Args");
+if (Args.empty() || !keywordThisIsaIdentifierInArgument(Args[0]))
+  continue;
+
+// All these spellings take an identifier argument.
+forEachUniqueSpelling(*A, [&](const FlattenedSpelling &S) {
+  OS << ".Case(\"" << S.name() << "\", "
+ << "true"
+ << ")\n";
+});
+  }
+  OS << "#endif // CLANG_ATTR_THIS_ISA_IDENTIFIER_ARG_LIST\n\n";
+}
+
 namespace clang {
 
 // Emits the class definitions for attributes.
@@ -3738,6 +3774,7 @@
   emitClangAttrArgContextList(Records, OS);
   emitClangAttrIdentifierArgList(Records, OS);
   emitClangAttrVariadicIdentifierArgList(Records, OS);
+  emitClangAttrThisIsaIdentifierArgList(Records, OS);
   emitClangAttrTypeArgList(Records, OS);
   emitClangAttrLateParsedList(Records, OS);
 }
Index: test/SemaCXX/attr-callback.cpp
===
--- /dev/null
+++ test/SemaCXX/attr-callback.cpp
@@ -0,0 +1,67 @@
+// RUN: %clang_cc1 %s -verify -fsyntax-only
+
+// expected-no-diagnostics
+
+class C_in_class {
+#include "../Sema/attr-callback.c"
+};
+
+struct Base {
+
+  void no_args_1(void (*callback)(void));
+  __attribute__((callback(1))) void no_args_2(void (*callback)(void));
+  __attribute__((callback(callback))) void no_args_3(void (*callback)(void)) {}
+
+  __attribute__((callback(1, 0))) virtual void
+  this_tr(void (*callback)(Base *));
+
+  __attribute__((callback(1, this, __, this))) virtual void
+  this_unknown_this(void (*callback)(Base *, Base *, Base *));
+
+  __attribute__((callback(1))) virtual void
+  virtual_1(void (*callback)(void));
+
+  __attribute__((callback(callback))) virtual void
+  virtual_2(void (*callback)(void));
+
+  __attribute__((callback(1))) virtual void
+  virtual_3(void (*callback)(void));
+};
+
+__attribute__((call

[PATCH] D55483: Introduce the callback attribute and emit !callback metadata

2019-01-18 Thread Johannes Doerfert via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL351629: Emit !callback metadata and introduce the callback 
attribute (authored by jdoerfert, committed by ).

Changed prior to commit:
  https://reviews.llvm.org/D55483?vs=181631&id=182665#toc

Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D55483/new/

https://reviews.llvm.org/D55483

Files:
  cfe/trunk/include/clang/AST/ASTContext.h
  cfe/trunk/include/clang/Basic/Attr.td
  cfe/trunk/include/clang/Basic/AttrDocs.td
  cfe/trunk/include/clang/Basic/Builtins.def
  cfe/trunk/include/clang/Basic/Builtins.h
  cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td
  cfe/trunk/lib/AST/ASTContext.cpp
  cfe/trunk/lib/Basic/Builtins.cpp
  cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp
  cfe/trunk/lib/CodeGen/CodeGenModule.cpp
  cfe/trunk/lib/Parse/ParseDecl.cpp
  cfe/trunk/lib/Sema/SemaDecl.cpp
  cfe/trunk/lib/Sema/SemaDeclAttr.cpp
  cfe/trunk/test/Analysis/retain-release.m
  cfe/trunk/test/CodeGen/attr-callback.c
  cfe/trunk/test/CodeGen/callback_annotated.c
  cfe/trunk/test/CodeGen/callback_openmp.c
  cfe/trunk/test/CodeGen/callback_pthread_create.c
  cfe/trunk/test/CodeGenCXX/attr-callback.cpp
  cfe/trunk/test/Misc/pragma-attribute-supported-attributes-list.test
  cfe/trunk/test/OpenMP/parallel_codegen.cpp
  cfe/trunk/test/Sema/attr-callback-broken.c
  cfe/trunk/test/Sema/attr-callback.c
  cfe/trunk/test/SemaCXX/attr-callback-broken.cpp
  cfe/trunk/test/SemaCXX/attr-callback.cpp
  cfe/trunk/utils/TableGen/ClangAttrEmitter.cpp

Index: cfe/trunk/utils/TableGen/ClangAttrEmitter.cpp
===
--- cfe/trunk/utils/TableGen/ClangAttrEmitter.cpp
+++ cfe/trunk/utils/TableGen/ClangAttrEmitter.cpp
@@ -776,6 +776,11 @@
 }
   };
 
+  struct VariadicParamOrParamIdxArgument : public VariadicArgument {
+VariadicParamOrParamIdxArgument(const Record &Arg, StringRef Attr)
+: VariadicArgument(Arg, Attr, "int") {}
+  };
+
   // Unique the enums, but maintain the original declaration ordering.
   std::vector
   uniqueEnumsInOrder(const std::vector &enums) {
@@ -1284,6 +1289,8 @@
 Ptr = llvm::make_unique(Arg, Attr);
   else if (ArgName == "VariadicParamIdxArgument")
 Ptr = llvm::make_unique(Arg, Attr);
+  else if (ArgName == "VariadicParamOrParamIdxArgument")
+Ptr = llvm::make_unique(Arg, Attr);
   else if (ArgName == "ParamIdxArgument")
 Ptr = llvm::make_unique(Arg, Attr, "ParamIdx");
   else if (ArgName == "VariadicIdentifierArgument")
@@ -2117,6 +2124,7 @@
  llvm::StringSwitch(
  Arg->getSuperClasses().back().first->getName())
  .Case("VariadicIdentifierArgument", true)
+ .Case("VariadicParamOrParamIdxArgument", true)
  .Default(false);
 }
 
@@ -2159,6 +2167,34 @@
   OS << "#endif // CLANG_ATTR_IDENTIFIER_ARG_LIST\n\n";
 }
 
+static bool keywordThisIsaIdentifierInArgument(const Record *Arg) {
+  return !Arg->getSuperClasses().empty() &&
+ llvm::StringSwitch(
+ Arg->getSuperClasses().back().first->getName())
+ .Case("VariadicParamOrParamIdxArgument", true)
+ .Default(false);
+}
+
+static void emitClangAttrThisIsaIdentifierArgList(RecordKeeper &Records,
+  raw_ostream &OS) {
+  OS << "#if defined(CLANG_ATTR_THIS_ISA_IDENTIFIER_ARG_LIST)\n";
+  std::vector Attrs = Records.getAllDerivedDefinitions("Attr");
+  for (const auto *A : Attrs) {
+// Determine whether the first argument is a variadic identifier.
+std::vector Args = A->getValueAsListOfDefs("Args");
+if (Args.empty() || !keywordThisIsaIdentifierInArgument(Args[0]))
+  continue;
+
+// All these spellings take an identifier argument.
+forEachUniqueSpelling(*A, [&](const FlattenedSpelling &S) {
+  OS << ".Case(\"" << S.name() << "\", "
+ << "true"
+ << ")\n";
+});
+  }
+  OS << "#endif // CLANG_ATTR_THIS_ISA_IDENTIFIER_ARG_LIST\n\n";
+}
+
 namespace clang {
 
 // Emits the class definitions for attributes.
@@ -3767,6 +3803,7 @@
   emitClangAttrArgContextList(Records, OS);
   emitClangAttrIdentifierArgList(Records, OS);
   emitClangAttrVariadicIdentifierArgList(Records, OS);
+  emitClangAttrThisIsaIdentifierArgList(Records, OS);
   emitClangAttrTypeArgList(Records, OS);
   emitClangAttrLateParsedList(Records, OS);
 }
Index: cfe/trunk/include/clang/AST/ASTContext.h
===
--- cfe/trunk/include/clang/AST/ASTContext.h
+++ cfe/trunk/include/clang/AST/ASTContext.h
@@ -2003,6 +2003,9 @@
 /// No error
 GE_None,
 
+/// Missing a type
+GE_Missing_type,
+
 /// Missing a type from <stdio.h>
 GE_Missing_stdio,
 
Index: cfe/trunk/include/clang/Basic/AttrDocs.td
===
--- cfe/trunk/include/clang/Basic/AttrDocs.td
+++ cfe/trunk/include/clang/Basic/AttrDocs.td
@@ -3781

[PATCH] D55483: Introduce the callback attribute and emit !callback metadata

2018-12-08 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert created this revision.
jdoerfert added reviewers: hfinkel, reames, fhahn, ABataev.
Herald added subscribers: jfb, bollu.

  With commit rX (currently https://reviews.llvm.org/D54498), LLVM
  gained the ability to apply existing optimizations on indirections
  through callbacks. This is based on an abstraction that hides the
  middle man as described in rX and the llvm::AbstractCallSite class.
  
  This commit enables clang to emit !callback metadata that is
  understood by LLVM. It does so in three different cases:
1) For known broker functions declarations that are directly
   emitted, e.g., __kmpc_fork_call for the OpenMP pragma parallel.
2) For known broker functions that are identified by their name and
   source location through the builtin mechanism, e.g.,
   pthread_create from the POSIX thread API.
3) For user annotated functions that carry the "callback(idx, ...)"
   attribute. The attribute has to include the index of the callback
   callee and how the passed arguments can be identified (as many as
   the callback callee has).
  
  For additional information, also consider the commit message and
  discussion for the LLVM patch: https://reviews.llvm.org/D54498

NOTE: This is only committed after https://reviews.llvm.org/D54498 and
the commit message will be modified accordingly.


Repository:
  rC Clang

https://reviews.llvm.org/D55483

Files:
  include/clang/Basic/Attr.td
  include/clang/Basic/Builtins.def
  include/clang/Basic/Builtins.h
  include/clang/Basic/DiagnosticSemaKinds.td
  lib/Basic/Builtins.cpp
  lib/CodeGen/CGOpenMPRuntime.cpp
  lib/CodeGen/CodeGenModule.cpp
  lib/Sema/SemaDecl.cpp
  lib/Sema/SemaDeclAttr.cpp
  test/CodeGen/callback_annotated.c
  test/CodeGen/callback_openmp.c
  test/CodeGen/callback_pthread_create.c

Index: test/CodeGen/callback_pthread_create.c
===
--- /dev/null
+++ test/CodeGen/callback_pthread_create.c
@@ -0,0 +1,31 @@
+// RUN: %clang -O1 %s -S -c -emit-llvm -o - | FileCheck %s
+// RUN: %clang -O1 %s -S -c -emit-llvm -o - | opt -ipconstprop -S | FileCheck --check-prefix=IPCP %s
+
+// CHECK: declare !callback ![[cid:[0-9]+]] dso_local i32 @pthread_create
+// CHECK: ![[cid]] = !{i1 false, i64 3, i64 4}
+
+#include 
+
+const int GlobalVar = 0;
+
+static void *callee0(void *payload) {
+// IPCP:  define internal i8* @callee0
+// IPCP-NEXT:   entry:
+// IPCP-NEXT: ret i8* null
+  return payload;
+}
+
+static void *callee1(void *payload) {
+// IPCP:  define internal i8* @callee1
+// IPCP-NEXT:   entry:
+// IPCP-NEXT: ret i8* bitcast (i32* @GlobalVar to i8*)
+  return payload;
+}
+
+void foo() {
+  pthread_t MyFirstThread;
+  pthread_create(&MyFirstThread, NULL, callee0, NULL);
+
+  pthread_t MySecondThread;
+  pthread_create(&MySecondThread, NULL, callee1, (void *)&GlobalVar);
+}
Index: test/CodeGen/callback_openmp.c
===
--- /dev/null
+++ test/CodeGen/callback_openmp.c
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -triple i386-unknown-unknown -fopenmp -O1 %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -triple i386-unknown-unknown -fopenmp -O1 %s -emit-llvm -o - | opt -ipconstprop -S | FileCheck --check-prefix=IPCP %s
+
+// CHECK: declare !callback ![[cid:[0-9]+]] void @__kmpc_fork_call
+// CHECK: declare !callback ![[cid]] void @__kmpc_fork_teams
+// CHECK: ![[cid]] = !{i1 true, i64 3, i64 0, i64 0}
+
+void work1(int, int);
+void work2(int, int);
+void work12(int, int);
+
+void foo(int q) {
+  int p = 2;
+
+  #pragma omp parallel firstprivate(q, p)
+  work1(p, q);
+// IPCP: call void @work1(i32 2, i32 %{{[._a-zA-Z0-9]*}})
+
+  #pragma omp parallel for firstprivate(p, q)
+  for (int i = 0; i < q; i++)
+work2(i, p);
+// IPCP: call void @work2(i32 %{{[._a-zA-Z0-9]*}}, i32 2)
+
+  #pragma omp target teams firstprivate(p)
+  work12(p, p);
+// IPCP: call void @work12(i32 2, i32 2)
+}
Index: test/CodeGen/callback_annotated.c
===
--- /dev/null
+++ test/CodeGen/callback_annotated.c
@@ -0,0 +1,70 @@
+// RUN: %clang_cc1 -triple i386-unknown-unknown -fopenmp -O1 %s -emit-llvm -o - | FileCheck %s --check-prefix=RUN1
+// RUN: %clang_cc1 -triple i386-unknown-unknown -fopenmp -O1 %s -emit-llvm -o - | FileCheck %s --check-prefix=RUN2
+// RUN: %clang_cc1 -triple i386-unknown-unknown -fopenmp -O1 %s -emit-llvm -o - | opt -ipconstprop -S | FileCheck --check-prefix=IPCP %s
+
+// RUN1-DAG: @broker0({{[^#]*#[0-9]+}} !callback ![[cid0:[0-9]+]]
+__attribute__((callback (1, 2)))
+void* broker0(void* (*callee)(void *), void *payload) {
+  return callee(payload);
+}
+
+// RUN1-DAG: @broker1({{[^#]*#[0-9]+}} !callback ![[cid1:[0-9]+]]
+__attribute__((callback (2, 1)))
+void* broker1(void *payload, void* (*callee)(void *)) {
+  return broker0(callee, payload);
+}
+
+// RUN1-DAG: declare !callback ![[cid2:[0-9]+]] i8* @broker2
+__attribute_

[PATCH] D55483: Introduce the callback attribute and emit !callback metadata

2018-12-10 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert marked 2 inline comments as done.
jdoerfert added a comment.

In D55483#1326029, @aaron.ballman 
wrote:

> This is missing all of the Sema and SemaCXX tests. Should have tests for 
> member functions, variadic functions, incorrect arguments, incorrect 
> subjects, etc.


I will write more tests and update this revision.




Comment at: include/clang/Basic/Attr.td:1204
+  VariadicUnsignedArgument<"PayloadIndices">];
+  let Subjects = SubjectList<[Function]>;
+  let Documentation = [Undocumented];

aaron.ballman wrote:
> Should this also apply to Objective-C methods?
> 
> Why should the user specify this attribute on the function as opposed to on 
> the parameter? e.g.,
> ```
> // Why this:
> __attribute__((callback (1, 2, 3)))
> void* broker0(void* (*callee)(void *), void *payload, int otherPayload) {
>   return callee(payload);
> }
> 
> // Instead of this:
> void* broker0(void* (*callee)(void *) __attribute__((callback (2, 3))), void 
> *payload, int otherPayload) {
>   return callee(payload);
> }
> 
> // Or this:
> void* broker0(void* (*callee)(void *) __attribute__((callback (payload, 
> otherPayload))), void *payload, int otherPayload) {
>   return callee(payload);
> }
> ```
> I ask because these "use an index" attributes are really hard for users to 
> use in practice. They have to account for 0-vs-1 based indexing, implicit 
> this parameters, etc and if we can avoid that, it may be worth the effort.
> Should this also apply to Objective-C methods?

I don't need it to and unless somebody does, I'd say no.


> I ask because these "use an index" attributes are really hard for users to 
> use in practice. They have to account for 0-vs-1 based indexing, implicit 
> this parameters, etc and if we can avoid that, it may be worth the effort.

I was thinking that the function notation makes it clear that there is *only 
one callback per function* allowed right now. I don't expect many manual users 
of this feature until we improve the middle-end support, so it is unclear to me 
if this requirement needs to be removed as well.

Other than that, some thoughts: 
- I do not feel strongly about this.
- The middle requirement seems not much better than the first; we would still 
need to deal with index numbers (callbacks without arguments are not really 
interesting for now). 
- The last encoding requires us to define a symbol for "unknown argument" 
(maybe _ or ?).



Comment at: include/clang/Basic/Attr.td:1205
+  let Subjects = SubjectList<[Function]>;
+  let Documentation = [Undocumented];
+}

aaron.ballman wrote:
> No new undocumented attributes, please.
Ok, I can write documentation similar to the commit message and the lang-ref 
documentation for the callback metadata.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D55483/new/

https://reviews.llvm.org/D55483



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D55483: Introduce the callback attribute and emit !callback metadata

2018-12-10 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 177662.
jdoerfert added a comment.

Update according to initial feedback


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D55483/new/

https://reviews.llvm.org/D55483

Files:
  include/clang/AST/ASTContext.h
  include/clang/Basic/Attr.td
  include/clang/Basic/AttrDocs.td
  include/clang/Basic/Builtins.def
  include/clang/Basic/Builtins.h
  include/clang/Basic/DiagnosticSemaKinds.td
  lib/AST/ASTContext.cpp
  lib/Basic/Builtins.cpp
  lib/CodeGen/CGOpenMPRuntime.cpp
  lib/CodeGen/CodeGenModule.cpp
  lib/Sema/SemaDecl.cpp
  lib/Sema/SemaDeclAttr.cpp
  test/CodeGen/attr-callback.c
  test/CodeGen/callback_annotated.c
  test/CodeGen/callback_openmp.c
  test/CodeGen/callback_pthread_create.c
  test/CodeGen/callback_qsort_r.c
  test/CodeGenCXX/attr-callback.cpp
  test/Misc/pragma-attribute-supported-attributes-list.test
  test/OpenMP/parallel_codegen.cpp
  test/Sema/attr-callback-broken.c
  test/Sema/attr-callback.c
  test/SemaCXX/attr-callback-broken.cpp
  test/SemaCXX/attr-callback.cpp
  utils/TableGen/ClangAttrEmitter.cpp

Index: utils/TableGen/ClangAttrEmitter.cpp
===
--- utils/TableGen/ClangAttrEmitter.cpp
+++ utils/TableGen/ClangAttrEmitter.cpp
@@ -1275,6 +1275,8 @@
 Ptr = llvm::make_unique(Arg, Attr, "unsigned");
   else if (ArgName == "VariadicUnsignedArgument")
 Ptr = llvm::make_unique(Arg, Attr, "unsigned");
+  else if (ArgName == "VariadicSignedArgument")
+Ptr = llvm::make_unique(Arg, Attr, "int");
   else if (ArgName == "VariadicStringArgument")
 Ptr = llvm::make_unique(Arg, Attr);
   else if (ArgName == "VariadicEnumArgument")
Index: test/SemaCXX/attr-callback.cpp
===
--- /dev/null
+++ test/SemaCXX/attr-callback.cpp
@@ -0,0 +1,67 @@
+// RUN: %clang_cc1 %s -verify -fsyntax-only
+
+// expected-no-diagnostics
+
+class C_in_class {
+#include "../Sema/attr-callback.c"
+};
+
+struct Base {
+
+  void no_args_1(void (*callback)(void));
+  __attribute__((callback(1))) void no_args_2(void (*callback)(void));
+  __attribute__((callback(1))) void no_args_3(void (*callback)(void)) {}
+
+  __attribute__((callback(1, 0))) virtual void
+  this_tr(void (*callback)(Base *));
+
+  __attribute__((callback(1, 0, -1, 0))) virtual void
+  this_unknown_this(void (*callback)(Base *, Base *, Base *));
+
+  __attribute__((callback(1))) virtual void
+  virtual_1(void (*callback)(void));
+
+  __attribute__((callback(1))) virtual void
+  virtual_2(void (*callback)(void));
+
+  __attribute__((callback(1))) virtual void
+  virtual_3(void (*callback)(void));
+};
+
+__attribute__((callback(1))) void
+Base::no_args_1(void (*callback)(void)) {
+}
+
+void Base::no_args_2(void (*callback)(void)) {
+}
+
+struct Derived_1 : public Base {
+
+  __attribute__((callback(1, 0))) virtual void
+  this_tr(void (*callback)(Base *)) override;
+
+  __attribute__((callback(1))) virtual void
+  virtual_1(void (*callback)(void)) override {}
+
+  virtual void
+  virtual_3(void (*callback)(void)) override {}
+};
+
+struct Derived_2 : public Base {
+
+  __attribute__((callback(1))) virtual void
+  virtual_1(void (*callback)(void)) override;
+
+  virtual void
+  virtual_2(void (*callback)(void)) override;
+
+  virtual void
+  virtual_3(void (*callback)(void)) override;
+};
+
+void Derived_2::virtual_1(void (*callback)(void)) {}
+
+__attribute__((callback(1))) void
+Derived_2::virtual_2(void (*callback)(void)) {}
+
+void Derived_2::virtual_3(void (*callback)(void)) {}
Index: test/SemaCXX/attr-callback-broken.cpp
===
--- /dev/null
+++ test/SemaCXX/attr-callback-broken.cpp
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 %s -verify -fsyntax-only
+
+class C_in_class {
+#define HAS_THIS
+#include "../Sema/attr-callback-broken.c"
+#undef HAS_THIS
+};
Index: test/Sema/attr-callback.c
===
--- /dev/null
+++ test/Sema/attr-callback.c
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 %s -verify -fsyntax-only
+
+// expected-no-diagnostics
+
+__attribute__((callback(1))) void no_args(void (*callback)(void));
+__attribute__((callback(1, 2, 3)))   void args_1(void (*callback)(int, double), int a, double b);
+__attribute__((callback(2, 3, 3)))   void args_2(int a, void (*callback)(double, double), double b);
+__attribute__((callback(2, -1, -1))) void args_3(int a, void (*callback)(double, double), double b);
Index: test/Sema/attr-callback-broken.c
===
--- /dev/null
+++ test/Sema/attr-callback-broken.c
@@ -0,0 +1,38 @@
+// RUN: %clang_cc1 %s -verify -fsyntax-only
+
+__attribute__((callback())) void no_callee(void (*callback)(void)); // expected-error {{'callback' attribute takes at least 1 argument}}
+
+__attribute__((callback(1, 1)))void too_many_args_1(void   (*callback)(void)) {}   // exp

[PATCH] D55483: Introduce the callback attribute and emit !callback metadata

2018-12-11 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 11.
jdoerfert marked 11 inline comments as done.
jdoerfert added a comment.

Fix and improve documentation


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D55483/new/

https://reviews.llvm.org/D55483

Files:
  include/clang/AST/ASTContext.h
  include/clang/Basic/Attr.td
  include/clang/Basic/AttrDocs.td
  include/clang/Basic/Builtins.def
  include/clang/Basic/Builtins.h
  include/clang/Basic/DiagnosticSemaKinds.td
  lib/AST/ASTContext.cpp
  lib/Basic/Builtins.cpp
  lib/CodeGen/CGOpenMPRuntime.cpp
  lib/CodeGen/CodeGenModule.cpp
  lib/Sema/SemaDecl.cpp
  lib/Sema/SemaDeclAttr.cpp
  test/Analysis/retain-release.m
  test/CodeGen/attr-callback.c
  test/CodeGen/callback_annotated.c
  test/CodeGen/callback_openmp.c
  test/CodeGen/callback_pthread_create.c
  test/CodeGen/callback_qsort_r.c
  test/CodeGenCXX/attr-callback.cpp
  test/Misc/pragma-attribute-supported-attributes-list.test
  test/OpenMP/parallel_codegen.cpp
  test/Sema/attr-callback-broken.c
  test/Sema/attr-callback.c
  test/SemaCXX/attr-callback-broken.cpp
  test/SemaCXX/attr-callback.cpp
  utils/TableGen/ClangAttrEmitter.cpp

Index: utils/TableGen/ClangAttrEmitter.cpp
===
--- utils/TableGen/ClangAttrEmitter.cpp
+++ utils/TableGen/ClangAttrEmitter.cpp
@@ -1275,6 +1275,8 @@
 Ptr = llvm::make_unique(Arg, Attr, "unsigned");
   else if (ArgName == "VariadicUnsignedArgument")
 Ptr = llvm::make_unique(Arg, Attr, "unsigned");
+  else if (ArgName == "VariadicSignedArgument")
+Ptr = llvm::make_unique(Arg, Attr, "int");
   else if (ArgName == "VariadicStringArgument")
 Ptr = llvm::make_unique(Arg, Attr);
   else if (ArgName == "VariadicEnumArgument")
Index: test/SemaCXX/attr-callback.cpp
===
--- /dev/null
+++ test/SemaCXX/attr-callback.cpp
@@ -0,0 +1,67 @@
+// RUN: %clang_cc1 %s -verify -fsyntax-only
+
+// expected-no-diagnostics
+
+class C_in_class {
+#include "../Sema/attr-callback.c"
+};
+
+struct Base {
+
+  void no_args_1(void (*callback)(void));
+  __attribute__((callback(1))) void no_args_2(void (*callback)(void));
+  __attribute__((callback(1))) void no_args_3(void (*callback)(void)) {}
+
+  __attribute__((callback(1, 0))) virtual void
+  this_tr(void (*callback)(Base *));
+
+  __attribute__((callback(1, 0, -1, 0))) virtual void
+  this_unknown_this(void (*callback)(Base *, Base *, Base *));
+
+  __attribute__((callback(1))) virtual void
+  virtual_1(void (*callback)(void));
+
+  __attribute__((callback(1))) virtual void
+  virtual_2(void (*callback)(void));
+
+  __attribute__((callback(1))) virtual void
+  virtual_3(void (*callback)(void));
+};
+
+__attribute__((callback(1))) void
+Base::no_args_1(void (*callback)(void)) {
+}
+
+void Base::no_args_2(void (*callback)(void)) {
+}
+
+struct Derived_1 : public Base {
+
+  __attribute__((callback(1, 0))) virtual void
+  this_tr(void (*callback)(Base *)) override;
+
+  __attribute__((callback(1))) virtual void
+  virtual_1(void (*callback)(void)) override {}
+
+  virtual void
+  virtual_3(void (*callback)(void)) override {}
+};
+
+struct Derived_2 : public Base {
+
+  __attribute__((callback(1))) virtual void
+  virtual_1(void (*callback)(void)) override;
+
+  virtual void
+  virtual_2(void (*callback)(void)) override;
+
+  virtual void
+  virtual_3(void (*callback)(void)) override;
+};
+
+void Derived_2::virtual_1(void (*callback)(void)) {}
+
+__attribute__((callback(1))) void
+Derived_2::virtual_2(void (*callback)(void)) {}
+
+void Derived_2::virtual_3(void (*callback)(void)) {}
Index: test/SemaCXX/attr-callback-broken.cpp
===
--- /dev/null
+++ test/SemaCXX/attr-callback-broken.cpp
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 %s -verify -fsyntax-only
+
+class C_in_class {
+#define HAS_THIS
+#include "../Sema/attr-callback-broken.c"
+#undef HAS_THIS
+};
Index: test/Sema/attr-callback.c
===
--- /dev/null
+++ test/Sema/attr-callback.c
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 %s -verify -fsyntax-only
+
+// expected-no-diagnostics
+
+__attribute__((callback(1))) void no_args(void (*callback)(void));
+__attribute__((callback(1, 2, 3)))   void args_1(void (*callback)(int, double), int a, double b);
+__attribute__((callback(2, 3, 3)))   void args_2(int a, void (*callback)(double, double), double b);
+__attribute__((callback(2, -1, -1))) void args_3(int a, void (*callback)(double, double), double b);
Index: test/Sema/attr-callback-broken.c
===
--- /dev/null
+++ test/Sema/attr-callback-broken.c
@@ -0,0 +1,38 @@
+// RUN: %clang_cc1 %s -verify -fsyntax-only
+
+__attribute__((callback())) void no_callee(void (*callback)(void)); // expected-error {{'callback' attribute takes at least 1 argument}}
+
+__attribute__((callbac

[PATCH] D55483: Introduce the callback attribute and emit !callback metadata

2018-12-11 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added a comment.

Thanks for the feedback.

Once we have decided on the style of the annotation, I will implement that and 
change the tests/documentation accordingly.




Comment at: include/clang/Basic/Attr.td:1204
+  VariadicUnsignedArgument<"PayloadIndices">];
+  let Subjects = SubjectList<[Function]>;
+  let Documentation = [Undocumented];

aaron.ballman wrote:
> jdoerfert wrote:
> > aaron.ballman wrote:
> > > Should this also apply to Objective-C methods?
> > > 
> > > Why should the user specify this attribute on the function as opposed to 
> > > on the parameter? e.g.,
> > > ```
> > > // Why this:
> > > __attribute__((callback (1, 2, 3)))
> > > void* broker0(void* (*callee)(void *), void *payload, int otherPayload) {
> > >   return callee(payload);
> > > }
> > > 
> > > // Instead of this:
> > > void* broker0(void* (*callee)(void *) __attribute__((callback (2, 3))), 
> > > void *payload, int otherPayload) {
> > >   return callee(payload);
> > > }
> > > 
> > > // Or this:
> > > void* broker0(void* (*callee)(void *) __attribute__((callback (payload, 
> > > otherPayload))), void *payload, int otherPayload) {
> > >   return callee(payload);
> > > }
> > > ```
> > > I ask because these "use an index" attributes are really hard for users 
> > > to use in practice. They have to account for 0-vs-1 based indexing, 
> > > implicit this parameters, etc and if we can avoid that, it may be worth 
> > > the effort.
> > > Should this also apply to Objective-C methods?
> > 
> > I don't need it to and unless somebody does, I'd say no.
> > 
> > 
> > > I ask because these "use an index" attributes are really hard for users 
> > > to use in practice. They have to account for 0-vs-1 based indexing, 
> > > implicit this parameters, etc and if we can avoid that, it may be worth 
> > > the effort.
> > 
> > I was thinking that the function notation makes it clear that there is 
> > *only one callback per function* allowed right now. I don't expect many 
> > manual users of this feature until we improve the middle-end support, so it 
> > is unclear to me if this requirement needs to be removed as well.
> > 
> > Other than that, some thoughts: 
> > - I do not feel strongly about this.
> > - The middle requirement seems not much better than the first, we would still 
> > need to deal with index numbers (callbacks without arguments are not really 
> > interesting for now). 
> > - The last encoding requires us to define a symbol for "unknown argument" 
> > (maybe _ or ?).
> > I was thinking that the function notation makes it clear that there is 
> > *only one callback per function* allowed right now.
> 
> I don't see how that follows. Users may still try writing:
> ```
> __attribute__((callback (1, 3, 4)))
> __attribute__((callback (2, 3, 4)))
> void broker0(void (*cb1)(void *, int), void (*cb2)(void *, int), void 
> *payload, int otherPayload) {
>   cb1(payload, otherPayload);
>   cb2(payload, otherPayload);
> }
> ```
> and reasonably expect that to work (we should have this as a test case, and 
> probably warn on it).
> 
> I'm not strongly opposed to the current way this is exposed to users, just 
> wondering if we can find a better way to surface the feature.
> 
> > The last encoding requires us to define a symbol for "unknown argument" 
> > (maybe _ or ?).
> 
> Ah, I wasn't aware that this was part of the feature, but the documentation 
> you wrote helped to clarify for me. Personal preference is for `?`, but any 
> symbol will do (so long as we aren't hoping users can count commas, e.g., 
> `callback(frobble,,,foo)`).
> and reasonably expect that to work (we should have this as a test case, and 
> probably warn on it).

We have a test case and we'll spit out an error. (Sema/attr-callback-broken.c 
line 21 & 22)


> I'm not strongly opposed to the current way this is exposed to users, just 
> wondering if we can find a better way to surface the feature.

I can change it to the inlined style if nobody disagrees:

```
   void broker(int foo, void (*callback)(int, int, int, int) 
__attribute__((callback(foo, ?, bar, ?))), int bar);

```

As I mentioned, I don't have a strong opinion on this but I just don't want to 
change it back and forth :)



Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D55483/new/

https://reviews.llvm.org/D55483



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D58091: Customize warnings for missing built-in type

2019-02-26 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 188411.
jdoerfert marked an inline comment as done.
jdoerfert added a comment.

Fix typo


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58091/new/

https://reviews.llvm.org/D58091

Files:
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/lib/Sema/SemaDecl.cpp
  clang/test/Analysis/retain-release.m
  clang/test/Sema/builtin-setjmp.c
  clang/test/Sema/implicit-builtin-decl.c

Index: clang/test/Sema/implicit-builtin-decl.c
===
--- clang/test/Sema/implicit-builtin-decl.c
+++ clang/test/Sema/implicit-builtin-decl.c
@@ -55,14 +55,17 @@
 
 void snprintf() { }
 
-// PR8316
-void longjmp(); // expected-warning{{declaration of built-in function 'longjmp' requires inclusion of the header }}
+// PR8316 & PR40692
+void longjmp(); // expected-warning{{declaration of built-in function 'longjmp' requires the definition of the 'jmp_buf' type, commonly proived in the header .}}
 
 extern float fmaxf(float, float);
 
 struct __jmp_buf_tag {};
-void sigsetjmp(struct __jmp_buf_tag[1], int); // expected-warning{{declaration of built-in function 'sigsetjmp' requires inclusion of the header }}
+void sigsetjmp(struct __jmp_buf_tag[1], int); // expected-warning{{declaration of built-in function 'sigsetjmp' requires the declaration of the 'jmp_buf' type, commonly proived in the header .}}
 
 // CHECK: FunctionDecl {{.*}}  col:6 sigsetjmp '
 // CHECK-NOT: FunctionDecl
 // CHECK: ReturnsTwiceAttr {{.*}} <{{.*}}> Implicit
+
+// PR40692
+void pthread_create(); // no warning expected
Index: clang/test/Sema/builtin-setjmp.c
===
--- /dev/null
+++ clang/test/Sema/builtin-setjmp.c
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify -DNO_JMP_BUF %s
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify %s
+
+#ifdef NO_JMP_BUF
+extern long setjmp(long *);   // expected-warning {{declaration of built-in function 'setjmp' requires the declaration of the 'jmp_buf' type, commonly proived in the header .}}
+#else
+typedef long jmp_buf;
+extern int setjmp(char);  // expected-warning@8 {{incompatible redeclaration of library function 'setjmp'}}
+  // expected-note@8{{'setjmp' is a builtin with type 'int (jmp_buf)' (aka 'int (long)')}}
+#endif
Index: clang/test/Analysis/retain-release.m
===
--- clang/test/Analysis/retain-release.m
+++ clang/test/Analysis/retain-release.m
@@ -2,7 +2,7 @@
 // RUN: %clang_analyze_cc1 -triple x86_64-apple-darwin10\
 // RUN: -analyzer-checker=core,osx.coreFoundation.CFRetainRelease\
 // RUN: -analyzer-checker=osx.cocoa.ClassRelease,osx.cocoa.RetainCount\
-// RUN: -analyzer-checker=debug.ExprInspection -fblocks -verify=expected,C %s\
+// RUN: -analyzer-checker=debug.ExprInspection -fblocks -verify %s\
 // RUN: -Wno-objc-root-class -analyzer-output=plist -o %t.objc.plist
 // RUN: %clang_analyze_cc1 -triple x86_64-apple-darwin10\
 // RUN: -analyzer-checker=core,osx.coreFoundation.CFRetainRelease\
@@ -1231,7 +1231,7 @@
 typedef unsigned long __darwin_pthread_key_t;
 typedef __darwin_pthread_key_t pthread_key_t;
 
-int pthread_create(pthread_t *, const pthread_attr_t *,  // C-warning{{declaration of built-in function 'pthread_create' requires inclusion of the header }}
+int pthread_create(pthread_t *, const pthread_attr_t *,
void *(*)(void *), void *);
 
 int pthread_setspecific(pthread_key_t key, const void *value);
Index: clang/lib/Sema/SemaDecl.cpp
===
--- clang/lib/Sema/SemaDecl.cpp
+++ clang/lib/Sema/SemaDecl.cpp
@@ -1955,10 +1955,27 @@
   ASTContext::GetBuiltinTypeError Error;
   QualType R = Context.GetBuiltinType(ID, Error);
   if (Error) {
-if (ForRedeclaration)
-  Diag(Loc, diag::warn_implicit_decl_requires_sysheader)
-  << getHeaderName(Context.BuiltinInfo, ID, Error)
+if (!ForRedeclaration)
+  return nullptr;
+
+// If we have a builtin without an associated type we should not emit a
+// warning when we were not able to find a type for it.
+if (Error == ASTContext::GE_Missing_type)
+  return nullptr;
+
+// If we could not find a type for setjmp it is because the jmp_buf type was
+// not defined prior to the setjmp declaration.
+if (Error == ASTContext::GE_Missing_setjmp) {
+  Diag(Loc, diag::warn_implicit_decl_no_jmp_buf)
   << Context.BuiltinInfo.getName(ID);
+  return nullptr;
+}
+
+// Generally, we emit a warning that the declaration requires the
+// appropriate header.
+Diag(Loc, diag::warn_implicit_decl_requires_sysheader)
+<< getHeaderName(Context.BuiltinInfo, ID, Error)
+<< Context.BuiltinInfo.getName(ID);
 return nullpt

[PATCH] D57460: [WIP] Prototype a generic kernel offloading interface

2019-02-26 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 188458.
jdoerfert added a comment.
Herald added projects: clang, OpenMP, LLVM.
Herald added subscribers: llvm-commits, openmp-commits, cfe-commits.

Cleaned up clang part and interface. LLVM part still needs work


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D57460/new/

https://reviews.llvm.org/D57460

Files:
  clang/lib/CodeGen/CGOpenMPRuntime.h
  clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeTRegion.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeTRegion.h
  clang/lib/CodeGen/CMakeLists.txt
  clang/lib/CodeGen/CodeGenModule.cpp
  llvm/include/llvm/InitializePasses.h
  llvm/include/llvm/LinkAllPasses.h
  llvm/include/llvm/Support/SaveAndRestore.h
  llvm/include/llvm/Transforms/IPO.h
  llvm/lib/Transforms/IPO/CMakeLists.txt
  llvm/lib/Transforms/IPO/IPO.cpp
  llvm/lib/Transforms/IPO/OpenMPOpt.cpp
  llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
  llvm/lib/Transforms/Scalar/SCCP.cpp
  llvm/test/Other/opt-O2-pipeline.ll
  llvm/test/Other/opt-O3-pipeline.ll
  llvm/test/Other/opt-Os-pipeline.ll
  openmp/libomptarget/cmake/Modules/LibomptargetNVPTXBitcodeLibrary.cmake
  openmp/libomptarget/deviceRTLs/common/target_region.h
  openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
  openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
  openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
  openmp/libomptarget/deviceRTLs/nvptx/src/target_region.cu

Index: openmp/libomptarget/deviceRTLs/nvptx/src/target_region.cu
===
--- /dev/null
+++ openmp/libomptarget/deviceRTLs/nvptx/src/target_region.cu
@@ -0,0 +1,197 @@
+//===-- target_region.cu  CUDA impl. of the target region interface -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file contains the implementation of the common target region interface.
+//
+//===--===//
+
+// Include the native definitions first as certain defines might be needed in
+// the common interface definition below.
+#include "omptarget-nvptx.h"
+#include "interface.h"
+
+#include "../../common/target_region.h"
+
+/// The pointer used to share memory between team threads.
+extern __device__ __shared__ target_region_shared_buffer
+_target_region_shared_memory;
+
+EXTERN char *__kmpc_target_region_kernel_get_shared_memory() {
+  return _target_region_shared_memory.begin();
+}
+EXTERN char *__kmpc_target_region_kernel_get_private_memory() {
+  return _target_region_shared_memory.begin() +
+ _target_region_shared_memory.get_offset();
+}
+
+/// Simple generic state machine for worker threads.
+INLINE static void
+__kmpc_target_region_state_machine(bool IsOMPRuntimeInitialized) {
+
+  do {
+void *WorkFn = 0;
+
+// Wait for the signal that we have a new work function.
+__kmpc_barrier_simple_spmd(NULL, 0);
+
+// Retrieve the work function from the runtime.
+bool IsActive = __kmpc_kernel_parallel(&WorkFn, IsOMPRuntimeInitialized);
+
+// If there is nothing more to do, break out of the state machine by
+// returning to the caller.
+if (!WorkFn)
+  return;
+
+if (IsActive) {
+  char *SharedVars = __kmpc_target_region_kernel_get_shared_memory();
+  char *PrivateVars = __kmpc_target_region_kernel_get_private_memory();
+
+  ((ParallelWorkFnTy)WorkFn)(SharedVars, PrivateVars);
+
+  __kmpc_kernel_end_parallel();
+}
+
+__kmpc_barrier_simple_spmd(NULL, 0);
+
+  } while (true);
+}
+
+/// Filter threads into masters and workers. If \p UseStateMachine is true,
+/// required workers will enter a state machine and be trapped there.
+/// Master and surplus worker threads will return from this function immediately
+/// while required workers will only return once there is no more work. The
+/// return value indicates if the thread is a master (1), a surplus worker (0),
+/// or a finished required worker released from the state machine (-1).
+INLINE static int8_t
+__kmpc_target_region_thread_filter(unsigned ThreadLimit, bool UseStateMachine,
+   bool IsOMPRuntimeInitialized) {
+
+  unsigned TId = GetThreadIdInBlock();
+  bool IsWorker = TId < ThreadLimit;
+
+  if (IsWorker) {
+if (UseStateMachine)
+  __kmpc_target_region_state_machine(IsOMPRuntimeInitialized);
+return -1;
+  }
+
+  return TId == GetMasterThreadID();
+}
+
+EXTERN int8_t __kmpc_target_region_kernel_init(bool UseSPMDMode,
+   bool UseStateMachine,
+   bool RequiresOMPRuntime,
+   bool RequiresD

[PATCH] D57460: [WIP] Prototype a generic kernel offloading interface

2019-02-26 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added a comment.

I updated the patch with one that has a clean clang and runtime integration. It 
passes all but one OpenMP V&V test suite test (developed as part of ECP by U 
Del and others). The LLVM optimization gives us 30% speedup on the 
rodinia3.1/nw (Needleman-Wunsch) benchmark (more details and a cleaned-up LLVM 
part will follow in ~2 weeks' time).

Please feel free to look through and comment on the interface (openmp/...) and 
clang part till then!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D57460/new/

https://reviews.llvm.org/D57460



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D57460: [WIP] Prototype a generic kernel offloading interface

2019-02-26 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 188460.
jdoerfert added a comment.

Remove leftover debug msgs


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D57460/new/

https://reviews.llvm.org/D57460

Files:
  clang/lib/CodeGen/CGOpenMPRuntime.h
  clang/lib/CodeGen/CGOpenMPRuntimeTRegion.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeTRegion.h
  clang/lib/CodeGen/CMakeLists.txt
  clang/lib/CodeGen/CodeGenModule.cpp
  llvm/include/llvm/InitializePasses.h
  llvm/include/llvm/LinkAllPasses.h
  llvm/include/llvm/Support/SaveAndRestore.h
  llvm/include/llvm/Transforms/IPO.h
  llvm/lib/Transforms/IPO/CMakeLists.txt
  llvm/lib/Transforms/IPO/IPO.cpp
  llvm/lib/Transforms/IPO/OpenMPOpt.cpp
  llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
  llvm/lib/Transforms/Scalar/SCCP.cpp
  llvm/test/Other/opt-O2-pipeline.ll
  llvm/test/Other/opt-O3-pipeline.ll
  llvm/test/Other/opt-Os-pipeline.ll
  openmp/libomptarget/cmake/Modules/LibomptargetNVPTXBitcodeLibrary.cmake
  openmp/libomptarget/deviceRTLs/common/target_region.h
  openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
  openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
  openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
  openmp/libomptarget/deviceRTLs/nvptx/src/target_region.cu

Index: openmp/libomptarget/deviceRTLs/nvptx/src/target_region.cu
===
--- /dev/null
+++ openmp/libomptarget/deviceRTLs/nvptx/src/target_region.cu
@@ -0,0 +1,197 @@
+//===-- target_region.cu  CUDA impl. of the target region interface -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file contains the implementation of the common target region interface.
+//
+//===--===//
+
+// Include the native definitions first as certain defines might be needed in
+// the common interface definition below.
+#include "omptarget-nvptx.h"
+#include "interface.h"
+
+#include "../../common/target_region.h"
+
+/// The pointer used to share memory between team threads.
+extern __device__ __shared__ target_region_shared_buffer
+_target_region_shared_memory;
+
+EXTERN char *__kmpc_target_region_kernel_get_shared_memory() {
+  return _target_region_shared_memory.begin();
+}
+EXTERN char *__kmpc_target_region_kernel_get_private_memory() {
+  return _target_region_shared_memory.begin() +
+ _target_region_shared_memory.get_offset();
+}
+
+/// Simple generic state machine for worker threads.
+INLINE static void
+__kmpc_target_region_state_machine(bool IsOMPRuntimeInitialized) {
+
+  do {
+void *WorkFn = 0;
+
+// Wait for the signal that we have a new work function.
+__kmpc_barrier_simple_spmd(NULL, 0);
+
+// Retrieve the work function from the runtime.
+bool IsActive = __kmpc_kernel_parallel(&WorkFn, IsOMPRuntimeInitialized);
+
+// If there is nothing more to do, break out of the state machine by
+// returning to the caller.
+if (!WorkFn)
+  return;
+
+if (IsActive) {
+  char *SharedVars = __kmpc_target_region_kernel_get_shared_memory();
+  char *PrivateVars = __kmpc_target_region_kernel_get_private_memory();
+
+  ((ParallelWorkFnTy)WorkFn)(SharedVars, PrivateVars);
+
+  __kmpc_kernel_end_parallel();
+}
+
+__kmpc_barrier_simple_spmd(NULL, 0);
+
+  } while (true);
+}
+
+/// Filter threads into masters and workers. If \p UseStateMachine is true,
+/// required workers will enter a state machine and be trapped there.
+/// Master and surplus worker threads will return from this function immediately
+/// while required workers will only return once there is no more work. The
+/// return value indicates if the thread is a master (1), a surplus worker (0),
+/// or a finished required worker released from the state machine (-1).
+INLINE static int8_t
+__kmpc_target_region_thread_filter(unsigned ThreadLimit, bool UseStateMachine,
+   bool IsOMPRuntimeInitialized) {
+
+  unsigned TId = GetThreadIdInBlock();
+  bool IsWorker = TId < ThreadLimit;
+
+  if (IsWorker) {
+if (UseStateMachine)
+  __kmpc_target_region_state_machine(IsOMPRuntimeInitialized);
+return -1;
+  }
+
+  return TId == GetMasterThreadID();
+}
+
+EXTERN int8_t __kmpc_target_region_kernel_init(bool UseSPMDMode,
+   bool UseStateMachine,
+   bool RequiresOMPRuntime,
+   bool RequiresDataSharing) {
+  unsigned NumThreads = GetNumberOfThreadsInBlock();
+
+  // Handle the SPMD case first.
+  if (UseSPMDMode) {
+
+__kmpc_spmd_kernel_init(NumThreads, RequiresOMPRuntime,
+ 

[PATCH] D57460: [WIP] Prototype a generic kernel offloading interface

2019-02-26 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 188462.
jdoerfert added a comment.

Remove more unneeded changes


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D57460/new/

https://reviews.llvm.org/D57460

Files:
  clang/lib/CodeGen/CGOpenMPRuntimeTRegion.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeTRegion.h
  clang/lib/CodeGen/CMakeLists.txt
  clang/lib/CodeGen/CodeGenModule.cpp
  llvm/include/llvm/InitializePasses.h
  llvm/include/llvm/LinkAllPasses.h
  llvm/include/llvm/Transforms/IPO.h
  llvm/lib/Transforms/IPO/CMakeLists.txt
  llvm/lib/Transforms/IPO/IPO.cpp
  llvm/lib/Transforms/IPO/OpenMPOpt.cpp
  llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
  llvm/lib/Transforms/Scalar/SCCP.cpp
  llvm/test/Other/opt-O2-pipeline.ll
  llvm/test/Other/opt-O3-pipeline.ll
  llvm/test/Other/opt-Os-pipeline.ll
  openmp/libomptarget/cmake/Modules/LibomptargetNVPTXBitcodeLibrary.cmake
  openmp/libomptarget/deviceRTLs/common/target_region.h
  openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
  openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
  openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
  openmp/libomptarget/deviceRTLs/nvptx/src/target_region.cu

Index: openmp/libomptarget/deviceRTLs/nvptx/src/target_region.cu
===
--- /dev/null
+++ openmp/libomptarget/deviceRTLs/nvptx/src/target_region.cu
@@ -0,0 +1,197 @@
+//===-- target_region.cu  CUDA impl. of the target region interface -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file contains the implementation of the common target region interface.
+//
+//===--===//
+
+// Include the native definitions first as certain defines might be needed in
+// the common interface definition below.
+#include "omptarget-nvptx.h"
+#include "interface.h"
+
+#include "../../common/target_region.h"
+
+/// The pointer used to share memory between team threads.
+extern __device__ __shared__ target_region_shared_buffer
+_target_region_shared_memory;
+
+EXTERN char *__kmpc_target_region_kernel_get_shared_memory() {
+  return _target_region_shared_memory.begin();
+}
+EXTERN char *__kmpc_target_region_kernel_get_private_memory() {
+  return _target_region_shared_memory.begin() +
+ _target_region_shared_memory.get_offset();
+}
+
+/// Simple generic state machine for worker threads.
+INLINE static void
+__kmpc_target_region_state_machine(bool IsOMPRuntimeInitialized) {
+
+  do {
+void *WorkFn = 0;
+
+// Wait for the signal that we have a new work function.
+__kmpc_barrier_simple_spmd(NULL, 0);
+
+// Retrieve the work function from the runtime.
+bool IsActive = __kmpc_kernel_parallel(&WorkFn, IsOMPRuntimeInitialized);
+
+// If there is nothing more to do, break out of the state machine by
+// returning to the caller.
+if (!WorkFn)
+  return;
+
+if (IsActive) {
+  char *SharedVars = __kmpc_target_region_kernel_get_shared_memory();
+  char *PrivateVars = __kmpc_target_region_kernel_get_private_memory();
+
+  ((ParallelWorkFnTy)WorkFn)(SharedVars, PrivateVars);
+
+  __kmpc_kernel_end_parallel();
+}
+
+__kmpc_barrier_simple_spmd(NULL, 0);
+
+  } while (true);
+}
+
+/// Filter threads into masters and workers. If \p UseStateMachine is true,
+/// required workers will enter a state machine and be trapped there.
+/// Master and surplus worker threads will return from this function immediately
+/// while required workers will only return once there is no more work. The
+/// return value indicates if the thread is a master (1), a surplus worker (0),
+/// or a finished required worker released from the state machine (-1).
+INLINE static int8_t
+__kmpc_target_region_thread_filter(unsigned ThreadLimit, bool UseStateMachine,
+   bool IsOMPRuntimeInitialized) {
+
+  unsigned TId = GetThreadIdInBlock();
+  bool IsWorker = TId < ThreadLimit;
+
+  if (IsWorker) {
+if (UseStateMachine)
+  __kmpc_target_region_state_machine(IsOMPRuntimeInitialized);
+return -1;
+  }
+
+  return TId == GetMasterThreadID();
+}
+
+EXTERN int8_t __kmpc_target_region_kernel_init(bool UseSPMDMode,
+   bool UseStateMachine,
+   bool RequiresOMPRuntime,
+   bool RequiresDataSharing) {
+  unsigned NumThreads = GetNumberOfThreadsInBlock();
+
+  // Handle the SPMD case first.
+  if (UseSPMDMode) {
+
+__kmpc_spmd_kernel_init(NumThreads, RequiresOMPRuntime,
+RequiresDataSharing);
+
+if (RequiresDataSharing)
+   

[PATCH] D57460: [WIP] Prototype a generic kernel offloading interface

2019-02-26 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 188461.
jdoerfert added a comment.

Remove unneeded changes


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D57460/new/

https://reviews.llvm.org/D57460

Files:
  clang/lib/CodeGen/CGOpenMPRuntimeTRegion.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeTRegion.h
  clang/lib/CodeGen/CMakeLists.txt
  clang/lib/CodeGen/CodeGenModule.cpp
  llvm/include/llvm/InitializePasses.h
  llvm/include/llvm/LinkAllPasses.h
  llvm/include/llvm/Support/SaveAndRestore.h
  llvm/include/llvm/Transforms/IPO.h
  llvm/lib/Transforms/IPO/CMakeLists.txt
  llvm/lib/Transforms/IPO/IPO.cpp
  llvm/lib/Transforms/IPO/OpenMPOpt.cpp
  llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
  llvm/lib/Transforms/Scalar/SCCP.cpp
  llvm/test/Other/opt-O2-pipeline.ll
  llvm/test/Other/opt-O3-pipeline.ll
  llvm/test/Other/opt-Os-pipeline.ll
  openmp/libomptarget/cmake/Modules/LibomptargetNVPTXBitcodeLibrary.cmake
  openmp/libomptarget/deviceRTLs/common/target_region.h
  openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
  openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
  openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
  openmp/libomptarget/deviceRTLs/nvptx/src/target_region.cu

Index: openmp/libomptarget/deviceRTLs/nvptx/src/target_region.cu
===
--- /dev/null
+++ openmp/libomptarget/deviceRTLs/nvptx/src/target_region.cu
@@ -0,0 +1,197 @@
+//===-- target_region.cu  CUDA impl. of the target region interface -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file contains the implementation of the common target region interface.
+//
+//===--===//
+
+// Include the native definitions first as certain defines might be needed in
+// the common interface definition below.
+#include "omptarget-nvptx.h"
+#include "interface.h"
+
+#include "../../common/target_region.h"
+
+/// The pointer used to share memory between team threads.
+extern __device__ __shared__ target_region_shared_buffer
+_target_region_shared_memory;
+
+EXTERN char *__kmpc_target_region_kernel_get_shared_memory() {
+  return _target_region_shared_memory.begin();
+}
+EXTERN char *__kmpc_target_region_kernel_get_private_memory() {
+  return _target_region_shared_memory.begin() +
+ _target_region_shared_memory.get_offset();
+}
+
+/// Simple generic state machine for worker threads.
+INLINE static void
+__kmpc_target_region_state_machine(bool IsOMPRuntimeInitialized) {
+
+  do {
+void *WorkFn = 0;
+
+// Wait for the signal that we have a new work function.
+__kmpc_barrier_simple_spmd(NULL, 0);
+
+// Retrieve the work function from the runtime.
+bool IsActive = __kmpc_kernel_parallel(&WorkFn, IsOMPRuntimeInitialized);
+
+// If there is nothing more to do, break out of the state machine by
+// returning to the caller.
+if (!WorkFn)
+  return;
+
+if (IsActive) {
+  char *SharedVars = __kmpc_target_region_kernel_get_shared_memory();
+  char *PrivateVars = __kmpc_target_region_kernel_get_private_memory();
+
+  ((ParallelWorkFnTy)WorkFn)(SharedVars, PrivateVars);
+
+  __kmpc_kernel_end_parallel();
+}
+
+__kmpc_barrier_simple_spmd(NULL, 0);
+
+  } while (true);
+}
+
+/// Filter threads into masters and workers. If \p UseStateMachine is true,
+/// required workers will enter a state machine through and be trapped there.
+/// Master and surplus worker threads will return from this function immediately
+/// while required workers will only return once there is no more work. The
+/// return value indicates if the thread is a master (1), a surplus worker (0),
+/// or a finished required worker released from the state machine (-1).
+INLINE static int8_t
+__kmpc_target_region_thread_filter(unsigned ThreadLimit, bool UseStateMachine,
+   bool IsOMPRuntimeInitialized) {
+
+  unsigned TId = GetThreadIdInBlock();
+  bool IsWorker = TId < ThreadLimit;
+
+  if (IsWorker) {
+if (UseStateMachine)
+  __kmpc_target_region_state_machine(IsOMPRuntimeInitialized);
+return -1;
+  }
+
+  return TId == GetMasterThreadID();
+}
+
+EXTERN int8_t __kmpc_target_region_kernel_init(bool UseSPMDMode,
+   bool UseStateMachine,
+   bool RequiresOMPRuntime,
+   bool RequiresDataSharing) {
+  unsigned NumThreads = GetNumberOfThreadsInBlock();
+
+  // Handle the SPMD case first.
+  if (UseSPMDMode) {
+
+__kmpc_spmd_kernel_init(NumThreads, RequiresOMPRuntime,
+RequiresDataSharin

[PATCH] D59319: [OpenMP][Offloading][1/3] A generic and simple target region interface

2019-03-13 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert created this revision.
jdoerfert added reviewers: ABataev, arpith-jacob, guraypp, gtbercea, hfinkel.
Herald added a project: OpenMP.

This patch introduces an alternative OpenMP GPU kernel offloading
interface called target kernel region (or TRegion).

The commit includes the runtime library implementation for the NVPTX
device plugin, implemented mostly in terms of the existing
functionality.

The interface is deliberately simple to be easily analyzable in the
middle end. Design decisions included:

- Hide all (complex) implementation choices in the runtime library but allow 
complete removal of the abstraction once the runtime is inlined.
- Provide all runtime calls with sufficient, easily encoded information.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D59319

Files:
  openmp/libomptarget/cmake/Modules/LibomptargetNVPTXBitcodeLibrary.cmake
  openmp/libomptarget/deviceRTLs/common/target_region.h
  openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
  openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
  openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
  openmp/libomptarget/deviceRTLs/nvptx/src/target_region.cu

Index: openmp/libomptarget/deviceRTLs/nvptx/src/target_region.cu
===
--- /dev/null
+++ openmp/libomptarget/deviceRTLs/nvptx/src/target_region.cu
@@ -0,0 +1,197 @@
+//===-- target_region.cu  CUDA impl. of the target region interface -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file contains the implementation of the common target region interface.
+//
+//===--===//
+
+// Include the native definitions first as certain defines might be needed in
+// the common interface definition below.
+#include "omptarget-nvptx.h"
+#include "interface.h"
+
+#include "../../common/target_region.h"
+
+/// The pointer used to share memory between team threads.
+extern __device__ __shared__ target_region_shared_buffer
+_target_region_shared_memory;
+
+EXTERN char *__kmpc_target_region_kernel_get_shared_memory() {
+  return _target_region_shared_memory.begin();
+}
+EXTERN char *__kmpc_target_region_kernel_get_private_memory() {
+  return _target_region_shared_memory.begin() +
+ _target_region_shared_memory.get_offset();
+}
+
+/// Simple generic state machine for worker threads.
+INLINE static void
+__kmpc_target_region_state_machine(bool IsOMPRuntimeInitialized) {
+
+  do {
+void *WorkFn = 0;
+
+// Wait for the signal that we have a new work function.
+__kmpc_barrier_simple_spmd(NULL, 0);
+
+// Retrieve the work function from the runtime.
+bool IsActive = __kmpc_kernel_parallel(&WorkFn, IsOMPRuntimeInitialized);
+
+// If there is nothing more to do, break out of the state machine by
+// returning to the caller.
+if (!WorkFn)
+  return;
+
+if (IsActive) {
+  char *SharedVars = __kmpc_target_region_kernel_get_shared_memory();
+  char *PrivateVars = __kmpc_target_region_kernel_get_private_memory();
+
+  ((ParallelWorkFnTy)WorkFn)(SharedVars, PrivateVars);
+
+  __kmpc_kernel_end_parallel();
+}
+
+__kmpc_barrier_simple_spmd(NULL, 0);
+
+  } while (true);
+}
+
+/// Filter threads into masters and workers. If \p UseStateMachine is true,
+/// required workers will enter a state machine through and be trapped there.
+/// Master and surplus worker threads will return from this function immediately
+/// while required workers will only return once there is no more work. The
+/// return value indicates if the thread is a master (1), a surplus worker (0),
+/// or a finished required worker released from the state machine (-1).
+INLINE static int8_t
+__kmpc_target_region_thread_filter(unsigned ThreadLimit, bool UseStateMachine,
+   bool IsOMPRuntimeInitialized) {
+
+  unsigned TId = GetThreadIdInBlock();
+  bool IsWorker = TId < ThreadLimit;
+
+  if (IsWorker) {
+if (UseStateMachine)
+  __kmpc_target_region_state_machine(IsOMPRuntimeInitialized);
+return -1;
+  }
+
+  return TId == GetMasterThreadID();
+}
+
+EXTERN int8_t __kmpc_target_region_kernel_init(bool UseSPMDMode,
+   bool UseStateMachine,
+   bool RequiresOMPRuntime,
+   bool RequiresDataSharing) {
+  unsigned NumThreads = GetNumberOfThreadsInBlock();
+
+  // Handle the SPMD case first.
+  if (UseSPMDMode) {
+
+__kmpc_spmd_kernel_init(NumThreads, RequiresOMPRuntime,
+RequiresDataSharing);
+
+if (RequiresDataSharing)
+  __kmpc_data_sharing_init_stac

[PATCH] D59319: [OpenMP][Offloading][1/3] A generic and simple target region interface

2019-03-13 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 190484.
jdoerfert added a comment.

Simplify the commit further


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59319/new/

https://reviews.llvm.org/D59319

Files:
  openmp/libomptarget/deviceRTLs/common/target_region.h
  openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
  openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
  openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
  openmp/libomptarget/deviceRTLs/nvptx/src/target_region.cu

Index: openmp/libomptarget/deviceRTLs/nvptx/src/target_region.cu
===
--- /dev/null
+++ openmp/libomptarget/deviceRTLs/nvptx/src/target_region.cu
@@ -0,0 +1,197 @@
+//===-- target_region.cu  CUDA impl. of the target region interface -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file contains the implementation of the common target region interface.
+//
+//===--===//
+
+// Include the native definitions first as certain defines might be needed in
+// the common interface definition below.
+#include "omptarget-nvptx.h"
+#include "interface.h"
+
+#include "../../common/target_region.h"
+
+/// The pointer used to share memory between team threads.
+extern __device__ __shared__ target_region_shared_buffer
+_target_region_shared_memory;
+
+EXTERN char *__kmpc_target_region_kernel_get_shared_memory() {
+  return _target_region_shared_memory.begin();
+}
+EXTERN char *__kmpc_target_region_kernel_get_private_memory() {
+  return _target_region_shared_memory.begin() +
+ _target_region_shared_memory.get_offset();
+}
+
+/// Simple generic state machine for worker threads.
+INLINE static void
+__kmpc_target_region_state_machine(bool IsOMPRuntimeInitialized) {
+
+  do {
+void *WorkFn = 0;
+
+// Wait for the signal that we have a new work function.
+__kmpc_barrier_simple_spmd(NULL, 0);
+
+// Retrieve the work function from the runtime.
+bool IsActive = __kmpc_kernel_parallel(&WorkFn, IsOMPRuntimeInitialized);
+
+// If there is nothing more to do, break out of the state machine by
+// returning to the caller.
+if (!WorkFn)
+  return;
+
+if (IsActive) {
+  char *SharedVars = __kmpc_target_region_kernel_get_shared_memory();
+  char *PrivateVars = __kmpc_target_region_kernel_get_private_memory();
+
+  ((ParallelWorkFnTy)WorkFn)(SharedVars, PrivateVars);
+
+  __kmpc_kernel_end_parallel();
+}
+
+__kmpc_barrier_simple_spmd(NULL, 0);
+
+  } while (true);
+}
+
+/// Filter threads into masters and workers. If \p UseStateMachine is true,
+/// required workers will enter a state machine through and be trapped there.
+/// Master and surplus worker threads will return from this function immediately
+/// while required workers will only return once there is no more work. The
+/// return value indicates if the thread is a master (1), a surplus worker (0),
+/// or a finished required worker released from the state machine (-1).
+INLINE static int8_t
+__kmpc_target_region_thread_filter(unsigned ThreadLimit, bool UseStateMachine,
+   bool IsOMPRuntimeInitialized) {
+
+  unsigned TId = GetThreadIdInBlock();
+  bool IsWorker = TId < ThreadLimit;
+
+  if (IsWorker) {
+if (UseStateMachine)
+  __kmpc_target_region_state_machine(IsOMPRuntimeInitialized);
+return -1;
+  }
+
+  return TId == GetMasterThreadID();
+}
+
+EXTERN int8_t __kmpc_target_region_kernel_init(bool UseSPMDMode,
+   bool UseStateMachine,
+   bool RequiresOMPRuntime,
+   bool RequiresDataSharing) {
+  unsigned NumThreads = GetNumberOfThreadsInBlock();
+
+  // Handle the SPMD case first.
+  if (UseSPMDMode) {
+
+__kmpc_spmd_kernel_init(NumThreads, RequiresOMPRuntime,
+RequiresDataSharing);
+
+if (RequiresDataSharing)
+  __kmpc_data_sharing_init_stack_spmd();
+
+return 1;
+  }
+
+  // Reserve one WARP in non-SPMD mode for the masters.
+  unsigned ThreadLimit = NumThreads - WARPSIZE;
+  int8_t FilterVal = __kmpc_target_region_thread_filter(
+  ThreadLimit, UseStateMachine, RequiresOMPRuntime);
+
+  // If the filter returns 1 the executing thread is a team master which will
+  // initialize the kernel in the following.
+  if (FilterVal == 1) {
+__kmpc_kernel_init(ThreadLimit, RequiresOMPRuntime);
+__kmpc_data_sharing_init_stack();
+_target_region_shared_memory.init();
+  }
+
+  return FilterVal;
+}
+
+EXTERN void __kmpc_target_region_kernel_deinit(bool

[PATCH] D59328: [OpenMP][Offloading][2/3] Codegen for target regions (TRegions)

2019-03-13 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert created this revision.
jdoerfert added a project: OpenMP.
jdoerfert added reviewers: ABataev, arpith-jacob, guraypp, gtbercea, hfinkel.
Herald added a project: clang.

The commit includes the Clang code generation for OpenMP target
constructs based on the target region (TRegion) interface.

The interface was introduced in https://reviews.llvm.org/D59319 .

This target code generation is a vastly simplified clone of the NVPTX
code generation but

- there is no NVPTX (or other) target specific code, at least there should not 
be any. The "checkArchForUnifiedAddressing" functionality should therefore be 
moved to a target specific location later on.
- we provide hooks for subclasses in order to perform front-end analysis, as an 
alternative to LLVM-based optimizations, e.g., to enable SPMD-mode. (See 
isKnownSPMDMode, mayNeedRuntimeSupport, and mayPerformDataSharing)

The interface is deliberately simple to be easily analyzable in the
middle end. Design decisions included:

- Hide all (complex) implementation choices in the runtime library but allow 
complete removal of the abstraction once the runtime is inlined.
- Provide all runtime calls with sufficient, easily encoded information.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D59328

Files:
  clang/lib/CodeGen/CGOpenMPRuntimeTRegion.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeTRegion.h
  clang/lib/CodeGen/CMakeLists.txt
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/test/OpenMP/target_tregion_no_SPMD_mode.c

Index: clang/test/OpenMP/target_tregion_no_SPMD_mode.c
===
--- /dev/null
+++ clang/test/OpenMP/target_tregion_no_SPMD_mode.c
@@ -0,0 +1,72 @@
+// RUN: %clang_cc1 -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -fopenmp -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -mllvm -openmp-tregion-runtime -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
+
+// CHECK: loop_in_loop_in_tregion
+// CHECK:  %0 = call i8 @__kmpc_target_region_kernel_init(i1 false, i1 true, i1 true, i1 true)
+// CHECK:  call void @__kmpc_target_region_kernel_deinit(i1 false, i1 true)
+void loop_in_loop_in_tregion(int *A, int *B) {
+#pragma omp target
+  for (int i = 0; i < 512; i++) {
+for (int j = 0; j < 1024; j++)
+  A[j] += B[i + j];
+  }
+}
+
+// CHECK: parallel_loops_and_accesses_in_tregion
+// CHECK:  %0 = call i8 @__kmpc_target_region_kernel_init(i1 false, i1 true, i1 true, i1 true)
+// CHECK:  call void @__kmpc_target_region_kernel_parallel(i1 false, i1 true, void (i8*, i8*)* @.omp_TRegion._wrapper, i8* undef, i16 0, i8* %2, i16 16, i1 false)
+// CHECK:  call void @__kmpc_target_region_kernel_parallel(i1 false, i1 true, void (i8*, i8*)* @.omp_TRegion.1_wrapper, i8* undef, i16 0, i8* %5, i16 16, i1 false)
+// CHECK:  call void @__kmpc_target_region_kernel_parallel(i1 false, i1 true, void (i8*, i8*)* @.omp_TRegion.2_wrapper, i8* undef, i16 0, i8* %8, i16 16, i1 false)
+// CHECK:  call void @__kmpc_target_region_kernel_deinit(i1 false, i1 true)
+void parallel_loops_and_accesses_in_tregion(int *A, int *B) {
+#pragma omp target
+  {
+#pragma omp parallel for
+for (int j = 0; j < 1024; j++)
+  A[j] += B[0 + j];
+#pragma omp parallel for
+for (int j = 0; j < 1024; j++)
+  A[j] += B[1 + j];
+#pragma omp parallel for
+for (int j = 0; j < 1024; j++)
+  A[j] += B[2 + j];
+
+// This needs a guard in SPMD mode
+A[0] = B[0];
+  }
+}
+
+void extern_func();
+static void parallel_loop(int *A, int *B, int i) {
+#pragma omp parallel for
+  for (int j = 0; j < 1024; j++)
+A[j] += B[i + j];
+}
+
+// CHECK: parallel_loop_in_function_in_loop_with_global_acc_in_tregion
+// CHECK:  %1 = call i8 @__kmpc_target_region_kernel_init(i1 false, i1 true, i1 true, i1 true)
+// CHECK:  call void @__kmpc_target_region_kernel_deinit(i1 false, i1 true)
+int Global[512];
+void parallel_loop_in_function_in_loop_with_global_acc_in_tregion(int *A, int *B) {
+#pragma omp target
+  for (int i = 0; i < 512; i++) {
+parallel_loop(A, B, i);
+Global[i]++;
+  }
+}
+
+// CHECK: parallel_loop
+// CHECK:  call void @__kmpc_target_region_kernel_parallel(i1 false, i1 true, void (i8*, i8*)* @.omp_TRegion.3_wrapper, i8* undef, i16 0, i8* %0, i16 24, i1 false)
+
+// CHECK: parallel_loops_in_functions_and_extern_func_in_tregion
+// CHECK:  %0 = call i8 @__kmpc_target_region_kernel_init(i1 false, i1 true, i1 true, i1 true)
+// CHECK:  call void @__kmpc_target_region_kernel_deinit(i1 false, i1 true)
+void parallel_loops_in_functions_and_extern_func_in_tregion(int *A, int *B) {
+#pragma omp target
+  {
+parallel_loop(A, B, 1);
+parallel_loop(A, B, 2);
+extern_func();
+parallel_loop(A, B, 3);
+  }
+}
Index: clang/lib/CodeGen/CodeGenModule.cpp
===
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang

[PATCH] D57460: [OpenMP][Offloading] A generic and simple OpenMP target kernel interface

2019-03-13 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert abandoned this revision.
jdoerfert added a comment.

Patch was split, new revisions can be found here:

  OpenMP: https://reviews.llvm.org/D59319
   Clang: https://reviews.llvm.org/D59328
    LLVM: https://reviews.llvm.org/D59331


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D57460/new/

https://reviews.llvm.org/D57460



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59319: [OpenMP][Offloading][1/3] A generic and simple target region interface

2019-03-13 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert marked 5 inline comments as done.
jdoerfert added inline comments.



Comment at: openmp/libomptarget/deviceRTLs/common/target_region.h:27
+
+/// The target region _kernel_ interface for GPUs
+///

ABataev wrote:
> All exported functions are declared in the `interface.h` file. I don't think 
> we need an extra interface file here
`interface.h`, or to be more precise for people that do not know, 
`deviceRTLs/nvptx/src/interface.h`, is nvptx specific. This file, 
`deviceRTLs/common/target_region.h`, is by design target agnostic and not 
placed _under_ the nvptx subfolder. If you are willing to move `interface.h` 
into a common space and remove the nvptx specific functions we can merge the 
two. Otherwise, I have strong reservations against that and good reason not to 
do it.



Comment at: openmp/libomptarget/deviceRTLs/common/target_region.h:100
+///
+EXTERN int8_t __kmpc_target_region_kernel_init(bool UseSPMDMode,
+   bool RequiresOMPRuntime,

ABataev wrote:
> Better to use `ident_loc` for passing info about execution mode and 
> full/lightweight runtime.
Could you please explain why you think that? Adding indirection through a 
structure does not really seem beneficial to me.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59319/new/

https://reviews.llvm.org/D59319



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59319: [OpenMP][Offloading][1/3] A generic and simple target region interface

2019-03-14 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added inline comments.



Comment at: openmp/libomptarget/deviceRTLs/common/target_region.h:27
+
+/// The target region _kernel_ interface for GPUs
+///

ABataev wrote:
> jdoerfert wrote:
> > ABataev wrote:
> > > All exported functions are declared in the `interface.h` file. I don't 
> > > think we need an extra interface file here
> > `interface.h`, or to be more precise for people that do not know, 
> > `deviceRTLs/nvptx/src/interface.h`, is nvptx specific. This file, 
> > `deviceRTLs/common/target_region.h`, is by design target agnostic and not 
> > placed _under_ the nvptx subfolder. If you are willing to move 
> > `interface.h` into a common space and remove the nvptx specific functions 
> > we can merge the two. Otherwise, I have strong reservations against that and 
> > good reason not to do it.
> I see that currently it is written in Cuda. It means, it targets NVidia GPUs, 
> at least at the moment. I'm fine to put this header file into the common 
> directory, if you're sure that this is really target agnostic. But maybe just 
> for a start we should put it to NVPTX directory? Later, when you or somebody 
> else will add support for other GPUs and he/she will find out that these 
> functions are really target agnostic, they can be moved into the common 
> directory?
> I see that currently it is written in Cuda. It means, it targets NVidia GPUs, 
> at least at the moment

How do you see that? (I hope we both talk about this file, correct?)


> But maybe just for a start we should put it to NVPTX directory?

Why? What is the benefit? If we want it to be agnostic, regardless of the 
current state, it should be developed _outside_ of the target specific 
directories.




Comment at: openmp/libomptarget/deviceRTLs/common/target_region.h:100
+///
+EXTERN int8_t __kmpc_target_region_kernel_init(bool UseSPMDMode,
+   bool RequiresOMPRuntime,

ABataev wrote:
> jdoerfert wrote:
> > ABataev wrote:
> > > Better to use `ident_loc` for passing info about execution mode and 
> > > full/lightweight runtime.
> > Could you please explain why you think that? Adding indirection through a 
> > structure does not really seem beneficial to me.
> Almost all functions from libomp rely on `ident_loc`. The functions, which 
> were added for NVPTX without this parameter had a lot of problems later and 
> most of them were replaced with the functions with this parameter type. Plus, 
> this parameter is used for OMPD/OMPT and it may be important for future 
> OMPD/OMPT support.
> Almost all functions from libomp rely on ident_loc.

If you look at the implementation of this interface for NVPTX you will see that 
the called functions do not take `ident_loc` values. When you create the calls 
from the existing NVPTX code generation in clang, the current code **does not 
use** `ident_loc` for similar functions, see:
`__kmpc_kernel_init(kmp_int32 thread_limit, int16_t RequiresOMPRuntime)`,
`__kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized)`,
`__kmpc_spmd_kernel_init(kmp_int32 thread_limit, int16_t RequiresOMPRuntime, 
int16_t RequiresDataSharing)`,
`__kmpc_kernel_parallel(void **outlined_function, int16_t 
IsOMPRuntimeInitialized)`,
...



> Plus, this parameter is used for OMPD/OMPT and it may be important for future 
> OMPD/OMPT support.

If we at some point need to make the options permanent in an `ident_loc` we can 
simply pass an `ident_loc` and require it to be initialized by the call. 
Cluttering the user code with stores and indirection is exactly what I do want 
to avoid.



Comment at: openmp/libomptarget/deviceRTLs/common/target_region.h:124
+/// unpacking code.
+typedef void (*ParallelWorkFnTy)(char * /* SharedValues */,
+ char * /* PrivateValues */);

ABataev wrote:
> We used `void *` for buffers usually, I think it is better to use `void *` 
> here too instead of `char *`.
Thanks, fixed.



Comment at: openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu:70
+
+__device__ __shared__ target_region_shared_buffer _target_region_shared_memory;
+

ABataev wrote:
> It would be good to store it in the global memory rather than in the shared to 
> save the shared memory. Also, we already are using several shared memory 
> buffers for different purposes, it would be good to merge them somehow to 
> reduce pressure on shared memory.
I would have reused your buffer but it is, for reasons unclear to me, not a 
byte-wise buffer but an array of `void *` and also used as such. Using it as a 
byte-wise buffer might cause problems or at least confusion. Changing it to a 
byte-wise buffer would be fine with me. I don't need a separate buffer but just 
one with the functionality implemented in this one.



Comment at: openmp/libomptarge

[PATCH] D59319: [OpenMP][Offloading][1/3] A generic and simple target region interface

2019-03-14 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 190710.
jdoerfert marked 6 inline comments as done.
jdoerfert added a comment.

Change char* to void*


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59319/new/

https://reviews.llvm.org/D59319

Files:
  openmp/libomptarget/deviceRTLs/common/target_region.h
  openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
  openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
  openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
  openmp/libomptarget/deviceRTLs/nvptx/src/target_region.cu

Index: openmp/libomptarget/deviceRTLs/nvptx/src/target_region.cu
===
--- /dev/null
+++ openmp/libomptarget/deviceRTLs/nvptx/src/target_region.cu
@@ -0,0 +1,197 @@
+//===-- target_region.cu  CUDA impl. of the target region interface -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file contains the implementation of the common target region interface.
+//
+//===--===//
+
+// Include the native definitions first as certain defines might be needed in
+// the common interface definition below.
+#include "omptarget-nvptx.h"
+#include "interface.h"
+
+#include "../../common/target_region.h"
+
+/// The pointer used to share memory between team threads.
+extern __device__ __shared__ target_region_shared_buffer
+_target_region_shared_memory;
+
+EXTERN char *__kmpc_target_region_kernel_get_shared_memory() {
+  return _target_region_shared_memory.begin();
+}
+EXTERN char *__kmpc_target_region_kernel_get_private_memory() {
+  return _target_region_shared_memory.begin() +
+ _target_region_shared_memory.get_offset();
+}
+
+/// Simple generic state machine for worker threads.
+INLINE static void
+__kmpc_target_region_state_machine(bool IsOMPRuntimeInitialized) {
+
+  do {
+void *WorkFn = 0;
+
+// Wait for the signal that we have a new work function.
+__kmpc_barrier_simple_spmd(NULL, 0);
+
+// Retrieve the work function from the runtime.
+bool IsActive = __kmpc_kernel_parallel(&WorkFn, IsOMPRuntimeInitialized);
+
+// If there is nothing more to do, break out of the state machine by
+// returning to the caller.
+if (!WorkFn)
+  return;
+
+if (IsActive) {
+  char *SharedVars = __kmpc_target_region_kernel_get_shared_memory();
+  char *PrivateVars = __kmpc_target_region_kernel_get_private_memory();
+
+  ((ParallelWorkFnTy)WorkFn)(SharedVars, PrivateVars);
+
+  __kmpc_kernel_end_parallel();
+}
+
+__kmpc_barrier_simple_spmd(NULL, 0);
+
+  } while (true);
+}
+
+/// Filter threads into masters and workers. If \p UseStateMachine is true,
+/// required workers will enter a state machine through and be trapped there.
+/// Master and surplus worker threads will return from this function immediately
+/// while required workers will only return once there is no more work. The
+/// return value indicates if the thread is a master (1), a surplus worker (0),
+/// or a finished required worker released from the state machine (-1).
+INLINE static int8_t
+__kmpc_target_region_thread_filter(unsigned ThreadLimit, bool UseStateMachine,
+   bool IsOMPRuntimeInitialized) {
+
+  unsigned TId = GetThreadIdInBlock();
+  bool IsWorker = TId < ThreadLimit;
+
+  if (IsWorker) {
+if (UseStateMachine)
+  __kmpc_target_region_state_machine(IsOMPRuntimeInitialized);
+return -1;
+  }
+
+  return TId == GetMasterThreadID();
+}
+
+EXTERN int8_t __kmpc_target_region_kernel_init(bool UseSPMDMode,
+   bool UseStateMachine,
+   bool RequiresOMPRuntime,
+   bool RequiresDataSharing) {
+  unsigned NumThreads = GetNumberOfThreadsInBlock();
+
+  // Handle the SPMD case first.
+  if (UseSPMDMode) {
+
+__kmpc_spmd_kernel_init(NumThreads, RequiresOMPRuntime,
+RequiresDataSharing);
+
+if (RequiresDataSharing)
+  __kmpc_data_sharing_init_stack_spmd();
+
+return 1;
+  }
+
+  // Reserve one WARP in non-SPMD mode for the masters.
+  unsigned ThreadLimit = NumThreads - WARPSIZE;
+  int8_t FilterVal = __kmpc_target_region_thread_filter(
+  ThreadLimit, UseStateMachine, RequiresOMPRuntime);
+
+  // If the filter returns 1 the executing thread is a team master which will
+  // initialize the kernel in the following.
+  if (FilterVal == 1) {
+__kmpc_kernel_init(ThreadLimit, RequiresOMPRuntime);
+__kmpc_data_sharing_init_stack();
+_target_region_shared_memory.init();
+  }
+
+  return FilterVal;
+}
+
+EXTERN void __

[PATCH] D59319: [OpenMP][Offloading][1/3] A generic and simple target region interface

2019-03-14 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert marked 2 inline comments as done.
jdoerfert added inline comments.



Comment at: openmp/libomptarget/deviceRTLs/common/target_region.h:27
+
+/// The target region _kernel_ interface for GPUs
+///

ABataev wrote:
> jdoerfert wrote:
> > ABataev wrote:
> > > jdoerfert wrote:
> > > > ABataev wrote:
> > > > > All exported functions are declared in the `interface.h` file. I 
> > > > > don't think we need an extra interface file here
> > > > `interface.h`, or to be more precise for people that do not know, 
> > > > `deviceRTLs/nvptx/src/interface.h`, is nvptx specific. This file, 
> > > > `deviceRTLs/common/target_region.h`, is by design target agnostic and 
> > > > not placed _under_ the nvptx subfolder. If you are willing to move 
> > > > `interface.h` into a common space and remove the nvptx specific 
> > > > functions we can merge the two. Otherwise, I have strong reservations 
> > > > against that and good reason not to do it.
> > > I see that currently it is written in Cuda. It means, it targets NVidia 
> > > GPUs, at least at the moment. I'm fine to put this header file into the 
> > > common directory, if you're sure that this is really target agnostic. But 
> > > maybe just for a start we should put it to NVPTX directory? Later, when 
> > > you or somebody else will add support for other GPUs and he/she will find 
> > > out that these functions are really target agnostic, they can be moved 
> > > into the common directory?
> > > I see that currently it is written in Cuda. It means, it targets NVidia 
> > > GPUs, at least at the moment
> > 
> > How do you see that? (I hope we both talk about this file, correct?)
> > 
> > 
> > > But maybe just for a start we should put it to NVPTX directory?
> > 
> > Why? What is the benefit? If we want it to be agnostic, regardless of the 
> > current state, it should be developed _outside_ of the target specific 
> > directories.
> > 
> I'm not talking about this particular file, just like I said we can put it 
> into `common` subdirectory. I'm talking about the implementation files. They 
> all are written in Cuda, no?
> But it is not proved yet that this solution is target agnostic. Did you test 
> it for AMD?
> I'm not talking about this particular file, just like I said we can put it 
> into common subdirectory.

OK. It is (the only file in the common folder for now).


> I'm talking about the implementation files. They all are written in Cuda, no?

Yes, Cuda, and placed under the nvptx folder for that reason. That is what you 
want, correct?


> But it is not proved yet that this solution is target agnostic. Did you test 
> it for AMD?

What do you mean by solution? I do not have a second implementation of the 
interface but nothing up to the implementation of the interface is target 
aware. By construction, this means it will work for anything we can implement 
the interface in. 

Why do you fight so hard against this? What exactly do you want to change here? 
Given the last comment, and assuming I understand you correctly, the files are 
all exactly where you want them to be. That the wording sometimes states 
"target agnostic" is a sign of intent, even if for some currently unknown 
reason it would not hold true.





Comment at: openmp/libomptarget/deviceRTLs/common/target_region.h:100
+///
+EXTERN int8_t __kmpc_target_region_kernel_init(bool UseSPMDMode,
+   bool RequiresOMPRuntime,

ABataev wrote:
> jdoerfert wrote:
> > ABataev wrote:
> > > jdoerfert wrote:
> > > > ABataev wrote:
> > > > > Better to use `ident_loc` for passing info about execution mode and 
> > > > > full/lightweight runtime.
> > > > Could you please explain why you think that? Adding indirection through 
> > > > a structure does not really seem beneficial to me.
> > > Almost all function from libomp rely on `ident_loc`. The functions, which 
> > > were added for NVPTX without this parameter had a lot of problems later 
> > > and most of them were replaced with the functions with this parameter 
> > > type. Plus, this parameter is used for OMPD/OMPT and it may be important 
> > > for future OMPD/OMPT support.
> > > Almost all function from libomp rely on ident_loc.
> > 
> > If you look at the implementation of this interface for NVPTX you will see 
> > that the called functions do not take `ident_loc` values. When you create 
> > the calls from the existing NVPTX code generation in clang, the current 
> > code **does not use** `ident_loc` for similar functions, see:
> > `___kmpc_kernel_init(kmp_int32 thread_limit, int16_t RequiresOMPRuntime)`,
> > `__kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized)`,
> > `__kmpc_spmd_kernel_init(kmp_int32 thread_limit, int16_t 
> > RequiresOMPRuntime, int16_t RequiresDataSharing)`,
> > `__kmpc_kernel_parallel(void **outlined_function, int16_t 
> > IsOMPRuntimeInitialized)`,
> > ...
> > 
> > 
> > 
> > > Plus, this parameter is used for OMPD/O

[PATCH] D59319: [OpenMP][Offloading][1/3] A generic and simple target region interface

2019-03-14 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 190717.
jdoerfert marked 4 inline comments as done.
jdoerfert added a comment.

Replace more char* with void*


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59319/new/

https://reviews.llvm.org/D59319

Files:
  openmp/libomptarget/deviceRTLs/common/target_region.h
  openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
  openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
  openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
  openmp/libomptarget/deviceRTLs/nvptx/src/target_region.cu

Index: openmp/libomptarget/deviceRTLs/nvptx/src/target_region.cu
===
--- /dev/null
+++ openmp/libomptarget/deviceRTLs/nvptx/src/target_region.cu
@@ -0,0 +1,197 @@
+//===-- target_region.cu  CUDA impl. of the target region interface -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file contains the implementation of the common target region interface.
+//
+//===--===//
+
+// Include the native definitions first as certain defines might be needed in
+// the common interface definition below.
+#include "omptarget-nvptx.h"
+#include "interface.h"
+
+#include "../../common/target_region.h"
+
+/// The pointer used to share memory between team threads.
+extern __device__ __shared__ target_region_shared_buffer
+_target_region_shared_memory;
+
+EXTERN void *__kmpc_target_region_kernel_get_shared_memory() {
+  return _target_region_shared_memory.begin(); // Start of the shared buffer.
+}
+EXTERN void *__kmpc_target_region_kernel_get_private_memory() {
+  return _target_region_shared_memory.begin() + // Private memory starts
+ _target_region_shared_memory.get_offset(); // get_offset() past begin().
+}
+
+/// Simple generic state machine for worker threads; returns once no work is left.
+INLINE static void
+__kmpc_target_region_state_machine(bool IsOMPRuntimeInitialized) {
+
+  do {
+void *WorkFn = 0;
+
+// Wait for the signal that we have a new work function.
+__kmpc_barrier_simple_spmd(NULL, 0);
+
+// Retrieve the work function from the runtime.
+bool IsActive = __kmpc_kernel_parallel(&WorkFn, IsOMPRuntimeInitialized);
+
+// If there is nothing more to do, break out of the state machine by
+// returning to the caller.
+if (!WorkFn)
+  return;
+
+if (IsActive) { // Inactive threads skip straight to the closing barrier.
+  void *SharedVars = __kmpc_target_region_kernel_get_shared_memory();
+  void *PrivateVars = __kmpc_target_region_kernel_get_private_memory();
+
+  ((ParallelWorkFnTy)WorkFn)(SharedVars, PrivateVars);
+
+  __kmpc_kernel_end_parallel();
+}
+
+__kmpc_barrier_simple_spmd(NULL, 0); // Barrier closing this round of work.
+
+  } while (true);
+}
+
+/// Filter threads into masters and workers. If \p UseStateMachine is true,
+/// required workers will enter the state machine and be trapped there.
+/// Master and surplus worker threads will return from this function immediately
+/// while required workers will only return once there is no more work. The
+/// return value indicates if the thread is a master (1), a surplus worker (0),
+/// or a required worker (-1), whether or not it ran the state machine first.
+INLINE static int8_t
+__kmpc_target_region_thread_filter(unsigned ThreadLimit, bool UseStateMachine,
+   bool IsOMPRuntimeInitialized) {
+
+  unsigned TId = GetThreadIdInBlock();
+  bool IsWorker = TId < ThreadLimit; // Ids below the limit are workers.
+
+  if (IsWorker) {
+if (UseStateMachine)
+  __kmpc_target_region_state_machine(IsOMPRuntimeInitialized);
+return -1;
+  }
+
+  return TId == GetMasterThreadID(); // 1 for the master, 0 otherwise.
+}
+
+EXTERN int8_t __kmpc_target_region_kernel_init(bool UseSPMDMode,
+   bool UseStateMachine,
+   bool RequiresOMPRuntime,
+   bool RequiresDataSharing) {
+  unsigned NumThreads = GetNumberOfThreadsInBlock();
+
+  // Handle the SPMD case first; no thread filtering happens on this path.
+  if (UseSPMDMode) {
+
+__kmpc_spmd_kernel_init(NumThreads, RequiresOMPRuntime,
+RequiresDataSharing);
+
+if (RequiresDataSharing)
+  __kmpc_data_sharing_init_stack_spmd();
+
+return 1; // SPMD mode: every thread returns 1.
+  }
+
+  // Reserve one WARP in non-SPMD mode for the masters (assumes NumThreads >= WARPSIZE).
+  unsigned ThreadLimit = NumThreads - WARPSIZE;
+  int8_t FilterVal = __kmpc_target_region_thread_filter(
+  ThreadLimit, UseStateMachine, RequiresOMPRuntime);
+
+  // If the filter returns 1 the executing thread is a team master which will
+  // initialize the kernel in the following.
+  if (FilterVal == 1) {
+__kmpc_kernel_init(ThreadLimit, RequiresOMPRuntime);
+__kmpc_data_sharing_init_stack();
+_target_region_shared_memory.init();
+  }
+
+  return FilterVal; // Non-SPMD: the thread filter's result is passed through.
+}
+
+EXTERN

[PATCH] D59319: [OpenMP][Offloading][1/3] A generic and simple target region interface

2019-03-14 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert marked 3 inline comments as done.
jdoerfert added inline comments.



Comment at: openmp/libomptarget/deviceRTLs/common/target_region.h:100
+///
+EXTERN int8_t __kmpc_target_region_kernel_init(bool UseSPMDMode,
+   bool RequiresOMPRuntime,

ABataev wrote:
> jdoerfert wrote:
> > ABataev wrote:
> > > jdoerfert wrote:
> > > > ABataev wrote:
> > > > > jdoerfert wrote:
> > > > > > ABataev wrote:
> > > > > > > Better to use `ident_loc` for passing info about execution mode 
> > > > > > > and full/lightweight runtime.
> > > > > > Could you please explain why you think that? Adding indirection 
> > > > > > through a structure does not really seem beneficial to me.
> > > > > Almost all function from libomp rely on `ident_loc`. The functions, 
> > > > > which were added for NVPTX without this parameter had a lot of 
> > > > > problems later and most of them were replaced with the functions with 
> > > > > this parameter type. Plus, this parameter is used for OMPD/OMPT and 
> > > > > it may be important for future OMPD/OMPT support.
> > > > > Almost all function from libomp rely on ident_loc.
> > > > 
> > > > If you look at the implementation of this interface for NVPTX you will 
> > > > see that the called functions do not take `ident_loc` values. When you 
> > > > create the calls from the existing NVPTX code generation in clang, the 
> > > > current code **does not use** `ident_loc` for similar functions, see:
> > > > `___kmpc_kernel_init(kmp_int32 thread_limit, int16_t 
> > > > RequiresOMPRuntime)`,
> > > > `__kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized)`,
> > > > `__kmpc_spmd_kernel_init(kmp_int32 thread_limit, int16_t 
> > > > RequiresOMPRuntime, int16_t RequiresDataSharing)`,
> > > > `__kmpc_kernel_parallel(void **outlined_function, int16_t 
> > > > IsOMPRuntimeInitialized)`,
> > > > ...
> > > > 
> > > > 
> > > > 
> > > > > Plus, this parameter is used for OMPD/OMPT and it may be important 
> > > > > for future OMPD/OMPT support.
> > > > 
> > > > If we at some point need to make the options permanent in an 
> > > > `ident_loc` we can simply pass an `ident_loc` and require it to be 
> > > > initialized by the call. Cluttering the user code with stores and 
> > > > indirection is exactly what I do want to avoid.
> > > 1. The new functions rely on `ident_loc`. We had to add those new 
> > > functions because the old ones did not use it and it was bad design 
> > > decision. Now we need to fix this. I suggest you do everything right from 
> > > the very beginning rather than fixing this later by adding extra entry 
> > > points to support OMPT/OMPD or something else, for example.
> > > 2. No, you cannot simply change the interface of the library to keep the 
> > > compatibility with the previous versions of the compiler/library. You 
> > > will need to add the new entries.  
> > Let's start this one again because I still haven't understood. Why do we 
> > need to populate the `ident_loc` again? What information has to be in there 
> > at which point? I want this to be clear because a lot of other "design 
> > decisions" of the existing code base are in my opinion not necessary and 
> > consequently missing here. That includes, for example, various global 
> > variables. If we have a description of the problem you try to solve with 
> > the `ident_loc` we might be able to find a way that cuts down on state.
> > 
> > 
> > Regarding the "compatibility", this is not a stable interface people can 
> > rely on. Whatever is committed in this first patch __is not__ set in stone. 
> > Also, we can _always_ add a `__kmpc_init_ident_loc()` function after 
> > the fact.
> Ident_loc holds the data about current source code location, execution mode 
> and is full runtime required or not. Also, it is used in OMPT/OMPD support.
> Regarding "compatibility" libraries must be most stable part of the compiler, 
> because the user might need to link the old object file/library with the new 
> one. Because of this the new versions of libraries must be compatible with 
> old ones. And you need to maintain the deprecated parts to keep the 
> compatibility with the previous versions. All these libs already have a lot 
> of old code that because of the initial poor design and we need to maintain 
> them. I would like to avoid this situation with this patch.
> Ident_loc holds the data about current source code location, execution mode 
> and is full runtime required or not. Also, it is used in OMPT/OMPD support.

We can store that information through a `__kmpc_init_ident_loc()` call once 
needed.


> Regarding "compatibility" libraries must be most stable part of the compiler, 
> because the user might need to link the old object file/library with the new 
> one. Because of this the new versions of libraries must be compatible with 
> old ones. And you need to maintain the deprecated parts to keep the 
> compatibility with the previous versi

[PATCH] D59319: [OpenMP][Offloading][1/3] A generic and simple target region interface

2019-03-14 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added inline comments.



Comment at: openmp/libomptarget/deviceRTLs/common/target_region.h:100
+///
+EXTERN int8_t __kmpc_target_region_kernel_init(bool UseSPMDMode,
+   bool RequiresOMPRuntime,

jdoerfert wrote:
> ABataev wrote:
> > jdoerfert wrote:
> > > ABataev wrote:
> > > > jdoerfert wrote:
> > > > > ABataev wrote:
> > > > > > jdoerfert wrote:
> > > > > > > ABataev wrote:
> > > > > > > > Better to use `ident_loc` for passing info about execution mode 
> > > > > > > > and full/lightweight runtime.
> > > > > > > Could you please explain why you think that? Adding indirection 
> > > > > > > through a structure does not really seem beneficial to me.
> > > > > > Almost all function from libomp rely on `ident_loc`. The functions, 
> > > > > > which were added for NVPTX without this parameter had a lot of 
> > > > > > problems later and most of them were replaced with the functions 
> > > > > > with this parameter type. Plus, this parameter is used for 
> > > > > > OMPD/OMPT and it may be important for future OMPD/OMPT support.
> > > > > > Almost all function from libomp rely on ident_loc.
> > > > > 
> > > > > If you look at the implementation of this interface for NVPTX you 
> > > > > will see that the called functions do not take `ident_loc` values. 
> > > > > When you create the calls from the existing NVPTX code generation in 
> > > > > clang, the current code **does not use** `ident_loc` for similar 
> > > > > functions, see:
> > > > > `___kmpc_kernel_init(kmp_int32 thread_limit, int16_t 
> > > > > RequiresOMPRuntime)`,
> > > > > `__kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized)`,
> > > > > `__kmpc_spmd_kernel_init(kmp_int32 thread_limit, int16_t 
> > > > > RequiresOMPRuntime, int16_t RequiresDataSharing)`,
> > > > > `__kmpc_kernel_parallel(void **outlined_function, int16_t 
> > > > > IsOMPRuntimeInitialized)`,
> > > > > ...
> > > > > 
> > > > > 
> > > > > 
> > > > > > Plus, this parameter is used for OMPD/OMPT and it may be important 
> > > > > > for future OMPD/OMPT support.
> > > > > 
> > > > > If we at some point need to make the options permanent in an 
> > > > > `ident_loc` we can simply pass an `ident_loc` and require it to be 
> > > > > initialized by the call. Cluttering the user code with stores and 
> > > > > indirection is exactly what I do want to avoid.
> > > > 1. The new functions rely on `ident_loc`. We had to add those new 
> > > > functions because the old ones did not use it and it was bad design 
> > > > decision. Now we need to fix this. I suggest you do everything right 
> > > > from the very beginning rather than fixing this later by adding extra 
> > > > entry points to support OMPT/OMPD or something else, for example.
> > > > 2. No, you cannot simply change the interface of the library to keep 
> > > > the compatibility with the previous versions of the compiler/library. 
> > > > You will need to add the new entries.  
> > > Let's start this one again because I still haven't understood. Why do we 
> > > need to populate the `ident_loc` again? What information has to be in 
> > > there at which point? I want this to be clear because a lot of other 
> > > "design decisions" of the existing code base are in my opinion not 
> > > necessary and consequently missing here. That includes, for example, 
> > > various global variables. If we have a description of the problem you try 
> > > to solve with the `ident_loc` we might be able to find a way that cuts 
> > > down on state.
> > > 
> > > 
> > > Regarding the "compatibility", this is not a stable interface people can 
> > > rely on. Whatever is committed in this first patch __is not__ set in 
> > > stone. Also, we can _always_ add a `__kmpc_init_ident_loc()` function 
> > > after the fact.
> > Ident_loc holds the data about current source code location, execution mode 
> > and is full runtime required or not. Also, it is used in OMPT/OMPD support.
> > Regarding "compatibility" libraries must be most stable part of the 
> > compiler, because the user might need to link the old object file/library 
> > with the new one. Because of this the new versions of libraries must be 
> > compatible with old ones. And you need to maintain the deprecated parts to 
> > keep the compatibility with the previous versions. All these libs already 
> > have a lot of old code that because of the initial poor design and we need 
> > to maintain them. I would like to avoid this situation with this patch.
> > Ident_loc holds the data about current source code location, execution mode 
> > and is full runtime required or not. Also, it is used in OMPT/OMPD support.
> 
> We can store that information through a `__kmpc_init_ident_loc()` call 
> once needed.
> 
> 
> > Regarding "compatibility" libraries must be most stable part of the 
> > compiler, because the user might need to link the old object file/library 
> > with the new one. Because of this the new versio

[PATCH] D59319: [OpenMP][Offloading][1/3] A generic and simple target region interface

2019-03-14 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 190767.
jdoerfert marked 4 inline comments as done.
jdoerfert added a comment.

Add ident_t* to the interface functions


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59319/new/

https://reviews.llvm.org/D59319

Files:
  openmp/libomptarget/deviceRTLs/common/target_region.h
  openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
  openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
  openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
  openmp/libomptarget/deviceRTLs/nvptx/src/target_region.cu

Index: openmp/libomptarget/deviceRTLs/nvptx/src/target_region.cu
===
--- /dev/null
+++ openmp/libomptarget/deviceRTLs/nvptx/src/target_region.cu
@@ -0,0 +1,198 @@
+//===-- target_region.cu  CUDA impl. of the target region interface -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file contains the implementation of the common target region interface.
+//
+//===--===//
+
+// Include the native definitions first as certain defines might be needed in
+// the common interface definition below.
+#include "omptarget-nvptx.h"
+#include "interface.h"
+
+#include "../../common/target_region.h"
+
+/// The pointer used to share memory between team threads.
+extern __device__ __shared__ target_region_shared_buffer
+_target_region_shared_memory;
+
+EXTERN void *__kmpc_target_region_kernel_get_shared_memory() {
+  return _target_region_shared_memory.begin(); // Start of the shared buffer.
+}
+EXTERN void *__kmpc_target_region_kernel_get_private_memory() {
+  return _target_region_shared_memory.begin() + // Private memory starts
+ _target_region_shared_memory.get_offset(); // get_offset() past begin().
+}
+
+/// Simple generic state machine for worker threads; returns once no work is left.
+INLINE static void
+__kmpc_target_region_state_machine(ident_t *Ident, bool IsOMPRuntimeInitialized) {
+
+  do {
+void *WorkFn = 0;
+
+// Wait for the signal that we have a new work function.
+__kmpc_barrier_simple_spmd(Ident, 0);
+
+// Retrieve the work function from the runtime.
+bool IsActive = __kmpc_kernel_parallel(&WorkFn, IsOMPRuntimeInitialized);
+
+// If there is nothing more to do, break out of the state machine by
+// returning to the caller.
+if (!WorkFn)
+  return;
+
+if (IsActive) { // Inactive threads skip straight to the closing barrier.
+  void *SharedVars = __kmpc_target_region_kernel_get_shared_memory();
+  void *PrivateVars = __kmpc_target_region_kernel_get_private_memory();
+
+  ((ParallelWorkFnTy)WorkFn)(SharedVars, PrivateVars);
+
+  __kmpc_kernel_end_parallel();
+}
+
+__kmpc_barrier_simple_spmd(Ident, 0); // Barrier closing this round of work.
+
+  } while (true);
+}
+
+/// Filter threads into masters and workers. If \p UseStateMachine is true,
+/// required workers will enter the state machine and be trapped there.
+/// Master and surplus worker threads will return from this function immediately
+/// while required workers will only return once there is no more work. The
+/// return value indicates if the thread is a master (1), a surplus worker (0),
+/// or a required worker (-1), whether or not it ran the state machine first.
+INLINE static int8_t
+__kmpc_target_region_thread_filter(ident_t *Ident, unsigned ThreadLimit,
+   bool UseStateMachine,
+   bool IsOMPRuntimeInitialized) {
+
+  unsigned TId = GetThreadIdInBlock();
+  bool IsWorker = TId < ThreadLimit; // Ids below the limit are workers.
+
+  if (IsWorker) {
+if (UseStateMachine)
+  __kmpc_target_region_state_machine(Ident, IsOMPRuntimeInitialized);
+return -1;
+  }
+
+  return TId == GetMasterThreadID(); // 1 for the master, 0 otherwise.
+}
+
+EXTERN int8_t __kmpc_target_region_kernel_init(ident_t *Ident, bool UseSPMDMode,
+   bool UseStateMachine,
+   bool RequiresOMPRuntime,
+   bool RequiresDataSharing) {
+  unsigned NumThreads = GetNumberOfThreadsInBlock();
+
+  // Handle the SPMD case first.
+  if (UseSPMDMode) {
+
+__kmpc_spmd_kernel_init(NumThreads, RequiresOMPRuntime,
+RequiresDataSharing);
+
+if (RequiresDataSharing)
+  __kmpc_data_sharing_init_stack_spmd();
+
+return 1;
+  }
+
+  // Reserve one WARP in non-SPMD mode for the masters.
+  unsigned ThreadLimit = NumThreads - WARPSIZE;
+  int8_t FilterVal = __kmpc_target_region_thread_filter(
+  Ident, ThreadLimit, UseStateMachine, RequiresOMPRuntime);
+
+  // If the filter returns 1 the executing thread is a team master which will
+  // initialize the kernel in the following.
+  if (FilterVal == 1) {
+__kmpc_kernel_init(ThreadLimit, RequiresOMPRuntime);
+__kmpc

[PATCH] D59418: [OpenMP][Offloading] Extract common functionality

2019-03-15 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert created this revision.
jdoerfert added reviewers: ABataev, arpith-jacob, guraypp, gtbercea, hfinkel.
jdoerfert added a project: OpenMP.
Herald added a project: clang.

This patch introduces the CGOpenMPRuntimeTarget class to collect helpers
and functionality common to all target offloading code generation
schemes. All initial members have been taken from the NVPTX code
generation and removed there.

This is a preparation patch for https://reviews.llvm.org/D59328


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D59418

Files:
  clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
  clang/lib/CodeGen/CGOpenMPRuntimeTarget.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeTarget.h
  clang/lib/CodeGen/CMakeLists.txt

Index: clang/lib/CodeGen/CMakeLists.txt
===
--- clang/lib/CodeGen/CMakeLists.txt
+++ clang/lib/CodeGen/CMakeLists.txt
@@ -69,6 +69,7 @@
   CGOpenCLRuntime.cpp
   CGOpenMPRuntime.cpp
   CGOpenMPRuntimeNVPTX.cpp
+  CGOpenMPRuntimeTarget.cpp
   CGRecordLayoutBuilder.cpp
   CGStmt.cpp
   CGStmtOpenMP.cpp
Index: clang/lib/CodeGen/CGOpenMPRuntimeTarget.h
===
--- /dev/null
+++ clang/lib/CodeGen/CGOpenMPRuntimeTarget.h
@@ -0,0 +1,104 @@
+//===-- CGOpenMPRuntimeTarget.h --- Common OpenMP target codegen --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// Code common to all OpenMP target codegens.
+//
+//===--===//
+
+#ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMETARGET_H
+#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMETARGET_H
+
+#include "CGOpenMPRuntime.h"
+
+namespace clang {
+namespace CodeGen {
+
+struct CGOpenMPRuntimeTarget : public CGOpenMPRuntime {
+
+  explicit CGOpenMPRuntimeTarget(CodeGenModule &CGM);
+
+  /// Defines the execution mode.
+  enum ExecutionMode {
+/// SPMD execution mode (all threads are worker threads).
+EM_SPMD,
+/// Non-SPMD execution mode (1 master thread, others are workers).
+EM_NonSPMD,
+/// Unknown execution mode (orphaned directive).
+EM_Unknown,
+  };
+
+  /// Return the execution mode, if not overloaded this is always Unknown.
+  virtual ExecutionMode getExecutionMode() const { return EM_Unknown; }
+
+  /// Return the value decleration encapsulated in the expression \p E.
+  static const ValueDecl *getUnderlyingVar(const Expr *E);
+
+  //
+  // Base class overrides.
+  //
+
+  /// Creates offloading entry for the provided entry ID \a ID,
+  /// address \a Addr, size \a Size, and flags \a Flags.
+  void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr,
+  uint64_t Size, int32_t Flags,
+  llvm::GlobalValue::LinkageTypes Linkage) override;
+
+  /// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
+  /// global_tid, int proc_bind) to generate code for 'proc_bind' clause.
+  virtual void emitProcBindClause(CodeGenFunction &CGF,
+  OpenMPProcBindClauseKind ProcBind,
+  SourceLocation Loc) override;
+
+  /// Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
+  /// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
+  /// clause.
+  /// \param NumThreads An integer value of threads.
+  virtual void emitNumThreadsClause(CodeGenFunction &CGF,
+llvm::Value *NumThreads,
+SourceLocation Loc) override;
+
+  /// Set the number of teams to \p NumTeams and the thread limit to
+  /// \p ThreadLimit.
+  ///
+  /// \param NumTeams An integer expression of teams.
+  /// \param ThreadLimit An integer expression of threads.
+  void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams,
+  const Expr *ThreadLimit, SourceLocation Loc) override;
+
+  /// Choose a default value for the schedule clause.
+  void getDefaultScheduleAndChunk(CodeGenFunction &CGF,
+  const OMPLoopDirective &S,
+  OpenMPScheduleClauseKind &ScheduleKind,
+  const Expr *&ChunkExpr) const override;
+
+  /// Emits code for teams call of the \a OutlinedFn with
+  /// variables captured in a record which address is stored in \a
+  /// CapturedStruct.
+  /// \param OutlinedFn Outlined function to be run by team masters. Type of
+  /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
+  /// \param CapturedVars A pointer to the record with the references to
+  /// variables used in \a OutlinedFn function.
+  ///
+  

[PATCH] D59420: [NFC][OpenMP] Move runtime function generation to the target codegen

2019-03-15 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert created this revision.
jdoerfert added reviewers: ABataev, arpith-jacob, guraypp, gtbercea, hfinkel.
jdoerfert added projects: OpenMP, clang.
jdoerfert added a parent revision: D59418: [OpenMP][Offloading] Extract common 
functionality.

This commit simply moves the runtime function generation from the NVPTX
to the common target code generation.

This is a preparation patch for https://reviews.llvm.org/D59328


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D59420

Files:
  clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
  clang/lib/CodeGen/CGOpenMPRuntimeTarget.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeTarget.h

Index: clang/lib/CodeGen/CGOpenMPRuntimeTarget.h
===
--- clang/lib/CodeGen/CGOpenMPRuntimeTarget.h
+++ clang/lib/CodeGen/CGOpenMPRuntimeTarget.h
@@ -38,6 +38,95 @@
   /// Return the value decleration encapsulated in the expression \p E.
   static const ValueDecl *getUnderlyingVar(const Expr *E);
 
+  enum OpenMPRTLTargetFunctions {
+/// Call to void __kmpc_kernel_init(kmp_int32 thread_limit,
+/// int16_t RequiresOMPRuntime);
+OMPRTL_NVPTX__kmpc_kernel_init,
+/// Call to void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized);
+OMPRTL_NVPTX__kmpc_kernel_deinit,
+/// Call to void __kmpc_spmd_kernel_init(kmp_int32 thread_limit,
+/// int16_t RequiresOMPRuntime, int16_t RequiresDataSharing);
+OMPRTL_NVPTX__kmpc_spmd_kernel_init,
+/// Call to void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime);
+OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2,
+/// Call to void __kmpc_kernel_prepare_parallel(void
+/// *outlined_function, int16_t
+/// IsOMPRuntimeInitialized);
+OMPRTL_NVPTX__kmpc_kernel_prepare_parallel,
+/// Call to bool __kmpc_kernel_parallel(void **outlined_function,
+/// int16_t IsOMPRuntimeInitialized);
+OMPRTL_NVPTX__kmpc_kernel_parallel,
+/// Call to void __kmpc_kernel_end_parallel();
+OMPRTL_NVPTX__kmpc_kernel_end_parallel,
+/// Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
+/// global_tid);
+OMPRTL_NVPTX__kmpc_serialized_parallel,
+/// Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
+/// global_tid);
+OMPRTL_NVPTX__kmpc_end_serialized_parallel,
+/// Call to int32_t __kmpc_shuffle_int32(int32_t element,
+/// int16_t lane_offset, int16_t warp_size);
+OMPRTL_NVPTX__kmpc_shuffle_int32,
+/// Call to int64_t __kmpc_shuffle_int64(int64_t element,
+/// int16_t lane_offset, int16_t warp_size);
+OMPRTL_NVPTX__kmpc_shuffle_int64,
+/// Call to __kmpc_nvptx_parallel_reduce_nowait_v2(ident_t *loc, kmp_int32
+/// global_tid, kmp_int32 num_vars, size_t reduce_size, void* reduce_data,
+/// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
+/// lane_offset, int16_t shortCircuit),
+/// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num));
+OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2,
+/// Call to __kmpc_nvptx_teams_reduce_nowait_v2(ident_t *loc, kmp_int32
+/// global_tid, void *global_buffer, int32_t num_of_records, void*
+/// reduce_data,
+/// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
+/// lane_offset, int16_t shortCircuit),
+/// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num), void
+/// (*kmp_ListToGlobalCpyFctPtr)(void *buffer, int idx, void *reduce_data),
+/// void (*kmp_GlobalToListCpyFctPtr)(void *buffer, int idx,
+/// void *reduce_data), void (*kmp_GlobalToListCpyPtrsFctPtr)(void *buffer,
+/// int idx, void *reduce_data), void (*kmp_GlobalToListRedFctPtr)(void
+/// *buffer, int idx, void *reduce_data));
+OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2,
+/// Call to __kmpc_nvptx_end_reduce_nowait(int32_t global_tid);
+OMPRTL_NVPTX__kmpc_end_reduce_nowait,
+/// Call to void __kmpc_data_sharing_init_stack();
+OMPRTL_NVPTX__kmpc_data_sharing_init_stack,
+/// Call to void __kmpc_data_sharing_init_stack_spmd();
+OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd,
+/// Call to void* __kmpc_data_sharing_coalesced_push_stack(size_t size,
+/// int16_t UseSharedMemory);
+OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack,
+/// Call to void __kmpc_data_sharing_pop_stack(void *a);
+OMPRTL_NVPTX__kmpc_data_sharing_pop_stack,
+/// Call to void __kmpc_begin_sharing_variables(void ***args,
+/// size_t n_args);
+OMPRTL_NVPTX__kmpc_begin_sharing_variables,
+/// Call to void __kmpc_end_sharing_variables();
+OMPRTL_NVPTX__kmpc_end_sharing_variables,
+/// Call to void __kmpc_get_shared_variables(void ***GlobalArgs)
+OMPRTL_NVPTX__kmpc_get_shared_variables,
+/// Call to uint16_t __kmpc_parallel_level(ident_t *loc, kmp_int32
+/// global_tid);
+OMPRTL_NVPTX__kmpc_parallel_level,
+/// Call to int8_t __kmpc_is

[PATCH] D59421: [OpenMP][Offloading] Allow to build the TRegion interface functions

2019-03-15 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert created this revision.
jdoerfert added reviewers: ABataev, arpith-jacob, guraypp, gtbercea, hfinkel.
jdoerfert added projects: OpenMP, clang.

This patch adds the TRegion interface functions to the ones we can build
through the common OpenMP target offloading class.

This is a preparation patch for https://reviews.llvm.org/D59328


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D59421

Files:
  clang/lib/CodeGen/CGOpenMPRuntimeTarget.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeTarget.h

Index: clang/lib/CodeGen/CGOpenMPRuntimeTarget.h
===
--- clang/lib/CodeGen/CGOpenMPRuntimeTarget.h
+++ clang/lib/CodeGen/CGOpenMPRuntimeTarget.h
@@ -122,6 +122,35 @@
 /// Call to void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32
 /// global_tid);
 OMPRTL__kmpc_barrier_simple_spmd,
+
+/// Target Region (TREgion) Kernel interface
+///
+///{
+
+/// char __kmpc_target_region_kernel_init(ident_t *Ident,
+///   bool UseSPMDMode,
+///   bool UseStateMachine,
+///   bool RequiresOMPRuntime,
+///   bool RequiresDataSharing);
+OMPRTL__kmpc_target_region_kernel_init,
+
+/// void __kmpc_target_region_kernel_deinit(ident_t *Ident,
+/// bool UseSPMDMode,
+/// bool RequiredOMPRuntime);
+OMPRTL__kmpc_target_region_kernel_deinit,
+
+/// void __kmpc_target_region_kernel_parallel(ident_t *Ident,
+///   bool UseSPMDMode,
+///   bool RequiredOMPRuntime,
+///   ParallelWorkFnTy WorkFn,
+///   void *SharedVars,
+///   uint16_t SharedVarsBytes,
+///   void *PrivateVars,
+///   uint16_t PrivateVarsBytes,
+///   bool SharedPointers);
+OMPRTL__kmpc_target_region_kernel_parallel,
+
+///}
   };
 
   /// Returns the OpenMP runtime function identified by \p ID.
Index: clang/lib/CodeGen/CGOpenMPRuntimeTarget.cpp
===
--- clang/lib/CodeGen/CGOpenMPRuntimeTarget.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntimeTarget.cpp
@@ -52,6 +52,7 @@
 llvm::FunctionCallee CGOpenMPRuntimeTarget::createTargetRuntimeFunction(
 OpenMPRTLTargetFunctions ID) {
   llvm::FunctionCallee RTLFn = nullptr;
+  auto *I1Ty = llvm::IntegerType::getInt1Ty(CGM.getLLVMContext());
   switch (ID) {
   case OMPRTL_NVPTX__kmpc_kernel_init: {
 // Build void __kmpc_kernel_init(kmp_int32 thread_limit, int16_t
@@ -343,7 +344,96 @@
 ->addFnAttr(llvm::Attribute::Convergent);
 break;
   }
+  case OMPRTL__kmpc_target_region_kernel_init: {
+// char __kmpc_target_region_kernel_init(ident_t *Ident,
+//   bool UseSPMDMode,
+//   bool UseStateMachine,
+//   bool RequiresOMPRuntime,
+//   bool RequiresDataSharing);
+llvm::Type *TypeParams[] = {getIdentTyPointerTy(), I1Ty, I1Ty, I1Ty, I1Ty};
+auto *FnTy =
+llvm::FunctionType::get(CGM.Int8Ty, TypeParams, /* isVarArg */ false);
+RTLFn =
+CGM.CreateRuntimeFunction(FnTy, "__kmpc_target_region_kernel_init");
+
+llvm::Function *RTFn = cast(RTLFn.getCallee());
+RTFn->addParamAttr(0, llvm::Attribute::NoCapture);
+break;
+  }
+  case OMPRTL__kmpc_target_region_kernel_deinit: {
+// void __kmpc_target_region_kernel_deinit(ident_t *Ident,
+// bool UseSPMDMode,
+// bool RequiredOMPRuntime);
+llvm::Type *TypeParams[] = {getIdentTyPointerTy(), I1Ty, I1Ty};
+auto *FnTy =
+llvm::FunctionType::get(CGM.VoidTy, TypeParams, /* isVarArg */ false);
+RTLFn =
+CGM.CreateRuntimeFunction(FnTy, "__kmpc_target_region_kernel_deinit");
+
+llvm::Function *RTFn = cast(RTLFn.getCallee());
+RTFn->addParamAttr(0, llvm::Attribute::NoCapture);
+break;
+  }
+  case OMPRTL__kmpc_target_region_kernel_parallel: {
+// typedef void (*ParallelWorkFnTy)(void *, void *);
+auto *ParWorkFnTy =
+llvm::FunctionType::get(CGM.VoidTy, {CGM.VoidPtrTy, CGM.VoidPtrTy},
+/* isVarArg */ false);
+
+// void __kmpc_target_region_kernel_parallel(ident_t *Ident,
+//   bool UseSPMDMode,
+//   bool RequiredOMPRuntime,
+//  

[PATCH] D59328: [OpenMP][Offloading][2/3] Codegen for target regions (TRegions)

2019-03-15 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 190840.
jdoerfert added a comment.

Remove code extracted into separate commits, see D59418, D59420,
and D59421.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59328/new/

https://reviews.llvm.org/D59328

Files:
  clang/lib/CodeGen/CGOpenMPRuntimeTRegion.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeTRegion.h
  clang/lib/CodeGen/CMakeLists.txt
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/test/OpenMP/target_tregion_no_SPMD_mode.c

Index: clang/test/OpenMP/target_tregion_no_SPMD_mode.c
===
--- /dev/null
+++ clang/test/OpenMP/target_tregion_no_SPMD_mode.c
@@ -0,0 +1,72 @@
+// RUN: %clang_cc1 -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -fopenmp -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -mllvm -openmp-tregion-runtime -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
+
+// CHECK: loop_in_loop_in_tregion
+// CHECK:  %0 = call i8 @__kmpc_target_region_kernel_init(i1 false, i1 true, i1 true, i1 true)
+// CHECK:  call void @__kmpc_target_region_kernel_deinit(i1 false, i1 true)
+void loop_in_loop_in_tregion(int *A, int *B) {
+#pragma omp target
+  for (int i = 0; i < 512; i++) {
+for (int j = 0; j < 1024; j++)
+  A[j] += B[i + j];
+  }
+}
+
+// CHECK: parallel_loops_and_accesses_in_tregion
+// CHECK:  %0 = call i8 @__kmpc_target_region_kernel_init(i1 false, i1 true, i1 true, i1 true)
+// CHECK:  call void @__kmpc_target_region_kernel_parallel(i1 false, i1 true, void (i8*, i8*)* @.omp_TRegion._wrapper, i8* undef, i16 0, i8* %2, i16 16, i1 false)
+// CHECK:  call void @__kmpc_target_region_kernel_parallel(i1 false, i1 true, void (i8*, i8*)* @.omp_TRegion.1_wrapper, i8* undef, i16 0, i8* %5, i16 16, i1 false)
+// CHECK:  call void @__kmpc_target_region_kernel_parallel(i1 false, i1 true, void (i8*, i8*)* @.omp_TRegion.2_wrapper, i8* undef, i16 0, i8* %8, i16 16, i1 false)
+// CHECK:  call void @__kmpc_target_region_kernel_deinit(i1 false, i1 true)
+void parallel_loops_and_accesses_in_tregion(int *A, int *B) {
+#pragma omp target
+  {
+#pragma omp parallel for
+for (int j = 0; j < 1024; j++)
+  A[j] += B[0 + j];
+#pragma omp parallel for
+for (int j = 0; j < 1024; j++)
+  A[j] += B[1 + j];
+#pragma omp parallel for
+for (int j = 0; j < 1024; j++)
+  A[j] += B[2 + j];
+
+// This needs a guard in SPMD mode
+A[0] = B[0];
+  }
+}
+
+void extern_func();
+static void parallel_loop(int *A, int *B, int i) {
+#pragma omp parallel for
+  for (int j = 0; j < 1024; j++)
+A[j] += B[i + j];
+}
+
+// CHECK: parallel_loop_in_function_in_loop_with_global_acc_in_tregion
+// CHECK:  %1 = call i8 @__kmpc_target_region_kernel_init(i1 false, i1 true, i1 true, i1 true)
+// CHECK:  call void @__kmpc_target_region_kernel_deinit(i1 false, i1 true)
+int Global[512];
+void parallel_loop_in_function_in_loop_with_global_acc_in_tregion(int *A, int *B) {
+#pragma omp target
+  for (int i = 0; i < 512; i++) {
+parallel_loop(A, B, i);
+Global[i]++;
+  }
+}
+
+// CHECK: parallel_loop
+// CHECK:  call void @__kmpc_target_region_kernel_parallel(i1 false, i1 true, void (i8*, i8*)* @.omp_TRegion.3_wrapper, i8* undef, i16 0, i8* %0, i16 24, i1 false)
+
+// CHECK: parallel_loops_in_functions_and_extern_func_in_tregion
+// CHECK:  %0 = call i8 @__kmpc_target_region_kernel_init(i1 false, i1 true, i1 true, i1 true)
+// CHECK:  call void @__kmpc_target_region_kernel_deinit(i1 false, i1 true)
+void parallel_loops_in_functions_and_extern_func_in_tregion(int *A, int *B) {
+#pragma omp target
+  {
+parallel_loop(A, B, 1);
+parallel_loop(A, B, 2);
+extern_func();
+parallel_loop(A, B, 3);
+  }
+}
Index: clang/lib/CodeGen/CodeGenModule.cpp
===
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -20,6 +20,7 @@
 #include "CGOpenCLRuntime.h"
 #include "CGOpenMPRuntime.h"
 #include "CGOpenMPRuntimeNVPTX.h"
+#include "CGOpenMPRuntimeTRegion.h"
 #include "CodeGenFunction.h"
 #include "CodeGenPGO.h"
 #include "ConstantEmitter.h"
@@ -67,6 +68,11 @@
 llvm::cl::desc("Emit limited coverage mapping information (experimental)"),
 llvm::cl::init(false));
 
+static llvm::cl::opt UseGenericTRegionInterface(
+"openmp-tregion-runtime", llvm::cl::ZeroOrMore, llvm::cl::Hidden,
+llvm::cl::desc("Use the generic target region OpenMP runtime interface"),
+llvm::cl::init(false));
+
 static const char AnnotationSection[] = "llvm.metadata";
 
 static CGCXXABI *createCXXABI(CodeGenModule &CGM) {
@@ -206,7 +212,10 @@
   case llvm::Triple::nvptx64:
 assert(getLangOpts().OpenMPIsDevice &&
"OpenMP NVPTX is only prepared to deal with devi

[PATCH] D59424: [OpenMP][NVPTX] Replace void** buffer by byte-wise buffer

2019-03-15 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert created this revision.
jdoerfert added reviewers: ABataev, arpith-jacob, guraypp, gtbercea, hfinkel.
jdoerfert added a project: OpenMP.

This commit reimplements the existing void** buffer, used to share
arguments between threads in a team, on top of a byte-wise buffer. For now,
the void** buffer is kept for compatibility.

The byte-wise buffer, if used directly, saves memory when small
arguments are shared between team threads. It also allows tracking
an additional offset that separates two distinct back-to-back
memory regions, e.g., for shared (copy in & out) and firstprivate (copy
in only) variables.

This is a preparation patch for https://reviews.llvm.org/D59319


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D59424

Files:
  openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
  openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
  openmp/libomptarget/deviceRTLs/nvptx/src/option.h

Index: openmp/libomptarget/deviceRTLs/nvptx/src/option.h
===
--- openmp/libomptarget/deviceRTLs/nvptx/src/option.h
+++ openmp/libomptarget/deviceRTLs/nvptx/src/option.h
@@ -27,9 +27,9 @@
 // region to synchronize with each other.
 #define L1_BARRIER (1)
 
-// Maximum number of preallocated arguments to an outlined parallel/simd function.
-// Anything more requires dynamic memory allocation.
-#define MAX_SHARED_ARGS 20
+// Maximum number of preallocated bytes that can be passed to an outlined
+// parallel/simd function before dynamic memory allocation is required.
+#define PRE_SHARED_BYTES 128
 
 // Maximum number of omp state objects per SM allocated statically in global
 // memory.
Index: openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
===
--- openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
+++ openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
@@ -63,42 +63,84 @@
 #define __SYNCTHREADS_N(n) asm volatile("bar.sync %0;" : : "r"(n) : "memory");
 #define __SYNCTHREADS() __SYNCTHREADS_N(0)
 
+/// Helper structure to manage the memory shared by the threads in a team.
+///
+/// This buffer can manage two adjacent byte-wise objects by tracking the
+/// beginning of the second, as an offset, in addition to the beginning of the
+/// first, as a pointer.
+///
+/// Note: Only the team master is allowed to call non-const functions!
+struct shared_bytes_buffer {
+
+  INLINE void init() {
+_ptr = &_data[0];
+_size = PRE_SHARED_BYTES;
+_offset = 0;
+  }
+
+  /// Release any dynamic allocated memory.
+  INLINE void release() {
+if (_size == PRE_SHARED_BYTES)
+  return;
+SafeFree(_ptr, (char *)"free shared dynamic buffer");
+init();
+  }
+
+  INLINE void set(void *ptr, size_t offset) {
+release();
+_ptr = (char *)ptr;
+_offset = offset;
+  }
+
+  INLINE void resize(size_t size, size_t offset) {
+_offset = offset;
+
+if (size <= _size)
+  return;
+
+if (_size != PRE_SHARED_BYTES)
+  SafeFree(_ptr, (char *)"free shared dynamic buffer");
+
+_size = size;
+_ptr = (char *)SafeMalloc(_size, (char *)"new shared buffer");
+  }
+
+  // Called by all threads.
+  INLINE void *begin() const { return _ptr; };
+  INLINE size_t size() const { return _size; };
+  INLINE size_t get_offset() const { return _offset; };
+
+private:
+  // Pre-allocated space that holds PRE_SHARED_BYTES many bytes.
+  char _data[PRE_SHARED_BYTES];
+
+  // Pointer to the currently used buffer.
+  char *_ptr;
+
+  // Size of the currently used buffer.
+  uint32_t _size;
+
+  // Offset into the currently used buffer.
+  uint32_t _offset;
+};
+
+extern __device__ __shared__ shared_bytes_buffer _shared_bytes_buffer_memory;
+
 // arguments needed for L0 parallelism only.
+//
+// NOTE: Deprecated, use shared_byte_buffer instead.
 class omptarget_nvptx_SharedArgs {
 public:
   // All these methods must be called by the master thread only.
-  INLINE void Init() {
-args  = buffer;
-nArgs = MAX_SHARED_ARGS;
-  }
-  INLINE void DeInit() {
-// Free any memory allocated for outlined parallel function with a large
-// number of arguments.
-if (nArgs > MAX_SHARED_ARGS) {
-  SafeFree(args, (char *)"new extended args");
-  Init();
-}
-  }
+  INLINE void Init() { _shared_bytes_buffer_memory.init(); }
+  INLINE void DeInit() { _shared_bytes_buffer_memory.release(); }
   INLINE void EnsureSize(size_t size) {
-if (size > nArgs) {
-  if (nArgs > MAX_SHARED_ARGS) {
-SafeFree(args, (char *)"new extended args");
-  }
-  args = (void **) SafeMalloc(size * sizeof(void *),
-  (char *)"new extended args");
-  nArgs = size;
-}
+_shared_bytes_buffer_memory.resize(size * sizeof(void *), 0);
   }
   // Called by all threads.
-  INLINE void **GetArgs() const { return args; };
-private:
-  // buffer of pre-allocated arguments.
-  void *buffer[MAX_SH

[PATCH] D59319: [OpenMP][Offloading][1/3] A generic and simple target region interface

2019-03-15 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 190861.
jdoerfert marked an inline comment as done.
jdoerfert added a comment.

Rebase onto D59424 and fix errors caused by
the wrong use of ident_t


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59319/new/

https://reviews.llvm.org/D59319

Files:
  openmp/libomptarget/deviceRTLs/common/target_region.h
  openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
  openmp/libomptarget/deviceRTLs/nvptx/src/target_region.cu

Index: openmp/libomptarget/deviceRTLs/nvptx/src/target_region.cu
===
--- /dev/null
+++ openmp/libomptarget/deviceRTLs/nvptx/src/target_region.cu
@@ -0,0 +1,195 @@
+//===-- target_region.cu  CUDA impl. of the target region interface -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file contains the implementation of the common target region interface.
+//
+//===--===//
+
+// Include the native definitions first as certain defines might be needed in
+// the common interface definition below.
+#include "omptarget-nvptx.h"
+#include "interface.h"
+
+#include "../../common/target_region.h"
+
+EXTERN void *__kmpc_target_region_kernel_get_shared_memory() {
+  return _shared_bytes_buffer_memory.begin();
+}
+EXTERN void *__kmpc_target_region_kernel_get_private_memory() {
+  return ((char *)_shared_bytes_buffer_memory.begin()) +
+ _shared_bytes_buffer_memory.get_offset();
+}
+
+/// Simple generic state machine for worker threads.
+INLINE static void
+__kmpc_target_region_state_machine(ident_t *Ident,
+   bool IsOMPRuntimeInitialized) {
+
+  do {
+void *WorkFn = 0;
+
+// Wait for the signal that we have a new work function.
+__kmpc_barrier_simple_spmd(Ident, 0);
+
+// Retrieve the work function from the runtime.
+bool IsActive = __kmpc_kernel_parallel(&WorkFn, IsOMPRuntimeInitialized);
+
+// If there is nothing more to do, break out of the state machine by
+// returning to the caller.
+if (!WorkFn)
+  return;
+
+if (IsActive) {
+  void *SharedVars = __kmpc_target_region_kernel_get_shared_memory();
+  void *PrivateVars = __kmpc_target_region_kernel_get_private_memory();
+
+  ((ParallelWorkFnTy)WorkFn)(SharedVars, PrivateVars);
+
+  __kmpc_kernel_end_parallel();
+}
+
+__kmpc_barrier_simple_spmd(Ident, 0);
+
+  } while (true);
+}
+
+/// Filter threads into masters and workers. If \p UseStateMachine is true,
+/// required workers will enter a state machine through and be trapped there.
+/// Master and surplus worker threads will return from this function immediately
+/// while required workers will only return once there is no more work. The
+/// return value indicates if the thread is a master (1), a surplus worker (0),
+/// or a finished required worker released from the state machine (-1).
+INLINE static int8_t
+__kmpc_target_region_thread_filter(ident_t *Ident, unsigned ThreadLimit,
+   bool UseStateMachine,
+   bool IsOMPRuntimeInitialized) {
+
+  unsigned TId = GetThreadIdInBlock();
+  bool IsWorker = TId < ThreadLimit;
+
+  if (IsWorker) {
+if (UseStateMachine)
+  __kmpc_target_region_state_machine(Ident, IsOMPRuntimeInitialized);
+return -1;
+  }
+
+  return TId == GetMasterThreadID();
+}
+
+EXTERN int8_t __kmpc_target_region_kernel_init(ident_t *Ident, bool UseSPMDMode,
+   bool UseStateMachine,
+   bool RequiresOMPRuntime,
+   bool RequiresDataSharing) {
+  unsigned NumThreads = GetNumberOfThreadsInBlock();
+
+  // Handle the SPMD case first.
+  if (UseSPMDMode) {
+
+__kmpc_spmd_kernel_init(NumThreads, RequiresOMPRuntime,
+RequiresDataSharing);
+
+if (RequiresDataSharing)
+  __kmpc_data_sharing_init_stack_spmd();
+
+return 1;
+  }
+
+  // Reserve one WARP in non-SPMD mode for the masters.
+  unsigned ThreadLimit = NumThreads - WARPSIZE;
+  int8_t FilterVal = __kmpc_target_region_thread_filter(
+  Ident, ThreadLimit, UseStateMachine, RequiresOMPRuntime);
+
+  // If the filter returns 1 the executing thread is a team master which will
+  // initialize the kernel in the following.
+  if (FilterVal == 1) {
+__kmpc_kernel_init(ThreadLimit, RequiresOMPRuntime);
+__kmpc_data_sharing_init_stack();
+_shared_bytes_buffer_memory.init();
+  }
+
+  return FilterVal;
+}
+
+EXTERN void __kmpc_target_region_kernel_deinit(ident_t *Ident

[PATCH] D59319: [OpenMP][Offloading][1/3] A generic and simple target region interface

2019-03-15 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert marked an inline comment as done.
jdoerfert added a comment.

> What is this buffer used for? [...]

I'll copy your comment and respond in review D59424.




Comment at: openmp/libomptarget/deviceRTLs/common/target_region.h:104
+///
+EXTERN int8_t __kmpc_target_region_kernel_init(ident_t *Ident, bool 
UseSPMDMode,
+   bool RequiresOMPRuntime,

ABataev wrote:
> If you're using `ident_t` `UseSPMDMode` and `RequiresOMPRuntime` parameters 
> are not needed anymore. They are passed in `ident_t` structure.

They are not in the TRegion interface, at least not by the TRegion code 
generation. If required, we can add that or require the 
`__kmpc_target_region_kernel_init` implementation to store the values in the 
`ident_t`. Regardless, we do not want to hide the variables in the `ident_t` 
because that would result in worse analysis results and cause optimizations to 
be harder. The main point of all these changes is, after all, to make 
optimizations easy. Given that we expect these functions to be inlined, there 
is also no harm done wrt. runtime costs.






Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59319/new/

https://reviews.llvm.org/D59319



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59424: [OpenMP][NVPTX] Replace void** buffer by byte-wise buffer

2019-03-15 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 190866.
jdoerfert marked an inline comment as done.
jdoerfert added a comment.

Fix the set/release use case


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59424/new/

https://reviews.llvm.org/D59424

Files:
  openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
  openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
  openmp/libomptarget/deviceRTLs/nvptx/src/option.h

Index: openmp/libomptarget/deviceRTLs/nvptx/src/option.h
===
--- openmp/libomptarget/deviceRTLs/nvptx/src/option.h
+++ openmp/libomptarget/deviceRTLs/nvptx/src/option.h
@@ -27,9 +27,9 @@
 // region to synchronize with each other.
 #define L1_BARRIER (1)
 
-// Maximum number of preallocated arguments to an outlined parallel/simd function.
-// Anything more requires dynamic memory allocation.
-#define MAX_SHARED_ARGS 20
+// Maximum number of preallocated bytes that can be passed to an outlined
+// parallel/simd function before dynamic memory allocation is required.
+#define PRE_SHARED_BYTES 128
 
 // Maximum number of omp state objects per SM allocated statically in global
 // memory.
Index: openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
===
--- openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
+++ openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
@@ -63,42 +63,87 @@
 #define __SYNCTHREADS_N(n) asm volatile("bar.sync %0;" : : "r"(n) : "memory");
 #define __SYNCTHREADS() __SYNCTHREADS_N(0)
 
+/// Helper structure to manage the memory shared by the threads in a team.
+///
+/// This buffer can manage two adjacent byte-wise objects by tracking the
+/// beginning of the second, as an offset, in addition to the beginning of the
+/// first, as a pointer.
+///
+/// Note: Only the team master is allowed to call non-const functions!
+struct shared_bytes_buffer {
+
+  INLINE void init() {
+_ptr = &_data[0];
+_size = PRE_SHARED_BYTES;
+_offset = 0;
+  }
+
+  /// Release any dynamic allocated memory.
+  INLINE void release() {
+if (_size != PRE_SHARED_BYTES)
+  SafeFree(_ptr, (char *)"free shared dynamic buffer");
+// Always perform an init, it is cheap and required after a set call was
+// performed during the last use of the buffer.
+init();
+  }
+
+  INLINE void set(void *ptr, size_t offset) {
+// Note that release will set _size to PRE_SHARED_BYTES, thereby avoiding
+// the next release call from freeing the associated memory.
+release();
+_ptr = (char *)ptr;
+_offset = offset;
+  }
+
+  INLINE void resize(size_t size, size_t offset) {
+_offset = offset;
+
+if (size <= _size)
+  return;
+
+if (_size != PRE_SHARED_BYTES)
+  SafeFree(_ptr, (char *)"free shared dynamic buffer");
+
+_size = size;
+_ptr = (char *)SafeMalloc(_size, (char *)"new shared buffer");
+  }
+
+  // Called by all threads.
+  INLINE void *begin() const { return _ptr; };
+  INLINE size_t size() const { return _size; };
+  INLINE size_t get_offset() const { return _offset; };
+
+private:
+  // Pre-allocated space that holds PRE_SHARED_BYTES many bytes.
+  char _data[PRE_SHARED_BYTES];
+
+  // Pointer to the currently used buffer.
+  char *_ptr;
+
+  // Size of the currently used buffer.
+  uint32_t _size;
+
+  // Offset into the currently used buffer.
+  uint32_t _offset;
+};
+
+extern __device__ __shared__ shared_bytes_buffer _shared_bytes_buffer_memory;
+
 // arguments needed for L0 parallelism only.
+//
+// NOTE: Deprecated, use shared_byte_buffer instead.
 class omptarget_nvptx_SharedArgs {
 public:
   // All these methods must be called by the master thread only.
-  INLINE void Init() {
-args  = buffer;
-nArgs = MAX_SHARED_ARGS;
-  }
-  INLINE void DeInit() {
-// Free any memory allocated for outlined parallel function with a large
-// number of arguments.
-if (nArgs > MAX_SHARED_ARGS) {
-  SafeFree(args, (char *)"new extended args");
-  Init();
-}
-  }
+  INLINE void Init() { _shared_bytes_buffer_memory.init(); }
+  INLINE void DeInit() { _shared_bytes_buffer_memory.release(); }
   INLINE void EnsureSize(size_t size) {
-if (size > nArgs) {
-  if (nArgs > MAX_SHARED_ARGS) {
-SafeFree(args, (char *)"new extended args");
-  }
-  args = (void **) SafeMalloc(size * sizeof(void *),
-  (char *)"new extended args");
-  nArgs = size;
-}
+_shared_bytes_buffer_memory.resize(size * sizeof(void *), 0);
   }
   // Called by all threads.
-  INLINE void **GetArgs() const { return args; };
-private:
-  // buffer of pre-allocated arguments.
-  void *buffer[MAX_SHARED_ARGS];
-  // pointer to arguments buffer.
-  // starts off as a pointer to 'buffer' but can be dynamically allocated.
-  void **args;
-  // starts off as MAX_SHARED_ARGS but can increase in size.
-  uint32_t nArg

[PATCH] D59424: [OpenMP][NVPTX] Replace void** buffer by byte-wise buffer

2019-03-15 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert marked an inline comment as not done.
jdoerfert added inline comments.



Comment at: openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h:73
+/// Note: Only the team master is allowed to call non-const functions!
+struct shared_bytes_buffer {
+

> What is this buffer used for? Transferring pointers to the shared variables 
> to the parallel regions? If so, it must be handled by the compiler. There are 
> several reasons to do this:
> 1) You're using malloc/free functions for large buffers. The fact is that the 
> size of this buffer is known at the compile time and compiler can generate 
> the fixed size buffer in the global memory if required. We already have 
> similar implementation for target regions, globalized variables etc. You can 
> take a look and adapt it for your purpose.
> 2) Malloc/free are not very fast on the GPU, so it will get an additional 
> performance with the preallocated buffers.
> 3) Another problem with malloc/free is that they are using preallocated 
> memory and the size of this memory is limited by 8Mb (if I do recall 
> correctly). This memory is required for the correct support of the local 
> variables globalization and we already ran into the situation when malloc 
> could not allocate enough memory for it with some previous implementations.
> 4) You can reuse the shared memory buffers already generated by the compiler 
> and save shared memory.

[Quote by ABataev copied from 
https://reviews.llvm.org/D59319?id=190767#inline-525900 after the patch was 
split.]


This buffer is supposed to be used to communicate variables in shared and 
firstprivate clauses between threads in a team. In this patch it is simply used 
to implement the old `void**` buffer. How, when, and whether we use it is part 
of the interface implementation. For now, this buffer simply serves the users 
of the `omptarget_nvptx_globalArgs` global.

If you want to provide compiler-allocated memory to avoid the buffer use, no 
problem: the `__kmpc_target_region_kernel_parallel` function allows doing so, 
see the `SharedMemPointers` flag. I wouldn't want to put the logic to generate 
these buffers in the front-end though.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59424/new/

https://reviews.llvm.org/D59424



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59319: [OpenMP][Offloading][1/3] A generic and simple target region interface

2019-03-15 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 190868.
jdoerfert added a comment.

Fix a typo (use of wrong variable) and improve comments


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59319/new/

https://reviews.llvm.org/D59319

Files:
  openmp/libomptarget/deviceRTLs/common/target_region.h
  openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
  openmp/libomptarget/deviceRTLs/nvptx/src/target_region.cu

Index: openmp/libomptarget/deviceRTLs/nvptx/src/target_region.cu
===
--- /dev/null
+++ openmp/libomptarget/deviceRTLs/nvptx/src/target_region.cu
@@ -0,0 +1,205 @@
+//===-- target_region.cu  CUDA impl. of the target region interface -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file contains the implementation of the common target region interface.
+//
+//===--===//
+
+// Include the native definitions first as certain defines might be needed in
+// the common interface definition below.
+#include "interface.h"
+#include "omptarget-nvptx.h"
+
+#include "../../common/target_region.h"
+
+EXTERN void *__kmpc_target_region_kernel_get_shared_memory() {
+  return _shared_bytes_buffer_memory.begin();
+}
+EXTERN void *__kmpc_target_region_kernel_get_private_memory() {
+  return ((char *)_shared_bytes_buffer_memory.begin()) +
+ _shared_bytes_buffer_memory.get_offset();
+}
+
+/// Simple generic state machine for worker threads.
+INLINE static void
+__kmpc_target_region_state_machine(ident_t *Ident,
+   bool IsOMPRuntimeInitialized) {
+
+  do {
+void *WorkFn = 0;
+
+// Wait for the signal that we have a new work function.
+__kmpc_barrier_simple_spmd(Ident, 0);
+
+// Retrieve the work function from the runtime.
+bool IsActive = __kmpc_kernel_parallel(&WorkFn, IsOMPRuntimeInitialized);
+
+// If there is nothing more to do, break out of the state machine by
+// returning to the caller.
+if (!WorkFn)
+  return;
+
+if (IsActive) {
+  void *SharedVars = __kmpc_target_region_kernel_get_shared_memory();
+  void *PrivateVars = __kmpc_target_region_kernel_get_private_memory();
+
+  ((ParallelWorkFnTy)WorkFn)(SharedVars, PrivateVars);
+
+  __kmpc_kernel_end_parallel();
+}
+
+__kmpc_barrier_simple_spmd(Ident, 0);
+
+  } while (true);
+}
+
+/// Filter threads into masters and workers. If \p UseStateMachine is true,
+/// required workers will enter a state machine and be trapped there.
+/// Master and surplus worker threads will return from this function immediately
+/// while required workers will only return once there is no more work. The
+/// return value indicates if the thread is a master (1), a surplus worker (0),
+/// or a finished required worker released from the state machine (-1).
+INLINE static int8_t
+__kmpc_target_region_thread_filter(ident_t *Ident, unsigned ThreadLimit,
+   bool UseStateMachine,
+   bool IsOMPRuntimeInitialized) {
+
+  unsigned TId = GetThreadIdInBlock();
+  bool IsWorker = TId < ThreadLimit;
+
+  if (IsWorker) {
+if (UseStateMachine)
+  __kmpc_target_region_state_machine(Ident, IsOMPRuntimeInitialized);
+return -1;
+  }
+
+  return TId == GetMasterThreadID();
+}
+
+EXTERN int8_t __kmpc_target_region_kernel_init(ident_t *Ident, bool UseSPMDMode,
+   bool UseStateMachine,
+   bool RequiresOMPRuntime,
+   bool RequiresDataSharing) {
+  unsigned NumThreads = GetNumberOfThreadsInBlock();
+
+  // Handle the SPMD case first.
+  if (UseSPMDMode) {
+
+__kmpc_spmd_kernel_init(NumThreads, RequiresOMPRuntime,
+RequiresDataSharing);
+
+if (RequiresDataSharing)
+  __kmpc_data_sharing_init_stack_spmd();
+
+return 1;
+  }
+
+  // Reserve one WARP in non-SPMD mode for the masters.
+  unsigned ThreadLimit = NumThreads - WARPSIZE;
+  int8_t FilterVal = __kmpc_target_region_thread_filter(
+  Ident, ThreadLimit, UseStateMachine, RequiresOMPRuntime);
+
+  // If the filter returns 1 the executing thread is a team master which will
+  // initialize the kernel in the following.
+  if (FilterVal == 1) {
+__kmpc_kernel_init(ThreadLimit, RequiresOMPRuntime);
+__kmpc_data_sharing_init_stack();
+_shared_bytes_buffer_memory.init();
+  }
+
+  return FilterVal;
+}
+
+EXTERN void __kmpc_target_region_kernel_deinit(ident_t *Ident, bool UseSPMDMode,
+   bool RequiredOMPRuntime)

[PATCH] D59319: [OpenMP][Offloading][1/3] A generic and simple target region interface

2019-03-15 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert marked an inline comment as done.
jdoerfert added inline comments.



Comment at: openmp/libomptarget/deviceRTLs/common/target_region.h:104
+///
+EXTERN int8_t __kmpc_target_region_kernel_init(ident_t *Ident, bool 
UseSPMDMode,
+   bool RequiresOMPRuntime,

ABataev wrote:
> jdoerfert wrote:
> > ABataev wrote:
> > > If you're using `ident_t` `UseSPMDMode` and `RequiresOMPRuntime` 
> > > parameters are not needed anymore. They are passed in `ident_t` structure.
> > > If you're using ident_t UseSPMDMode and RequiresOMPRuntime parameters are 
> > > not needed anymore. They are passed in ident_t structure.
> > 
> > They are not in the TRegion interface, at least not by the TRegion code 
> > generation. If required, we can add that or require the 
> > `__kmpc_target_region_kernel_init` implementation to store the values in 
> > the `ident_t`. Regardless, we do not want to hide the variables in the 
> > `ident_t` because that would result in worse analysis results and cause 
> > optimizations to be harder. The main point of all these changes is, after 
> > all, to make optimizations easy. Given that we expect these functions to be 
> > inlined, there is also no harm done wrt. runtime costs.
> > 
> > 
> > 
> > 
> This is why we used them. Those `ident_t`s are constant and it allows us to 
> perform an additional optimization in the functions, that do not have 
> `isSPMDMode` and `RequiresFullRuntime`. Because of this parameter, we gained 
> a significant performance boost. LLVM knows how to deal with the structures, 
> don't worry about the optimization.
> This is why we used them. Those ident_ts are constant and it allows us to 
> perform an additional optimization in the functions, that do not have 
> isSPMDMode and RequiresFullRuntime.

The boolean parameters are (currently) also constant. The main point however is 
that in our expected use case, an inlined device RTL, there is literally no 
cost to pay by having the flags explicit as parameters.


> Because of this parameter, we gained a significant performance boost.

Compared to what? Not having information about the execution mode, etc. at all? 
How would that become worse? 




> LLVM knows how to deal with the structures, don't worry about the 
> optimization.

I am (painfully) aware of LLVM's capability to promote arguments (that is what 
is needed if we do not inline or perform IP-SCCP). However, using a pointer 
does allow the use of non-constant `ident_t` values, which are problematic. 
They might actually be useful for the original purpose of `ident_t`, namely 
location information. Think function merging that will cause a call with one of 
multiple different `ident_t` pointers. Making sure we can promote the values in 
that case is already much harder than checking if all potential values are the 
same boolean constant.



Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59319/new/

https://reviews.llvm.org/D59319



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59424: [OpenMP][NVPTX] Replace void** buffer by byte-wise buffer

2019-03-15 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert marked an inline comment as done.
jdoerfert added inline comments.



Comment at: openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h:73
+/// Note: Only the team master is allowed to call non-const functions!
+struct shared_bytes_buffer {
+

ABataev wrote:
> jdoerfert wrote:
> > > What is this buffer used for? Transferring pointers to the shared 
> > > variables to the parallel regions? If so, it must be handled by the 
> > > compiler. There are several reasons to do this:
> > > 1) You're using malloc/free functions for large buffers. The fact is that 
> > > the size of this buffer is known at the compile time and compiler can 
> > > generate the fixed size buffer in the global memory if required. We 
> > > already have similar implementation for target regions, globalized 
> > > variables etc. You can take a look and adapt it for your purpose.
> > > 2) Malloc/free are not very fast on the GPU, so it will get an additional 
> > > performance with the preallocated buffers.
> > > 3) Another one problem with malloc/free is that they are using 
> > > preallocated memory and the size of this memory is limited by 8Mb (if I 
> > > do recall correctly). This memory is required for the correct support of 
> > > the local variables globalization and we already ran into the situation 
> > > when malloc could not allocate enough memory for it with some previous 
> > > implementations.
> > > 4) You can reuse the shared memory buffers already generated by the 
> > > compiler and save shared memory.
> > 
> > [Quote by ABataev copied from 
> > https://reviews.llvm.org/D59319?id=190767#inline-525900 after the patch was 
> > split.]
> > 
> > 
> > This buffer is supposed to be used to communicate variables in shared and 
> > firstprivate clauses between threads in a team. In this patch it is simply 
> > used to implement the old `void**` buffer. How, when, if we use it is part 
> > of the interface implementation. For now, this buffer simply serves the 
> > users of the `omptarget_nvptx_globalArgs` global.
> > 
> > If you want to provide compiler allocated memory to avoid the buffer use, 
> > no problem,
> > the `__kmpc_target_region_kernel_parallel` function allows to do so, see 
> > the `SharedMemPointers` flag. I wouldn't want to put the logic to generate 
> > these buffers in the front-end though.
> Why?
Why what?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59424/new/

https://reviews.llvm.org/D59424



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59424: [OpenMP][NVPTX] Replace void** buffer by byte-wise buffer

2019-03-15 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert marked an inline comment as done.
jdoerfert added inline comments.



Comment at: openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h:73
+/// Note: Only the team master is allowed to call non-const functions!
+struct shared_bytes_buffer {
+

ABataev wrote:
> jdoerfert wrote:
> > ABataev wrote:
> > > jdoerfert wrote:
> > > > > What is this buffer used for? Transferring pointers to the shared 
> > > > > variables to the parallel regions? If so, it must be handled by the 
> > > > > compiler. There are several reasons to do this:
> > > > > 1) You're using malloc/free functions for large buffers. The fact is 
> > > > > that the size of this buffer is known at the compile time and 
> > > > > compiler can generate the fixed size buffer in the global memory if 
> > > > > required. We already have similar implementation for target regions, 
> > > > > globalized variables etc. You can take a look and adapt it for your 
> > > > > purpose.
> > > > > 2) Malloc/free are not very fast on the GPU, so it will get an 
> > > > > additional performance with the preallocated buffers.
> > > > > 3) Another one problem with malloc/free is that they are using 
> > > > > preallocated memory and the size of this memory is limited by 8Mb (if 
> > > > > I do recall correctly). This memory is required for the correct 
> > > > > support of the local variables globalization and we already ran into 
> > > > > the situation when malloc could not allocate enough memory for it 
> > > > > with some previous implementations.
> > > > > 4) You can reuse the shared memory buffers already generated by the 
> > > > > compiler and save shared memory.
> > > > 
> > > > [Quote by ABataev copied from 
> > > > https://reviews.llvm.org/D59319?id=190767#inline-525900 after the patch 
> > > > was split.]
> > > > 
> > > > 
> > > > This buffer is supposed to be used to communicate variables in shared 
> > > > and firstprivate clauses between threads in a team. In this patch it is 
> > > > simply used to implement the old `void**` buffer. How, when, if we use 
> > > > it is part of the interface implementation. For now, this buffer simply 
> > > > serves the users of the `omptarget_nvptx_globalArgs` global.
> > > > 
> > > > If you want to provide compiler allocated memory to avoid the buffer 
> > > > use, no problem,
> > > > the `__kmpc_target_region_kernel_parallel` function allows to do so, 
> > > > see the `SharedMemPointers` flag. I wouldn't want to put the logic to 
> > > > generate these buffers in the front-end though.
> > > Why?
> > Why what?
> Why you don't want to put the buffers generation to the compiler?
> Why you don't want to put the buffers generation to the compiler?

I never said that. I explicitly explained how the new interface allows you 
to do exactly that. Maybe you confuse it with me not wanting the generation to 
be part of the front-end (=Clang). That is because it is an implementation 
choice for performance, as such it should not be done by Clang if it might 
obstruct analyses later on, or could be done better with more analysis support.

If we conclude we actually need to share values, after we tried to eliminate 
the need for sharing altogether, we can decide if a global buffer is 
preferable. If so, we can check if
there is already one that is unused or if we would need to create a new one.

//Regarding this patch:// It is not supposed to change the behavior at all. 
This patch just introduces a more general buffer which is then used to 
implement the old buffer. How/when the buffer is used is not affected.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59424/new/

https://reviews.llvm.org/D59424



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59319: [OpenMP][Offloading][1/3] A generic and simple target region interface

2019-03-15 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert marked an inline comment as done.
jdoerfert added inline comments.



Comment at: openmp/libomptarget/deviceRTLs/common/target_region.h:104
+///
+EXTERN int8_t __kmpc_target_region_kernel_init(ident_t *Ident, bool 
UseSPMDMode,
+   bool RequiresOMPRuntime,

ABataev wrote:
> jdoerfert wrote:
> > ABataev wrote:
> > > jdoerfert wrote:
> > > > ABataev wrote:
> > > > > If you're using `ident_t` `UseSPMDMode` and `RequiresOMPRuntime` 
> > > > > parameters are not needed anymore. They are passed in `ident_t` 
> > > > > structure.
> > > > > If you're using ident_t UseSPMDMode and RequiresOMPRuntime parameters 
> > > > > are not needed anymore. They are passed in ident_t structure.
> > > > 
> > > > They are not in the TRegion interface, at least not by the TRegion code 
> > > > generation. If required, we can add that or require the 
> > > > `__kmpc_target_region_kernel_init` implementation to store the values 
> > > > in the `ident_t`. Regardless, we do not want to hide the variables in 
> > > > the `ident_t` because that would result in worse analysis results and 
> > > > cause optimizations to be harder. The main point of all these changes 
> > > > is, after all, to make optimizations easy. Given that we expect these 
> > > > functions to be inlined, there is also no harm done wrt. runtime costs.
> > > > 
> > > > 
> > > > 
> > > > 
> > > This is why we used them. Those `ident_t`s  are constant and it allows us 
> > > to perform an additional optimization in the functions, that do not have 
> > > `isSPMDMode` and `RequiresFullRuntime`. Because of this parameter, we 
> > > gained a significant performance boost. LLVM knows how to deal with the 
> > > structures, don't worry about the optimization.
> > > This is why we used them. Those ident_ts are constant and it allows us to 
> > > perform an additional optimization in the functions, that do not have 
> > > isSPMDMode and RequiresFullRuntime.
> > 
> > The boolean parameters are (currently) also constant. The main point 
> > however is that in our expected use case, an inlined device RTL, there is 
> > literally no cost to pay by having the flags explicit as parameters.
> > 
> > 
> > > Because of this parameter, we gained a significant performance boost.
> > 
> > Compared to what? Not having information about the execution mode, etc. at 
> > all? How would that become worse? 
> > 
> > 
> > 
> > 
> > > LLVM knows how to deal with the structures, don't worry about the 
> > > optimization.
> > 
> > I am (painfully) aware of LLVM's capability to promote arguments (that is 
> > what is needed if we do not inline or perform IP-SCCP). However, using a 
> > pointer does allow the use of non-constant `ident_t` values, which are 
> > problematic. They might actually be useful for the original purpose of 
> > `ident_t`, namely location information. Think function merging that will 
> > cause a call with one of multiple different `ident_t` pointers. Making sure 
> > we can promote the values in that case is already much harder than checking 
> > if all potential values are the same boolean constant.
> > 
> 1. This is the data duplication.
> 2. Compared to the previous implementation.
> 3. It allows, yes, but the compiler generates constant `ident_t`. This 
> structure is used not only for the location information, but it is also used for 
> other purposes. There are no problems with the code inlining and optimization 
> for `ident_t`s.
> 1. This is the data duplication.

What is? Having explicit constant boolean parameters? There is no "duplication" 
if they are constant and the functions are inlined. If you //really think 
otherwise//, I'm afraid we will not make progress here without a third opinion.


> 2. Compared to the previous implementation.

I do not know what the previous implementation was. I'm also unsure what the 
point is you are trying to make. If it is different from point 1., could you 
please elaborate?

> 3. It allows, yes, but the compiler generates constant ident_t. This 
> structure is used not only for the location information, but it is also used for 
> other purposes. There are no problems with the code inlining and optimization 
> for ident_ts.

For now, maybe. I just gave you a very plausible example of how there could be 
performance implications in the near future due to the indirection compared to 
explicit boolean parameters.



Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59319/new/

https://reviews.llvm.org/D59319



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59424: [OpenMP][NVPTX] Replace void** buffer by byte-wise buffer

2019-03-15 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert marked an inline comment as done.
jdoerfert added inline comments.



Comment at: openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h:73
+/// Note: Only the team master is allowed to call non-const functions!
+struct shared_bytes_buffer {
+

ABataev wrote:
> jdoerfert wrote:
> > ABataev wrote:
> > > jdoerfert wrote:
> > > > ABataev wrote:
> > > > > jdoerfert wrote:
> > > > > > > What is this buffer used for? Transferring pointers to the shared 
> > > > > > > variables to the parallel regions? If so, it must be handled by 
> > > > > > > the compiler. There are several reasons to do this:
> > > > > > > 1) You're using malloc/free functions for large buffers. The fact 
> > > > > > > is that the size of this buffer is known at the compile time and 
> > > > > > > compiler can generate the fixed size buffer in the global memory 
> > > > > > > if required. We already have similar implementation for target 
> > > > > > > regions, globalized variables etc. You can take a look and adapt 
> > > > > > > it for your purpose.
> > > > > > > 2) Malloc/free are not very fast on the GPU, so it will get an 
> > > > > > > additional performance with the preallocated buffers.
> > > > > > > 3) Another one problem with malloc/free is that they are using 
> > > > > > > preallocated memory and the size of this memory is limited by 8Mb 
> > > > > > > (if I do recall correctly). This memory is required for the 
> > > > > > > correct support of the local variables globalization and we 
> > > > > > > already ran into the situation when malloc could not allocate 
> > > > > > > enough memory for it with some previous implementations.
> > > > > > > 4) You can reuse the shared memory buffers already generated by 
> > > > > > > the compiler and save shared memory.
> > > > > > 
> > > > > > [Quote by ABataev copied from 
> > > > > > https://reviews.llvm.org/D59319?id=190767#inline-525900 after the 
> > > > > > patch was split.]
> > > > > > 
> > > > > > 
> > > > > > This buffer is supposed to be used to communicate variables in 
> > > > > > shared and firstprivate clauses between threads in a team. In this 
> > > > > > patch it is simply used to implement the old `void**` buffer. How, 
> > > > > > when, if we use it is part of the interface implementation. For 
> > > > > > now, this buffer simply serves the users of the 
> > > > > > `omptarget_nvptx_globalArgs` global.
> > > > > > 
> > > > > > If you want to provide compiler allocated memory to avoid the 
> > > > > > buffer use, no problem,
> > > > > > the `__kmpc_target_region_kernel_parallel` function allows to do 
> > > > > > so, see the `SharedMemPointers` flag. I wouldn't want to put the 
> > > > > > logic to generate these buffers in the front-end though.
> > > > > Why?
> > > > Why what?
> > > Why you don't want to put the buffers generation to the compiler?
> > > Why you don't want to put the buffers generation to the compiler?
> > 
> > I did never say that. I explicitly explained how the new interface allows 
> > you to do exactly that. Maybe you confuse it with me not wanting the 
> > generation to be part of the front-end (=Clang). That is because it is an 
> > implementation choice for performance, as such it should not be done by 
> > Clang if it might obstruct analyses later on, or could be done better with 
> > more analysis support.
> > 
> > If we conclude we actually need to share values, after we tried to 
> > eliminate the need for sharing altogether, we can decide if a global buffer 
> > is preferable. If so, we can check if
> > there is already one that is unused or if we would need to create a new one.
> > 
> > //Regarding this patch:// It is not supposed to change the behavior at all. 
> > This patch just introduces a more general buffer which is then used to 
> > implement the old buffer. How/when the buffer is used is not affected.
> Only Clang can do better analysis for the shared variables, the runtime cannot. 
> This part must be implemented in Clang, not in the runtime. I had no time to 
> fix the existing implementation of parameters passing in non-SPMD mode. But 
> if you are working on this, you should definitely implement this in the compiler, 
> not in the library.
> It does not break the analysis or anything else. It really produces better 
> code with the better performance and less memory use.
> If you're going to implement it, you need to implement it in the best 
> possible way. Who else is going to fix this later, when we run into the 
> problems with the shared memory and/or `malloc`ed memory?
> Only Clang can do better analysis for the shared variables, the runtime cannot. 
> This part must be implemented in Clang, not in the runtime. I had no time to 
> fix the existing implementation of parameters passing in non-SPMD mode. But 
> if you are working on this, you should definitely implement this in the compiler, 
> not in the library.

Clang is not the right place for analysis. //That is what the patch set is all 
about.

[PATCH] D59319: [OpenMP][Offloading][1/3] A generic and simple target region interface

2019-03-22 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 191984.
jdoerfert added a comment.

Introduce a ternary mode for parallel regions, fix minor mistakes


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59319/new/

https://reviews.llvm.org/D59319

Files:
  openmp/libomptarget/deviceRTLs/common/target_region.h
  openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
  openmp/libomptarget/deviceRTLs/nvptx/src/target_region.cu

Index: openmp/libomptarget/deviceRTLs/nvptx/src/target_region.cu
===
--- /dev/null
+++ openmp/libomptarget/deviceRTLs/nvptx/src/target_region.cu
@@ -0,0 +1,210 @@
+//===-- target_region.cu  CUDA impl. of the target region interface -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file contains the implementation of the common target region interface.
+//
+//===--===//
+
+// Include the native definitions first as certain defines might be needed in
+// the common interface definition below.
+#include "omptarget-nvptx.h"
+#include "interface.h"
+
+#include "../../common/target_region.h"
+
+EXTERN void *__kmpc_target_region_kernel_get_shared_memory() {
+  return _shared_bytes_buffer_memory.begin();
+}
+EXTERN void *__kmpc_target_region_kernel_get_private_memory() {
+  return ((char *)_shared_bytes_buffer_memory.begin()) +
+ _shared_bytes_buffer_memory.get_offset();
+}
+
+/// Simple generic state machine for worker threads.
+INLINE static void
+__kmpc_target_region_state_machine(ident_t *Ident,
+   bool IsOMPRuntimeInitialized) {
+
+  do {
+void *WorkFn = 0;
+
+// Wait for the signal that we have a new work function.
+__kmpc_barrier_simple_spmd(Ident, 0);
+
+// Retrieve the work function from the runtime.
+bool IsActive = __kmpc_kernel_parallel(&WorkFn, IsOMPRuntimeInitialized);
+
+// If there is nothing more to do, break out of the state machine by
+// returning to the caller.
+if (!WorkFn)
+  return;
+
+if (IsActive) {
+  void *SharedVars = __kmpc_target_region_kernel_get_shared_memory();
+  void *PrivateVars = __kmpc_target_region_kernel_get_private_memory();
+
+  ((ParallelWorkFnTy)WorkFn)(SharedVars, PrivateVars);
+
+  __kmpc_kernel_end_parallel();
+}
+
+__kmpc_barrier_simple_spmd(Ident, 0);
+
+  } while (true);
+}
+
+/// Filter threads into masters and workers. If \p UseStateMachine is true,
+/// required workers will enter a state machine and be trapped there.
+/// Master and surplus worker threads will return from this function immediately
+/// while required workers will only return once there is no more work. The
+/// return value indicates if the thread is a master (1), a surplus worker (0),
+/// or a finished required worker released from the state machine (-1).
+INLINE static int8_t
+__kmpc_target_region_thread_filter(ident_t *Ident, unsigned ThreadLimit,
+   bool UseStateMachine,
+   bool IsOMPRuntimeInitialized) {
+
+  unsigned TId = GetThreadIdInBlock();
+  bool IsWorker = TId < ThreadLimit;
+
+  if (IsWorker) {
+if (UseStateMachine)
+  __kmpc_target_region_state_machine(Ident, IsOMPRuntimeInitialized);
+return -1;
+  }
+
+  return TId == GetMasterThreadID();
+}
+
+EXTERN int8_t __kmpc_target_region_kernel_init(ident_t *Ident, bool UseSPMDMode,
+   bool RequiresOMPRuntime,
+   bool UseStateMachine,
+   bool RequiresDataSharing) {
+  unsigned NumThreads = GetNumberOfThreadsInBlock();
+
+  // Handle the SPMD case first.
+  if (UseSPMDMode) {
+
+__kmpc_spmd_kernel_init(NumThreads, RequiresOMPRuntime,
+RequiresDataSharing);
+
+if (RequiresDataSharing)
+  __kmpc_data_sharing_init_stack_spmd();
+
+return 1;
+  }
+
+  // Reserve one WARP in non-SPMD mode for the masters.
+  unsigned ThreadLimit = NumThreads - WARPSIZE;
+  int8_t FilterVal = __kmpc_target_region_thread_filter(
+  Ident, ThreadLimit, UseStateMachine, RequiresOMPRuntime);
+
+  // If the filter returns 1 the executing thread is a team master which will
+  // initialize the kernel in the following.
+  if (FilterVal == 1) {
+__kmpc_kernel_init(ThreadLimit, RequiresOMPRuntime);
+__kmpc_data_sharing_init_stack();
+_shared_bytes_buffer_memory.init();
+  }
+
+  return FilterVal;
+}
+
+EXTERN void __kmpc_target_region_kernel_deinit(ident_t *Ident, bool UseSPMDMode,
+   bool RequiredO

[PATCH] D59418: [OpenMP][Offloading] Extract common functionality

2019-03-22 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 191985.
jdoerfert added a comment.

Minor update


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59418/new/

https://reviews.llvm.org/D59418

Files:
  clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
  clang/lib/CodeGen/CGOpenMPRuntimeTarget.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeTarget.h
  clang/lib/CodeGen/CMakeLists.txt

Index: clang/lib/CodeGen/CMakeLists.txt
===
--- clang/lib/CodeGen/CMakeLists.txt
+++ clang/lib/CodeGen/CMakeLists.txt
@@ -69,6 +69,7 @@
   CGOpenCLRuntime.cpp
   CGOpenMPRuntime.cpp
   CGOpenMPRuntimeNVPTX.cpp
+  CGOpenMPRuntimeTarget.cpp
   CGRecordLayoutBuilder.cpp
   CGStmt.cpp
   CGStmtOpenMP.cpp
Index: clang/lib/CodeGen/CGOpenMPRuntimeTarget.h
===
--- /dev/null
+++ clang/lib/CodeGen/CGOpenMPRuntimeTarget.h
@@ -0,0 +1,104 @@
+//===-- CGOpenMPRuntimeTarget.h --- Common OpenMP target codegen --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// Code common to all OpenMP target codegens.
+//
+//===--===//
+
+#ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMETARGET_H
+#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMETARGET_H
+
+#include "CGOpenMPRuntime.h"
+
+namespace clang {
+namespace CodeGen {
+
+struct CGOpenMPRuntimeTarget : public CGOpenMPRuntime {
+
+  explicit CGOpenMPRuntimeTarget(CodeGenModule &CGM);
+
+  /// Defines the execution mode.
+  enum ExecutionMode {
+/// SPMD execution mode (all threads are worker threads).
+EM_SPMD,
+/// Non-SPMD execution mode (1 master thread, others are workers).
+EM_NonSPMD,
+/// Unknown execution mode (orphaned directive).
+EM_Unknown,
+  };
+
+  /// Return the execution mode; if not overridden this is always Unknown.
+  virtual ExecutionMode getExecutionMode() const { return EM_Unknown; }
+
+  /// Return the value declaration encapsulated in the expression \p E.
+  static const ValueDecl *getUnderlyingVar(const Expr *E);
+
+  //
+  // Base class overrides.
+  //
+
+  /// Creates offloading entry for the provided entry ID \a ID,
+  /// address \a Addr, size \a Size, and flags \a Flags.
+  void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr,
+  uint64_t Size, int32_t Flags,
+  llvm::GlobalValue::LinkageTypes Linkage) override;
+
+  /// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
+  /// global_tid, int proc_bind) to generate code for 'proc_bind' clause.
+  virtual void emitProcBindClause(CodeGenFunction &CGF,
+  OpenMPProcBindClauseKind ProcBind,
+  SourceLocation Loc) override;
+
+  /// Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
+  /// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
+  /// clause.
+  /// \param NumThreads An integer value of threads.
+  virtual void emitNumThreadsClause(CodeGenFunction &CGF,
+llvm::Value *NumThreads,
+SourceLocation Loc) override;
+
+  /// Set the number of teams to \p NumTeams and the thread limit to
+  /// \p ThreadLimit.
+  ///
+  /// \param NumTeams An integer expression of teams.
+  /// \param ThreadLimit An integer expression of threads.
+  void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams,
+  const Expr *ThreadLimit, SourceLocation Loc) override;
+
+  /// Choose a default value for the schedule clause.
+  void getDefaultScheduleAndChunk(CodeGenFunction &CGF,
+  const OMPLoopDirective &S,
+  OpenMPScheduleClauseKind &ScheduleKind,
+  const Expr *&ChunkExpr) const override;
+
+  /// Emits code for teams call of the \a OutlinedFn with
+  /// variables captured in a record which address is stored in \a
+  /// CapturedStruct.
+  /// \param OutlinedFn Outlined function to be run by team masters. Type of
+  /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
+  /// \param CapturedVars A pointer to the record with the references to
+  /// variables used in \a OutlinedFn function.
+  ///
+  void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D,
+ SourceLocation Loc, llvm::Function *OutlinedFn,
+ ArrayRef CapturedVars) override;
+
+  /// Returns default address space for the constant firstprivates, __constant__
+  /// address space by default.
+  uns

[PATCH] D59328: [OpenMP][Offloading][2/3] Codegen for target regions (TRegions)

2019-03-22 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 191988.
jdoerfert added a comment.

Synchronize interface types


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59328/new/

https://reviews.llvm.org/D59328

Files:
  clang/lib/CodeGen/CGOpenMPRuntimeTRegion.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeTRegion.h
  clang/lib/CodeGen/CGOpenMPRuntimeTarget.h
  clang/lib/CodeGen/CMakeLists.txt
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/test/OpenMP/target_tregion_no_SPMD_mode.c

Index: clang/test/OpenMP/target_tregion_no_SPMD_mode.c
===
--- /dev/null
+++ clang/test/OpenMP/target_tregion_no_SPMD_mode.c
@@ -0,0 +1,72 @@
+// RUN: %clang_cc1 -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -fopenmp -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -mllvm -openmp-tregion-runtime -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
+
+// CHECK: loop_in_loop_in_tregion
+// CHECK:  %0 = call i8 @__kmpc_target_region_kernel_init(%struct.ident_t* null, i1 false, i1 true, i1 true, i1 true)
+// CHECK:  call void @__kmpc_target_region_kernel_deinit(%struct.ident_t* null, i1 false, i1 true)
+void loop_in_loop_in_tregion(int *A, int *B) {
+#pragma omp target
+  for (int i = 0; i < 512; i++) {
+for (int j = 0; j < 1024; j++)
+  A[j] += B[i + j];
+  }
+}
+
+// CHECK: parallel_loops_and_accesses_in_tregion
+// CHECK:  %0 = call i8 @__kmpc_target_region_kernel_init(%struct.ident_t* null, i1 false, i1 true, i1 true, i1 true)
+// CHECK:  call void @__kmpc_target_region_kernel_parallel(%struct.ident_t* null, i1 false, i1 true, void (i8*, i8*)* @.omp_TRegion._wrapper, i8* undef, i16 0, i8* %2, i16 16, i1 false)
+// CHECK:  call void @__kmpc_target_region_kernel_parallel(%struct.ident_t* null, i1 false, i1 true, void (i8*, i8*)* @.omp_TRegion.1_wrapper, i8* undef, i16 0, i8* %5, i16 16, i1 false)
+// CHECK:  call void @__kmpc_target_region_kernel_parallel(%struct.ident_t* null, i1 false, i1 true, void (i8*, i8*)* @.omp_TRegion.2_wrapper, i8* undef, i16 0, i8* %8, i16 16, i1 false)
+// CHECK:  call void @__kmpc_target_region_kernel_deinit(%struct.ident_t* null, i1 false, i1 true)
+void parallel_loops_and_accesses_in_tregion(int *A, int *B) {
+#pragma omp target
+  {
+#pragma omp parallel for
+for (int j = 0; j < 1024; j++)
+  A[j] += B[0 + j];
+#pragma omp parallel for
+for (int j = 0; j < 1024; j++)
+  A[j] += B[1 + j];
+#pragma omp parallel for
+for (int j = 0; j < 1024; j++)
+  A[j] += B[2 + j];
+
+// This needs a guard in SPMD mode
+A[0] = B[0];
+  }
+}
+
+void extern_func();
+static void parallel_loop(int *A, int *B, int i) {
+#pragma omp parallel for
+  for (int j = 0; j < 1024; j++)
+A[j] += B[i + j];
+}
+
+// CHECK: parallel_loop_in_function_in_loop_with_global_acc_in_tregion
+// CHECK:  %1 = call i8 @__kmpc_target_region_kernel_init(%struct.ident_t* null, i1 false, i1 true, i1 true, i1 true)
+// CHECK:  call void @__kmpc_target_region_kernel_deinit(%struct.ident_t* null, i1 false, i1 true)
+int Global[512];
+void parallel_loop_in_function_in_loop_with_global_acc_in_tregion(int *A, int *B) {
+#pragma omp target
+  for (int i = 0; i < 512; i++) {
+parallel_loop(A, B, i);
+Global[i]++;
+  }
+}
+
+// CHECK: parallel_loop
+// CHECK:  call void @__kmpc_target_region_kernel_parallel(%struct.ident_t* null, i1 false, i1 true, void (i8*, i8*)* @.omp_TRegion.3_wrapper, i8* undef, i16 0, i8* %0, i16 24, i1 false)
+
+// CHECK: parallel_loops_in_functions_and_extern_func_in_tregion
+// CHECK:  %0 = call i8 @__kmpc_target_region_kernel_init(%struct.ident_t* null, i1 false, i1 true, i1 true, i1 true)
+// CHECK:  call void @__kmpc_target_region_kernel_deinit(%struct.ident_t* null, i1 false, i1 true)
+void parallel_loops_in_functions_and_extern_func_in_tregion(int *A, int *B) {
+#pragma omp target
+  {
+parallel_loop(A, B, 1);
+parallel_loop(A, B, 2);
+extern_func();
+parallel_loop(A, B, 3);
+  }
+}
Index: clang/lib/CodeGen/CodeGenModule.cpp
===
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -20,6 +20,7 @@
 #include "CGOpenCLRuntime.h"
 #include "CGOpenMPRuntime.h"
 #include "CGOpenMPRuntimeNVPTX.h"
+#include "CGOpenMPRuntimeTRegion.h"
 #include "CodeGenFunction.h"
 #include "CodeGenPGO.h"
 #include "ConstantEmitter.h"
@@ -67,6 +68,11 @@
 llvm::cl::desc("Emit limited coverage mapping information (experimental)"),
 llvm::cl::init(false));
 
+static llvm::cl::opt UseGenericTRegionInterface(
+"openmp-tregion-runtime", llvm::cl::ZeroOrMore, llvm::cl::Hidden,
+llvm::cl::desc("Use the generic target region OpenMP runtime interface"),
+llvm::cl::init(false));
+
 static const char AnnotationSection[] = "llvm.metadata";
 
 static CGCXXABI *createCXXABI(CodeGenModul

[PATCH] D59421: [OpenMP][Offloading] Allow to build the TRegion interface functions

2019-03-22 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 191987.
jdoerfert added a comment.

Actually synchronize interface types


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59421/new/

https://reviews.llvm.org/D59421

Files:
  clang/lib/CodeGen/CGOpenMPRuntimeTarget.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeTarget.h

Index: clang/lib/CodeGen/CGOpenMPRuntimeTarget.h
===
--- clang/lib/CodeGen/CGOpenMPRuntimeTarget.h
+++ clang/lib/CodeGen/CGOpenMPRuntimeTarget.h
@@ -122,6 +122,35 @@
 /// Call to void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32
 /// global_tid);
 OMPRTL__kmpc_barrier_simple_spmd,
+
+/// Target Region (TREgion) Kernel interface
+///
+///{
+
+/// char __kmpc_target_region_kernel_init(ident_t *Ident,
+///   bool UseSPMDMode,
+///   bool UseStateMachine,
+///   bool RequiresOMPRuntime,
+///   bool RequiresDataSharing);
+OMPRTL__kmpc_target_region_kernel_init,
+
+/// void __kmpc_target_region_kernel_deinit(ident_t *Ident,
+/// bool UseSPMDMode,
+/// bool RequiredOMPRuntime);
+OMPRTL__kmpc_target_region_kernel_deinit,
+
+/// void __kmpc_target_region_kernel_parallel(ident_t *Ident,
+///   uint16_t UseSPMDMode,
+///   bool RequiredOMPRuntime,
+///   ParallelWorkFnTy WorkFn,
+///   void *SharedVars,
+///   uint16_t SharedVarsBytes,
+///   void *PrivateVars,
+///   uint16_t PrivateVarsBytes,
+///   bool SharedPointers);
+OMPRTL__kmpc_target_region_kernel_parallel,
+
+///}
   };
 
   /// Returns the OpenMP runtime function identified by \p ID.
Index: clang/lib/CodeGen/CGOpenMPRuntimeTarget.cpp
===
--- clang/lib/CodeGen/CGOpenMPRuntimeTarget.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntimeTarget.cpp
@@ -52,6 +52,7 @@
 llvm::FunctionCallee CGOpenMPRuntimeTarget::createTargetRuntimeFunction(
 OpenMPRTLTargetFunctions ID) {
   llvm::FunctionCallee RTLFn = nullptr;
+  auto *I1Ty = llvm::IntegerType::getInt1Ty(CGM.getLLVMContext());
   switch (ID) {
   case OMPRTL_NVPTX__kmpc_kernel_init: {
 // Build void __kmpc_kernel_init(kmp_int32 thread_limit, int16_t
@@ -343,7 +344,96 @@
 ->addFnAttr(llvm::Attribute::Convergent);
 break;
   }
+  case OMPRTL__kmpc_target_region_kernel_init: {
+// char __kmpc_target_region_kernel_init(ident_t *Ident,
+//   bool UseSPMDMode,
+//   bool UseStateMachine,
+//   bool RequiresOMPRuntime,
+//   bool RequiresDataSharing);
+llvm::Type *TypeParams[] = {getIdentTyPointerTy(), I1Ty, I1Ty, I1Ty, I1Ty};
+auto *FnTy =
+llvm::FunctionType::get(CGM.Int8Ty, TypeParams, /* isVarArg */ false);
+RTLFn =
+CGM.CreateRuntimeFunction(FnTy, "__kmpc_target_region_kernel_init");
+
+llvm::Function *RTFn = cast(RTLFn.getCallee());
+RTFn->addParamAttr(0, llvm::Attribute::NoCapture);
+break;
+  }
+  case OMPRTL__kmpc_target_region_kernel_deinit: {
+// void __kmpc_target_region_kernel_deinit(ident_t *Ident,
+// bool UseSPMDMode,
+// bool RequiredOMPRuntime);
+llvm::Type *TypeParams[] = {getIdentTyPointerTy(), I1Ty, I1Ty};
+auto *FnTy =
+llvm::FunctionType::get(CGM.VoidTy, TypeParams, /* isVarArg */ false);
+RTLFn =
+CGM.CreateRuntimeFunction(FnTy, "__kmpc_target_region_kernel_deinit");
+
+llvm::Function *RTFn = cast(RTLFn.getCallee());
+RTFn->addParamAttr(0, llvm::Attribute::NoCapture);
+break;
+  }
+  case OMPRTL__kmpc_target_region_kernel_parallel: {
+// typedef void (*ParallelWorkFnTy)(void *, void *);
+auto *ParWorkFnTy =
+llvm::FunctionType::get(CGM.VoidTy, {CGM.VoidPtrTy, CGM.VoidPtrTy},
+/* isVarArg */ false);
+
+// void __kmpc_target_region_kernel_parallel(ident_t *Ident,
+//   uint16_t UseSPMDMode,
+//   bool RequiredOMPRuntime,
+//   ParallelWorkFnTy WorkFn,
+//   void *SharedVars,
+//   

[PATCH] D59421: [OpenMP][Offloading] Allow to build the TRegion interface functions

2019-03-22 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 191986.
jdoerfert added a comment.

Synchronize interface types


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59421/new/

https://reviews.llvm.org/D59421

Files:
  clang/lib/CodeGen/CGOpenMPRuntimeTarget.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeTarget.h

Index: clang/lib/CodeGen/CGOpenMPRuntimeTarget.h
===
--- clang/lib/CodeGen/CGOpenMPRuntimeTarget.h
+++ clang/lib/CodeGen/CGOpenMPRuntimeTarget.h
@@ -122,6 +122,35 @@
 /// Call to void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32
 /// global_tid);
 OMPRTL__kmpc_barrier_simple_spmd,
+
+/// Target Region (TREgion) Kernel interface
+///
+///{
+
+/// char __kmpc_target_region_kernel_init(ident_t *Ident,
+///   bool UseSPMDMode,
+///   bool UseStateMachine,
+///   bool RequiresOMPRuntime,
+///   bool RequiresDataSharing);
+OMPRTL__kmpc_target_region_kernel_init,
+
+/// void __kmpc_target_region_kernel_deinit(ident_t *Ident,
+/// bool UseSPMDMode,
+/// bool RequiredOMPRuntime);
+OMPRTL__kmpc_target_region_kernel_deinit,
+
+/// void __kmpc_target_region_kernel_parallel(ident_t *Ident,
+///   uint16_t UseSPMDMode,
+///   bool RequiredOMPRuntime,
+///   ParallelWorkFnTy WorkFn,
+///   void *SharedVars,
+///   uint16_t SharedVarsBytes,
+///   void *PrivateVars,
+///   uint16_t PrivateVarsBytes,
+///   bool SharedPointers);
+OMPRTL__kmpc_target_region_kernel_parallel,
+
+///}
   };
 
   /// Returns the OpenMP runtime function identified by \p ID.
Index: clang/lib/CodeGen/CGOpenMPRuntimeTarget.cpp
===
--- clang/lib/CodeGen/CGOpenMPRuntimeTarget.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntimeTarget.cpp
@@ -52,6 +52,7 @@
 llvm::FunctionCallee CGOpenMPRuntimeTarget::createTargetRuntimeFunction(
 OpenMPRTLTargetFunctions ID) {
   llvm::FunctionCallee RTLFn = nullptr;
+  auto *I1Ty = llvm::IntegerType::getInt1Ty(CGM.getLLVMContext());
   switch (ID) {
   case OMPRTL_NVPTX__kmpc_kernel_init: {
 // Build void __kmpc_kernel_init(kmp_int32 thread_limit, int16_t
@@ -343,7 +344,96 @@
 ->addFnAttr(llvm::Attribute::Convergent);
 break;
   }
+  case OMPRTL__kmpc_target_region_kernel_init: {
+// char __kmpc_target_region_kernel_init(ident_t *Ident,
+//   bool UseSPMDMode,
+//   bool UseStateMachine,
+//   bool RequiresOMPRuntime,
+//   bool RequiresDataSharing);
+llvm::Type *TypeParams[] = {getIdentTyPointerTy(), I1Ty, I1Ty, I1Ty, I1Ty};
+auto *FnTy =
+llvm::FunctionType::get(CGM.Int8Ty, TypeParams, /* isVarArg */ false);
+RTLFn =
+CGM.CreateRuntimeFunction(FnTy, "__kmpc_target_region_kernel_init");
+
+llvm::Function *RTFn = cast(RTLFn.getCallee());
+RTFn->addParamAttr(0, llvm::Attribute::NoCapture);
+break;
+  }
+  case OMPRTL__kmpc_target_region_kernel_deinit: {
+// void __kmpc_target_region_kernel_deinit(ident_t *Ident,
+// bool UseSPMDMode,
+// bool RequiredOMPRuntime);
+llvm::Type *TypeParams[] = {getIdentTyPointerTy(), I1Ty, I1Ty};
+auto *FnTy =
+llvm::FunctionType::get(CGM.VoidTy, TypeParams, /* isVarArg */ false);
+RTLFn =
+CGM.CreateRuntimeFunction(FnTy, "__kmpc_target_region_kernel_deinit");
+
+llvm::Function *RTFn = cast(RTLFn.getCallee());
+RTFn->addParamAttr(0, llvm::Attribute::NoCapture);
+break;
+  }
+  case OMPRTL__kmpc_target_region_kernel_parallel: {
+// typedef void (*ParallelWorkFnTy)(void *, void *);
+auto *ParWorkFnTy =
+llvm::FunctionType::get(CGM.VoidTy, {CGM.VoidPtrTy, CGM.VoidPtrTy},
+/* isVarArg */ false);
+
+// void __kmpc_target_region_kernel_parallel(ident_t *Ident,
+//   uint16_t UseSPMDMode,
+//   bool RequiredOMPRuntime,
+//   ParallelWorkFnTy WorkFn,
+//   void *SharedVars,
+//   uint

[PATCH] D59919: [Attributor] Deduce "returned" argument attribute

2019-03-28 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert created this revision.
jdoerfert added reviewers: homerdin, hfinkel, fedor.sergeev, sanjoy, spatel, 
nlopes, nicholas, reames.
Herald added subscribers: cfe-commits, bollu, hiraditya.
Herald added projects: clang, LLVM.

Deduce the "returned" argument attribute by collecting all potentially
returned values.

Note: Not only the unique return value, if any, can be used by
subsequent attributes but also the set of all potentially returned
values as well as the mapping from returned values to return
instructions that they originate from.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D59919

Files:
  clang/test/CodeGenOpenCL/as_type.cl
  llvm/include/llvm/Transforms/IPO/Attributor.h
  llvm/lib/Transforms/IPO/Attributor.cpp
  llvm/test/Transforms/FunctionAttrs/SCC1.ll
  llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll
  llvm/test/Transforms/FunctionAttrs/arg_returned.ll

Index: llvm/test/Transforms/FunctionAttrs/arg_returned.ll
===
--- llvm/test/Transforms/FunctionAttrs/arg_returned.ll
+++ llvm/test/Transforms/FunctionAttrs/arg_returned.ll
@@ -1,4 +1,8 @@
-; RUN: opt -functionattrs -attributor -S < %s | FileCheck %s
+; RUN: opt -functionattrs -S < %s | FileCheck %s --check-prefix=FNATTR
+; RUN: opt -attributor -S < %s | FileCheck %s --check-prefix=ATTRIBUTOR
+; RUN: opt -attributor -functionattrs -S < %s | FileCheck %s --check-prefix=BOTH
+; RUN: opt -attributor -attributor-max-iterations=18 -S < %s | FileCheck %s --check-prefix=FEW_IT
+; RUN: opt -attributor -attributor-max-iterations=19 -functionattrs -S < %s | FileCheck %s --check-prefix=BOTH
 ;
 ; Test cases specifically designed for the "returned" argument attribute.
 ; We use FIXME's to indicate problems and missing attributes.
@@ -16,13 +20,20 @@
 
 ; TEST 1
 ;
-; CHECK: define dso_local i32 @sink_r0(i32 returned %r) [[NoInlineNoRecurseNoUnwindReadnoneUwtable:#[0-9]]]
+; BOTH: define dso_local i32 @sink_r0(i32 returned %r) [[NoInlineNoRecurseNoUnwindReadnoneUwtable:#[0-9]]]
+; BOTH: define dso_local i32 @scc_r1(i32 %a, i32 returned %r, i32 %b) [[NoInlineNoUnwindReadnoneUwtable:#[0-9]]]
+; BOTH: define dso_local i32 @scc_r2(i32 %a, i32 %b, i32 returned %r) [[NoInlineNoUnwindReadnoneUwtable]]
+; BOTH: define dso_local i32 @scc_rX(i32 %a, i32 %b, i32 %r) [[NoInlineNoUnwindReadnoneUwtable]]
 ;
-; FIXME: returned on %r missing:
-; CHECK: define dso_local i32 @scc_r1(i32 %a, i32 %r, i32 %b) [[NoInlineNoUnwindReadnoneUwtable:#[0-9]]]
+; FNATTR: define dso_local i32 @sink_r0(i32 returned %r) [[NoInlineNoRecurseNoUnwindReadnoneUwtable:#[0-9]]]
+; FNATTR: define dso_local i32 @scc_r1(i32 %a, i32 %r, i32 %b) [[NoInlineNoUnwindReadnoneUwtable:#[0-9]]]
+; FNATTR: define dso_local i32 @scc_r2(i32 %a, i32 %b, i32 %r) [[NoInlineNoUnwindReadnoneUwtable]]
+; FNATTR: define dso_local i32 @scc_rX(i32 %a, i32 %b, i32 %r) [[NoInlineNoUnwindReadnoneUwtable]]
 ;
-; FIXME: returned on %r missing:
-; CHECK: define dso_local i32 @scc_r2(i32 %a, i32 %b, i32 %r) [[NoInlineNoUnwindReadnoneUwtable]]
+; ATTRIBUTOR: define dso_local i32 @sink_r0(i32 returned %r) [[NoInlineNoRecurseNoUnwindReadnoneUwtable:#[0-9]]]
+; ATTRIBUTOR: define dso_local i32 @scc_r1(i32 %a, i32 returned %r, i32 %b) [[NoInlineNoUnwindReadnoneUwtable:#[0-9]]]
+; ATTRIBUTOR: define dso_local i32 @scc_r2(i32 %a, i32 %b, i32 returned %r) [[NoInlineNoUnwindReadnoneUwtable]]
+; ATTRIBUTOR: define dso_local i32 @scc_rX(i32 %a, i32 %b, i32 %r) [[NoInlineNoUnwindReadnoneUwtable]]
 ;
 ; int scc_r1(int a, int b, int r);
 ; int scc_r2(int a, int b, int r);
@@ -157,13 +168,17 @@
 
 ; TEST 2
 ;
-; CHECK: define dso_local double* @ptr_sink_r0(double* readnone returned %r) [[NoInlineNoRecurseNoUnwindReadnoneUwtable]]
+; BOTH: define dso_local double* @ptr_sink_r0(double* readnone returned %r) [[NoInlineNoRecurseNoUnwindReadnoneUwtable]]
+; BOTH: define dso_local double* @ptr_scc_r1(double* %a, double* readnone returned %r, double* nocapture readnone %b) [[NoInlineNoUnwindReadnoneUwtable]]
+; BOTH: define dso_local double* @ptr_scc_r2(double* readnone %a, double* readnone %b, double* readnone returned %r) [[NoInlineNoUnwindReadnoneUwtable]]
 ;
-; FIXME: returned on %r missing:
-; CHECK: define dso_local double* @ptr_scc_r1(double* %a, double* readnone %r, double* nocapture readnone %b) [[NoInlineNoUnwindReadnoneUwtable]]
+; FNATTR: define dso_local double* @ptr_sink_r0(double* readnone returned %r) [[NoInlineNoRecurseNoUnwindReadnoneUwtable]]
+; FNATTR: define dso_local double* @ptr_scc_r1(double* %a, double* readnone %r, double* nocapture readnone %b) [[NoInlineNoUnwindReadnoneUwtable]]
+; FNATTR: define dso_local double* @ptr_scc_r2(double* readnone %a, double* readnone %b, double* readnone %r) [[NoInlineNoUnwindReadnoneUwtable]]
 ;
-; FIXME: returned on %r missing:
-; CHECK: define dso_local double* @ptr_scc_r2(double* readnone %a, double* readnone %b, double* readnone %r) [[NoInlineNoUnwindReadnoneUwtable]]
+; ATTRIBUTOR: define dso

[PATCH] D59922: [Attributor] Deduce "no-capture" argument attribute

2019-03-28 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert created this revision.
jdoerfert added reviewers: homerdin, hfinkel, fedor.sergeev, sanjoy, spatel, 
nlopes, nicholas, reames.
Herald added subscribers: cfe-commits, bollu, hiraditya.
Herald added projects: clang, LLVM.

Add the no-capture argument attribute deduction to the Attributor
fixpoint framework.

The new string attribute "no-capture-maybe-returned" is introduced to
allow deduction of no-capture through functions that "capture" an
argument but only by "returning" it.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D59922

Files:
  clang/test/CodeGenObjC/os_log.m
  clang/test/CodeGenOpenCL/as_type.cl
  llvm/include/llvm/Transforms/IPO/Attributor.h
  llvm/lib/Transforms/IPO/Attributor.cpp
  llvm/test/Transforms/FunctionAttrs/2009-01-02-LocalStores.ll
  llvm/test/Transforms/FunctionAttrs/SCC1.ll
  llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll
  llvm/test/Transforms/FunctionAttrs/arg_returned.ll
  llvm/test/Transforms/FunctionAttrs/incompatible_fn_attrs.ll
  llvm/test/Transforms/FunctionAttrs/nocapture.ll
  llvm/test/Transforms/FunctionAttrs/readattrs.ll

Index: llvm/test/Transforms/FunctionAttrs/readattrs.ll
===
--- llvm/test/Transforms/FunctionAttrs/readattrs.ll
+++ llvm/test/Transforms/FunctionAttrs/readattrs.ll
@@ -12,7 +12,7 @@
   ret void
 }
 
-; CHECK: define i8* @test2(i8* readnone returned %p)
+; CHECK: define i8* @test2(i8* readnone returned "no-capture-maybe-returned" %p)
 define i8* @test2(i8* %p) {
   store i32 0, i32* @x
   ret i8* %p
@@ -54,13 +54,13 @@
   ret void
 }
 
-; CHECK: define i32* @test8_1(i32* readnone returned %p)
+; CHECK: define i32* @test8_1(i32* readnone returned "no-capture-maybe-returned" %p)
 define i32* @test8_1(i32* %p) {
 entry:
   ret i32* %p
 }
 
-; CHECK: define void @test8_2(i32* %p)
+; CHECK: define void @test8_2(i32* nocapture %p)
 define void @test8_2(i32* %p) {
 entry:
   %call = call i32* @test8_1(i32* %p)
Index: llvm/test/Transforms/FunctionAttrs/nocapture.ll
===
--- llvm/test/Transforms/FunctionAttrs/nocapture.ll
+++ llvm/test/Transforms/FunctionAttrs/nocapture.ll
@@ -3,7 +3,7 @@
 
 @g = global i32* null		;  [#uses=1]
 
-; CHECK: define i32* @c1(i32* readnone returned %q)
+; CHECK: define i32* @c1(i32* readnone returned "no-capture-maybe-returned" %q)
 define i32* @c1(i32* %q) {
 	ret i32* %q
 }
@@ -134,7 +134,7 @@
 	ret void
 }
 
-; CHECK: define void @test1_1(i8* nocapture readnone %x1_1, i8* %y1_1)
+; CHECK: define void @test1_1(i8* nocapture %x1_1, i8* nocapture %y1_1)
 ; It would be acceptable to add readnone to %y1_1 and %y1_2.
 define void @test1_1(i8* %x1_1, i8* %y1_1) {
   call i8* @test1_2(i8* %x1_1, i8* %y1_1)
@@ -142,7 +142,7 @@
   ret void
 }
 
-; CHECK: define i8* @test1_2(i8* nocapture readnone %x1_2, i8* returned %y1_2)
+; CHECK: define i8* @test1_2(i8* nocapture %x1_2, i8* returned "no-capture-maybe-returned" %y1_2)
 define i8* @test1_2(i8* %x1_2, i8* %y1_2) {
   call void @test1_1(i8* %x1_2, i8* %y1_2)
   store i32* null, i32** @g
@@ -156,21 +156,21 @@
   ret void
 }
 
-; CHECK: define void @test3(i8* nocapture readnone %x3, i8* nocapture readnone %y3, i8* nocapture readnone %z3)
+; CHECK: define void @test3(i8* nocapture %x3, i8* nocapture readnone %y3, i8* nocapture %z3)
 define void @test3(i8* %x3, i8* %y3, i8* %z3) {
   call void @test3(i8* %z3, i8* %y3, i8* %x3)
   store i32* null, i32** @g
   ret void
 }
 
-; CHECK: define void @test4_1(i8* %x4_1)
+; CHECK: define void @test4_1(i8* nocapture readnone %x4_1)
 define void @test4_1(i8* %x4_1) {
   call i8* @test4_2(i8* %x4_1, i8* %x4_1, i8* %x4_1)
   store i32* null, i32** @g
   ret void
 }
 
-; CHECK: define i8* @test4_2(i8* nocapture readnone %x4_2, i8* readnone returned %y4_2, i8* nocapture readnone %z4_2)
+; CHECK: define i8* @test4_2(i8* nocapture readnone %x4_2, i8* readnone returned "no-capture-maybe-returned" %y4_2, i8* nocapture readnone %z4_2)
 define i8* @test4_2(i8* %x4_2, i8* %y4_2, i8* %z4_2) {
   call void @test4_1(i8* null)
   store i32* null, i32** @g
Index: llvm/test/Transforms/FunctionAttrs/incompatible_fn_attrs.ll
===
--- llvm/test/Transforms/FunctionAttrs/incompatible_fn_attrs.ll
+++ llvm/test/Transforms/FunctionAttrs/incompatible_fn_attrs.ll
@@ -6,21 +6,21 @@
 
 ; Function Attrs: argmemonly
 define i32* @given_argmem_infer_readnone(i32* %p) #0 {
-; CHECK: define i32* @given_argmem_infer_readnone(i32* readnone returned %p) #0 {
+; CHECK: define i32* @given_argmem_infer_readnone(i32* readnone returned "no-capture-maybe-returned" %p) #0 {
 entry:
   ret i32* %p
 }
 
 ; Function Attrs: inaccessiblememonly
 define i32* @given_inaccessible_infer_readnone(i32* %p) #1 {
-; CHECK: define i32* @given_inaccessible_infer_readnone(i32* readnone returned %p) #0 {
+; CHECK: define i32* @given_inaccessible_infer_readnone(i32* readnone returned "no-captur

[PATCH] D59919: [Attributor] Deduce "returned" argument attribute

2019-03-28 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 192738.
jdoerfert added a comment.

Fix the last bug exposed by llvm-test-suite & SPEC2006


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59919/new/

https://reviews.llvm.org/D59919

Files:
  clang/test/CodeGenOpenCL/as_type.cl
  llvm/include/llvm/Transforms/IPO/Attributor.h
  llvm/lib/Transforms/IPO/Attributor.cpp
  llvm/test/Transforms/FunctionAttrs/SCC1.ll
  llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll
  llvm/test/Transforms/FunctionAttrs/arg_returned.ll

Index: llvm/test/Transforms/FunctionAttrs/arg_returned.ll
===
--- llvm/test/Transforms/FunctionAttrs/arg_returned.ll
+++ llvm/test/Transforms/FunctionAttrs/arg_returned.ll
@@ -1,28 +1,43 @@
-; RUN: opt -functionattrs -attributor -S < %s | FileCheck %s
+; RUN: opt -functionattrs -S < %s | FileCheck %s --check-prefix=FNATTR
+; RUN: opt -attributor -S < %s | FileCheck %s --check-prefix=ATTRIBUTOR
+; RUN: opt -attributor -functionattrs -S < %s | FileCheck %s --check-prefix=BOTH
+; RUN: opt -attributor -attributor-max-iterations=18 -S < %s | FileCheck %s --check-prefix=FEW_IT
+; RUN: opt -attributor -attributor-max-iterations=19 -functionattrs -S < %s | FileCheck %s --check-prefix=BOTH
 ;
 ; Test cases specifically designed for the "returned" argument attribute.
 ; We use FIXME's to indicate problems and missing attributes.
 ;
-; TEST 1: SCC test returning an integer value argument
-; TEST 2: the same SCC as in 1 returning a pointer value argument
-; TEST 3: a singleton SCC with a lot of recursive calls
-; TEST 4: address taken function with call to an external functions
-; TEST 5: call to a function that might be redifined at link time
-; TEST 6: returned argument goes through select and phi
-; TEST 7: returned argument goes through recursion, select, and phi
-; TEST 8: returned argument goes through bitcasts
-; TEST 9: returned argument goes through select and phi interleaved with bitcasts
+; TEST  1: SCC test returning an integer value argument
+; TEST  2: the same SCC as in 1 returning a pointer value argument
+; TEST  3: a singleton SCC with a lot of recursive calls
+; TEST  4: address taken function with call to an external functions
+; TEST  5: call to a function that might be redifined at link time
+; TEST  6: returned argument goes through select and phi
+; TEST  7: returned argument goes through recursion, select, and phi
+; TEST  8: returned argument goes through bitcasts
+; TEST  9: returned argument goes through select and phi interleaved with bitcasts
+; TEST 10: return argument or argument or undef
+; TEST 11: return undef or argument or argument
+; TEST 12: return undef or argument or undef
+; TEST 13: return argument or unknown call result
 
 
 ; TEST 1
 ;
-; CHECK: define dso_local i32 @sink_r0(i32 returned %r) [[NoInlineNoRecurseNoUnwindReadnoneUwtable:#[0-9]]]
+; BOTH: define dso_local i32 @sink_r0(i32 returned %r) [[NoInlineNoRecurseNoUnwindReadnoneUwtable:#[0-9]]]
+; BOTH: define dso_local i32 @scc_r1(i32 %a, i32 returned %r, i32 %b) [[NoInlineNoUnwindReadnoneUwtable:#[0-9]]]
+; BOTH: define dso_local i32 @scc_r2(i32 %a, i32 %b, i32 returned %r) [[NoInlineNoUnwindReadnoneUwtable]]
+; BOTH: define dso_local i32 @scc_rX(i32 %a, i32 %b, i32 %r) [[NoInlineNoUnwindReadnoneUwtable]]
 ;
-; FIXME: returned on %r missing:
-; CHECK: define dso_local i32 @scc_r1(i32 %a, i32 %r, i32 %b) [[NoInlineNoUnwindReadnoneUwtable:#[0-9]]]
+; FNATTR: define dso_local i32 @sink_r0(i32 returned %r) [[NoInlineNoRecurseNoUnwindReadnoneUwtable:#[0-9]]]
+; FNATTR: define dso_local i32 @scc_r1(i32 %a, i32 %r, i32 %b) [[NoInlineNoUnwindReadnoneUwtable:#[0-9]]]
+; FNATTR: define dso_local i32 @scc_r2(i32 %a, i32 %b, i32 %r) [[NoInlineNoUnwindReadnoneUwtable]]
+; FNATTR: define dso_local i32 @scc_rX(i32 %a, i32 %b, i32 %r) [[NoInlineNoUnwindReadnoneUwtable]]
 ;
-; FIXME: returned on %r missing:
-; CHECK: define dso_local i32 @scc_r2(i32 %a, i32 %b, i32 %r) [[NoInlineNoUnwindReadnoneUwtable]]
+; ATTRIBUTOR: define dso_local i32 @sink_r0(i32 returned %r) [[NoInlineNoRecurseNoUnwindReadnoneUwtable:#[0-9]]]
+; ATTRIBUTOR: define dso_local i32 @scc_r1(i32 %a, i32 returned %r, i32 %b) [[NoInlineNoUnwindReadnoneUwtable:#[0-9]]]
+; ATTRIBUTOR: define dso_local i32 @scc_r2(i32 %a, i32 %b, i32 returned %r) [[NoInlineNoUnwindReadnoneUwtable]]
+; ATTRIBUTOR: define dso_local i32 @scc_rX(i32 %a, i32 %b, i32 %r) [[NoInlineNoUnwindReadnoneUwtable]]
 ;
 ; int scc_r1(int a, int b, int r);
 ; int scc_r2(int a, int b, int r);
@@ -157,13 +172,17 @@
 
 ; TEST 2
 ;
-; CHECK: define dso_local double* @ptr_sink_r0(double* readnone returned %r) [[NoInlineNoRecurseNoUnwindReadnoneUwtable]]
+; BOTH: define dso_local double* @ptr_sink_r0(double* readnone returned %r) [[NoInlineNoRecurseNoUnwindReadnoneUwtable]]
+; BOTH: define dso_local double* @ptr_scc_r1(double* %a, double* readnone returned %r, double* nocapture readnone %b) [[NoInlineN

[PATCH] D59922: [Attributor] Deduce "no-capture" argument attribute

2019-03-28 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert marked 2 inline comments as done.
jdoerfert added inline comments.



Comment at: llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll:103
-; FIXME: returned missing for %a
 ; FIXME: We should *not* derive any attributes for the return value not 
present on the argument!
 ; CHECK: define dso_local noalias nonnull i32* @srec16(i32* nocapture readnone 
%a)

The comments on this one were off from the very beginning; I'll fix them, and 
there won't be a change to them in this commit.



Comment at: llvm/test/Transforms/FunctionAttrs/nocapture.ll:137
 
-; CHECK: define void @test1_1(i8* nocapture readnone %x1_1, i8* %y1_1)
 ; It would be acceptable to add readnone to %y1_1 and %y1_2.

So, the old FuncAttr deduction interleaves one of two memory behavior 
deductions with the capture analysis. With this patch, that capture analysis 
becomes obsolete because the Attributor has already added the annotations, and 
certain memory behavior attributes are therefore not detected anymore. There 
will be a memory behavior detection for the Attributor soon, which should make 
this problem go away.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59922/new/

https://reviews.llvm.org/D59922



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59980: [Attributor] Deduce memory behavior argument attributes

2019-03-28 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert created this revision.
jdoerfert added reviewers: homerdin, hfinkel, fedor.sergeev, sanjoy, spatel, 
nlopes, nicholas, reames.
Herald added subscribers: cfe-commits, bollu, aheejin, hiraditya, eraman, 
sbc100, javed.absar, nhaehnle, jvesely.
Herald added projects: clang, LLVM.
jdoerfert added parent revisions: D59922: [Attributor] Deduce "no-capture" 
argument attribute, D59979: [Attributor][NFC] Add helper functions to deal with 
bit-encodings.

Deduce the memory behavior, aka "read-none", "read-only", or
"write-only", for function arguments.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D59980

Files:
  clang/test/CodeGen/arm-vfp16-arguments.c
  clang/test/CodeGen/systemz-inline-asm.c
  clang/test/CodeGenCXX/wasm-args-returns.cpp
  clang/test/CodeGenObjC/os_log.m
  clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
  clang/test/CodeGenOpenCL/amdgpu-call-kernel.cl
  clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
  llvm/include/llvm/Transforms/IPO/Attributor.h
  llvm/lib/Transforms/IPO/Attributor.cpp
  llvm/test/Transforms/FunctionAttrs/SCC1.ll
  llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll
  llvm/test/Transforms/FunctionAttrs/arg_returned.ll
  llvm/test/Transforms/FunctionAttrs/nocapture.ll
  llvm/test/Transforms/FunctionAttrs/readattrs.ll

Index: llvm/test/Transforms/FunctionAttrs/readattrs.ll
===
--- llvm/test/Transforms/FunctionAttrs/readattrs.ll
+++ llvm/test/Transforms/FunctionAttrs/readattrs.ll
@@ -32,7 +32,7 @@
   ret void
 }
 
-; CHECK: define void @test5(i8** nocapture %p, i8* %q)
+; CHECK: define void @test5(i8** nocapture writeonly %p, i8* %q)
 ; Missed optz'n: we could make %q readnone, but don't break test6!
 define void @test5(i8** %p, i8* %q) {
   store i8* %q, i8** %p
@@ -40,7 +40,7 @@
 }
 
 declare void @test6_1()
-; CHECK: define void @test6_2(i8** nocapture %p, i8* %q)
+; CHECK: define void @test6_2(i8** nocapture writeonly %p, i8* %q)
 ; This is not a missed optz'n.
 define void @test6_2(i8** %p, i8* %q) {
   store i8* %q, i8** %p
@@ -48,7 +48,7 @@
   ret void
 }
 
-; CHECK: define void @test7_1(i32* inalloca nocapture %a)
+; CHECK: define void @test7_1(i32* inalloca nocapture readnone %a)
 ; inalloca parameters are always considered written
 define void @test7_1(i32* inalloca %a) {
   ret void
@@ -60,7 +60,7 @@
   ret i32* %p
 }
 
-; CHECK: define void @test8_2(i32* nocapture %p)
+; CHECK: define void @test8_2(i32* nocapture writeonly %p)
 define void @test8_2(i32* %p) {
 entry:
   %call = call i32* @test8_1(i32* %p)
Index: llvm/test/Transforms/FunctionAttrs/nocapture.ll
===
--- llvm/test/Transforms/FunctionAttrs/nocapture.ll
+++ llvm/test/Transforms/FunctionAttrs/nocapture.ll
@@ -134,15 +134,14 @@
 	ret void
 }
 
-; CHECK: define void @test1_1(i8* nocapture %x1_1, i8* nocapture %y1_1)
-; It would be acceptable to add readnone to %y1_1 and %y1_2.
+; CHECK: define void @test1_1(i8* nocapture readnone %x1_1, i8* nocapture readnone %y1_1)
 define void @test1_1(i8* %x1_1, i8* %y1_1) {
   call i8* @test1_2(i8* %x1_1, i8* %y1_1)
   store i32* null, i32** @g
   ret void
 }
 
-; CHECK: define i8* @test1_2(i8* nocapture %x1_2, i8* returned "no-capture-maybe-returned" %y1_2)
+; CHECK: define i8* @test1_2(i8* nocapture readnone %x1_2, i8* readnone returned "no-capture-maybe-returned" %y1_2)
 define i8* @test1_2(i8* %x1_2, i8* %y1_2) {
   call void @test1_1(i8* %x1_2, i8* %y1_2)
   store i32* null, i32** @g
@@ -156,7 +155,7 @@
   ret void
 }
 
-; CHECK: define void @test3(i8* nocapture %x3, i8* nocapture readnone %y3, i8* nocapture %z3)
+; CHECK: define void @test3(i8* nocapture readnone %x3, i8* nocapture readnone %y3, i8* nocapture readnone %z3)
 define void @test3(i8* %x3, i8* %y3, i8* %z3) {
   call void @test3(i8* %z3, i8* %y3, i8* %x3)
   store i32* null, i32** @g
@@ -237,7 +236,7 @@
   ret void
 }
 
-; CHECK: @nocaptureStrip(i8* nocapture %p)
+; CHECK: @nocaptureStrip(i8* nocapture writeonly %p)
 define void @nocaptureStrip(i8* %p) {
 entry:
   %b = call i8* @llvm.strip.invariant.group.p0i8(i8* %p)
Index: llvm/test/Transforms/FunctionAttrs/arg_returned.ll
===
--- llvm/test/Transforms/FunctionAttrs/arg_returned.ll
+++ llvm/test/Transforms/FunctionAttrs/arg_returned.ll
@@ -180,8 +180,8 @@
 ; FNATTR: define dso_local double* @ptr_scc_r1(double* %a, double* readnone %r, double* nocapture readnone %b) [[NoInlineNoUnwindReadnoneUwtable]]
 ; FNATTR: define dso_local double* @ptr_scc_r2(double* readnone %a, double* readnone %b, double* readnone %r) [[NoInlineNoUnwindReadnoneUwtable]]
 ;
-; ATTRIBUTOR: define dso_local double* @ptr_sink_r0(double* returned "no-capture-maybe-returned" %r) [[NoInlineNoRecurseNoUnwindReadnoneUwtable]]
-; ATTRIBUTOR: define dso_local double* @ptr_scc_r1(double* %a, double* returned %r, double* nocapture %b) [[NoInlineNoUnwindReadn

[PATCH] D59919: [Attributor] Deduce "returned" argument attribute

2019-03-29 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 192916.
jdoerfert added a comment.

Minor updates


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59919/new/

https://reviews.llvm.org/D59919

Files:
  clang/test/CodeGenOpenCL/as_type.cl
  llvm/include/llvm/Transforms/IPO/Attributor.h
  llvm/lib/Transforms/IPO/Attributor.cpp
  llvm/test/Transforms/FunctionAttrs/SCC1.ll
  llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll
  llvm/test/Transforms/FunctionAttrs/arg_returned.ll

Index: llvm/test/Transforms/FunctionAttrs/arg_returned.ll
===
--- llvm/test/Transforms/FunctionAttrs/arg_returned.ll
+++ llvm/test/Transforms/FunctionAttrs/arg_returned.ll
@@ -1,4 +1,8 @@
-; RUN: opt -functionattrs -attributor -S < %s | FileCheck %s
+; RUN: opt -functionattrs -S < %s | FileCheck %s --check-prefix=FNATTR
+; RUN: opt -attributor -S < %s | FileCheck %s --check-prefix=ATTRIBUTOR
+; RUN: opt -attributor -functionattrs -S < %s | FileCheck %s --check-prefix=BOTH
+; RUN: opt -attributor -attributor-max-iterations=18 -S < %s | FileCheck %s --check-prefix=FEW_IT
+; RUN: opt -attributor -attributor-max-iterations=19 -functionattrs -S < %s | FileCheck %s --check-prefix=BOTH
 ;
 ; Test cases specifically designed for the "returned" argument attribute.
 ; We use FIXME's to indicate problems and missing attributes.
@@ -20,13 +24,20 @@
 
 ; TEST 1
 ;
-; CHECK: define dso_local i32 @sink_r0(i32 returned %r) [[NoInlineNoRecurseNoUnwindReadnoneUwtable:#[0-9]]]
+; BOTH: define dso_local i32 @sink_r0(i32 returned %r) [[NoInlineNoRecurseNoUnwindReadnoneUwtable:#[0-9]]]
+; BOTH: define dso_local i32 @scc_r1(i32 %a, i32 returned %r, i32 %b) [[NoInlineNoUnwindReadnoneUwtable:#[0-9]]]
+; BOTH: define dso_local i32 @scc_r2(i32 %a, i32 %b, i32 returned %r) [[NoInlineNoUnwindReadnoneUwtable]]
+; BOTH: define dso_local i32 @scc_rX(i32 %a, i32 %b, i32 %r) [[NoInlineNoUnwindReadnoneUwtable]]
 ;
-; FIXME: returned on %r missing:
-; CHECK: define dso_local i32 @scc_r1(i32 %a, i32 %r, i32 %b) [[NoInlineNoUnwindReadnoneUwtable:#[0-9]]]
+; FNATTR: define dso_local i32 @sink_r0(i32 returned %r) [[NoInlineNoRecurseNoUnwindReadnoneUwtable:#[0-9]]]
+; FNATTR: define dso_local i32 @scc_r1(i32 %a, i32 %r, i32 %b) [[NoInlineNoUnwindReadnoneUwtable:#[0-9]]]
+; FNATTR: define dso_local i32 @scc_r2(i32 %a, i32 %b, i32 %r) [[NoInlineNoUnwindReadnoneUwtable]]
+; FNATTR: define dso_local i32 @scc_rX(i32 %a, i32 %b, i32 %r) [[NoInlineNoUnwindReadnoneUwtable]]
 ;
-; FIXME: returned on %r missing:
-; CHECK: define dso_local i32 @scc_r2(i32 %a, i32 %b, i32 %r) [[NoInlineNoUnwindReadnoneUwtable]]
+; ATTRIBUTOR: define dso_local i32 @sink_r0(i32 returned %r) [[NoInlineNoRecurseNoUnwindReadnoneUwtable:#[0-9]]]
+; ATTRIBUTOR: define dso_local i32 @scc_r1(i32 %a, i32 returned %r, i32 %b) [[NoInlineNoUnwindReadnoneUwtable:#[0-9]]]
+; ATTRIBUTOR: define dso_local i32 @scc_r2(i32 %a, i32 %b, i32 returned %r) [[NoInlineNoUnwindReadnoneUwtable]]
+; ATTRIBUTOR: define dso_local i32 @scc_rX(i32 %a, i32 %b, i32 %r) [[NoInlineNoUnwindReadnoneUwtable]]
 ;
 ; int scc_r1(int a, int b, int r);
 ; int scc_r2(int a, int b, int r);
@@ -161,13 +172,17 @@
 
 ; TEST 2
 ;
-; CHECK: define dso_local double* @ptr_sink_r0(double* readnone returned %r) [[NoInlineNoRecurseNoUnwindReadnoneUwtable]]
+; BOTH: define dso_local double* @ptr_sink_r0(double* readnone returned %r) [[NoInlineNoRecurseNoUnwindReadnoneUwtable]]
+; BOTH: define dso_local double* @ptr_scc_r1(double* %a, double* readnone returned %r, double* nocapture readnone %b) [[NoInlineNoUnwindReadnoneUwtable]]
+; BOTH: define dso_local double* @ptr_scc_r2(double* readnone %a, double* readnone %b, double* readnone returned %r) [[NoInlineNoUnwindReadnoneUwtable]]
 ;
-; FIXME: returned on %r missing:
-; CHECK: define dso_local double* @ptr_scc_r1(double* %a, double* readnone %r, double* nocapture readnone %b) [[NoInlineNoUnwindReadnoneUwtable]]
+; FNATTR: define dso_local double* @ptr_sink_r0(double* readnone returned %r) [[NoInlineNoRecurseNoUnwindReadnoneUwtable]]
+; FNATTR: define dso_local double* @ptr_scc_r1(double* %a, double* readnone %r, double* nocapture readnone %b) [[NoInlineNoUnwindReadnoneUwtable]]
+; FNATTR: define dso_local double* @ptr_scc_r2(double* readnone %a, double* readnone %b, double* readnone %r) [[NoInlineNoUnwindReadnoneUwtable]]
 ;
-; FIXME: returned on %r missing:
-; CHECK: define dso_local double* @ptr_scc_r2(double* readnone %a, double* readnone %b, double* readnone %r) [[NoInlineNoUnwindReadnoneUwtable]]
+; ATTRIBUTOR: define dso_local double* @ptr_sink_r0(double* returned %r) [[NoInlineNoRecurseNoUnwindReadnoneUwtable]]
+; ATTRIBUTOR: define dso_local double* @ptr_scc_r1(double* %a, double* returned %r, double* %b) [[NoInlineNoUnwindReadnoneUwtable]]
+; ATTRIBUTOR: define dso_local double* @ptr_scc_r2(double* %a, double* %b, double* returned %r) [[NoInlineNoUnwindReadnoneUwtable]]
 ;
 ; double* ptr_scc_r1(double* a

[PATCH] D59980: [Attributor] Deduce memory behavior argument attributes

2019-03-29 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 192961.
jdoerfert added a comment.

Minor update


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59980/new/

https://reviews.llvm.org/D59980

Files:
  clang/test/CodeGen/arm-vfp16-arguments.c
  clang/test/CodeGen/systemz-inline-asm.c
  clang/test/CodeGenCXX/wasm-args-returns.cpp
  clang/test/CodeGenObjC/os_log.m
  clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
  clang/test/CodeGenOpenCL/amdgpu-call-kernel.cl
  clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
  llvm/include/llvm/Transforms/IPO/Attributor.h
  llvm/lib/Transforms/IPO/Attributor.cpp
  llvm/test/Transforms/FunctionAttrs/SCC1.ll
  llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll
  llvm/test/Transforms/FunctionAttrs/arg_returned.ll
  llvm/test/Transforms/FunctionAttrs/nocapture.ll
  llvm/test/Transforms/FunctionAttrs/readattrs.ll

Index: llvm/test/Transforms/FunctionAttrs/readattrs.ll
===
--- llvm/test/Transforms/FunctionAttrs/readattrs.ll
+++ llvm/test/Transforms/FunctionAttrs/readattrs.ll
@@ -32,7 +32,7 @@
   ret void
 }
 
-; CHECK: define void @test5(i8** nocapture %p, i8* %q)
+; CHECK: define void @test5(i8** nocapture writeonly %p, i8* %q)
 ; Missed optz'n: we could make %q readnone, but don't break test6!
 define void @test5(i8** %p, i8* %q) {
   store i8* %q, i8** %p
@@ -40,7 +40,7 @@
 }
 
 declare void @test6_1()
-; CHECK: define void @test6_2(i8** nocapture %p, i8* %q)
+; CHECK: define void @test6_2(i8** nocapture writeonly %p, i8* %q)
 ; This is not a missed optz'n.
 define void @test6_2(i8** %p, i8* %q) {
   store i8* %q, i8** %p
@@ -48,7 +48,7 @@
   ret void
 }
 
-; CHECK: define void @test7_1(i32* inalloca nocapture %a)
+; CHECK: define void @test7_1(i32* inalloca nocapture readnone %a)
 ; inalloca parameters are always considered written
 define void @test7_1(i32* inalloca %a) {
   ret void
@@ -60,7 +60,7 @@
   ret i32* %p
 }
 
-; CHECK: define void @test8_2(i32* nocapture %p)
+; CHECK: define void @test8_2(i32* nocapture writeonly %p)
 define void @test8_2(i32* %p) {
 entry:
   %call = call i32* @test8_1(i32* %p)
Index: llvm/test/Transforms/FunctionAttrs/nocapture.ll
===
--- llvm/test/Transforms/FunctionAttrs/nocapture.ll
+++ llvm/test/Transforms/FunctionAttrs/nocapture.ll
@@ -134,15 +134,14 @@
 	ret void
 }
 
-; CHECK: define void @test1_1(i8* nocapture %x1_1, i8* nocapture %y1_1)
-; It would be acceptable to add readnone to %y1_1 and %y1_2.
+; CHECK: define void @test1_1(i8* nocapture readnone %x1_1, i8* nocapture readnone %y1_1)
 define void @test1_1(i8* %x1_1, i8* %y1_1) {
   call i8* @test1_2(i8* %x1_1, i8* %y1_1)
   store i32* null, i32** @g
   ret void
 }
 
-; CHECK: define i8* @test1_2(i8* nocapture %x1_2, i8* returned "no-capture-maybe-returned" %y1_2)
+; CHECK: define i8* @test1_2(i8* nocapture readnone %x1_2, i8* readnone returned "no-capture-maybe-returned" %y1_2)
 define i8* @test1_2(i8* %x1_2, i8* %y1_2) {
   call void @test1_1(i8* %x1_2, i8* %y1_2)
   store i32* null, i32** @g
@@ -156,7 +155,7 @@
   ret void
 }
 
-; CHECK: define void @test3(i8* nocapture %x3, i8* nocapture readnone %y3, i8* nocapture %z3)
+; CHECK: define void @test3(i8* nocapture readnone %x3, i8* nocapture readnone %y3, i8* nocapture readnone %z3)
 define void @test3(i8* %x3, i8* %y3, i8* %z3) {
   call void @test3(i8* %z3, i8* %y3, i8* %x3)
   store i32* null, i32** @g
@@ -237,7 +236,7 @@
   ret void
 }
 
-; CHECK: @nocaptureStrip(i8* nocapture %p)
+; CHECK: @nocaptureStrip(i8* nocapture writeonly %p)
 define void @nocaptureStrip(i8* %p) {
 entry:
   %b = call i8* @llvm.strip.invariant.group.p0i8(i8* %p)
Index: llvm/test/Transforms/FunctionAttrs/arg_returned.ll
===
--- llvm/test/Transforms/FunctionAttrs/arg_returned.ll
+++ llvm/test/Transforms/FunctionAttrs/arg_returned.ll
@@ -180,8 +180,8 @@
 ; FNATTR: define dso_local double* @ptr_scc_r1(double* %a, double* readnone %r, double* nocapture readnone %b) [[NoInlineNoUnwindReadnoneUwtable]]
 ; FNATTR: define dso_local double* @ptr_scc_r2(double* readnone %a, double* readnone %b, double* readnone %r) [[NoInlineNoUnwindReadnoneUwtable]]
 ;
-; ATTRIBUTOR: define dso_local double* @ptr_sink_r0(double* returned "no-capture-maybe-returned" %r) [[NoInlineNoRecurseNoUnwindReadnoneUwtable]]
-; ATTRIBUTOR: define dso_local double* @ptr_scc_r1(double* %a, double* returned %r, double* nocapture %b) [[NoInlineNoUnwindReadnoneUwtable]]
+; ATTRIBUTOR: define dso_local double* @ptr_sink_r0(double* readnone returned "no-capture-maybe-returned" %r) [[NoInlineNoRecurseNoUnwindReadnoneUwtable]]
+; ATTRIBUTOR: define dso_local double* @ptr_scc_r1(double* %a, double* returned %r, double* nocapture readnone %b) [[NoInlineNoUnwindReadnoneUwtable]]
 ; ATTRIBUTOR: define dso_local double* @ptr_scc_r2(double* %a, double* %b, double*

[PATCH] D59922: [Attributor] Deduce "no-capture" argument attribute

2019-03-29 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 192962.
jdoerfert added a comment.

Closed a side-channel through "integers"


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59922/new/

https://reviews.llvm.org/D59922

Files:
  clang/test/CodeGenObjC/os_log.m
  clang/test/CodeGenOpenCL/as_type.cl
  llvm/include/llvm/Transforms/IPO/Attributor.h
  llvm/lib/Transforms/IPO/Attributor.cpp
  llvm/test/Transforms/FunctionAttrs/2009-01-02-LocalStores.ll
  llvm/test/Transforms/FunctionAttrs/SCC1.ll
  llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll
  llvm/test/Transforms/FunctionAttrs/arg_returned.ll
  llvm/test/Transforms/FunctionAttrs/incompatible_fn_attrs.ll
  llvm/test/Transforms/FunctionAttrs/nocapture.ll
  llvm/test/Transforms/FunctionAttrs/readattrs.ll

Index: llvm/test/Transforms/FunctionAttrs/readattrs.ll
===
--- llvm/test/Transforms/FunctionAttrs/readattrs.ll
+++ llvm/test/Transforms/FunctionAttrs/readattrs.ll
@@ -12,7 +12,7 @@
   ret void
 }
 
-; CHECK: define i8* @test2(i8* readnone returned %p)
+; CHECK: define i8* @test2(i8* readnone returned "no-capture-maybe-returned" %p)
 define i8* @test2(i8* %p) {
   store i32 0, i32* @x
   ret i8* %p
@@ -54,13 +54,13 @@
   ret void
 }
 
-; CHECK: define i32* @test8_1(i32* readnone returned %p)
+; CHECK: define i32* @test8_1(i32* readnone returned "no-capture-maybe-returned" %p)
 define i32* @test8_1(i32* %p) {
 entry:
   ret i32* %p
 }
 
-; CHECK: define void @test8_2(i32* %p)
+; CHECK: define void @test8_2(i32* nocapture %p)
 define void @test8_2(i32* %p) {
 entry:
   %call = call i32* @test8_1(i32* %p)
Index: llvm/test/Transforms/FunctionAttrs/nocapture.ll
===
--- llvm/test/Transforms/FunctionAttrs/nocapture.ll
+++ llvm/test/Transforms/FunctionAttrs/nocapture.ll
@@ -3,7 +3,7 @@
 
 @g = global i32* null		;  [#uses=1]
 
-; CHECK: define i32* @c1(i32* readnone returned %q)
+; CHECK: define i32* @c1(i32* readnone returned "no-capture-maybe-returned" %q)
 define i32* @c1(i32* %q) {
 	ret i32* %q
 }
@@ -134,7 +134,7 @@
 	ret void
 }
 
-; CHECK: define void @test1_1(i8* nocapture readnone %x1_1, i8* %y1_1)
+; CHECK: define void @test1_1(i8* nocapture %x1_1, i8* nocapture %y1_1)
 ; It would be acceptable to add readnone to %y1_1 and %y1_2.
 define void @test1_1(i8* %x1_1, i8* %y1_1) {
   call i8* @test1_2(i8* %x1_1, i8* %y1_1)
@@ -142,7 +142,7 @@
   ret void
 }
 
-; CHECK: define i8* @test1_2(i8* nocapture readnone %x1_2, i8* returned %y1_2)
+; CHECK: define i8* @test1_2(i8* nocapture %x1_2, i8* returned "no-capture-maybe-returned" %y1_2)
 define i8* @test1_2(i8* %x1_2, i8* %y1_2) {
   call void @test1_1(i8* %x1_2, i8* %y1_2)
   store i32* null, i32** @g
@@ -156,21 +156,21 @@
   ret void
 }
 
-; CHECK: define void @test3(i8* nocapture readnone %x3, i8* nocapture readnone %y3, i8* nocapture readnone %z3)
+; CHECK: define void @test3(i8* nocapture %x3, i8* nocapture readnone %y3, i8* nocapture %z3)
 define void @test3(i8* %x3, i8* %y3, i8* %z3) {
   call void @test3(i8* %z3, i8* %y3, i8* %x3)
   store i32* null, i32** @g
   ret void
 }
 
-; CHECK: define void @test4_1(i8* %x4_1)
+; CHECK: define void @test4_1(i8* nocapture readnone %x4_1)
 define void @test4_1(i8* %x4_1) {
   call i8* @test4_2(i8* %x4_1, i8* %x4_1, i8* %x4_1)
   store i32* null, i32** @g
   ret void
 }
 
-; CHECK: define i8* @test4_2(i8* nocapture readnone %x4_2, i8* readnone returned %y4_2, i8* nocapture readnone %z4_2)
+; CHECK: define i8* @test4_2(i8* nocapture readnone %x4_2, i8* readnone returned "no-capture-maybe-returned" %y4_2, i8* nocapture readnone %z4_2)
 define i8* @test4_2(i8* %x4_2, i8* %y4_2, i8* %z4_2) {
   call void @test4_1(i8* null)
   store i32* null, i32** @g
Index: llvm/test/Transforms/FunctionAttrs/incompatible_fn_attrs.ll
===
--- llvm/test/Transforms/FunctionAttrs/incompatible_fn_attrs.ll
+++ llvm/test/Transforms/FunctionAttrs/incompatible_fn_attrs.ll
@@ -6,21 +6,21 @@
 
 ; Function Attrs: argmemonly
 define i32* @given_argmem_infer_readnone(i32* %p) #0 {
-; CHECK: define i32* @given_argmem_infer_readnone(i32* readnone returned %p) #0 {
+; CHECK: define i32* @given_argmem_infer_readnone(i32* readnone returned "no-capture-maybe-returned" %p) #0 {
 entry:
   ret i32* %p
 }
 
 ; Function Attrs: inaccessiblememonly
 define i32* @given_inaccessible_infer_readnone(i32* %p) #1 {
-; CHECK: define i32* @given_inaccessible_infer_readnone(i32* readnone returned %p) #0 {
+; CHECK: define i32* @given_inaccessible_infer_readnone(i32* readnone returned "no-capture-maybe-returned" %p) #0 {
 entry:
   ret i32* %p
 }
 
 ; Function Attrs: inaccessiblemem_or_argmemonly
 define i32* @given_inaccessible_or_argmem_infer_readnone(i32* %p) #2 {
-; CHECK: define i32* @given_inaccessible_or_argmem_infer_readnone(i32* readnone returned %p) #0 {
+; CHECK: define i32* @given_inacc

[PATCH] D59919: [Attributor] Deduce "returned" argument attribute

2019-04-01 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert marked an inline comment as done.
jdoerfert added inline comments.



Comment at: llvm/lib/Transforms/IPO/Attributor.cpp:394
+
+indicateFixpoint(/* Optimistic */ true);
+return;

xbolva00 wrote:
> Maybe an enum here?
> So you could call indicateFixpoint(Fixpoint::optimistic)?
> 
> Or maybe even better, indicateOptimisticFixpoint()?
Good points. I'll probably go with the second, most explicit, solution. I'll 
update (all) the source files soon.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59919/new/

https://reviews.llvm.org/D59919



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59980: [Attributor] Deduce memory behavior argument attributes

2019-04-01 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 193107.
jdoerfert added a comment.

Minor adjustments wrt later patches


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59980/new/

https://reviews.llvm.org/D59980

Files:
  clang/test/CodeGen/arm-vfp16-arguments.c
  clang/test/CodeGen/systemz-inline-asm.c
  clang/test/CodeGenCXX/wasm-args-returns.cpp
  clang/test/CodeGenObjC/os_log.m
  clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
  clang/test/CodeGenOpenCL/amdgpu-call-kernel.cl
  clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
  llvm/include/llvm/Transforms/IPO/Attributor.h
  llvm/lib/Transforms/IPO/Attributor.cpp
  llvm/test/Transforms/FunctionAttrs/SCC1.ll
  llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll
  llvm/test/Transforms/FunctionAttrs/arg_returned.ll
  llvm/test/Transforms/FunctionAttrs/nocapture.ll
  llvm/test/Transforms/FunctionAttrs/readattrs.ll

Index: llvm/test/Transforms/FunctionAttrs/readattrs.ll
===
--- llvm/test/Transforms/FunctionAttrs/readattrs.ll
+++ llvm/test/Transforms/FunctionAttrs/readattrs.ll
@@ -33,7 +33,7 @@
   ret void
 }
 
-; CHECK: define void @test5(i8** nocapture %p, i8* %q)
+; CHECK: define void @test5(i8** nocapture writeonly %p, i8* %q)
 ; Missed optz'n: we could make %q readnone, but don't break test6!
 define void @test5(i8** %p, i8* %q) {
   store i8* %q, i8** %p
@@ -41,7 +41,7 @@
 }
 
 declare void @test6_1()
-; CHECK: define void @test6_2(i8** nocapture %p, i8* %q)
+; CHECK: define void @test6_2(i8** nocapture writeonly %p, i8* %q)
 ; This is not a missed optz'n.
 define void @test6_2(i8** %p, i8* %q) {
   store i8* %q, i8** %p
@@ -49,7 +49,7 @@
   ret void
 }
 
-; CHECK: define void @test7_1(i32* inalloca nocapture %a)
+; CHECK: define void @test7_1(i32* inalloca nocapture readnone %a)
 ; inalloca parameters are always considered written
 define void @test7_1(i32* inalloca %a) {
   ret void
@@ -61,7 +61,7 @@
   ret i32* %p
 }
 
-; CHECK: define void @test8_2(i32* nocapture %p)
+; CHECK: define void @test8_2(i32* nocapture writeonly %p)
 define void @test8_2(i32* %p) {
 entry:
   %call = call i32* @test8_1(i32* %p)
Index: llvm/test/Transforms/FunctionAttrs/nocapture.ll
===
--- llvm/test/Transforms/FunctionAttrs/nocapture.ll
+++ llvm/test/Transforms/FunctionAttrs/nocapture.ll
@@ -134,15 +134,14 @@
 	ret void
 }
 
-; CHECK: define void @test1_1(i8* nocapture %x1_1, i8* nocapture %y1_1)
-; It would be acceptable to add readnone to %y1_1 and %y1_2.
+; CHECK: define void @test1_1(i8* nocapture readnone %x1_1, i8* nocapture readnone %y1_1)
 define void @test1_1(i8* %x1_1, i8* %y1_1) {
   call i8* @test1_2(i8* %x1_1, i8* %y1_1)
   store i32* null, i32** @g
   ret void
 }
 
-; CHECK: define i8* @test1_2(i8* nocapture %x1_2, i8* returned "no-capture-maybe-returned" %y1_2)
+; CHECK: define i8* @test1_2(i8* nocapture readnone %x1_2, i8* readnone returned "no-capture-maybe-returned" %y1_2)
 define i8* @test1_2(i8* %x1_2, i8* %y1_2) {
   call void @test1_1(i8* %x1_2, i8* %y1_2)
   store i32* null, i32** @g
@@ -156,7 +155,7 @@
   ret void
 }
 
-; CHECK: define void @test3(i8* nocapture %x3, i8* nocapture readnone %y3, i8* nocapture %z3)
+; CHECK: define void @test3(i8* nocapture readnone %x3, i8* nocapture readnone %y3, i8* nocapture readnone %z3)
 define void @test3(i8* %x3, i8* %y3, i8* %z3) {
   call void @test3(i8* %z3, i8* %y3, i8* %x3)
   store i32* null, i32** @g
@@ -237,7 +236,7 @@
   ret void
 }
 
-; CHECK: @nocaptureStrip(i8* nocapture %p)
+; CHECK: @nocaptureStrip(i8* nocapture writeonly %p)
 define void @nocaptureStrip(i8* %p) {
 entry:
   %b = call i8* @llvm.strip.invariant.group.p0i8(i8* %p)
Index: llvm/test/Transforms/FunctionAttrs/arg_returned.ll
===
--- llvm/test/Transforms/FunctionAttrs/arg_returned.ll
+++ llvm/test/Transforms/FunctionAttrs/arg_returned.ll
@@ -180,8 +180,8 @@
 ; FNATTR: define dso_local double* @ptr_scc_r1(double* %a, double* readnone %r, double* nocapture readnone %b) [[NoInlineNoUnwindReadnoneUwtable]]
 ; FNATTR: define dso_local double* @ptr_scc_r2(double* readnone %a, double* readnone %b, double* readnone %r) [[NoInlineNoUnwindReadnoneUwtable]]
 ;
-; ATTRIBUTOR: define dso_local double* @ptr_sink_r0(double* returned "no-capture-maybe-returned" %r) [[NoInlineNoRecurseNoUnwindReadnoneUwtable]]
-; ATTRIBUTOR: define dso_local double* @ptr_scc_r1(double* %a, double* returned %r, double* nocapture %b) [[NoInlineNoUnwindReadnoneUwtable]]
+; ATTRIBUTOR: define dso_local double* @ptr_sink_r0(double* readnone returned "no-capture-maybe-returned" %r) [[NoInlineNoRecurseNoUnwindReadnoneUwtable]]
+; ATTRIBUTOR: define dso_local double* @ptr_scc_r1(double* %a, double* returned %r, double* nocapture readnone %b) [[NoInlineNoUnwindReadnoneUwtable]]
 ; ATTRIBUTOR: define dso_local double* @ptr_scc_r2(double* 

[PATCH] D59919: [Attributor] Deduce "returned" argument attribute

2019-04-01 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 193115.
jdoerfert added a comment.

Minor changes


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59919/new/

https://reviews.llvm.org/D59919

Files:
  clang/test/CodeGenOpenCL/as_type.cl
  llvm/include/llvm/Transforms/IPO/Attributor.h
  llvm/lib/Transforms/IPO/Attributor.cpp
  llvm/test/Transforms/FunctionAttrs/SCC1.ll
  llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll
  llvm/test/Transforms/FunctionAttrs/arg_returned.ll

Index: llvm/test/Transforms/FunctionAttrs/arg_returned.ll
===
--- llvm/test/Transforms/FunctionAttrs/arg_returned.ll
+++ llvm/test/Transforms/FunctionAttrs/arg_returned.ll
@@ -1,4 +1,8 @@
-; RUN: opt -functionattrs -attributor -S < %s | FileCheck %s
+; RUN: opt -functionattrs -S < %s | FileCheck %s --check-prefix=FNATTR
+; RUN: opt -attributor -S < %s | FileCheck %s --check-prefix=ATTRIBUTOR
+; RUN: opt -attributor -functionattrs -S < %s | FileCheck %s --check-prefix=BOTH
+; RUN: opt -attributor -attributor-max-iterations=18 -S < %s | FileCheck %s --check-prefix=FEW_IT
+; RUN: opt -attributor -attributor-max-iterations=19 -functionattrs -S < %s | FileCheck %s --check-prefix=BOTH
 ;
 ; Test cases specifically designed for the "returned" argument attribute.
 ; We use FIXME's to indicate problems and missing attributes.
@@ -20,13 +24,20 @@
 
 ; TEST 1
 ;
-; CHECK: define dso_local i32 @sink_r0(i32 returned %r) [[NoInlineNoRecurseNoUnwindReadnoneUwtable:#[0-9]]]
+; BOTH: define dso_local i32 @sink_r0(i32 returned %r) [[NoInlineNoRecurseNoUnwindReadnoneUwtable:#[0-9]]]
+; BOTH: define dso_local i32 @scc_r1(i32 %a, i32 returned %r, i32 %b) [[NoInlineNoUnwindReadnoneUwtable:#[0-9]]]
+; BOTH: define dso_local i32 @scc_r2(i32 %a, i32 %b, i32 returned %r) [[NoInlineNoUnwindReadnoneUwtable]]
+; BOTH: define dso_local i32 @scc_rX(i32 %a, i32 %b, i32 %r) [[NoInlineNoUnwindReadnoneUwtable]]
 ;
-; FIXME: returned on %r missing:
-; CHECK: define dso_local i32 @scc_r1(i32 %a, i32 %r, i32 %b) [[NoInlineNoUnwindReadnoneUwtable:#[0-9]]]
+; FNATTR: define dso_local i32 @sink_r0(i32 returned %r) [[NoInlineNoRecurseNoUnwindReadnoneUwtable:#[0-9]]]
+; FNATTR: define dso_local i32 @scc_r1(i32 %a, i32 %r, i32 %b) [[NoInlineNoUnwindReadnoneUwtable:#[0-9]]]
+; FNATTR: define dso_local i32 @scc_r2(i32 %a, i32 %b, i32 %r) [[NoInlineNoUnwindReadnoneUwtable]]
+; FNATTR: define dso_local i32 @scc_rX(i32 %a, i32 %b, i32 %r) [[NoInlineNoUnwindReadnoneUwtable]]
 ;
-; FIXME: returned on %r missing:
-; CHECK: define dso_local i32 @scc_r2(i32 %a, i32 %b, i32 %r) [[NoInlineNoUnwindReadnoneUwtable]]
+; ATTRIBUTOR: define dso_local i32 @sink_r0(i32 returned %r) [[NoInlineNoRecurseNoUnwindReadnoneUwtable:#[0-9]]]
+; ATTRIBUTOR: define dso_local i32 @scc_r1(i32 %a, i32 returned %r, i32 %b) [[NoInlineNoUnwindReadnoneUwtable:#[0-9]]]
+; ATTRIBUTOR: define dso_local i32 @scc_r2(i32 %a, i32 %b, i32 returned %r) [[NoInlineNoUnwindReadnoneUwtable]]
+; ATTRIBUTOR: define dso_local i32 @scc_rX(i32 %a, i32 %b, i32 %r) [[NoInlineNoUnwindReadnoneUwtable]]
 ;
 ; int scc_r1(int a, int b, int r);
 ; int scc_r2(int a, int b, int r);
@@ -161,13 +172,17 @@
 
 ; TEST 2
 ;
-; CHECK: define dso_local double* @ptr_sink_r0(double* readnone returned %r) [[NoInlineNoRecurseNoUnwindReadnoneUwtable]]
+; BOTH: define dso_local double* @ptr_sink_r0(double* readnone returned %r) [[NoInlineNoRecurseNoUnwindReadnoneUwtable]]
+; BOTH: define dso_local double* @ptr_scc_r1(double* %a, double* readnone returned %r, double* nocapture readnone %b) [[NoInlineNoUnwindReadnoneUwtable]]
+; BOTH: define dso_local double* @ptr_scc_r2(double* readnone %a, double* readnone %b, double* readnone returned %r) [[NoInlineNoUnwindReadnoneUwtable]]
 ;
-; FIXME: returned on %r missing:
-; CHECK: define dso_local double* @ptr_scc_r1(double* %a, double* readnone %r, double* nocapture readnone %b) [[NoInlineNoUnwindReadnoneUwtable]]
+; FNATTR: define dso_local double* @ptr_sink_r0(double* readnone returned %r) [[NoInlineNoRecurseNoUnwindReadnoneUwtable]]
+; FNATTR: define dso_local double* @ptr_scc_r1(double* %a, double* readnone %r, double* nocapture readnone %b) [[NoInlineNoUnwindReadnoneUwtable]]
+; FNATTR: define dso_local double* @ptr_scc_r2(double* readnone %a, double* readnone %b, double* readnone %r) [[NoInlineNoUnwindReadnoneUwtable]]
 ;
-; FIXME: returned on %r missing:
-; CHECK: define dso_local double* @ptr_scc_r2(double* readnone %a, double* readnone %b, double* readnone %r) [[NoInlineNoUnwindReadnoneUwtable]]
+; ATTRIBUTOR: define dso_local double* @ptr_sink_r0(double* returned %r) [[NoInlineNoRecurseNoUnwindReadnoneUwtable]]
+; ATTRIBUTOR: define dso_local double* @ptr_scc_r1(double* %a, double* returned %r, double* %b) [[NoInlineNoUnwindReadnoneUwtable]]
+; ATTRIBUTOR: define dso_local double* @ptr_scc_r2(double* %a, double* %b, double* returned %r) [[NoInlineNoUnwindReadnoneUwtable]]
 ;
 ; double* ptr_scc_r1(double* a

[PATCH] D59922: [Attributor] Deduce "no-capture" argument attribute

2019-04-01 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 193122.
jdoerfert added a comment.

Rebase


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59922/new/

https://reviews.llvm.org/D59922

Files:
  clang/test/CodeGenObjC/os_log.m
  clang/test/CodeGenOpenCL/as_type.cl
  llvm/include/llvm/Transforms/IPO/Attributor.h
  llvm/lib/Transforms/IPO/Attributor.cpp
  llvm/test/Transforms/FunctionAttrs/2009-01-02-LocalStores.ll
  llvm/test/Transforms/FunctionAttrs/SCC1.ll
  llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll
  llvm/test/Transforms/FunctionAttrs/arg_returned.ll
  llvm/test/Transforms/FunctionAttrs/incompatible_fn_attrs.ll
  llvm/test/Transforms/FunctionAttrs/nocapture.ll
  llvm/test/Transforms/FunctionAttrs/readattrs.ll

Index: llvm/test/Transforms/FunctionAttrs/readattrs.ll
===
--- llvm/test/Transforms/FunctionAttrs/readattrs.ll
+++ llvm/test/Transforms/FunctionAttrs/readattrs.ll
@@ -13,7 +13,7 @@
   ret void
 }
 
-; CHECK: define i8* @test2(i8* readnone returned %p)
+; CHECK: define i8* @test2(i8* readnone returned "no-capture-maybe-returned" %p)
 define i8* @test2(i8* %p) {
   store i32 0, i32* @x
   ret i8* %p
@@ -55,13 +55,13 @@
   ret void
 }
 
-; CHECK: define i32* @test8_1(i32* readnone returned %p)
+; CHECK: define i32* @test8_1(i32* readnone returned "no-capture-maybe-returned" %p)
 define i32* @test8_1(i32* %p) {
 entry:
   ret i32* %p
 }
 
-; CHECK: define void @test8_2(i32* %p)
+; CHECK: define void @test8_2(i32* nocapture %p)
 define void @test8_2(i32* %p) {
 entry:
   %call = call i32* @test8_1(i32* %p)
Index: llvm/test/Transforms/FunctionAttrs/nocapture.ll
===
--- llvm/test/Transforms/FunctionAttrs/nocapture.ll
+++ llvm/test/Transforms/FunctionAttrs/nocapture.ll
@@ -1,9 +1,9 @@
 ; RUN: opt < %s -attributor -functionattrs -S | FileCheck %s
-; RUN: opt < %s -passes='attributor,cgscc(function-attrs)' -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(attributor,function-attrs)' -S | FileCheck %s
 
 @g = global i32* null		;  [#uses=1]
 
-; CHECK: define i32* @c1(i32* readnone returned %q)
+; CHECK: define i32* @c1(i32* readnone returned "no-capture-maybe-returned" %q)
 define i32* @c1(i32* %q) {
 	ret i32* %q
 }
@@ -134,7 +134,7 @@
 	ret void
 }
 
-; CHECK: define void @test1_1(i8* nocapture readnone %x1_1, i8* %y1_1)
+; CHECK: define void @test1_1(i8* nocapture %x1_1, i8* nocapture %y1_1)
 ; It would be acceptable to add readnone to %y1_1 and %y1_2.
 define void @test1_1(i8* %x1_1, i8* %y1_1) {
   call i8* @test1_2(i8* %x1_1, i8* %y1_1)
@@ -142,7 +142,7 @@
   ret void
 }
 
-; CHECK: define i8* @test1_2(i8* nocapture readnone %x1_2, i8* returned %y1_2)
+; CHECK: define i8* @test1_2(i8* nocapture %x1_2, i8* returned "no-capture-maybe-returned" %y1_2)
 define i8* @test1_2(i8* %x1_2, i8* %y1_2) {
   call void @test1_1(i8* %x1_2, i8* %y1_2)
   store i32* null, i32** @g
@@ -156,21 +156,21 @@
   ret void
 }
 
-; CHECK: define void @test3(i8* nocapture readnone %x3, i8* nocapture readnone %y3, i8* nocapture readnone %z3)
+; CHECK: define void @test3(i8* nocapture %x3, i8* nocapture readnone %y3, i8* nocapture %z3)
 define void @test3(i8* %x3, i8* %y3, i8* %z3) {
   call void @test3(i8* %z3, i8* %y3, i8* %x3)
   store i32* null, i32** @g
   ret void
 }
 
-; CHECK: define void @test4_1(i8* %x4_1)
+; CHECK: define void @test4_1(i8* nocapture readnone %x4_1)
 define void @test4_1(i8* %x4_1) {
   call i8* @test4_2(i8* %x4_1, i8* %x4_1, i8* %x4_1)
   store i32* null, i32** @g
   ret void
 }
 
-; CHECK: define i8* @test4_2(i8* nocapture readnone %x4_2, i8* readnone returned %y4_2, i8* nocapture readnone %z4_2)
+; CHECK: define i8* @test4_2(i8* nocapture readnone %x4_2, i8* readnone returned "no-capture-maybe-returned" %y4_2, i8* nocapture readnone %z4_2)
 define i8* @test4_2(i8* %x4_2, i8* %y4_2, i8* %z4_2) {
   call void @test4_1(i8* null)
   store i32* null, i32** @g
Index: llvm/test/Transforms/FunctionAttrs/incompatible_fn_attrs.ll
===
--- llvm/test/Transforms/FunctionAttrs/incompatible_fn_attrs.ll
+++ llvm/test/Transforms/FunctionAttrs/incompatible_fn_attrs.ll
@@ -6,21 +6,21 @@
 
 ; Function Attrs: argmemonly
 define i32* @given_argmem_infer_readnone(i32* %p) #0 {
-; CHECK: define i32* @given_argmem_infer_readnone(i32* readnone returned %p) #0 {
+; CHECK: define i32* @given_argmem_infer_readnone(i32* readnone returned "no-capture-maybe-returned" %p) #0 {
 entry:
   ret i32* %p
 }
 
 ; Function Attrs: inaccessiblememonly
 define i32* @given_inaccessible_infer_readnone(i32* %p) #1 {
-; CHECK: define i32* @given_inaccessible_infer_readnone(i32* readnone returned %p) #0 {
+; CHECK: define i32* @given_inaccessible_infer_readnone(i32* readnone returned "no-capture-maybe-returned" %p) #0 {
 entry:
   ret i32* %p
 }
 
 ; Function Attrs: inaccessiblemem_or_argmemonly
 define i32* @given_i

[PATCH] D59980: [Attributor] Deduce memory behavior argument attributes

2019-04-01 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 193123.
jdoerfert added a comment.

Rebase


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59980/new/

https://reviews.llvm.org/D59980

Files:
  clang/test/CodeGen/arm-vfp16-arguments.c
  clang/test/CodeGen/systemz-inline-asm.c
  clang/test/CodeGenCXX/wasm-args-returns.cpp
  clang/test/CodeGenObjC/os_log.m
  clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
  clang/test/CodeGenOpenCL/amdgpu-call-kernel.cl
  clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
  llvm/include/llvm/Transforms/IPO/Attributor.h
  llvm/lib/Transforms/IPO/Attributor.cpp
  llvm/test/Transforms/FunctionAttrs/SCC1.ll
  llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll
  llvm/test/Transforms/FunctionAttrs/arg_returned.ll
  llvm/test/Transforms/FunctionAttrs/nocapture.ll
  llvm/test/Transforms/FunctionAttrs/readattrs.ll

Index: llvm/test/Transforms/FunctionAttrs/readattrs.ll
===
--- llvm/test/Transforms/FunctionAttrs/readattrs.ll
+++ llvm/test/Transforms/FunctionAttrs/readattrs.ll
@@ -33,7 +33,7 @@
   ret void
 }
 
-; CHECK: define void @test5(i8** nocapture %p, i8* %q)
+; CHECK: define void @test5(i8** nocapture writeonly %p, i8* %q)
 ; Missed optz'n: we could make %q readnone, but don't break test6!
 define void @test5(i8** %p, i8* %q) {
   store i8* %q, i8** %p
@@ -41,7 +41,7 @@
 }
 
 declare void @test6_1()
-; CHECK: define void @test6_2(i8** nocapture %p, i8* %q)
+; CHECK: define void @test6_2(i8** nocapture writeonly %p, i8* %q)
 ; This is not a missed optz'n.
 define void @test6_2(i8** %p, i8* %q) {
   store i8* %q, i8** %p
@@ -49,7 +49,7 @@
   ret void
 }
 
-; CHECK: define void @test7_1(i32* inalloca nocapture %a)
+; CHECK: define void @test7_1(i32* inalloca nocapture readnone %a)
 ; inalloca parameters are always considered written
 define void @test7_1(i32* inalloca %a) {
   ret void
@@ -61,7 +61,7 @@
   ret i32* %p
 }
 
-; CHECK: define void @test8_2(i32* nocapture %p)
+; CHECK: define void @test8_2(i32* nocapture writeonly %p)
 define void @test8_2(i32* %p) {
 entry:
   %call = call i32* @test8_1(i32* %p)
Index: llvm/test/Transforms/FunctionAttrs/nocapture.ll
===
--- llvm/test/Transforms/FunctionAttrs/nocapture.ll
+++ llvm/test/Transforms/FunctionAttrs/nocapture.ll
@@ -134,15 +134,14 @@
 	ret void
 }
 
-; CHECK: define void @test1_1(i8* nocapture %x1_1, i8* nocapture %y1_1)
-; It would be acceptable to add readnone to %y1_1 and %y1_2.
+; CHECK: define void @test1_1(i8* nocapture readnone %x1_1, i8* nocapture readnone %y1_1)
 define void @test1_1(i8* %x1_1, i8* %y1_1) {
   call i8* @test1_2(i8* %x1_1, i8* %y1_1)
   store i32* null, i32** @g
   ret void
 }
 
-; CHECK: define i8* @test1_2(i8* nocapture %x1_2, i8* returned "no-capture-maybe-returned" %y1_2)
+; CHECK: define i8* @test1_2(i8* nocapture readnone %x1_2, i8* readnone returned "no-capture-maybe-returned" %y1_2)
 define i8* @test1_2(i8* %x1_2, i8* %y1_2) {
   call void @test1_1(i8* %x1_2, i8* %y1_2)
   store i32* null, i32** @g
@@ -156,7 +155,7 @@
   ret void
 }
 
-; CHECK: define void @test3(i8* nocapture %x3, i8* nocapture readnone %y3, i8* nocapture %z3)
+; CHECK: define void @test3(i8* nocapture readnone %x3, i8* nocapture readnone %y3, i8* nocapture readnone %z3)
 define void @test3(i8* %x3, i8* %y3, i8* %z3) {
   call void @test3(i8* %z3, i8* %y3, i8* %x3)
   store i32* null, i32** @g
@@ -237,7 +236,7 @@
   ret void
 }
 
-; CHECK: @nocaptureStrip(i8* nocapture %p)
+; CHECK: @nocaptureStrip(i8* nocapture writeonly %p)
 define void @nocaptureStrip(i8* %p) {
 entry:
   %b = call i8* @llvm.strip.invariant.group.p0i8(i8* %p)
Index: llvm/test/Transforms/FunctionAttrs/arg_returned.ll
===
--- llvm/test/Transforms/FunctionAttrs/arg_returned.ll
+++ llvm/test/Transforms/FunctionAttrs/arg_returned.ll
@@ -180,8 +180,8 @@
 ; FNATTR: define dso_local double* @ptr_scc_r1(double* %a, double* readnone %r, double* nocapture readnone %b) [[NoInlineNoUnwindReadnoneUwtable]]
 ; FNATTR: define dso_local double* @ptr_scc_r2(double* readnone %a, double* readnone %b, double* readnone %r) [[NoInlineNoUnwindReadnoneUwtable]]
 ;
-; ATTRIBUTOR: define dso_local double* @ptr_sink_r0(double* returned "no-capture-maybe-returned" %r) [[NoInlineNoRecurseNoUnwindReadnoneUwtable]]
-; ATTRIBUTOR: define dso_local double* @ptr_scc_r1(double* %a, double* returned %r, double* nocapture %b) [[NoInlineNoUnwindReadnoneUwtable]]
+; ATTRIBUTOR: define dso_local double* @ptr_sink_r0(double* readnone returned "no-capture-maybe-returned" %r) [[NoInlineNoRecurseNoUnwindReadnoneUwtable]]
+; ATTRIBUTOR: define dso_local double* @ptr_scc_r1(double* %a, double* returned %r, double* nocapture readnone %b) [[NoInlineNoUnwindReadnoneUwtable]]
 ; ATTRIBUTOR: define dso_local double* @ptr_scc_r2(double* %a, double* %b, double* retur

[PATCH] D60076: [Attributor] Deduce memory behavior function attributes

2019-04-01 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert created this revision.
jdoerfert added reviewers: homerdin, hfinkel, fedor.sergeev, sanjoy, spatel, 
nlopes, nicholas, reames.
Herald added projects: clang, LLVM.
Herald added a subscriber: cfe-commits.
jdoerfert added a parent revision: D59980: [Attributor] Deduce memory behavior 
argument attributes.
jdoerfert added a child revision: D60077: [Attributor] Deduce memory location 
function attributes.

Deduce the memory behavior, aka "read-none", "read-only", or
"write-only", for functions. This also improves argument deduction
(D59980) because it can rely on the function memory behavior which
is derived.

Impact on the statistics (-stats) for LLVM-TS + Spec2006:

  CHANGED: attributor   NumAttributesManifested
77683 ->  87205 (   +12.258%)
  CHANGED: attributor   NumAttributesValidFixpoint
109073 -> 118598 (+8.733%)
  CHANGED: attributor   NumFnArgumentReadOnly  
16532 ->  16755 (+1.349%)
ADDED: attributor   NumFnReadNone   
 n/a ->   2930
ADDED: attributor   NumFnReadOnly   
 n/a ->   4380
ADDED: attributor   NumFnWriteOnly  
 n/a ->   1960
  CHANGED: functionattrsNumReadNone 
3095 ->165 (   -94.669%)
  CHANGED: functionattrsNumReadNoneArg  
 216 ->144 (   -33.333%)
  CHANGED: functionattrsNumReadOnly 
4363 ->134 (   -96.929%)
  CHANGED: functionattrsNumReadOnlyArg  
1072 ->945 (   -11.847%)
  CHANGED: functionattrsNumWriteOnly
2012 -> 52 (   -97.416%)

Note: The deduction will improve with later patches that introduce new
functionality we can utilize. Also, some are a result of a bug, see:
http://llvm.org/PR41328


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D60076

Files:
  clang/test/CodeGenOpenCL/as_type.cl
  llvm/include/llvm/Transforms/IPO/Attributor.h
  llvm/lib/Transforms/IPO/Attributor.cpp
  llvm/test/Transforms/FunctionAttrs/arg_returned.ll

Index: llvm/test/Transforms/FunctionAttrs/arg_returned.ll
===
--- llvm/test/Transforms/FunctionAttrs/arg_returned.ll
+++ llvm/test/Transforms/FunctionAttrs/arg_returned.ll
@@ -356,7 +356,7 @@
 ; }
 ;
 ; FNATTR: define dso_local double @select_and_phi(double %b) [[NoInlineNoRecurseNoUnwindReadnoneUwtable]]
-; ATTRIBUTOR: define dso_local double @select_and_phi(double returned %b) [[NoInlineNoRecurseNoUnwindUwtable:#[0-9]*]]
+; ATTRIBUTOR: define dso_local double @select_and_phi(double returned %b) [[NoInlineNoRecurseNoUnwindReadnoneUwtable]]
 ;
 define dso_local double @select_and_phi(double %b) #0 {
 entry:
@@ -384,7 +384,7 @@
 ; }
 ;
 ; FNATTR: define dso_local double @recursion_select_and_phi(i32 %a, double %b) [[NoInlineNoUnwindReadnoneUwtable]]
-; ATTRIBUTOR: define dso_local double @recursion_select_and_phi(i32 %a, double returned %b) [[NoInlineNoUnwindUwtable]]
+; ATTRIBUTOR: define dso_local double @recursion_select_and_phi(i32 %a, double returned %b) [[NoInlineNoUnwindReadnoneUwtable]]
 ;
 define dso_local double @recursion_select_and_phi(i32 %a, double %b) #0 {
 entry:
@@ -411,7 +411,7 @@
 ; }
 ;
 ; FNATTR: define dso_local double* @bitcast(i32* readnone %b) [[NoInlineNoRecurseNoUnwindReadnoneUwtable]]
-; ATTRIBUTOR: define dso_local double* @bitcast(i32* readnone returned "no-capture-maybe-returned" %b) [[NoInlineNoRecurseNoUnwindUwtable]]
+; ATTRIBUTOR: define dso_local double* @bitcast(i32* readnone returned "no-capture-maybe-returned" %b) [[NoInlineNoRecurseNoUnwindReadnoneUwtable]]
 ; BOTH:   define dso_local double* @bitcast(i32* readnone returned "no-capture-maybe-returned" %b) [[NoInlineNoRecurseNoUnwindReadnoneUwtable]]
 ;
 define dso_local double* @bitcast(i32* %b) #0 {
@@ -431,7 +431,7 @@
 ; }
 ;
 ; FNATTR: define dso_local double* @bitcasts_select_and_phi(i32* readnone %b) [[NoInlineNoRecurseNoUnwindReadnoneUwtable]]
-; ATTRIBUTOR: define dso_local double* @bitcasts_select_and_phi(i32* readnone returned "no-capture-maybe-returned" %b) [[NoInlineNoRecurseNoUnwindUwtable]]
+; ATTRIBUTOR: define dso_local double* @bitcasts_select_and_phi(i32* readnone returned "no-capture-maybe-returned" %b) [[NoInlineNoRecurseNoUnwindReadnoneUwtable]]
 ; BOTH:   define dso_local double* @bitcasts_select_and_phi(i32* readnone returned "no-capture-maybe-returned" %b) [[NoInlineNoRecurseNoUnwindReadnoneUwtable]]
 ;
 define dso_local double* @bitcasts_select_and_phi(i32* %b) #0 {
@@ -466,7 +466,7 @@
 ; }
 ;
 ; FNATTR: define dso_local double* @ret_arg_arg_undef(i32* readnone %b) [[NoInlineNoRecurseNoUnw

[PATCH] D59919: [Attributor] Deduce "returned" argument attribute

2019-04-15 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert marked an inline comment as done.
jdoerfert added inline comments.



Comment at: llvm/lib/Transforms/IPO/Attributor.cpp:495
+  virtual size_t getNumReturnValues() const override {
+return isValidState() ? ReturnedValues.size() : -1;
+  }

This should probably call `llvm_unreachable` with a message instead of 
returning -1.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59919/new/

https://reviews.llvm.org/D59919



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D60907: [OpenMP][WIP] Add math functions support in OpenMP offloading

2019-04-19 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added inline comments.



Comment at: include/clang/Driver/ToolChain.h:575
 
+  /// Add arguments to use system-specific CUDA includes.
+  virtual void AddMathDeviceFunctions(const llvm::opt::ArgList &DriverArgs,

Copy & paste comment



Comment at: lib/Headers/__clang_openmp_math.h:5
+
+#ifdef __NVPTX__
+#pragma omp declare target

Why is this NVPTX specific?


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60907/new/

https://reviews.llvm.org/D60907



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D60907: [OpenMP][WIP] Add math functions support in OpenMP offloading

2019-04-19 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added a comment.

To follow up on my comment why this is NVPTX specific:

Is there a reason why this has to happen in the Cuda ToolChain part?
I would have assumed us to add the declarations similar to the ones provided in 
`__clang_openmp_math.h` whenever we may compile for a target.
So, if we have any OpenMP target related code in the TU, we add the header 
`__clang_openmp_target_math.h` which defines "common" math functions as you did 
in `__clang_openmp_math.h` (without the NVPTX guard). The runtime will then 
implement `__kmpc_` as it sees fit.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60907/new/

https://reviews.llvm.org/D60907



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59418: [OpenMP][Offloading] Extract common functionality

2019-04-22 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added a comment.

ping.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59418/new/

https://reviews.llvm.org/D59418



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59420: [NFC][OpenMP] Move runtime function generation to the target codegen

2019-04-22 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added a comment.

ping.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59420/new/

https://reviews.llvm.org/D59420



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59424: [OpenMP][NVPTX] Replace void** buffer by byte-wise buffer

2019-04-22 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added a comment.

ping.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59424/new/

https://reviews.llvm.org/D59424



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D60907: [OpenMP] Add math functions support in OpenMP offloading

2019-04-29 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added inline comments.



Comment at: lib/Driver/ToolChains/Clang.cpp:1159
+  getToolChain().getTriple().isNVPTX())
+getToolChain().AddMathDeviceFunctions(Args, CmdArgs);
+

Here is another "NVPTX" specialization that I don't think we need. At least 
with more targets we need to relax this condition.



Comment at: lib/Headers/__clang_openmp_math.h:13
+
+#ifdef __NVPTX__
+#pragma omp declare target

Why is this NVPTX specific (again)?


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60907/new/

https://reviews.llvm.org/D60907



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D47394: [OpenMP][Clang][NVPTX] Replace bundling with partial linking for the OpenMP NVPTX device offloading toolchain

2019-04-29 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added subscribers: xtian, gregrodgers, ddibyend.
jdoerfert added a comment.
Herald added a subscriber: ormris.

Could you sketch for me how this will (potentially) work if we have multiple 
target vendors? The fatbin solution seems tailored to NVIDIA, but maybe I'm 
wrong here.

In any case, we need to make progress on this front and if this solution is 
compatible with other vendors we should get it in asap.

@xtian, @gregrodgers, @ddibyend please take a look or have someone take a look 
and comment.




Comment at: lib/Driver/Driver.cpp:3972
   bool BuildingForOffloadDevice = TargetDeviceOffloadKind != Action::OFK_None;
+
   if (const OffloadAction *OA = dyn_cast(A)) {

unrelated



Comment at: lib/Driver/ToolChains/Clang.cpp:6117
+CmdArgs.push_back(TCArgs.MakeArgString(Inputs[I].getFilename()));
+  }
+

In "core-LLVM" we usually avoid these braces.



Comment at: lib/Driver/ToolChains/Cuda.cpp:401
+  const char *CubinF = Args.MakeArgString(TC.getInputFilename(Output));
+  CmdArgs.push_back(CubinF);
   for (const auto& II : Inputs)

It might not be worth it to save CubinF here but create it 120 lines later 
instead



Comment at: lib/Driver/ToolChains/Cuda.cpp:547
+const char *CompilerExec =
+Args.MakeArgString(TC.GetProgramPath("clang++"));
+C.addCommand(llvm::make_unique(

You cannot hardcode clang++, it could be C code and we don't want to cause 
interoperability problems and/or the warnings that will inevitably follow.



Comment at: lib/Driver/ToolChains/Cuda.cpp:661
+  if (C.canSkipOffloadBundler())
+Args.AddAllArgs(CmdArgs, options::OPT_L);
+

Could you add a comment here please?



Comment at: lib/Driver/ToolChains/Cuda.cpp:687
+CmdArgs.push_back(C.getArgs().MakeArgString(llvm::Twine("-l") +
+  II.getInputArg().getValue()));
   continue;

By comparing this code with the one after the `if (... endwith(".a"))` it seems
this is treated a bit differently than a static library below. I mention it only
because of the comment above.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D47394/new/

https://reviews.llvm.org/D47394



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D47849: [OpenMP][Clang][NVPTX] Enable math functions called in an OpenMP NVPTX target device region to be resolved as device-native function calls

2019-04-29 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added a comment.

In D47849#1435770 , @hfinkel wrote:

> We need to make progress on this, and I'd like to suggest a path forward...
>
> First, we have a fundamental problem here: Using host headers to declare 
> functions for the device execution environment isn't sound. Those host 
> headers can do anything, and while some platforms might provide a way to make 
> the host headers more friendly (e.g., by defining __NO_MATH_INLINES), these 
> mechanisms are neither robust nor portable. Thus, we should not rely on host 
> headers to define functions that might be available on the device. However, 
> even when compiling for the device, code meant only for host execution must 
> be semantically analyzable. This, in general, requires the host headers. So 
> we have a situation in which we must both use the host headers during device 
> compilation (to keep the semantic analysis of the surrounding host code 
> working) and also can't use the host headers to provide definitions for use 
> for device code (e.g., because those host headers might provide definitions 
> relying on host inline asm, intrinsics, using types not lowerable in device 
> code, could provide declarations using linkage-affecting attributes not 
> lowerable for the device, etc.).
>
> This is, or is very similar to, the problem that the host/device overloading 
> addresses in CUDA. It is also the problem, or very similar to the problem, 
> that the new OpenMP 5 `declare variant` directive is intended to address. 
> Johannes and I discussed this earlier today, and I suggest that we:
>
> 1. Add a math.h wrapper to clang/lib/Headers, which generally just does an 
> include_next of math.h, but provides us with the ability to customize this 
> behavior. Writing a header for OpenMP on NVIDIA GPUs which is essentially 
> identical to the math.h functions in __clang_cuda_device_functions.h would be 
> unfortunate, and as CUDA does provide the underlying execution environment 
> for OpenMP target offload on NVIDIA GPUs, duplicative even in principle. We 
> don't need to alter the default global namespace, however, but can include 
> this file from the wrapper math.h.


I imagine this would look something along the lines of:

  // File: clang/lib/Headers/math.h
  
  #ifdef CUDA
#include "CUDA_INCLUDE_DIR/cuda_math.h"
  #elifdef ...
...
  #endif
  
  #include_next "math.h"

So a clang internal `math.h` wrapper which, depending on the target, includes 
all "math.h" headers in the right order.
The overload resolution should pick the right version even if there are 
multiple declared.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D47849/new/

https://reviews.llvm.org/D47849



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D60907: [OpenMP] Add math functions support in OpenMP offloading

2019-04-29 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added a comment.

In D60907#1483615 , @hfinkel wrote:

> In D60907#1479370 , @gtbercea wrote:
>
> > In D60907#1479142 , @hfinkel wrote:
> >
> > > In D60907#1479118 , @gtbercea 
> > > wrote:
> > >
> > > > Ping @hfinkel @tra
> > >
> > >
> > > The last two comments in D47849  
> > > indicated exploration of a different approach, and one which still seems 
> > > superior to this one. Can you please comment on why you're now pursuing 
> > > this approach instead?
> >
> >
> > ...
> >
> > Hal, as far as I can tell, this solution is similar to yours but with a 
> > slightly different implementation. If there are particular aspects about 
> > this patch you would like to discuss/give feedback on please let me know.
>
>
> The solution I suggested had the advantages of:
>
> 1. Being able to directly reuse the code in 
> `__clang_cuda_device_functions.h`. On the other hand, using this solution we 
> need to implement a wrapper function for every math function. When 
> `__clang_cuda_device_functions.h` is updated, we need to update the OpenMP 
> wrapper as well.


I'd even go as far as to argue that `__clang_cuda_device_functions.h` should 
include the internal math.h wrapper to get all math functions. See also the 
next comment.

> 2. Providing access to wrappers for other CUDA intrinsics in a natural way 
> (e.g., rnorm3d) [it looks a bit nicer to provide a host version of rnorm3d 
> than __nv_rnorm3d in user code].

@hfinkel 
I don't see why you want to mix CUDA intrinsics with math.h overloads. I added 
a rough outline of how I imagined the internal math.h header to look like as a 
comment in D47849 . Could you elaborate how 
that differs from what you imagine and how the other intrinsics come in?

> 3. Being similar to the "declare variant" functionality from OpenMP 5, and 
> thus, I suspect, closer to the solution we'll eventually be able to apply in 
> a standard way to all targets.

I can see this.

>> This solution is following Alexey's suggestions. This solution allows the 
>> optimization of math calls if they apply (example: pow(x,2) => x*x ) which 
>> was one of the issues in the previous solution I implemented.
> 
> So we're also missing that optimization for CUDA code when compiling with 
> Clang? Isn't this also something that, regardless, should be fixed?

Maybe through a general built-in recognition and lowering into target specific 
implementations/intrinsics late again?


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60907/new/

https://reviews.llvm.org/D60907



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D60907: [OpenMP] Add math functions support in OpenMP offloading

2019-04-30 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added a comment.

In D60907#1484529 , @hfinkel wrote:

> In D60907#1483660 , @jdoerfert wrote:
>
> > In D60907#1483615 , @hfinkel wrote:
> >
> > > In D60907#1479370 , @gtbercea 
> > > wrote:
> > >
> > > > In D60907#1479142 , @hfinkel 
> > > > wrote:
> > > >
> > > > > In D60907#1479118 , 
> > > > > @gtbercea wrote:
> > > > >
> > > > > > Ping @hfinkel @tra
> > > > >
> > > > >
> > > > > The last two comments in D47849  
> > > > > indicated exploration of a different approach, and one which still 
> > > > > seems superior to this one. Can you please comment on why you're now 
> > > > > pursuing this approach instead?
> > > >
> > > >
> > > > ...
> > > >
> > > > Hal, as far as I can tell, this solution is similar to yours but with a 
> > > > slightly different implementation. If there are particular aspects 
> > > > about this patch you would like to discuss/give feedback on please let 
> > > > me know.
> > >
> > >
> > > The solution I suggested had the advantages of:
> > >
> > > 1. Being able to directly reuse the code in 
> > > `__clang_cuda_device_functions.h`. On the other hand, using this solution 
> > > we need to implement a wrapper function for every math function. When 
> > > `__clang_cuda_device_functions.h` is updated, we need to update the 
> > > OpenMP wrapper as well.
> >
> >
> > I'd even go as far as to argue that `__clang_cuda_device_functions.h` 
> > should include the internal math.h wrapper to get all math functions. See 
> > also the next comment.
> >
> > > 2. Providing access to wrappers for other CUDA intrinsics in a natural 
> > > way (e.g., rnorm3d) [it looks a bit nicer to provide a host version of 
> > > rnorm3d than __nv_rnorm3d in user code].
> >
> > @hfinkel 
> >  I don't see why you want to mix CUDA intrinsics with math.h overloads.
>
>
> What I had in mind was matching non-standard functions in a standard way. For 
> example, let's just say that I have a CUDA kernel that uses the rnorm3d 
> function, or I otherwise have a function that I'd like to write in OpenMP 
> that will make good use of this CUDA function (because it happens to have an 
> efficient device implementation). This is a function that CUDA provides, in 
> the global namespace, although it's not standard.
>
> Then I can do something like this (depending on how we setup the 
> implementation):
>
>   double rnorm3d(double a,  double b, double c) {
> return sqrt(a*a + b*b + c*c);
>   }
>   
>   ...
>   
>   #pragma omp target
>   {
> double a = ..., b = ..., c = ...;
> double r = rnorm3d(a, b, c)
>   }
>   
>
> and, if we use the CUDA math headers for CUDA math-function support, then 
> this might "just work." To be clear, I can see an argument for having this 
> work being a bad idea ;) -- but it has the advantage of providing a way to 
> take advantage of system-specific functions while still writing 
> completely-portable code.


Matching `rnorm3d` and replacing it with some nvvm "intrinsic" is something I 
wouldn't like to see happening if `math.h` was included and not if it was not. 
As you say, in Cuda that is not how it works either. I'm in favor of reusing 
the built-in recognition mechanism:
That is, if the target is nvptx, the name is rnorm3d, we match that name and 
use the appropriate intrinsic, as we do others already for other targets.

>>   I added a rough outline of how I imagined the internal math.h header to 
>> look like as a comment in D47849. Could you elaborate how that differs from 
>> what you imagine and how the other intrinsics come in?
> 
> That looks like what I had in mind (including 
> `__clang_cuda_device_functions.h` to get the device functions.)
> 
>> 
>> 
>>> 3. Being similar to the "declare variant" functionality from OpenMP 5, and 
>>> thus, I suspect, closer to the solution we'll eventually be able to apply 
>>> in a standard way to all targets.
>> 
>> I can see this.
>> 
>>>> This solution is following Alexey's suggestions. This solution allows the 
>>>> optimization of math calls if they apply (example: pow(x,2) => x*x ) which 
>>>> was one of the issues in the previous solution I implemented.
>>> 
>>> So we're also missing that optimization for CUDA code when compiling with 
>>> Clang? Isn't this also something that, regardless, should be fixed?
>> 
>> Maybe through a general built-in recognition and lowering into target 
>> specific implementations/intrinsics late again?
> 
> I suspect that we need to match the intrinsics and perform the optimizations 
> in LLVM at that level in order to get the optimizations for CUDA.

That seems reasonable to me. We could also match other intrinsics, e.g., 
`rnorm3d`, here as well, both by name but also by the computation pattern.

In D60907#148464

[PATCH] D61399: [OpenMP][Clang] Support for target math functions

2019-05-01 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added a comment.

For the record, this is an implementation of the scheme proposed in 
https://reviews.llvm.org/D60907#1484756.
There are drawbacks, see the TODO, but it will give most people a short term 
solution until we get OpenMP 5.0 variants.

Finally, there is a remote chance this will cause trouble to people that use 
math.h/cmath functions, e.g. with the old SSE hack, which are not available 
anymore.
I don't suspect that to happen but if it does we can, again as a short term 
solution, selectively extract declarations from the host cmath into the device 
cmath.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D61399/new/

https://reviews.llvm.org/D61399



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D61399: [OpenMP][Clang] Support for target math functions

2019-05-01 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added inline comments.



Comment at: lib/Headers/openmp_wrappers/__clang_openmp_math.h:29
+
+#define __forceinline__ __attribute__((always_inline))
+

I think this is a leftover we forgot to remove.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D61399/new/

https://reviews.llvm.org/D61399



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D61399: [OpenMP][Clang] Support for target math functions

2019-05-04 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added a comment.

Alexey, is this good to go now?


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D61399/new/

https://reviews.llvm.org/D61399



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D52118: [Loopinfo] Remove one latch case in getLoopID. NFC.

2018-09-15 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert accepted this revision.
jdoerfert added a comment.
This revision is now accepted and ready to land.

> save an iteration over the loop's basic blocks (which is what getLoopLatch 
> does)

I'm not sure this is true. getLoopLatch() in LoopInfoImpl.h 
only traverses the children of the header in the inverse graph.
That should, I think, be similar to predecessors(Header) in case
of the IR CFG.

That being said, the patch makes sense to me and it is a simple,
straightforward improvement. I don't see any downsides and
it simplifies the code.

LGTM


Repository:
  rC Clang

https://reviews.llvm.org/D52118



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D58091: Customize warnings for missing built-in type

2019-02-11 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert created this revision.
jdoerfert added reviewers: jyknight, lebedev.ri, aaron.ballman, bcain.
Herald added subscribers: jfb, bollu, krytarowski, emaste.
Herald added a project: clang.

If we detect a built-in declaration for which we cannot derive a type
matching the pattern in the Builtins.def file, we currently emit a
warning that the respective header is needed. However, this is not
necessarily the behavior we want as it has no connection to the location
of the declaration (which can actually be in the header in question).
Instead, this warning is generated

- if we could not build the type for the pattern on file (for some reason). 
Here we should make the reason explicit. The actual problem is otherwise 
circumvented as the warning is misleading, see [0] for an example.
- if we could not build the type for the pattern because we do not have a type 
on record, possible since D55483 , we should 
not emit any warning. See [1] for a legitimate problem.

This patch addresses both cases. For the "setjmp" family a new warning is
introduced and for built-ins without type on record, so far
"pthread_create", we do not emit the warning anymore.

Also see: PR40692

[0] https://lkml.org/lkml/2019/1/11/718
[1] https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=235583


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D58091

Files:
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/lib/Sema/SemaDecl.cpp
  clang/test/Analysis/retain-release.m
  clang/test/Sema/builtin-setjmp.c
  clang/test/Sema/implicit-builtin-decl.c

Index: clang/test/Sema/implicit-builtin-decl.c
===
--- clang/test/Sema/implicit-builtin-decl.c
+++ clang/test/Sema/implicit-builtin-decl.c
@@ -55,14 +55,17 @@
 
 void snprintf() { }
 
-// PR8316
-void longjmp(); // expected-warning{{declaration of built-in function 'longjmp' requires inclusion of the header }}
+// PR8316 & PR40692
+void longjmp(); // expected-warning{{declaration of built-in 'longjmp' requires the definition of the 'jmp_buf' type}}
 
 extern float fmaxf(float, float);
 
 struct __jmp_buf_tag {};
-void sigsetjmp(struct __jmp_buf_tag[1], int); // expected-warning{{declaration of built-in function 'sigsetjmp' requires inclusion of the header }}
+void sigsetjmp(struct __jmp_buf_tag[1], int); // expected-warning{{declaration of built-in 'sigsetjmp' requires the definition of the 'jmp_buf' type}}
 
 // CHECK: FunctionDecl {{.*}}  col:6 sigsetjmp '
 // CHECK-NOT: FunctionDecl
 // CHECK: ReturnsTwiceAttr {{.*}} <{{.*}}> Implicit
+
+// PR40692
+void pthread_create(); // no warning expected
Index: clang/test/Sema/builtin-setjmp.c
===
--- /dev/null
+++ clang/test/Sema/builtin-setjmp.c
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify=no_jmp_buf -DNO_JMP_BUF %s
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify=redecl %s
+
+#ifdef NO_JMP_BUF
+extern long setjmp(long *);   // no_jmp_buf-warning {{declaration of built-in 'setjmp' requires the definition of the 'jmp_buf' type}}
+#else
+typedef long jmp_buf;
+extern int setjmp(char);  // redecl-warning@8 {{incompatible redeclaration of library function 'setjmp'}}
+  // redecl-note@8{{'setjmp' is a builtin with type 'int (jmp_buf)' (aka 'int (long)')}}
+#endif
Index: clang/test/Analysis/retain-release.m
===
--- clang/test/Analysis/retain-release.m
+++ clang/test/Analysis/retain-release.m
@@ -2,7 +2,7 @@
 // RUN: %clang_analyze_cc1 -triple x86_64-apple-darwin10\
 // RUN: -analyzer-checker=core,osx.coreFoundation.CFRetainRelease\
 // RUN: -analyzer-checker=osx.cocoa.ClassRelease,osx.cocoa.RetainCount\
-// RUN: -analyzer-checker=debug.ExprInspection -fblocks -verify=expected,C %s\
+// RUN: -analyzer-checker=debug.ExprInspection -fblocks -verify %s\
 // RUN: -Wno-objc-root-class -analyzer-output=plist -o %t.objc.plist
 // RUN: %clang_analyze_cc1 -triple x86_64-apple-darwin10\
 // RUN: -analyzer-checker=core,osx.coreFoundation.CFRetainRelease\
@@ -1231,7 +1231,7 @@
 typedef unsigned long __darwin_pthread_key_t;
 typedef __darwin_pthread_key_t pthread_key_t;
 
-int pthread_create(pthread_t *, const pthread_attr_t *,  // C-warning{{declaration of built-in function 'pthread_create' requires inclusion of the header }}
+int pthread_create(pthread_t *, const pthread_attr_t *,
void *(*)(void *), void *);
 
 int pthread_setspecific(pthread_key_t key, const void *value);
Index: clang/lib/Sema/SemaDecl.cpp
===
--- clang/lib/Sema/SemaDecl.cpp
+++ clang/lib/Sema/SemaDecl.cpp
@@ -1955,10 +1955,27 @@
   ASTContext::GetBuiltinTypeError Error;
   QualType R = Context.GetBuiltinType(ID, Error);
   if (Error) {
- 

[PATCH] D58091: Customize warnings for missing built-in type

2019-02-13 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added a comment.

In D58091#1396382 , @aaron.ballman 
wrote:

> If I'm following along properly, it sounds like we want to disable this 
> warning largely because it can appear in header files attempting to declare 
> the functions in question.


That is the situation that exposed the problem, yes.

> - but I wonder why those diagnostics are happening in the first place. It 
> seems like the warning is still useful when it triggers outside of that 
> situation, no?

The underlying conceptual problem, which I didn't know when I added 
`GE_Missing_type`, is that this has _nothing_ to do with the location of the 
declaration. We say, include the header X.h, if we were not able to build a 
type for recognized built-in Y that should be declared in X.h. However, we 
should report _why_ we could not build the type instead. For built-ins for 
which we do not have a type on record (`GE_Missing_type`), this is always the 
case, so no warning for now. For the ones that we only fail to build a type because some 
requirement is missing, we should report that, at least when we are in the 
respective header. I don't have a perfect solution of what to do actually.

I could check if the declaration is (probably) in the respective header so we 
can switch between warnings?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58091/new/

https://reviews.llvm.org/D58091



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D58091: Customize warnings for missing built-in type

2019-02-13 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added a comment.

In D58091#1396393 , @bcain wrote:

> I reported PR40692.  I just tried this patch on our local build where we saw 
> the failure on an RTOS implementing pthreads.  Unfortunately with this patch 
> I encountered an (unrelated) assertion.  So this fix was inconclusive for me 
> (for now).  I will follow up but if this fix makes sense then you don't need 
> to wait for my test results.


Are you sure it is unrelated?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58091/new/

https://reviews.llvm.org/D58091



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D58091: Customize warnings for missing built-in type

2019-02-13 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added a comment.

In D58091#1396437 , @aaron.ballman 
wrote:

> In D58091#1396397 , @jdoerfert wrote:
>
> > In D58091#1396382 , @aaron.ballman 
> > wrote:
> >
> > > - but I wonder why those diagnostics are happening in the first place. It 
> > > seems like the warning is still useful when it triggers outside of that 
> > > situation, no?
> >
> >
> > The underlying conceptual problem, which I didn't know when I added 
> > `GE_Missing_type`, is that this has _nothing_ to do with the location of 
> > the declaration. We say, include the header X.h, if we were not able to 
> > build a type for recognized built-in Y that should be declared in X.h. 
> > However, we should report _why_ we could not build the type instead. For 
> > built-ins we do not have a type on record (`GE_Missing_type`), this is 
> > always, so no warning for now. For the ones that we only fail to build a 
> > type because some requirement is missing, we should report that, at least 
> > when we are in the respective header. I don't have a perfect solution of 
> > what to do actually.
> >
> > I could check if the declaration is (probably) in the respective header so 
> > we can switch between warnings?
>
>
> That's kind of what I was wondering, but I deal with builtins so infrequently 
> that my expectations may be wrong here. If a user declares a builtin with a 
> conflicting type outside of a header file, that seems like we'd want to warn 
> the user about right? But this seems to remove that warning, at least in the 
> case of test/Sema/implicit-builtin-decl.c:71. Or do I misunderstand the 
> situation causing the warning to trigger?


After this, we should still warn for all builtins for which we have an expected 
type on record.

I added the `clang/test/Sema/builtin-setjmp.c` test to check for this 
situation. Here, `setjmp` is declared outside of the header (but it actually 
doesn't matter as I mentioned in the above comment). If you declare it without 
defining `jmp_buf` first, that is what the kernel ppl did, you will get a 
warning that states `jmp_buf` is unknown and we require it for the declaration 
of `setjmp/longjmp/...` (line 5). If you define `jmp_buf` and then declare 
`setjmp` with a conflicting type, that is not `T setjmp(jmp_buf)`, you will see 
the incompatible redeclaration warning (line 8). Does that make sense?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58091/new/

https://reviews.llvm.org/D58091



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D58091: Customize warnings for missing built-in type

2019-02-25 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 188238.
jdoerfert marked 3 inline comments as done.
jdoerfert added a comment.

Address comments


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58091/new/

https://reviews.llvm.org/D58091

Files:
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/lib/Sema/SemaDecl.cpp
  clang/test/Analysis/retain-release.m
  clang/test/Sema/builtin-setjmp.c
  clang/test/Sema/implicit-builtin-decl.c

Index: clang/test/Sema/implicit-builtin-decl.c
===
--- clang/test/Sema/implicit-builtin-decl.c
+++ clang/test/Sema/implicit-builtin-decl.c
@@ -55,14 +55,17 @@
 
 void snprintf() { }
 
-// PR8316
-void longjmp(); // expected-warning{{declaration of built-in function 'longjmp' requires inclusion of the header }}
+// PR8316 & PR40692
+void longjmp(); // expected-warning{{declaration of built-in function 'longjmp' requires the definition of the 'jmp_buf' type, commonly proived in the header .}}
 
 extern float fmaxf(float, float);
 
 struct __jmp_buf_tag {};
-void sigsetjmp(struct __jmp_buf_tag[1], int); // expected-warning{{declaration of built-in function 'sigsetjmp' requires inclusion of the header }}
+void sigsetjmp(struct __jmp_buf_tag[1], int); // expected-warning{{declaration of built-in function 'sigsetjmp' requires the declaration of the 'jmp_buf' type, commonly proived in the header .}}
 
 // CHECK: FunctionDecl {{.*}}  col:6 sigsetjmp '
 // CHECK-NOT: FunctionDecl
 // CHECK: ReturnsTwiceAttr {{.*}} <{{.*}}> Implicit
+
+// PR40692
+void pthread_create(); // no warning expected
Index: clang/test/Sema/builtin-setjmp.c
===
--- /dev/null
+++ clang/test/Sema/builtin-setjmp.c
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify -DNO_JMP_BUF %s
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify %s
+
+#ifdef NO_JMP_BUF
+extern long setjmp(long *);   // expected-warning {{declaration of built-in function 'setjmp' requires the declaration of the 'jmp_buf' type, commonly proived in the header .}}
+#else
+typedef long jmp_buf;
+extern int setjmp(char);  // expected-warning@8 {{incompatible redeclaration of library function 'setjmp'}}
+  // expected-note@8{{'setjmp' is a builtin with type 'int (jmp_buf)' (aka 'int (long)')}}
+#endif
Index: clang/test/Analysis/retain-release.m
===
--- clang/test/Analysis/retain-release.m
+++ clang/test/Analysis/retain-release.m
@@ -2,7 +2,7 @@
 // RUN: %clang_analyze_cc1 -triple x86_64-apple-darwin10\
 // RUN: -analyzer-checker=core,osx.coreFoundation.CFRetainRelease\
 // RUN: -analyzer-checker=osx.cocoa.ClassRelease,osx.cocoa.RetainCount\
-// RUN: -analyzer-checker=debug.ExprInspection -fblocks -verify=expected,C %s\
+// RUN: -analyzer-checker=debug.ExprInspection -fblocks -verify %s\
 // RUN: -Wno-objc-root-class -analyzer-output=plist -o %t.objc.plist
 // RUN: %clang_analyze_cc1 -triple x86_64-apple-darwin10\
 // RUN: -analyzer-checker=core,osx.coreFoundation.CFRetainRelease\
@@ -1231,7 +1231,7 @@
 typedef unsigned long __darwin_pthread_key_t;
 typedef __darwin_pthread_key_t pthread_key_t;
 
-int pthread_create(pthread_t *, const pthread_attr_t *,  // C-warning{{declaration of built-in function 'pthread_create' requires inclusion of the header }}
+int pthread_create(pthread_t *, const pthread_attr_t *,
void *(*)(void *), void *);
 
 int pthread_setspecific(pthread_key_t key, const void *value);
Index: clang/lib/Sema/SemaDecl.cpp
===
--- clang/lib/Sema/SemaDecl.cpp
+++ clang/lib/Sema/SemaDecl.cpp
@@ -1955,10 +1955,27 @@
   ASTContext::GetBuiltinTypeError Error;
   QualType R = Context.GetBuiltinType(ID, Error);
   if (Error) {
-if (ForRedeclaration)
-  Diag(Loc, diag::warn_implicit_decl_requires_sysheader)
-  << getHeaderName(Context.BuiltinInfo, ID, Error)
+if (!ForRedeclaration)
+  return nullptr;
+
+// If we have a builtin without an associated type we should not emit a
+// warning when we were not able to find a type for it.
+if (Error == ASTContext::GE_Missing_type)
+  return nullptr;
+
+// If we could not find a type for setjmp it is because the jmp_buf type was
+// not defined prior to the setjmp declaration.
+if (Error == ASTContext::GE_Missing_setjmp) {
+  Diag(Loc, diag::warn_implicit_decl_no_jmp_buf)
   << Context.BuiltinInfo.getName(ID);
+  return nullptr;
+}
+
+// Generally, we emit a warning that the declaration requires the
+// appropriate header.
+Diag(Loc, diag::warn_implicit_decl_requires_sysheader)
+<< getHeaderName(Context.BuiltinInfo, ID, Error)
+<< Context.BuiltinInfo.getName(ID);
 retur

[PATCH] D58091: Customize warnings for missing built-in type

2019-02-25 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added a comment.

I did address the comments but I will wait until I hear back on the "warning vs 
warning + note question".

In D58091#1397586 , @jyknight wrote:

> I think this warning (-Wbuiltin-requires-header) doesn't really make sense as 
> its own warning.
>
> [...]
>
> I think for a declaration, if we cannot construct the appropriate type, we 
> should be treating all declarations as an incompatible redeclaration, and 
> explain why in an attached note, like:
>
>   warning: incompatible redeclaration of library function 'exit' 
> [-Wincompatible-library-redeclaration]
>   note: missing declaration of type 'jmp_buf' for argument 1 of standard 
> function signature.
>
>
> For a usage, we could emit something like:
>
>   warning: implicit declaration of library function 'setjmp' 
> [-Wimplicit-function-declaration]
>   note: missing declaration of type 'jmp_buf' for argument 1.
>   note: include the header  or explicitly provide a declaration for 
> 'setjmp'
>


I do not have strong feelings about this, either way is fine with me. However, 
I lack the 
clang expertise to make such a change happen anytime soon which makes this patch
(with actual fix for the warning on pthread_create) my preferred first step.




Comment at: clang/lib/Sema/SemaDecl.cpp:1971
   << Context.BuiltinInfo.getName(ID);
+  return nullptr;
+}

rsmith wrote:
> It'd be nice to produce `note_include_header_or_declare` here. (Ideally, that 
> note should be suppressed if we're transitively in a header with the right 
> name already, but I think it'll be clear enough what's wrong even if we 
> produce the note unconditionally.)
I did add the "include the header" part in the warning now. Does that make 
sense and address your issue or do you think we should have a separate note?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58091/new/

https://reviews.llvm.org/D58091



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D60583: [AArch64] Implement Vector Funtion ABI name mangling.

2019-06-04 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert reopened this revision.
jdoerfert added a comment.
This revision is now accepted and ready to land.

Why/Where did we decide to clobber the attribute list with "non-existent 
function names"?

This seems to me like an ad-hoc implementation of the RFC that is currently 
discussed but committed before the discussion is finished.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60583/new/

https://reviews.llvm.org/D60583



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D60583: [AArch64] Implement Vector Funtion ABI name mangling.

2019-06-04 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added a comment.

In D60583#1529882 , @ABataev wrote:

> In D60583#1529878 , @jdoerfert wrote:
>
> > Why/Where did we decide to clobber the attribute list with "non-existent 
> > function names"?
> >
> > This seems to me like an ad-hoc implementation of the RFC that is currently 
> > discussed but committed before the discussion is finished.
>
>
> It has nothing to do with the RFC for a variant. It is a standard interface 
> to communicate with the backend to generate vectorized versions of the 
> functions. It relies on Vector ABI, provided by Intel and ARM, it follows the 
> way it is implemented in GCC. There was an RFC for this long time ago which 
> was accepted by the community and later implemented.


The RFC states, in a nutshell, let us add one attribute to identify all vector 
variants. This patch adds all vector variants as attributes. Clearly, these 
things are related.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60583/new/

https://reviews.llvm.org/D60583



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D60583: [AArch64] Implement Vector Funtion ABI name mangling.

2019-06-04 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added a comment.

In D60583#1529937 , @fpetrogalli wrote:

> In D60583#1529878 , @jdoerfert wrote:
>
> > Why/Where did we decide to clobber the attribute list with "non-existent 
> > function names"?
>
>
> The existence of those symbols is guaranteed by the "contract" stipulated via 
> the Vector Function ABI. They cannot be added explicitly by the front-end as 
> `define`s because they would be removed before reaching the vectorizer.


That is not a good argument. Afaik, there are multiple ways designed to keep 
symbols alive, e.g., `@llvm.used`.

>> I don't think an attribute list like this:
>>  `attributes #1 = { "_ZGVsM2v_foo" "_ZGVsM32v_foo" "_ZGVsM4v_foo" 
>> "_ZGVsM6v_foo" "_ZGVsM8v_foo" "_ZGVsMxv_foo" ... `
>>  is helpful in any way, e.g., this would require us to search through all 
>> attributes and interpret them one by one.
> 
> Agree. This is what was agreed : 
> http://lists.llvm.org/pipermail/cfe-dev/2016-March/047732.html
> 
> The new RFC will get rid of this list of string attributes. It will become 
> something like:
> 
>   attribute #0 = { 
> declare-variant="comma,separated,list,of,vector,function,ABI,mangled,names" }.
> 
> 
> 
> 
>> This seems to me like an ad-hoc implementation of the RFC that is currently 
>> discussed but committed before the discussion is finished.
> 
> I can assure you that's not the case.
> 
> The code in this patch is what it is because it is based on previous 
> (accepted) RFC originally proposed by other people and used by VecClone: 
> https://reviews.llvm.org/D22792
> 
> As you can see in the unit tests of the VecClone pass, the variant attribute 
> is added as follows:
> 
>   attributes #0 = { nounwind uwtable 
> "vector-variants"="_ZGVbM4_foo1,_ZGVbN4_foo1,_ZGVcM8_foo1,_ZGVcN8_foo1,_ZGVdM8_foo1,_ZGVdN8_foo1,_ZGVeM16_foo1,_ZGVeN16_foo1"
>    }

I get that it was discussed three years ago and I get that it was accepted then.
My confusion stems from the fact that it was committed just now, three years 
later, but shortly before the new RFC basically proposed a different encoding.

> Nothing in LLVM is using those attributes at the moment, that might be the 
> reason why the string attribute have not yet been moved to a single attribute.

That means we can easily change the encoding, and I strongly believe we should.

Given that you want a different encoding, I don't know if I have to list 
reasons why I dislike this one (direct names as attributes). Though, I can do 
so if we want to discuss it.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60583/new/

https://reviews.llvm.org/D60583



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59919: [Attributor] Deduce "returned" argument attribute

2019-06-09 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 203741.
jdoerfert added a comment.

Update to new Attributor design and more tests


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59919/new/

https://reviews.llvm.org/D59919

Files:
  clang/test/CodeGenOpenCL/as_type.cl
  llvm/include/llvm/Transforms/IPO/Attributor.h
  llvm/lib/Transforms/IPO/Attributor.cpp
  llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll
  llvm/test/Transforms/FunctionAttrs/arg_returned.ll
  llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll

Index: llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll
===
--- llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll
+++ llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll
@@ -31,7 +31,7 @@
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 ; CHECK: Function Attrs: nounwind
-; CHECK-NEXT: define i32* @external_ret2_nrw(i32* %n0, i32* %r0, i32* %w0)
+; CHECK-NEXT: define i32* @external_ret2_nrw(i32* %n0, i32* %r0, i32* returned %w0)
 define i32* @external_ret2_nrw(i32* %n0, i32* %r0, i32* %w0) {
 entry:
   %call = call i32* @internal_ret0_nw(i32* %n0, i32* %w0)
@@ -42,7 +42,7 @@
 }
 
 ; CHECK: Function Attrs: nounwind
-; CHECK-NEXT: define internal i32* @internal_ret0_nw(i32* %n0, i32* %w0)
+; CHECK-NEXT: define internal i32* @internal_ret0_nw(i32* returned %n0, i32* %w0)
 define internal i32* @internal_ret0_nw(i32* %n0, i32* %w0) {
 entry:
   %r0 = alloca i32, align 4
@@ -71,7 +71,7 @@
 }
 
 ; CHECK: Function Attrs: nounwind
-; CHECK-NEXT: define internal i32* @internal_ret1_rrw(i32* %r0, i32* %r1, i32* %w0)
+; CHECK-NEXT: define internal i32* @internal_ret1_rrw(i32* %r0, i32* returned %r1, i32* %w0)
 define internal i32* @internal_ret1_rrw(i32* %r0, i32* %r1, i32* %w0) {
 entry:
   %0 = load i32, i32* %r0, align 4
@@ -122,7 +122,7 @@
 }
 
 ; CHECK: Function Attrs: nounwind
-; CHECK-NEXT: define internal i32* @internal_ret1_rw(i32* %r0, i32* %w0)
+; CHECK-NEXT: define internal i32* @internal_ret1_rw(i32* %r0, i32* returned %w0)
 define internal i32* @internal_ret1_rw(i32* %r0, i32* %w0) {
 entry:
   %0 = load i32, i32* %r0, align 4
@@ -148,7 +148,7 @@
 }
 
 ; CHECK: Function Attrs: nounwind
-; CHECK-NEXT: define i32* @external_source_ret2_nrw(i32* %n0, i32* %r0, i32* %w0)
+; CHECK-NEXT: define i32* @external_source_ret2_nrw(i32* %n0, i32* %r0, i32* returned %w0)
 define i32* @external_source_ret2_nrw(i32* %n0, i32* %r0, i32* %w0) {
 entry:
   %call = call i32* @external_sink_ret2_nrw(i32* %n0, i32* %r0, i32* %w0)
Index: llvm/test/Transforms/FunctionAttrs/arg_returned.ll
===
--- llvm/test/Transforms/FunctionAttrs/arg_returned.ll
+++ llvm/test/Transforms/FunctionAttrs/arg_returned.ll
@@ -1,5 +1,6 @@
-; RUN: opt -functionattrs -attributor -attributor-disable=false -S < %s | FileCheck %s
-; RUN: opt -functionattrs -attributor -attributor-disable=false -attributor-verify=true -S < %s | FileCheck %s
+; RUN: opt -functionattrs -S < %s | FileCheck %s --check-prefix=FNATTR
+; RUN: opt -attributor -attributor-disable=false -S < %s | FileCheck %s --check-prefix=ATTRIBUTOR
+; RUN: opt -attributor -attributor-disable=false -functionattrs -S < %s | FileCheck %s --check-prefix=BOTH
 ;
 ; Test cases specifically designed for the "returned" argument attribute.
 ; We use FIXME's to indicate problems and missing attributes.
@@ -7,16 +8,24 @@
 
 ; TEST SCC test returning an integer value argument
 ;
-; CHECK: Function Attrs: noinline norecurse nounwind readnone uwtable
-; CHECK: define i32 @sink_r0(i32 returned %r)
-;
-; FIXME: returned on %r missing:
-; CHECK: Function Attrs: noinline nounwind readnone uwtable
-; CHECK: define i32 @scc_r1(i32 %a, i32 %r, i32 %b)
-;
-; FIXME: returned on %r missing:
-; CHECK: Function Attrs: noinline nounwind readnone uwtable
-; CHECK: define i32 @scc_r2(i32 %a, i32 %b, i32 %r)
+; BOTH: Function Attrs: noinline norecurse nounwind readnone uwtable
+; BOTH: define i32 @sink_r0(i32 returned %r)
+; BOTH: Function Attrs: noinline nounwind readnone uwtable
+; BOTH: define i32 @scc_r1(i32 %a, i32 returned %r, i32 %b)
+; BOTH: Function Attrs: noinline nounwind readnone uwtable
+; BOTH: define i32 @scc_r2(i32 %a, i32 %b, i32 returned %r)
+; BOTH: Function Attrs: noinline nounwind readnone uwtable
+; BOTH: define i32 @scc_rX(i32 %a, i32 %b, i32 %r)
+;
+; FNATTR: define i32 @sink_r0(i32 returned %r)
+; FNATTR: define i32 @scc_r1(i32 %a, i32 %r, i32 %b)
+; FNATTR: define i32 @scc_r2(i32 %a, i32 %b, i32 %r)
+; FNATTR: define i32 @scc_rX(i32 %a, i32 %b, i32 %r)
+;
+; ATTRIBUTOR: define i32 @sink_r0(i32 returned %r)
+; ATTRIBUTOR: define i32 @scc_r1(i32 %a, i32 returned %r, i32 %b)
+; ATTRIBUTOR: define i32 @scc_r2(i32 %a, i32 %b, i32 returned %r)
+; ATTRIBUTOR: define i32 @scc_rX(i32 %a, i32 %b, i32 %r)
 ;
 ; int scc_r1(int a, int b, int 

[PATCH] D59919: [Attributor] Deduce "returned" argument attribute

2019-06-09 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 203742.
jdoerfert added a comment.

Cleanup leftover arguments


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59919/new/

https://reviews.llvm.org/D59919

Files:
  clang/test/CodeGenOpenCL/as_type.cl
  llvm/include/llvm/Transforms/IPO/Attributor.h
  llvm/lib/Transforms/IPO/Attributor.cpp
  llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll
  llvm/test/Transforms/FunctionAttrs/arg_returned.ll
  llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll

Index: llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll
===
--- llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll
+++ llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll
@@ -31,7 +31,7 @@
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 ; CHECK: Function Attrs: nounwind
-; CHECK-NEXT: define i32* @external_ret2_nrw(i32* %n0, i32* %r0, i32* %w0)
+; CHECK-NEXT: define i32* @external_ret2_nrw(i32* %n0, i32* %r0, i32* returned %w0)
 define i32* @external_ret2_nrw(i32* %n0, i32* %r0, i32* %w0) {
 entry:
   %call = call i32* @internal_ret0_nw(i32* %n0, i32* %w0)
@@ -42,7 +42,7 @@
 }
 
 ; CHECK: Function Attrs: nounwind
-; CHECK-NEXT: define internal i32* @internal_ret0_nw(i32* %n0, i32* %w0)
+; CHECK-NEXT: define internal i32* @internal_ret0_nw(i32* returned %n0, i32* %w0)
 define internal i32* @internal_ret0_nw(i32* %n0, i32* %w0) {
 entry:
   %r0 = alloca i32, align 4
@@ -71,7 +71,7 @@
 }
 
 ; CHECK: Function Attrs: nounwind
-; CHECK-NEXT: define internal i32* @internal_ret1_rrw(i32* %r0, i32* %r1, i32* %w0)
+; CHECK-NEXT: define internal i32* @internal_ret1_rrw(i32* %r0, i32* returned %r1, i32* %w0)
 define internal i32* @internal_ret1_rrw(i32* %r0, i32* %r1, i32* %w0) {
 entry:
   %0 = load i32, i32* %r0, align 4
@@ -122,7 +122,7 @@
 }
 
 ; CHECK: Function Attrs: nounwind
-; CHECK-NEXT: define internal i32* @internal_ret1_rw(i32* %r0, i32* %w0)
+; CHECK-NEXT: define internal i32* @internal_ret1_rw(i32* %r0, i32* returned %w0)
 define internal i32* @internal_ret1_rw(i32* %r0, i32* %w0) {
 entry:
   %0 = load i32, i32* %r0, align 4
@@ -148,7 +148,7 @@
 }
 
 ; CHECK: Function Attrs: nounwind
-; CHECK-NEXT: define i32* @external_source_ret2_nrw(i32* %n0, i32* %r0, i32* %w0)
+; CHECK-NEXT: define i32* @external_source_ret2_nrw(i32* %n0, i32* %r0, i32* returned %w0)
 define i32* @external_source_ret2_nrw(i32* %n0, i32* %r0, i32* %w0) {
 entry:
   %call = call i32* @external_sink_ret2_nrw(i32* %n0, i32* %r0, i32* %w0)
Index: llvm/test/Transforms/FunctionAttrs/arg_returned.ll
===
--- llvm/test/Transforms/FunctionAttrs/arg_returned.ll
+++ llvm/test/Transforms/FunctionAttrs/arg_returned.ll
@@ -1,5 +1,6 @@
-; RUN: opt -functionattrs -attributor -attributor-disable=false -S < %s | FileCheck %s
-; RUN: opt -functionattrs -attributor -attributor-disable=false -attributor-verify=true -S < %s | FileCheck %s
+; RUN: opt -functionattrs -S < %s | FileCheck %s --check-prefix=FNATTR
+; RUN: opt -attributor -attributor-disable=false -S < %s | FileCheck %s --check-prefix=ATTRIBUTOR
+; RUN: opt -attributor -attributor-disable=false -functionattrs -S < %s | FileCheck %s --check-prefix=BOTH
 ;
 ; Test cases specifically designed for the "returned" argument attribute.
 ; We use FIXME's to indicate problems and missing attributes.
@@ -7,16 +8,24 @@
 
 ; TEST SCC test returning an integer value argument
 ;
-; CHECK: Function Attrs: noinline norecurse nounwind readnone uwtable
-; CHECK: define i32 @sink_r0(i32 returned %r)
-;
-; FIXME: returned on %r missing:
-; CHECK: Function Attrs: noinline nounwind readnone uwtable
-; CHECK: define i32 @scc_r1(i32 %a, i32 %r, i32 %b)
-;
-; FIXME: returned on %r missing:
-; CHECK: Function Attrs: noinline nounwind readnone uwtable
-; CHECK: define i32 @scc_r2(i32 %a, i32 %b, i32 %r)
+; BOTH: Function Attrs: noinline norecurse nounwind readnone uwtable
+; BOTH: define i32 @sink_r0(i32 returned %r)
+; BOTH: Function Attrs: noinline nounwind readnone uwtable
+; BOTH: define i32 @scc_r1(i32 %a, i32 returned %r, i32 %b)
+; BOTH: Function Attrs: noinline nounwind readnone uwtable
+; BOTH: define i32 @scc_r2(i32 %a, i32 %b, i32 returned %r)
+; BOTH: Function Attrs: noinline nounwind readnone uwtable
+; BOTH: define i32 @scc_rX(i32 %a, i32 %b, i32 %r)
+;
+; FNATTR: define i32 @sink_r0(i32 returned %r)
+; FNATTR: define i32 @scc_r1(i32 %a, i32 %r, i32 %b)
+; FNATTR: define i32 @scc_r2(i32 %a, i32 %b, i32 %r)
+; FNATTR: define i32 @scc_rX(i32 %a, i32 %b, i32 %r)
+;
+; ATTRIBUTOR: define i32 @sink_r0(i32 returned %r)
+; ATTRIBUTOR: define i32 @scc_r1(i32 %a, i32 returned %r, i32 %b)
+; ATTRIBUTOR: define i32 @scc_r2(i32 %a, i32 %b, i32 returned %r)
+; ATTRIBUTOR: define i32 @scc_rX(i32 %a, i32 %b, i32 %r)
 ;
 ; int scc_r1(int a, int b, int r);
 ; int scc_r2(in

[PATCH] D59919: [Attributor] Deduce "returned" argument attribute

2019-06-09 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert marked 2 inline comments as done.
jdoerfert added a comment.

Thanks for looking at this. I'll update the patch asap

In D59919#1535643 , @nicholas wrote:

> CHANGED: build-libcalls   NumNoUnwind 
> 4526 ->   3382 (   -25.276%)
>
> Why did the number of nounwinds drop?


I haven't looked into this but my initial guess would be that we removed code 
due to the "returned" information for which we then did not add nounwind. To be 
honest, I don't see how else it should have happened.




Comment at: llvm/lib/Transforms/IPO/Attributor.cpp:127
+if (Arg.hasReturnedAttr())
+  return gernericValueTraversal(CS.getArgOperand(Arg.getArgNo()), 
State,
+FollowValueCB, VisitValueCB);

nicholas wrote:
> LLVM generally has a preference for not recursing like this, it means that 
> the amount of stack space we need depends on the input IR and it's hard for a 
> user of llvm as a library to foresee or handle an out of stack condition.
> 
> Common practice is to structure it as a loop like:
> ```
> SmallVector Worklist;
> SmallSet Visited;
> Worklist.push_back(V);
> do {
>   Value *V = Worklist.pop_back_val();
>   if (!Visited.insert(V).second)
> continue;
>   V = V->stripPointerCasts();
>   // ...
> } while (!Worklist.empty());
> ```
> 
> Also, consider having some sort of loop iteration limit as a safety value 
> against runaway compile time.
Though there is not really stack space needed I see your point. I'll rewrite 
the recursion.




Comment at: llvm/lib/Transforms/IPO/Attributor.cpp:133
+  // recursion keep a record of the values we followed!
+  if (!FollowValueCB(V, State))
+return;

nicholas wrote:
> Offhand, I think placing this after the CS check is incorrect. I haven't 
> tried it out, but I expect the testcase that triggers infinite loop to look 
> something like this:
> 
> ```
> define i32 @test(i32 %A) {
> entry:
>   ret i32 0
> unreachableblock:
>   %B = call i32 @test(i32 %B)
>   ret i32 %B
> }
> ```
> 
> which should pass the verifier and trigger an infinite loop if you call 
> gernericValueTraversal on %B.
> 
> Also, if you really need a callback and not just a SmallSet named Visited, 
> I'd suggest calling the callback immediately before adding each value to the 
> Worklist (or as written not, call it on each value before recursing).
The test case above passes just fine but again I see your point. I will add 
that one and the one below which breaks as you predicted. I'll rewrite the 
whole traversal.

```
declare i32 @test2(i32 returned %A);
define i32 @test(i32 %A) {
entry:
  ret i32 %A
unreachableblock:
  %B = call i32 @test2(i32 %B)
  ret i32 %B
}
```


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59919/new/

https://reviews.llvm.org/D59919



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59919: [Attributor] Deduce "returned" argument attribute

2019-06-10 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 203818.
jdoerfert added a comment.

Use worklist instead of recursion, add tests


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59919/new/

https://reviews.llvm.org/D59919

Files:
  clang/test/CodeGenOpenCL/as_type.cl
  llvm/include/llvm/Transforms/IPO/Attributor.h
  llvm/lib/Transforms/IPO/Attributor.cpp
  llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll
  llvm/test/Transforms/FunctionAttrs/arg_returned.ll
  llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll

Index: llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll
===
--- llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll
+++ llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll
@@ -31,7 +31,7 @@
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 ; CHECK: Function Attrs: nounwind
-; CHECK-NEXT: define i32* @external_ret2_nrw(i32* %n0, i32* %r0, i32* %w0)
+; CHECK-NEXT: define i32* @external_ret2_nrw(i32* %n0, i32* %r0, i32* returned %w0)
 define i32* @external_ret2_nrw(i32* %n0, i32* %r0, i32* %w0) {
 entry:
   %call = call i32* @internal_ret0_nw(i32* %n0, i32* %w0)
@@ -42,7 +42,7 @@
 }
 
 ; CHECK: Function Attrs: nounwind
-; CHECK-NEXT: define internal i32* @internal_ret0_nw(i32* %n0, i32* %w0)
+; CHECK-NEXT: define internal i32* @internal_ret0_nw(i32* returned %n0, i32* %w0)
 define internal i32* @internal_ret0_nw(i32* %n0, i32* %w0) {
 entry:
   %r0 = alloca i32, align 4
@@ -71,7 +71,7 @@
 }
 
 ; CHECK: Function Attrs: nounwind
-; CHECK-NEXT: define internal i32* @internal_ret1_rrw(i32* %r0, i32* %r1, i32* %w0)
+; CHECK-NEXT: define internal i32* @internal_ret1_rrw(i32* %r0, i32* returned %r1, i32* %w0)
 define internal i32* @internal_ret1_rrw(i32* %r0, i32* %r1, i32* %w0) {
 entry:
   %0 = load i32, i32* %r0, align 4
@@ -122,7 +122,7 @@
 }
 
 ; CHECK: Function Attrs: nounwind
-; CHECK-NEXT: define internal i32* @internal_ret1_rw(i32* %r0, i32* %w0)
+; CHECK-NEXT: define internal i32* @internal_ret1_rw(i32* %r0, i32* returned %w0)
 define internal i32* @internal_ret1_rw(i32* %r0, i32* %w0) {
 entry:
   %0 = load i32, i32* %r0, align 4
@@ -148,7 +148,7 @@
 }
 
 ; CHECK: Function Attrs: nounwind
-; CHECK-NEXT: define i32* @external_source_ret2_nrw(i32* %n0, i32* %r0, i32* %w0)
+; CHECK-NEXT: define i32* @external_source_ret2_nrw(i32* %n0, i32* %r0, i32* returned %w0)
 define i32* @external_source_ret2_nrw(i32* %n0, i32* %r0, i32* %w0) {
 entry:
   %call = call i32* @external_sink_ret2_nrw(i32* %n0, i32* %r0, i32* %w0)
Index: llvm/test/Transforms/FunctionAttrs/arg_returned.ll
===
--- llvm/test/Transforms/FunctionAttrs/arg_returned.ll
+++ llvm/test/Transforms/FunctionAttrs/arg_returned.ll
@@ -1,5 +1,6 @@
-; RUN: opt -functionattrs -attributor -attributor-disable=false -S < %s | FileCheck %s
-; RUN: opt -functionattrs -attributor -attributor-disable=false -attributor-verify=true -S < %s | FileCheck %s
+; RUN: opt -functionattrs -S < %s | FileCheck %s --check-prefix=FNATTR
+; RUN: opt -attributor -attributor-disable=false -S < %s | FileCheck %s --check-prefix=ATTRIBUTOR
+; RUN: opt -attributor -attributor-disable=false -functionattrs -S < %s | FileCheck %s --check-prefix=BOTH
 ;
 ; Test cases specifically designed for the "returned" argument attribute.
 ; We use FIXME's to indicate problems and missing attributes.
@@ -7,16 +8,24 @@
 
 ; TEST SCC test returning an integer value argument
 ;
-; CHECK: Function Attrs: noinline norecurse nounwind readnone uwtable
-; CHECK: define i32 @sink_r0(i32 returned %r)
-;
-; FIXME: returned on %r missing:
-; CHECK: Function Attrs: noinline nounwind readnone uwtable
-; CHECK: define i32 @scc_r1(i32 %a, i32 %r, i32 %b)
-;
-; FIXME: returned on %r missing:
-; CHECK: Function Attrs: noinline nounwind readnone uwtable
-; CHECK: define i32 @scc_r2(i32 %a, i32 %b, i32 %r)
+; BOTH: Function Attrs: noinline norecurse nounwind readnone uwtable
+; BOTH: define i32 @sink_r0(i32 returned %r)
+; BOTH: Function Attrs: noinline nounwind readnone uwtable
+; BOTH: define i32 @scc_r1(i32 %a, i32 returned %r, i32 %b)
+; BOTH: Function Attrs: noinline nounwind readnone uwtable
+; BOTH: define i32 @scc_r2(i32 %a, i32 %b, i32 returned %r)
+; BOTH: Function Attrs: noinline nounwind readnone uwtable
+; BOTH: define i32 @scc_rX(i32 %a, i32 %b, i32 %r)
+;
+; FNATTR: define i32 @sink_r0(i32 returned %r)
+; FNATTR: define i32 @scc_r1(i32 %a, i32 %r, i32 %b)
+; FNATTR: define i32 @scc_r2(i32 %a, i32 %b, i32 %r)
+; FNATTR: define i32 @scc_rX(i32 %a, i32 %b, i32 %r)
+;
+; ATTRIBUTOR: define i32 @sink_r0(i32 returned %r)
+; ATTRIBUTOR: define i32 @scc_r1(i32 %a, i32 returned %r, i32 %b)
+; ATTRIBUTOR: define i32 @scc_r2(i32 %a, i32 %b, i32 returned %r)
+; ATTRIBUTOR: define i32 @scc_rX(i32 %a, i32 %b, i32 %r)
 ;
 ; int scc_r1(int a, int b, int r)

[PATCH] D59919: [Attributor] Deduce "returned" argument attribute

2019-06-10 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 203819.
jdoerfert marked 2 inline comments as done.
jdoerfert added a comment.

Fix Typo


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59919/new/

https://reviews.llvm.org/D59919

Files:
  clang/test/CodeGenOpenCL/as_type.cl
  llvm/include/llvm/Transforms/IPO/Attributor.h
  llvm/lib/Transforms/IPO/Attributor.cpp
  llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll
  llvm/test/Transforms/FunctionAttrs/arg_returned.ll
  llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll

Index: llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll
===
--- llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll
+++ llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll
@@ -31,7 +31,7 @@
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 ; CHECK: Function Attrs: nounwind
-; CHECK-NEXT: define i32* @external_ret2_nrw(i32* %n0, i32* %r0, i32* %w0)
+; CHECK-NEXT: define i32* @external_ret2_nrw(i32* %n0, i32* %r0, i32* returned %w0)
 define i32* @external_ret2_nrw(i32* %n0, i32* %r0, i32* %w0) {
 entry:
   %call = call i32* @internal_ret0_nw(i32* %n0, i32* %w0)
@@ -42,7 +42,7 @@
 }
 
 ; CHECK: Function Attrs: nounwind
-; CHECK-NEXT: define internal i32* @internal_ret0_nw(i32* %n0, i32* %w0)
+; CHECK-NEXT: define internal i32* @internal_ret0_nw(i32* returned %n0, i32* %w0)
 define internal i32* @internal_ret0_nw(i32* %n0, i32* %w0) {
 entry:
   %r0 = alloca i32, align 4
@@ -71,7 +71,7 @@
 }
 
 ; CHECK: Function Attrs: nounwind
-; CHECK-NEXT: define internal i32* @internal_ret1_rrw(i32* %r0, i32* %r1, i32* %w0)
+; CHECK-NEXT: define internal i32* @internal_ret1_rrw(i32* %r0, i32* returned %r1, i32* %w0)
 define internal i32* @internal_ret1_rrw(i32* %r0, i32* %r1, i32* %w0) {
 entry:
   %0 = load i32, i32* %r0, align 4
@@ -122,7 +122,7 @@
 }
 
 ; CHECK: Function Attrs: nounwind
-; CHECK-NEXT: define internal i32* @internal_ret1_rw(i32* %r0, i32* %w0)
+; CHECK-NEXT: define internal i32* @internal_ret1_rw(i32* %r0, i32* returned %w0)
 define internal i32* @internal_ret1_rw(i32* %r0, i32* %w0) {
 entry:
   %0 = load i32, i32* %r0, align 4
@@ -148,7 +148,7 @@
 }
 
 ; CHECK: Function Attrs: nounwind
-; CHECK-NEXT: define i32* @external_source_ret2_nrw(i32* %n0, i32* %r0, i32* %w0)
+; CHECK-NEXT: define i32* @external_source_ret2_nrw(i32* %n0, i32* %r0, i32* returned %w0)
 define i32* @external_source_ret2_nrw(i32* %n0, i32* %r0, i32* %w0) {
 entry:
   %call = call i32* @external_sink_ret2_nrw(i32* %n0, i32* %r0, i32* %w0)
Index: llvm/test/Transforms/FunctionAttrs/arg_returned.ll
===
--- llvm/test/Transforms/FunctionAttrs/arg_returned.ll
+++ llvm/test/Transforms/FunctionAttrs/arg_returned.ll
@@ -1,5 +1,6 @@
-; RUN: opt -functionattrs -attributor -attributor-disable=false -S < %s | FileCheck %s
-; RUN: opt -functionattrs -attributor -attributor-disable=false -attributor-verify=true -S < %s | FileCheck %s
+; RUN: opt -functionattrs -S < %s | FileCheck %s --check-prefix=FNATTR
+; RUN: opt -attributor -attributor-disable=false -S < %s | FileCheck %s --check-prefix=ATTRIBUTOR
+; RUN: opt -attributor -attributor-disable=false -functionattrs -S < %s | FileCheck %s --check-prefix=BOTH
 ;
 ; Test cases specifically designed for the "returned" argument attribute.
 ; We use FIXME's to indicate problems and missing attributes.
@@ -7,16 +8,24 @@
 
 ; TEST SCC test returning an integer value argument
 ;
-; CHECK: Function Attrs: noinline norecurse nounwind readnone uwtable
-; CHECK: define i32 @sink_r0(i32 returned %r)
-;
-; FIXME: returned on %r missing:
-; CHECK: Function Attrs: noinline nounwind readnone uwtable
-; CHECK: define i32 @scc_r1(i32 %a, i32 %r, i32 %b)
-;
-; FIXME: returned on %r missing:
-; CHECK: Function Attrs: noinline nounwind readnone uwtable
-; CHECK: define i32 @scc_r2(i32 %a, i32 %b, i32 %r)
+; BOTH: Function Attrs: noinline norecurse nounwind readnone uwtable
+; BOTH: define i32 @sink_r0(i32 returned %r)
+; BOTH: Function Attrs: noinline nounwind readnone uwtable
+; BOTH: define i32 @scc_r1(i32 %a, i32 returned %r, i32 %b)
+; BOTH: Function Attrs: noinline nounwind readnone uwtable
+; BOTH: define i32 @scc_r2(i32 %a, i32 %b, i32 returned %r)
+; BOTH: Function Attrs: noinline nounwind readnone uwtable
+; BOTH: define i32 @scc_rX(i32 %a, i32 %b, i32 %r)
+;
+; FNATTR: define i32 @sink_r0(i32 returned %r)
+; FNATTR: define i32 @scc_r1(i32 %a, i32 %r, i32 %b)
+; FNATTR: define i32 @scc_r2(i32 %a, i32 %b, i32 %r)
+; FNATTR: define i32 @scc_rX(i32 %a, i32 %b, i32 %r)
+;
+; ATTRIBUTOR: define i32 @sink_r0(i32 returned %r)
+; ATTRIBUTOR: define i32 @scc_r1(i32 %a, i32 returned %r, i32 %b)
+; ATTRIBUTOR: define i32 @scc_r2(i32 %a, i32 %b, i32 returned %r)
+; ATTRIBUTOR: define i32 @scc_rX(i32 %a, i32 %b, i32 %r)
 ;
 ; int scc_r1(int a, int b

[PATCH] D59919: [Attributor] Deduce "returned" argument attribute

2019-06-10 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added a comment.

In D59919#1535643, @nicholas wrote:

> > CHANGED: build-libcalls   NumNoUnwind   
> >   4526 ->   3382 (   -25.276%)
>
> Why did the number of nounwinds drop?


I reran the experiment with and without this commit. The numbers are the same 
as before except that there is no difference in NumNoUnwind. I don't know if I 
used a different baseline or if there was another problem (e.g., concurrency 
related) but I don't think this patch affects the NumNoUnwind number at all.

Btw. I only report statistics that changed by more than 1% as there seem to be 
some minimal variations all the time.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59919/new/

https://reviews.llvm.org/D59919



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59919: [Attributor] Deduce "returned" argument attribute

2019-06-10 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 203973.
jdoerfert added a comment.

Update tests


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59919/new/

https://reviews.llvm.org/D59919

Files:
  clang/test/CodeGenOpenCL/as_type.cl
  llvm/include/llvm/Transforms/IPO/Attributor.h
  llvm/lib/Transforms/IPO/Attributor.cpp
  llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll
  llvm/test/Transforms/FunctionAttrs/arg_returned.ll
  llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll

Index: llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll
===
--- llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll
+++ llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll
@@ -31,7 +31,7 @@
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 ; CHECK: Function Attrs: nounwind
-; CHECK-NEXT: define i32* @external_ret2_nrw(i32* %n0, i32* %r0, i32* %w0)
+; CHECK-NEXT: define i32* @external_ret2_nrw(i32* %n0, i32* %r0, i32* returned %w0)
 define i32* @external_ret2_nrw(i32* %n0, i32* %r0, i32* %w0) {
 entry:
   %call = call i32* @internal_ret0_nw(i32* %n0, i32* %w0)
@@ -42,7 +42,7 @@
 }
 
 ; CHECK: Function Attrs: nounwind
-; CHECK-NEXT: define internal i32* @internal_ret0_nw(i32* %n0, i32* %w0)
+; CHECK-NEXT: define internal i32* @internal_ret0_nw(i32* returned %n0, i32* %w0)
 define internal i32* @internal_ret0_nw(i32* %n0, i32* %w0) {
 entry:
   %r0 = alloca i32, align 4
@@ -71,7 +71,7 @@
 }
 
 ; CHECK: Function Attrs: nounwind
-; CHECK-NEXT: define internal i32* @internal_ret1_rrw(i32* %r0, i32* %r1, i32* %w0)
+; CHECK-NEXT: define internal i32* @internal_ret1_rrw(i32* %r0, i32* returned %r1, i32* %w0)
 define internal i32* @internal_ret1_rrw(i32* %r0, i32* %r1, i32* %w0) {
 entry:
   %0 = load i32, i32* %r0, align 4
@@ -122,7 +122,7 @@
 }
 
 ; CHECK: Function Attrs: nounwind
-; CHECK-NEXT: define internal i32* @internal_ret1_rw(i32* %r0, i32* %w0)
+; CHECK-NEXT: define internal i32* @internal_ret1_rw(i32* %r0, i32* returned %w0)
 define internal i32* @internal_ret1_rw(i32* %r0, i32* %w0) {
 entry:
   %0 = load i32, i32* %r0, align 4
@@ -148,7 +148,7 @@
 }
 
 ; CHECK: Function Attrs: nounwind
-; CHECK-NEXT: define i32* @external_source_ret2_nrw(i32* %n0, i32* %r0, i32* %w0)
+; CHECK-NEXT: define i32* @external_source_ret2_nrw(i32* %n0, i32* %r0, i32* returned %w0)
 define i32* @external_source_ret2_nrw(i32* %n0, i32* %r0, i32* %w0) {
 entry:
   %call = call i32* @external_sink_ret2_nrw(i32* %n0, i32* %r0, i32* %w0)
Index: llvm/test/Transforms/FunctionAttrs/arg_returned.ll
===
--- llvm/test/Transforms/FunctionAttrs/arg_returned.ll
+++ llvm/test/Transforms/FunctionAttrs/arg_returned.ll
@@ -1,5 +1,6 @@
-; RUN: opt -functionattrs -attributor -attributor-disable=false -S < %s | FileCheck %s
-; RUN: opt -functionattrs -attributor -attributor-disable=false -attributor-verify=true -S < %s | FileCheck %s
+; RUN: opt -functionattrs -S < %s | FileCheck %s --check-prefix=FNATTR
+; RUN: opt -attributor -attributor-disable=false -S < %s | FileCheck %s --check-prefix=ATTRIBUTOR
+; RUN: opt -attributor -attributor-disable=false -functionattrs -S < %s | FileCheck %s --check-prefix=BOTH
 ;
 ; Test cases specifically designed for the "returned" argument attribute.
 ; We use FIXME's to indicate problems and missing attributes.
@@ -7,16 +8,24 @@
 
 ; TEST SCC test returning an integer value argument
 ;
-; CHECK: Function Attrs: noinline norecurse nounwind readnone uwtable
-; CHECK: define i32 @sink_r0(i32 returned %r)
-;
-; FIXME: returned on %r missing:
-; CHECK: Function Attrs: noinline nounwind readnone uwtable
-; CHECK: define i32 @scc_r1(i32 %a, i32 %r, i32 %b)
-;
-; FIXME: returned on %r missing:
-; CHECK: Function Attrs: noinline nounwind readnone uwtable
-; CHECK: define i32 @scc_r2(i32 %a, i32 %b, i32 %r)
+; BOTH: Function Attrs: noinline norecurse nounwind readnone uwtable
+; BOTH-NEXT: define i32 @sink_r0(i32 returned %r)
+; BOTH: Function Attrs: noinline nounwind readnone uwtable
+; BOTH-NEXT: define i32 @scc_r1(i32 %a, i32 returned %r, i32 %b)
+; BOTH: Function Attrs: noinline nounwind readnone uwtable
+; BOTH-NEXT: define i32 @scc_r2(i32 %a, i32 %b, i32 returned %r)
+; BOTH: Function Attrs: noinline nounwind readnone uwtable
+; BOTH-NEXT: define i32 @scc_rX(i32 %a, i32 %b, i32 %r)
+;
+; FNATTR: define i32 @sink_r0(i32 returned %r)
+; FNATTR: define i32 @scc_r1(i32 %a, i32 %r, i32 %b)
+; FNATTR: define i32 @scc_r2(i32 %a, i32 %b, i32 %r)
+; FNATTR: define i32 @scc_rX(i32 %a, i32 %b, i32 %r)
+;
+; ATTRIBUTOR: define i32 @sink_r0(i32 returned %r)
+; ATTRIBUTOR: define i32 @scc_r1(i32 %a, i32 returned %r, i32 %b)
+; ATTRIBUTOR: define i32 @scc_r2(i32 %a, i32 %b, i32 returned %r)
+; ATTRIBUTOR: define i32 @scc_rX(i32 %a, i32 %b, i32 %r)
 ;
 ; int scc_r1(int a, int b, int r);
 ; int scc

[PATCH] D59919: [Attributor] Deduce "returned" argument attribute

2019-06-13 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 204626.
jdoerfert added a comment.

Simplify the interface


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59919/new/

https://reviews.llvm.org/D59919

Files:
  clang/test/CodeGenOpenCL/as_type.cl
  llvm/include/llvm/Transforms/IPO/Attributor.h
  llvm/lib/Transforms/IPO/Attributor.cpp
  llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll
  llvm/test/Transforms/FunctionAttrs/arg_returned.ll
  llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll

Index: llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll
===
--- llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll
+++ llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll
@@ -31,7 +31,7 @@
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 ; CHECK: Function Attrs: nounwind
-; CHECK-NEXT: define i32* @external_ret2_nrw(i32* %n0, i32* %r0, i32* %w0)
+; CHECK-NEXT: define i32* @external_ret2_nrw(i32* %n0, i32* %r0, i32* returned %w0)
 define i32* @external_ret2_nrw(i32* %n0, i32* %r0, i32* %w0) {
 entry:
   %call = call i32* @internal_ret0_nw(i32* %n0, i32* %w0)
@@ -42,7 +42,7 @@
 }
 
 ; CHECK: Function Attrs: nounwind
-; CHECK-NEXT: define internal i32* @internal_ret0_nw(i32* %n0, i32* %w0)
+; CHECK-NEXT: define internal i32* @internal_ret0_nw(i32* returned %n0, i32* %w0)
 define internal i32* @internal_ret0_nw(i32* %n0, i32* %w0) {
 entry:
   %r0 = alloca i32, align 4
@@ -71,7 +71,7 @@
 }
 
 ; CHECK: Function Attrs: nounwind
-; CHECK-NEXT: define internal i32* @internal_ret1_rrw(i32* %r0, i32* %r1, i32* %w0)
+; CHECK-NEXT: define internal i32* @internal_ret1_rrw(i32* %r0, i32* returned %r1, i32* %w0)
 define internal i32* @internal_ret1_rrw(i32* %r0, i32* %r1, i32* %w0) {
 entry:
   %0 = load i32, i32* %r0, align 4
@@ -122,7 +122,7 @@
 }
 
 ; CHECK: Function Attrs: nounwind
-; CHECK-NEXT: define internal i32* @internal_ret1_rw(i32* %r0, i32* %w0)
+; CHECK-NEXT: define internal i32* @internal_ret1_rw(i32* %r0, i32* returned %w0)
 define internal i32* @internal_ret1_rw(i32* %r0, i32* %w0) {
 entry:
   %0 = load i32, i32* %r0, align 4
@@ -148,7 +148,7 @@
 }
 
 ; CHECK: Function Attrs: nounwind
-; CHECK-NEXT: define i32* @external_source_ret2_nrw(i32* %n0, i32* %r0, i32* %w0)
+; CHECK-NEXT: define i32* @external_source_ret2_nrw(i32* %n0, i32* %r0, i32* returned %w0)
 define i32* @external_source_ret2_nrw(i32* %n0, i32* %r0, i32* %w0) {
 entry:
   %call = call i32* @external_sink_ret2_nrw(i32* %n0, i32* %r0, i32* %w0)
Index: llvm/test/Transforms/FunctionAttrs/arg_returned.ll
===
--- llvm/test/Transforms/FunctionAttrs/arg_returned.ll
+++ llvm/test/Transforms/FunctionAttrs/arg_returned.ll
@@ -1,5 +1,6 @@
-; RUN: opt -functionattrs -attributor -attributor-disable=false -S < %s | FileCheck %s
-; RUN: opt -functionattrs -attributor -attributor-disable=false -attributor-verify=true -S < %s | FileCheck %s
+; RUN: opt -functionattrs -S < %s | FileCheck %s --check-prefix=FNATTR
+; RUN: opt -attributor -attributor-disable=false -S < %s | FileCheck %s --check-prefix=ATTRIBUTOR
+; RUN: opt -attributor -attributor-disable=false -functionattrs -S < %s | FileCheck %s --check-prefix=BOTH
 ;
 ; Test cases specifically designed for the "returned" argument attribute.
 ; We use FIXME's to indicate problems and missing attributes.
@@ -7,16 +8,24 @@
 
 ; TEST SCC test returning an integer value argument
 ;
-; CHECK: Function Attrs: noinline norecurse nounwind readnone uwtable
-; CHECK: define i32 @sink_r0(i32 returned %r)
-;
-; FIXME: returned on %r missing:
-; CHECK: Function Attrs: noinline nounwind readnone uwtable
-; CHECK: define i32 @scc_r1(i32 %a, i32 %r, i32 %b)
-;
-; FIXME: returned on %r missing:
-; CHECK: Function Attrs: noinline nounwind readnone uwtable
-; CHECK: define i32 @scc_r2(i32 %a, i32 %b, i32 %r)
+; BOTH: Function Attrs: noinline norecurse nounwind readnone uwtable
+; BOTH-NEXT: define i32 @sink_r0(i32 returned %r)
+; BOTH: Function Attrs: noinline nounwind readnone uwtable
+; BOTH-NEXT: define i32 @scc_r1(i32 %a, i32 returned %r, i32 %b)
+; BOTH: Function Attrs: noinline nounwind readnone uwtable
+; BOTH-NEXT: define i32 @scc_r2(i32 %a, i32 %b, i32 returned %r)
+; BOTH: Function Attrs: noinline nounwind readnone uwtable
+; BOTH-NEXT: define i32 @scc_rX(i32 %a, i32 %b, i32 %r)
+;
+; FNATTR: define i32 @sink_r0(i32 returned %r)
+; FNATTR: define i32 @scc_r1(i32 %a, i32 %r, i32 %b)
+; FNATTR: define i32 @scc_r2(i32 %a, i32 %b, i32 %r)
+; FNATTR: define i32 @scc_rX(i32 %a, i32 %b, i32 %r)
+;
+; ATTRIBUTOR: define i32 @sink_r0(i32 returned %r)
+; ATTRIBUTOR: define i32 @scc_r1(i32 %a, i32 returned %r, i32 %b)
+; ATTRIBUTOR: define i32 @scc_r2(i32 %a, i32 %b, i32 returned %r)
+; ATTRIBUTOR: define i32 @scc_rX(i32 %a, i32 %b, i32 %r)
 ;
 ; int scc_r1(int a, int b, int r);

[PATCH] D64375: [OpenMP][Docs] Provide implementation status details

2019-07-25 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added a comment.

I'll incorporate @Hahnfeld's comments; anything else? If not, can someone 
approve this so that we can do separate reviews for improvements?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D64375/new/

https://reviews.llvm.org/D64375



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D58091: Customize warnings for missing built-in type

2019-07-30 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 212500.
jdoerfert added a comment.

Fix spelling in tests


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58091/new/

https://reviews.llvm.org/D58091

Files:
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/lib/Sema/SemaDecl.cpp
  clang/test/Analysis/retain-release.m
  clang/test/Sema/builtin-setjmp.c
  clang/test/Sema/implicit-builtin-decl.c

Index: clang/test/Sema/implicit-builtin-decl.c
===
--- clang/test/Sema/implicit-builtin-decl.c
+++ clang/test/Sema/implicit-builtin-decl.c
@@ -55,14 +55,17 @@
 
 void snprintf() { }
 
-// PR8316
-void longjmp(); // expected-warning{{declaration of built-in function 'longjmp' requires inclusion of the header <setjmp.h>}}
+// PR8316 & PR40692
+void longjmp(); // expected-warning{{declaration of built-in function 'longjmp' requires the declaration of the 'jmp_buf' type, commonly provided in the header <setjmp.h>.}}
 
 extern float fmaxf(float, float);
 
 struct __jmp_buf_tag {};
-void sigsetjmp(struct __jmp_buf_tag[1], int); // expected-warning{{declaration of built-in function 'sigsetjmp' requires inclusion of the header <setjmp.h>}}
+void sigsetjmp(struct __jmp_buf_tag[1], int); // expected-warning{{declaration of built-in function 'sigsetjmp' requires the declaration of the 'jmp_buf' type, commonly provided in the header <setjmp.h>.}}
 
 // CHECK: FunctionDecl {{.*}}  col:6 sigsetjmp '
 // CHECK-NOT: FunctionDecl
 // CHECK: ReturnsTwiceAttr {{.*}} <{{.*}}> Implicit
+
+// PR40692
+void pthread_create(); // no warning expected
Index: clang/test/Sema/builtin-setjmp.c
===
--- /dev/null
+++ clang/test/Sema/builtin-setjmp.c
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify -DNO_JMP_BUF %s
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify %s
+
+#ifdef NO_JMP_BUF
+extern long setjmp(long *);   // expected-warning {{declaration of built-in function 'setjmp' requires the declaration of the 'jmp_buf' type, commonly provided in the header <setjmp.h>.}}
+#else
+typedef long jmp_buf;
+extern int setjmp(char);  // expected-warning@8 {{incompatible redeclaration of library function 'setjmp'}}
+  // expected-note@8{{'setjmp' is a builtin with type 'int (jmp_buf)' (aka 'int (long)')}}
+#endif
Index: clang/test/Analysis/retain-release.m
===
--- clang/test/Analysis/retain-release.m
+++ clang/test/Analysis/retain-release.m
@@ -2,7 +2,7 @@
 // RUN: %clang_analyze_cc1 -triple x86_64-apple-darwin10\
 // RUN: -analyzer-checker=core,osx.coreFoundation.CFRetainRelease\
 // RUN: -analyzer-checker=osx.cocoa.ClassRelease,osx.cocoa.RetainCount\
-// RUN: -analyzer-checker=debug.ExprInspection -fblocks -verify=expected,C %s\
+// RUN: -analyzer-checker=debug.ExprInspection -fblocks -verify %s\
 // RUN: -Wno-objc-root-class -analyzer-output=plist -o %t.objc.plist
 // RUN: %clang_analyze_cc1 -triple x86_64-apple-darwin10\
 // RUN: -analyzer-checker=core,osx.coreFoundation.CFRetainRelease\
@@ -1231,7 +1231,7 @@
 typedef unsigned long __darwin_pthread_key_t;
 typedef __darwin_pthread_key_t pthread_key_t;
 
-int pthread_create(pthread_t *, const pthread_attr_t *,  // C-warning{{declaration of built-in function 'pthread_create' requires inclusion of the header <pthread.h>}}
+int pthread_create(pthread_t *, const pthread_attr_t *,
void *(*)(void *), void *);
 
 int pthread_setspecific(pthread_key_t key, const void *value);
Index: clang/lib/Sema/SemaDecl.cpp
===
--- clang/lib/Sema/SemaDecl.cpp
+++ clang/lib/Sema/SemaDecl.cpp
@@ -1983,10 +1983,27 @@
   ASTContext::GetBuiltinTypeError Error;
   QualType R = Context.GetBuiltinType(ID, Error);
   if (Error) {
-if (ForRedeclaration)
-  Diag(Loc, diag::warn_implicit_decl_requires_sysheader)
-  << getHeaderName(Context.BuiltinInfo, ID, Error)
+if (!ForRedeclaration)
+  return nullptr;
+
+// If we have a builtin without an associated type we should not emit a
+// warning when we were not able to find a type for it.
+if (Error == ASTContext::GE_Missing_type)
+  return nullptr;
+
+// If we could not find a type for setjmp it is because the jmp_buf type was
+// not defined prior to the setjmp declaration.
+if (Error == ASTContext::GE_Missing_setjmp) {
+  Diag(Loc, diag::warn_implicit_decl_no_jmp_buf)
   << Context.BuiltinInfo.getName(ID);
+  return nullptr;
+}
+
+// Generally, we emit a warning that the declaration requires the
+// appropriate header.
+Diag(Loc, diag::warn_implicit_decl_requires_sysheader)
+<< getHeaderName(Context.BuiltinInfo, ID, Error)
+<< Context.BuiltinInfo.getName(ID);
 return nullptr;
   }
 
Index: clang/incl

[PATCH] D58091: Customize warnings for missing built-in type

2019-07-30 Thread Johannes Doerfert via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL367387: [Fix] Customize warnings for missing built-in types 
(authored by jdoerfert, committed by ).
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Changed prior to commit:
  https://reviews.llvm.org/D58091?vs=212500&id=212503#toc

Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58091/new/

https://reviews.llvm.org/D58091

Files:
  cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td
  cfe/trunk/lib/Sema/SemaDecl.cpp
  cfe/trunk/test/Analysis/retain-release.m
  cfe/trunk/test/Sema/builtin-setjmp.c
  cfe/trunk/test/Sema/implicit-builtin-decl.c

Index: cfe/trunk/lib/Sema/SemaDecl.cpp
===
--- cfe/trunk/lib/Sema/SemaDecl.cpp
+++ cfe/trunk/lib/Sema/SemaDecl.cpp
@@ -1983,10 +1983,27 @@
   ASTContext::GetBuiltinTypeError Error;
   QualType R = Context.GetBuiltinType(ID, Error);
   if (Error) {
-if (ForRedeclaration)
-  Diag(Loc, diag::warn_implicit_decl_requires_sysheader)
-  << getHeaderName(Context.BuiltinInfo, ID, Error)
+if (!ForRedeclaration)
+  return nullptr;
+
+// If we have a builtin without an associated type we should not emit a
+// warning when we were not able to find a type for it.
+if (Error == ASTContext::GE_Missing_type)
+  return nullptr;
+
+// If we could not find a type for setjmp it is because the jmp_buf type was
+// not defined prior to the setjmp declaration.
+if (Error == ASTContext::GE_Missing_setjmp) {
+  Diag(Loc, diag::warn_implicit_decl_no_jmp_buf)
   << Context.BuiltinInfo.getName(ID);
+  return nullptr;
+}
+
+// Generally, we emit a warning that the declaration requires the
+// appropriate header.
+Diag(Loc, diag::warn_implicit_decl_requires_sysheader)
+<< getHeaderName(Context.BuiltinInfo, ID, Error)
+<< Context.BuiltinInfo.getName(ID);
 return nullptr;
   }
 
Index: cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td
===
--- cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td
+++ cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td
@@ -598,6 +598,10 @@
 def note_include_header_or_declare : Note<
   "include the header <%0> or explicitly provide a declaration for '%1'">;
 def note_previous_builtin_declaration : Note<"%0 is a builtin with type %1">;
+def warn_implicit_decl_no_jmp_buf
+: Warning<"declaration of built-in function '%0' requires the declaration"
+" of the 'jmp_buf' type, commonly provided in the header <setjmp.h>.">,
+  InGroup<DiagGroup<"incomplete-setjmp-declaration">>;
 def warn_implicit_decl_requires_sysheader : Warning<
   "declaration of built-in function '%1' requires inclusion of the header <%0>">,
   InGroup<BuiltinRequiresHeader>;
Index: cfe/trunk/test/Sema/builtin-setjmp.c
===
--- cfe/trunk/test/Sema/builtin-setjmp.c
+++ cfe/trunk/test/Sema/builtin-setjmp.c
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify -DNO_JMP_BUF %s
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify %s
+
+#ifdef NO_JMP_BUF
+extern long setjmp(long *);   // expected-warning {{declaration of built-in function 'setjmp' requires the declaration of the 'jmp_buf' type, commonly provided in the header <setjmp.h>.}}
+#else
+typedef long jmp_buf;
+extern int setjmp(char);  // expected-warning@8 {{incompatible redeclaration of library function 'setjmp'}}
+  // expected-note@8{{'setjmp' is a builtin with type 'int (jmp_buf)' (aka 'int (long)')}}
+#endif
Index: cfe/trunk/test/Sema/implicit-builtin-decl.c
===
--- cfe/trunk/test/Sema/implicit-builtin-decl.c
+++ cfe/trunk/test/Sema/implicit-builtin-decl.c
@@ -55,14 +55,17 @@
 
 void snprintf() { }
 
-// PR8316
-void longjmp(); // expected-warning{{declaration of built-in function 'longjmp' requires inclusion of the header <setjmp.h>}}
+// PR8316 & PR40692
+void longjmp(); // expected-warning{{declaration of built-in function 'longjmp' requires the declaration of the 'jmp_buf' type, commonly provided in the header <setjmp.h>.}}
 
 extern float fmaxf(float, float);
 
 struct __jmp_buf_tag {};
-void sigsetjmp(struct __jmp_buf_tag[1], int); // expected-warning{{declaration of built-in function 'sigsetjmp' requires inclusion of the header <setjmp.h>}}
+void sigsetjmp(struct __jmp_buf_tag[1], int); // expected-warning{{declaration of built-in function 'sigsetjmp' requires the declaration of the 'jmp_buf' type, commonly provided in the header <setjmp.h>.}}
 
 // CHECK: FunctionDecl {{.*}}  col:6 sigsetjmp '
 // CHECK-NOT: FunctionDecl
 // CHECK: ReturnsTwiceAttr {{.*}} <{{.*}}> Implicit
+
+// PR40692
+void pthread_create(); // no warning expected
Index: cfe/trunk/test/Analysis/retain-release.m
===
--- c

[PATCH] D65582: IR: accept and print numbered %N names for function args

2019-08-01 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert resigned from this revision.
jdoerfert added a comment.

I like the idea but I am not the right person to review.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D65582/new/

https://reviews.llvm.org/D65582



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D58531: [clang] Specify type of pthread_create builtin

2019-08-01 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added a comment.

In D58531#1599209, @probinson wrote:

> We've started running into this too in building the PS4 system. +jdoerfert 
> who added pthread_create to the builtin list.
>
> Looking at the patch, it seems straightforward enough although clearly needs 
> clang-format-diff run over it.
>  I don't touch Clang that much so I'm reluctant to okay it myself.


@probinson Thanks for making me aware of this patch.

> A separate point is whether it makes sense to be emitting this warning in the 
> first place for GE_Missing_type. I'd argue that, if we don't know the type of 
> the builtin, we should never emit the warning

@jrtc27 I hope the immediate need for this is gone now that D58091 was 
finally committed? Given that I caused this mess, I can take a look at the 
patch if you are still interested in it. Are you?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58531/new/

https://reviews.llvm.org/D58531



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D64375: [OpenMP][Docs] Provide implementation status details

2019-08-01 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert updated this revision to Diff 212957.
jdoerfert added a comment.

Improve based on two comments by @Hahnfeld


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D64375/new/

https://reviews.llvm.org/D64375

Files:
  clang/docs/OpenMPSupport.rst

Index: clang/docs/OpenMPSupport.rst
===
--- clang/docs/OpenMPSupport.rst
+++ clang/docs/OpenMPSupport.rst
@@ -2,12 +2,12 @@
 
   
 .none { background-color: #FF }
-.partial { background-color: #99 }
+.part { background-color: #99 }
 .good { background-color: #CCFF99 }
   
 
 .. role:: none
-.. role:: partial
+.. role:: part
 .. role:: good
 
 .. contents::
@@ -17,7 +17,7 @@
 OpenMP Support
 ==
 
-Clang supports the following OpenMP 5.0 features
+Clang supports the following OpenMP 5.0 features (see also `OpenMP implementation details`_):
 
 * The `reduction`-based clauses in the `task` and `target`-based directives.
 
@@ -37,7 +37,7 @@
 Clang fully supports OpenMP 4.5. Clang supports offloading to X86_64, AArch64,
 PPC64[LE] and has `basic support for Cuda devices`_.
 
-* #pragma omp declare simd: :partial:`Partial`.  We support parsing/semantic
+* #pragma omp declare simd: :part:`Partial`.  We support parsing/semantic
   analysis + generation of special attributes for X86 target, but still
   missing the LLVM pass for vectorization.
 
@@ -129,3 +129,134 @@
   In some cases the local variables are actually allocated in the global memory,
   but the debug info may be not aware of it.
 
+
+.. _OpenMP implementation details:
+
+OpenMP 5.0 Implementation Details
+---------------------------------
+
+The following table provides a quick overview of various OpenMP 5.0 features
+and their implementation status. Please contact *openmp-dev* at
+*lists.llvm.org* for more information or if you want to help with the
+implementation.
+
++--+--+--++
+|Category  | Feature  | Status   | Reviews|
++==+==+==++
+| loop extension   | support != in the canonical loop form| :good:`done` | D54441 |
++--+--+--++
+| loop extension   | #pragma omp loop (directive) | :none:`unclaimed`||
++--+--+--++
+| loop extension   | collapse imperfectly nested loop | :none:`unclaimed`||
++--+--+--++
+| loop extension   | collapse non-rectangular nested loop | :part:`worked on`||
++--+--+--++
+| loop extension   | C++ range-based for loop | :none:`unclaimed`||
++--+--+--++
+| loop extension   | clause: nosimd for SIMD directives   | :none:`unclaimed`||
++--+--+--++
+| loop extension   | inclusive scan extension (matching C++17 PSTL)   | :none:`unclaimed`||
++--+--+--++
+| memory management    | memory allocators| :good:`done` | r341687,r357929|
++--+---

  1   2   3   4   5   6   7   8   9   10   >