This revision was automatically updated to reflect the committed changes.
Closed by commit rL302125: [OpenCL] Add intel_reqd_sub_group_size attribute 
support (authored by pxl).

Changed prior to commit:
  https://reviews.llvm.org/D30805?vs=97772&id=97786#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D30805

Files:
  cfe/trunk/include/clang/Basic/Attr.td
  cfe/trunk/include/clang/Basic/AttrDocs.td
  cfe/trunk/lib/CodeGen/CodeGenFunction.cpp
  cfe/trunk/lib/CodeGen/CodeGenFunction.h
  cfe/trunk/lib/Sema/SemaDeclAttr.cpp
  cfe/trunk/test/CodeGenOpenCL/kernel-attributes.cl
  cfe/trunk/test/Misc/pragma-attribute-supported-attributes-list.test
  cfe/trunk/test/SemaOpenCL/invalid-kernel-attrs.cl

Index: cfe/trunk/include/clang/Basic/AttrDocs.td
===================================================================
--- cfe/trunk/include/clang/Basic/AttrDocs.td
+++ cfe/trunk/include/clang/Basic/AttrDocs.td
@@ -2216,6 +2216,21 @@
   }];
 }
 
+def OpenCLIntelReqdSubGroupSizeDocs : Documentation {
+  let Category = DocCatStmt;
+  let Heading = "__attribute__((intel_reqd_sub_group_size))";
+  let Content = [{
+The optional attribute intel_reqd_sub_group_size can be used to indicate that
+the kernel must be compiled and executed with the specified subgroup size. When
+this attribute is present, get_max_sub_group_size() is guaranteed to return the
+specified integer value. This is important for the correctness of many subgroup
+algorithms, and in some cases may be used by the compiler to generate more optimal
+code. See `cl_intel_required_subgroup_size
+<https://www.khronos.org/registry/OpenCL/extensions/intel/cl_intel_required_subgroup_size.txt>`
+for details.
+  }];
+}
+
 def OpenCLAccessDocs : Documentation {
   let Category = DocCatStmt;
   let Heading = "__read_only, __write_only, __read_write (read_only, write_only, read_write)";
Index: cfe/trunk/include/clang/Basic/Attr.td
===================================================================
--- cfe/trunk/include/clang/Basic/Attr.td
+++ cfe/trunk/include/clang/Basic/Attr.td
@@ -864,6 +864,13 @@
   let Documentation = [OpenCLUnrollHintDocs];
 }
 
+def OpenCLIntelReqdSubGroupSize: InheritableAttr {
+  let Spellings = [GNU<"intel_reqd_sub_group_size">];
+  let Args = [UnsignedArgument<"SubGroupSize">];
+  let Subjects = SubjectList<[Function], ErrorDiag>;
+  let Documentation = [OpenCLIntelReqdSubGroupSizeDocs];
+}
+
 // This attribute is both a type attribute, and a declaration attribute (for
 // parameter variables).
 def OpenCLAccess : Attr {
Index: cfe/trunk/test/CodeGenOpenCL/kernel-attributes.cl
===================================================================
--- cfe/trunk/test/CodeGenOpenCL/kernel-attributes.cl
+++ cfe/trunk/test/CodeGenOpenCL/kernel-attributes.cl
@@ -8,7 +8,11 @@
 kernel __attribute__((vec_type_hint(uint4))) __attribute__((work_group_size_hint(8,16,32))) void kernel2(int a) {}
 // CHECK: define void @kernel2(i32 {{[^%]*}}%a) {{[^{]+}} !vec_type_hint ![[MD3:[0-9]+]] !work_group_size_hint ![[MD4:[0-9]+]]
 
+kernel __attribute__((intel_reqd_sub_group_size(8))) void kernel3(int a) {}
+// CHECK: define void @kernel3(i32 {{[^%]*}}%a) {{[^{]+}} !intel_reqd_sub_group_size ![[MD5:[0-9]+]]
+
 // CHECK: [[MD1]] = !{i32 undef, i32 1}
 // CHECK: [[MD2]] = !{i32 1, i32 2, i32 4}
 // CHECK: [[MD3]] = !{<4 x i32> undef, i32 0}
 // CHECK: [[MD4]] = !{i32 8, i32 16, i32 32}
+// CHECK: [[MD5]] = !{i32 8}
Index: cfe/trunk/test/Misc/pragma-attribute-supported-attributes-list.test
===================================================================
--- cfe/trunk/test/Misc/pragma-attribute-supported-attributes-list.test
+++ cfe/trunk/test/Misc/pragma-attribute-supported-attributes-list.test
@@ -2,7 +2,7 @@
 
 // The number of supported attributes should never go down!
 
-// CHECK: #pragma clang attribute supports 59 attributes:
+// CHECK: #pragma clang attribute supports 60 attributes:
 // CHECK-NEXT: AMDGPUFlatWorkGroupSize (SubjectMatchRule_function)
 // CHECK-NEXT: AMDGPUNumSGPR (SubjectMatchRule_function)
 // CHECK-NEXT: AMDGPUNumVGPR (SubjectMatchRule_function)
@@ -42,6 +42,7 @@
 // CHECK-NEXT: ObjCRuntimeName (SubjectMatchRule_objc_interface, SubjectMatchRule_objc_protocol)
 // CHECK-NEXT: ObjCRuntimeVisible (SubjectMatchRule_objc_interface)
 // CHECK-NEXT: ObjCSubclassingRestricted (SubjectMatchRule_objc_interface)
+// CHECK-NEXT: OpenCLIntelReqdSubGroupSize (SubjectMatchRule_function)
 // CHECK-NEXT: OpenCLNoSVM (SubjectMatchRule_variable)
 // CHECK-NEXT: OptimizeNone (SubjectMatchRule_function, SubjectMatchRule_objc_method)
 // CHECK-NEXT: Overloadable (SubjectMatchRule_function)
Index: cfe/trunk/test/SemaOpenCL/invalid-kernel-attrs.cl
===================================================================
--- cfe/trunk/test/SemaOpenCL/invalid-kernel-attrs.cl
+++ cfe/trunk/test/SemaOpenCL/invalid-kernel-attrs.cl
@@ -33,3 +33,7 @@
 kernel __attribute__((reqd_work_group_size(1,2,0))) void kernel11(){} // expected-error {{'reqd_work_group_size' attribute must be greater than 0}}
 kernel __attribute__((reqd_work_group_size(1,0,2))) void kernel12(){} // expected-error {{'reqd_work_group_size' attribute must be greater than 0}}
 kernel __attribute__((reqd_work_group_size(0,1,2))) void kernel13(){} // expected-error {{'reqd_work_group_size' attribute must be greater than 0}}
+
+__attribute__((intel_reqd_sub_group_size(8))) void kernel14(){} // expected-error {{attribute 'intel_reqd_sub_group_size' can only be applied to a kernel}}
+kernel __attribute__((intel_reqd_sub_group_size(0))) void kernel15(){} // expected-error {{'intel_reqd_sub_group_size' attribute must be greater than 0}}
+kernel __attribute__((intel_reqd_sub_group_size(8))) __attribute__((intel_reqd_sub_group_size(16))) void kernel16() {}  //expected-warning{{attribute 'intel_reqd_sub_group_size' is already applied with different parameters}}
Index: cfe/trunk/lib/Sema/SemaDeclAttr.cpp
===================================================================
--- cfe/trunk/lib/Sema/SemaDeclAttr.cpp
+++ cfe/trunk/lib/Sema/SemaDeclAttr.cpp
@@ -2891,6 +2891,28 @@
                                        Attr.getAttributeSpellingListIndex()));
 }
 
+// Handles intel_reqd_sub_group_size.
+static void handleSubGroupSize(Sema &S, Decl *D, const AttributeList &Attr) {
+  uint32_t SGSize;
+  const Expr *E = Attr.getArgAsExpr(0);
+  if (!checkUInt32Argument(S, Attr, E, SGSize))
+    return;
+  if (SGSize == 0) {
+    S.Diag(Attr.getLoc(), diag::err_attribute_argument_is_zero)
+        << Attr.getName() << E->getSourceRange();
+    return;
+  }
+
+  OpenCLIntelReqdSubGroupSizeAttr *Existing =
+      D->getAttr<OpenCLIntelReqdSubGroupSizeAttr>();
+  if (Existing && Existing->getSubGroupSize() != SGSize)
+    S.Diag(Attr.getLoc(), diag::warn_duplicate_attribute) << Attr.getName();
+
+  D->addAttr(::new (S.Context) OpenCLIntelReqdSubGroupSizeAttr(
+      Attr.getRange(), S.Context, SGSize,
+      Attr.getAttributeSpellingListIndex()));
+}
+
 static void handleVecTypeHint(Sema &S, Decl *D, const AttributeList &Attr) {
   if (!Attr.hasParsedType()) {
     S.Diag(Attr.getLoc(), diag::err_attribute_wrong_number_arguments)
@@ -6157,6 +6179,9 @@
   case AttributeList::AT_ReqdWorkGroupSize:
     handleWorkGroupSize<ReqdWorkGroupSizeAttr>(S, D, Attr);
     break;
+  case AttributeList::AT_OpenCLIntelReqdSubGroupSize:
+    handleSubGroupSize(S, D, Attr);
+    break;
   case AttributeList::AT_VecTypeHint:
     handleVecTypeHint(S, D, Attr);
     break;
@@ -6521,6 +6546,9 @@
       Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
         << A << ExpectedKernelFunction;
       D->setInvalidDecl();
+    } else if (Attr *A = D->getAttr<OpenCLIntelReqdSubGroupSizeAttr>()) {
+      Diag(D->getLocation(), diag::err_opencl_kernel_attr) << A;
+      D->setInvalidDecl();
     }
   }
 }
Index: cfe/trunk/lib/CodeGen/CodeGenFunction.cpp
===================================================================
--- cfe/trunk/lib/CodeGen/CodeGenFunction.cpp
+++ cfe/trunk/lib/CodeGen/CodeGenFunction.cpp
@@ -658,34 +658,42 @@
   GenOpenCLArgMetadata(FD, Fn, CGM, Context, Builder, getContext());
 
   if (const VecTypeHintAttr *A = FD->getAttr<VecTypeHintAttr>()) {
-    QualType hintQTy = A->getTypeHint();
-    const ExtVectorType *hintEltQTy = hintQTy->getAs<ExtVectorType>();
-    bool isSignedInteger =
-        hintQTy->isSignedIntegerType() ||
-        (hintEltQTy && hintEltQTy->getElementType()->isSignedIntegerType());
-    llvm::Metadata *attrMDArgs[] = {
+    QualType HintQTy = A->getTypeHint();
+    const ExtVectorType *HintEltQTy = HintQTy->getAs<ExtVectorType>();
+    bool IsSignedInteger =
+        HintQTy->isSignedIntegerType() ||
+        (HintEltQTy && HintEltQTy->getElementType()->isSignedIntegerType());
+    llvm::Metadata *AttrMDArgs[] = {
         llvm::ConstantAsMetadata::get(llvm::UndefValue::get(
             CGM.getTypes().ConvertType(A->getTypeHint()))),
         llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
             llvm::IntegerType::get(Context, 32),
-            llvm::APInt(32, (uint64_t)(isSignedInteger ? 1 : 0))))};
-    Fn->setMetadata("vec_type_hint", llvm::MDNode::get(Context, attrMDArgs));
+            llvm::APInt(32, (uint64_t)(IsSignedInteger ? 1 : 0))))};
+    Fn->setMetadata("vec_type_hint", llvm::MDNode::get(Context, AttrMDArgs));
   }
 
   if (const WorkGroupSizeHintAttr *A = FD->getAttr<WorkGroupSizeHintAttr>()) {
-    llvm::Metadata *attrMDArgs[] = {
+    llvm::Metadata *AttrMDArgs[] = {
         llvm::ConstantAsMetadata::get(Builder.getInt32(A->getXDim())),
         llvm::ConstantAsMetadata::get(Builder.getInt32(A->getYDim())),
         llvm::ConstantAsMetadata::get(Builder.getInt32(A->getZDim()))};
-    Fn->setMetadata("work_group_size_hint", llvm::MDNode::get(Context, attrMDArgs));
+    Fn->setMetadata("work_group_size_hint", llvm::MDNode::get(Context, AttrMDArgs));
   }
 
   if (const ReqdWorkGroupSizeAttr *A = FD->getAttr<ReqdWorkGroupSizeAttr>()) {
-    llvm::Metadata *attrMDArgs[] = {
+    llvm::Metadata *AttrMDArgs[] = {
         llvm::ConstantAsMetadata::get(Builder.getInt32(A->getXDim())),
         llvm::ConstantAsMetadata::get(Builder.getInt32(A->getYDim())),
         llvm::ConstantAsMetadata::get(Builder.getInt32(A->getZDim()))};
-    Fn->setMetadata("reqd_work_group_size", llvm::MDNode::get(Context, attrMDArgs));
+    Fn->setMetadata("reqd_work_group_size", llvm::MDNode::get(Context, AttrMDArgs));
+  }
+
+  if (const OpenCLIntelReqdSubGroupSizeAttr *A =
+          FD->getAttr<OpenCLIntelReqdSubGroupSizeAttr>()) {
+    llvm::Metadata *AttrMDArgs[] = {
+        llvm::ConstantAsMetadata::get(Builder.getInt32(A->getSubGroupSize()))};
+    Fn->setMetadata("intel_reqd_sub_group_size",
+                    llvm::MDNode::get(Context, AttrMDArgs));
   }
 }
 
Index: cfe/trunk/lib/CodeGen/CodeGenFunction.h
===================================================================
--- cfe/trunk/lib/CodeGen/CodeGenFunction.h
+++ cfe/trunk/lib/CodeGen/CodeGenFunction.h
@@ -1413,16 +1413,8 @@
   /// True if we need emit the life-time markers.
   const bool ShouldEmitLifetimeMarkers;
 
-  /// Add a kernel metadata node to the named metadata node 'opencl.kernels'.
-  /// In the kernel metadata node, reference the kernel function and metadata 
-  /// nodes for its optional attribute qualifiers (OpenCL 1.1 6.7.2):
-  /// - A node for the vec_type_hint(<type>) qualifier contains string
-  ///   "vec_type_hint", an undefined value of the <type> data type,
-  ///   and a Boolean that is true if the <type> is integer and signed.
-  /// - A node for the work_group_size_hint(X,Y,Z) qualifier contains string 
-  ///   "work_group_size_hint", and three 32-bit integers X, Y and Z.
-  /// - A node for the reqd_work_group_size(X,Y,Z) qualifier contains string 
-  ///   "reqd_work_group_size", and three 32-bit integers X, Y and Z.
+  /// Add OpenCL kernel arg metadata and the kernel attribute meatadata to
+  /// the function metadata.
   void EmitOpenCLKernelMetadata(const FunctionDecl *FD, 
                                 llvm::Function *Fn);
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to