This revision was automatically updated to reflect the committed changes.
Closed by commit rL325771: [OpenCL] Add '-cl-uniform-work-group-size' 
compile option (authored by AlexeySotkin, committed by ).
Herald added a subscriber: llvm-commits.

Repository:
  rL LLVM

https://reviews.llvm.org/D43570

Files:
  cfe/trunk/include/clang/Driver/Options.td
  cfe/trunk/include/clang/Frontend/CodeGenOptions.def
  cfe/trunk/lib/CodeGen/CGCall.cpp
  cfe/trunk/lib/Driver/ToolChains/Clang.cpp
  cfe/trunk/lib/Frontend/CompilerInvocation.cpp
  cfe/trunk/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
  cfe/trunk/test/CodeGenOpenCL/cl-uniform-wg-size.cl
  cfe/trunk/test/CodeGenOpenCL/convergent.cl
  cfe/trunk/test/Driver/opencl.cl

Index: cfe/trunk/include/clang/Driver/Options.td
===================================================================
--- cfe/trunk/include/clang/Driver/Options.td
+++ cfe/trunk/include/clang/Driver/Options.td
@@ -518,6 +518,8 @@
   HelpText<"OpenCL only. Allow denormals to be flushed to zero.">;
 def cl_fp32_correctly_rounded_divide_sqrt : Flag<["-"], "cl-fp32-correctly-rounded-divide-sqrt">, Group<opencl_Group>, Flags<[CC1Option]>,
   HelpText<"OpenCL only. Specify that single precision floating-point divide and sqrt used in the program source are correctly rounded.">;
+def cl_uniform_work_group_size : Flag<["-"], "cl-uniform-work-group-size">, Group<opencl_Group>, Flags<[CC1Option]>,
+  HelpText<"OpenCL only. Defines that the global work-size be a multiple of the work-group size specified to clEnqueueNDRangeKernel">;
 def client__name : JoinedOrSeparate<["-"], "client_name">;
 def combine : Flag<["-", "--"], "combine">, Flags<[DriverOption, Unsupported]>;
 def compatibility__version : JoinedOrSeparate<["-"], "compatibility_version">;
Index: cfe/trunk/include/clang/Frontend/CodeGenOptions.def
===================================================================
--- cfe/trunk/include/clang/Frontend/CodeGenOptions.def
+++ cfe/trunk/include/clang/Frontend/CodeGenOptions.def
@@ -128,6 +128,7 @@
 CODEGENOPT(NoNaNsFPMath      , 1, 0) ///< Assume FP arguments, results not NaN.
 CODEGENOPT(FlushDenorm       , 1, 0) ///< Allow FP denorm numbers to be flushed to zero
 CODEGENOPT(CorrectlyRoundedDivSqrt, 1, 0) ///< -cl-fp32-correctly-rounded-divide-sqrt
+CODEGENOPT(UniformWGSize     , 1, 0) ///< -cl-uniform-work-group-size
 CODEGENOPT(NoZeroInitializedInBSS , 1, 0) ///< -fno-zero-initialized-in-bss.
 /// \brief Method of Objective-C dispatch to use.
 ENUM_CODEGENOPT(ObjCDispatchMethod, ObjCDispatchMethodKind, 2, Legacy)
Index: cfe/trunk/test/Driver/opencl.cl
===================================================================
--- cfe/trunk/test/Driver/opencl.cl
+++ cfe/trunk/test/Driver/opencl.cl
@@ -13,6 +13,7 @@
 // RUN: %clang -S -### -cl-no-signed-zeros %s 2>&1 | FileCheck --check-prefix=CHECK-NO-SIGNED-ZEROS %s
 // RUN: %clang -S -### -cl-denorms-are-zero %s 2>&1 | FileCheck --check-prefix=CHECK-DENORMS-ARE-ZERO %s
 // RUN: %clang -S -### -cl-fp32-correctly-rounded-divide-sqrt %s 2>&1 | FileCheck --check-prefix=CHECK-ROUND-DIV %s
+// RUN: %clang -S -### -cl-uniform-work-group-size %s 2>&1 | FileCheck --check-prefix=CHECK-UNIFORM-WG %s
 // RUN: not %clang -cl-std=c99 -DOPENCL %s 2>&1 | FileCheck --check-prefix=CHECK-C99 %s
 // RUN: not %clang -cl-std=invalid -DOPENCL %s 2>&1 | FileCheck --check-prefix=CHECK-INVALID %s
 
@@ -31,6 +32,7 @@
 // CHECK-NO-SIGNED-ZEROS: "-cc1" {{.*}} "-cl-no-signed-zeros"
 // CHECK-DENORMS-ARE-ZERO: "-cc1" {{.*}} "-cl-denorms-are-zero"
 // CHECK-ROUND-DIV: "-cc1" {{.*}} "-cl-fp32-correctly-rounded-divide-sqrt"
+// CHECK-UNIFORM-WG: "-cc1" {{.*}} "-cl-uniform-work-group-size"
 // CHECK-C99: error: invalid value 'c99' in '-cl-std=c99'
 // CHECK-INVALID: error: invalid value 'invalid' in '-cl-std=invalid'
 
Index: cfe/trunk/test/CodeGenOpenCL/convergent.cl
===================================================================
--- cfe/trunk/test/CodeGenOpenCL/convergent.cl
+++ cfe/trunk/test/CodeGenOpenCL/convergent.cl
@@ -127,7 +127,7 @@
 // CHECK: declare spir_func void @nodupfun(){{[^#]*}} #[[attr3:[0-9]+]]
 
 // CHECK-LABEL: @assume_convergent_asm
-// CHECK: tail call void asm sideeffect "s_barrier", ""() #4
+// CHECK: tail call void asm sideeffect "s_barrier", ""() #5
 kernel void assume_convergent_asm()
 {
   __asm__ volatile("s_barrier");
@@ -138,4 +138,5 @@
 // CHECK: attributes #2 = { {{[^}]*}}convergent{{[^}]*}} }
 // CHECK: attributes #3 = { {{[^}]*}}convergent noduplicate{{[^}]*}} }
 // CHECK: attributes #4 = { {{[^}]*}}convergent{{[^}]*}} }
-// CHECK: attributes #5 = { {{[^}]*}}convergent noduplicate{{[^}]*}} }
+// CHECK: attributes #5 = { {{[^}]*}}convergent{{[^}]*}} }
+// CHECK: attributes #6 = { {{[^}]*}}convergent noduplicate{{[^}]*}} }
Index: cfe/trunk/test/CodeGenOpenCL/cl-uniform-wg-size.cl
===================================================================
--- cfe/trunk/test/CodeGenOpenCL/cl-uniform-wg-size.cl
+++ cfe/trunk/test/CodeGenOpenCL/cl-uniform-wg-size.cl
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL1.2 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM
+// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-NONUNIFORM
+// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -cl-uniform-work-group-size -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM
+
+kernel void ker() {};
+// CHECK: define{{.*}}@ker() #0
+
+void foo() {};
+// CHECK: define{{.*}}@foo() #1
+
+// CHECK-LABEL: attributes #0
+// CHECK-UNIFORM: "uniform-work-group-size"="true"
+// CHECK-NONUNIFORM: "uniform-work-group-size"="false"
+
+// CHECK-LABEL: attributes #1
+// CHECK-NOT: uniform-work-group-size
Index: cfe/trunk/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
===================================================================
--- cfe/trunk/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ cfe/trunk/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -425,7 +425,7 @@
   return s;
 }
 
-// CHECK: define i32 @func_transparent_union_ret() local_unnamed_addr #0 {
+// CHECK: define i32 @func_transparent_union_ret() local_unnamed_addr #1 {
 // CHECK: ret i32 0
 transparent_u func_transparent_union_ret()
 {
Index: cfe/trunk/lib/Frontend/CompilerInvocation.cpp
===================================================================
--- cfe/trunk/lib/Frontend/CompilerInvocation.cpp
+++ cfe/trunk/lib/Frontend/CompilerInvocation.cpp
@@ -659,6 +659,8 @@
   Opts.FlushDenorm = Args.hasArg(OPT_cl_denorms_are_zero);
   Opts.CorrectlyRoundedDivSqrt =
       Args.hasArg(OPT_cl_fp32_correctly_rounded_divide_sqrt);
+  Opts.UniformWGSize =
+      Args.hasArg(OPT_cl_uniform_work_group_size);
   Opts.Reciprocals = Args.getAllArgValues(OPT_mrecip_EQ);
   Opts.ReciprocalMath = Args.hasArg(OPT_freciprocal_math);
   Opts.NoTrappingMath = Args.hasArg(OPT_fno_trapping_math);
Index: cfe/trunk/lib/CodeGen/CGCall.cpp
===================================================================
--- cfe/trunk/lib/CodeGen/CGCall.cpp
+++ cfe/trunk/lib/CodeGen/CGCall.cpp
@@ -1870,6 +1870,21 @@
     }
   }
 
+  if (TargetDecl && TargetDecl->hasAttr<OpenCLKernelAttr>()) {
+    if (getLangOpts().OpenCLVersion <= 120) {
+      // OpenCL v1.2 Work groups are always uniform
+      FuncAttrs.addAttribute("uniform-work-group-size", "true");
+    } else {
+      // OpenCL v2.0 Work groups may be whether uniform or not.
+      // '-cl-uniform-work-group-size' compile option gets a hint
+      // to the compiler that the global work-size be a multiple of
+      // the work-group size specified to clEnqueueNDRangeKernel
+      // (i.e. work groups are uniform).
+      FuncAttrs.addAttribute("uniform-work-group-size",
+                             llvm::toStringRef(CodeGenOpts.UniformWGSize));
+    }
+  }
+
   if (!AttrOnCallSite) {
     bool DisableTailCalls =
         CodeGenOpts.DisableTailCalls ||
Index: cfe/trunk/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- cfe/trunk/lib/Driver/ToolChains/Clang.cpp
+++ cfe/trunk/lib/Driver/ToolChains/Clang.cpp
@@ -2379,6 +2379,7 @@
       options::OPT_cl_no_signed_zeros,
       options::OPT_cl_denorms_are_zero,
       options::OPT_cl_fp32_correctly_rounded_divide_sqrt,
+      options::OPT_cl_uniform_work_group_size
   };
 
   if (Arg *A = Args.getLastArg(options::OPT_cl_std_EQ)) {
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to