cdai2 created this revision.
Herald added subscribers: Naghasan, ldrumm, pengfei, Anastasia, yaxunl.
Herald added a project: All.
cdai2 requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

For the x86 target, vector types (both results and arguments) can be
coerced to scalars of the same size, e.g.:

  define zeroext i1 @_Z18convert_ulong4_rteDv4_t(<4 x i16> %x)
  ; becomes
  define zeroext i1 @_Z18convert_ulong4_rteDv4_t(i64 %x.coerced)

Such behavior is completely valid for x86, but the backend vectorizer
cannot work with scalars instead of vectors.

With this patch, argument and result types will be left unchanged in
CodeGen.

A new option, -fopencl-force-vector-abi, is also added; when provided, it
force-disables vector-to-scalar coercion.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D142948

Files:
  clang/include/clang/Basic/LangOptions.def
  clang/include/clang/Driver/Options.td
  clang/lib/CodeGen/TargetInfo.cpp
  clang/lib/Frontend/CompilerInvocation.cpp
  clang/test/CodeGenOpenCL/vector-to-scalar-coercion.cl

Index: clang/test/CodeGenOpenCL/vector-to-scalar-coercion.cl
===================================================================
--- /dev/null
+++ clang/test/CodeGenOpenCL/vector-to-scalar-coercion.cl
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -x cl -triple i686-unknown-unknown   -fopencl-force-vector-abi %s -O0 -emit-llvm -o - | FileCheck %s --check-prefix NOCOER
+// RUN: %clang_cc1 -x cl -triple x86_64-unknown-unknown -fopencl-force-vector-abi %s -O0 -emit-llvm -o - | FileCheck %s --check-prefix NOCOER
+// RUN: %clang_cc1 -x cl -triple i686-unknown-unknown   %s -O0 -emit-llvm -o - | FileCheck %s --check-prefix COER32CL
+// RUN: %clang_cc1 -x cl -triple x86_64-unknown-unknown %s -O0 -emit-llvm -o - | FileCheck %s --check-prefix COER64CL
+// RUN: %clang_cc1 -x c  -triple i686-unknown-unknown   %s -O0 -emit-llvm -o - | FileCheck %s --check-prefix COER32
+// RUN: %clang_cc1 -x c  -triple x86_64-unknown-unknown %s -O0 -emit-llvm -o - | FileCheck %s --check-prefix COER64
+
+typedef unsigned short ushort;
+typedef ushort ushort4 __attribute__((ext_vector_type(4)));
+
+typedef unsigned long ulong;
+typedef ulong ulong4 __attribute__((ext_vector_type(4)));
+
+ulong4 __attribute__((const)) __attribute__((overloadable)) convert_ulong4_rte(ushort4 x)
+{
+  return 1;
+}
+
+// NOCOER:     define {{.*}}<4 x i64> @_Z18convert_ulong4_rteDv4_t(<4 x i16> noundef %{{.*}})
+// COER32CL:   define {{.*}}<4 x i64> @_Z18convert_ulong4_rteDv4_t(i64 noundef %{{.*}})
+// COER64CL:   define {{.*}}<4 x i64> @_Z18convert_ulong4_rteDv4_t(double noundef %{{.*}})
+// COER32:     define {{.*}}<4 x i32> @_Z18convert_ulong4_rteDv4_t(i64 noundef %{{.*}})
+// COER64:     define {{.*}}<4 x i64> @_Z18convert_ulong4_rteDv4_t(double noundef %{{.*}})
Index: clang/lib/Frontend/CompilerInvocation.cpp
===================================================================
--- clang/lib/Frontend/CompilerInvocation.cpp
+++ clang/lib/Frontend/CompilerInvocation.cpp
@@ -3832,6 +3832,8 @@
     }
   }
 
+  Opts.OpenCLForceVectorABI = Args.hasArg(OPT_fopencl_force_vector_abi);
+
   // Check if -fopenmp is specified and set default version to 5.0.
   Opts.OpenMP = Args.hasArg(OPT_fopenmp) ? 50 : 0;
   // Check if -fopenmp-simd is specified.
Index: clang/lib/CodeGen/TargetInfo.cpp
===================================================================
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -100,6 +100,42 @@
   return Address::invalid();
 }
 
+static ABIArgInfo classifyOpenCL(QualType Ty, ASTContext &Context) {
+  if (Ty->isVoidType())
+    return ABIArgInfo::getIgnore();
+
+  if (const EnumType *EnumTy = Ty->getAs<EnumType>())
+    Ty = EnumTy->getDecl()->getIntegerType();
+
+  if (const RecordType *RT = Ty->getAs<RecordType>())
+    return ABIArgInfo::getIndirect(Context.getTypeAlignInChars(RT),
+                                   /*ByVal=*/false);
+
+  if (Context.isPromotableIntegerType(Ty))
+    return ABIArgInfo::getExtend(Ty);
+
+  return ABIArgInfo::getDirect();
+}
+
+static bool doOpenCLClassification(CGFunctionInfo &FI, ASTContext &Context) {
+  if (!Context.getLangOpts().OpenCL)
+    return false;
+  if (!Context.getLangOpts().OpenCLForceVectorABI)
+    return false;
+
+  // Use the OpenCL classification to prevent vector-to-scalar coercion.
+  // The vector ABI is only enforced when the corresponding option is enabled;
+  // otherwise, vector types are coerced to a matching scalar type to conform
+  // to the target ABI, e.g. <8 x i8> is coerced to i64.
+  FI.getReturnInfo() = classifyOpenCL(FI.getReturnType(), Context);
+
+  for (auto &Arg : FI.arguments())
+    Arg.info = classifyOpenCL(Arg.type, Context);
+
+  return true;
+}
+
+
 static llvm::Type *getVAListElementType(CodeGenFunction &CGF) {
   return CGF.ConvertTypeForMem(
       CGF.getContext().getBuiltinVaListType()->getPointeeType());
@@ -1964,6 +2000,10 @@
 }
 
 void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
+  ASTContext &Context = getContext();
+  if (doOpenCLClassification(FI, Context))
+    return;
+
   CCState State(FI);
   if (IsMCUABI)
     State.FreeRegs = 3;
@@ -3950,6 +3990,9 @@
 }
 
 void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
+  ASTContext &Context = getContext();
+  if (doOpenCLClassification(FI, Context))
+    return;
 
   const unsigned CallingConv = FI.getCallingConvention();
   // It is possible to force Win64 calling convention on any x86_64 target by
@@ -4407,6 +4450,10 @@
 }
 
 void WinX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
+  ASTContext &Context = getContext();
+  if (doOpenCLClassification(FI, Context))
+    return;
+
   const unsigned CC = FI.getCallingConvention();
   bool IsVectorCall = CC == llvm::CallingConv::X86_VectorCall;
   bool IsRegCall = CC == llvm::CallingConv::X86_RegCall;
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -6260,6 +6260,9 @@
 def fno_bitfield_type_align : Flag<["-"], "fno-bitfield-type-align">,
   HelpText<"Ignore bit-field types when aligning structures">,
   MarshallingInfoFlag<LangOpts<"NoBitFieldTypeAlign">>;
+def fopencl_force_vector_abi : Flag<["-"], "fopencl-force-vector-abi">,
+  HelpText<"Disable vector to scalar coercion for OpenCL">,
+  MarshallingInfoFlag<LangOpts<"OpenCLForceVectorABI">>;
 def ffake_address_space_map : Flag<["-"], "ffake-address-space-map">,
   HelpText<"Use a fake address space map; OpenCL testing purposes only">,
   MarshallingInfoFlag<LangOpts<"FakeAddressSpaceMap">>;
Index: clang/include/clang/Basic/LangOptions.def
===================================================================
--- clang/include/clang/Basic/LangOptions.def
+++ clang/include/clang/Basic/LangOptions.def
@@ -331,6 +331,7 @@
         "Disable recognition of objc_direct methods")
 LANGOPT(CFProtectionBranch , 1, 0, "Control-Flow Branch Protection enabled")
 LANGOPT(FakeAddressSpaceMap , 1, 0, "OpenCL fake address space map")
+LANGOPT(OpenCLForceVectorABI, 1, 0, "OpenCL vector to scalar coercion disabling")
 ENUM_LANGOPT(AddressSpaceMapMangling , AddrSpaceMapMangling, 2, ASMM_Target, "OpenCL address space map mangling mode")
 LANGOPT(IncludeDefaultHeader, 1, 0, "Include default header file for OpenCL")
 LANGOPT(DeclareOpenCLBuiltins, 1, 0, "Declare OpenCL builtin functions")
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to