https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/104661
>From c422624756e9b40b3fee02c6b3bb49d3355a1bbe Mon Sep 17 00:00:00 2001 From: Shilei Tian <i...@tianshilei.me> Date: Fri, 13 Dec 2024 00:35:28 -0500 Subject: [PATCH] [Clang] Remove 3-element vector load and store special handling Clang uses a long-time special handling of the case where 3 element vector loads and stores are performed as 4 element, and then a shufflevector is used to extract the used elements. Odd sized vector codegen should now work reasonably well. This patch removes this special handling, as well as the compiler argument `-fpreserve-vec3-type`. --- clang/include/clang/Basic/CodeGenOptions.def | 3 - clang/include/clang/Basic/TargetInfo.h | 10 +++ clang/include/clang/Driver/Options.td | 4 - clang/lib/Basic/CMakeLists.txt | 1 + clang/lib/Basic/Targets/AMDGPU.h | 6 ++ clang/lib/CodeGen/CGExpr.cpp | 49 ++++++------ clang/test/CodeGen/arm64-abi-vector.c | 1 - .../test/CodeGen/builtins-elementwise-math.c | 2 +- clang/test/CodeGenOpenCL/amdgpu-alignment.cl | 28 +++---- clang/test/CodeGenOpenCL/preserve_vec3.cl | 77 ------------------- 10 files changed, 56 insertions(+), 125 deletions(-) delete mode 100644 clang/test/CodeGenOpenCL/preserve_vec3.cl diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index 0f4ed13d5f3d8c..1ab8c7fb4d3c33 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -413,9 +413,6 @@ CODEGENOPT(StrictReturn, 1, 1) /// Whether emit pseudo probes for sample pgo profile collection. CODEGENOPT(PseudoProbeForProfiling, 1, 0) -/// Whether 3-component vector type is preserved. 
-CODEGENOPT(PreserveVec3Type, 1, 0) - CODEGENOPT(NoPLT, 1, 0) /// Whether to emit all vtables diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index 52a1ac9781395c..e5e28907f09dec 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -1846,6 +1846,16 @@ class TargetInfo : public TransferrableTargetInfo, return std::make_pair(64, 64); } + /// Returns the optimal vector type to use in place of the provided vector + /// type. For example, on some targets, a vector with 3 elements may be + /// treated as one with 4 elements to enhance performance. + virtual llvm::FixedVectorType * + getOptimalVectorType(llvm::FixedVectorType *T, const LangOptions &Opt) const { + if (!Opt.HLSL && T->getNumElements() == 3) + return llvm::FixedVectorType::get(T->getElementType(), 4); + return T; + } + protected: /// Copy type and layout related info. void copyAuxTarget(const TargetInfo *Aux); diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 88862ae9edb29d..6b35d6ac1ca431 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -8190,10 +8190,6 @@ def fhlsl_strict_availability : Flag<["-"], "fhlsl-strict-availability">, Group<hlsl_Group>, MarshallingInfoFlag<LangOpts<"HLSLStrictAvailability">>; -def fpreserve_vec3_type : Flag<["-"], "fpreserve-vec3-type">, - HelpText<"Preserve 3-component vector type">, - MarshallingInfoFlag<CodeGenOpts<"PreserveVec3Type">>, - ImpliedByAnyOf<[hlsl.KeyPath]>; def fwchar_type_EQ : Joined<["-"], "fwchar-type=">, HelpText<"Select underlying type for wchar_t">, Values<"char,short,int">, diff --git a/clang/lib/Basic/CMakeLists.txt b/clang/lib/Basic/CMakeLists.txt index e11e1ac4a6fa63..77fa6055e109eb 100644 --- a/clang/lib/Basic/CMakeLists.txt +++ b/clang/lib/Basic/CMakeLists.txt @@ -1,4 +1,5 @@ set(LLVM_LINK_COMPONENTS + Core Support TargetParser FrontendOpenMP diff --git 
a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h index 8068184a0d4117..2e8f0a78c49755 100644 --- a/clang/lib/Basic/Targets/AMDGPU.h +++ b/clang/lib/Basic/Targets/AMDGPU.h @@ -484,6 +484,12 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo { // architectures. return std::make_pair(128, 128); } + + llvm::FixedVectorType * + getOptimalVectorType(llvm::FixedVectorType *T, + const LangOptions &) const override { + return T; + } }; } // namespace targets diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 79955f55714164..2d32c681eac46d 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -2003,20 +2003,19 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile, return EmitFromMemory(V, Ty); } - // Handle vectors of size 3 like size 4 for better performance. - const llvm::Type *EltTy = Addr.getElementType(); - const auto *VTy = cast<llvm::FixedVectorType>(EltTy); - - if (!CGM.getCodeGenOpts().PreserveVec3Type && VTy->getNumElements() == 3) { - - llvm::VectorType *vec4Ty = - llvm::FixedVectorType::get(VTy->getElementType(), 4); - Address Cast = Addr.withElementType(vec4Ty); - // Now load value. - llvm::Value *V = Builder.CreateLoad(Cast, Volatile, "loadVec4"); - - // Shuffle vector to get vec3. - V = Builder.CreateShuffleVector(V, ArrayRef<int>{0, 1, 2}, "extractVec"); + // Handles vectors of sizes that are likely to be expanded to a larger size + // to optimize performance. 
+ auto *VTy = cast<llvm::FixedVectorType>(Addr.getElementType()); + auto *NewVecTy = getTarget().getOptimalVectorType(VTy, getLangOpts()); + + if (VTy != NewVecTy) { + Address Cast = Addr.withElementType(NewVecTy); + llvm::Value *V = Builder.CreateLoad(Cast, Volatile, "loadVecN"); + unsigned OldNumElements = VTy->getNumElements(); + SmallVector<int, 4> Mask(OldNumElements); + for (unsigned I = 0; I < OldNumElements; ++I) + Mask[I] = I; + V = Builder.CreateShuffleVector(V, Mask, "extractVec"); return EmitFromMemory(V, Ty); } } @@ -2146,21 +2145,21 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr, Addr = Addr.withPointer(Builder.CreateThreadLocalAddress(GV), NotKnownNonNull); + // Handles vectors of sizes that are likely to be expanded to a larger size + // to optimize performance. llvm::Type *SrcTy = Value->getType(); if (const auto *ClangVecTy = Ty->getAs<VectorType>()) { - auto *VecTy = dyn_cast<llvm::FixedVectorType>(SrcTy); - if (!CGM.getCodeGenOpts().PreserveVec3Type) { - // Handle vec3 special. - if (VecTy && !ClangVecTy->isExtVectorBoolType() && - cast<llvm::FixedVectorType>(VecTy)->getNumElements() == 3) { - // Our source is a vec3, do a shuffle vector to make it a vec4. 
- Value = Builder.CreateShuffleVector(Value, ArrayRef<int>{0, 1, 2, -1}, - "extractVec"); - SrcTy = llvm::FixedVectorType::get(VecTy->getElementType(), 4); + if (auto *VecTy = dyn_cast<llvm::FixedVectorType>(SrcTy)) { + auto *NewVecTy = getTarget().getOptimalVectorType(VecTy, getLangOpts()); + if (!ClangVecTy->isExtVectorBoolType() && VecTy != NewVecTy) { + SmallVector<int, 4> Mask(NewVecTy->getNumElements(), -1); + for (unsigned I = 0; I < VecTy->getNumElements(); ++I) + Mask[I] = I; + Value = Builder.CreateShuffleVector(Value, Mask, "extractVec"); + SrcTy = NewVecTy; } - if (Addr.getElementType() != SrcTy) { + if (Addr.getElementType() != SrcTy) Addr = Addr.withElementType(SrcTy); - } } } diff --git a/clang/test/CodeGen/arm64-abi-vector.c b/clang/test/CodeGen/arm64-abi-vector.c index 81e42315c883bd..be918bead916e5 100644 --- a/clang/test/CodeGen/arm64-abi-vector.c +++ b/clang/test/CodeGen/arm64-abi-vector.c @@ -416,7 +416,6 @@ double fixed_5i(__int5 *in) { __attribute__((noinline)) double args_vec_3d(int fixed, __double3 c3) { // CHECK: args_vec_3d // CHECK: [[LOAD:%.*]] = load <4 x double>, ptr {{%.*}} - // CHECK: shufflevector <4 x double> [[LOAD]], <4 x double> poison, <3 x i32> <i32 0, i32 1, i32 2> double sum = fixed; sum = sum + c3.x + c3.y; return sum; diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c index 7f6b5f26eb9307..27f93508794cec 100644 --- a/clang/test/CodeGen/builtins-elementwise-math.c +++ b/clang/test/CodeGen/builtins-elementwise-math.c @@ -419,7 +419,7 @@ void test_builtin_elementwise_min(float f1, float f2, double d1, double d2, void test_builtin_elementwise_bitreverse(si8 vi1, si8 vi2, long long int i1, long long int i2, short si, _BitInt(31) bi1, _BitInt(31) bi2) { - + // CHECK: [[I1:%.+]] = load i64, ptr %i1.addr, align 8 // CHECK-NEXT: call i64 @llvm.bitreverse.i64(i64 [[I1]]) diff --git a/clang/test/CodeGenOpenCL/amdgpu-alignment.cl b/clang/test/CodeGenOpenCL/amdgpu-alignment.cl 
index 8f57713fe1f041..52a5d01588875f 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-alignment.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-alignment.cl @@ -94,43 +94,43 @@ typedef double __attribute__((ext_vector_type(16))) double16; // CHECK-LABEL: @local_memory_alignment_global( // CHECK: store volatile i8 0, ptr addrspace(3) @local_memory_alignment_global.lds_i8, align 1 // CHECK: store volatile <2 x i8> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v2i8, align 2 -// CHECK: store volatile <4 x i8> <i8 0, i8 0, i8 0, i8 undef>, ptr addrspace(3) @local_memory_alignment_global.lds_v3i8, align 4 +// CHECK: store volatile <3 x i8> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v3i8, align 4 // CHECK: store volatile <4 x i8> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v4i8, align 4 // CHECK: store volatile <8 x i8> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v8i8, align 8 // CHECK: store volatile <16 x i8> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v16i8, align 16 // CHECK: store volatile i16 0, ptr addrspace(3) @local_memory_alignment_global.lds_i16, align 2 // CHECK: store volatile <2 x i16> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v2i16, align 4 -// CHECK: store volatile <4 x i16> <i16 0, i16 0, i16 0, i16 undef>, ptr addrspace(3) @local_memory_alignment_global.lds_v3i16, align 8 +// CHECK: store volatile <3 x i16> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v3i16, align 8 // CHECK: store volatile <4 x i16> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v4i16, align 8 // CHECK: store volatile <8 x i16> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v8i16, align 16 // CHECK: store volatile <16 x i16> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v16i16, align 32 // CHECK: store volatile i32 0, ptr addrspace(3) 
@local_memory_alignment_global.lds_i32, align 4 // CHECK: store volatile <2 x i32> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v2i32, align 8 -// CHECK: store volatile <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, ptr addrspace(3) @local_memory_alignment_global.lds_v3i32, align 16 +// CHECK: store volatile <3 x i32> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v3i32, align 16 // CHECK: store volatile <4 x i32> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v4i32, align 16 // CHECK: store volatile <8 x i32> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v8i32, align 32 // CHECK: store volatile <16 x i32> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v16i32, align 64 // CHECK: store volatile i64 0, ptr addrspace(3) @local_memory_alignment_global.lds_i64, align 8 // CHECK: store volatile <2 x i64> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v2i64, align 16 -// CHECK: store volatile <4 x i64> <i64 0, i64 0, i64 0, i64 undef>, ptr addrspace(3) @local_memory_alignment_global.lds_v3i64, align 32 +// CHECK: store volatile <3 x i64> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v3i64, align 32 // CHECK: store volatile <4 x i64> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v4i64, align 32 // CHECK: store volatile <8 x i64> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v8i64, align 64 // CHECK: store volatile <16 x i64> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v16i64, align 128 // CHECK: store volatile half 0xH0000, ptr addrspace(3) @local_memory_alignment_global.lds_f16, align 2 // CHECK: store volatile <2 x half> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v2f16, align 4 -// CHECK: store volatile <4 x half> <half 0xH0000, half 0xH0000, half 0xH0000, half undef>, ptr addrspace(3) 
@local_memory_alignment_global.lds_v3f16, align 8 +// CHECK: store volatile <3 x half> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v3f16, align 8 // CHECK: store volatile <4 x half> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v4f16, align 8 // CHECK: store volatile <8 x half> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v8f16, align 16 // CHECK: store volatile <16 x half> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v16f16, align 32 // CHECK: store volatile float 0.000000e+00, ptr addrspace(3) @local_memory_alignment_global.lds_f32, align 4 // CHECK: store volatile <2 x float> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v2f32, align 8 -// CHECK: store volatile <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, ptr addrspace(3) @local_memory_alignment_global.lds_v3f32, align 16 +// CHECK: store volatile <3 x float> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v3f32, align 16 // CHECK: store volatile <4 x float> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v4f32, align 16 // CHECK: store volatile <8 x float> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v8f32, align 32 // CHECK: store volatile <16 x float> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v16f32, align 64 // CHECK: store volatile double 0.000000e+00, ptr addrspace(3) @local_memory_alignment_global.lds_f64, align 8 // CHECK: store volatile <2 x double> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v2f64, align 16 -// CHECK: store volatile <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double undef>, ptr addrspace(3) @local_memory_alignment_global.lds_v3f64, align 32 +// CHECK: store volatile <3 x double> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v3f64, align 32 // CHECK: store 
volatile <4 x double> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v4f64, align 32 // CHECK: store volatile <8 x double> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v8f64, align 64 // CHECK: store volatile <16 x double> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v16f64, align 128 @@ -381,43 +381,43 @@ kernel void local_memory_alignment_arg( // CHECK: store volatile i8 0, ptr addrspace(5) %arraydecay, align 1 // CHECK: store volatile <2 x i8> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 2 -// CHECK: store volatile <4 x i8> <i8 0, i8 0, i8 0, i8 undef>, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 4 +// CHECK: store volatile <3 x i8> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 4 // CHECK: store volatile <4 x i8> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 4 // CHECK: store volatile <8 x i8> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 8 // CHECK: store volatile <16 x i8> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16 // CHECK: store volatile i16 0, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 2 // CHECK: store volatile <2 x i16> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 4 -// CHECK: store volatile <4 x i16> <i16 0, i16 0, i16 0, i16 undef>, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 8 +// CHECK: store volatile <3 x i16> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 8 // CHECK: store volatile <4 x i16> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 8 // CHECK: store volatile <8 x i16> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16 // CHECK: store volatile <16 x i16> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 32 // CHECK: store volatile i32 0, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 4 // CHECK: store volatile <2 x i32> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 8 -// CHECK: store 
volatile <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16 +// CHECK: store volatile <3 x i32> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16 // CHECK: store volatile <4 x i32> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16 // CHECK: store volatile <8 x i32> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 32 // CHECK: store volatile <16 x i32> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 64 // CHECK: store volatile i64 0, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 8 // CHECK: store volatile <2 x i64> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16 -// CHECK: store volatile <4 x i64> <i64 0, i64 0, i64 0, i64 undef>, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 32 +// CHECK: store volatile <3 x i64> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 32 // CHECK: store volatile <4 x i64> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 32 // CHECK: store volatile <8 x i64> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 64 // CHECK: store volatile <16 x i64> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 128 // CHECK: store volatile half 0xH0000, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 2 // CHECK: store volatile <2 x half> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 4 -// CHECK: store volatile <4 x half> <half 0xH0000, half 0xH0000, half 0xH0000, half undef>, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 8 +// CHECK: store volatile <3 x half> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 8 // CHECK: store volatile <4 x half> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 8 // CHECK: store volatile <8 x half> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16 // CHECK: store volatile <16 x half> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 32 // CHECK: store volatile float 0.000000e+00, 
ptr addrspace(5) %arraydecay{{[0-9]+}}, align 4 // CHECK: store volatile <2 x float> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 8 -// CHECK: store volatile <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16 +// CHECK: store volatile <3 x float> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16 // CHECK: store volatile <4 x float> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16 // CHECK: store volatile <8 x float> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 32 // CHECK: store volatile <16 x float> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 64 // CHECK: store volatile double 0.000000e+00, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 8 // CHECK: store volatile <2 x double> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16 -// CHECK: store volatile <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double undef>, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 32 +// CHECK: store volatile <3 x double> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 32 // CHECK: store volatile <4 x double> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 32 // CHECK: store volatile <8 x double> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 64 // CHECK: store volatile <16 x double> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 128 diff --git a/clang/test/CodeGenOpenCL/preserve_vec3.cl b/clang/test/CodeGenOpenCL/preserve_vec3.cl deleted file mode 100644 index 19f0cdff60a9d6..00000000000000 --- a/clang/test/CodeGenOpenCL/preserve_vec3.cl +++ /dev/null @@ -1,77 +0,0 @@ -// RUN: %clang_cc1 %s -emit-llvm -o - -triple spir-unknown-unknown -fpreserve-vec3-type | FileCheck %s - -typedef char char3 __attribute__((ext_vector_type(3))); -typedef char char8 __attribute__((ext_vector_type(8))); -typedef short short3 
__attribute__((ext_vector_type(3))); -typedef double double2 __attribute__((ext_vector_type(2))); -typedef float float3 __attribute__((ext_vector_type(3))); -typedef float float4 __attribute__((ext_vector_type(4))); - -void kernel foo(global float3 *a, global float3 *b) { - // CHECK-LABEL: spir_kernel void @foo - // CHECK: %[[LOAD_A:.*]] = load <3 x float>, ptr addrspace(1) %a - // CHECK: store <3 x float> %[[LOAD_A]], ptr addrspace(1) %b - *b = *a; -} - -void kernel float4_to_float3(global float3 *a, global float4 *b) { - // CHECK-LABEL: spir_kernel void @float4_to_float3 - // CHECK: %[[LOAD_A:.*]] = load <4 x float>, ptr addrspace(1) %b, align 16 - // CHECK: %[[ASTYPE:.*]] = shufflevector <4 x float> %[[LOAD_A]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2> - // CHECK: store <3 x float> %[[ASTYPE]], ptr addrspace(1) %a, align 16 - *a = __builtin_astype(*b, float3); -} - -void kernel float3_to_float4(global float3 *a, global float4 *b) { - // CHECK-LABEL: spir_kernel void @float3_to_float4 - // CHECK: %[[LOAD_A:.*]] = load <3 x float>, ptr addrspace(1) %a, align 16 - // CHECK: %[[ASTYPE:.*]] = shufflevector <3 x float> %[[LOAD_A]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> - // CHECK: store <4 x float> %[[ASTYPE]], ptr addrspace(1) %b, align 16 - *b = __builtin_astype(*a, float4); -} - -void kernel float3_to_double2(global float3 *a, global double2 *b) { - // CHECK-LABEL: spir_kernel void @float3_to_double2 - // CHECK: %[[LOAD_A:.*]] = load <3 x float>, ptr addrspace(1) %a, align 16 - // CHECK: %[[ASTYPE:.*]] = shufflevector <3 x float> %[[LOAD_A]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> - // CHECK: store <4 x float> %[[ASTYPE]], ptr addrspace(1) %b, align 16 - *b = __builtin_astype(*a, double2); -} - -void kernel char8_to_short3(global short3 *a, global char8 *b) { - // CHECK-LABEL: spir_kernel void @char8_to_short3 - // CHECK: %[[LOAD_B:.*]] = load <4 x i16>, ptr addrspace(1) %b - // CHECK: %[[ASTYPE:.*]] = 
shufflevector <4 x i16> %[[LOAD_B]], <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> - // CHECK: store <3 x i16> %[[ASTYPE]], ptr addrspace(1) %a, align 8 - *a = __builtin_astype(*b, short3); -} - -void from_char3(char3 a, global int *out) { - // CHECK-LABEL: void @from_char3 - // CHECK: %[[ASTYPE:.*]] = shufflevector <3 x i8> %a, <3 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> - // CHECK: store <4 x i8> %[[ASTYPE]], ptr addrspace(1) %out - *out = __builtin_astype(a, int); -} - -void from_short3(short3 a, global long *out) { - // CHECK-LABEL: void @from_short3 - // CHECK: %[[ASTYPE:.*]] = shufflevector <3 x i16> %a, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> - // CHECK: store <4 x i16> %[[ASTYPE]], ptr addrspace(1) %out - *out = __builtin_astype(a, long); -} - -void scalar_to_char3(int a, global char3 *out) { - // CHECK-LABEL: void @scalar_to_char3 - // CHECK: %[[IN_BC:.*]] = bitcast i32 %a to <4 x i8> - // CHECK: %[[ASTYPE:.*]] = shufflevector <4 x i8> %[[IN_BC]], <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2> - // CHECK: store <3 x i8> %[[ASTYPE]], ptr addrspace(1) %out - *out = __builtin_astype(a, char3); -} - -void scalar_to_short3(long a, global short3 *out) { - // CHECK-LABEL: void @scalar_to_short3 - // CHECK: %[[IN_BC:.*]] = bitcast i64 %a to <4 x i16> - // CHECK: %[[ASTYPE:.*]] = shufflevector <4 x i16> %[[IN_BC]], <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> - // CHECK: store <3 x i16> %[[ASTYPE]], ptr addrspace(1) %out - *out = __builtin_astype(a, short3); -} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits