Author: ctopper Date: Fri Jun 8 00:18:33 2018 New Revision: 334266 URL: http://llvm.org/viewvc/llvm-project?rev=334266&view=rev Log: [X86] Add builtins for shufps and shufpd to enable target feature and immediate range checking.
Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/lib/Headers/avxintrin.h cfe/trunk/lib/Headers/emmintrin.h cfe/trunk/lib/Headers/xmmintrin.h cfe/trunk/lib/Sema/SemaChecking.cpp Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=334266&r1=334265&r2=334266&view=diff ============================================================================== --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Fri Jun 8 00:18:33 2018 @@ -316,6 +316,7 @@ TARGET_BUILTIN(__builtin_ia32_rsqrtps, " TARGET_BUILTIN(__builtin_ia32_rsqrtss, "V4fV4f", "nc", "sse") TARGET_BUILTIN(__builtin_ia32_sqrtps, "V4fV4f", "nc", "sse") TARGET_BUILTIN(__builtin_ia32_sqrtss, "V4fV4f", "nc", "sse") +TARGET_BUILTIN(__builtin_ia32_shufps, "V4fV4fV4fIi", "nc", "sse") TARGET_BUILTIN(__builtin_ia32_maskmovdqu, "vV16cV16cc*", "n", "sse2") TARGET_BUILTIN(__builtin_ia32_movmskpd, "iV2d", "nc", "sse2") @@ -327,6 +328,7 @@ TARGET_BUILTIN(__builtin_ia32_pshufhw, " TARGET_BUILTIN(__builtin_ia32_psadbw128, "V2LLiV16cV16c", "nc", "sse2") TARGET_BUILTIN(__builtin_ia32_sqrtpd, "V2dV2d", "nc", "sse2") TARGET_BUILTIN(__builtin_ia32_sqrtsd, "V2dV2d", "nc", "sse2") +TARGET_BUILTIN(__builtin_ia32_shufpd, "V2dV2dV2dIi", "nc", "sse2") TARGET_BUILTIN(__builtin_ia32_cvtpd2dq, "V2LLiV2d", "nc", "sse2") TARGET_BUILTIN(__builtin_ia32_cvtpd2ps, "V4fV2d", "nc", "sse2") TARGET_BUILTIN(__builtin_ia32_cvttpd2dq, "V4iV2d", "nc", "sse2") @@ -487,6 +489,8 @@ TARGET_BUILTIN(__builtin_ia32_blendpd256 TARGET_BUILTIN(__builtin_ia32_blendps256, "V8fV8fV8fIi", "nc", "avx") TARGET_BUILTIN(__builtin_ia32_blendvpd256, "V4dV4dV4dV4d", "nc", "avx") TARGET_BUILTIN(__builtin_ia32_blendvps256, "V8fV8fV8fV8f", "nc", "avx") +TARGET_BUILTIN(__builtin_ia32_shufpd256, "V4dV4dV4dIi", "nc", "avx") +TARGET_BUILTIN(__builtin_ia32_shufps256, "V8fV8fV8fIi", "nc", 
"avx") TARGET_BUILTIN(__builtin_ia32_dpps256, "V8fV8fV8fIc", "nc", "avx") TARGET_BUILTIN(__builtin_ia32_cmppd, "V2dV2dV2dIc", "nc", "avx") TARGET_BUILTIN(__builtin_ia32_cmppd256, "V4dV4dV4dIc", "nc", "avx") @@ -1536,6 +1540,8 @@ TARGET_BUILTIN(__builtin_ia32_shuf_f32x4 TARGET_BUILTIN(__builtin_ia32_shuf_f64x2, "V8dV8dV8dIi", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_shuf_i32x4, "V16iV16iV16iIi", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_shuf_i64x2, "V8LLiV8LLiV8LLiIi", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_shufpd512, "V8dV8dV8dIi", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_shufps512, "V16fV16fV16fIi", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_shuf_f32x4_256, "V8fV8fV8fIi", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_shuf_f64x2_256, "V4dV4dV4dIi", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_shuf_i32x4_256, "V8iV8iV8iIi", "nc", "avx512vl") Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=334266&r1=334265&r2=334266&view=diff ============================================================================== --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Fri Jun 8 00:18:33 2018 @@ -9403,6 +9403,36 @@ Value *CodeGenFunction::EmitX86BuiltinEx makeArrayRef(Indices, NumElts), "permil"); } + case X86::BI__builtin_ia32_shufpd: + case X86::BI__builtin_ia32_shufpd256: + case X86::BI__builtin_ia32_shufpd512: + case X86::BI__builtin_ia32_shufps: + case X86::BI__builtin_ia32_shufps256: + case X86::BI__builtin_ia32_shufps512: { + uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); + llvm::Type *Ty = Ops[0]->getType(); + unsigned NumElts = Ty->getVectorNumElements(); + unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128; + unsigned NumLaneElts = NumElts / NumLanes; + + // Splat the 8-bits of immediate 4 times to help the loop wrap around. 
+ Imm = (Imm & 0xff) * 0x01010101; + + uint32_t Indices[16]; + for (unsigned l = 0; l != NumElts; l += NumLaneElts) { + for (unsigned i = 0; i != NumLaneElts; ++i) { + unsigned Index = Imm % NumLaneElts; + Imm /= NumLaneElts; + if (i >= (NumLaneElts / 2)) + Index += NumElts; + Indices[l + i] = l + Index; + } + } + + return Builder.CreateShuffleVector(Ops[0], Ops[1], + makeArrayRef(Indices, NumElts), + "shufp"); + } case X86::BI__builtin_ia32_palignr128: case X86::BI__builtin_ia32_palignr256: case X86::BI__builtin_ia32_palignr512: { Modified: cfe/trunk/lib/Headers/avxintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avxintrin.h?rev=334266&r1=334265&r2=334266&view=diff ============================================================================== --- cfe/trunk/lib/Headers/avxintrin.h (original) +++ cfe/trunk/lib/Headers/avxintrin.h Fri Jun 8 00:18:33 2018 @@ -1516,16 +1516,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, /// 11: Bits [127:96] and [255:224] are copied from the selected operand. /// \returns A 256-bit vector of [8 x float] containing the shuffled values. #define _mm256_shuffle_ps(a, b, mask) \ - (__m256)__builtin_shufflevector((__v8sf)(__m256)(a), \ - (__v8sf)(__m256)(b), \ - 0 + (((mask) >> 0) & 0x3), \ - 0 + (((mask) >> 2) & 0x3), \ - 8 + (((mask) >> 4) & 0x3), \ - 8 + (((mask) >> 6) & 0x3), \ - 4 + (((mask) >> 0) & 0x3), \ - 4 + (((mask) >> 2) & 0x3), \ - 12 + (((mask) >> 4) & 0x3), \ - 12 + (((mask) >> 6) & 0x3)) + (__m256)__builtin_ia32_shufps256((__v8sf)(__m256)(a), \ + (__v8sf)(__m256)(b), (int)(mask)) /// Selects four double-precision values from the 256-bit operands of /// [4 x double], as specified by the immediate value operand. @@ -1570,12 +1562,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, /// destination. /// \returns A 256-bit vector of [4 x double] containing the shuffled values. 
#define _mm256_shuffle_pd(a, b, mask) \ - (__m256d)__builtin_shufflevector((__v4df)(__m256d)(a), \ - (__v4df)(__m256d)(b), \ - 0 + (((mask) >> 0) & 0x1), \ - 4 + (((mask) >> 1) & 0x1), \ - 2 + (((mask) >> 2) & 0x1), \ - 6 + (((mask) >> 3) & 0x1)) + (__m256d)__builtin_ia32_shufpd256((__v4df)(__m256d)(a), \ + (__v4df)(__m256d)(b), (int)(mask)) /* Compare */ #define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */ Modified: cfe/trunk/lib/Headers/emmintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/emmintrin.h?rev=334266&r1=334265&r2=334266&view=diff ============================================================================== --- cfe/trunk/lib/Headers/emmintrin.h (original) +++ cfe/trunk/lib/Headers/emmintrin.h Fri Jun 8 00:18:33 2018 @@ -4757,9 +4757,8 @@ _mm_movemask_pd(__m128d __a) /// Bit[1] = 1: upper element of \a b copied to upper element of result. \n /// \returns A 128-bit vector of [2 x double] containing the shuffled values. #define _mm_shuffle_pd(a, b, i) \ - (__m128d)__builtin_shufflevector((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \ - 0 + (((i) >> 0) & 0x1), \ - 2 + (((i) >> 1) & 0x1)) + (__m128d)__builtin_ia32_shufpd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \ + (int)(i)) /// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit /// floating-point vector of [4 x float]. Modified: cfe/trunk/lib/Headers/xmmintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/xmmintrin.h?rev=334266&r1=334265&r2=334266&view=diff ============================================================================== --- cfe/trunk/lib/Headers/xmmintrin.h (original) +++ cfe/trunk/lib/Headers/xmmintrin.h Fri Jun 8 00:18:33 2018 @@ -2605,11 +2605,8 @@ void _mm_setcsr(unsigned int __i); /// 11: Bits [127:96] copied from the specified operand. /// \returns A 128-bit vector of [4 x float] containing the shuffled values. 
#define _mm_shuffle_ps(a, b, mask) \ - (__m128)__builtin_shufflevector((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \ - 0 + (((mask) >> 0) & 0x3), \ - 0 + (((mask) >> 2) & 0x3), \ - 4 + (((mask) >> 4) & 0x3), \ - 4 + (((mask) >> 6) & 0x3)) + (__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \ + (int)(mask)) /// Unpacks the high-order (index 2,3) values from two 128-bit vectors of /// [4 x float] and interleaves them into a 128-bit vector of [4 x float]. Modified: cfe/trunk/lib/Sema/SemaChecking.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaChecking.cpp?rev=334266&r1=334265&r2=334266&view=diff ============================================================================== --- cfe/trunk/lib/Sema/SemaChecking.cpp (original) +++ cfe/trunk/lib/Sema/SemaChecking.cpp Fri Jun 8 00:18:33 2018 @@ -2654,6 +2654,7 @@ bool Sema::CheckX86BuiltinFunctionCall(u break; case X86::BI__builtin_ia32_sha1rnds4: case X86::BI__builtin_ia32_blendpd: + case X86::BI__builtin_ia32_shufpd: case X86::BI__builtin_ia32_vec_set_v4hi: case X86::BI__builtin_ia32_vec_set_v4si: case X86::BI__builtin_ia32_vec_set_v4di: @@ -2721,6 +2722,7 @@ bool Sema::CheckX86BuiltinFunctionCall(u case X86::BI__builtin_ia32_pblendd128: case X86::BI__builtin_ia32_blendps: case X86::BI__builtin_ia32_blendpd256: + case X86::BI__builtin_ia32_shufpd256: case X86::BI__builtin_ia32_roundss: case X86::BI__builtin_ia32_roundsd: case X86::BI__builtin_ia32_rangepd128_mask: @@ -2824,6 +2826,10 @@ bool Sema::CheckX86BuiltinFunctionCall(u case X86::BI__builtin_ia32_shuf_f64x2: case X86::BI__builtin_ia32_shuf_i32x4: case X86::BI__builtin_ia32_shuf_i64x2: + case X86::BI__builtin_ia32_shufpd512: + case X86::BI__builtin_ia32_shufps: + case X86::BI__builtin_ia32_shufps256: + case X86::BI__builtin_ia32_shufps512: case X86::BI__builtin_ia32_dbpsadbw128_mask: case X86::BI__builtin_ia32_dbpsadbw256_mask: case X86::BI__builtin_ia32_dbpsadbw512_mask: _______________________________________________ 
cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits