Author: ctopper
Date: Sat Oct 29 14:02:07 2016
New Revision: 285503

URL: http://llvm.org/viewvc/llvm-project?rev=285503&view=rev
Log:
[AVX-512] Remove masked 128/256-bit pmuludq/pmuldq builtins and replace them with unmasked builtins and a select.

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Headers/avx512vlintrin.h cfe/trunk/test/CodeGen/avx512vl-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=285503&r1=285502&r2=285503&view=diff ============================================================================== --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Sat Oct 29 14:02:07 2016 @@ -1076,11 +1076,6 @@ TARGET_BUILTIN(__builtin_ia32_ucmpd512_m TARGET_BUILTIN(__builtin_ia32_ucmpq512_mask, "UcV8LLiV8LLiIiUc", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_ucmpw512_mask, "UiV32sV32sIiUi", "", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmuldq256_mask, "V4LLiV8iV8iV4LLiUc", "", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmuldq128_mask, "V2LLiV4iV4iV2LLiUc", "", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmuludq256_mask, "V4LLiV8iV8iV4LLiUc", "", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmuludq128_mask, "V2LLiV4iV4iV2LLiUc", "", "avx512vl") - TARGET_BUILTIN(__builtin_ia32_pabsb512_mask, "V64cV64cV64cULLi", "", "avx512bw") TARGET_BUILTIN(__builtin_ia32_pabsw512_mask, "V32sV32sV32sUi", "", "avx512bw") TARGET_BUILTIN(__builtin_ia32_packssdw512_mask, "V32sV16iV16iV32sUi", "", "avx512bw") Modified: cfe/trunk/lib/Headers/avx512vlintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vlintrin.h?rev=285503&r1=285502&r2=285503&view=diff ============================================================================== --- cfe/trunk/lib/Headers/avx512vlintrin.h (original) +++ cfe/trunk/lib/Headers/avx512vlintrin.h Sat Oct 29 14:02:07 2016 @@ -744,79 +744,67 @@ _mm_maskz_sub_epi64(__mmask8 __U, __m128 } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X, - __m256i __Y) +_mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { - return 
(__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X, - (__v8si) __Y, - (__v4di) __W, __M); + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, + (__v4di)_mm256_mul_epi32(__X, __Y), + (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y) +_mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y) { - return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X, - (__v8si) __Y, - (__v4di) - _mm256_setzero_si256 (), - __M); + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, + (__v4di)_mm256_mul_epi32(__X, __Y), + (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X, - __m128i __Y) +_mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { - return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X, - (__v4si) __Y, - (__v2di) __W, __M); + return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, + (__v2di)_mm_mul_epi32(__X, __Y), + (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y) +_mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y) { - return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X, - (__v4si) __Y, - (__v2di) - _mm_setzero_si128 (), - __M); + return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, + (__v2di)_mm_mul_epi32(__X, __Y), + (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X, - __m256i __Y) +_mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { - return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X, - (__v8si) __Y, - (__v4di) __W, __M); + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, + (__v4di)_mm256_mul_epu32(__X, __Y), + (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, 
__m256i __Y) +_mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y) { - return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X, - (__v8si) __Y, - (__v4di) - _mm256_setzero_si256 (), - __M); + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, + (__v4di)_mm256_mul_epu32(__X, __Y), + (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X, - __m128i __Y) +_mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { - return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X, - (__v4si) __Y, - (__v2di) __W, __M); + return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, + (__v2di)_mm_mul_epu32(__X, __Y), + (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y) +_mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y) { - return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X, - (__v4si) __Y, - (__v2di) - _mm_setzero_si128 (), - __M); + return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, + (__v2di)_mm_mul_epu32(__X, __Y), + (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS Modified: cfe/trunk/test/CodeGen/avx512vl-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512vl-builtins.c?rev=285503&r1=285502&r2=285503&view=diff ============================================================================== --- cfe/trunk/test/CodeGen/avx512vl-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512vl-builtins.c Sat Oct 29 14:02:07 2016 @@ -727,13 +727,15 @@ __m128i test_mm_maskz_sub_epi64 (__mmask __m256i test_mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { //CHECK-LABEL: @test_mm256_mask_mul_epi32 - //CHECK: @llvm.x86.avx512.mask.pmul.dq.256 + //CHECK: @llvm.x86.avx2.pmul.dq + //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_mul_epi32(__W, __M, __X, 
__Y); } __m256i test_mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y) { //CHECK-LABEL: @test_mm256_maskz_mul_epi32 - //CHECK: @llvm.x86.avx512.mask.pmul.dq.256 + //CHECK: @llvm.x86.avx2.pmul.dq + //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_mul_epi32(__M, __X, __Y); } @@ -741,39 +743,45 @@ __m256i test_mm256_maskz_mul_epi32 (__mm __m128i test_mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { //CHECK-LABEL: @test_mm_mask_mul_epi32 - //CHECK: @llvm.x86.avx512.mask.pmul.dq.128 + //CHECK: @llvm.x86.sse41.pmuldq + //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_mul_epi32(__W, __M, __X, __Y); } __m128i test_mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y) { //CHECK-LABEL: @test_mm_maskz_mul_epi32 - //CHECK: @llvm.x86.avx512.mask.pmul.dq.128 + //CHECK: @llvm.x86.sse41.pmuldq + //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_mul_epi32(__M, __X, __Y); } __m256i test_mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { //CHECK-LABEL: @test_mm256_mask_mul_epu32 - //CHECK: @llvm.x86.avx512.mask.pmulu.dq.256 + //CHECK: @llvm.x86.avx2.pmulu.dq + //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_mul_epu32(__W, __M, __X, __Y); } __m256i test_mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y) { //CHECK-LABEL: @test_mm256_maskz_mul_epu32 - //CHECK: @llvm.x86.avx512.mask.pmulu.dq.256 + //CHECK: @llvm.x86.avx2.pmulu.dq + //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_mul_epu32(__M, __X, __Y); } __m128i test_mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { //CHECK-LABEL: @test_mm_mask_mul_epu32 - //CHECK: @llvm.x86.avx512.mask.pmulu.dq.128 + //CHECK: @llvm.x86.sse2.pmulu.dq + //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return 
_mm_mask_mul_epu32(__W, __M, __X, __Y); } __m128i test_mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y) { //CHECK-LABEL: @test_mm_maskz_mul_epu32 - //CHECK: @llvm.x86.avx512.mask.pmulu.dq.128 + //CHECK: @llvm.x86.sse2.pmulu.dq + //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_mul_epu32(__M, __X, __Y); } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits