On Fri, Apr 7, 2017 at 4:44 PM, Jakub Jelinek <ja...@redhat.com> wrote: > Hi! > > Apparently while we weren't closely watching, Intel has added various new > intrinsics to ICC, and they were added to Clang > last fall as well. > > Tested with > make -j272 -k check-gcc RUNTESTFLAGS='--target_board=unix\{-m32,-m64\} > i386.exp' > on KNL, will bootstrap/regtest on my Haswell-E next, ok for trunk > if that passes? > > It is not a regression; on the other hand, it really shouldn't affect any > code that is not using those intrinsics. > > 2017-04-07 Jakub Jelinek <ja...@redhat.com> > > PR target/80322 > PR target/80323 > PR target/80325 > PR target/80326 > * config/i386/avxintrin.h (_mm256_cvtsd_f64, _mm256_cvtss_f32): New > intrinsics. > * config/i386/avx512fintrin.h (_mm512_int2mask, _mm512_mask2int, > _mm512_abs_ps, _mm512_mask_abs_ps, _mm512_abs_pd, _mm512_mask_abs_pd, > _mm512_cvtsd_f64, _mm512_cvtss_f32): Likewise. > > * gcc.target/i386/avx512f-undefined-1.c: New test. > * gcc.target/i386/avx512f-cvtsd-1.c: New test. > * gcc.target/i386/avx-cvtsd-1.c: New test. > * gcc.target/i386/avx512f-cvtss-1.c: New test. > * gcc.target/i386/avx512f-abspd-1.c: New test. > * gcc.target/i386/avx-cvtss-1.c: New test. > * gcc.target/i386/avx512f-absps-1.c: New test. > * gcc.target/i386/avx512f-int2mask-1.c: New test. > * gcc.target/i386/avx512f-mask2int-1.c: New test.
LGTM. Thanks, Uros. > --- gcc/config/i386/avxintrin.h.jj 2017-01-01 12:45:42.000000000 +0100 > +++ gcc/config/i386/avxintrin.h 2017-04-06 12:13:42.250717878 +0200 > @@ -491,6 +491,20 @@ _mm256_cvttps_epi32 (__m256 __A) > return (__m256i)__builtin_ia32_cvttps2dq256 ((__v8sf) __A); > } > > +extern __inline double > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm256_cvtsd_f64 (__m256d __A) > +{ > + return __A[0]; > +} > + > +extern __inline float > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm256_cvtss_f32 (__m256 __A) > +{ > + return __A[0]; > +} > + > #ifdef __OPTIMIZE__ > extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, > __artificial__)) > _mm256_extractf128_pd (__m256d __X, const int __N) > --- gcc/config/i386/avx512fintrin.h.jj 2017-01-26 13:22:55.000000000 +0100 > +++ gcc/config/i386/avx512fintrin.h 2017-04-06 15:25:03.941949154 +0200 > @@ -60,6 +60,20 @@ typedef double __m512d_u __attribute__ ( > typedef unsigned char __mmask8; > typedef unsigned short __mmask16; > > +extern __inline __mmask16 > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm512_int2mask (int __M) > +{ > + return (__mmask16) __M; > +} > + > +extern __inline int > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm512_mask2int (__mmask16 __M) > +{ > + return (int) __M; > +} > + > extern __inline __m512i > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > _mm512_set_epi64 (long long __A, long long __B, long long __C, > @@ -125,6 +139,8 @@ _mm512_undefined_ps (void) > return __Y; > } > > +#define _mm512_undefined _mm512_undefined_ps > + > extern __inline __m512d > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > _mm512_undefined_pd (void) > @@ -7264,6 +7280,39 @@ _mm512_mask_testn_epi64_mask (__mmask8 _ > (__v8di) __B, __U); > } > > +extern __inline __m512 > +__attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) > +_mm512_abs_ps (__m512 __A) > +{ > + return (__m512) _mm512_and_epi32 ((__m512i) __A, > + _mm512_set1_epi32 (0x7fffffff)); > +} > + > +extern __inline __m512 > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm512_mask_abs_ps (__m512 __W, __mmask16 __U, __m512 __A) > +{ > + return (__m512) _mm512_mask_and_epi32 ((__m512i) __W, __U, (__m512i) __A, > + _mm512_set1_epi32 (0x7fffffff)); > +} > + > +extern __inline __m512d > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm512_abs_pd (__m512 __A) > +{ > + return (__m512d) _mm512_and_epi64 ((__m512i) __A, > + _mm512_set1_epi64 > (0x7fffffffffffffffLL)); > +} > + > +extern __inline __m512d > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm512_mask_abs_pd (__m512d __W, __mmask8 __U, __m512 __A) > +{ > + return (__m512d) > + _mm512_mask_and_epi64 ((__m512i) __W, __U, (__m512i) __A, > + _mm512_set1_epi64 (0x7fffffffffffffffLL)); > +} > + > extern __inline __m512i > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > _mm512_unpackhi_epi32 (__m512i __A, __m512i __B) > @@ -12011,6 +12060,20 @@ _mm512_maskz_cvtps_epu32 (__mmask16 __U, > > _MM_FROUND_CUR_DIRECTION); > } > > +extern __inline double > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm512_cvtsd_f64 (__m512d __A) > +{ > + return __A[0]; > +} > + > +extern __inline float > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm512_cvtss_f32 (__m512 __A) > +{ > + return __A[0]; > +} > + > #ifdef __x86_64__ > extern __inline __m128 > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > --- gcc/testsuite/gcc.target/i386/avx512f-undefined-1.c.jj 2017-04-07 > 12:25:13.066643742 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512f-undefined-1.c 2017-04-07 > 15:25:06.239160492 +0200 > @@ -0,0 +1,24 @@ > +/* { dg-do run } */ > +/* { dg-options "-mavx512f -O2" } */ > +/* { dg-require-effective-target 
avx512f } */ > + > +#include "avx512f-check.h" > + > +static void > +avx512f_test (void) > +{ > + __m512 a = _mm512_undefined_ps (); > + __m512 b = _mm512_undefined (); > + __m512d c = _mm512_undefined_pd (); > + __m512i d = _mm512_undefined_epi32 (); > + __m512i e = _mm512_set1_epi32 (0); > + __m512i f = _mm512_and_epi32 ((__m512i) a, e); > + __m512i g = _mm512_and_epi32 ((__m512i) b, e); > + __m512i h = _mm512_and_epi32 ((__m512i) c, e); > + __m512i i = _mm512_and_epi32 (d, e); > + if (_mm512_cmpeq_epi32_mask (f, e) != 0xffff > + || _mm512_cmpeq_epi32_mask (g, e) != 0xffff > + || _mm512_cmpeq_epi32_mask (h, e) != 0xffff > + || _mm512_cmpeq_epi32_mask (i, e) != 0xffff) > + __builtin_abort (); > +} > --- gcc/testsuite/gcc.target/i386/avx512f-cvtsd-1.c.jj 2017-04-07 > 12:25:13.066643742 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512f-cvtsd-1.c 2017-04-07 > 12:25:13.066643742 +0200 > @@ -0,0 +1,23 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target avx512f } */ > +/* { dg-options "-O2 -mavx512f" } */ > + > +#include "avx512f-check.h" > + > +__attribute__((noinline, noclone)) double > +foo (__m512d x) > +{ > + return _mm512_cvtsd_f64 (x); > +} > + > +static void > +avx512f_test (void) > +{ > + if (_mm512_cvtsd_f64 (_mm512_set_pd (5.5, 24.5, 23.0, 22.5, > + 2.0, 3.0, 4.0, 13.5)) != 13.5) > + __builtin_abort (); > + > + if (foo (_mm512_set_pd (5.25, 24.25, 23.75, 22.0, > + 2.0, 3.0, 4.0, 12.25)) != 12.25) > + __builtin_abort (); > +} > --- gcc/testsuite/gcc.target/i386/avx-cvtsd-1.c.jj 2017-04-07 > 12:25:13.066643742 +0200 > +++ gcc/testsuite/gcc.target/i386/avx-cvtsd-1.c 2017-04-07 12:25:13.066643742 > +0200 > @@ -0,0 +1,21 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target avx } */ > +/* { dg-options "-O2 -mavx" } */ > + > +#include "avx-check.h" > + > +__attribute__((noinline, noclone)) double > +foo (__m256d x) > +{ > + return _mm256_cvtsd_f64 (x); > +} > + > +static void > +avx_test (void) > +{ > + if (_mm256_cvtsd_f64 (_mm256_set_pd (13.5, 
24.5, 23.0, 22.5)) != 22.5) > + __builtin_abort (); > + > + if (foo (_mm256_set_pd (24.25, 23.75, 22.0, 12.25)) != 12.25) > + __builtin_abort (); > +} > --- gcc/testsuite/gcc.target/i386/avx512f-cvtss-1.c.jj 2017-04-07 > 12:25:13.066643742 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512f-cvtss-1.c 2017-04-07 > 12:25:13.066643742 +0200 > @@ -0,0 +1,26 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target avx512f } */ > +/* { dg-options "-O2 -mavx512f" } */ > + > +#include "avx512f-check.h" > + > +__attribute__((noinline, noclone)) double > +foo (__m512 x) > +{ > + return _mm512_cvtss_f32 (x); > +} > + > +static void > +avx512f_test (void) > +{ > + if (_mm512_cvtss_f32 (_mm512_set_ps (13.0f, 24.5f, 23.0f, 22.5f, > + 2.0f, 3.0f, 4.0f, 5.0f, > + 6.0f, 7.0f, 8.0f, 9.0f, > + 10.0f, 11.0f, 12.0f, 13.5f)) != 13.5f) > + __builtin_abort (); > + > + if (foo (_mm512_set_ps (13.25f, 24.25f, 23.75f, 22.0f, > + 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, > + 10.0f, 11.0f, 12.0f, 12.25f)) != 12.25f) > + __builtin_abort (); > +} > --- gcc/testsuite/gcc.target/i386/avx512f-abspd-1.c.jj 2017-04-07 > 12:25:13.067643728 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512f-abspd-1.c 2017-04-07 > 12:25:13.067643728 +0200 > @@ -0,0 +1,50 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -mavx512f" } */ > +/* { dg-require-effective-target avx512f } */ > + > +#define AVX512F > + > +#include "avx512f-helper.h" > + > +#define SIZE (AVX512F_LEN / 32) > +#include "avx512f-mask-type.h" > + > +static void > +CALC (float *i1, float *r) > +{ > + int i; > + > + for (i = 0; i < SIZE; i++) > + if (i1[i] < 0) > + r[i] = -i1[i]; > + else > + r[i] = i1[i]; > +} > + > +void > +TEST (void) > +{ > + float ck[SIZE]; > + int i; > + UNION_TYPE (AVX512F_LEN, ) s, d, dm; > + MASK_TYPE mask = MASK_VALUE; > + > + for (i = 0; i < SIZE; i++) > + { > + s.a[i] = i * ((i & 1) ? 
3.5f : -7.5f); > + d.a[i] = DEFAULT_VALUE; > + dm.a[i] = DEFAULT_VALUE; > + } > + > + CALC (s.a, ck); > + > + d.x = INTRINSIC (_abs_ps) (s.x); > + dm.x = INTRINSIC (_mask_abs_ps) (dm.x, mask, s.x); > + > + if (UNION_CHECK (AVX512F_LEN, ) (d, ck)) > + abort (); > + > + MASK_MERGE () (ck, mask, SIZE); > + if (UNION_CHECK (AVX512F_LEN, ) (dm, ck)) > + abort (); > +} > --- gcc/testsuite/gcc.target/i386/avx-cvtss-1.c.jj 2017-04-07 > 12:25:13.067643728 +0200 > +++ gcc/testsuite/gcc.target/i386/avx-cvtss-1.c 2017-04-07 12:25:13.067643728 > +0200 > @@ -0,0 +1,23 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target avx } */ > +/* { dg-options "-O2 -mavx" } */ > + > +#include "avx-check.h" > + > +__attribute__((noinline, noclone)) double > +foo (__m256 x) > +{ > + return _mm256_cvtss_f32 (x); > +} > + > +static void > +avx_test (void) > +{ > + if (_mm256_cvtss_f32 (_mm256_set_ps (5.5f, 24.5f, 23.0f, 22.5f, > + 2.0f, 3.0f, 4.0f, 13.5f)) != 13.5f) > + __builtin_abort (); > + > + if (foo (_mm256_set_ps (5.25f, 24.25f, 23.75f, 22.0f, > + 2.0f, 3.0f, 4.0f, 12.25f)) != 12.25f) > + __builtin_abort (); > +} > --- gcc/testsuite/gcc.target/i386/avx512f-absps-1.c.jj 2017-04-07 > 12:25:13.067643728 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512f-absps-1.c 2017-04-07 > 12:25:13.067643728 +0200 > @@ -0,0 +1,50 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -mavx512f" } */ > +/* { dg-require-effective-target avx512f } */ > + > +#define AVX512F > + > +#include "avx512f-helper.h" > + > +#define SIZE (AVX512F_LEN / 32) > +#include "avx512f-mask-type.h" > + > +static void > +CALC (float *i1, float *r) > +{ > + int i; > + > + for (i = 0; i < SIZE; i++) > + if (i1[i] < 0) > + r[i] = -i1[i]; > + else > + r[i] = i1[i]; > +} > + > +void > +TEST (void) > +{ > + float ck[SIZE]; > + int i; > + UNION_TYPE (AVX512F_LEN, ) s, d, dm; > + MASK_TYPE mask = MASK_VALUE; > + > + for (i = 0; i < SIZE; i++) > + { > + s.a[i] = i * ((i & 1) ? 
3.5f : -7.5f); > + d.a[i] = DEFAULT_VALUE; > + dm.a[i] = DEFAULT_VALUE; > + } > + > + CALC (s.a, ck); > + > + d.x = INTRINSIC (_abs_ps) (s.x); > + dm.x = INTRINSIC (_mask_abs_ps) (dm.x, mask, s.x); > + > + if (UNION_CHECK (AVX512F_LEN, ) (d, ck)) > + abort (); > + > + MASK_MERGE () (ck, mask, SIZE); > + if (UNION_CHECK (AVX512F_LEN, ) (dm, ck)) > + abort (); > +} > --- gcc/testsuite/gcc.target/i386/avx512f-int2mask-1.c.jj 2017-04-07 > 12:25:13.067643728 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512f-int2mask-1.c 2017-04-07 > 15:25:06.238160506 +0200 > @@ -0,0 +1,20 @@ > +/* { dg-do run } */ > +/* { dg-options "-mavx512f -O2" } */ > +/* { dg-require-effective-target avx512f } */ > + > +#include "avx512f-check.h" > + > +static void > +avx512f_test (void) > +{ > + __m512i a = _mm512_set_epi32 (1, 17, 2, 12, 4, 14, 6, 16, > + 8, 11, 10, 20, 12, 22, 14, 24); > + __m512i b = _mm512_set_epi32 (0, 1, 11, 3, 13, 5, 15, 7, > + 17, 9, 19, 11, 21, 13, 23, 16); > + __mmask16 c = _mm512_kmov (_mm512_int2mask (2 | 8)); > + __m512i d = _mm512_mask_mov_epi32 (a, c, b); > + __m512i e = _mm512_set_epi32 (1, 17, 2, 12, 4, 14, 6, 16, > + 8, 11, 10, 20, 21, 22, 23, 24); > + if (_mm512_mask2int (_mm512_cmpeq_epi32_mask (d, e)) != 0xffff) > + __builtin_abort (); > +} > --- gcc/testsuite/gcc.target/i386/avx512f-mask2int-1.c.jj 2017-04-07 > 12:25:13.068643715 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512f-mask2int-1.c 2017-04-07 > 15:25:06.239160492 +0200 > @@ -0,0 +1,17 @@ > +/* { dg-do run } */ > +/* { dg-options "-mavx512f -O2" } */ > +/* { dg-require-effective-target avx512f } */ > + > +#include "avx512f-check.h" > + > +static void > +avx512f_test (void) > +{ > + __m512i a > + = _mm512_set_epi32 (1, 17, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, > 16); > + __m512i b > + = _mm512_set_epi32 (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, > 16); > + __mmask16 c = _mm512_cmpgt_epu32_mask (a, b); > + if (_mm512_mask2int (c) != 0xc000) > + __builtin_abort (); > +} > > Jakub