Hello!

2011-08-06  Uros Bizjak  <ubiz...@gmail.com>

	* gcc.target/i386/sse-22.c (dg-options): Add -march=k8.
	(pragma GCC target): Add avx, fma4, lzcnt and bmi options.
	* gcc.target/i386/sse-23.c (pragma GCC target): Add avx, fma4, lzcnt
	and bmi options.

Tested on x86_64-pc-linux-gnu, committed to mainline SVN.

Uros.

Index: gcc.target/i386/sse-23.c =================================================================== --- gcc.target/i386/sse-23.c (revision 177507) +++ gcc.target/i386/sse-23.c (working copy) @@ -4,12 +4,14 @@ #include <mm_malloc.h> /* Test that the intrinsics compile with optimization. All of them - are defined as inline functions in {,x,e,p,t,s,w,a}mmintrin.h, - xopintrin.h, lwpintrin.h, tbmintrin.h, popcntintrin.h and mm3dnow.h - that reference the proper builtin functions. Defining away "extern" - and "__inline" results in all of them being compiled as proper - functions. */ + are defined as inline functions in {,x,e,p,t,s,w,a,b,i}mmintrin.h, + mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h, + tbmintrin.h, lwpintrin.h, popcntintrin.h and mm_malloc.h that + reference the proper builtin functions. + Defining away "extern" and "__inline" results in all of them being + compiled as proper functions. */ + #define extern #define __inline @@ -145,7 +147,7 @@ #define __builtin_ia32_bextri_u32(X, Y) __builtin_ia32_bextr_u32 (X, 1) #define __builtin_ia32_bextri_u64(X, Y) __builtin_ia32_bextr_u64 (X, 1) -#pragma GCC target ("3dnow,sse4,sse4a,aes,pclmul,xop,abm,popcnt,lwp,tbm,fsgsbase,rdrnd,f16c") +#pragma GCC target ("sse4a,3dnow,avx,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,tbm,lwp,fsgsbase,rdrnd,f16c") #include <wmmintrin.h> #include <smmintrin.h> #include <mm3dnow.h> Index: gcc.target/i386/sse-22.c =================================================================== --- gcc.target/i386/sse-22.c (revision 177507) +++ gcc.target/i386/sse-22.c (working copy) @@ -1,16 +1,18 @@ /* Same as sse-14, except converted to use #pragma GCC option. */ /* { dg-do compile } */ -/* { dg-options "-O0 -Werror-implicit-function-declaration" } */ +/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8" } */ #include <mm_malloc.h> -/* Test that the intrinsics compile without optimization. 
All of them - are defined as inline functions in {,x,e,p,t,s,w,a}mmintrin.h, - xopintrin.h, tbmintrin.h, lwpintrin.h, popcntintrin.h and - mm3dnow.h that reference the proper builtin functions. Defining - away "extern" and "__inline" results in all of them being compiled as - proper functions. */ +/* Test that the intrinsics compile with optimization. All of them + are defined as inline functions in {,x,e,p,t,s,w,a,b,i}mmintrin.h, + mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h, + tbmintrin.h, lwpintrin.h, popcntintrin.h and mm_malloc.h that + reference the proper builtin functions. + Defining away "extern" and "__inline" results in all of them being + compiled as proper functions. */ + #define extern #define __inline @@ -32,6 +34,11 @@ type _CONCAT(_,func) (op1_type A, op2_type B, int const I, int const L) \ { return func (A, B, imm1, imm2); } +#define test_3(func, type, op1_type, op2_type, op3_type, imm) \ + type _CONCAT(_,func) (op1_type A, op2_type B, \ + op3_type C, int const I) \ + { return func (A, B, C, imm); } + #define test_4(func, type, op1_type, op2_type, op3_type, op4_type, imm) \ type _CONCAT(_,func) (op1_type A, op2_type B, \ op3_type C, op4_type D, int const I) \ @@ -39,7 +46,7 @@ #ifndef DIFFERENT_PRAGMAS -#pragma GCC target ("mmx,3dnow,sse,sse2,sse3,ssse3,sse4.1,sse4.2,sse4a,aes,pclmul,xop,popcnt,abm,lwp,fsgsbase,rdrnd,f16c,tbm") +#pragma GCC target ("sse4a,3dnow,avx,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,tbm,lwp,fsgsbase,rdrnd,f16c") #endif /* Following intrinsics require immediate arguments. They @@ -107,14 +114,18 @@ test_1x (_mm_extracti_si64, __m128i, __m128i, 1, 1) test_2x (_mm_inserti_si64, __m128i, __m128i, __m128i, 1, 1) -/* smmintrin.h (SSE4.1). */ -/* nmmintrin.h (SSE4.2). */ -/* Note, nmmintrin.h includes smmintrin.h, and smmintrin.h checks for the - #ifdef. So just set the option to SSE4.2. */ +/* Note, nmmintrin.h includes smmintrin.h, and smmintrin.h + checks for the #ifdef. So just set the option to SSE4.2. 
*/ #ifdef DIFFERENT_PRAGMAS #pragma GCC target ("sse4.2") #endif #include <nmmintrin.h> +/* smmintrin.h (SSE4.2). */ +test_1 (_mm_round_pd, __m128d, __m128d, 1) +test_1 (_mm_round_ps, __m128, __m128, 1) +test_2 (_mm_round_sd, __m128d, __m128d, __m128d, 1) +test_2 (_mm_round_ss, __m128, __m128, __m128, 1) + test_2 (_mm_blend_epi16, __m128i, __m128i, __m128i, 1) test_2 (_mm_blend_ps, __m128, __m128, __m128, 1) test_2 (_mm_blend_pd, __m128d, __m128d, __m128d, 1) @@ -148,6 +159,53 @@ test_4 (_mm_cmpestrs, int, __m128i, int, __m128i, int, 1) test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1) +/* immintrin.h (AVX/RDRND/FSGSBASE/F16C) */ +#ifdef DIFFERENT_PRAGMAS +#pragma GCC target ("avx,rdrnd,fsgsbase,f16c") +#endif +#include <immintrin.h> +test_2 (_mm256_blend_pd, __m256d, __m256d, __m256d, 1) +test_2 (_mm256_blend_ps, __m256, __m256, __m256, 1) +test_2 (_mm256_dp_ps, __m256, __m256, __m256, 1) +test_2 (_mm256_shuffle_pd, __m256d, __m256d, __m256d, 1) +test_2 (_mm256_shuffle_ps, __m256, __m256, __m256, 1) +test_2 (_mm_cmp_sd, __m128d, __m128d, __m128d, 1) +test_2 (_mm_cmp_ss, __m128, __m128, __m128, 1) +test_2 (_mm_cmp_pd, __m128d, __m128d, __m128d, 1) +test_2 (_mm_cmp_ps, __m128, __m128, __m128, 1) +test_2 (_mm256_cmp_pd, __m256d, __m256d, __m256d, 1) +test_2 (_mm256_cmp_ps, __m256, __m256, __m256, 1) +test_1 (_mm256_extractf128_pd, __m128d, __m256d, 1) +test_1 (_mm256_extractf128_ps, __m128, __m256, 1) +test_1 (_mm256_extractf128_si256, __m128i, __m256i, 1) +test_1 (_mm256_extract_epi8, int, __m256i, 20) +test_1 (_mm256_extract_epi16, int, __m256i, 13) +test_1 (_mm256_extract_epi32, int, __m256i, 6) +#ifdef __x86_64__ +test_1 (_mm256_extract_epi64, long long, __m256i, 2) +#endif +test_1 (_mm_permute_pd, __m128d, __m128d, 1) +test_1 (_mm256_permute_pd, __m256d, __m256d, 1) +test_1 (_mm_permute_ps, __m128, __m128, 1) +test_1 (_mm256_permute_ps, __m256, __m256, 1) +test_2 (_mm256_permute2f128_pd, __m256d, __m256d, __m256d, 1) +test_2 (_mm256_permute2f128_ps, 
__m256, __m256, __m256, 1) +test_2 (_mm256_permute2f128_si256, __m256i, __m256i, __m256i, 1) +test_2 (_mm256_insertf128_pd, __m256d, __m256d, __m128d, 1) +test_2 (_mm256_insertf128_ps, __m256, __m256, __m128, 1) +test_2 (_mm256_insertf128_si256, __m256i, __m256i, __m128i, 1) +test_2 (_mm256_insert_epi8, __m256i, __m256i, int, 30) +test_2 (_mm256_insert_epi16, __m256i, __m256i, int, 7) +test_2 (_mm256_insert_epi32, __m256i, __m256i, int, 3) +#ifdef __x86_64__ +test_2 (_mm256_insert_epi64, __m256i, __m256i, long long, 1) +#endif +test_1 (_mm256_round_pd, __m256d, __m256d, 1) +test_1 (_mm256_round_ps, __m256, __m256, 1) +test_1 (_cvtss_sh, unsigned short, float, 1) +test_1 (_mm_cvtps_ph, __m128i, __m128, 1) +test_1 (_mm256_cvtps_ph, __m128i, __m256, 1) + /* wmmintrin.h (AES/PCLMUL). */ #ifdef DIFFERENT_PRAGMAS #pragma GCC target ("aes,pclmul") @@ -156,23 +214,28 @@ test_1 (_mm_aeskeygenassist_si128, __m128i, __m128i, 1) test_2 (_mm_clmulepi64_si128, __m128i, __m128i, __m128i, 1) -/* smmintrin.h (SSE4.1). */ -test_1 (_mm_round_pd, __m128d, __m128d, 1) -test_1 (_mm_round_ps, __m128, __m128, 1) -test_2 (_mm_round_sd, __m128d, __m128d, __m128d, 1) -test_2 (_mm_round_ss, __m128, __m128, __m128, 1) +/* popcnintrin.h (POPCNT). */ +#ifdef DIFFERENT_PRAGMAS +#pragma GCC target ("popcnt") +#endif +#include <popcntintrin.h> -/* xopintrin.h (XOP). */ +/* x86intrin.h (FMA4/XOP/LWP/BMI/TBM/LZCNT). 
*/ #ifdef DIFFERENT_PRAGMAS -#pragma GCC target ("xop,lwp") +#pragma GCC target ("fma4,xop,lwp,bmi,tbm,lzcnt") #endif #include <x86intrin.h> +/* xopintrin.h */ test_1 ( _mm_roti_epi8, __m128i, __m128i, 1) test_1 ( _mm_roti_epi16, __m128i, __m128i, 1) test_1 ( _mm_roti_epi32, __m128i, __m128i, 1) test_1 ( _mm_roti_epi64, __m128i, __m128i, 1) +test_3 (_mm_permute2_pd, __m128d, __m128d, __m128d, __m128d, 1) +test_3 (_mm256_permute2_pd, __m256d, __m256d, __m256d, __m256d, 1) +test_3 (_mm_permute2_ps, __m128, __m128, __m128, __m128, 1) +test_3 (_mm256_permute2_ps, __m256, __m256, __m256, __m256, 1) -/* lwpintrin.h (LWP). */ +/* lwpintrin.h */ test_2 ( __lwpval32, void, unsigned int, unsigned int, 1) test_2 ( __lwpins32, unsigned char, unsigned int, unsigned int, 1) #ifdef __x86_64__ @@ -180,20 +243,7 @@ test_2 ( __lwpins64, unsigned char, unsigned long long, unsigned int, 1) #endif -/* immintrin.h (F16C). */ -#ifdef DIFFERENT_PRAGMAS -#pragma GCC target ("f16c") -#endif -#include <x86intrin.h> -test_1 (_cvtss_sh, unsigned short, float, 1) -test_1 (_mm_cvtps_ph, __m128i, __m128, 1) -test_1 (_mm256_cvtps_ph, __m128i, __m256, 1) - -/* tbmintrin.h (TBM). */ -#ifdef DIFFERENT_PRAGMAS -#pragma GCC target ("tbm") -#endif -#include <x86intrin.h> +/* tbmintrin.h */ test_1 ( __bextri_u32, unsigned int, unsigned int, 1) #ifdef __x86_64__ test_1 ( __bextri_u64, unsigned long long, unsigned long long, 1)