Hi, the *mmintrin headers do not work with function-specific options.
Example 1: #include <smmintrin.h> __attribute__((target("sse4.1"))) __m128i foo(__m128i *V) { return _mm_stream_load_si128(V); } $ g++ test.cc smmintrin.h:31:3: error: #error "SSE4.1 instruction set not enabled" # error "SSE4.1 instruction set not enabled" This error happens even though foo is marked "sse4.1". There are multiple issues at play here. First, the headers are guarded by macros that are switched on only when the target-specific options, like -msse4.1 in this case, are present on the command line. Second, the target-specific builtins, like __builtin_ia32_movntdqa called by _mm_stream_load_si128, are exposed only in the presence of the appropriate target ISA option. I have attached a patch that fixes this. I have added an option "-mgenerate-builtins" that does two things: it defines a macro "__ALL_ISA__", which exposes the *intrin.h functions, and it exposes all the target-specific builtins. -mgenerate-builtins does not affect code generation. This feature will also greatly improve the usability of function multiversioning. Comments? Thanks, Sri
Index: emmintrin.h =================================================================== --- emmintrin.h (revision 197691) +++ emmintrin.h (working copy) @@ -27,7 +27,7 @@ #ifndef _EMMINTRIN_H_INCLUDED #define _EMMINTRIN_H_INCLUDED -#ifndef __SSE2__ +#if !defined (__SSE2__) && !defined (__ALL_ISA__) # error "SSE2 instruction set not enabled" #else Index: fma4intrin.h =================================================================== --- fma4intrin.h (revision 197691) +++ fma4intrin.h (working copy) @@ -28,7 +28,7 @@ #ifndef _FMA4INTRIN_H_INCLUDED #define _FMA4INTRIN_H_INCLUDED -#ifndef __FMA4__ +#if !defined (__FMA4__) && !defined (__ALL_ISA__) # error "FMA4 instruction set not enabled" #else Index: lwpintrin.h =================================================================== --- lwpintrin.h (revision 197691) +++ lwpintrin.h (working copy) @@ -28,7 +28,7 @@ #ifndef _LWPINTRIN_H_INCLUDED #define _LWPINTRIN_H_INCLUDED -#ifndef __LWP__ +#if !defined (__LWP__) && !defined (__ALL_ISA__) # error "LWP instruction set not enabled" #else Index: xopintrin.h =================================================================== --- xopintrin.h (revision 197691) +++ xopintrin.h (working copy) @@ -28,7 +28,7 @@ #ifndef _XOPMMINTRIN_H_INCLUDED #define _XOPMMINTRIN_H_INCLUDED -#ifndef __XOP__ +#if !defined (__XOP__) && !defined (__ALL_ISA__) # error "XOP instruction set not enabled" #else Index: fmaintrin.h =================================================================== --- fmaintrin.h (revision 197691) +++ fmaintrin.h (working copy) @@ -28,7 +28,7 @@ #ifndef _FMAINTRIN_H_INCLUDED #define _FMAINTRIN_H_INCLUDED -#ifndef __FMA__ +#if !defined (__FMA__) && !defined (__ALL_ISA__) # error "FMA instruction set not enabled" #else Index: bmiintrin.h =================================================================== --- bmiintrin.h (revision 197691) +++ bmiintrin.h (working copy) @@ -25,7 +25,7 @@ # error "Never use <bmiintrin.h> directly; include <x86intrin.h> instead." 
#endif -#ifndef __BMI__ +#if !defined (__BMI__) && !defined (__ALL_ISA__) # error "BMI instruction set not enabled" #endif /* __BMI__ */ Index: mmintrin.h =================================================================== --- mmintrin.h (revision 197691) +++ mmintrin.h (working copy) @@ -27,7 +27,7 @@ #ifndef _MMINTRIN_H_INCLUDED #define _MMINTRIN_H_INCLUDED -#ifndef __MMX__ +#if !defined (__MMX__) && !defined (__ALL_ISA__) # error "MMX instruction set not enabled" #else /* The Intel API is flexible enough that we must allow aliasing with other Index: nmmintrin.h =================================================================== --- nmmintrin.h (revision 197691) +++ nmmintrin.h (working copy) @@ -27,7 +27,7 @@ #ifndef _NMMINTRIN_H_INCLUDED #define _NMMINTRIN_H_INCLUDED -#ifndef __SSE4_2__ +#if !defined (__SSE4_2__) && !defined (__ALL_ISA__) # error "SSE4.2 instruction set not enabled" #else /* We just include SSE4.1 header file. */ Index: tbmintrin.h =================================================================== --- tbmintrin.h (revision 197691) +++ tbmintrin.h (working copy) @@ -25,7 +25,7 @@ # error "Never use <tbmintrin.h> directly; include <x86intrin.h> instead." #endif -#ifndef __TBM__ +#if !defined (__TBM__) && !defined (__ALL_ISA__) # error "TBM instruction set not enabled" #endif /* __TBM__ */ Index: f16cintrin.h =================================================================== --- f16cintrin.h (revision 197691) +++ f16cintrin.h (working copy) @@ -25,7 +25,7 @@ # error "Never use <f16intrin.h> directly; include <x86intrin.h> or <immintrin.h> instead." 
#endif -#ifndef __F16C__ +#if !defined (__F16C__) && !defined (__ALL_ISA__) # error "F16C instruction set not enabled" #else Index: i386.opt =================================================================== --- i386.opt (revision 197691) +++ i386.opt (working copy) @@ -626,3 +626,7 @@ Split 32-byte AVX unaligned store mrtm Target Report Mask(ISA_RTM) Var(ix86_isa_flags) Save Support RTM built-in functions and code generation + +mgenerate-builtins +Target Report Var(generate_target_builtins) Save +Generate all target builtins that are otherwise only generated when the approrpriate ISA is turned on. Index: i386-c.c =================================================================== --- i386-c.c (revision 197691) +++ i386-c.c (working copy) @@ -54,6 +54,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_fla int last_arch_char = ix86_arch_string[arch_len - 1]; int last_tune_char = ix86_tune_string[tune_len - 1]; + if (generate_target_builtins) + def_or_undef (parse_in, "__ALL_ISA__"); + /* Built-ins based on -march=. */ switch (arch) { Index: bmi2intrin.h =================================================================== --- bmi2intrin.h (revision 197691) +++ bmi2intrin.h (working copy) @@ -25,7 +25,7 @@ # error "Never use <bmi2intrin.h> directly; include <x86intrin.h> instead." #endif -#ifndef __BMI2__ +#if !defined (__BMI2__) && !defined (__ALL_ISA__) # error "BMI2 instruction set not enabled" #endif /* __BMI2__ */ Index: lzcntintrin.h =================================================================== --- lzcntintrin.h (revision 197691) +++ lzcntintrin.h (working copy) @@ -25,7 +25,7 @@ # error "Never use <lzcntintrin.h> directly; include <x86intrin.h> instead." 
#endif -#ifndef __LZCNT__ +#if !defined (__LZCNT__) && !defined (__ALL_ISA__) # error "LZCNT instruction is not enabled" #endif /* __LZCNT__ */ Index: smmintrin.h =================================================================== --- smmintrin.h (revision 197691) +++ smmintrin.h (working copy) @@ -27,7 +27,7 @@ #ifndef _SMMINTRIN_H_INCLUDED #define _SMMINTRIN_H_INCLUDED -#ifndef __SSE4_1__ +#if !defined (__SSE4_1__) && !defined (__ALL_ISA__) # error "SSE4.1 instruction set not enabled" #else Index: i386.c =================================================================== --- i386.c (revision 197691) +++ i386.c (working copy) @@ -26813,7 +26813,8 @@ def_builtin (HOST_WIDE_INT mask, const char *name, ix86_builtins_isa[(int) code].isa = mask; mask &= ~OPTION_MASK_ISA_64BIT; - if (mask == 0 + if (generate_target_builtins + || mask == 0 || (mask & ix86_isa_flags) != 0 || (lang_hooks.builtin_function == lang_hooks.builtin_function_ext_scope)) Index: wmmintrin.h =================================================================== --- wmmintrin.h (revision 197691) +++ wmmintrin.h (working copy) @@ -30,7 +30,7 @@ /* We need definitions from the SSE2 header file. 
*/ #include <emmintrin.h> -#if !defined (__AES__) && !defined (__PCLMUL__) +#if !defined (__AES__) && !defined (__PCLMUL__) && !defined (__ALL_ISA__) # error "AES/PCLMUL instructions not enabled" #else Index: pmmintrin.h =================================================================== --- pmmintrin.h (revision 197691) +++ pmmintrin.h (working copy) @@ -27,7 +27,7 @@ #ifndef _PMMINTRIN_H_INCLUDED #define _PMMINTRIN_H_INCLUDED -#ifndef __SSE3__ +#if !defined (__SSE3__) && !defined (__ALL_ISA__) # error "SSE3 instruction set not enabled" #else Index: tmmintrin.h =================================================================== --- tmmintrin.h (revision 197691) +++ tmmintrin.h (working copy) @@ -27,7 +27,7 @@ #ifndef _TMMINTRIN_H_INCLUDED #define _TMMINTRIN_H_INCLUDED -#ifndef __SSSE3__ +#if !defined (__SSSE3__) && !defined (__ALL_ISA__) # error "SSSE3 instruction set not enabled" #else Index: xmmintrin.h =================================================================== --- xmmintrin.h (revision 197691) +++ xmmintrin.h (working copy) @@ -27,7 +27,7 @@ #ifndef _XMMINTRIN_H_INCLUDED #define _XMMINTRIN_H_INCLUDED -#ifndef __SSE__ +#if !defined (__SSE__) && !defined (__ALL_ISA__) # error "SSE instruction set not enabled" #else Index: popcntintrin.h =================================================================== --- popcntintrin.h (revision 197691) +++ popcntintrin.h (working copy) @@ -21,7 +21,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. 
*/ -#ifndef __POPCNT__ +#if !defined (__POPCNT__) && !defined (__ALL_ISA__) # error "POPCNT instruction set not enabled" #endif /* __POPCNT__ */ Index: ammintrin.h =================================================================== --- ammintrin.h (revision 197691) +++ ammintrin.h (working copy) @@ -27,7 +27,7 @@ #ifndef _AMMINTRIN_H_INCLUDED #define _AMMINTRIN_H_INCLUDED -#ifndef __SSE4A__ +#if !defined (__SSE4A__) && !defined (__ALL_ISA__) # error "SSE4A instruction set not enabled" #else