Updated patch. Solved the __attribute((target("arch=corei7-avx"))) problem by defining proper architectures for the recent Intel families instead of renaming submodels.
I think the patch is starting to touch rather too many different details; perhaps it should be split up, or is it good as is? Regards, Allan
Index: gcc/ChangeLog =================================================================== --- gcc/ChangeLog (revision 206065) +++ gcc/ChangeLog (working copy) @@ -1,3 +1,9 @@ +2013-12-14 Allan Sandfeld Jensen <sandf...@kde.org> + + PR gcc/59422 + * config/i386/i386.c: Extend function multiversioning + to better support recent Intel and AMD models. + 2013-12-17 Jan Hubicka <hubi...@ucw.cz> * ipa-devirt.c (get_polymorphic_call_info): Fix offset calculatoin Index: gcc/config/i386/i386.c =================================================================== --- gcc/config/i386/i386.c (revision 206065) +++ gcc/config/i386/i386.c (working copy) @@ -29965,9 +29965,14 @@ P_PROC_SSE4_2, P_POPCNT, P_AVX, + P_PROC_AVX, + P_FMA4, + P_XOP, + P_PROC_XOP, + P_FMA, + P_PROC_FMA, P_AVX2, - P_FMA, - P_PROC_FMA + P_PROC_AVX2 }; enum feature_priority priority = P_ZERO; @@ -29986,11 +29991,15 @@ {"sse", P_SSE}, {"sse2", P_SSE2}, {"sse3", P_SSE3}, + {"sse4a", P_SSE4_a}, {"ssse3", P_SSSE3}, {"sse4.1", P_SSE4_1}, {"sse4.2", P_SSE4_2}, {"popcnt", P_POPCNT}, {"avx", P_AVX}, + {"fma4", P_FMA4}, + {"xop", P_XOP}, + {"fma", P_FMA}, {"avx2", P_AVX2} }; @@ -30044,25 +30053,49 @@ break; case PROCESSOR_COREI7_AVX: arg_str = "corei7-avx"; - priority = P_PROC_SSE4_2; + priority = P_PROC_AVX; break; + case PROCESSOR_HASWELL: + arg_str = "core-avx2"; + priority = P_PROC_AVX2; + break; case PROCESSOR_ATOM: arg_str = "atom"; priority = P_PROC_SSSE3; break; + case PROCESSOR_SLM: + arg_str = "slm"; + priority = P_PROC_SSE4_2; + break; case PROCESSOR_AMDFAM10: arg_str = "amdfam10h"; priority = P_PROC_SSE4_a; break; + case PROCESSOR_BTVER1: + arg_str = "bobcat"; + priority = P_PROC_SSE4_a; + break; + case PROCESSOR_BTVER2: + arg_str = "jaguar"; + priority = P_PROC_AVX; + break; case PROCESSOR_BDVER1: arg_str = "bdver1"; - priority = P_PROC_FMA; + priority = P_PROC_XOP; break; case PROCESSOR_BDVER2: arg_str = "bdver2"; priority = P_PROC_FMA; break; - } + case PROCESSOR_BDVER3: + arg_str = "bdver3"; + priority = 
P_PROC_FMA; + break; + case PROCESSOR_BDVER4: + arg_str = "bdver4"; + priority = P_PROC_AVX2; + break; + } } cl_target_option_restore (&global_options, &cur_target); @@ -30922,9 +30955,13 @@ F_SSE2, F_SSE3, F_SSSE3, + F_SSE4_a, F_SSE4_1, F_SSE4_2, F_AVX, + F_FMA4, + F_XOP, + F_FMA, F_AVX2, F_MAX }; @@ -30943,6 +30980,10 @@ M_AMDFAM10H, M_AMDFAM15H, M_INTEL_SLM, + M_INTEL_COREI7_AVX, + M_INTEL_CORE_AVX2, + M_AMD_BOBCAT, + M_AMD_JAGUAR, M_CPU_SUBTYPE_START, M_INTEL_COREI7_NEHALEM, M_INTEL_COREI7_WESTMERE, @@ -30953,7 +30994,9 @@ M_AMDFAM15H_BDVER1, M_AMDFAM15H_BDVER2, M_AMDFAM15H_BDVER3, - M_AMDFAM15H_BDVER4 + M_AMDFAM15H_BDVER4, + M_INTEL_COREI7_IVYBRIDGE, + M_INTEL_CORE_HASWELL }; static struct _arch_names_table @@ -30971,11 +31014,17 @@ {"corei7", M_INTEL_COREI7}, {"nehalem", M_INTEL_COREI7_NEHALEM}, {"westmere", M_INTEL_COREI7_WESTMERE}, + {"corei7-avx", M_INTEL_COREI7_AVX}, {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE}, + {"ivybridge", M_INTEL_COREI7_IVYBRIDGE}, + {"core-avx2", M_INTEL_CORE_AVX2}, + {"haswell", M_INTEL_CORE_HASWELL}, {"amdfam10h", M_AMDFAM10H}, {"barcelona", M_AMDFAM10H_BARCELONA}, {"shanghai", M_AMDFAM10H_SHANGHAI}, {"istanbul", M_AMDFAM10H_ISTANBUL}, + {"bobcat", M_AMD_BOBCAT}, + {"jaguar", M_AMD_JAGUAR}, {"amdfam15h", M_AMDFAM15H}, {"bdver1", M_AMDFAM15H_BDVER1}, {"bdver2", M_AMDFAM15H_BDVER2}, @@ -30997,9 +31046,13 @@ {"sse2", F_SSE2}, {"sse3", F_SSE3}, {"ssse3", F_SSSE3}, + {"sse4a", F_SSE4_a}, {"sse4.1", F_SSE4_1}, {"sse4.2", F_SSE4_2}, {"avx", F_AVX}, + {"fma4", F_FMA4}, + {"xop", F_XOP}, + {"fma", F_FMA}, {"avx2", F_AVX2} }; Index: gcc/testsuite/gcc.target/i386/funcspec-5.c =================================================================== --- gcc/testsuite/gcc.target/i386/funcspec-5.c (revision 206065) +++ gcc/testsuite/gcc.target/i386/funcspec-5.c (working copy) @@ -17,7 +17,9 @@ extern void test_sse4_1 (void) __attribute__((__target__("sse4.1"))); extern void test_sse4_2 (void) __attribute__((__target__("sse4.2"))); extern void 
test_sse4a (void) __attribute__((__target__("sse4a"))); +extern void test_fma (void) __attribute__((__target__("fma"))); extern void test_fma4 (void) __attribute__((__target__("fma4"))); +extern void test_xop (void) __attribute__((__target__("xop"))); extern void test_ssse3 (void) __attribute__((__target__("ssse3"))); extern void test_tbm (void) __attribute__((__target__("tbm"))); extern void test_avx (void) __attribute__((__target__("avx"))); @@ -37,7 +39,9 @@ extern void test_no_sse4_1 (void) __attribute__((__target__("no-sse4.1"))); extern void test_no_sse4_2 (void) __attribute__((__target__("no-sse4.2"))); extern void test_no_sse4a (void) __attribute__((__target__("no-sse4a"))); +extern void test_no_fma (void) __attribute__((__target__("no-fma"))); extern void test_no_fma4 (void) __attribute__((__target__("no-fma4"))); +extern void test_no_xop (void) __attribute__((__target__("no-xop"))); extern void test_no_ssse3 (void) __attribute__((__target__("no-ssse3"))); extern void test_no_tbm (void) __attribute__((__target__("no-tbm"))); extern void test_no_avx (void) __attribute__((__target__("no-avx"))); @@ -63,6 +67,9 @@ extern void test_arch_prescott (void) __attribute__((__target__("arch=prescott"))); extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona"))); extern void test_arch_core2 (void) __attribute__((__target__("arch=core2"))); +extern void test_arch_corei7 (void) __attribute__((__target__("arch=corei7"))); +extern void test_arch_corei7_avx (void) __attribute__((__target__("arch=corei7-avx"))); +extern void test_arch_core_avx2 (void) __attribute__((__target__("arch=core-avx2"))); extern void test_arch_geode (void) __attribute__((__target__("arch=geode"))); extern void test_arch_k6 (void) __attribute__((__target__("arch=k6"))); extern void test_arch_k6_2 (void) __attribute__((__target__("arch=k6-2"))); @@ -81,6 +88,9 @@ extern void test_arch_athlon_fx (void) __attribute__((__target__("arch=athlon-fx"))); extern void test_arch_amdfam10 
(void) __attribute__((__target__("arch=amdfam10"))); extern void test_arch_barcelona (void) __attribute__((__target__("arch=barcelona"))); +extern void test_arch_bdver1 (void) __attribute__((__target__("arch=bdver1"))); +extern void test_arch_bdver2 (void) __attribute__((__target__("arch=bdver2"))); +extern void test_arch_bdver3 (void) __attribute__((__target__("arch=bdver3"))); extern void test_arch_foo (void) __attribute__((__target__("arch=foo"))); /* { dg-error "bad value" } */ extern void test_tune_i386 (void) __attribute__((__target__("tune=i386"))); @@ -103,6 +113,9 @@ extern void test_tune_prescott (void) __attribute__((__target__("tune=prescott"))); extern void test_tune_nocona (void) __attribute__((__target__("tune=nocona"))); extern void test_tune_core2 (void) __attribute__((__target__("tune=core2"))); +extern void test_tune_corei7 (void) __attribute__((__target__("tune=corei7"))); +extern void test_tune_corei7_avx (void) __attribute__((__target__("tune=corei7-avx"))); +extern void test_tune_core_avx2 (void) __attribute__((__target__("tune=core-avx2"))); extern void test_tune_geode (void) __attribute__((__target__("tune=geode"))); extern void test_tune_k6 (void) __attribute__((__target__("tune=k6"))); extern void test_tune_k6_2 (void) __attribute__((__target__("tune=k6-2"))); @@ -121,6 +134,9 @@ extern void test_tune_athlon_fx (void) __attribute__((__target__("tune=athlon-fx"))); extern void test_tune_amdfam10 (void) __attribute__((__target__("tune=amdfam10"))); extern void test_tune_barcelona (void) __attribute__((__target__("tune=barcelona"))); +extern void test_tune_bdver1 (void) __attribute__((__target__("tune=bdver1"))); +extern void test_tune_bdver2 (void) __attribute__((__target__("tune=bdver2"))); +extern void test_tune_bdver3 (void) __attribute__((__target__("tune=bdver3"))); extern void test_tune_generic (void) __attribute__((__target__("tune=generic"))); extern void test_tune_foo (void) __attribute__((__target__("tune=foo"))); /* { dg-error 
"bad value" } */ Index: libgcc/ChangeLog =================================================================== --- libgcc/ChangeLog (revision 206065) +++ libgcc/ChangeLog (working copy) @@ -1,3 +1,9 @@ +2013-12-14 Allan Sandfeld Jensen <sandf...@kde.org> + + PR gcc/59422 + * config/i386/cpuinfo.c: Detect sse4a, fma4, xop and fma + ISAs and recent Intel and AMD models. + 2013-12-12 Zhenqiang Chen <zhenqiang.c...@arm.com> * config.host (arm*-*-uclinux*): Move t-arm before t-bpabi. Index: libgcc/config/i386/cpuinfo.c =================================================================== --- libgcc/config/i386/cpuinfo.c (revision 206065) +++ libgcc/config/i386/cpuinfo.c (working copy) @@ -62,6 +62,10 @@ AMDFAM10H, AMDFAM15H, INTEL_SLM, + INTEL_COREI7_AVX, + INTEL_CORE_AVX2, + AMD_BOBCAT, + AMD_JAGUAR, CPU_TYPE_MAX }; @@ -75,6 +79,10 @@ AMDFAM10H_ISTANBUL, AMDFAM15H_BDVER1, AMDFAM15H_BDVER2, + AMDFAM15H_BDVER3, + AMDFAM15H_BDVER4, + INTEL_COREI7_IVYBRIDGE, + INTEL_CORE_HASWELL, CPU_SUBTYPE_MAX }; @@ -89,9 +97,13 @@ FEATURE_SSE2, FEATURE_SSE3, FEATURE_SSSE3, + FEATURE_SSE4_a, FEATURE_SSE4_1, FEATURE_SSE4_2, FEATURE_AVX, + FEATURE_FMA4, + FEATURE_XOP, + FEATURE_FMA, FEATURE_AVX2 }; @@ -113,37 +125,46 @@ { /* AMD Family 10h. */ case 0x10: + __cpu_model.__cpu_type = AMDFAM10H; switch (model) { case 0x2: /* Barcelona. */ - __cpu_model.__cpu_type = AMDFAM10H; __cpu_model.__cpu_subtype = AMDFAM10H_BARCELONA; break; case 0x4: /* Shanghai. */ - __cpu_model.__cpu_type = AMDFAM10H; __cpu_model.__cpu_subtype = AMDFAM10H_SHANGHAI; break; case 0x8: /* Istanbul. */ - __cpu_model.__cpu_type = AMDFAM10H; __cpu_model.__cpu_subtype = AMDFAM10H_ISTANBUL; break; default: break; } break; - /* AMD Family 15h. */ + /* AMD Family 14h "Bobcat". */ + case 0x14: + __cpu_model.__cpu_type = AMD_BOBCAT; + break; + /* AMD Family 15h "Bulldozer". */ case 0x15: __cpu_model.__cpu_type = AMDFAM15H; /* Bulldozer version 1. 
*/ if ( model <= 0xf) __cpu_model.__cpu_subtype = AMDFAM15H_BDVER1; - /* Bulldozer version 2. */ - if (model >= 0x10 && model <= 0x1f) - __cpu_model.__cpu_subtype = AMDFAM15H_BDVER2; + /* Bulldozer version 2 "Piledriver" */ + if (model >= 0x10 && model <= 0x2f) + __cpu_model.__cpu_subtype = AMDFAM15H_BDVER2; + /* Bulldozer version 3 "Steamroller" */ + if (model >= 0x30 && model <= 0x4f) + __cpu_model.__cpu_subtype = AMDFAM15H_BDVER3; break; + /* AMD Family 16h "Jaguar". */ + case 0x16: + __cpu_model.__cpu_type = AMD_JAGUAR; + break; default: break; } @@ -193,9 +214,23 @@ case 0x2a: case 0x2d: /* Sandy Bridge. */ - __cpu_model.__cpu_type = INTEL_COREI7; + __cpu_model.__cpu_type = INTEL_COREI7_AVX; __cpu_model.__cpu_subtype = INTEL_COREI7_SANDYBRIDGE; break; + case 0x3a: + case 0x3e: + /* Ivy Bridge. */ + __cpu_model.__cpu_type = INTEL_COREI7_AVX; + __cpu_model.__cpu_subtype = INTEL_COREI7_IVYBRIDGE; + break; + case 0x3c: + case 0x3f: + case 0x45: + case 0x46: + /* Haswell. */ + __cpu_model.__cpu_type = INTEL_CORE_AVX2; + __cpu_model.__cpu_subtype = INTEL_CORE_HASWELL; + break; case 0x17: case 0x1d: /* Penryn. */ @@ -242,6 +277,8 @@ features |= (1 << FEATURE_SSE4_2); if (ecx & bit_AVX) features |= (1 << FEATURE_AVX); + if (ecx & bit_FMA) + features |= (1 << FEATURE_FMA); /* Get Advanced Features at level 7 (eax = 7, ecx = 0). */ if (max_cpuid_level >= 7) @@ -252,6 +289,23 @@ features |= (1 << FEATURE_AVX2); } + unsigned int ext_level; + unsigned int eax, ebx; + /* Check cpuid level of extended features. */ + __cpuid (0x80000000, ext_level, ebx, ecx, edx); + + if (ext_level > 0x80000000) + { + __cpuid (0x80000001, eax, ebx, ecx, edx); + + if (ecx & bit_SSE4a) + features |= (1 << FEATURE_SSE4_a); + if (ecx & bit_FMA4) + features |= (1 << FEATURE_FMA4); + if (ecx & bit_XOP) + features |= (1 << FEATURE_XOP); + } + __cpu_model.__cpu_features[0] = features; }