On Monday 23 December 2013, H.J. Lu wrote: > > If you use > > {"corei7-avx", M_INTEL_COREI7_SANDYBRIDGE}, > {"core-avx2", M_INTEL_COREI7_HASWELL}, > > will it cause any problems? When there are both > Actually, it seems I don't need these definitions any more after your clean-up of the Intel architecture names. I have attached the patch with them removed (and with the haswell enums renamed back to corei7_haswell).
If both target("arch=corei7-avx") and target("arch=sandybridge") are present, the dispatcher appears to choose "sandybridge". If you want a warning for duplicates in this case, I suggest adding it in a later patch. Allan
Index: gcc/config/i386/i386.c =================================================================== --- gcc/config/i386/i386.c (revision 206179) +++ gcc/config/i386/i386.c (working copy) @@ -29970,16 +29970,21 @@ P_SSE3, P_SSSE3, P_PROC_SSSE3, - P_SSE4_a, - P_PROC_SSE4_a, + P_SSE4_A, + P_PROC_SSE4_A, P_SSE4_1, P_SSE4_2, P_PROC_SSE4_2, P_POPCNT, P_AVX, + P_PROC_AVX, + P_FMA4, + P_XOP, + P_PROC_XOP, + P_FMA, + P_PROC_FMA, P_AVX2, - P_FMA, - P_PROC_FMA + P_PROC_AVX2 }; enum feature_priority priority = P_ZERO; @@ -29998,11 +30003,15 @@ {"sse", P_SSE}, {"sse2", P_SSE2}, {"sse3", P_SSE3}, + {"sse4a", P_SSE4_A}, {"ssse3", P_SSSE3}, {"sse4.1", P_SSE4_1}, {"sse4.2", P_SSE4_2}, {"popcnt", P_POPCNT}, {"avx", P_AVX}, + {"fma4", P_FMA4}, + {"xop", P_XOP}, + {"fma", P_FMA}, {"avx2", P_AVX2} }; @@ -30054,26 +30063,50 @@ arg_str = "nehalem"; priority = P_PROC_SSE4_2; break; - case PROCESSOR_SANDYBRIDGE: - arg_str = "sandybridge"; - priority = P_PROC_SSE4_2; - break; + case PROCESSOR_SANDYBRIDGE: + arg_str = "sandybridge"; + priority = P_PROC_AVX; + break; + case PROCESSOR_HASWELL: + arg_str = "haswell"; + priority = P_PROC_AVX2; + break; case PROCESSOR_BONNELL: arg_str = "bonnell"; priority = P_PROC_SSSE3; break; + case PROCESSOR_SILVERMONT: + arg_str = "silvermont"; + priority = P_PROC_SSE4_2; + break; case PROCESSOR_AMDFAM10: arg_str = "amdfam10h"; - priority = P_PROC_SSE4_a; + priority = P_PROC_SSE4_A; break; + case PROCESSOR_BTVER1: + arg_str = "bobcat"; + priority = P_PROC_SSE4_A; + break; + case PROCESSOR_BTVER2: + arg_str = "jaguar"; + priority = P_PROC_AVX; + break; case PROCESSOR_BDVER1: arg_str = "bdver1"; - priority = P_PROC_FMA; + priority = P_PROC_XOP; break; case PROCESSOR_BDVER2: arg_str = "bdver2"; priority = P_PROC_FMA; break; + case PROCESSOR_BDVER3: + arg_str = "bdver3"; + priority = P_PROC_FMA; + break; + case PROCESSOR_BDVER4: + arg_str = "bdver4"; + priority = P_PROC_AVX2; + break; } } @@ -30938,6 +30971,10 @@ F_SSE4_2, F_AVX, F_AVX2, + F_SSE4_A, + F_FMA4, + 
F_XOP, + F_FMA, F_MAX }; @@ -30955,6 +30992,8 @@ M_AMDFAM10H, M_AMDFAM15H, M_INTEL_SILVERMONT, + M_AMD_BOBCAT, + M_AMD_JAGUAR, M_CPU_SUBTYPE_START, M_INTEL_COREI7_NEHALEM, M_INTEL_COREI7_WESTMERE, @@ -30965,7 +31004,9 @@ M_AMDFAM15H_BDVER1, M_AMDFAM15H_BDVER2, M_AMDFAM15H_BDVER3, - M_AMDFAM15H_BDVER4 + M_AMDFAM15H_BDVER4, + M_INTEL_COREI7_IVYBRIDGE, + M_INTEL_COREI7_HASWELL }; static struct _arch_names_table @@ -30984,15 +31025,21 @@ {"nehalem", M_INTEL_COREI7_NEHALEM}, {"westmere", M_INTEL_COREI7_WESTMERE}, {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE}, + {"ivybridge", M_INTEL_COREI7_IVYBRIDGE}, + {"haswell", M_INTEL_COREI7_HASWELL}, + {"bonnell", M_INTEL_BONNELL}, + {"silvermont", M_INTEL_SILVERMONT}, {"amdfam10h", M_AMDFAM10H}, {"barcelona", M_AMDFAM10H_BARCELONA}, {"shanghai", M_AMDFAM10H_SHANGHAI}, {"istanbul", M_AMDFAM10H_ISTANBUL}, + {"bobcat", M_AMD_BOBCAT}, {"amdfam15h", M_AMDFAM15H}, {"bdver1", M_AMDFAM15H_BDVER1}, {"bdver2", M_AMDFAM15H_BDVER2}, {"bdver3", M_AMDFAM15H_BDVER3}, {"bdver4", M_AMDFAM15H_BDVER4}, + {"jaguar", M_AMD_JAGUAR}, }; static struct _isa_names_table @@ -31009,9 +31056,13 @@ {"sse2", F_SSE2}, {"sse3", F_SSE3}, {"ssse3", F_SSSE3}, + {"sse4a", F_SSE4_A}, {"sse4.1", F_SSE4_1}, {"sse4.2", F_SSE4_2}, {"avx", F_AVX}, + {"fma4", F_FMA4}, + {"xop", F_XOP}, + {"fma", F_FMA}, {"avx2", F_AVX2} }; Index: gcc/testsuite/gcc.target/i386/funcspec-5.c =================================================================== --- gcc/testsuite/gcc.target/i386/funcspec-5.c (revision 206179) +++ gcc/testsuite/gcc.target/i386/funcspec-5.c (working copy) @@ -17,7 +17,9 @@ extern void test_sse4_1 (void) __attribute__((__target__("sse4.1"))); extern void test_sse4_2 (void) __attribute__((__target__("sse4.2"))); extern void test_sse4a (void) __attribute__((__target__("sse4a"))); +extern void test_fma (void) __attribute__((__target__("fma"))); extern void test_fma4 (void) __attribute__((__target__("fma4"))); +extern void test_xop (void) 
__attribute__((__target__("xop"))); extern void test_ssse3 (void) __attribute__((__target__("ssse3"))); extern void test_tbm (void) __attribute__((__target__("tbm"))); extern void test_avx (void) __attribute__((__target__("avx"))); @@ -37,7 +39,9 @@ extern void test_no_sse4_1 (void) __attribute__((__target__("no-sse4.1"))); extern void test_no_sse4_2 (void) __attribute__((__target__("no-sse4.2"))); extern void test_no_sse4a (void) __attribute__((__target__("no-sse4a"))); +extern void test_no_fma (void) __attribute__((__target__("no-fma"))); extern void test_no_fma4 (void) __attribute__((__target__("no-fma4"))); +extern void test_no_xop (void) __attribute__((__target__("no-xop"))); extern void test_no_ssse3 (void) __attribute__((__target__("no-ssse3"))); extern void test_no_tbm (void) __attribute__((__target__("no-tbm"))); extern void test_no_avx (void) __attribute__((__target__("no-avx"))); @@ -63,6 +67,9 @@ extern void test_arch_prescott (void) __attribute__((__target__("arch=prescott"))); extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona"))); extern void test_arch_core2 (void) __attribute__((__target__("arch=core2"))); +extern void test_arch_corei7 (void) __attribute__((__target__("arch=corei7"))); +extern void test_arch_corei7_avx (void) __attribute__((__target__("arch=corei7-avx"))); +extern void test_arch_core_avx2 (void) __attribute__((__target__("arch=core-avx2"))); extern void test_arch_geode (void) __attribute__((__target__("arch=geode"))); extern void test_arch_k6 (void) __attribute__((__target__("arch=k6"))); extern void test_arch_k6_2 (void) __attribute__((__target__("arch=k6-2"))); @@ -81,6 +88,9 @@ extern void test_arch_athlon_fx (void) __attribute__((__target__("arch=athlon-fx"))); extern void test_arch_amdfam10 (void) __attribute__((__target__("arch=amdfam10"))); extern void test_arch_barcelona (void) __attribute__((__target__("arch=barcelona"))); +extern void test_arch_bdver1 (void) 
__attribute__((__target__("arch=bdver1"))); +extern void test_arch_bdver2 (void) __attribute__((__target__("arch=bdver2"))); +extern void test_arch_bdver3 (void) __attribute__((__target__("arch=bdver3"))); extern void test_arch_foo (void) __attribute__((__target__("arch=foo"))); /* { dg-error "bad value" } */ extern void test_tune_i386 (void) __attribute__((__target__("tune=i386"))); @@ -103,6 +113,9 @@ extern void test_tune_prescott (void) __attribute__((__target__("tune=prescott"))); extern void test_tune_nocona (void) __attribute__((__target__("tune=nocona"))); extern void test_tune_core2 (void) __attribute__((__target__("tune=core2"))); +extern void test_tune_corei7 (void) __attribute__((__target__("tune=corei7"))); +extern void test_tune_corei7_avx (void) __attribute__((__target__("tune=corei7-avx"))); +extern void test_tune_core_avx2 (void) __attribute__((__target__("tune=core-avx2"))); extern void test_tune_geode (void) __attribute__((__target__("tune=geode"))); extern void test_tune_k6 (void) __attribute__((__target__("tune=k6"))); extern void test_tune_k6_2 (void) __attribute__((__target__("tune=k6-2"))); @@ -121,6 +134,9 @@ extern void test_tune_athlon_fx (void) __attribute__((__target__("tune=athlon-fx"))); extern void test_tune_amdfam10 (void) __attribute__((__target__("tune=amdfam10"))); extern void test_tune_barcelona (void) __attribute__((__target__("tune=barcelona"))); +extern void test_tune_bdver1 (void) __attribute__((__target__("tune=bdver1"))); +extern void test_tune_bdver2 (void) __attribute__((__target__("tune=bdver2"))); +extern void test_tune_bdver3 (void) __attribute__((__target__("tune=bdver3"))); extern void test_tune_generic (void) __attribute__((__target__("tune=generic"))); extern void test_tune_foo (void) __attribute__((__target__("tune=foo"))); /* { dg-error "bad value" } */ Index: libgcc/config/i386/cpuinfo.c =================================================================== --- libgcc/config/i386/cpuinfo.c (revision 206179) +++ 
libgcc/config/i386/cpuinfo.c (working copy) @@ -62,6 +62,8 @@ AMDFAM10H, AMDFAM15H, INTEL_SILVERMONT, + AMD_BOBCAT, + AMD_JAGUAR, CPU_TYPE_MAX }; @@ -75,6 +77,10 @@ AMDFAM10H_ISTANBUL, AMDFAM15H_BDVER1, AMDFAM15H_BDVER2, + AMDFAM15H_BDVER3, + AMDFAM15H_BDVER4, + INTEL_COREI7_IVYBRIDGE, + INTEL_COREI7_HASWELL, CPU_SUBTYPE_MAX }; @@ -92,7 +98,11 @@ FEATURE_SSE4_1, FEATURE_SSE4_2, FEATURE_AVX, - FEATURE_AVX2 + FEATURE_AVX2, + FEATURE_SSE4_A, + FEATURE_FMA4, + FEATURE_XOP, + FEATURE_FMA }; struct __processor_model @@ -113,37 +123,46 @@ { /* AMD Family 10h. */ case 0x10: + __cpu_model.__cpu_type = AMDFAM10H; switch (model) { case 0x2: /* Barcelona. */ - __cpu_model.__cpu_type = AMDFAM10H; __cpu_model.__cpu_subtype = AMDFAM10H_BARCELONA; break; case 0x4: /* Shanghai. */ - __cpu_model.__cpu_type = AMDFAM10H; __cpu_model.__cpu_subtype = AMDFAM10H_SHANGHAI; break; case 0x8: /* Istanbul. */ - __cpu_model.__cpu_type = AMDFAM10H; __cpu_model.__cpu_subtype = AMDFAM10H_ISTANBUL; break; default: break; } break; - /* AMD Family 15h. */ + /* AMD Family 14h "Bobcat". */ + case 0x14: + __cpu_model.__cpu_type = AMD_BOBCAT; + break; + /* AMD Family 15h "Bulldozer". */ case 0x15: __cpu_model.__cpu_type = AMDFAM15H; /* Bulldozer version 1. */ if ( model <= 0xf) __cpu_model.__cpu_subtype = AMDFAM15H_BDVER1; - /* Bulldozer version 2. */ - if (model >= 0x10 && model <= 0x1f) - __cpu_model.__cpu_subtype = AMDFAM15H_BDVER2; + /* Bulldozer version 2 "Piledriver" */ + if (model >= 0x10 && model <= 0x2f) + __cpu_model.__cpu_subtype = AMDFAM15H_BDVER2; + /* Bulldozer version 3 "Steamroller" */ + if (model >= 0x30 && model <= 0x4f) + __cpu_model.__cpu_subtype = AMDFAM15H_BDVER3; break; + /* AMD Family 16h "Jaguar". */ + case 0x16: + __cpu_model.__cpu_type = AMD_JAGUAR; + break; default: break; } @@ -196,6 +215,20 @@ __cpu_model.__cpu_type = INTEL_COREI7; __cpu_model.__cpu_subtype = INTEL_COREI7_SANDYBRIDGE; break; + case 0x3a: + case 0x3e: + /* Ivy Bridge. 
*/ + __cpu_model.__cpu_type = INTEL_COREI7; + __cpu_model.__cpu_subtype = INTEL_COREI7_IVYBRIDGE; + break; + case 0x3c: + case 0x3f: + case 0x45: + case 0x46: + /* Haswell. */ + __cpu_model.__cpu_type = INTEL_COREI7; + __cpu_model.__cpu_subtype = INTEL_COREI7_HASWELL; + break; case 0x17: case 0x1d: /* Penryn. */ @@ -242,6 +275,8 @@ features |= (1 << FEATURE_SSE4_2); if (ecx & bit_AVX) features |= (1 << FEATURE_AVX); + if (ecx & bit_FMA) + features |= (1 << FEATURE_FMA); /* Get Advanced Features at level 7 (eax = 7, ecx = 0). */ if (max_cpuid_level >= 7) @@ -252,6 +287,23 @@ features |= (1 << FEATURE_AVX2); } + unsigned int ext_level; + unsigned int eax, ebx; + /* Check cpuid level of extended features. */ + __cpuid (0x80000000, ext_level, ebx, ecx, edx); + + if (ext_level > 0x80000000) + { + __cpuid (0x80000001, eax, ebx, ecx, edx); + + if (ecx & bit_SSE4a) + features |= (1 << FEATURE_SSE4_A); + if (ecx & bit_FMA4) + features |= (1 << FEATURE_FMA4); + if (ecx & bit_XOP) + features |= (1 << FEATURE_XOP); + } + __cpu_model.__cpu_features[0] = features; }