On Mon, Sep 21, 2015 at 6:57 PM, Kirill Yukhin <kirill.yuk...@gmail.com> wrote: > Hi Uroš, Jakub, > eOn 21 Sep 16:27, Jakub Jelinek wrote: >> On Mon, Sep 21, 2015 at 05:14:45PM +0300, Kirill Yukhin wrote: >> > Hello, >> > This patch introduces switches necessary for new Intel Server CPU >> > (code-named Skylake). >> > >> > Bootstrapped & regtested. >> Is it a good idea to introduce further abbrevs like this? >> I thought we went away from e.g. slm to silvermont, we didn't >> introduce hsw but haswell, etc. >> So, wouldn't it be better to add skylake-xeon instead of skx? > Intel Xeon E3 does not support AVX-512*. > So, I'd suggest skylake-avx512.
Even better, and fits existing practice, e.g. corei7-avx. > gcc/ > * config.gcc: Support "skylake-avx512". > * config/i386/i386-c.c (ix86_target_macros_internal): Handle > PROCESSOR_SKYLAKE_AVX512. > * config/i386/i386.c (m_SKYLAKE_AVX512): Define. > (processor_target_table): Add "skylake-avx512". > (PTA_SKYLAKE_AVX512): Define. > (ix86_option_override_internal): Add "skylake_avx512". > (fold_builtin_cpu): Handle "skylake_avx512", add F_AVX512VL > F_AVX512BW, F_AVX512DQ, F_AVX512ER, F_AVX512PF, F_AVX512CD. > * config/i386/i386.h (TARGET_SKYLAKE_AVX512): Define. > (processor_type): Add PROCESSOR_SKYLAKE_AVX512. > > libgcc/ > * libgcc/config/i386/cpuinfo.c (enum processor_features): Add > FEATURE_AVX512VL, FEATURE_AVX512BW, FEATURE_AVX512DQ, > FEATURE_AVX512CD, FEATURE_AVX512ER, FEATURE_AVX512PF. > (get_available_features): Habdle new features. > > gcc/testsuite/ > * gcc.target/i386/funcspec-5.c: Test avx512vl, avx512bw, > avx512dq, avx512cd, avx512er, avx512pf and skylake-avx512. > * gcc.target/i386/builtin_target.c: Test avx512vl, avx512bw, > avx512dq, avx512cd, avx512er and avx512pf. > > Patch in the bottom. Is it ok? Comments inline. > commit 69d2e45dbc6b944845e3ec91127e62f85ab51490 > Author: Kirill Yukhin <kirill.yuk...@intel.com> > Date: Fri Sep 18 14:03:54 2015 +0300 > > AVX-512. Introduce SKX CPU. > > diff --git a/gcc/config.gcc b/gcc/config.gcc > index 75807f5..efe4806 100644 > --- a/gcc/config.gcc > +++ b/gcc/config.gcc > @@ -595,7 +595,7 @@ x86_64_archs="amdfam10 athlon64 athlon64-sse3 barcelona > bdver1 bdver2 \ > bdver3 bdver4 btver1 btver2 k8 k8-sse3 opteron opteron-sse3 nocona \ > core2 corei7 corei7-avx core-avx-i core-avx2 atom slm nehalem westmere \ > sandybridge ivybridge haswell broadwell bonnell silvermont knl x86-64 \ > -native" > +native skylake-avx512" Please leave x86-64 and native at the last two places. They are generic entries. > # Additional x86 processors supported by --with-cpu=. Each processor > # MUST be separated by exactly one space. > diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c > index 35cecd0..5e583ae 100644 > --- a/gcc/config/i386/i386-c.c > +++ b/gcc/config/i386/i386-c.c > @@ -177,6 +177,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, > def_or_undef (parse_in, "__knl"); > def_or_undef (parse_in, "__knl__"); > break; > + case PROCESSOR_SKYLAKE_AVX512: > + def_or_undef (parse_in, "__skylake_avx512"); > + def_or_undef (parse_in, "__skylake_avx512__"); > + break; > /* use PROCESSOR_max to not set/unset the arch macro. */ > case PROCESSOR_max: > break; > @@ -286,6 +290,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, > case PROCESSOR_KNL: > def_or_undef (parse_in, "__tune_knl__"); > break; > + case PROCESSOR_SKYLAKE_AVX512: > + def_or_undef (parse_in, "__tune_skylake_avx512__"); > + break; > case PROCESSOR_IAMCU: > def_or_undef (parse_in, "__tune_iamcu__"); > break; > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c > index 00e7006..41065be 100644 > --- a/gcc/config/i386/i386.c > +++ b/gcc/config/i386/i386.c > @@ -2098,6 +2098,7 @@ const struct processor_costs *ix86_cost = &pentium_cost; > #define m_BONNELL (1<<PROCESSOR_BONNELL) > #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT) > #define m_KNL (1<<PROCESSOR_KNL) > +#define m_SKYLAKE_AVX512 (1<<PROCESSOT_SKYLAKE_AVX512) > #define m_INTEL (1<<PROCESSOR_INTEL) > > #define m_GEODE (1<<PROCESSOR_GEODE) > @@ -2567,6 +2568,7 @@ static const struct ptt > processor_target_table[PROCESSOR_max] = > {"bonnell", &atom_cost, 16, 15, 16, 7, 16}, > {"silvermont", &slm_cost, 16, 15, 16, 7, 16}, > {"knl", &slm_cost, 16, 15, 16, 7, 16}, > + {"skylake-avx512", &core_cost, 16, 10, 16, 10, 16}, > {"intel", &intel_cost, 16, 15, 16, 7, 16}, > {"geode", &geode_cost, 0, 0, 0, 0, 0}, > {"k6", &k6_cost, 32, 7, 32, 7, 32}, > @@ -3286,6 +3288,9 @@ ix86_option_override_internal (bool main_args_p, > (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED) > #define PTA_SKYLAKE \ > (PTA_BROADWELL | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES) > +#define PTA_SKYLAKE_AVX512 \ > + (PTA_SKYLAKE | PTA_AVX512F | PTA_AVX512CD | PTA_AVX512VL \ > + | PTA_AVX512BW | PTA_AVX512DQ) > #define PTA_KNL \ > (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD) > #define PTA_BONNELL \ > @@ -3349,6 +3354,7 @@ ix86_option_override_internal (bool main_args_p, > {"core-avx2", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL}, > {"broadwell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_BROADWELL}, > {"skylake", PROCESSOR_HASWELL, CPU_HASWELL, PTA_SKYLAKE}, > + {"skylake-avx512", PROCESSOR_HASWELL, CPU_HASWELL, PTA_SKYLAKE_AVX512}, > {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL}, > {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL}, > {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT}, > @@ -35620,6 +35626,12 @@ fold_builtin_cpu (tree fndecl, tree *args) > F_XOP, > F_FMA, > F_AVX512F, > + F_AVX512VL, > + F_AVX512BW, > + F_AVX512DQ, > + F_AVX512CD, > + F_AVX512ER, > + F_AVX512PF, > F_BMI, > F_BMI2, > F_AES, The above is an ABI change. You should add new entries at the end of the enum in cpuinfo.c and consequently here. Please note the comment above: /* This is the order of bit-fields in __processor_features in cpuinfo.c */ enum processor_features > @@ -35658,7 +35670,8 @@ fold_builtin_cpu (tree fndecl, tree *args) > M_INTEL_COREI7_IVYBRIDGE, > M_INTEL_COREI7_HASWELL, > M_INTEL_COREI7_BROADWELL, > - M_INTEL_COREI7_SKYLAKE > + M_INTEL_COREI7_SKYLAKE, > + M_INTEL_COREI7_SKYLAKE_AVX512 > }; The above is OK. > static struct _arch_names_table > @@ -35681,6 +35694,7 @@ fold_builtin_cpu (tree fndecl, tree *args) > {"haswell", M_INTEL_COREI7_HASWELL}, > {"broadwell", M_INTEL_COREI7_BROADWELL}, > {"skylake", M_INTEL_COREI7_SKYLAKE}, > + {"skylake-avx512", M_INTEL_COREI7_SKYLAKE_AVX512}, > {"bonnell", M_INTEL_BONNELL}, > {"silvermont", M_INTEL_SILVERMONT}, > {"knl", M_INTEL_KNL}, > @@ -35704,26 +35718,32 @@ fold_builtin_cpu (tree fndecl, tree *args) > } > const isa_names_table[] = > { > - {"cmov", F_CMOV}, > - {"mmx", F_MMX}, > - {"popcnt", F_POPCNT}, > - {"sse", F_SSE}, > - {"sse2", F_SSE2}, > - {"sse3", F_SSE3}, > - {"ssse3", F_SSSE3}, > - {"sse4a", F_SSE4_A}, > - {"sse4.1", F_SSE4_1}, > - {"sse4.2", F_SSE4_2}, > - {"avx", F_AVX}, > - {"fma4", F_FMA4}, > - {"xop", F_XOP}, > - {"fma", F_FMA}, > - {"avx2", F_AVX2}, > - {"avx512f",F_AVX512F}, > - {"bmi", F_BMI}, > - {"bmi2", F_BMI2}, > - {"aes", F_AES}, > - {"pclmul", F_PCLMUL} > + {"cmov", F_CMOV}, > + {"mmx", F_MMX}, > + {"popcnt", F_POPCNT}, > + {"sse", F_SSE}, > + {"sse2", F_SSE2}, > + {"sse3", F_SSE3}, > + {"ssse3", F_SSSE3}, > + {"sse4a", F_SSE4_A}, > + {"sse4.1", F_SSE4_1}, > + {"sse4.2", F_SSE4_2}, > + {"avx", F_AVX}, > + {"fma4", F_FMA4}, > + {"xop", F_XOP}, > + {"fma", F_FMA}, > + {"avx2", F_AVX2}, > + {"avx512f", F_AVX512F}, > + {"avx512vl",F_AVX512VL}, > + {"avx512bw",F_AVX512BW}, > + {"avx512dq",F_AVX512DQ}, > + {"avx512cd",F_AVX512CD}, > + {"avx512er",F_AVX512ER}, > + {"avx512pf",F_AVX512PF}, > + {"bmi", F_BMI}, > + {"bmi2", F_BMI2}, > + {"aes", F_AES}, > + {"pclmul", F_PCLMUL} > }; I suggest to maintain the sequence from cpuinfo.c also here. > tree __processor_model_type = build_processor_model_struct (); > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h > index 7bd23ec..46fbe42 100644 > --- a/gcc/config/i386/i386.h > +++ b/gcc/config/i386/i386.h > @@ -340,6 +340,7 @@ extern const struct processor_costs ix86_size_cost; > #define TARGET_BONNELL (ix86_tune == PROCESSOR_BONNELL) > #define TARGET_SILVERMONT (ix86_tune == PROCESSOR_SILVERMONT) > #define TARGET_KNL (ix86_tune == PROCESSOR_KNL) > +#define TARGET_SKYLAKE_AVX512 (ix86_tune == PROCESSOR_SKYLAKE_AVX512) > #define TARGET_INTEL (ix86_tune == PROCESSOR_INTEL) > #define TARGET_GENERIC (ix86_tune == PROCESSOR_GENERIC) > #define TARGET_AMDFAM10 (ix86_tune == PROCESSOR_AMDFAM10) > @@ -2290,6 +2291,7 @@ enum processor_type > PROCESSOR_BONNELL, > PROCESSOR_SILVERMONT, > PROCESSOR_KNL, > + PROCESSOR_SKYLAKE_AVX512, > PROCESSOR_INTEL, > PROCESSOR_GEODE, > PROCESSOR_K6, > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi > index 547ee2d..06aef1d 100644 > --- a/gcc/doc/invoke.texi > +++ b/gcc/doc/invoke.texi > @@ -22382,6 +22382,12 @@ SSSE3, SSE4.1, SSE4.2, POPCNT, AVX, AVX2, AES, > PCLMUL, FSGSBASE, RDRND, FMA, > BMI, BMI2, F16C, RDSEED, ADCX, PREFETCHW, AVX512F, AVX512PF, AVX512ER and > AVX512CD instruction set support. > > +@item skylake-avx512 > +Intel Skylake Server CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, > +SSSE3, SSE4.1, SSE4.2, POPCNT, AVX, AVX2, AES, PCLMUL, FSGSBASE, RDRND, FMA, > +BMI, BMI2, F16C, RDSEED, ADCX, PREFETCHW, CLFLUSHOPT, XSAVEC, XSAVES, > AVX512F, > +AVX512VL, AVX512BW, AVX512DQ and AVX512CD instruction set support. > + > @item k6 > AMD K6 CPU with MMX instruction set support. > > diff --git a/gcc/testsuite/gcc.target/i386/builtin_target.c > b/gcc/testsuite/gcc.target/i386/builtin_target.c > index 300d208..aff4559 100644 > --- a/gcc/testsuite/gcc.target/i386/builtin_target.c > +++ b/gcc/testsuite/gcc.target/i386/builtin_target.c > @@ -188,6 +188,18 @@ check_features (unsigned int ecx, unsigned int edx, > assert (__builtin_cpu_supports ("avx2")); > if (ebx & bit_AVX512F) > assert (__builtin_cpu_supports ("avx512f")); > + if (ebx & bit_AVX512VL) > + assert (__builtin_cpu_supports ("avx512vl")); > + if (ebx & bit_AVX512CD) > + assert (__builtin_cpu_supports ("avx512cd")); > + if (ebx & bit_AVX512PF) > + assert (__builtin_cpu_supports ("avx512pf")); > + if (ebx & bit_AVX512ER) > + assert (__builtin_cpu_supports ("avx512er")); > + if (ebx & bit_AVX512BW) > + assert (__builtin_cpu_supports ("avx512bw")); > + if (ebx & bit_AVX512DQ) > + assert (__builtin_cpu_supports ("avx512dq")); > } > } > > diff --git a/gcc/testsuite/gcc.target/i386/funcspec-5.c > b/gcc/testsuite/gcc.target/i386/funcspec-5.c > index 7185b5c..c43bc76 100644 > --- a/gcc/testsuite/gcc.target/i386/funcspec-5.c > +++ b/gcc/testsuite/gcc.target/i386/funcspec-5.c > @@ -25,6 +25,12 @@ extern void test_tbm (void) > __attribute__((__target__("tbm"))); > extern void test_avx (void) > __attribute__((__target__("avx"))); > extern void test_avx2 (void) > __attribute__((__target__("avx2"))); > extern void test_avx512f (void) > __attribute__((__target__("avx512f"))); > +extern void test_avx512vl(void) > __attribute__((__target__("avx512vl"))); > +extern void test_avx512bw(void) > __attribute__((__target__("avx512bw"))); > +extern void test_avx512dq(void) > __attribute__((__target__("avx512dq"))); > +extern void test_avx512er(void) > __attribute__((__target__("avx512er"))); > +extern void test_avx512pf(void) > __attribute__((__target__("avx512pf"))); > +extern void test_avx512cd(void) > __attribute__((__target__("avx512cd"))); > extern void test_bmi (void) > __attribute__((__target__("bmi"))); > extern void test_bmi2 (void) > __attribute__((__target__("bmi2"))); > > @@ -50,6 +56,12 @@ extern void test_no_tbm (void) > __attribute__((__target__("no-tbm"))); > extern void test_no_avx (void) > __attribute__((__target__("no-avx"))); > extern void test_no_avx2 (void) > __attribute__((__target__("no-avx2"))); > extern void test_no_avx512f (void) > __attribute__((__target__("no-avx512f"))); > +extern void test_no_avx512vl(void) > __attribute__((__target__("no-avx512vl"))); > +extern void test_no_avx512bw(void) > __attribute__((__target__("no-avx512bw"))); > +extern void test_no_avx512dq(void) > __attribute__((__target__("no-avx512dq"))); > +extern void test_no_avx512er(void) > __attribute__((__target__("no-avx512er"))); > +extern void test_bo_avx512pf(void) > __attribute__((__target__("no-avx512pf"))); > +extern void test_no_avx512cd(void) > __attribute__((__target__("no-avx512cd"))); > extern void test_no_bmi (void) > __attribute__((__target__("no-bmi"))); > extern void test_no_bmi2 (void) > __attribute__((__target__("no-bmi2"))); > > @@ -77,6 +89,7 @@ extern void test_arch_corei7 (void) > __attribute__((__target__("arch=corei7"))); > extern void test_arch_corei7_avx (void) > __attribute__((__target__("arch=corei7-avx"))); > extern void test_arch_core_avx2 (void) > __attribute__((__target__("arch=core-avx2"))); > extern void test_arch_knl (void) > __attribute__((__target__("arch=knl"))); > +extern void test_arch_skylake_avx512 (void) > __attribute__((__target__("arch=skylake-avx512"))); > extern void test_arch_geode (void) > __attribute__((__target__("arch=geode"))); > extern void test_arch_k6 (void) > __attribute__((__target__("arch=k6"))); > extern void test_arch_k6_2 (void) > __attribute__((__target__("arch=k6-2"))); > diff --git a/libgcc/config/i386/cpuinfo.c b/libgcc/config/i386/cpuinfo.c > index 1436775..c0f12b4 100644 > --- a/libgcc/config/i386/cpuinfo.c > +++ b/libgcc/config/i386/cpuinfo.c > @@ -101,6 +101,12 @@ enum processor_features > FEATURE_XOP, > FEATURE_FMA, > FEATURE_AVX512F, > + FEATURE_AVX512VL, > + FEATURE_AVX512BW, > + FEATURE_AVX512DQ, > + FEATURE_AVX512CD, > + FEATURE_AVX512ER, > + FEATURE_AVX512PF, > FEATURE_BMI, > FEATURE_BMI2, > FEATURE_AES, The change above constitutes unwanted ABI change. Please note the comment: /* Any new types or subtypes have to be inserted at the end. */ which unfortunately applies also to ISA features. Perhaps an extra comment should be added above enum processor_features. > @@ -318,6 +324,16 @@ get_available_features (unsigned int ecx, unsigned int > edx, > features |= (1 << FEATURE_BMI2); > if (ebx & bit_AVX512F) > features |= (1 << FEATURE_AVX512F); > + if (ebx & bit_AVX512BW) > + features |= (1 << FEATURE_AVX512BW); > + if (ebx & bit_AVX512DQ) > + features |= (1 << FEATURE_AVX512DQ); > + if (ebx & bit_AVX512CD) > + features |= (1 << FEATURE_AVX512CD); > + if (ebx & bit_AVX512PF) > + features |= (1 << FEATURE_AVX512PF); > + if (ebx & bit_AVX512ER) > + features |= (1 << FEATURE_AVX512ER); > } > > unsigned int ext_level;