Hi: As mentioned in https://gcc.gnu.org/ml/gcc-patches/2019-11/msg00832.html > So yes, it's poorly named. A preparatory patch to clean this up > (and maybe split it into TARGET_AVX256_SPLIT_REGS and TARGET_AVX128_OPTIMAL) > would be nice.
Bootstrap and regression tests for the i386 backend are ok. Ok for trunk? Changelog gcc/ PR target/92448 * config/i386/i386-expand.c (ix86_expand_set_or_cpymem): Replace TARGET_AVX128_OPTIMAL with TARGET_AVX256_SPLIT_REGS. * config/i386/i386.c (ix86_vec_cost): Ditto. (ix86_reassociation_width): Ditto. * config/i386/i386-options.c (ix86_option_override_internal): Replace TARGET_AVX128_OPTIMAL with ix86_tune_features[X86_TUNE_AVX128_OPTIMAL]. * config/i386/i386.h (TARGET_AVX256_SPLIT_REGS): New macro. (TARGET_AVX128_OPTIMAL): Deleted. * config/i386/x86-tune.def (X86_TUNE_AVX256_SPLIT_REGS): New DEF_TUNE. -- BR, Hongtao
From 93f49b7739d87106988869ee9a5ebe441e0b56ab Mon Sep 17 00:00:00 2001 From: liuhongt <hongtao....@intel.com> Date: Tue, 12 Nov 2019 16:49:41 +0800 Subject: [PATCH] Split X86_TUNE_AVX128_OPTIMAL into X86_TUNE_AVX256_SPLIT_REGS and X86_TUNE_AVX128_OPTIMAL. Changelog gcc/ PR target/92448 * config/i386/i386-expand.c (ix86_expand_set_or_cpymem): Replace TARGET_AVX128_OPTIMAL with TARGET_AVX256_SPLIT_REGS. * config/i386/i386.c (ix86_vec_cost): Ditto. (ix86_reassociation_width): Ditto. * config/i386/i386-options.c (ix86_option_override_internal): Replace TARGET_AVX128_OPTIMAL with ix86_tune_features[X86_TUNE_AVX128_OPTIMAL]. * config/i386/i386.h (TARGET_AVX256_SPLIT_REGS): New macro. (TARGET_AVX128_OPTIMAL): Deleted. * config/i386/x86-tune.def (X86_TUNE_AVX256_SPLIT_REGS): New DEF_TUNE. --- gcc/config/i386/i386-expand.c | 2 +- gcc/config/i386/i386-options.c | 2 +- gcc/config/i386/i386.c | 4 ++-- gcc/config/i386/i386.h | 4 ++-- gcc/config/i386/x86-tune.def | 4 ++++ 5 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index be040a1bc3e..392e0f95460 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -7348,7 +7348,7 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp, && optab_handler (mov_optab, wider_mode) != CODE_FOR_nothing) move_mode = wider_mode; - if (TARGET_AVX128_OPTIMAL && GET_MODE_BITSIZE (move_mode) > 128) + if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (move_mode) > 128) move_mode = TImode; /* Find the corresponding vector mode with the same size as MOVE_MODE. diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c index dfc8ae23ba0..3d87dec8b15 100644 --- a/gcc/config/i386/i386-options.c +++ b/gcc/config/i386/i386-options.c @@ -2692,7 +2692,7 @@ ix86_option_override_internal (bool main_args_p, /* Enable 128-bit AVX instruction generation for the auto-vectorizer. 
*/ - if (TARGET_AVX128_OPTIMAL + if (ix86_tune_features[X86_TUNE_AVX128_OPTIMAL] && (opts_set->x_prefer_vector_width_type == PVW_NONE)) opts->x_prefer_vector_width_type = PVW_AVX128; diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 03a7082d2fc..4a4cf79555e 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -18960,7 +18960,7 @@ ix86_vec_cost (machine_mode mode, int cost) && TARGET_SSE_SPLIT_REGS) return cost * 2; if (GET_MODE_BITSIZE (mode) > 128 - && TARGET_AVX128_OPTIMAL) + && TARGET_AVX256_SPLIT_REGS) return cost * GET_MODE_BITSIZE (mode) / 128; return cost; } @@ -21298,7 +21298,7 @@ ix86_reassociation_width (unsigned int op, machine_mode mode) return 1; /* Account for targets that splits wide vectors into multiple parts. */ - if (TARGET_AVX128_OPTIMAL && GET_MODE_BITSIZE (mode) > 128) + if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 128) div = GET_MODE_BITSIZE (mode) / 128; else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64) div = GET_MODE_BITSIZE (mode) / 64; diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index afa0aa83ddf..3954c12f4e7 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -578,8 +578,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; ix86_tune_features[X86_TUNE_AVOID_LEA_FOR_ADDR] #define TARGET_SOFTWARE_PREFETCHING_BENEFICIAL \ ix86_tune_features[X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL] -#define TARGET_AVX128_OPTIMAL \ - ix86_tune_features[X86_TUNE_AVX128_OPTIMAL] +#define TARGET_AVX256_SPLIT_REGS \ + ix86_tune_features[X86_TUNE_AVX256_SPLIT_REGS] #define TARGET_GENERAL_REGS_SSE_SPILL \ ix86_tune_features[X86_TUNE_GENERAL_REGS_SSE_SPILL] #define TARGET_AVOID_MEM_OPND_FOR_CMOVE \ diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index e289efdf2e0..328535d38d7 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -453,6 +453,10 @@ DEF_TUNE (X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL, 
"256_unaligned_load_optimal", DEF_TUNE (X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL, "256_unaligned_store_optimal", ~(m_NEHALEM | m_SANDYBRIDGE | m_BDVER | m_ZNVER1 | m_GENERIC)) +/* X86_TUNE_AVX256_SPLIT_REGS: if true, AVX256 ops are split into two AVX128 ops. */ +DEF_TUNE (X86_TUNE_AVX256_SPLIT_REGS, "avx256_split_regs",m_BDVER | m_BTVER2 + | m_ZNVER1) + /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for the auto-vectorizer. */ DEF_TUNE (X86_TUNE_AVX128_OPTIMAL, "avx128_optimal", m_BDVER | m_BTVER2 -- 2.18.1