On Thu, Jul 4, 2024 at 9:30 AM liuhongt <hongtao....@intel.com> wrote:
>
> From: "H.J. Lu" <hjl.to...@gmail.com>
>
> >The above reads like it would be worth splitting branch_prediction_hints
> >into branch_prediction_hints_taken and branch_prediction_hints_not_taken
> >given not-taken is the default and thus will just increase code size?
> >According to Intel® 64 and IA-32 Architectures Optimization Reference
> >Manual[1], Branch Hint is updated for Redwood Cove.
> Changed.
>
> --------cut from [1]-------------------------
> Starting with the Redwood Cove microarchitecture, if the predictor has
> no stored information about a branch, the branch has the Intel® SSE2
> branch taken hint (i.e., instruction prefix 3EH), When the codec
> decodes the branch, it flips the branch’s prediction from not-taken to
> taken. It then flushes the pipeline in front of it and steers this
> pipeline to fetch the taken path of the branch.
> --------cut end -----------------------------
>
> Split tune branch_prediction_hints into branch_prediction_hints_taken
> and branch_prediction_hints_not_taken.  When the corresponding tune is
> enabled, always generate the branch hint for conditional branches.
> Both tunes are disabled by default.
>
> [1] https://www.intel.com/content/www/us/en/content-details/821612/intel-64-and-ia-32-architectures-optimization-reference-manual-volume-1.html
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk?
Committed.
>
> gcc/
>
>         * config/i386/i386.cc (ix86_print_operand): Always generate
>         branch hint for conditional branches.
>         * config/i386/i386.h (TARGET_BRANCH_PREDICTION_HINTS): Split
>         into ..
>         (TARGET_BRANCH_PREDICTION_HINTS_TAKEN): .. this, and ..
>         (TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN): .. this.
>         * config/i386/x86-tune.def (X86_TUNE_BRANCH_PREDICTION_HINTS):
>         Split into ..
>         (X86_TUNE_BRANCH_PREDICTION_HINTS_TAKEN): .. this, and ..
>         (X86_TUNE_BRANCH_PREDICTION_HINTS_NOT_TAKEN): .. this.
> ---
>  gcc/config/i386/i386.cc      | 29 +++++++++--------------------
>  gcc/config/i386/i386.h       |  6 ++++--
>  gcc/config/i386/x86-tune.def | 13 +++++++++++--
>  3 files changed, 24 insertions(+), 24 deletions(-)
>
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index 1f71ed04be6..ea9cb620f8d 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -14041,7 +14041,8 @@ ix86_print_operand (FILE *file, rtx x, int code)
>
>             if (!optimize
>                 || optimize_function_for_size_p (cfun)
> -               || !TARGET_BRANCH_PREDICTION_HINTS)
> +               || (!TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN
> +                   && !TARGET_BRANCH_PREDICTION_HINTS_TAKEN))
>               return;
>
>             x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
> @@ -14050,25 +14051,13 @@ ix86_print_operand (FILE *file, rtx x, int code)
>                 int pred_val = profile_probability::from_reg_br_prob_note
>                                  (XINT (x, 0)).to_reg_br_prob_base ();
>
> -               if (pred_val < REG_BR_PROB_BASE * 45 / 100
> -                   || pred_val > REG_BR_PROB_BASE * 55 / 100)
> -                 {
> -                   bool taken = pred_val > REG_BR_PROB_BASE / 2;
> -                   bool cputaken
> -                     = final_forward_branch_p (current_output_insn) == 0;
> -
> -                   /* Emit hints only in the case default branch prediction
> -                      heuristics would fail.  */
> -                   if (taken != cputaken)
> -                     {
> -                       /* We use 3e (DS) prefix for taken branches and
> -                          2e (CS) prefix for not taken branches.  */
> -                       if (taken)
> -                         fputs ("ds ; ", file);
> -                       else
> -                         fputs ("cs ; ", file);
> -                     }
> -                 }
> +               bool taken = pred_val > REG_BR_PROB_BASE / 2;
> +               /* We use 3e (DS) prefix for taken branches and
> +                  2e (CS) prefix for not taken branches.  */
> +               if (taken && TARGET_BRANCH_PREDICTION_HINTS_TAKEN)
> +                 fputs ("ds ; ", file);
> +               else if (!taken && TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN)
> +                 fputs ("cs ; ", file);
>               }
>             return;
>           }
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index 9ed225ec587..50ebed221dc 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -309,8 +309,10 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
>  #define TARGET_ZERO_EXTEND_WITH_AND \
>         ix86_tune_features[X86_TUNE_ZERO_EXTEND_WITH_AND]
>  #define TARGET_UNROLL_STRLEN   ix86_tune_features[X86_TUNE_UNROLL_STRLEN]
> -#define TARGET_BRANCH_PREDICTION_HINTS \
> -       ix86_tune_features[X86_TUNE_BRANCH_PREDICTION_HINTS]
> +#define TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN \
> +       ix86_tune_features[X86_TUNE_BRANCH_PREDICTION_HINTS_NOT_TAKEN]
> +#define TARGET_BRANCH_PREDICTION_HINTS_TAKEN \
> +       ix86_tune_features[X86_TUNE_BRANCH_PREDICTION_HINTS_TAKEN]
>  #define TARGET_DOUBLE_WITH_ADD ix86_tune_features[X86_TUNE_DOUBLE_WITH_ADD]
>  #define TARGET_USE_SAHF                ix86_tune_features[X86_TUNE_USE_SAHF]
>  #define TARGET_MOVX            ix86_tune_features[X86_TUNE_MOVX]
> diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
> index 343c32c291f..3d29bffc49c 100644
> --- a/gcc/config/i386/x86-tune.def
> +++ b/gcc/config/i386/x86-tune.def
> @@ -683,15 +683,24 @@ DEF_TUNE (X86_TUNE_NOT_VECTORMODE, "not_vectormode", m_K6)
>  DEF_TUNE (X86_TUNE_AVOID_VECTOR_DECODE, "avoid_vector_decode",
>            m_K8)
>
> +/* X86_TUNE_BRANCH_PREDICTION_HINTS_TAKEN, starting with the Redwood Cove
> +   microarchitecture, if the predictor has no stored information about a branch,
> +   the branch has the Intel® SSE2 branch taken hint
> +   (i.e., instruction prefix 3EH), When the codec decodes the branch, it flips
> +   the branch’s prediction from not-taken to taken. It then flushes the pipeline
> +   in front of it and steers this pipeline to fetch the taken path of the
> +   branch.  */
> +DEF_TUNE (X86_TUNE_BRANCH_PREDICTION_HINTS_TAKEN, "branch_prediction_hints_taken", m_NONE)
> +
>  /*****************************************************************************/
>  /* This never worked well before.                                            */
>  /*****************************************************************************/
>
> -/* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
> +/* X86_TUNE_BRANCH_PREDICTION_HINTS_NOT_TAKEN: Branch hints were put in P4 based
>     on simulation result. But after P4 was made, no performance benefit
>     was observed with branch hints.  It also increases the code size.
>     As a result, icc never generates branch hints.  */
> -DEF_TUNE (X86_TUNE_BRANCH_PREDICTION_HINTS, "branch_prediction_hints", m_NONE)
> +DEF_TUNE (X86_TUNE_BRANCH_PREDICTION_HINTS_NOT_TAKEN, "branch_prediction_hints_not_taken", m_NONE)
>
>  /* X86_TUNE_QIMODE_MATH: Enable use of 8bit arithmetic.  */
>  DEF_TUNE (X86_TUNE_QIMODE_MATH, "qimode_math", m_ALL)
> --
> 2.31.1
>


-- 
BR,
Hongtao

Reply via email to