On Fri, Sep 13, 2013 at 10:28 AM, Wei Mi <[email protected]> wrote:
>> Thanks. At this point you need feedback from x86 and scheduler maintainers.
>> I would recommend you to resubmit the patch with a Changelog text, and with
>> the text of the patch inline in the email (your last mail has the patch as a
>> binary attachment, which makes it harder to review and respond to). Please
>> mention if the updated patch passes bootstrap and regtest.
>
> Thanks! Here is the new patch. bootstrap and regression pass. ok for trunk?
>
> 2013-09-13 Wei Mi <[email protected]>
>
> * sched-rgn.c (add_branch_dependences): Keep insns in
> a SCHED_GROUP at the end of bb in their original locations.
> * config/i386/x86-tune.def (DEF_TUNE): Add m_COREI7 for
> X86_TUNE_FUSE_CMP_AND_BRANCH.
> * config/i386/i386.c (ix86_macro_fusion_p): New Function.
> (ix86_macro_fusion_pair_p): Ditto.
> * doc/tm.texi.in: Generated.
> * doc/tm.texi: Ditto.
> * target.def: Add two hooks: macro_fusion_p and
> macro_fusion_pair_p.
> * haifa-sched.c (try_group_insn): New function.
> (group_insns_for_macro_fusion): New function.
> (sched_init): Call group_insns_for_macro_fusion.
>
> Index: config/i386/i386.c
> ===================================================================
> --- config/i386/i386.c (revision 201963)
> +++ config/i386/i386.c (working copy)
> @@ -24850,6 +24850,99 @@ ia32_multipass_dfa_lookahead (void)
> }
> }
>
> +/* Return true if target platform supports macro-fusion. */
> +
> +static bool
> +ix86_macro_fusion_p ()
> +{
> + if (TARGET_FUSE_CMP_AND_BRANCH)
> + return true;
> + else
> + return false;
> +}
> +
> +/* Check whether the current microarchitecture supports macro-fusion
> + for insn pair "CONDGEN + CONDJMP". Refer to
> + "Intel Architectures Optimization Reference Manual". */
> +
> +static bool
> +ix86_macro_fusion_pair_p (rtx condgen, rtx condjmp)
> +{
> + rtx src;
> + if (!strcmp (ix86_tune_string, "corei7"))
> + {
> + /* For Nehalem. */
> + rtx single_set = single_set (condgen);
> + /* Nehalem doesn't support macro-fusion for add/sub+jmp. */
> + if (single_set == NULL_RTX)
> + return false;
> +
> + src = SET_SRC (single_set);
> + if (GET_CODE (src) != COMPARE)
> + return false;
> +
> + /* Nehalem doesn't support macro-fusion for cmp/test MEM-IMM
> + insn pattern. */
> + if ((MEM_P (XEXP (src, 0))
> + && CONST_INT_P (XEXP (src, 1)))
> + || (MEM_P (XEXP (src, 1))
> + && CONST_INT_P (XEXP (src, 0))))
> + return false;
> +
> + /* Nehalem doesn't support macro-fusion for add/sub/dec/inc + jmp. */
> + if (get_attr_type (condgen) != TYPE_TEST
> + && get_attr_type (condgen) != TYPE_ICMP)
> + return false;
> + return true;
> + }
> + else if (!strcmp (ix86_tune_string, "corei7-avx"))
> + {
> + /* For Sandybridge. */
> + enum rtx_code ccode;
> + rtx compare_set = NULL_RTX, test_if, cond;
> + rtx single_set = single_set (condgen);
> + if (single_set != NULL_RTX)
> + compare_set = single_set;
> + else
> + {
> + int i;
> + rtx pat = PATTERN (condgen);
> + for (i = 0; i < XVECLEN (pat, 0); i++)
> + if (GET_CODE (XVECEXP (pat, 0, i)) == SET
> + && GET_CODE (SET_SRC (XVECEXP (pat, 0, i))) == COMPARE)
> + compare_set = XVECEXP (pat, 0, i);
> + }
> +
> + if (compare_set == NULL_RTX)
> + return false;
> + src = SET_SRC (compare_set);
> + if (GET_CODE (src) != COMPARE)
> + return false;
> +
> + /* Sandybridge doesn't support macro-fusion for cmp/test MEM-IMM
> + insn pattern. */
> + if ((MEM_P (XEXP (src, 0))
> + && CONST_INT_P (XEXP (src, 1)))
> + || (MEM_P (XEXP (src, 1))
> + && CONST_INT_P (XEXP (src, 0))))
> + return false;
> +
> + /* Sandybridge doesn't support macro-fusion for inc/dec +
> + unsigned comparison jmp. */
> + test_if = SET_SRC (pc_set (condjmp));
> + cond = XEXP (test_if, 0);
> + ccode = GET_CODE (cond);
> + if (get_attr_type (condgen) == TYPE_INCDEC
> + && (ccode == GEU
> + || ccode == GTU
> + || ccode == LEU
> + || ccode == LTU))
> + return false;
> + return true;
> + }
> + return false;
> +}
> +
Checking explicitly for the corei7/corei7-avx tune strings isn't a good
idea. Macro-fusion is also useful for Ivy Bridge and Haswell. I think
you should use a variable to control it, similar to
TARGET_FUSE_CMP_AND_BRANCH.
--
H.J.