* Claudiu Zissulescu <claz...@gmail.com> [2018-10-10 11:00:16 +0300]:
> Handle store cacheline hazard for A700 cpus by inserting two NOP_S > between ST ST LD or their logical equivalent (like ST ST NOP_S NOP_S > J_L.D LD) > > gcc/ > 2016-08-01 Claudiu Zissulescu <claz...@synopsys.com> > > * config/arc/arc-arch.h (ARC_TUNE_ARC7XX): New tune value. > * config/arc/arc.c (arc_active_insn): New function. > (check_store_cacheline_hazard): Likewise. > (workaround_arc_anomaly): Use check_store_cacheline_hazard. > (arc_override_options): Disable delay slot scheduler for older > A7. > (arc_store_addr_hazard_p): New implementation, old one renamed to > ... > (arc_store_addr_hazard_internal_p): Renamed. > (arc_reorg): Don't combine into brcc instructions which are part > of hardware hazard solution. > * config/arc/arc.md (attr tune): Consider new arc7xx tune value. > (tune_arc700): Likewise. > * config/arc/arc.opt (arc7xx): New tune value. > * config/arc/arc700.md: Improve A7 scheduler. Basically happy with this, most just a few missing header comments on new functions. Thanks, Andrew > --- > gcc/config/arc/arc-arch.h | 1 + > gcc/config/arc/arc.c | 142 ++++++++++++++++++++++++++++++++------ > gcc/config/arc/arc.md | 8 ++- > gcc/config/arc/arc.opt | 3 + > gcc/config/arc/arc700.md | 18 +---- > 5 files changed, 132 insertions(+), 40 deletions(-) > > diff --git a/gcc/config/arc/arc-arch.h b/gcc/config/arc/arc-arch.h > index 859af0684b8..ad540607e55 100644 > --- a/gcc/config/arc/arc-arch.h > +++ b/gcc/config/arc/arc-arch.h > @@ -71,6 +71,7 @@ enum arc_tune_attr > { > ARC_TUNE_NONE, > ARC_TUNE_ARC600, > + ARC_TUNE_ARC7XX, > ARC_TUNE_ARC700_4_2_STD, > ARC_TUNE_ARC700_4_2_XMAC, > ARC_TUNE_CORE_3, > diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c > index ab7735d6b38..90454928379 100644 > --- a/gcc/config/arc/arc.c > +++ b/gcc/config/arc/arc.c > @@ -1308,6 +1308,10 @@ arc_override_options (void) > if (TARGET_LONG_CALLS_SET) > target_flags &= ~MASK_MILLICODE_THUNK_SET; > > + /* A7 has an issue with delay slots. */ > + if (TARGET_ARC700 && (arc_tune != ARC_TUNE_ARC7XX)) > + flag_delayed_branch = 0; > + > /* These need to be done at start up. It's convenient to do them here. */ > arc_init (); > } > @@ -7529,11 +7533,91 @@ arc_invalid_within_doloop (const rtx_insn *insn) > return NULL; > } > > +static rtx_insn * > +arc_active_insn (rtx_insn *insn) Missing header comment. > +{ > + rtx_insn *nxt = next_active_insn (insn); > + > + if (nxt && GET_CODE (PATTERN (nxt)) == ASM_INPUT) > + nxt = next_active_insn (nxt); > + return nxt; > +} > + > +/* Search for a sequence made out of two stores and a given number of > + loads, insert a nop if required. */ > + > +static void > +check_store_cacheline_hazard (void) > +{ > + rtx_insn *insn, *succ0, *insn1; > + bool found = false; > + > + for (insn = get_insns (); insn; insn = arc_active_insn (insn)) > + { > + succ0 = arc_active_insn (insn); > + > + if (!succ0) > + return; > + > + if (!single_set (insn) || !single_set (succ0)) > + continue; > + > + if ((get_attr_type (insn) != TYPE_STORE) > + || (get_attr_type (succ0) != TYPE_STORE)) > + continue; > + > + /* Found at least two consecutive stores. Goto the end of the > + store sequence. */ > + for (insn1 = succ0; insn1; insn1 = arc_active_insn (insn1)) > + if (!single_set (insn1) || get_attr_type (insn1) != TYPE_STORE) > + break; > + > + /* Now, check the next two instructions for the following cases: > + 1. next instruction is a LD => insert 2 nops between store > + sequence and load. > + 2. next-next instruction is a LD => inset 1 nop after the store > + sequence. */ > + if (insn1 && single_set (insn1) > + && (get_attr_type (insn1) == TYPE_LOAD)) > + { > + found = true; > + emit_insn_before (gen_nopv (), insn1); > + emit_insn_before (gen_nopv (), insn1); > + } > + else > + { > + if (insn1 && (get_attr_type (insn1) == TYPE_COMPARE)) > + { > + /* REG_SAVE_NOTE is used by Haifa scheduler, we are in > + reorg, so it is safe to reuse it for avoiding the > + current compare insn to be part of a BRcc > + optimization. */ > + add_reg_note (insn1, REG_SAVE_NOTE, GEN_INT (3)); > + } > + insn1 = arc_active_insn (insn1); > + if (insn1 && single_set (insn1) > + && (get_attr_type (insn1) == TYPE_LOAD)) > + { > + found = true; > + emit_insn_before (gen_nopv (), insn1); > + } > + } > + > + insn = insn1; > + if (found) > + { > + /* warning (0, "Potential lockup sequence found, patching"); */ I'm not a fan of this approach. I'd rather the comment explain what problem was found and patched, and why displaying a warning is not appropriate. The commented out code just leaves me asking ... why? > + found = false; > + } > + } > +} > + > /* Return true if a load instruction (CONSUMER) uses the same address as a > store instruction (PRODUCER). This function is used to avoid st/ld > address hazard in ARC700 cores. */ > -bool > -arc_store_addr_hazard_p (rtx_insn* producer, rtx_insn* consumer) > + > +static bool > +arc_store_addr_hazard_internal_p (rtx_insn* producer, rtx_insn* consumer) > { > rtx in_set, out_set; > rtx out_addr, in_addr; > @@ -7581,6 +7665,14 @@ arc_store_addr_hazard_p (rtx_insn* producer, rtx_insn* > consumer) > return false; > } > > +bool > +arc_store_addr_hazard_p (rtx_insn* producer, rtx_insn* consumer) > +{ Missing header comment. > + if (TARGET_ARC700 && (arc_tune != ARC_TUNE_ARC7XX)) > + return true; > + return arc_store_addr_hazard_internal_p (producer, consumer); > +} > + > /* The same functionality as arc_hazard. It is called in machine > reorg before any other optimization. Hence, the NOP size is taken > into account when doing branch shortening. */ > @@ -7589,6 +7681,7 @@ static void > workaround_arc_anomaly (void) > { > rtx_insn *insn, *succ0; > + rtx_insn *succ1; > > /* For any architecture: call arc_hazard here. */ > for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) > @@ -7600,27 +7693,30 @@ workaround_arc_anomaly (void) > } > } > > - if (TARGET_ARC700) > - { > - rtx_insn *succ1; > + if (!TARGET_ARC700) > + return; > > - for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) > - { > - succ0 = next_real_insn (insn); > - if (arc_store_addr_hazard_p (insn, succ0)) > - { > - emit_insn_after (gen_nopv (), insn); > - emit_insn_after (gen_nopv (), insn); > - continue; > - } > + /* Old A7 are suffering of a cache hazard, and we need to insert two > + nops between any sequence of stores and a load. */ > + if (arc_tune != ARC_TUNE_ARC7XX) > + check_store_cacheline_hazard (); > > - /* Avoid adding nops if the instruction between the ST and LD is > - a call or jump. */ > - succ1 = next_real_insn (succ0); > - if (succ0 && !JUMP_P (succ0) && !CALL_P (succ0) > - && arc_store_addr_hazard_p (insn, succ1)) > - emit_insn_after (gen_nopv (), insn); > + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) > + { > + succ0 = next_real_insn (insn); > + if (arc_store_addr_hazard_internal_p (insn, succ0)) > + { > + emit_insn_after (gen_nopv (), insn); > + emit_insn_after (gen_nopv (), insn); > + continue; > } > + > + /* Avoid adding nops if the instruction between the ST and LD is > + a call or jump. */ > + succ1 = next_real_insn (succ0); > + if (succ0 && !JUMP_P (succ0) && !CALL_P (succ0) > + && arc_store_addr_hazard_internal_p (insn, succ1)) > + emit_insn_after (gen_nopv (), insn); > } > } > > @@ -8291,11 +8387,15 @@ arc_reorg (void) > if (!link_insn) > continue; > else > - /* Check if this is a data dependency. */ > { > + /* Check if this is a data dependency. */ > rtx op, cc_clob_rtx, op0, op1, brcc_insn, note; > rtx cmp0, cmp1; > > + /* Make sure we can use it for brcc insns. */ > + if (find_reg_note (link_insn, REG_SAVE_NOTE, GEN_INT (3))) > + continue; > + > /* Ok this is the set cc. copy args here. */ > op = XEXP (pc_target, 0); > > diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md > index fb8a1c9ee09..caf7deda505 100644 > --- a/gcc/config/arc/arc.md > +++ b/gcc/config/arc/arc.md > @@ -600,11 +600,13 @@ > ;; somehow modify them to become inelegible for delay slots if a decision > ;; is made that makes conditional execution required. > > -(define_attr "tune" "none,arc600,arc700_4_2_std,arc700_4_2_xmac, core_3, \ > -archs4x, archs4xd, archs4xd_slow" > +(define_attr "tune" "none,arc600,arc7xx,arc700_4_2_std,arc700_4_2_xmac, \ > +core_3, archs4x, archs4xd, archs4xd_slow" > (const > (cond [(symbol_ref "arc_tune == TUNE_ARC600") > (const_string "arc600") > + (symbol_ref "arc_tune == ARC_TUNE_ARC7XX") > + (const_string "arc7xx") > (symbol_ref "arc_tune == TUNE_ARC700_4_2_STD") > (const_string "arc700_4_2_std") > (symbol_ref "arc_tune == TUNE_ARC700_4_2_XMAC") > @@ -619,7 +621,7 @@ archs4x, archs4xd, archs4xd_slow" > (const_string "none")))) > > (define_attr "tune_arc700" "false,true" > - (if_then_else (eq_attr "tune" "arc700_4_2_std, arc700_4_2_xmac") > + (if_then_else (eq_attr "tune" "arc7xx, arc700_4_2_std, arc700_4_2_xmac") > (const_string "true") > (const_string "false"))) > > diff --git a/gcc/config/arc/arc.opt b/gcc/config/arc/arc.opt > index 93e18af1d27..bcffb2720ba 100644 > --- a/gcc/config/arc/arc.opt > +++ b/gcc/config/arc/arc.opt > @@ -262,6 +262,9 @@ Enum(arc_tune_attr) String(arc600) Value(ARC_TUNE_ARC600) > EnumValue > Enum(arc_tune_attr) String(arc601) Value(ARC_TUNE_ARC600) > > +EnumValue > +Enum(arc_tune_attr) String(arc7xx) Value(ARC_TUNE_ARC7XX) > + > EnumValue > Enum(arc_tune_attr) String(arc700) Value(ARC_TUNE_ARC700_4_2_STD) > > diff --git a/gcc/config/arc/arc700.md b/gcc/config/arc/arc700.md > index a0f9f74a9f2..cbb868d8dcd 100644 > --- a/gcc/config/arc/arc700.md > +++ b/gcc/config/arc/arc700.md > @@ -145,28 +145,14 @@ > ; no functional unit runs when blockage is reserved > (exclusion_set "blockage" "core, multiplier") > > -(define_insn_reservation "data_load_DI" 4 > - (and (eq_attr "tune_arc700" "true") > - (eq_attr "type" "load") > - (match_operand:DI 0 "" "")) > - "issue+dmp, issue+dmp, dmp_write_port, dmp_write_port") > - > (define_insn_reservation "data_load" 3 > (and (eq_attr "tune_arc700" "true") > - (eq_attr "type" "load") > - (not (match_operand:DI 0 "" ""))) > + (eq_attr "type" "load")) > "issue+dmp, nothing, dmp_write_port") > > -(define_insn_reservation "data_store_DI" 2 > - (and (eq_attr "tune_arc700" "true") > - (eq_attr "type" "store") > - (match_operand:DI 0 "" "")) > - "issue+dmp_write_port, issue+dmp_write_port") > - > (define_insn_reservation "data_store" 1 > (and (eq_attr "tune_arc700" "true") > - (eq_attr "type" "store") > - (not (match_operand:DI 0 "" ""))) > + (eq_attr "type" "store")) > "issue+dmp_write_port") > > (define_bypass 3 "data_store" "data_load" "arc_store_addr_hazard_p") > -- > 2.17.1 >