This patch implements despeculation on ARM. We only support it when generating ARM or Thumb2 code (we need conditional execution), and we only support it for sizes up to DImode. For unsupported cases we fall back to the generic code generation sequence so that a suitable failure warning is emitted.
* config/arm/arm.c (arm_inhibit_load_speculation): New function. (TARGET_INHIBIT_LOAD_SPECULATION): Redefine. * config/arm/unspecs.md (VUNSPEC_NOSPECULATE): New unspec_volatile code. * config/arm/arm.md (cmp_ior): Make this pattern callable. (nospeculate<QHSI:mode>, nospeculatedi): New patterns. --- gcc/config/arm/arm.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++ gcc/config/arm/arm.md | 40 ++++++++++++++++- gcc/config/arm/unspecs.md | 1 + 3 files changed, 148 insertions(+), 1 deletion(-)
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 7b3f4c1..393bfd6 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -308,6 +308,8 @@ static unsigned int arm_elf_section_type_flags (tree decl, const char *name, int reloc); static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *); static machine_mode arm_floatn_mode (int, bool); +static rtx arm_inhibit_load_speculation (machine_mode, rtx, rtx, rtx, rtx, + rtx, rtx); /* Table of machine attributes. */ static const struct attribute_spec arm_attribute_table[] = @@ -766,6 +768,9 @@ static const struct attribute_spec arm_attribute_table[] = #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2 +#undef TARGET_INHIBIT_LOAD_SPECULATION +#define TARGET_INHIBIT_LOAD_SPECULATION arm_inhibit_load_speculation + struct gcc_target targetm = TARGET_INITIALIZER; /* Obstack for minipool constant handling. */ @@ -31112,4 +31117,107 @@ arm_coproc_ldc_stc_legitimate_address (rtx op) } return false; } + +static rtx +arm_inhibit_load_speculation (machine_mode mode, rtx result, rtx mem, + rtx lower_bound, rtx upper_bound, + rtx fail_result, rtx cmpptr) +{ + rtx cond, comparison; + + /* We can't support this for Thumb1 as we have no suitable conditional + move operations. Nor do we support it for TImode. For both + these cases fall back to the generic code sequence which will emit + a suitable warning for us. 
*/ + if (mode == TImode || TARGET_THUMB1) + return default_inhibit_load_speculation (mode, result, mem, lower_bound, + upper_bound, fail_result, cmpptr); + + + rtx target = gen_reg_rtx (mode); + rtx tgt2 = result; + + if (!register_operand (tgt2, mode)) + tgt2 = gen_reg_rtx (mode); + + if (!register_operand (cmpptr, ptr_mode)) + cmpptr = force_reg (ptr_mode, cmpptr); + + if (upper_bound == NULL) + { + if (!register_operand (lower_bound, ptr_mode)) + lower_bound = force_reg (ptr_mode, lower_bound); + + cond = arm_gen_compare_reg (LTU, cmpptr, lower_bound, NULL); + comparison = gen_rtx_LTU (VOIDmode, cond, const0_rtx); + } + else if (lower_bound == NULL) + { + if (!register_operand (upper_bound, ptr_mode)) + upper_bound = force_reg (ptr_mode, upper_bound); + + cond = arm_gen_compare_reg (GEU, cmpptr, upper_bound, NULL); + comparison = gen_rtx_GEU (VOIDmode, cond, const0_rtx); + } + else + { + /* We want to generate code for + result = (cmpptr < lower || cmpptr >= upper) ? 0 : *ptr; + Which can be recast to + result = (cmpptr < lower || upper <= cmpptr) ? 0 : *ptr; + which can be implemented as + cmp cmpptr, lower + cmpcs upper, cmpptr + bls 1f + ldr result, [ptr] + 1: + movls result, #0 + with suitable IT instructions as needed for thumb2. Later + optimization passes may make the load conditional. 
*/ + + if (!register_operand (lower_bound, ptr_mode)) + lower_bound = force_reg (ptr_mode, lower_bound); + + if (!register_operand (upper_bound, ptr_mode)) + upper_bound = force_reg (ptr_mode, upper_bound); + + rtx comparison1 = gen_rtx_LTU (SImode, cmpptr, lower_bound); + rtx comparison2 = gen_rtx_LEU (SImode, upper_bound, cmpptr); + cond = gen_rtx_REG (arm_select_dominance_cc_mode (comparison1, + comparison2, + DOM_CC_X_OR_Y), + CC_REGNUM); + emit_insn (gen_cmp_ior (cmpptr, lower_bound, upper_bound, cmpptr, + comparison1, comparison2, cond)); + comparison = gen_rtx_NE (SImode, cond, const0_rtx); + } + + rtx_code_label *label = gen_label_rtx (); + emit_jump_insn (gen_arm_cond_branch (label, comparison, cond)); + emit_move_insn (target, mem); + emit_label (label); + + insn_code icode; + + switch (mode) + { + case QImode: icode = CODE_FOR_nospeculateqi; break; + case HImode: icode = CODE_FOR_nospeculatehi; break; + case SImode: icode = CODE_FOR_nospeculatesi; break; + case DImode: icode = CODE_FOR_nospeculatedi; break; + default: + gcc_unreachable (); + } + + if (! 
insn_operand_matches (icode, 4, fail_result)) + fail_result = force_reg (mode, fail_result); + + emit_insn (GEN_FCN (icode) (tgt2, comparison, cond, target, fail_result)); + + if (tgt2 != result) + emit_move_insn (result, tgt2); + + return result; +} + #include "gt-arm.h" diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index f9365cd..7a6c134 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -9511,7 +9511,7 @@ (set_attr "type" "multiple")] ) -(define_insn "*cmp_ior" +(define_insn "cmp_ior" [(set (match_operand 6 "dominant_cc_register" "") (compare (ior:SI @@ -12054,6 +12054,44 @@ [(set_attr "length" "4") (set_attr "type" "coproc")]) +(define_insn "nospeculate<QHSI:mode>" + [(set (match_operand:QHSI 0 "s_register_operand" "=l,l,r") + (unspec_volatile:QHSI + [(match_operator 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]) + (match_operand:QHSI 3 "s_register_operand" "0,0,0") + (match_operand:QHSI 4 "arm_not_operand" "I,K,r")] + VUNSPEC_NOSPECULATE))] + "TARGET_32BIT" + { + if (TARGET_THUMB) + return \"it\\t%d1\;mov%d1\\t%0, %4\;.inst 0xf3af8014\t%@ CSDB\"; + return \"mov%d1\\t%0, %4\;.inst 0xe320f014\t%@ CSDB\"; + } + [(set_attr "type" "mov_imm,mvn_imm,mov_reg") + (set_attr "conds" "use") + (set_attr "length" "8")] +) + +(define_insn "nospeculatedi" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (unspec_volatile:DI + [(match_operator 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]) + (match_operand:DI 3 "s_register_operand" "0") + (match_operand:DI 4 "arm_rhs_operand" "rI")] + VUNSPEC_NOSPECULATE))] + "TARGET_32BIT" + { + if (TARGET_THUMB) + return \"it\\t%d1\;mov%d1\\t%Q0, %Q4\;it\\t%d1\;mov%d1\\t%R0, %R4\;.inst 0xf3af8014\t%@ CSDB\"; + return \"mov%d1\\t%Q0, %Q4\;mov%d1\\t%R0, %R4\;.inst 0xe320f014\t%@ CSDB\"; + } + [(set_attr "type" "mov_reg") + (set_attr "conds" "use") + (set_attr "length" "12")] +) + ;; Vector bits common to IWMMXT and Neon (include 
"vec-common.md") ;; Load the Intel Wireless Multimedia Extension patterns diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md index 99cfa41..7f296ae 100644 --- a/gcc/config/arm/unspecs.md +++ b/gcc/config/arm/unspecs.md @@ -168,6 +168,7 @@ VUNSPEC_MCRR2 ; Represent the coprocessor mcrr2 instruction. VUNSPEC_MRRC ; Represent the coprocessor mrrc instruction. VUNSPEC_MRRC2 ; Represent the coprocessor mrrc2 instruction. + VUNSPEC_NOSPECULATE ; Represent a despeculation sequence. ]) ;; Enumerators for NEON unspecs.