On Fri, May 24, 2024 at 9:27 AM Richard Biener <rguent...@suse.de> wrote: > > On Thu, 23 May 2024, Manolis Tsamis wrote: > > > This pass detects cases of expensive store forwarding and tries to avoid > > them > > by reordering the stores and using suitable bit insertion sequences. > > For example it can transform this: > > > > strb w2, [x1, 1] > > ldr x0, [x1] # Epxensive store forwarding to larger load. > > > > To: > > > > ldr x0, [x1] > > strb w2, [x1] > > bfi x0, x2, 0, 8 > > How do we represent atomics? If the latter is a load-acquire or release > the transform would be invalid. > > > Assembly like this can appear with bitfields or type punning / unions. > > On stress-ng when running the cpu-union microbenchmark the following > > speedups > > have been observed. > > > > Neoverse-N1: +29.4% > > Intel Coffeelake: +13.1% > > AMD 5950X: +17.5% > > > > PR rtl-optimization/48696 > > > > gcc/ChangeLog: > > > > * Makefile.in: Add avoid-store-forwarding.o. > > * common.opt: New option -favoid-store-forwarding. > > * params.opt: New param store-forwarding-max-distance. > > * passes.def: Schedule a new pass. > > * tree-pass.h (make_pass_rtl_avoid_store_forwarding): Declare. > > * avoid-store-forwarding.cc: New file. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.dg/avoid-store-forwarding-1.c: New test. > > * gcc.dg/avoid-store-forwarding-2.c: New test. > > * gcc.dg/avoid-store-forwarding-3.c: New test. > > > > Signed-off-by: Manolis Tsamis <manolis.tsa...@vrull.eu> > > --- > > > > gcc/Makefile.in | 1 + > > gcc/avoid-store-forwarding.cc | 554 ++++++++++++++++++ > > gcc/common.opt | 4 + > > gcc/params.opt | 4 + > > gcc/passes.def | 1 + > > .../gcc.dg/avoid-store-forwarding-1.c | 46 ++ > > .../gcc.dg/avoid-store-forwarding-2.c | 39 ++ > > .../gcc.dg/avoid-store-forwarding-3.c | 31 + > > gcc/tree-pass.h | 1 + > > 9 files changed, 681 insertions(+) > > create mode 100644 gcc/avoid-store-forwarding.cc > > create mode 100644 gcc/testsuite/gcc.dg/avoid-store-forwarding-1.c > > create mode 100644 gcc/testsuite/gcc.dg/avoid-store-forwarding-2.c > > create mode 100644 gcc/testsuite/gcc.dg/avoid-store-forwarding-3.c > > > > diff --git a/gcc/Makefile.in b/gcc/Makefile.in > > index a7f15694c34..be969b1ca1d 100644 > > --- a/gcc/Makefile.in > > +++ b/gcc/Makefile.in > > @@ -1681,6 +1681,7 @@ OBJS = \ > > statistics.o \ > > stmt.o \ > > stor-layout.o \ > > + avoid-store-forwarding.o \ > > store-motion.o \ > > streamer-hooks.o \ > > stringpool.o \ > > diff --git a/gcc/avoid-store-forwarding.cc b/gcc/avoid-store-forwarding.cc > > new file mode 100644 > > index 00000000000..d90627c4872 > > --- /dev/null > > +++ b/gcc/avoid-store-forwarding.cc > > @@ -0,0 +1,554 @@ > > +/* Avoid store forwarding optimization pass. > > + Copyright (C) 2024 Free Software Foundation, Inc. > > + Contributed by VRULL GmbH. > > + > > + This file is part of GCC. > > + > > + GCC is free software; you can redistribute it and/or modify it > > + under the terms of the GNU General Public License as published by > > + the Free Software Foundation; either version 3, or (at your option) > > + any later version. > > + > > + GCC is distributed in the hope that it will be useful, but > > + WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + General Public License for more details. > > + > > + You should have received a copy of the GNU General Public License > > + along with GCC; see the file COPYING3. If not see > > + <http://www.gnu.org/licenses/>. */ > > + > > +#include "config.h" > > +#include "system.h" > > +#include "coretypes.h" > > +#include "backend.h" > > +#include "rtl.h" > > +#include "alias.h" > > +#include "rtlanal.h" > > +#include "tree-pass.h" > > +#include "cselib.h" > > +#include "predict.h" > > +#include "insn-config.h" > > +#include "expmed.h" > > +#include "recog.h" > > +#include "regset.h" > > +#include "df.h" > > +#include "expr.h" > > +#include "memmodel.h" > > +#include "emit-rtl.h" > > +#include "vec.h" > > + > > +/* This pass tries to detect and avoid cases of store forwarding. > > + On many processors there is a large penalty when smaller stores are > > + forwarded to larger loads. The idea used to avoid the stall is to move > > + the store after the load and in addition emit a bit insert sequence so > > + the load register has the correct value. For example the following: > > + > > + strb w2, [x1, 1] > > + ldr x0, [x1] > > + > > + Will be transformed to: > > + > > + ldr x0, [x1] > > + and w2, w2, 255 > > + strb w2, [x1] > > + bfi x0, x2, 0, 8 > > +*/ > > + > > +namespace { > > + > > +const pass_data pass_data_avoid_store_forwarding = > > +{ > > + RTL_PASS, /* type. */ > > + "avoid_store_forwarding", /* name. */ > > + OPTGROUP_NONE, /* optinfo_flags. */ > > + TV_NONE, /* tv_id. */ > > + 0, /* properties_required. */ > > + 0, /* properties_provided. */ > > + 0, /* properties_destroyed. */ > > + 0, /* todo_flags_start. */ > > + TODO_df_finish /* todo_flags_finish. */ > > +}; > > + > > +class pass_rtl_avoid_store_forwarding : public rtl_opt_pass > > +{ > > +public: > > + pass_rtl_avoid_store_forwarding (gcc::context *ctxt) > > + : rtl_opt_pass (pass_data_avoid_store_forwarding, ctxt) > > + {} > > + > > + /* opt_pass methods: */ > > + virtual bool gate (function *) > > + { > > + return flag_avoid_store_forwarding && optimize >= 1; > > + } > > + > > + virtual unsigned int execute (function *) override; > > +}; // class pass_rtl_avoid_store_forwarding > > + > > +typedef struct > > +{ > > + /* The store instruction that is a store forwarding candidate. */ > > + rtx_insn *store_insn; > > + /* SET_DEST (single_set (store_insn)). */ > > + rtx store_mem; > > + /* The temporary that will hold the stored value at the original store > > + position. */ > > + rtx mov_reg; > > + /* The instruction sequence that inserts the stored value's bits at the > > + appropriate position in the loaded value. */ > > + rtx_insn *bits_insert_insns; > > + /* The byte offset for the store's position within the load. */ > > + HOST_WIDE_INT offset; > > + > > + unsigned int insn_cnt; > > + bool remove; > > + bool forwarded; > > +} store_info; > > + > > +static unsigned int stats_sf_detected = 0; > > +static unsigned int stats_sf_avoided = 0; > > + > > +static rtx > > +get_load_mem (rtx expr) > > +{ > > + if (!expr) > > + return NULL_RTX; > > + > > + rtx mem = SET_SRC (expr); > > + > > + if (GET_CODE (mem) == ZERO_EXTEND > > + || GET_CODE (mem) == SIGN_EXTEND) > > + mem = XEXP (mem, 0); > > + > > + if (MEM_P (mem)) > > + return mem; > > + else > > + return NULL_RTX; > > +} > > + > > +/* Return true iff a store to STORE_MEM would write to a sub-region of > > bytes > > + from what LOAD_MEM would read. If true also store the relative byte > > offset > > + of the store within the load to OFF_VAL. */ > > + > > +static bool > > +is_store_forwarding (rtx store_mem, rtx load_mem, HOST_WIDE_INT *off_val) > > +{ > > + if (known_ge (GET_MODE_SIZE (GET_MODE (store_mem)), > > + GET_MODE_SIZE (GET_MODE (load_mem)))) > > + return false; > > + > > + rtx off = simplify_gen_binary (MINUS, GET_MODE (XEXP (store_mem, 0)), > > + XEXP (store_mem, 0), XEXP (load_mem, 0)); > > + > > + if (CONST_INT_P (off)) > > + { > > + *off_val = INTVAL (off); > > + scalar_int_mode store_mode, load_mode; > > + if (is_int_mode (GET_MODE (store_mem), &store_mode) > > + && is_int_mode (GET_MODE (load_mem), &load_mode)) > > This is a quite severe limitation - most forwarding issues I ran into > are caused by vectorization where we have scalar stores and a vector > load. And it happens for both integer and floating point elements. > Addressed in V2. I haven't searched extensively for such cases yet and don't have performance numbers but the pass now works fine with scalar -> vector store forwardings (added a simple testcase for it too). I also added some simple costing for the generated bit insert sequences. Depending on the architecture the scalar -> vector sequences may be large and/or unprofitable. But on others (e.g. aarch64) they're just a few instructions so I can see the benefit.
> > + { > > + HOST_WIDE_INT store_mode_size = GET_MODE_SIZE (store_mode); > > + HOST_WIDE_INT load_mode_size = GET_MODE_SIZE (load_mode); > > + > > + return *off_val >= 0 > > + && (*off_val + store_mode_size <= load_mode_size); > > + } > > + } > > + > > + return false; > > +} > > + > > +/* Return a bit insertion sequence that would make DEST have the correct > > value > > + if the store represented by STORE_INFO were to be moved after DEST. */ > > + > > +static rtx_insn * > > +generate_bit_insert_sequence (store_info *store_info, rtx dest, > > + machine_mode load_inner_mode) > > +{ > > + scalar_int_mode store_mode, load_mode; > > + if (!is_int_mode (GET_MODE (store_info->store_mem), &store_mode) > > + || !is_int_mode (load_inner_mode, &load_mode)) > > + return NULL; > > + > > + HOST_WIDE_INT load_mem_size = GET_MODE_SIZE (load_mode); > > + HOST_WIDE_INT store_mem_size = GET_MODE_SIZE (store_mode); > > + HOST_WIDE_INT bf_offset_bytes; > > + > > + if (BYTES_BIG_ENDIAN) > > + bf_offset_bytes = load_mem_size - store_mem_size - store_info->offset; > > + else > > + bf_offset_bytes = store_info->offset; > > + > > + start_sequence (); > > + store_bit_field (dest, store_mem_size * BITS_PER_UNIT, > > + bf_offset_bytes * BITS_PER_UNIT, 0, 0, > > + GET_MODE (dest), store_info->mov_reg, > > + false, false); > > + rtx_insn *insns = get_insns (); > > While convenient this can actually end up spilling to memory which > would be worse. I think you want to call the actual workers for > the cases you want to handle which can fail instead of spilling. > Addressed in V2. I decided to just iterate the generated sequence and search for MEM with contains_mem_rtx_p; I believe it is desirable to not expose the worker functions. This should also make the pass independent of any future refactorings of the store_bit_field helper functions. > > + end_sequence (); > > + > > + return insns; > > +} > > + > > +/* Given a list of small stores that are forwarded to LOAD_INSN, try to > > + rearrange them so that a store-forwarding penalty doesn't occur. */ > > + > > +static bool > > +process_forwardings (vec<store_info> &stores, rtx_insn *load_insn) > > +{ > > + rtx load = single_set (load_insn); > > + machine_mode load_inner_mode = GET_MODE (get_load_mem (load)); > > + > > + /* If the stores cover all the bytes of the load without overlap then we > > can > > + eliminate the load entirely and use the computed value instead. */ > > + HOST_WIDE_INT load_size > > + = GET_MODE_SIZE (as_a <scalar_int_mode> (GET_MODE (get_load_mem > > (load)))); > > + sbitmap forwarded_bytes = sbitmap_alloc (load_size); > > + > > + unsigned int i; > > + store_info* it; > > + FOR_EACH_VEC_ELT (stores, i, it) > > + { > > + HOST_WIDE_INT store_size > > + = GET_MODE_SIZE (as_a <scalar_int_mode> (GET_MODE (it->store_mem))); > > + if (bitmap_bit_in_range_p (forwarded_bytes, it->offset, > > + it->offset + store_size - 1)) > > + break; > > + bitmap_set_range (forwarded_bytes, it->offset, store_size); > > + } > > + > > + bitmap_not (forwarded_bytes, forwarded_bytes); > > + bool eliminate_load = bitmap_empty_p (forwarded_bytes); > > + > > + stats_sf_detected++; > > + > > + if (dump_file) > > + { > > + fprintf (dump_file, "Store forwarding%s detected:\n", > > + (stores.length () > 1) ? "s" : ""); > > + > > + FOR_EACH_VEC_ELT (stores, i, it) > > + { > > + fprintf (dump_file, "From: "); > > + print_rtl_single (dump_file, it->store_insn); > > + } > > + > > + fprintf (dump_file, "To: "); > > + print_rtl_single (dump_file, load_insn); > > + > > + if (eliminate_load) > > + fprintf (dump_file, "(Load elimination candidate)\n"); > > + } > > + > > + rtx dest; > > + if (eliminate_load) > > + dest = gen_reg_rtx (load_inner_mode); > > + else > > + dest = SET_DEST (load); > > + > > + int move_to_front = -1; > > + > > + /* Check if we can emit bit insert instructions for all forwarded > > stores. */ > > + FOR_EACH_VEC_ELT (stores, i, it) > > + { > > + it->mov_reg = gen_reg_rtx (GET_MODE (it->store_mem)); > > + rtx_insn *insns = NULL; > > + > > + /* If we're eliminating the load then find the store with zero offset > > + and use it as the base register to avoid a bit insert. */ > > + if (eliminate_load && it->offset == 0) > > + { > > + start_sequence (); > > + > > + /* We can use a paradoxical subreg to force this to a wider mode, as > > + the only use will be inserting the bits (i.e., we don't care > > about > > + the value of the higher bits). */ > > + rtx ext0 = gen_rtx_SUBREG (GET_MODE (dest), it->mov_reg, 0); > > + rtx_insn *move0 = emit_move_insn (dest, ext0); > > + if (recog_memoized (move0) >= 0) > > + { > > + insns = get_insns (); > > + move_to_front = (int) i; > > + } > > + > > + end_sequence (); > > + } > > + > > + if (!insns) > > + insns = generate_bit_insert_sequence (&(*it), dest, load_inner_mode); > > + > > + if (!insns) > > + { > > + if (dump_file) > > + { > > + fprintf (dump_file, "Failed due to: "); > > + print_rtl_single (dump_file, it->store_insn); > > + } > > + return false; > > + } > > + > > + it->bits_insert_insns = insns; > > + } > > + > > + /* If we have a move instead of bit insert, it needs to be emitted first > > in > > + the resulting sequence. */ > > + if (move_to_front != -1) > > + { > > + stores.safe_push (stores[move_to_front]); > > + stores.ordered_remove (move_to_front); > > + } > > + > > + if (dump_file) > > + { > > + fprintf (dump_file, "Store forwarding%s avoided with bit inserts:\n", > > + (stores.length () > 1) ? "s" : ""); > > + > > + FOR_EACH_VEC_ELT (stores, i, it) > > + { > > + if (stores.length () > 1) > > + { > > + fprintf (dump_file, "For: "); > > + print_rtl_single (dump_file, it->store_insn); > > + } > > + > > + fprintf (dump_file, "With sequence:\n"); > > + > > + for (rtx_insn *insn = it->bits_insert_insns; insn; > > + insn = NEXT_INSN (insn)) > > + { > > + fprintf (dump_file, " "); > > + print_rtl_single (dump_file, insn); > > + } > > + } > > + } > > + > > + stats_sf_avoided++; > > + > > + if (eliminate_load) > > + { > > + machine_mode outter_mode = GET_MODE (SET_DEST (load)); > > + rtx_code extend = ZERO_EXTEND; > > + if (outter_mode != load_inner_mode) > > + extend = GET_CODE (SET_SRC (load)); > > + > > + rtx load_value = simplify_gen_unary (extend, outter_mode, dest, > > + load_inner_mode); > > + rtx load_move = gen_move_insn (SET_DEST (load), load_value); > > + df_insn_rescan (emit_insn_after (load_move, load_insn)); > > + } > > + > > + FOR_EACH_VEC_ELT (stores, i, it) > > + { > > + /* Emit code that updated the loaded value to account for the > > + missing store. */ > > + df_insn_rescan (emit_insn_after (it->bits_insert_insns, load_insn)); > > + } > > + > > + FOR_EACH_VEC_ELT (stores, i, it) > > + { > > + rtx store_set = single_set (it->store_insn); > > + /* Create a register move at the store's original position to save > > the > > + stored value. */ > > + rtx mov1 = gen_move_insn (it->mov_reg, SET_SRC (store_set)); > > + df_insn_rescan (emit_insn_before (mov1, it->store_insn)); > > + /* Create a new store after the load with the saved original value. > > + This avoids the forwarding stall. */ > > + rtx mov2 = gen_move_insn (SET_DEST (store_set), it->mov_reg); > > + df_insn_rescan (emit_insn_after (mov2, load_insn)); > > + /* Done, delete the original store. */ > > + set_insn_deleted (it->store_insn); > > + } > > + > > + df_insn_rescan (load_insn); > > + > > + if (eliminate_load) > > + set_insn_deleted (load_insn); > > + > > + return true; > > +} > > + > > +/* Process BB for expensive store forwardings. */ > > + > > +static void > > +avoid_store_forwarding (basic_block bb) > > +{ > > + auto_vec<store_info, 8> store_exprs; > > + rtx_insn *insn; > > + unsigned int insn_cnt = 0; > > + > > + FOR_BB_INSNS (bb, insn) > > + { > > + if (!NONDEBUG_INSN_P (insn)) > > + continue; > > + > > + rtx set = single_set (insn); > > + > > + /* Store forwarding issues are unlikely if we cross a call. > > + Clear store forwarding candidates if we can't understand INSN. */ > > + if (CALL_P (insn) || !set || volatile_refs_p (set)) > > + { > > + store_exprs.truncate (0); > > + continue; > > + } > > + > > + rtx load_mem = get_load_mem (set); > > + int removed_count = 0; > > + > > + if (MEM_P (SET_DEST (set))) > > + { > > + /* Record store forwarding candidate. */ > > + store_info info; > > + info.store_insn = insn; > > + info.store_mem = SET_DEST (set); > > + info.insn_cnt = insn_cnt; > > + info.remove = false; > > + info.forwarded = false; > > + store_exprs.safe_push (info); > > + } > > + else if (load_mem) > > + { > > + /* Process load for possible store forwardings. */ > > + auto_vec<store_info> forwardings; > > + bool partial_forwarding = false; > > + bool remove_rest = false; > > + > > + unsigned int i; > > + store_info *it; > > + FOR_EACH_VEC_ELT_REVERSE (store_exprs, i, it) > > + { > > If you have a basic-block with a lot of stores and loads this becomes > quadratic. In practice the store buffer of CPUs has limited size > and stores remain in the store buffer for a finite time. I think > it makes sense to only keep a fixed number of stores (use a > configurable --param?) and likewise limit the distance between > store-load candidates you inspect (or maybe just that, by counting > non-debug insns). That makes it technically O(1) but at least > avoids quadraticness for large BBs. > > > + rtx store_mem = it->store_mem; > > + HOST_WIDE_INT off_val; > > + > > + if (remove_rest) > > + { > > + it->remove = true; > > + removed_count++; > > + } > > + else if (is_store_forwarding (store_mem, load_mem, &off_val)) > > + { > > + /* Check if moving this store after the load is legal. */ > > + bool write_dep = false; > > + for (unsigned int j = store_exprs.length () - 1; j != i; > > j--) > > + if (!store_exprs[j].forwarded > > + && output_dependence (store_mem, > > + store_exprs[j].store_mem)) > > + { > > + write_dep = true; > > + break; > > + } > > + > > + if (!write_dep) > > + { > > + it->forwarded = true; > > + it->offset = off_val; > > + forwardings.safe_push (*it); > > + } > > + else > > + partial_forwarding = true; > > + > > + it->remove = true; > > + removed_count++; > > + } > > + else if (true_dependence (store_mem, GET_MODE (store_mem), > > + load_mem)) > > + { > > + /* We cannot keep a store forwarding candidate if it > > possibly > > + interferes with this load. */ > > + it->remove = true; > > + removed_count++; > > + remove_rest = true; > > + } > > + } > > + > > + if (!forwardings.is_empty () && !partial_forwarding) > > + process_forwardings (forwardings, insn); > > + } > > + else > > + { > > + rtx reg = SET_DEST (set); > > + > > + while (GET_CODE (reg) == ZERO_EXTRACT > > + || GET_CODE (reg) == STRICT_LOW_PART > > + || GET_CODE (reg) == SUBREG) > > + reg = XEXP (reg, 0); > > + > > + /* Drop store forwarding candidates when the address register is > > + overwritten. */ > > + if (REG_P (reg)) > > + { > > + bool remove_rest = false; > > + unsigned int i; > > + store_info *it; > > + FOR_EACH_VEC_ELT_REVERSE (store_exprs, i, it) > > + { > > + if (remove_rest > > + || reg_overlap_mentioned_p (reg, it->store_mem)) > > + { > > + it->remove = true; > > + removed_count++; > > + remove_rest = true; > > + } > > + } > > + } > > + else > > + { > > + /* We can't understand INSN. */ > > + store_exprs.truncate (0); > > + continue; > > + } > > + } > > + > > + if (removed_count) > > + { > > + unsigned int i, j; > > + store_info *it; > > + VEC_ORDERED_REMOVE_IF (store_exprs, i, j, it, it->remove); > > + } > > + > > + /* Don't consider store forwarding if the RTL instruction distance is > > + more than PARAM_STORE_FORWARDING_MAX_DISTANCE. */ > > + if (!store_exprs.is_empty () > > + && (store_exprs[0].insn_cnt > > + + param_store_forwarding_max_distance <= insn_cnt)) > > + store_exprs.ordered_remove (0); > > + > > + insn_cnt++; > > + } > > +} > > + > > +unsigned int > > +pass_rtl_avoid_store_forwarding::execute (function *fn) > > +{ > > + df_set_flags (DF_DEFER_INSN_RESCAN); > > + df_note_add_problem (); > > + > > + init_alias_analysis (); > > + cselib_init (CSELIB_RECORD_MEMORY | CSELIB_PRESERVE_CONSTANTS); > > + > > + stats_sf_detected = 0; > > + stats_sf_avoided = 0; > > + > > + basic_block bb; > > + FOR_EACH_BB_FN (bb, fn) > > + avoid_store_forwarding (bb); > > + > > + end_alias_analysis (); > > + cselib_finish (); > > + df_analyze (); > > + > > + statistics_counter_event (fn, "Store forwardings detected: ", > > + stats_sf_detected); > > + statistics_counter_event (fn, "Store forwardings avoided: ", > > + stats_sf_detected); > > + > > + return 0; > > +} > > + > > +} // anon namespace. > > + > > +rtl_opt_pass * > > +make_pass_rtl_avoid_store_forwarding (gcc::context *ctxt) > > +{ > > + return new pass_rtl_avoid_store_forwarding (ctxt); > > +} > > diff --git a/gcc/common.opt b/gcc/common.opt > > index 2c078fdd1f8..2fcf7170c2a 100644 > > --- a/gcc/common.opt > > +++ b/gcc/common.opt > > @@ -1747,6 +1747,10 @@ fgcse-sm > > Common Var(flag_gcse_sm) Init(0) Optimization > > Perform store motion after global common subexpression elimination. > > > > +favoid-store-forwarding > > +Common Var(flag_avoid_store_forwarding) Init(0) Optimization > > +Try to avoid store forwarding. > > + > > fgcse-las > > Common Var(flag_gcse_las) Init(0) Optimization > > Perform redundant load after store elimination in global common > > subexpression > > diff --git a/gcc/params.opt b/gcc/params.opt > > index d34ef545bf0..b8115f5c27a 100644 > > --- a/gcc/params.opt > > +++ b/gcc/params.opt > > @@ -1032,6 +1032,10 @@ Allow the store merging pass to introduce unaligned > > stores if it is legal to do > > Common Joined UInteger Var(param_store_merging_max_size) Init(65536) > > IntegerRange(1, 65536) Param Optimization > > Maximum size of a single store merging region in bytes. > > > > +-param=store-forwarding-max-distance= > > +Common Joined UInteger Var(param_store_forwarding_max_distance) Init(10) > > IntegerRange(1, 1000) Param Optimization > > +Maximum number of instruction distance that a small store forwarded to a > > larger load may stall. > > + > > -param=switch-conversion-max-branch-ratio= > > Common Joined UInteger Var(param_switch_conversion_branch_ratio) Init(8) > > IntegerRange(1, 65536) Param Optimization > > The maximum ratio between array size and switch branches for a switch > > conversion to take place. > > diff --git a/gcc/passes.def b/gcc/passes.def > > index 1cbbd413097..1e608774707 100644 > > --- a/gcc/passes.def > > +++ b/gcc/passes.def > > @@ -462,6 +462,7 @@ along with GCC; see the file COPYING3. If not see > > NEXT_PASS (pass_lower_subreg); > > NEXT_PASS (pass_df_initialize_opt); > > NEXT_PASS (pass_cse); > > + NEXT_PASS (pass_rtl_avoid_store_forwarding); > > NEXT_PASS (pass_rtl_fwprop); > > NEXT_PASS (pass_rtl_cprop); > > NEXT_PASS (pass_rtl_pre); > > diff --git a/gcc/testsuite/gcc.dg/avoid-store-forwarding-1.c > > b/gcc/testsuite/gcc.dg/avoid-store-forwarding-1.c > > new file mode 100644 > > index 00000000000..0775aee898b > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/avoid-store-forwarding-1.c > > @@ -0,0 +1,46 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O2 -fdump-rtl-avoid_store_forwarding" } */ > > + > > +typedef union { > > + char arr_8[8]; > > + long long_value; > > +} DataUnion; > > + > > +long ssll_1 (DataUnion *data, char x) > > +{ > > + data->arr_8[0] = x; > > + return data->long_value; > > +} > > + > > +long ssll_2 (DataUnion *data, char x) > > +{ > > + data->arr_8[1] = x; > > + return data->long_value; > > +} > > + > > +long ssll_3 (DataUnion *data, char x) > > +{ > > + data->arr_8[7] = x; > > + return data->long_value; > > +} > > + > > +long ssll_4 (DataUnion **data, char x) > > +{ > > + (*data)->arr_8[0] = x; > > + return (*data)->long_value; > > +} > > + > > +long ssll_5 (DataUnion **data, char x) > > +{ > > + (*data)->arr_8[1] = x; > > + return (*data)->long_value; > > +} > > + > > +long ssll_6 (DataUnion **data, char x) > > +{ > > + (*data)->arr_8[7] = x; > > + return (*data)->long_value; > > +} > > + > > +/* { dg-final { scan-tree-dump-times "Store forwarding detected" 6 } } */ > > +/* { dg-final { scan-tree-dump-times "Store forwarding avoided" 6 } } */ > > diff --git a/gcc/testsuite/gcc.dg/avoid-store-forwarding-2.c > > b/gcc/testsuite/gcc.dg/avoid-store-forwarding-2.c > > new file mode 100644 > > index 00000000000..cd81aa248fe > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/avoid-store-forwarding-2.c > > @@ -0,0 +1,39 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O2 -fdump-rtl-avoid_store_forwarding" } */ > > + > > +typedef union { > > + char arr_8[8]; > > + int long_value; > > +} DataUnion1; > > + > > +long no_ssll_1 (DataUnion1 *data, char x) > > +{ > > + data->arr_8[4] = x; > > + return data->long_value; > > +} > > + > > +long no_ssll_2 (DataUnion1 *data, char x) > > +{ > > + data->arr_8[5] = x; > > + return data->long_value; > > +} > > + > > +typedef union { > > + char arr_8[8]; > > + short long_value[4]; > > +} DataUnion2; > > + > > +long no_ssll_3 (DataUnion2 *data, char x) > > +{ > > + data->arr_8[4] = x; > > + return data->long_value[1]; > > +} > > + > > +long no_ssll_4 (DataUnion2 *data, char x) > > +{ > > + data->arr_8[0] = x; > > + return data->long_value[1]; > > +} > > + > > +/* { dg-final { scan-tree-dump-times "Store forwarding detected" 0 } } */ > > +/* { dg-final { scan-tree-dump-times "Store forwarding avoided" 0 } } */ > > diff --git a/gcc/testsuite/gcc.dg/avoid-store-forwarding-3.c > > b/gcc/testsuite/gcc.dg/avoid-store-forwarding-3.c > > new file mode 100644 > > index 00000000000..3175f882c86 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/avoid-store-forwarding-3.c > > @@ -0,0 +1,31 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O2 -fdump-rtl-avoid_store_forwarding" } */ > > + > > +typedef union { > > + char arr_8[8]; > > + long long_value; > > +} DataUnion; > > + > > +long ssll_multi_1 (DataUnion **data, char x) > > +{ > > + (*data)->arr_8[0] = x; > > + (*data)->arr_8[2] = x; > > + return (*data)->long_value; > > +} > > + > > +long ssll_multi_2 (DataUnion **data, char x) > > +{ > > + (*data)->arr_8[0] = x; > > + (*data)->arr_8[1] = 11; > > + return (*data)->long_value; > > +} > > + > > +long ssll_multi_3 (DataUnion **data, char x, short y) > > +{ > > + (*data)->arr_8[1] = x; > > + __builtin_memcpy((*data)->arr_8 + 4, &y, sizeof(short)); > > + return (*data)->long_value; > > +} > > + > > +/* { dg-final { scan-tree-dump-times "Store forwardings detected" 3 } } */ > > +/* { dg-final { scan-tree-dump-times "Store forwardings avoided" 3 } } */ > > diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h > > index 29267589eeb..49957ba3373 100644 > > --- a/gcc/tree-pass.h > > +++ b/gcc/tree-pass.h > > @@ -570,6 +570,7 @@ extern rtl_opt_pass *make_pass_rtl_dse3 (gcc::context > > *ctxt); > > extern rtl_opt_pass *make_pass_rtl_cprop (gcc::context *ctxt); > > extern rtl_opt_pass *make_pass_rtl_pre (gcc::context *ctxt); > > extern rtl_opt_pass *make_pass_rtl_hoist (gcc::context *ctxt); > > +extern rtl_opt_pass *make_pass_rtl_avoid_store_forwarding (gcc::context > > *ctxt); > > extern rtl_opt_pass *make_pass_rtl_store_motion (gcc::context *ctxt); > > extern rtl_opt_pass *make_pass_cse_after_global_opts (gcc::context *ctxt); > > extern rtl_opt_pass *make_pass_rtl_ifcvt (gcc::context *ctxt); > > > > -- > Richard Biener <rguent...@suse.de> > SUSE Software Solutions Germany GmbH, > Frankenstrasse 146, 90461 Nuernberg, Germany; > GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)