On Tue, Aug 6, 2024 at 4:38 PM Andi Kleen <a...@firstfloor.org> wrote: > > The gimple-if-to-switch pass converts if statements with > multiple equal checks on the same value to a switch. This breaks > vectorization which cannot handle switches. > > Teach the tree-if-conv pass used by the vectorizer to handle > simple switch statements, like those created by if-to-switch earlier. > These are switches that only have a single non default block, > and no ranges. They are handled similar to if in if conversion. > > Some notes: > > In theory this handles switches with case ranges, but it seems > for the simple "one target label" switch case that is supported > here these are always optimized by the cfg passes to COND, > so this case is latent. > > This makes the vect-bitfield-read-1-not test fail. The test > checks for a bitfield analysis failing, but it actually > relied on the ifcvt erroring out early because the test > is using a switch. The if conversion still does not > work because the switch is not in a form that this > patch can handle, but it fails much later and the bitfield > analysis succeeds, which makes the test fail. I marked > it xfail because it doesn't seem to be testing what it wants > to test. > > gcc/ChangeLog: > > PR tree-opt/115866 > * tree-if-conv.cc (if_convertible_switch_p): New function. > (if_convertible_stmt_p): Check for switch. > (get_loop_body_in_if_conv_order): Handle switch. > (predicate_bbs): Likewise. > (predicate_statements): Likewise. > (remove_conditions_and_labels): Likewise. > (ifcvt_split_critical_edges): Likewise. > (ifcvt_local_dce): Likewise. > > gcc/testsuite/ChangeLog: > > * gcc.dg/vect/vect-switch-ifcvt-1.c: New test. > * gcc.dg/vect/vect-switch-ifcvt-2.c: New test. > * gcc.dg/vect/vect-switch-search-line-fast.c: New test. > * gcc.dg/vect/vect-bitfield-read-1-not.c: Change to xfail. > --- > .../gcc.dg/vect/vect-bitfield-read-1-not.c | 2 +- > .../gcc.dg/vect/vect-switch-ifcvt-1.c | 107 ++++++++++++++++++ > .../gcc.dg/vect/vect-switch-ifcvt-2.c | 28 +++++ > .../vect/vect-switch-search-line-fast.c | 17 +++ > gcc/tree-if-conv.cc | 90 ++++++++++++++- > 5 files changed, 238 insertions(+), 6 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/vect/vect-switch-ifcvt-1.c > create mode 100644 gcc/testsuite/gcc.dg/vect/vect-switch-ifcvt-2.c > create mode 100644 gcc/testsuite/gcc.dg/vect/vect-switch-search-line-fast.c > > diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1-not.c > b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1-not.c > index 0d91067ebb2..85f4de8464a 100644 > --- a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1-not.c > +++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1-not.c > @@ -55,6 +55,6 @@ int main (void) > return 0; > } > > -/* { dg-final { scan-tree-dump-not "Bitfield OK to lower." "ifcvt" } } */ > +/* { dg-final { scan-tree-dump-times "Bitfield OK to lower." 0 "ifcvt" { > xfail *-*-* } } } */ > > > diff --git a/gcc/testsuite/gcc.dg/vect/vect-switch-ifcvt-1.c > b/gcc/testsuite/gcc.dg/vect/vect-switch-ifcvt-1.c > new file mode 100644 > index 00000000000..0b06d3c84a7 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/vect-switch-ifcvt-1.c > @@ -0,0 +1,107 @@ > +/* { dg-require-effective-target vect_int } */ > + > +extern void abort (void); > + > +int > +f1 (char *s) > +{ > + int c = 0; > + int i; > + for (i = 0; i < 64; i++) > + { > + switch (*s) > + { > + case ',': > + case '|': > + c++; > + } > + s++; > + } > + return c; > +} > + > +int > +f2 (char *s) > +{ > + int c = 0; > + int i; > + for (i = 0; i < 64; i++) > + { > + if (*s != '#') > + { > + switch (*s) > + { > + case ',': > + case '|': > + c++; > + } > + } > + s++; > + } > + return c; > +} > + > +int > +f3 (char *s) > +{ > + int c = 0; > + int i; > + for (i = 0; i < 64; i++) > + { > + if (*s != '#') > + if (*s == ',' || *s == '|' || *s == '@' || *s == '*') > + c++; > + s++; > + } > + return c; > +} > + > + > +int > +f4 (char *s) > +{ > + int c = 0; > + int i; > + for (i = 0; i < 64; i++) > + { > + if (*s == ',' || *s == '|' || *s == '@' || *s == '*') > + c++; > + s++; > + } > + return c; > +} > + > +#define CHECK(f, str, res) \ > + __builtin_strcpy(buf, str); n = f(buf); if (n != res) abort(); > + > +int > +main () > +{ > + int n; > + char buf[64]; > + > + CHECK (f1, ",,,,,,,,,,", 10); > + CHECK (f1, "||||||||||", 10); > + CHECK (f1, "aaaaaaaaaa", 0); > + CHECK (f1, "", 0); > + CHECK (f1, ",|,|xxxxxx", 4); > + > + CHECK (f2, ",|,|xxxxxx", 4); > + CHECK (f2, ",|,|xxxxxx", 4); > + CHECK (f2, ",|,|xxxxxx", 4); > + CHECK (f2, ",|,|xxxxxx", 4); > + > + CHECK (f3, ",|,|xxxxxx", 4); > + CHECK (f3, ",|,|xxxxxx", 4); > + CHECK (f3, ",|,|xxxxxx", 4); > + CHECK (f3, ",|,|xxxxxx", 4); > + > + CHECK (f4, ",|,|xxxxxx", 4); > + CHECK (f4, ",|,|xxxxxx", 4); > + CHECK (f4, ",|,|xxxxxx", 4); > + CHECK (f4, ",|,|xxxxxx", 4); > + > + return 0; > +} > + > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */ > diff --git a/gcc/testsuite/gcc.dg/vect/vect-switch-ifcvt-2.c > b/gcc/testsuite/gcc.dg/vect/vect-switch-ifcvt-2.c > new file mode 100644 > index 00000000000..c9da6ebb40b > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/vect-switch-ifcvt-2.c > @@ -0,0 +1,28 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target vect_int } */ > + > +/* Check for cases currently not supported by switch tree if conversion. */ > + > +int > +f1 (char *s) > +{ > + int c = 0; > + int i; > + for (i = 0; i < 64; i++) > + { > + switch (*s) > + { > + case ',': > + case '|': > + c++; > + break; > + case '^': > + c += 2; > + break; > + } > + s++; > + } > + return c; > +} > + > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */ > diff --git a/gcc/testsuite/gcc.dg/vect/vect-switch-search-line-fast.c > b/gcc/testsuite/gcc.dg/vect/vect-switch-search-line-fast.c > new file mode 100644 > index 00000000000..15f3a4ef38a > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/vect-switch-search-line-fast.c > @@ -0,0 +1,17 @@ > +/* PR116126 -- once this works use this version in libcpp/lex.c. > + This also requires working value range propagation for s/end. */ > +/* { dg-do compile } */ > +/* { dg-require-effective-target vect_int } */ > + > +const unsigned char *search_line_fast2 (const unsigned char *s, > + const unsigned char *end) > +{ > + while (s < end) { > + if (*s == '\n' || *s == '\r' || *s == '\\' || *s == '?') > + break; > + s++; > + } > + return s; > +} > + > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail > *-*-* } } } */ > diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc > index 57992b6deca..e41ccd421c8 100644 > --- a/gcc/tree-if-conv.cc > +++ b/gcc/tree-if-conv.cc > @@ -124,6 +124,7 @@ along with GCC; see the file COPYING3. If not see > #include "tree-vectorizer.h" > #include "tree-eh.h" > #include "cgraph.h" > +#include "print-tree.h" > > /* For lang_hooks.types.type_for_mode. */ > #include "langhooks.h" > @@ -1082,11 +1083,30 @@ if_convertible_gimple_assign_stmt_p (gimple *stmt, > return true; > } > > +/* Return true when SW switch statement is equivalent to cond, that > + all non default labels point to the same label. > + This is intended for switches created by the if-switch-conversion > + pass, but can handle some programmer supplied cases too. */ > + > +static bool > +if_convertible_switch_p (gswitch *sw) > +{ > + gcc_checking_assert (gimple_switch_num_labels (sw) > 1);
Defensive programming asks for if (gimple_switch_num_labels (sw) <= 1) return false; alternatively the assert is redundant - the access of label '1' below will assert the same. > + tree label = CASE_LABEL (gimple_switch_label (sw, 1)); > + for (unsigned i = 1; i < gimple_switch_num_labels (sw); i++) > + { > + if (CASE_LABEL (gimple_switch_label (sw, i)) != label) > + return false; > + } > + return true; > +} > + > /* Return true when STMT is if-convertible. > > A statement is if-convertible if: > - it is an if-convertible GIMPLE_ASSIGN, > - it is a GIMPLE_LABEL or a GIMPLE_COND, > + - it is a switch equivalent to COND > - it is builtins call, > - it is a call to a function with a SIMD clone. */ > > @@ -1100,6 +1120,9 @@ if_convertible_stmt_p (gimple *stmt, > vec<data_reference_p> refs) > case GIMPLE_COND: > return true; > > + case GIMPLE_SWITCH: > + return if_convertible_switch_p (as_a <gswitch *> (stmt)); > + > case GIMPLE_ASSIGN: > return if_convertible_gimple_assign_stmt_p (stmt, refs); > > @@ -1327,6 +1350,7 @@ get_loop_body_in_if_conv_order (const class loop *loop) > case GIMPLE_CALL: > case GIMPLE_DEBUG: > case GIMPLE_COND: > + case GIMPLE_SWITCH: > gimple_set_uid (gsi_stmt (gsi), 0); > break; > default: > @@ -1426,6 +1450,60 @@ predicate_bbs (loop_p loop) > cond = NULL_TREE; > } > > + /* Assumes the limited COND like switches checked for earlier. */ > + if (gswitch *sw = safe_dyn_cast <gswitch *> (*gsi_last_bb (bb))) else if > + { > + location_t loc = gimple_location (*gsi_last_bb (bb)); > + > + tree default_label = CASE_LABEL (gimple_switch_label (sw, 0)); gimple_switch_default_label (sw) > + tree cond_label = CASE_LABEL (gimple_switch_label (sw, 1)); > + > + edge false_edge = find_edge (bb, label_to_block (cfun, > default_label)); > + edge true_edge = find_edge (bb, label_to_block (cfun, cond_label)); > + > + /* Create chain of switch tests for each case. */ > + tree switch_cond = NULL_TREE; > + tree index = gimple_switch_index (sw); > + for (unsigned i = 1; i < gimple_switch_num_labels (sw); i++) > + { > + tree label = gimple_switch_label (sw, i); > + tree case_cond; > + /* This currently cannot happen because tree-cfg lowers range > + switches with a single destination to COND. */ But it should also lower non-range switches with a single destination ...? See convert_single_case_switch. You say switch (i) { case 1: case 5 ... 7: return 42; default: return 0; } doesn't hit here with a CASE_HIGH for the 5 ... 7 CASE_LABEL? Otherwise the patch looks Ok to me. Thanks, Richard. > + if (CASE_HIGH (label)) > + { > + tree low = build2_loc (loc, GE_EXPR, > + boolean_type_node, > + index, CASE_LOW (label)); > + tree high = build2_loc (loc, LE_EXPR, > + boolean_type_node, > + index, CASE_HIGH (label)); > + case_cond = build2_loc (loc, TRUTH_AND_EXPR, > + boolean_type_node, > + low, high); > + } > + else > + case_cond = build2_loc (loc, EQ_EXPR, > + boolean_type_node, > + index, > + CASE_LOW (gimple_switch_label (sw, > i))); > + if (i > 1) > + switch_cond = build2_loc (loc, TRUTH_OR_EXPR, > + boolean_type_node, > + case_cond, switch_cond); > + else > + switch_cond = case_cond; > + } > + > + add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond), > + unshare_expr (switch_cond)); > + switch_cond = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node, > + unshare_expr (switch_cond)); > + add_to_dst_predicate_list (loop, false_edge, > + unshare_expr (cond), switch_cond); > + cond = NULL_TREE; > + } > + > /* If current bb has only one successor, then consider it as an > unconditional goto. */ > if (single_succ_p (bb)) > @@ -2852,9 +2930,9 @@ predicate_statements (loop_p loop) > } > } > > -/* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks > - other than the exit and latch of the LOOP. Also resets the > - GIMPLE_DEBUG information. */ > +/* Remove all GIMPLE_CONDs and GIMPLE_LABELs and GIMPLE_SWITCH of all > + the basic blocks other than the exit and latch of the LOOP. Also > + resets the GIMPLE_DEBUG information. */ > > static void > remove_conditions_and_labels (loop_p loop) > @@ -2875,6 +2953,7 @@ remove_conditions_and_labels (loop_p loop) > { > case GIMPLE_COND: > case GIMPLE_LABEL: > + case GIMPLE_SWITCH: > gsi_remove (&gsi, true); > break; > > @@ -3265,7 +3344,8 @@ ifcvt_split_critical_edges (class loop *loop, bool > aggressive_if_conv) > continue; > > /* Skip basic blocks not ending with conditional branch. */ > - if (!safe_is_a <gcond *> (*gsi_last_bb (bb))) > + if (!safe_is_a <gcond *> (*gsi_last_bb (bb)) > + && !safe_is_a <gswitch *> (*gsi_last_bb (bb))) > continue; > > FOR_EACH_EDGE (e, ei, bb->succs) > @@ -3330,7 +3410,7 @@ ifcvt_local_dce (class loop *loop) > continue; > } > code = gimple_code (stmt); > - if (code == GIMPLE_COND || code == GIMPLE_CALL) > + if (code == GIMPLE_COND || code == GIMPLE_CALL || code == > GIMPLE_SWITCH) > { > gimple_set_plf (stmt, GF_PLF_2, true); > worklist.safe_push (stmt); > -- > 2.45.2 >