https://gcc.gnu.org/g:26615af93cf759999c5dfae0c51a827b05968cca
commit r14-11151-g26615af93cf759999c5dfae0c51a827b05968cca Author: Jakub Jelinek <ja...@redhat.com> Date: Thu Dec 5 13:01:21 2024 +0100 doloop: Fix up doloop df use [PR116799] The following testcases are miscompiled on s390x-linux, because the doloop_optimize /* Ensure that the new sequence doesn't clobber a register that is live at the end of the block. */ { bitmap modified = BITMAP_ALLOC (NULL); for (rtx_insn *i = doloop_seq; i != NULL; i = NEXT_INSN (i)) note_stores (i, record_reg_sets, modified); basic_block loop_end = desc->out_edge->src; bool fail = bitmap_intersect_p (df_get_live_out (loop_end), modified); check doesn't work as intended. The problem is that it uses df, but the df analysis was only done using iv_analysis_loop_init (loop); -> df_analyze_loop (loop); which computes df only on the bbs of the loop. While loop_end bb is inside of the loop, df_get_live_out computed that way includes registers set in the loop and used at the start of the next iteration, but doesn't include registers set in the loop (or before the loop) and used after the loop. The following patch fixes that by doing whole function df_analyze first, changing the loop iteration mode from 0 to LI_ONLY_INNERMOST (on many targets which use can_use_doloop_if_innermost target hook and so are known to only handle innermost loops) or LI_FROM_INNERMOST (I think only bfin actually allows non-innermost loops) and checking not just df_get_live_out (loop_end) (that is needed for something used by the next iteration), but also df_get_live_in (desc->out_edge->dest), i.e. what will be used after the loop. df of such a bb shouldn't be affected by the df_analyze_loop and so should be from df_analyze of the whole function. 2024-12-05 Jakub Jelinek <ja...@redhat.com> PR rtl-optimization/113994 PR rtl-optimization/116799 * loop-doloop.cc: Include targhooks.h. (doloop_optimize): Also punt on intersection of modified with df_get_live_in (desc->out_edge->dest). (doloop_optimize_loops): Call df_analyze. 
Use LI_ONLY_INNERMOST or LI_FROM_INNERMOST instead of 0 as second loops_list argument. * gcc.c-torture/execute/pr116799.c: New test. * g++.dg/torture/pr113994.C: New test. (cherry picked from commit 0eed81612ad6eac2bec60286348a103d4dc02a5a) Diff: --- gcc/loop-doloop.cc | 20 ++++++++++++- gcc/testsuite/g++.dg/torture/pr113994.C | 31 +++++++++++++++++++ gcc/testsuite/gcc.c-torture/execute/pr116799.c | 41 ++++++++++++++++++++++++++ 3 files changed, 91 insertions(+), 1 deletion(-) diff --git a/gcc/loop-doloop.cc b/gcc/loop-doloop.cc index 529e810e530c..0d101d64bbfc 100644 --- a/gcc/loop-doloop.cc +++ b/gcc/loop-doloop.cc @@ -36,6 +36,7 @@ along with GCC; see the file COPYING3. If not see #include "loop-unroll.h" #include "regs.h" #include "df.h" +#include "targhooks.h" /* This module is used to modify loops with a determinable number of iterations to use special low-overhead looping instructions. @@ -770,6 +771,18 @@ doloop_optimize (class loop *loop) basic_block loop_end = desc->out_edge->src; bool fail = bitmap_intersect_p (df_get_live_out (loop_end), modified); + /* iv_analysis_loop_init calls df_analyze_loop, which computes just + partial df for blocks of the loop only. The above will catch if + any of the modified registers are use inside of the loop body, but + it will most likely not have accurate info on registers used + at the destination of the out_edge. We call df_analyze on the + whole function at the start of the pass though and iterate only + on innermost loops or from innermost loops, so + live in on desc->out_edge->dest should be still unmodified from + the initial df_analyze. */ + if (!fail) + fail = bitmap_intersect_p (df_get_live_in (desc->out_edge->dest), + modified); BITMAP_FREE (modified); if (fail) @@ -795,7 +808,12 @@ doloop_optimize_loops (void) df_live_set_all_dirty (); } - for (auto loop : loops_list (cfun, 0)) + df_analyze (); + + for (auto loop : loops_list (cfun, + targetm.can_use_doloop_p + == can_use_doloop_if_innermost + ? 
LI_ONLY_INNERMOST : LI_FROM_INNERMOST)) doloop_optimize (loop); if (optimize == 1) diff --git a/gcc/testsuite/g++.dg/torture/pr113994.C b/gcc/testsuite/g++.dg/torture/pr113994.C new file mode 100644 index 000000000000..c9c186d45ee7 --- /dev/null +++ b/gcc/testsuite/g++.dg/torture/pr113994.C @@ -0,0 +1,31 @@ +// PR rtl-optimization/113994 +// { dg-do run } + +#include <string> + +void +foo (const std::string &x, size_t &y, std::string &z) +{ + size_t w = x.find (']'); + if (w >= x.size ()) + return; + else if (w == 1) + y = std::string::npos; + while (++w < x.size () && x[w] == u'.') + ; + z = x.substr (w); +} + +__attribute__((noipa)) void +bar () +{ +} + +int +main () +{ + size_t y = 0; + std::string z; + foo ("[0]", y, z); + bar (); +} diff --git a/gcc/testsuite/gcc.c-torture/execute/pr116799.c b/gcc/testsuite/gcc.c-torture/execute/pr116799.c new file mode 100644 index 000000000000..d9af4d9fe4b9 --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/execute/pr116799.c @@ -0,0 +1,41 @@ +/* PR rtl-optimization/116799 */ + +const char *l; + +__attribute__((noipa)) void +foo (const char *x, const char *y, int z) +{ + if (x != l + 1 || y != x || z) + __builtin_abort (); +} + +__attribute__((noipa)) void +bar (const char *x, char *v) +{ + const char *w = x + __builtin_strlen (x); + + while (x[0] == '*' && x < w - 1) + x++; + + const char *y = w - 1; + int z = 1; + if (y >= x) + { + while (y - x > 0 && *y == '*') + y--; + z = 0; + } + int i = 0; + if (z) + v[i++] = 'a'; + v[i] = 'b'; + foo (x, y, z); +} + +int +main () +{ + char v[2] = { 0 }; + l = "**"; + bar (l, v); +}