On Thu, 11 Apr 2019, Jakub Jelinek wrote:

> On Thu, Apr 11, 2019 at 09:54:24AM +0200, Richard Biener wrote:
> > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> > > 
> > > During those 2 bootstraps/regtests, data.load_found has been set just
> > > on the new testcase on ia32.
> > 
> > Hmm, I wonder whether we really need to DCE calls after reload?
> > That said, I'm not familiar enough with the code to check if the
> > patch makes sense (can there ever be uses of the argument slots
> > _after_ the call?).
> 
> And here is the patch on top of the refactoring patch.
> As for the argument slots after the call, hope Jeff and Vlad won't mind if I
> copy'n'paste what they said on IRC yesterday:
> law: vmakarov: in PR89965, is it valid RTL to store something in stack 
> argument slot and read it again before the call that uses that stack slot?
> law: vmakarov: if yes, I have a rough idea what to do, but if stack argument 
> slots may be only stored and can be read only by a callee and not by 
> something else in the caller, then it is a RA issue
> <vmakarov> jakub: i think it is not defined (or described). But the old 
> reload used equiv memory for long time to store value in memory.  LRA just 
> uses the same approach.
> <law> i think you're allowed to read it up to the call point, who "owns" the 
> contents of the slot after the call point is subject to debate :-)
> <law> not sure if we defined things once a REG_EQUIV is on the MEM, but that 
> would tend to imply the memory is unchanging across the call
> <law> (though one could ask how in the world the caller would know the callee 
> didn't clobber the slot....)

OK, so that says "well, nothing prevents it from being used", for example
for a const/pure call (depending on what alias analysis does here...).
Who owns the argument slots should be defined by the ABI, I guess.
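
For concreteness, a rough source-level sketch of the kind of situation in
question (hypothetical, not the PR testcase; the function names are made
up):

  /* With accumulate-outgoing-args the arguments of pure_fn live in fixed
     outgoing-argument slots of the caller's frame.  If the register
     allocator also uses one of those slots as equivalence memory for a
     pseudo, a load from the slot can appear between the argument store
     and the (possibly dead) call.  */

  __attribute__ ((pure)) long pure_fn (long a, long b, long c, long d,
                                       long e, long f, long g, long h);

  long
  caller (long x)
  {
    /* Result unused, so rtl-dce may delete the call together with the
       stores that set up its stack argument slots.  */
    pure_fn (x, x + 1, x + 2, x + 3, x + 4, x + 5, x + 6, x + 7);
    return x;
  }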

So if (and only if) alias analysis doesn't leave us any chance to pick up
the stack slot contents after the call, we're of course fine.  And if we
do have the chance but shouldn't, then the call instruction needs some
explicit clobbering of the argument area (or the RTL IL needs to document
that such clobbering is implicit, and the alias machinery then needs to
honor that).
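
To illustrate the first option, a sketch of what explicit clobbering could
look like (purely hypothetical; the helper name, and using
CALL_INSN_FUNCTION_USAGE plus a BLKmode MEM for the argument area, are my
assumptions, not something the posted patch does):

  /* Hypothetical helper: record that CALL_INSN may clobber the outgoing
     argument area of ARG_SIZE bytes, so alias analysis doesn't treat the
     slots as unchanged across the call.  */

  static void
  note_arg_area_clobber (rtx_call_insn *call_insn, HOST_WIDE_INT arg_size)
  {
    /* BLKmode MEM covering the outgoing argument area; the real address
       computation would be target-specific.  */
    rtx arg_area = gen_rtx_MEM (BLKmode, stack_pointer_rtx);
    set_mem_size (arg_area, arg_size);

    CALL_INSN_FUNCTION_USAGE (call_insn)
      = gen_rtx_EXPR_LIST (VOIDmode,
                           gen_rtx_CLOBBER (VOIDmode, arg_area),
                           CALL_INSN_FUNCTION_USAGE (call_insn));
  }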

Still leaving review of the patch to somebody else (it's clearly
closing at least part of the hole).

Richard.

> 2019-04-11  Jakub Jelinek  <ja...@redhat.com>
>       
>       PR rtl-optimization/89965
>       * dce.c: Include rtl-iter.h.
>       (struct check_argument_load_data): New type.
>       (check_argument_load): New function.
>       (find_call_stack_args): Check for loads from stack slots still tracked
>       in sp_bytes and punt if any is found.
> 
>       * gcc.target/i386/pr89965.c: New test.
> 
> --- gcc/dce.c.jj      2019-04-11 10:30:00.436705065 +0200
> +++ gcc/dce.c 2019-04-11 10:43:00.926828390 +0200
> @@ -35,6 +35,7 @@ along with GCC; see the file COPYING3.
>  #include "valtrack.h"
>  #include "tree-pass.h"
>  #include "dbgcnt.h"
> +#include "rtl-iter.h"
>  
>  
>  /* -------------------------------------------------------------------------
> @@ -325,6 +326,48 @@ sp_based_mem_offset (rtx_call_insn *call
>    return off;
>  }
>  
> +/* Data for check_argument_load called via note_uses.  */
> +struct check_argument_load_data {
> +  bitmap sp_bytes;
> +  HOST_WIDE_INT min_sp_off, max_sp_off;
> +  rtx_call_insn *call_insn;
> +  bool fast;
> +  bool load_found;
> +};
> +
> +/* Helper function for find_call_stack_args.  Check whether there are
> +   any loads from the argument slots between the const/pure call and
> +   the store to the argument slot; set LOAD_FOUND if any is found.  */
> +
> +static void
> +check_argument_load (rtx *loc, void *data)
> +{
> +  struct check_argument_load_data *d
> +    = (struct check_argument_load_data *) data;
> +  subrtx_iterator::array_type array;
> +  FOR_EACH_SUBRTX (iter, array, *loc, NONCONST)
> +    {
> +      const_rtx mem = *iter;
> +      HOST_WIDE_INT size;
> +      if (MEM_P (mem)
> +       && MEM_SIZE_KNOWN_P (mem)
> +       && MEM_SIZE (mem).is_constant (&size))
> +     {
> +       HOST_WIDE_INT off = sp_based_mem_offset (d->call_insn, mem, d->fast);
> +       if (off != INTTYPE_MINIMUM (HOST_WIDE_INT)
> +           && off < d->max_sp_off
> +           && off + size > d->min_sp_off)
> +         for (HOST_WIDE_INT byte = MAX (off, d->min_sp_off);
> +              byte < MIN (off + size, d->max_sp_off); byte++)
> +           if (bitmap_bit_p (d->sp_bytes, byte - d->min_sp_off))
> +             {
> +               d->load_found = true;
> +               return;
> +             }
> +     }
> +    }
> +}
> +
>  /* Try to find all stack stores of CALL_INSN arguments if
>     ACCUMULATE_OUTGOING_ARGS.  If all stack stores have been found
>     and it is therefore safe to eliminate the call, return true,
> @@ -396,6 +439,8 @@ find_call_stack_args (rtx_call_insn *cal
>    /* Walk backwards, looking for argument stores.  The search stops
>       when seeing another call, sp adjustment or memory store other than
>       argument store.  */
> +  struct check_argument_load_data data
> +    = { sp_bytes, min_sp_off, max_sp_off, call_insn, fast, false };
>    ret = false;
>    for (insn = PREV_INSN (call_insn); insn; insn = prev_insn)
>      {
> @@ -414,6 +459,10 @@ find_call_stack_args (rtx_call_insn *cal
>        if (!set || SET_DEST (set) == stack_pointer_rtx)
>       break;
>  
> +      note_uses (&PATTERN (insn), check_argument_load, &data);
> +      if (data.load_found)
> +     break;
> +
>        if (!MEM_P (SET_DEST (set)))
>       continue;
>  
> --- gcc/testsuite/gcc.target/i386/pr89965.c.jj        2019-04-11 10:28:56.211764660 +0200
> +++ gcc/testsuite/gcc.target/i386/pr89965.c   2019-04-11 10:28:56.211764660 +0200
> @@ -0,0 +1,39 @@
> +/* PR rtl-optimization/89965 */
> +/* { dg-do run } */
> +/* { dg-options "-O -mtune=nano-x2 -fcaller-saves -fexpensive-optimizations -fno-tree-dce -fno-tree-ter" } */
> +/* { dg-additional-options "-march=i386" { target ia32 } } */
> +
> +int a;
> +
> +__attribute__ ((noipa)) unsigned long long
> +foo (unsigned char c, unsigned d, unsigned e, unsigned long long f,
> +     unsigned char g, unsigned h, unsigned long long i)
> +{
> +  (void) d;
> +  unsigned short j = __builtin_mul_overflow_p (~0, h, c);
> +  e <<= e;
> +  i >>= 7;
> +  c *= i;
> +  i /= 12;
> +  a = __builtin_popcount (c);
> +  __builtin_add_overflow (e, a, &f);
> +  return c + f + g + j + h;
> +}
> +
> +__attribute__ ((noipa)) void
> +bar (void)
> +{
> +  char buf[64];
> +  __builtin_memset (buf, 0x55, sizeof buf);
> +  asm volatile ("" : : "r" (&buf[0]) : "memory");
> +}
> +
> +int
> +main (void)
> +{
> +  bar ();
> +  unsigned long long x = foo (2, 0, 0, 0, 0, 0, 0);
> +  if (x != 0)
> +    __builtin_abort ();
> +  return 0;
> +}
> 
>       Jakub
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Linux GmbH, Maxfeldstrasse 5, 90409 Nuernberg, Germany;
GF: Felix Imendörffer, Mary Higgins, Sri Rasiah; HRB 21284 (AG Nürnberg)
