On Fri, 14 Feb 2025, Uros Bizjak wrote:

> On Fri, Feb 14, 2025 at 4:56 AM H.J. Lu <hjl.to...@gmail.com> wrote:
> >
> > On Thu, Feb 13, 2025 at 5:17 PM Uros Bizjak <ubiz...@gmail.com> wrote:
> > >
> > > On Thu, Feb 13, 2025 at 9:31 AM H.J. Lu <hjl.to...@gmail.com> wrote:
> > > >
> > > > Don't assume that stack slots can only be accessed by stack or frame
> > > > registers.  We first find all registers defined by stack or frame
> > > > registers.  Then check memory accesses by such registers, including
> > > > stack and frame registers.
> > > >
> > > > gcc/
> > > >
> > > > PR target/109780
> > > > PR target/109093
> > > > * config/i386/i386.cc (ix86_update_stack_alignment): New.
> > > > (ix86_find_all_reg_use_1): Likewise.
> > > > (ix86_find_all_reg_use): Likewise.
> > > > (ix86_find_max_used_stack_alignment): Also check memory accesses
> > > > from registers defined by stack or frame registers.
> > > >
> > > > gcc/testsuite/
> > > >
> > > > PR target/109780
> > > > PR target/109093
> > > > * g++.target/i386/pr109780-1.C: New test.
> > > > * gcc.target/i386/pr109093-1.c: Likewise.
> > > > * gcc.target/i386/pr109780-1.c: Likewise.
> > > > * gcc.target/i386/pr109780-2.c: Likewise.
> > > > * gcc.target/i386/pr109780-3.c: Likewise.
> > >
> > > Some non-algorithmical changes below, otherwise LGTM. Please also get
> > > someone to review dataflow infrastructure usage, I am not well versed
> > > with it.
> > >
> > > +/* Helper function for ix86_find_all_reg_use.  */
> > > +
> > > +static void
> > > +ix86_find_all_reg_use_1 (rtx set, HARD_REG_SET &stack_slot_access,
> > > +             auto_bitmap &worklist)
> > > +{
> > > +  rtx src = SET_SRC (set);
> > > +  if (MEM_P (src))
> > >
> > > Also reject assignment from CONST_SCALAR_INT?
> >
> > Done.
> >
> > > +    return;
> > > +
> > > +  rtx dest = SET_DEST (set);
> > > +  if (!REG_P (dest))
> > > +    return;
> > >
> > > Can we switch these two so the test for REG_P (dest) will be first? We
> > > are not interested in anything that doesn't assign to a register.
> >
> > Done.
> >
> > > +/* Find all registers defined with REG.  */
> > > +
> > > +static void
> > > +ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access,
> > > +               unsigned int reg, auto_bitmap &worklist)
> > > +{
> > > +  for (df_ref ref = DF_REG_USE_CHAIN (reg);
> > > +       ref != NULL;
> > > +       ref = DF_REF_NEXT_REG (ref))
> > > +    {
> > > +      if (DF_REF_IS_ARTIFICIAL (ref))
> > > +    continue;
> > > +
> > > +      rtx_insn *insn = DF_REF_INSN (ref);
> > > +      if (!NONDEBUG_INSN_P (insn))
> > > +    continue;
> > >
> > > Here we pass only NONJUMP_INSN_P (X) || JUMP_P (X) || CALL_P (X)
> > >
> > > +      if (CALL_P (insn) || JUMP_P (insn))
> > > +    continue;
> > >
> > > And here remains only NONJUMP_INSN_P (X), so both above conditions
> > > could be substituted with:
> > >
> > > if (!NONJUMP_INSN_P (X))
> > >   continue;
> >
> > Done.
> >
> > > +
> > > +      rtx set = single_set (insn);
> > > +      if (set)
> > > +    ix86_find_all_reg_use_1 (set, stack_slot_access, worklist);
> > > +
> > > +      rtx pat = PATTERN (insn);
> > > +      if (GET_CODE (pat) != PARALLEL)
> > > +    continue;
> > > +
> > > +      for (int i = 0; i < XVECLEN (pat, 0); i++)
> > > +    {
> > > +      rtx exp = XVECEXP (pat, 0, i);
> > > +      switch (GET_CODE (exp))
> > > +        {
> > > +        case ASM_OPERANDS:
> > > +        case CLOBBER:
> > > +        case PREFETCH:
> > > +        case USE:
> > > +          break;
> > > +        case UNSPEC:
> > > +        case UNSPEC_VOLATILE:
> > > +          for (int j = XVECLEN (exp, 0) - 1; j >= 0; j--)
> > > +        {
> > > +          rtx x = XVECEXP (exp, 0, j);
> > > +          if (GET_CODE (x) == SET)
> > > +            ix86_find_all_reg_use_1 (x, stack_slot_access,
> > > +                         worklist);
> > > +        }
> > > +          break;
> > > +        case SET:
> > > +          ix86_find_all_reg_use_1 (exp, stack_slot_access,
> > > +                       worklist);
> > > +          break;
> > > +        default:
> > > +          debug_rtx (exp);
> > >
> > > Stray debug remaining?
> >
> > Removed.
> >
> > > +  HARD_REG_SET stack_slot_access;
> > > +  CLEAR_HARD_REG_SET (stack_slot_access);
> > > +
> > > +  /* Stack slot can be accessed by stack pointer, frame pointer or
> > > +     registers defined by stack pointer or frame pointer.  */
> > > +  auto_bitmap worklist;
> > >
> > > Please put a line of vertical space here ...
> >
> > Done.
> >
> > > +  add_to_hard_reg_set (&stack_slot_access, Pmode,
> > > +               STACK_POINTER_REGNUM);
> > > +  bitmap_set_bit (worklist, STACK_POINTER_REGNUM);
> > >
> > > ... here ...
> >
> > Done.
> >
> > > +  if (frame_pointer_needed)
> > > +    {
> > > +      add_to_hard_reg_set (&stack_slot_access, Pmode,
> > > +               HARD_FRAME_POINTER_REGNUM);
> > > +      bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM);
> > > +    }
> > >
> > > ... here ...
> > >
> >
> > Done.
> >
> > > +  unsigned int reg;
> > >
> > > ... here ...
> >
> > Done.
> >
> > > +  do
> > > +    {
> > > +      reg = bitmap_clear_first_set_bit (worklist);
> > > +      ix86_find_all_reg_use (stack_slot_access, reg, worklist);
> > > +    }
> > > +  while (!bitmap_empty_p (worklist));
> > > +
> > > +  hard_reg_set_iterator hrsi;
> > >
> > > ... here ...
> >
> > Done.
> >
> > > +  EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, reg, hrsi)
> > > +    for (df_ref ref = DF_REG_USE_CHAIN (reg);
> > > +     ref != NULL;
> > > +     ref = DF_REF_NEXT_REG (ref))
> > > +      {
> > > +    if (DF_REF_IS_ARTIFICIAL (ref))
> > > +      continue;
> > > +
> > > +    rtx_insn *insn = DF_REF_INSN (ref);
> > >
> > > ... and here.
> > >
> >
> > Done.
> >
> > > +    if (!NONDEBUG_INSN_P (insn))
> > >
> > > !NONJUMP_INSN_P ?
> >
> > Changed.
> >
> > > +      continue;
> > >
> > > Also some vertical space here.
> >
> > Done.
> >
> > > +    note_stores (insn, ix86_update_stack_alignment,
> > > +             &stack_alignment);
> > > +      }
> > >  }
> > >
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr109093-1.c
> > > b/gcc/testsuite/gcc.target/i386/pr109093-1.c
> > > new file mode 100644
> > > index 00000000000..0459d1947f9
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr109093-1.c
> > > @@ -0,0 +1,39 @@
> > > +/* { dg-do run }  */
> > > +/* { dg-options "-O2 -mavx2 -mtune=znver1
> > > -ftrivial-auto-var-init=zero -fno-stack-protector" } */
> > > +
> > >
> > > Please use
> > >
> > > /* { dg-do run { target avx2_runtime } } */
> > >
> > Done.
> >
> > > +{
> > > +  __builtin_cpu_init ();
> > > +
> > > +  if (__builtin_cpu_supports ("avx2"))
> > > +    run ();
> > > +
> > >
> > > And remove the above code from the test.
> > >
> > Done.
> >
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr109780-3.c
> > > b/gcc/testsuite/gcc.target/i386/pr109780-3.c
> > > new file mode 100644
> > > index 00000000000..f26fdd79af1
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr109780-3.c
> > > @@ -0,0 +1,52 @@
> > > +/* { dg-do run }  */
> > > +/* { dg-options "-O2 -mavx2 -mtune=znver1 -fno-stack-protector
> > > -fno-stack-clash-protection" } */
> > > +
> > >
> > > Also here.
> > >
> >
> > Done.
> >
> > Here is the v3 patch.
> 
> LGTM, modulo DF part that would need another pair of eyes.

Looks good, it's not easy to follow the context this is invoked from
but I guess if it's not properly df_analyze()ed it would have exploded
during testing.

Richard.

Reply via email to