On Fri, Feb 14, 2025 at 4:56 AM H.J. Lu <hjl.to...@gmail.com> wrote:
>
> On Thu, Feb 13, 2025 at 5:17 PM Uros Bizjak <ubiz...@gmail.com> wrote:
> >
> > On Thu, Feb 13, 2025 at 9:31 AM H.J. Lu <hjl.to...@gmail.com> wrote:
> > >
> > > Don't assume that stack slots can only be accessed by stack or frame
> > > registers.  We first find all registers defined by stack or frame
> > > registers.  Then check memory accesses by such registers, including
> > > stack and frame registers.
> > >
> > > gcc/
> > >
> > > PR target/109780
> > > PR target/109093
> > > * config/i386/i386.cc (ix86_update_stack_alignment): New.
> > > (ix86_find_all_reg_use_1): Likewise.
> > > (ix86_find_all_reg_use): Likewise.
> > > (ix86_find_max_used_stack_alignment): Also check memory accesses
> > > from registers defined by stack or frame registers.
> > >
> > > gcc/testsuite/
> > >
> > > PR target/109780
> > > PR target/109093
> > > * g++.target/i386/pr109780-1.C: New test.
> > > * gcc.target/i386/pr109093-1.c: Likewise.
> > > * gcc.target/i386/pr109780-1.c: Likewise.
> > > * gcc.target/i386/pr109780-2.c: Likewise.
> > > * gcc.target/i386/pr109780-3.c: Likewise.
> >
> > Some non-algorithmical changes below, otherwise LGTM. Please also get
> > someone to review dataflow infrastructure usage, I am not well versed
> > with it.
> >
> > +/* Helper function for ix86_find_all_reg_use.  */
> > +
> > +static void
> > +ix86_find_all_reg_use_1 (rtx set, HARD_REG_SET &stack_slot_access,
> > +             auto_bitmap &worklist)
> > +{
> > +  rtx src = SET_SRC (set);
> > +  if (MEM_P (src))
> >
> > Also reject assignment from CONST_SCALAR_INT?
>
> Done.
>
> > +    return;
> > +
> > +  rtx dest = SET_DEST (set);
> > +  if (!REG_P (dest))
> > +    return;
> >
> > Can we switch these two so the test for REG_P (dest) will be first? We
> > are not interested in anything that doesn't assign to a register.
>
> Done.
>
> > +/* Find all registers defined with REG.  */
> > +
> > +static void
> > +ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access,
> > +               unsigned int reg, auto_bitmap &worklist)
> > +{
> > +  for (df_ref ref = DF_REG_USE_CHAIN (reg);
> > +       ref != NULL;
> > +       ref = DF_REF_NEXT_REG (ref))
> > +    {
> > +      if (DF_REF_IS_ARTIFICIAL (ref))
> > +    continue;
> > +
> > +      rtx_insn *insn = DF_REF_INSN (ref);
> > +      if (!NONDEBUG_INSN_P (insn))
> > +    continue;
> >
> > Here we pass only NONJUMP_INSN_P (X) || JUMP_P (X) || CALL_P (X)
> >
> > +      if (CALL_P (insn) || JUMP_P (insn))
> > +    continue;
> >
> > And here remains only NONJUMP_INSN_P (X), so both above conditions
> > could be substituted with:
> >
> > if (!NONJUMP_INSN_P (X))
> >   continue;
>
> Done.
>
> > +
> > +      rtx set = single_set (insn);
> > +      if (set)
> > +    ix86_find_all_reg_use_1 (set, stack_slot_access, worklist);
> > +
> > +      rtx pat = PATTERN (insn);
> > +      if (GET_CODE (pat) != PARALLEL)
> > +    continue;
> > +
> > +      for (int i = 0; i < XVECLEN (pat, 0); i++)
> > +    {
> > +      rtx exp = XVECEXP (pat, 0, i);
> > +      switch (GET_CODE (exp))
> > +        {
> > +        case ASM_OPERANDS:
> > +        case CLOBBER:
> > +        case PREFETCH:
> > +        case USE:
> > +          break;
> > +        case UNSPEC:
> > +        case UNSPEC_VOLATILE:
> > +          for (int j = XVECLEN (exp, 0) - 1; j >= 0; j--)
> > +        {
> > +          rtx x = XVECEXP (exp, 0, j);
> > +          if (GET_CODE (x) == SET)
> > +            ix86_find_all_reg_use_1 (x, stack_slot_access,
> > +                         worklist);
> > +        }
> > +          break;
> > +        case SET:
> > +          ix86_find_all_reg_use_1 (exp, stack_slot_access,
> > +                       worklist);
> > +          break;
> > +        default:
> > +          debug_rtx (exp);
> >
> > Stray debug remaining?
>
> Removed.
>
> > +  HARD_REG_SET stack_slot_access;
> > +  CLEAR_HARD_REG_SET (stack_slot_access);
> > +
> > +  /* Stack slot can be accessed by stack pointer, frame pointer or
> > +     registers defined by stack pointer or frame pointer.  */
> > +  auto_bitmap worklist;
> >
> > Please put a line of vertical space here ...
>
> Done.
>
> > +  add_to_hard_reg_set (&stack_slot_access, Pmode,
> > +               STACK_POINTER_REGNUM);
> > +  bitmap_set_bit (worklist, STACK_POINTER_REGNUM);
> >
> > ... here ...
>
> Done.
>
> > +  if (frame_pointer_needed)
> > +    {
> > +      add_to_hard_reg_set (&stack_slot_access, Pmode,
> > +               HARD_FRAME_POINTER_REGNUM);
> > +      bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM);
> > +    }
> >
> > ... here ...
> >
>
> Done.
>
> > +  unsigned int reg;
> >
> > ... here ...
>
> Done.
>
> > +  do
> > +    {
> > +      reg = bitmap_clear_first_set_bit (worklist);
> > +      ix86_find_all_reg_use (stack_slot_access, reg, worklist);
> > +    }
> > +  while (!bitmap_empty_p (worklist));
> > +
> > +  hard_reg_set_iterator hrsi;
> >
> > ... here ...
>
> Done.
>
> > +  EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, reg, hrsi)
> > +    for (df_ref ref = DF_REG_USE_CHAIN (reg);
> > +     ref != NULL;
> > +     ref = DF_REF_NEXT_REG (ref))
> > +      {
> > +    if (DF_REF_IS_ARTIFICIAL (ref))
> > +      continue;
> > +
> > +    rtx_insn *insn = DF_REF_INSN (ref);
> >
> > ... and here.
> >
>
> Done.
>
> > +    if (!NONDEBUG_INSN_P (insn))
> >
> > !NONJUMP_INSN_P ?
>
> Changed.
>
> > +      continue;
> >
> > Also some vertical space here.
>
> Done.
>
> > +    note_stores (insn, ix86_update_stack_alignment,
> > +             &stack_alignment);
> > +      }
> >  }
> >
> > diff --git a/gcc/testsuite/gcc.target/i386/pr109093-1.c
> > b/gcc/testsuite/gcc.target/i386/pr109093-1.c
> > new file mode 100644
> > index 00000000000..0459d1947f9
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr109093-1.c
> > @@ -0,0 +1,39 @@
> > +/* { dg-do run }  */
> > +/* { dg-options "-O2 -mavx2 -mtune=znver1
> > -ftrivial-auto-var-init=zero -fno-stack-protector" } */
> > +
> >
> > Please use
> >
> > /* { dg-do run { target avx2_runtime } } */
> >
> Done.
>
> > +{
> > +  __builtin_cpu_init ();
> > +
> > +  if (__builtin_cpu_supports ("avx2"))
> > +    run ();
> > +
> >
> > And remove the above code from the test.
> >
> Done.
>
> > diff --git a/gcc/testsuite/gcc.target/i386/pr109780-3.c
> > b/gcc/testsuite/gcc.target/i386/pr109780-3.c
> > new file mode 100644
> > index 00000000000..f26fdd79af1
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr109780-3.c
> > @@ -0,0 +1,52 @@
> > +/* { dg-do run }  */
> > +/* { dg-options "-O2 -mavx2 -mtune=znver1 -fno-stack-protector
> > -fno-stack-clash-protection" } */
> > +
> >
> > Also here.
> >
>
> Done.
>
> Here is the v3 patch.

LGTM, modulo DF part that would need another pair of eyes.

Thanks,
Uros.

Reply via email to