http://gcc.gnu.org/bugzilla/show_bug.cgi?id=57748
--- Comment #31 from Bernd Edlinger <bernd.edlinger at hotmail dot de> ---
(In reply to Richard Biener from comment #29)
> (In reply to Bernd Edlinger from comment #28)
> > (In reply to Richard Biener from comment #19)
> > > Barking up wrong trees.  Hacky fix looks like:
> > >
> > > Index: gcc/expr.c
> > > ===================================================================
> > > --- gcc/expr.c  (revision 202043)
> > > +++ gcc/expr.c  (working copy)
> > > @@ -4753,6 +4753,9 @@ expand_assignment (tree to, tree from, b
> > >      {
> > >        enum machine_mode address_mode;
> > >        rtx offset_rtx;
> > > +      rtx saved_to_rtx = to_rtx;
> > > +      if (misalignp)
> > > +        to_rtx = mem;
> > >
> > >        if (!MEM_P (to_rtx))
> > >          {
> > > @@ -4785,6 +4788,11 @@ expand_assignment (tree to, tree from, b
> > >        to_rtx = offset_address (to_rtx, offset_rtx,
> > >                                 highest_pow2_factor_for_target (to,
> > >                                                                 offset));
> > > +      if (misalignp)
> > > +        {
> > > +          mem = to_rtx;
> > > +          to_rtx = saved_to_rtx;
> > > +        }
> > >      }
> > >
> > >    /* No action is needed if the target is not a memory and the field
> >
> > this patch generates wrong code too:
> >
> > foo:
> > .LFB9:
> >         .cfi_startproc
> >         pushq   %rbx
> >         .cfi_def_cfa_offset 16
> >         .cfi_offset 3, -16
> >         movq    %rdi, %rbx
> >         subq    $16, %rsp
> >         .cfi_def_cfa_offset 32
> >         call    get_i
> >         movdqu  (%rbx), %xmm0
> >         cltq
> >         movq    .LC1(%rip), %xmm1
> >         psrldq  $8, %xmm0
> >         punpcklqdq      %xmm0, %xmm1
> >         movdqu  %xmm1, 16(%rbx,%rax,8)
> >         addq    $16, %rsp
> >         .cfi_def_cfa_offset 16
> >         popq    %rbx
> >         .cfi_def_cfa_offset 8
> >         ret
> >         .cfi_endproc
> >
> > loads *s into xmm0, modifies low part,
> > writes back at s->b[0] and s->b[1].
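For reference, source shaped roughly like the sketch below produces that
pattern.  This is a hypothetical reconstruction from the assembly, not the
actual testcase; the names (V, get_i, foo) and the exact layout are guesses.

/* Hypothetical reconstruction from the assembly above, not the PR's
   actual testcase.  */
typedef long long V __attribute__ ((vector_size (16), may_alias));

struct s
{
  V a;               /* offset 0: the "*s" the wrong code loads       */
  long long b[4];    /* offset 16: store lands at 16(%rbx,%rax,8)     */
} __attribute__ ((packed));    /* packing makes the members misaligned */

extern int get_i (void);

void
foo (struct s *s)
{
  int i = get_i ();
  /* A misaligned 16-byte vector store that must write only s->b[i]
     and s->b[i + 1].  The code above instead performs a read-modify-
     write cycle on *s, so half of the value written back to s->b[]
     is stale data taken from *s.  */
  *(V *) &s->b[i] = (V) { 3, 4 };
}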
> This bug is latent because we use the mode of the base object to query
> for movmisalign_optab (and use it).  It highlights the issue I raised
> in my last comment.
>
> So, new, completely untested patch:
>
> Index: gcc/expr.c
> ===================================================================
> --- gcc/expr.c  (revision 202240)
> +++ gcc/expr.c  (working copy)
> @@ -4646,10 +4646,12 @@ expand_assignment (tree to, tree from, b
>
>    /* Handle misaligned stores.  */
>    mode = TYPE_MODE (TREE_TYPE (to));
> -  if ((TREE_CODE (to) == MEM_REF
> -       || TREE_CODE (to) == TARGET_MEM_REF)
> -      && mode != BLKmode
> -      && !mem_ref_refers_to_non_mem_p (to)
> +  if (mode != BLKmode
> +      && (DECL_P (to)
> +          || handled_component_p (to)
> +          || ((TREE_CODE (to) == MEM_REF
> +               || TREE_CODE (to) == TARGET_MEM_REF)
> +              && !mem_ref_refers_to_non_mem_p (to)))
>        && ((align = get_object_alignment (to))
>            < GET_MODE_ALIGNMENT (mode))
>        && (((icode = optab_handler (movmisalign_optab, mode))
> @@ -4696,7 +4698,6 @@ expand_assignment (tree to, tree from, b
>        int unsignedp;
>        int volatilep = 0;
>        tree tem;
> -      bool misalignp;
>        rtx mem = NULL_RTX;
>
>        push_temp_slots ();
> @@ -4707,40 +4708,7 @@ expand_assignment (tree to, tree from, b
>            && DECL_BIT_FIELD_TYPE (TREE_OPERAND (to, 1)))
>          get_bit_range (&bitregion_start, &bitregion_end, to, &bitpos,
>                         &offset);
>
> -      /* If we are going to use store_bit_field and extract_bit_field,
> -         make sure to_rtx will be safe for multiple use.  */
> -      mode = TYPE_MODE (TREE_TYPE (tem));
> -      if (TREE_CODE (tem) == MEM_REF
> -          && mode != BLKmode
> -          && ((align = get_object_alignment (tem))
> -              < GET_MODE_ALIGNMENT (mode))
> -          && ((icode = optab_handler (movmisalign_optab, mode))
> -              != CODE_FOR_nothing))
> -        {
> -          struct expand_operand ops[2];
> -
> -          misalignp = true;
> -          to_rtx = gen_reg_rtx (mode);
> -          mem = expand_expr (tem, NULL_RTX, VOIDmode, EXPAND_WRITE);
> -
> -          /* If the misaligned store doesn't overwrite all bits, perform
> -             rmw cycle on MEM.  */
> -          if (bitsize != GET_MODE_BITSIZE (mode))
> -            {
> -              create_input_operand (&ops[0], to_rtx, mode);
> -              create_fixed_operand (&ops[1], mem);
> -              /* The movmisalign<mode> pattern cannot fail, else the
> -                 assignment would silently be omitted.  */
> -              expand_insn (icode, 2, ops);
> -
> -              mem = copy_rtx (mem);
> -            }
> -        }
> -      else
> -        {
> -          misalignp = false;
> -          to_rtx = expand_expr (tem, NULL_RTX, VOIDmode, EXPAND_WRITE);
> -        }
> +      to_rtx = expand_expr (tem, NULL_RTX, VOIDmode, EXPAND_WRITE);
>
>        /* If the bitfield is volatile, we want to access it in the
>           field's mode, not the computed mode.
> @@ -4879,17 +4847,6 @@ expand_assignment (tree to, tree from, b
>                               get_alias_set (to), nontemporal);
>      }
>
> -  if (misalignp)
> -    {
> -      struct expand_operand ops[2];
> -
> -      create_fixed_operand (&ops[0], mem);
> -      create_input_operand (&ops[1], to_rtx, mode);
> -      /* The movmisalign<mode> pattern cannot fail, else the assignment
> -         would silently be omitted.  */
> -      expand_insn (icode, 2, ops);
> -    }
> -
>    if (result)
>      preserve_temp_slots (result);
>    pop_temp_slots ();

Richard, this will break the bitregion_start/end handling.

IMHO the misalign path is just a very old performance improvement.
Whether it is taken at all is entirely under the control of the target
hooks.  It is only meant for structures that fit into registers, like

  struct { double x; };             => mode = double

or

  union { double x; float y[2]; };  => mode = int64

When it is dangerous to go there, we do not have to.

I am beginning to think that my patch is right.
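To spell out those two examples (a sketch; the mode annotations are my
reading for a 64-bit target, not verified TYPE_MODE output):

/* Register-sized aggregates of the kind the misalign path is meant
   for.  Mode comments reflect my understanding for x86_64.  */
struct d { double x; };    /* a single field covering the whole struct,
                              so the struct takes the field's mode:
                              DFmode ("double")                        */

union u
{
  double x;
  float y[2];
};                         /* 8 bytes, but no single field mode covers
                              the union, so it gets an integer mode:
                              DImode ("int64")                         */

In both cases the whole object moves in one register-sized access; the
dangerous cases are component stores into a larger object, as above.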