http://gcc.gnu.org/bugzilla/show_bug.cgi?id=57748
Richard Biener <rguenth at gcc dot gnu.org> changed:
What |Removed |Added
----------------------------------------------------------------------------
CC| |uros at gcc dot gnu.org
--- Comment #29 from Richard Biener <rguenth at gcc dot gnu.org> ---
(In reply to Bernd Edlinger from comment #28)
> (In reply to Richard Biener from comment #19)
> > Barking up wrong trees. Hacky fix looks like:
> >
> > Index: gcc/expr.c
> > ===================================================================
> > --- gcc/expr.c (revision 202043)
> > +++ gcc/expr.c (working copy)
> > @@ -4753,6 +4753,9 @@ expand_assignment (tree to, tree from, b
> > {
> > enum machine_mode address_mode;
> > rtx offset_rtx;
> > + rtx saved_to_rtx = to_rtx;
> > + if (misalignp)
> > + to_rtx = mem;
> >
> > if (!MEM_P (to_rtx))
> > {
> > @@ -4785,6 +4788,11 @@ expand_assignment (tree to, tree from, b
> > to_rtx = offset_address (to_rtx, offset_rtx,
> > highest_pow2_factor_for_target (to,
> > offset));
> > + if (misalignp)
> > + {
> > + mem = to_rtx;
> > + to_rtx = saved_to_rtx;
> > + }
> > }
> >
> > /* No action is needed if the target is not a memory and the field
> >
> >
>
> this patch generates wrong code too:
>
> foo:
> .LFB9:
> .cfi_startproc
> pushq %rbx
> .cfi_def_cfa_offset 16
> .cfi_offset 3, -16
> movq %rdi, %rbx
> subq $16, %rsp
> .cfi_def_cfa_offset 32
> call get_i
> movdqu (%rbx), %xmm0
> cltq
> movq .LC1(%rip), %xmm1
> psrldq $8, %xmm0
> punpcklqdq %xmm0, %xmm1
> movdqu %xmm1, 16(%rbx,%rax,8)
> addq $16, %rsp
> .cfi_def_cfa_offset 16
> popq %rbx
> .cfi_def_cfa_offset 8
> ret
> .cfi_endproc
>
> loads *s into xmm0, modifies low part,
> writes back at s->b[0] and s->b[1].
This bug is latent because we use the mode of the base object to query
for movmisalign_optab (and use it). It highlights the issue I raised
in my last comment.
So, new, completely untested patch:
Index: gcc/expr.c
===================================================================
--- gcc/expr.c (revision 202240)
+++ gcc/expr.c (working copy)
@@ -4646,10 +4646,12 @@ expand_assignment (tree to, tree from, b
/* Handle misaligned stores. */
mode = TYPE_MODE (TREE_TYPE (to));
- if ((TREE_CODE (to) == MEM_REF
- || TREE_CODE (to) == TARGET_MEM_REF)
- && mode != BLKmode
- && !mem_ref_refers_to_non_mem_p (to)
+ if (mode != BLKmode
+ && (DECL_P (to)
+ || handled_component_p (to)
+ || ((TREE_CODE (to) == MEM_REF
+ || TREE_CODE (to) == TARGET_MEM_REF)
+ && !mem_ref_refers_to_non_mem_p (to)))
&& ((align = get_object_alignment (to))
< GET_MODE_ALIGNMENT (mode))
&& (((icode = optab_handler (movmisalign_optab, mode))
@@ -4696,7 +4698,6 @@ expand_assignment (tree to, tree from, b
int unsignedp;
int volatilep = 0;
tree tem;
- bool misalignp;
rtx mem = NULL_RTX;
push_temp_slots ();
@@ -4707,40 +4708,7 @@ expand_assignment (tree to, tree from, b
&& DECL_BIT_FIELD_TYPE (TREE_OPERAND (to, 1)))
get_bit_range (&bitregion_start, &bitregion_end, to, &bitpos, &offset);
- /* If we are going to use store_bit_field and extract_bit_field,
- make sure to_rtx will be safe for multiple use. */
- mode = TYPE_MODE (TREE_TYPE (tem));
- if (TREE_CODE (tem) == MEM_REF
- && mode != BLKmode
- && ((align = get_object_alignment (tem))
- < GET_MODE_ALIGNMENT (mode))
- && ((icode = optab_handler (movmisalign_optab, mode))
- != CODE_FOR_nothing))
- {
- struct expand_operand ops[2];
-
- misalignp = true;
- to_rtx = gen_reg_rtx (mode);
- mem = expand_expr (tem, NULL_RTX, VOIDmode, EXPAND_WRITE);
-
- /* If the misaligned store doesn't overwrite all bits, perform
- rmw cycle on MEM. */
- if (bitsize != GET_MODE_BITSIZE (mode))
- {
- create_input_operand (&ops[0], to_rtx, mode);
- create_fixed_operand (&ops[1], mem);
- /* The movmisalign<mode> pattern cannot fail, else the assignment
- would silently be omitted. */
- expand_insn (icode, 2, ops);
-
- mem = copy_rtx (mem);
- }
- }
- else
- {
- misalignp = false;
- to_rtx = expand_expr (tem, NULL_RTX, VOIDmode, EXPAND_WRITE);
- }
+ to_rtx = expand_expr (tem, NULL_RTX, VOIDmode, EXPAND_WRITE);
/* If the bitfield is volatile, we want to access it in the
field's mode, not the computed mode.
@@ -4879,17 +4847,6 @@ expand_assignment (tree to, tree from, b
get_alias_set (to), nontemporal);
}
- if (misalignp)
- {
- struct expand_operand ops[2];
-
- create_fixed_operand (&ops[0], mem);
- create_input_operand (&ops[1], to_rtx, mode);
- /* The movmisalign<mode> pattern cannot fail, else the assignment
- would silently be omitted. */
- expand_insn (icode, 2, ops);
- }
-
if (result)
preserve_temp_slots (result);
pop_temp_slots ();