http://gcc.gnu.org/bugzilla/show_bug.cgi?id=57748
--- Comment #31 from Bernd Edlinger <bernd.edlinger at hotmail dot de> ---
(In reply to Richard Biener from comment #29)
> (In reply to Bernd Edlinger from comment #28)
> > (In reply to Richard Biener from comment #19)
> > > Barking up wrong trees.  Hacky fix looks like:
> > >
> > > Index: gcc/expr.c
> > > ===================================================================
> > > --- gcc/expr.c  (revision 202043)
> > > +++ gcc/expr.c  (working copy)
> > > @@ -4753,6 +4753,9 @@ expand_assignment (tree to, tree from, b
> > >      {
> > >        enum machine_mode address_mode;
> > >        rtx offset_rtx;
> > > +      rtx saved_to_rtx = to_rtx;
> > > +      if (misalignp)
> > > +        to_rtx = mem;
> > >
> > >        if (!MEM_P (to_rtx))
> > >          {
> > > @@ -4785,6 +4788,11 @@ expand_assignment (tree to, tree from, b
> > >        to_rtx = offset_address (to_rtx, offset_rtx,
> > >                                 highest_pow2_factor_for_target (to,
> > >                                                                 offset));
> > > +      if (misalignp)
> > > +        {
> > > +          mem = to_rtx;
> > > +          to_rtx = saved_to_rtx;
> > > +        }
> > >      }
> > >
> > >    /* No action is needed if the target is not a memory and the field
> >
> > this patch generates wrong code too:
> >
> > foo:
> > .LFB9:
> >         .cfi_startproc
> >         pushq   %rbx
> >         .cfi_def_cfa_offset 16
> >         .cfi_offset 3, -16
> >         movq    %rdi, %rbx
> >         subq    $16, %rsp
> >         .cfi_def_cfa_offset 32
> >         call    get_i
> >         movdqu  (%rbx), %xmm0
> >         cltq
> >         movq    .LC1(%rip), %xmm1
> >         psrldq  $8, %xmm0
> >         punpcklqdq      %xmm0, %xmm1
> >         movdqu  %xmm1, 16(%rbx,%rax,8)
> >         addq    $16, %rsp
> >         .cfi_def_cfa_offset 16
> >         popq    %rbx
> >         .cfi_def_cfa_offset 8
> >         ret
> >         .cfi_endproc
> >
> > loads *s into xmm0, modifies low part,
> > writes back at s->b[0] and s->b[1].
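For reference, source shaped roughly like the sketch below produces that
pattern.  This is a hypothetical reconstruction from the assembly, not the
actual testcase; the names (V, get_i, foo) and the exact layout are guesses.

/* Hypothetical reconstruction from the assembly above, not the PR's
   actual testcase.  */
typedef long long V __attribute__ ((vector_size (16), may_alias));

struct s
{
  V a;               /* offset 0: the "*s" the wrong code loads       */
  long long b[4];    /* offset 16: store lands at 16(%rbx,%rax,8)     */
} __attribute__ ((packed));    /* packing makes the members misaligned */

extern int get_i (void);

void
foo (struct s *s)
{
  int i = get_i ();
  /* A misaligned 16-byte vector store that must write only s->b[i]
     and s->b[i + 1].  The code above instead performs a read-modify-
     write cycle on *s, so half of the value written back to s->b[]
     is stale data taken from *s.  */
  *(V *) &s->b[i] = (V) { 3, 4 };
}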
> This bug is latent because we use the mode of the base object to query
> for movmisalign_optab (and use it).  It highlights the issue I raised
> in my last comment.
>
> So, new, completely untested patch:
>
> Index: gcc/expr.c
> ===================================================================
> --- gcc/expr.c  (revision 202240)
> +++ gcc/expr.c  (working copy)
> @@ -4646,10 +4646,12 @@ expand_assignment (tree to, tree from, b
>
>    /* Handle misaligned stores.  */
>    mode = TYPE_MODE (TREE_TYPE (to));
> -  if ((TREE_CODE (to) == MEM_REF
> -       || TREE_CODE (to) == TARGET_MEM_REF)
> -      && mode != BLKmode
> -      && !mem_ref_refers_to_non_mem_p (to)
> +  if (mode != BLKmode
> +      && (DECL_P (to)
> +          || handled_component_p (to)
> +          || ((TREE_CODE (to) == MEM_REF
> +               || TREE_CODE (to) == TARGET_MEM_REF)
> +              && !mem_ref_refers_to_non_mem_p (to)))
>        && ((align = get_object_alignment (to))
>            < GET_MODE_ALIGNMENT (mode))
>        && (((icode = optab_handler (movmisalign_optab, mode))
> @@ -4696,7 +4698,6 @@ expand_assignment (tree to, tree from, b
>        int unsignedp;
>        int volatilep = 0;
>        tree tem;
> -      bool misalignp;
>        rtx mem = NULL_RTX;
>
>        push_temp_slots ();
> @@ -4707,40 +4708,7 @@ expand_assignment (tree to, tree from, b
>            && DECL_BIT_FIELD_TYPE (TREE_OPERAND (to, 1)))
>          get_bit_range (&bitregion_start, &bitregion_end, to, &bitpos,
>                         &offset);
>
> -      /* If we are going to use store_bit_field and extract_bit_field,
> -         make sure to_rtx will be safe for multiple use.  */
> -      mode = TYPE_MODE (TREE_TYPE (tem));
> -      if (TREE_CODE (tem) == MEM_REF
> -          && mode != BLKmode
> -          && ((align = get_object_alignment (tem))
> -              < GET_MODE_ALIGNMENT (mode))
> -          && ((icode = optab_handler (movmisalign_optab, mode))
> -              != CODE_FOR_nothing))
> -        {
> -          struct expand_operand ops[2];
> -
> -          misalignp = true;
> -          to_rtx = gen_reg_rtx (mode);
> -          mem = expand_expr (tem, NULL_RTX, VOIDmode, EXPAND_WRITE);
> -
> -          /* If the misaligned store doesn't overwrite all bits, perform
> -             rmw cycle on MEM.  */
> -          if (bitsize != GET_MODE_BITSIZE (mode))
> -            {
> -              create_input_operand (&ops[0], to_rtx, mode);
> -              create_fixed_operand (&ops[1], mem);
> -              /* The movmisalign<mode> pattern cannot fail, else the
> -                 assignment would silently be omitted.  */
> -              expand_insn (icode, 2, ops);
> -
> -              mem = copy_rtx (mem);
> -            }
> -        }
> -      else
> -        {
> -          misalignp = false;
> -          to_rtx = expand_expr (tem, NULL_RTX, VOIDmode, EXPAND_WRITE);
> -        }
> +      to_rtx = expand_expr (tem, NULL_RTX, VOIDmode, EXPAND_WRITE);
>
>        /* If the bitfield is volatile, we want to access it in the
>           field's mode, not the computed mode.
> @@ -4879,17 +4847,6 @@ expand_assignment (tree to, tree from, b
>                               get_alias_set (to), nontemporal);
>      }
>
> -  if (misalignp)
> -    {
> -      struct expand_operand ops[2];
> -
> -      create_fixed_operand (&ops[0], mem);
> -      create_input_operand (&ops[1], to_rtx, mode);
> -      /* The movmisalign<mode> pattern cannot fail, else the assignment
> -         would silently be omitted.  */
> -      expand_insn (icode, 2, ops);
> -    }
> -
>    if (result)
>      preserve_temp_slots (result);
>    pop_temp_slots ();

Richard, this will break the bitregion_start/end handling.

IMHO the misalign path is just a very old performance improvement.
Whether it is taken at all is entirely under the control of the target
hooks.  It is only meant for structures that fit into registers, like

  struct { double x; };             => mode = double

or

  union { double x; float y[2]; };  => mode = int64

When it is dangerous to go there, we do not have to.

I am beginning to think that my patch is right.
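To spell out those two examples (a sketch; the mode annotations are my
reading for a 64-bit target, not verified TYPE_MODE output):

/* Register-sized aggregates of the kind the misalign path is meant
   for.  Mode comments reflect my understanding for x86_64.  */
struct d { double x; };    /* a single field covering the whole struct,
                              so the struct takes the field's mode:
                              DFmode ("double")                        */

union u
{
  double x;
  float y[2];
};                         /* 8 bytes, but no single field mode covers
                              the union, so it gets an integer mode:
                              DImode ("int64")                         */

In both cases the whole object moves in one register-sized access; the
dangerous cases are component stores into a larger object, as above.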