On Tue, 24 Jan 2012, Richard Guenther wrote:

> One issue that I am running into now is that we need to robustify/change
> expand_assignment quite a bit.  I have a patch for SRA that makes sure
> to create properly aligned MEM_REFs, but then we have, for example
>
>   MEM[p].m = ...
>
> and in the handled_component_p path of expand_assignment we happily
> expand MEM[p] via expand_normal ("for multiple use").  That of course
> breaks, as doing so might go to the rvalue-producing movmisalign
> path, miscompiling the store.  Even if we handle this case specially
> in expand_normal, the UNSPEC that x86, for example, might produce most
> certainly isn't safe for "reuse".  Similar for the path that would use
> extract_bit_field (and would need to use store_bit_field).
>
> Which means that 1) we need to avoid calling expand_normal (tem)
> in those cases, and we probably need to fall back to a stack
> temporary for non-trivial cases?
>
> Note that SRA plus an SSE-mode aggregate is probably a latent
> pre-existing issue, as get_object_or_type_alignment might
> discover misalignment if we happen to know about an explicit
> misalignment.
>
> So, I am going to try to address this issue first.
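For illustration, a reduced example of the kind of input that can run
into this (a hypothetical sketch, not the testcase from the PR):

  typedef float v4sf __attribute__ ((vector_size (16)));

  /* packed plus aligned (4) lowers the struct alignment below the
     16 bytes V4SFmode wants, so the member store has to go through
     the movmisalign<mode> pattern (movups on x86) rather than an
     aligned vector store.  */
  struct s
  {
    v4sf m;
  } __attribute__ ((packed, aligned (4)));

  void
  f (struct s *p, v4sf x)
  {
    p->m = x;   /* Gimplifies to something like MEM[p].m = x_1;  */
  }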
Like with the following, currently testing on x86_64-linux.

The idea is to simulate a (piecewise) store into a pseudo (which we can
hopefully handle well enough - almost all cases can happen right now,
as we have MEM_REFs) and simply delay the misaligned move until the
rvalue is ready.  That fixes my current issue, but it might not scale
to a possible store_bit_field path - I suppose that instead both
optimize_bitfield_assignment_op and store_field would have to handle
the misalignment themselves (to_rtx is already a MEM with MEM_ALIGN
indicating the non-mode alignment).  A simplified outline of the
resulting control flow follows the patch.

Richard.

2012-01-24  Richard Guenther  <rguent...@suse.de>

	PR tree-optimization/50444
	* expr.c (expand_assignment): Handle misaligned bases
	consistently, even when wrapped inside component references.

Index: gcc/expr.c
===================================================================
*** gcc/expr.c	(revision 183470)
--- gcc/expr.c	(working copy)
*************** expand_assignment (tree to, tree from, b
*** 4556,4564 ****
  {
    rtx to_rtx = 0;
    rtx result;
-   enum machine_mode mode;
-   unsigned int align;
-   enum insn_code icode;
  
    /* Don't crash if the lhs of the assignment was erroneous.  */
    if (TREE_CODE (to) == ERROR_MARK)
--- 4556,4561 ----
*************** expand_assignment (tree to, tree from, b
*** 4571,4647 ****
    if (operand_equal_p (to, from, 0))
      return;
  
-   mode = TYPE_MODE (TREE_TYPE (to));
-   if ((TREE_CODE (to) == MEM_REF
-        || TREE_CODE (to) == TARGET_MEM_REF)
-       && mode != BLKmode
-       && ((align = get_object_or_type_alignment (to))
-           < GET_MODE_ALIGNMENT (mode))
-       && ((icode = optab_handler (movmisalign_optab, mode))
-           != CODE_FOR_nothing))
-     {
-       struct expand_operand ops[2];
-       enum machine_mode address_mode;
-       rtx reg, op0, mem;
-
-       reg = expand_expr (from, NULL_RTX, VOIDmode, EXPAND_NORMAL);
-       reg = force_not_mem (reg);
-
-       if (TREE_CODE (to) == MEM_REF)
-         {
-           addr_space_t as
-             = TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (TREE_OPERAND (to, 0))));
-           tree base = TREE_OPERAND (to, 0);
-           address_mode = targetm.addr_space.address_mode (as);
-           op0 = expand_expr (base, NULL_RTX, VOIDmode, EXPAND_NORMAL);
-           op0 = convert_memory_address_addr_space (address_mode, op0, as);
-           if (!integer_zerop (TREE_OPERAND (to, 1)))
-             {
-               rtx off
-                 = immed_double_int_const (mem_ref_offset (to), address_mode);
-               op0 = simplify_gen_binary (PLUS, address_mode, op0, off);
-             }
-           op0 = memory_address_addr_space (mode, op0, as);
-           mem = gen_rtx_MEM (mode, op0);
-           set_mem_attributes (mem, to, 0);
-           set_mem_addr_space (mem, as);
-         }
-       else if (TREE_CODE (to) == TARGET_MEM_REF)
-         {
-           addr_space_t as
-             = TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (TREE_OPERAND (to, 0))));
-           struct mem_address addr;
-
-           get_address_description (to, &addr);
-           op0 = addr_for_mem_ref (&addr, as, true);
-           op0 = memory_address_addr_space (mode, op0, as);
-           mem = gen_rtx_MEM (mode, op0);
-           set_mem_attributes (mem, to, 0);
-           set_mem_addr_space (mem, as);
-         }
-       else
-         gcc_unreachable ();
-       if (TREE_THIS_VOLATILE (to))
-         MEM_VOLATILE_P (mem) = 1;
-
-       create_fixed_operand (&ops[0], mem);
-       create_input_operand (&ops[1], reg, mode);
-       /* The movmisalign<mode> pattern cannot fail, else the assignment would
-          silently be omitted.  */
-       expand_insn (icode, 2, ops);
-       return;
-     }
-
    /* Assignment of a structure component needs special treatment
       if the structure component's rtx is not simply a MEM.
       Assignment of an array element at a constant index, and assignment of
       an array element in an unaligned packed structure field, has the same
       problem.  */
    if (handled_component_p (to)
!       /* ???  We only need to handle MEM_REF here if the access is not
!          a full access of the base object.  */
!       || (TREE_CODE (to) == MEM_REF
!           && TREE_CODE (TREE_OPERAND (to, 0)) == ADDR_EXPR)
        || TREE_CODE (TREE_TYPE (to)) == ARRAY_TYPE)
      {
        enum machine_mode mode1;
--- 4568,4581 ----
    if (operand_equal_p (to, from, 0))
      return;
  
    /* Assignment of a structure component needs special treatment
       if the structure component's rtx is not simply a MEM.
       Assignment of an array element at a constant index, and assignment of
       an array element in an unaligned packed structure field, has the same
       problem.  */
    if (handled_component_p (to)
!       || TREE_CODE (to) == MEM_REF
!       || TREE_CODE (to) == TARGET_MEM_REF
        || TREE_CODE (TREE_TYPE (to)) == ARRAY_TYPE)
      {
        enum machine_mode mode1;
*************** expand_assignment (tree to, tree from, b
*** 4652,4657 ****
--- 4586,4595 ----
        int unsignedp;
        int volatilep = 0;
        tree tem;
+       enum machine_mode mode;
+       unsigned int align;
+       enum insn_code icode;
+       bool misalignp;
  
        push_temp_slots ();
        tem = get_inner_reference (to, &bitsize, &bitpos, &offset, &mode1,
*************** expand_assignment (tree to, tree from, b
*** 4664,4671 ****
        /* If we are going to use store_bit_field and extract_bit_field,
           make sure to_rtx will be safe for multiple use.  */
!
!       to_rtx = expand_normal (tem);
  
        /* If the bitfield is volatile, we want to access it in the
         field's mode, not the computed mode.
--- 4602,4624 ----
        /* If we are going to use store_bit_field and extract_bit_field,
           make sure to_rtx will be safe for multiple use.  */
!       mode = TYPE_MODE (TREE_TYPE (tem));
!       if ((TREE_CODE (tem) == MEM_REF
!            || TREE_CODE (tem) == TARGET_MEM_REF)
!           && mode != BLKmode
!           && ((align = get_object_or_type_alignment (tem))
!               < GET_MODE_ALIGNMENT (mode))
!           && ((icode = optab_handler (movmisalign_optab, mode))
!               != CODE_FOR_nothing))
!         {
!           misalignp = true;
!           to_rtx = gen_reg_rtx (mode);
!         }
!       else
!         {
!           misalignp = false;
!           to_rtx = expand_normal (tem);
!         }
  
        /* If the bitfield is volatile, we want to access it in the
         field's mode, not the computed mode.
*************** expand_assignment (tree to, tree from, b
*** 4811,4816 ****
--- 4764,4819 ----
                                    nontemporal);
          }
  
+       if (misalignp)
+         {
+           struct expand_operand ops[2];
+           enum machine_mode address_mode;
+           rtx reg, op0, mem;
+
+           if (TREE_CODE (tem) == MEM_REF)
+             {
+               addr_space_t as = TYPE_ADDR_SPACE
+                   (TREE_TYPE (TREE_TYPE (TREE_OPERAND (tem, 0))));
+               tree base = TREE_OPERAND (tem, 0);
+               address_mode = targetm.addr_space.address_mode (as);
+               op0 = expand_expr (base, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+               op0 = convert_memory_address_addr_space (address_mode, op0, as);
+               if (!integer_zerop (TREE_OPERAND (tem, 1)))
+                 {
+                   rtx off = immed_double_int_const (mem_ref_offset (tem),
+                                                     address_mode);
+                   op0 = simplify_gen_binary (PLUS, address_mode, op0, off);
+                 }
+               op0 = memory_address_addr_space (mode, op0, as);
+               mem = gen_rtx_MEM (mode, op0);
+               set_mem_attributes (mem, tem, 0);
+               set_mem_addr_space (mem, as);
+             }
+           else if (TREE_CODE (tem) == TARGET_MEM_REF)
+             {
+               addr_space_t as = TYPE_ADDR_SPACE
+                   (TREE_TYPE (TREE_TYPE (TREE_OPERAND (tem, 0))));
+               struct mem_address addr;
+
+               get_address_description (tem, &addr);
+               op0 = addr_for_mem_ref (&addr, as, true);
+               op0 = memory_address_addr_space (mode, op0, as);
+               mem = gen_rtx_MEM (mode, op0);
+               set_mem_attributes (mem, tem, 0);
+               set_mem_addr_space (mem, as);
+             }
+           else
+             gcc_unreachable ();
+           if (TREE_THIS_VOLATILE (tem))
+             MEM_VOLATILE_P (mem) = 1;
+
+           create_fixed_operand (&ops[0], mem);
+           create_input_operand (&ops[1], to_rtx, mode);
+           /* The movmisalign<mode> pattern cannot fail, else the assignment
+              would silently be omitted.  */
+           expand_insn (icode, 2, ops);
+         }
+
        if (result)
          preserve_temp_slots (result);
        free_temp_slots ();
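The control flow the patch introduces is, in outline (a simplified
sketch, not the code above verbatim; misalign_mem_for stands for the
MEM_REF/TARGET_MEM_REF address re-expansion the patch does inline):

  /* Decide up front whether the base is a misaligned MEM_REF or
     TARGET_MEM_REF that needs the movmisalign<mode> pattern.  */
  mode = TYPE_MODE (TREE_TYPE (tem));
  icode = optab_handler (movmisalign_optab, mode);
  misalignp = ((TREE_CODE (tem) == MEM_REF
                || TREE_CODE (tem) == TARGET_MEM_REF)
               && mode != BLKmode
               && get_object_or_type_alignment (tem)
                  < GET_MODE_ALIGNMENT (mode)
               && icode != CODE_FOR_nothing);

  /* Stage the (piecewise) store in a pseudo instead of expanding the
     misaligned memory directly, so no intermediate rvalue-producing
     misaligned access is generated.  */
  to_rtx = misalignp ? gen_reg_rtx (mode) : expand_normal (tem);

  /* ... optimize_bitfield_assignment_op / store_field store into
     to_rtx as before ... */

  /* Only once the full rvalue sits in the pseudo, emit the single
     misaligned store.  */
  if (misalignp)
    {
      mem = misalign_mem_for (tem, mode);   /* hypothetical helper */
      create_fixed_operand (&ops[0], mem);
      create_input_operand (&ops[1], to_rtx, mode);
      expand_insn (icode, 2, ops);
    }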