Tejas Belagod wrote: > > Ulrich Weigand wrote: > >> The following patch implements this idea; it passes a basic regression > >> test on arm-linux-gnueabi. (Obviously this would need a lot more > >> testing on various platforms before getting into mainline ...) > >> > >> Can you have a look whether this fixes the problem you're seeing? [snip]
> Sorry for the delay in replying. These new issues that I was seeing were > bugs specific to my code that I've fixed. Your patch seems to work fine. > Thanks a lot for the patch. I've now checked the patch in to mainline. In addition to your test on aarch64, I've completed tests without regression on ppc(64)-linux, s390(x)-linux, and i386-linux (with Uros' patch). Note that Uros' patch here: http://gcc.gnu.org/ml/gcc-patches/2012-10/msg01463.html is a pre-requisite for the reload patch to avoid regressions on i386. Current version (nearly unchanged) of the patch appended below. Bye, Ulrich ChangeLog: * reload.c (find_reloads_subreg_address): Remove FORCE_REPLACE parameter. Always replace normal subreg with memory reference whenever possible. Return NULL otherwise. (find_reloads_toplev): Always call find_reloads_subreg_address for subregs of registers equivalent to a memory location. Only recurse further if find_reloads_subreg_address fails. (find_reloads_address_1): Only call find_reloads_subreg_address for subregs of registers equivalent to a memory location. Properly handle failure of find_reloads_subreg_address. Index: gcc/reload.c =================================================================== *** gcc/reload.c (revision 191665) --- gcc/reload.c (working copy) *************** static int find_reloads_address_1 (enum *** 282,288 **** static void find_reloads_address_part (rtx, rtx *, enum reg_class, enum machine_mode, int, enum reload_type, int); ! static rtx find_reloads_subreg_address (rtx, int, int, enum reload_type, int, rtx, int *); static void copy_replacements_1 (rtx *, rtx *, int); static int find_inc_amount (rtx, rtx); --- 282,288 ---- static void find_reloads_address_part (rtx, rtx *, enum reg_class, enum machine_mode, int, enum reload_type, int); ! static rtx find_reloads_subreg_address (rtx, int, enum reload_type, int, rtx, int *); static void copy_replacements_1 (rtx *, rtx *, int); static int find_inc_amount (rtx, rtx); *************** find_reloads_toplev (rtx x, int opnum, e *** 4810,4840 **** } /* If the subreg contains a reg that will be converted to a mem, ! convert the subreg to a narrower memref now. ! Otherwise, we would get (subreg (mem ...) ...), ! which would force reload of the mem. ! ! We also need to do this if there is an equivalent MEM that is ! not offsettable. In that case, alter_subreg would produce an ! invalid address on big-endian machines. ! ! For machines that extend byte loads, we must not reload using ! a wider mode if we have a paradoxical SUBREG. find_reloads will ! force a reload in that case. So we should not do anything here. */ if (regno >= FIRST_PSEUDO_REGISTER ! #ifdef LOAD_EXTEND_OP ! && !paradoxical_subreg_p (x) ! #endif ! && (reg_equiv_address (regno) != 0 ! || (reg_equiv_mem (regno) != 0 ! && (! strict_memory_address_addr_space_p ! (GET_MODE (x), XEXP (reg_equiv_mem (regno), 0), ! MEM_ADDR_SPACE (reg_equiv_mem (regno))) ! || ! offsettable_memref_p (reg_equiv_mem (regno)) ! || num_not_at_initial_offset)))) ! x = find_reloads_subreg_address (x, 1, opnum, type, ind_levels, ! insn, address_reloaded); } for (copied = 0, i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) --- 4810,4828 ---- } /* If the subreg contains a reg that will be converted to a mem, ! attempt to convert the whole subreg to a (narrower or wider) ! memory reference instead. If this succeeds, we're done -- ! otherwise fall through to check whether the inner reg still ! needs address reloads anyway. */ if (regno >= FIRST_PSEUDO_REGISTER ! && reg_equiv_memory_loc (regno) != 0) ! { ! tem = find_reloads_subreg_address (x, opnum, type, ind_levels, ! insn, address_reloaded); ! if (tem) ! return tem; ! } } for (copied = 0, i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) *************** find_reloads_address_1 (enum machine_mod *** 6070,6081 **** if (ira_reg_class_max_nregs [rclass][GET_MODE (SUBREG_REG (x))] > reg_class_size[(int) rclass]) { ! x = find_reloads_subreg_address (x, 0, opnum, ! ADDR_TYPE (type), ! ind_levels, insn, NULL); ! push_reload (x, NULL_RTX, loc, (rtx*) 0, rclass, ! GET_MODE (x), VOIDmode, 0, 0, opnum, type); ! return 1; } } } --- 6058,6088 ---- if (ira_reg_class_max_nregs [rclass][GET_MODE (SUBREG_REG (x))] > reg_class_size[(int) rclass]) { ! /* If the inner register will be replaced by a memory ! reference, we can do this only if we can replace the ! whole subreg by a (narrower) memory reference. If ! this is not possible, fall through and reload just ! the inner register (including address reloads). */ ! if (reg_equiv_memory_loc (REGNO (SUBREG_REG (x))) != 0) ! { ! rtx tem = find_reloads_subreg_address (x, opnum, ! ADDR_TYPE (type), ! ind_levels, insn, ! NULL); ! if (tem) ! { ! push_reload (tem, NULL_RTX, loc, (rtx*) 0, rclass, ! GET_MODE (tem), VOIDmode, 0, 0, ! opnum, type); ! return 1; ! } ! } ! else ! { ! push_reload (x, NULL_RTX, loc, (rtx*) 0, rclass, ! GET_MODE (x), VOIDmode, 0, 0, opnum, type); ! return 1; ! } } } } *************** find_reloads_address_part (rtx x, rtx *l *** 6152,6168 **** } /* X, a subreg of a pseudo, is a part of an address that needs to be ! reloaded. ! ! If the pseudo is equivalent to a memory location that cannot be directly ! addressed, make the necessary address reloads. ! If address reloads have been necessary, or if the address is changed ! by register elimination, return the rtx of the memory location; ! otherwise, return X. ! ! If FORCE_REPLACE is nonzero, unconditionally replace the subreg with the ! memory location. OPNUM and TYPE identify the purpose of the reload. --- 6159,6170 ---- } /* X, a subreg of a pseudo, is a part of an address that needs to be ! reloaded, and the pseusdo is equivalent to a memory location. ! Attempt to replace the whole subreg by a (possibly narrower or wider) ! memory reference. If this is possible, return this new memory ! reference, and push all required address reloads. Otherwise, ! return NULL. OPNUM and TYPE identify the purpose of the reload. *************** find_reloads_address_part (rtx x, rtx *l *** 6174,6304 **** stack slots. */ static rtx ! find_reloads_subreg_address (rtx x, int force_replace, int opnum, ! enum reload_type type, int ind_levels, rtx insn, ! int *address_reloaded) { int regno = REGNO (SUBREG_REG (x)); int reloaded = 0; ! if (reg_equiv_memory_loc (regno)) ! { ! /* If the address is not directly addressable, or if the address is not ! offsettable, then it must be replaced. */ ! if (! force_replace ! && (reg_equiv_address (regno) ! || ! offsettable_memref_p (reg_equiv_mem (regno)))) ! force_replace = 1; ! ! if (force_replace || num_not_at_initial_offset) ! { ! rtx tem = make_memloc (SUBREG_REG (x), regno); ! ! /* If the address changes because of register elimination, then ! it must be replaced. */ ! if (force_replace ! || ! rtx_equal_p (tem, reg_equiv_mem (regno))) ! { ! unsigned outer_size = GET_MODE_SIZE (GET_MODE (x)); ! unsigned inner_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (x))); ! int offset; ! rtx orig = tem; ! ! /* For big-endian paradoxical subregs, SUBREG_BYTE does not ! hold the correct (negative) byte offset. */ ! if (BYTES_BIG_ENDIAN && outer_size > inner_size) ! offset = inner_size - outer_size; ! else ! offset = SUBREG_BYTE (x); ! XEXP (tem, 0) = plus_constant (GET_MODE (XEXP (tem, 0)), ! XEXP (tem, 0), offset); ! PUT_MODE (tem, GET_MODE (x)); ! if (MEM_OFFSET_KNOWN_P (tem)) ! set_mem_offset (tem, MEM_OFFSET (tem) + offset); ! if (MEM_SIZE_KNOWN_P (tem) ! && MEM_SIZE (tem) != (HOST_WIDE_INT) outer_size) ! set_mem_size (tem, outer_size); ! ! /* If this was a paradoxical subreg that we replaced, the ! resulting memory must be sufficiently aligned to allow ! us to widen the mode of the memory. */ ! if (outer_size > inner_size) ! { ! rtx base; ! base = XEXP (tem, 0); ! if (GET_CODE (base) == PLUS) ! { ! if (CONST_INT_P (XEXP (base, 1)) ! && INTVAL (XEXP (base, 1)) % outer_size != 0) ! return x; ! base = XEXP (base, 0); ! } ! if (!REG_P (base) ! || (REGNO_POINTER_ALIGN (REGNO (base)) ! < outer_size * BITS_PER_UNIT)) ! return x; ! } - reloaded = find_reloads_address (GET_MODE (tem), &tem, - XEXP (tem, 0), &XEXP (tem, 0), - opnum, type, ind_levels, insn); - /* ??? Do we need to handle nonzero offsets somehow? */ - if (!offset && !rtx_equal_p (tem, orig)) - push_reg_equiv_alt_mem (regno, tem); - - /* For some processors an address may be valid in the - original mode but not in a smaller mode. For - example, ARM accepts a scaled index register in - SImode but not in HImode. Note that this is only - a problem if the address in reg_equiv_mem is already - invalid in the new mode; other cases would be fixed - by find_reloads_address as usual. - - ??? We attempt to handle such cases here by doing an - additional reload of the full address after the - usual processing by find_reloads_address. Note that - this may not work in the general case, but it seems - to cover the cases where this situation currently - occurs. A more general fix might be to reload the - *value* instead of the address, but this would not - be expected by the callers of this routine as-is. - - If find_reloads_address already completed replaced - the address, there is nothing further to do. */ - if (reloaded == 0 - && reg_equiv_mem (regno) != 0 - && !strict_memory_address_addr_space_p - (GET_MODE (x), XEXP (reg_equiv_mem (regno), 0), - MEM_ADDR_SPACE (reg_equiv_mem (regno)))) - { - push_reload (XEXP (tem, 0), NULL_RTX, &XEXP (tem, 0), (rtx*) 0, - base_reg_class (GET_MODE (tem), - MEM_ADDR_SPACE (tem), - MEM, SCRATCH), - GET_MODE (XEXP (tem, 0)), VOIDmode, 0, 0, - opnum, type); - reloaded = 1; - } - /* If this is not a toplevel operand, find_reloads doesn't see - this substitution. We have to emit a USE of the pseudo so - that delete_output_reload can see it. */ - if (replace_reloads && recog_data.operand[opnum] != x) - /* We mark the USE with QImode so that we recognize it - as one that can be safely deleted at the end of - reload. */ - PUT_MODE (emit_insn_before (gen_rtx_USE (VOIDmode, - SUBREG_REG (x)), - insn), QImode); - x = tem; - } - } - } if (address_reloaded) *address_reloaded = reloaded; ! return x; } /* Substitute into the current INSN the registers into which we have reloaded --- 6176,6281 ---- stack slots. */ static rtx ! find_reloads_subreg_address (rtx x, int opnum, enum reload_type type, ! int ind_levels, rtx insn, int *address_reloaded) { + enum machine_mode outer_mode = GET_MODE (x); + enum machine_mode inner_mode = GET_MODE (SUBREG_REG (x)); int regno = REGNO (SUBREG_REG (x)); int reloaded = 0; + rtx tem, orig; + int offset; ! gcc_assert (reg_equiv_memory_loc (regno) != 0); ! /* We cannot replace the subreg with a modified memory reference if: ! - we have a paradoxical subreg that implicitly acts as a zero or ! sign extension operation due to LOAD_EXTEND_OP; ! ! - we have a subreg that is implicitly supposed to act on the full ! register due to WORD_REGISTER_OPERATIONS (see also eliminate_regs); ! ! - the address of the equivalent memory location is mode-dependent; or ! ! - we have a paradoxical subreg and the resulting memory is not ! sufficiently aligned to allow access in the wider mode. ! ! In addition, we choose not to perform the replacement for *any* ! paradoxical subreg, even if it were possible in principle. This ! is to avoid generating wider memory references than necessary. ! ! This corresponds to how previous versions of reload used to handle ! paradoxical subregs where no address reload was required. */ ! ! if (paradoxical_subreg_p (x)) ! return NULL; ! ! #ifdef WORD_REGISTER_OPERATIONS ! if (GET_MODE_SIZE (outer_mode) < GET_MODE_SIZE (inner_mode) ! && ((GET_MODE_SIZE (outer_mode) - 1) / UNITS_PER_WORD ! == (GET_MODE_SIZE (inner_mode) - 1) / UNITS_PER_WORD)) ! return NULL; ! #endif ! ! /* Since we don't attempt to handle paradoxical subregs, we can just ! call into simplify_subreg, which will handle all remaining checks ! for us. */ ! orig = make_memloc (SUBREG_REG (x), regno); ! offset = SUBREG_BYTE (x); ! tem = simplify_subreg (outer_mode, orig, inner_mode, offset); ! if (!tem || !MEM_P (tem)) ! return NULL; ! ! /* Now push all required address reloads, if any. */ ! reloaded = find_reloads_address (GET_MODE (tem), &tem, ! XEXP (tem, 0), &XEXP (tem, 0), ! opnum, type, ind_levels, insn); ! /* ??? Do we need to handle nonzero offsets somehow? */ ! if (!offset && !rtx_equal_p (tem, orig)) ! push_reg_equiv_alt_mem (regno, tem); ! ! /* For some processors an address may be valid in the original mode but ! not in a smaller mode. For example, ARM accepts a scaled index register ! in SImode but not in HImode. Note that this is only a problem if the ! address in reg_equiv_mem is already invalid in the new mode; other ! cases would be fixed by find_reloads_address as usual. ! ! ??? We attempt to handle such cases here by doing an additional reload ! of the full address after the usual processing by find_reloads_address. ! Note that this may not work in the general case, but it seems to cover ! the cases where this situation currently occurs. A more general fix ! might be to reload the *value* instead of the address, but this would ! not be expected by the callers of this routine as-is. ! ! If find_reloads_address already completed replaced the address, there ! is nothing further to do. */ ! if (reloaded == 0 ! && reg_equiv_mem (regno) != 0 ! && !strict_memory_address_addr_space_p ! (GET_MODE (x), XEXP (reg_equiv_mem (regno), 0), ! MEM_ADDR_SPACE (reg_equiv_mem (regno)))) ! { ! push_reload (XEXP (tem, 0), NULL_RTX, &XEXP (tem, 0), (rtx*) 0, ! base_reg_class (GET_MODE (tem), MEM_ADDR_SPACE (tem), ! MEM, SCRATCH), ! GET_MODE (XEXP (tem, 0)), VOIDmode, 0, 0, opnum, type); ! reloaded = 1; ! } ! ! /* If this is not a toplevel operand, find_reloads doesn't see this ! substitution. We have to emit a USE of the pseudo so that ! delete_output_reload can see it. */ ! if (replace_reloads && recog_data.operand[opnum] != x) ! /* We mark the USE with QImode so that we recognize it as one that ! can be safely deleted at the end of reload. */ ! PUT_MODE (emit_insn_before (gen_rtx_USE (VOIDmode, SUBREG_REG (x)), insn), ! QImode); if (address_reloaded) *address_reloaded = reloaded; ! return tem; } /* Substitute into the current INSN the registers into which we have reloaded -- Dr. Ulrich Weigand GNU Toolchain for Linux on System z and Cell BE ulrich.weig...@de.ibm.com