On 01/11/2012 05:55 PM, Ulrich Weigand wrote:
In either case, it is always odd to have RTX in the insn stream that isn't "stable" under common simplication ... Do you have any suggestions on how to fix this? If we add the fwprop patch, should we then disable apply_distributive_law for SUBREGs?
This strange canonicalization is also the cause of PR39726, where I also wanted to move subregs inside arithmetic operations. See the attached patch (which is not enough to fix the PR; this is just the combine part).
Unfortunately, doing this also caused x86_64 regressions, I think in zero_extract patterns.
Paolo
change canonicalization Index: gcc/Makefile.in =================================================================== --- gcc/Makefile.in (branch pr39726) +++ gcc/Makefile.in (working copy) @@ -2844,7 +2844,7 @@ jump.o : jump.c $(CONFIG_H) $(SYSTEM_H) simplify-rtx.o : simplify-rtx.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ $(RTL_H) $(REGS_H) hard-reg-set.h $(FLAGS_H) $(REAL_H) insn-config.h \ $(RECOG_H) $(EXPR_H) $(TOPLEV_H) output.h $(FUNCTION_H) $(GGC_H) $(TM_P_H) \ - $(TREE_H) $(TARGET_H) + $(TREE_H) $(TARGET_H) $(OPTABS_H) cgraph.o : cgraph.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) \ langhooks.h $(TOPLEV_H) $(FLAGS_H) $(GGC_H) $(TARGET_H) $(CGRAPH_H) \ gt-cgraph.h output.h intl.h $(BASIC_BLOCK_H) debug.h $(HASHTAB_H) \ Index: gcc/simplify-rtx.c =================================================================== --- gcc/simplify-rtx.c (branch pr39726) +++ gcc/simplify-rtx.c (working copy) @@ -39,6 +39,7 @@ along with GCC; see the file COPYING3. #include "output.h" #include "ggc.h" #include "target.h" +#include "optabs.h" /* Simplification and canonicalization of RTL. */ @@ -5191,6 +5192,8 @@ rtx simplify_subreg (enum machine_mode outermode, rtx op, enum machine_mode innermode, unsigned int byte) { + int is_lowpart; + /* Little bit of sanity checking. */ gcc_assert (innermode != VOIDmode); gcc_assert (outermode != VOIDmode); @@ -5300,11 +5303,13 @@ simplify_subreg (enum machine_mode outer return NULL_RTX; } + is_lowpart = subreg_lowpart_offset (outermode, innermode) == byte; + /* Merge implicit and explicit truncations. */ if (GET_CODE (op) == TRUNCATE && GET_MODE_SIZE (outermode) < GET_MODE_SIZE (innermode) - && subreg_lowpart_offset (outermode, innermode) == byte) + && is_lowpart) return simplify_gen_unary (TRUNCATE, outermode, XEXP (op, 0), GET_MODE (XEXP (op, 0))); @@ -5343,7 +5348,7 @@ simplify_subreg (enum machine_mode outer The information is used only by alias analysis that can not grog partial register anyway. */ - if (subreg_lowpart_offset (outermode, innermode) == byte) + if (is_lowpart) ORIGINAL_REGNO (x) = ORIGINAL_REGNO (op); return x; } @@ -5393,6 +5398,51 @@ simplify_subreg (enum machine_mode outer return NULL_RTX; } + /* Try to move a subreg inside an arithmetic operation. */ + if (is_lowpart && ARITHMETIC_P (op) + && GET_MODE_CLASS (outermode) == MODE_INT + && GET_MODE_CLASS (innermode) == MODE_INT) + { + enum insn_code ic; + enum machine_mode cnt_mode; + switch (GET_CODE (op)) + { + case ABS: + case NEG: + return simplify_gen_unary (GET_CODE (op), outermode, + rtl_hooks.gen_lowpart_no_emit + (outermode, XEXP (op, 0)), + outermode); + + case ASHIFT: + /* i386 always uses QImode for the shift count. Get the + appropriate mode from the optab. */ + ic = ashl_optab->handlers[outermode].insn_code; + if (ic != CODE_FOR_nothing) + cnt_mode = insn_data[ic].operand[2].mode; + else + cnt_mode = outermode; + return simplify_gen_binary (GET_CODE (op), outermode, + rtl_hooks.gen_lowpart_no_emit + (outermode, XEXP (op, 0)), + rtl_hooks.gen_lowpart_no_emit + (cnt_mode, XEXP (op, 1))); + + case PLUS: + case MINUS: + case AND: + case IOR: + case XOR: + return simplify_gen_binary (GET_CODE (op), outermode, + rtl_hooks.gen_lowpart_no_emit + (outermode, XEXP (op, 0)), + rtl_hooks.gen_lowpart_no_emit + (outermode, XEXP (op, 1))); + default: + break; + } + } + /* Optimize SUBREG truncations of zero and sign extended values. */ if ((GET_CODE (op) == ZERO_EXTEND || GET_CODE (op) == SIGN_EXTEND) Index: gcc/combine.c =================================================================== --- gcc/combine.c (branch pr39726) +++ gcc/combine.c (working copy) @@ -11155,47 +11155,6 @@ simplify_comparison (enum rtx_code code, continue; } - /* If this is (and:M1 (subreg:M2 X 0) (const_int C1)) where C1 - fits in both M1 and M2 and the SUBREG is either paradoxical - or represents the low part, permute the SUBREG and the AND - and try again. */ - if (GET_CODE (XEXP (op0, 0)) == SUBREG) - { - unsigned HOST_WIDE_INT c1; - tmode = GET_MODE (SUBREG_REG (XEXP (op0, 0))); - /* Require an integral mode, to avoid creating something like - (AND:SF ...). */ - if (SCALAR_INT_MODE_P (tmode) - /* It is unsafe to commute the AND into the SUBREG if the - SUBREG is paradoxical and WORD_REGISTER_OPERATIONS is - not defined. As originally written the upper bits - have a defined value due to the AND operation. - However, if we commute the AND inside the SUBREG then - they no longer have defined values and the meaning of - the code has been changed. */ - && (0 -#ifdef WORD_REGISTER_OPERATIONS - || (mode_width > GET_MODE_BITSIZE (tmode) - && mode_width <= BITS_PER_WORD) -#endif - || (mode_width <= GET_MODE_BITSIZE (tmode) - && subreg_lowpart_p (XEXP (op0, 0)))) - && CONST_INT_P (XEXP (op0, 1)) - && mode_width <= HOST_BITS_PER_WIDE_INT - && GET_MODE_BITSIZE (tmode) <= HOST_BITS_PER_WIDE_INT - && ((c1 = INTVAL (XEXP (op0, 1))) & ~mask) == 0 - && (c1 & ~GET_MODE_MASK (tmode)) == 0 - && c1 != mask - && c1 != GET_MODE_MASK (tmode)) - { - op0 = simplify_gen_binary (AND, tmode, - SUBREG_REG (XEXP (op0, 0)), - gen_int_mode (c1, tmode)); - op0 = gen_lowpart (mode, op0); - continue; - } - } - /* Convert (ne (and (not X) 1) 0) to (eq (and X 1) 0). */ if (const_op == 0 && equality_comparison_p && XEXP (op0, 1) == const1_rtx