On Fri, Feb 19, 2016 at 10:24 PM, Jeff Law <l...@redhat.com> wrote: > On 02/16/2016 11:43 AM, Bin Cheng wrote: >> >> ________________________________________ >> From: Jeff Law <l...@redhat.com> >> Sent: 11 February 2016 23:26 >> To: Bin.Cheng >> Cc: Bin Cheng; gcc-patches@gcc.gnu.org; nd >> Subject: Re: [PATCH PR69052]Check if loop inv can be propagated into mem >> ref with additional addr expr canonicalization >> >>>> On 02/11/2016 10:59 AM, Bin.Cheng wrote: >> >> >>>> Hi Jeff, >>>> Thanks for detailed review. I also think a generic canonical >>>> interface for RTL is much better. I will give it a try. But with >>>> high chance it's a next stage1 stuff. >>> >>> That is, of course, fine. However, if you do get something ready, I'd >>> support using it within LICM for gcc-6, then using it in other places >>> for gcc-7. >> >> Hi, >> This is the updated version patch. It fixes the problem by introducing a >> generic address canonicalization interface. This new interface >> canonicalizes address expression in following steps: >> 1) Rewrite ASHIFT into MULT recursively. >> 2) Divide address into sub expressions with PLUS as the separator. >> 3) Sort sub expressions according to precedence defined for >> commutative operations. >> 4) Simplify CONST_INT_P sub expressions. >> 5) Create new canonicalized address and return. >> >> According to review comments, this interface is now restricted in LICM, >> and will probably be expanded to other passes like fwprop and combine after >> entering GCC7. >> Bootstrap and test on x86_64 and AArch64. Is it OK? >> >> Thanks, >> bin >> >> 2016-02-15 Bin Cheng <bin.ch...@arm.com> >> >> PR tree-optimization/69052 >> * loop-invariant.c (canonicalize_address_mult): New function. >> (MAX_CANON_ADDR_PARTS): New macro. >> (collect_address_parts): New function. >> (compare_address_parts, canonicalize_address): New functions. 
>> (inv_can_prop_to_addr_use): Check validity of address expression >> which is canonicalized by above canonicalize_address. >> >> gcc/testsuite/ChangeLog >> 2016-02-15 Bin Cheng <bin.ch...@arm.com> >> >> PR tree-optimization/69052 >> * gcc.target/i386/pr69052.c: New test. > > This is exactly what I was looking for from a design standpoint. > > My only question is why didn't you use FOR_EACH_SUBRTX_VAR from rtl-iter.h > to walk the RTX expressions in collect_address_parts and > canonicalize_address_mult? Hi Jeff, Here comes the updated patch using FOR_EACH_SUBRTX_VAR in both functions you mentioned. Bootstrap and test on x86_64 and AArch64, is it OK? The ChangeLog isn't affected.
Thanks, bin > > Jeff > > >> >>> >>> Jeff >> >> >
diff --git a/gcc/loop-invariant.c b/gcc/loop-invariant.c index 707f044..dcbe932 100644 --- a/gcc/loop-invariant.c +++ b/gcc/loop-invariant.c @@ -52,6 +52,7 @@ along with GCC; see the file COPYING3. If not see #include "cfgloop.h" #include "expr.h" #include "params.h" +#include "rtl-iter.h" #include "dumpfile.h" /* The data stored for the loop. */ @@ -754,6 +755,130 @@ create_new_invariant (struct def *def, rtx_insn *insn, bitmap depends_on, return inv; } +/* Return a canonical version of X for the address, from the point of view, + that all multiplications are represented as MULT instead of the multiply + by a power of 2 being represented as ASHIFT. + + Callers should prepare a copy of X because this function may modify it + in place. */ + +static void +canonicalize_address_mult (rtx x) +{ + subrtx_var_iterator::array_type array; + FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST) + { + rtx sub = *iter; + + if (GET_CODE (sub) == ASHIFT + && CONST_INT_P (XEXP (sub, 1)) + && INTVAL (XEXP (sub, 1)) < GET_MODE_BITSIZE (GET_MODE (sub)) + && INTVAL (XEXP (sub, 1)) >= 0) + { + HOST_WIDE_INT shift = INTVAL (XEXP (sub, 1)); + PUT_CODE (sub, MULT); + XEXP (sub, 1) = gen_int_mode ((HOST_WIDE_INT) 1 << shift, + GET_MODE (sub)); + iter.skip_subrtxes (); + } + } +} + +/* Maximum number of sub expressions in address. We set it to + a small integer since it's unlikely to have a complicated + address expression. */ + +#define MAX_CANON_ADDR_PARTS (5) + +/* Collect sub expressions in address X with PLUS as the separator. + Sub expressions are stored in vector ADDR_PARTS. */ + +static void +collect_address_parts (rtx x, vec<rtx> *addr_parts) +{ + subrtx_var_iterator::array_type array; + FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST) + { + rtx sub = *iter; + + if (GET_CODE (sub) != PLUS) + { + addr_parts->safe_push (sub); + iter.skip_subrtxes (); + } + } +} + +/* Compare function for sorting sub expressions X and Y based on + precedence defined for commutative operations. 
*/ + +static int +compare_address_parts (const void *x, const void *y) +{ + const rtx *rx = (const rtx *)x; + const rtx *ry = (const rtx *)y; + int px = commutative_operand_precedence (*rx); + int py = commutative_operand_precedence (*ry); + + return (py - px); +} + +/* Return a canonical version of address X by following steps: + 1) Rewrite ASHIFT into MULT recursively. + 2) Divide address into sub expressions with PLUS as the + separator. + 3) Sort sub expressions according to precedence defined + for commutative operations. + 4) Simplify CONST_INT_P sub expressions. + 5) Create new canonicalized address and return. + Callers should prepare a copy of X because this function may + modify it in place. */ + +static rtx +canonicalize_address (rtx x) +{ + rtx res; + unsigned int i, j; + machine_mode mode = GET_MODE (x); + auto_vec<rtx, MAX_CANON_ADDR_PARTS> addr_parts; + + /* Rewrite ASHIFT into MULT. */ + canonicalize_address_mult (x); + /* Divide address into sub expressions. */ + collect_address_parts (x, &addr_parts); + /* Unlikely to have very complicated address. */ + if (addr_parts.length () < 2 + || addr_parts.length () > MAX_CANON_ADDR_PARTS) + return x; + + /* Sort sub expressions according to canonicalization precedence. */ + addr_parts.qsort (compare_address_parts); + + /* Simplify all constant int summands if possible. */ + for (i = 0; i < addr_parts.length (); i++) + if (CONST_INT_P (addr_parts[i])) + break; + + for (j = i + 1; j < addr_parts.length (); j++) + { + gcc_assert (CONST_INT_P (addr_parts[j])); + addr_parts[i] = simplify_gen_binary (PLUS, mode, + addr_parts[i], + addr_parts[j]); + } + + /* Chain PLUS operators to the left for !CONST_INT_P sub expressions. */ + res = addr_parts[0]; + for (j = 1; j < i; j++) + res = simplify_gen_binary (PLUS, mode, res, addr_parts[j]); + + /* Pick up the last CONST_INT_P sub expression. 
*/ + if (i < addr_parts.length ()) + res = simplify_gen_binary (PLUS, mode, res, addr_parts[i]); + + return res; +} + /* Given invariant DEF and its address USE, check if the corresponding invariant expr can be propagated into the use or not. */ @@ -761,7 +886,7 @@ static bool inv_can_prop_to_addr_use (struct def *def, df_ref use) { struct invariant *inv; - rtx *pos = DF_REF_REAL_LOC (use), def_set; + rtx *pos = DF_REF_REAL_LOC (use), def_set, use_set; rtx_insn *use_insn = DF_REF_INSN (use); rtx_insn *def_insn; bool ok; @@ -778,6 +903,29 @@ inv_can_prop_to_addr_use (struct def *def, df_ref use) validate_unshare_change (use_insn, pos, SET_SRC (def_set), true); ok = verify_changes (0); + /* Try harder with canonicalization in address expression. */ + if (!ok && (use_set = single_set (use_insn)) != NULL_RTX) + { + rtx src, dest, mem = NULL_RTX; + + src = SET_SRC (use_set); + dest = SET_DEST (use_set); + if (MEM_P (src)) + mem = src; + else if (MEM_P (dest)) + mem = dest; + + if (mem != NULL_RTX + && !memory_address_addr_space_p (GET_MODE (mem), + XEXP (mem, 0), + MEM_ADDR_SPACE (mem))) + { + rtx addr = canonicalize_address (copy_rtx (XEXP (mem, 0))); + if (memory_address_addr_space_p (GET_MODE (mem), + addr, MEM_ADDR_SPACE (mem))) + ok = true; + } + } cancel_changes (0); return ok; } diff --git a/gcc/testsuite/gcc.target/i386/pr69052.c b/gcc/testsuite/gcc.target/i386/pr69052.c new file mode 100644 index 0000000..6f491e9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr69052.c @@ -0,0 +1,54 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target pie } */ +/* { dg-options "-O2 -fPIE -pie" } */ + +int look_nbits[256], loop_sym[256]; +const int ind[] = { + 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63 +}; +int out[256]; +extern void bar (int *, int *); +void foo 
(int *l1, int *l2, int *v, int *v1, int *m1, int i) +{ + int L = i + 1, b = 20; + int result, k; + + for (k = 1; k < 64; k++) + { + int look = (((L >> (b - 8))) & ((1 << 8) - 1)); + int nb = l1[look]; + int code; + int r; + + if (nb) + { + b -= nb; + result = l2[look]; + } + else + { + nb = 9; + code = (((L >> (b -= nb))) & ((1 << nb) - 1)); + result = v[(code + v1[nb])]; + } + r = result >> 4; + result &= 15; + if (result) + { + k += r; + r = (((L >> (b -= result))) & ((1 << result) - 1)); + if (r < (1 << (result - 1))) + result = r + (((-1) << result) + 1); + else + result = r; + + out[ind[k]] = result; + } + bar (&L, &b); + } +} + +/* { dg-final { scan-assembler-not "leal\[ \t\]ind@GOTOFF\\(%\[^,\]*\\), %" { target ia32 } } } */