or linearization?)

Marc Glisse Sun, 04 Nov 2018 12:54:01 -0800

(resent because of mail issues on my end)

On Mon, 22 Oct 2018, Thomas Schwinge wrote:

I had a quick look at the difference, and a[j][i] remains in this form
throughout optimization. If I write instead *((*(a+j))+i) = 0; I get

   j_10 = tmp_17 / 1025;
   i_11 = tmp_17 % 1025;
   _1 = (long unsigned int) j_10;
   _2 = _1 * 1025;
   _3 = (sizetype) i_11;
   _4 = _2 + _3;

or for a power of 2

   j_10 = tmp_17 >> 10;
   i_11 = tmp_17 & 1023;
   _1 = (long unsigned int) j_10;
   _2 = _1 * 1024;
   _3 = (sizetype) i_11;
   _4 = _2 + _3;

and in both cases we fail to notice that _4 = (sizetype) tmp_17; (at least
I think that's true).

So there are missing match.pd transformations in addition to whatever
scev/ivdep/other work is needed.


With a very simplistic "match.pd" rule (not yet any special cases
checking etc.):

diff --git gcc/match.pd gcc/match.pd
index b36d7ccb5dc3..4c23116308da 100644
--- gcc/match.pd
+++ gcc/match.pd
@@ -5126,3 +5126,28 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
                            { wide_int_to_tree (sizetype, off); })
              { swap_p ? @0 : @2; }))
            { rhs_tree; })))))))))
+
+/* Given:
+
+       j = in / N_I
+       i = in % N_I
+
+   ..., fold:
+
+       out = j * N_I + i
+
+   ..., into:
+
+       out = in
+*/
+
+/* As long as only considering N_I being INTEGER_CST (which are always second
+   argument?), probably don't need ":c" variants?  */
+
+(simplify
+ (plus:c
+  (mult:c
+   (trunc_div @0 INTEGER_CST@1)
+   INTEGER_CST@1)
+  (trunc_mod @0 INTEGER_CST@1))
+ (convert @0))


You should only specify INTEGER_CST@1 on the first occurence, the others
can be just @1. (you may be interested in @@1 at some point, but that
gets tricky)

..., the original code:

   int f1(int in)
   {
     int j = in / N_I;
     int i = in % N_I;

     int out = j * N_I + i;

     return out;
   }

... gets simplified from ("div-mod-0.c.027t.objsz1"):

   f1 (int in)
   {
     int out;
     int i;
     int j;
     int _1;
     int _6;

     <bb 2> :
     gimple_assign <trunc_div_expr, j_3, in_2(D), 100, NULL>
     gimple_assign <trunc_mod_expr, i_4, in_2(D), 100, NULL>
     gimple_assign <mult_expr, _1, j_3, 100, NULL>
     gimple_assign <plus_expr, out_5, i_4, _1, NULL>
     gimple_assign <ssa_name, _6, out_5, NULL, NULL>
     gimple_return <_6>

   }

... to ("div-mod-0.c.028t.ccp1"):

   f1 (int in)
   {
     int out;
     int i;
     int j;
     int _1;

     <bb 2> :
     gimple_assign <trunc_div_expr, j_3, in_2(D), 100, NULL>
     gimple_assign <trunc_mod_expr, i_4, in_2(D), 100, NULL>
     gimple_assign <mult_expr, _1, j_3, 100, NULL>
     gimple_return <in_2(D)>

   }

(The three dead "gimple_assign"s get eliminated later on.)

So, that works.

However, it doesn't work yet for the original construct that I'd ran
into, which looks like this:

   [...]
   int i;
   int j;
   [...]
   signed int .offset.5_2;
   [...]
   unsigned int .offset.7_23;
   unsigned int .iter.0_24;
   unsigned int _25;
   unsigned int _26;
   [...]
   unsigned int .iter.0_32;
   [...]

   <bb 9> :
   # gimple_phi <.offset.5_2, .offset.5_21(8), .offset.5_30(9)>
   gimple_assign <nop_expr, .offset.7_23, .offset.5_2, NULL, NULL>
   gimple_assign <plus_expr, .iter.0_24, 0, .offset.7_23, NULL>
   gimple_assign <trunc_div_expr, _25, .iter.0_24, 100, NULL>
   gimple_assign <trunc_mod_expr, _26, .iter.0_24, 100, NULL>
   gimple_assign <nop_expr, i_27, _26, NULL, NULL>
   gimple_assign <nop_expr, j_28, _25, NULL, NULL>
   gimple_assign <integer_cst, a[j_28][i_27], 123, NULL, NULL>
   [...]

Resolving the "a[j][i] = 123" we'll need to look into later.

As Marc noted above, with that changed into "*(*(a + j) + i) = 123", we
get:

   [...]
   int i;
   int j;
   long unsigned int _1;
   long unsigned int _2;
   sizetype _3;
   sizetype _4;
   sizetype _5;
   int * _6;
   [...]
   signed int .offset.5_8;
   [...]
   unsigned int .offset.7_29;
   unsigned int .iter.0_30;
   unsigned int _31;
   unsigned int _32;
   [...]

   <bb 9> :
   # gimple_phi <.offset.5_8, .offset.5_27(8), .offset.5_36(9)>
   gimple_assign <nop_expr, .offset.7_29, .offset.5_8, NULL, NULL>
   gimple_assign <plus_expr, .iter.0_30, 0, .offset.7_29, NULL>
   gimple_assign <trunc_div_expr, _31, .iter.0_30, 100, NULL>
   gimple_assign <trunc_mod_expr, _32, .iter.0_30, 100, NULL>
   gimple_assign <nop_expr, i_33, _32, NULL, NULL>
   gimple_assign <nop_expr, j_34, _31, NULL, NULL>
   gimple_assign <nop_expr, _1, j_34, NULL, NULL>
   gimple_assign <mult_expr, _2, _1, 100, NULL>
   gimple_assign <nop_expr, _3, i_33, NULL, NULL>
   gimple_assign <plus_expr, _4, _2, _3, NULL>
   gimple_assign <mult_expr, _5, _4, 4, NULL>
   gimple_assign <pointer_plus_expr, _6, &a, _5, NULL>
   gimple_assign <integer_cst, *_6, 123, NULL, NULL>
   [...]

Here, unless I'm confused, "_4" is supposed to be equal to ".iter.0_30",
but "match.pd" doesn't agree yet.  Note the many "nop_expr"s here, which
I have not yet figured out how to handle, I suppose?  I tried some things
but couldn't get it to work.  Apparently the existing instances of
"(match (nop_convert @0)" and "Basic strip-useless-type-conversions /
strip_nops" rule also don't handle these; should they?  Or, are in fact
here the types mixed up too much?


"(match (nop_convert @0)" defines a shortcut so some transformations can
use nop_convert to detect some specific conversions, but it doesn't do
anything by itself. "Basic strip-useless-type-conversions" strips
conversions that are *useless*, essentially from a type to the same
type. If you want to handle true conversions, you need to do that
explicitly, see the many transformations that use convert? convert1?
convert2? and specify for which particular conversions the
transformation is valid.  Finding out the right conditions to detect
these conversions is often the most painful part of writing a match.pd
transformation.

I hope to get some time again soon to continue looking into this, but if
anybody got any ideas, I'm all ears.


--
Marc Glisse

Re: "match.pd" (was: Can support TRUNC_DIV_EXPR, TRUNC_MOD_EXPR in GCC vectorization/scalar evolution -- and/or linearization?)

Reply via email to