Hi, the only change that graphite makes comes from canonicalize_loop_ivs. Here is the diff between "1", the output of debug_loops (3) before graphite, and "2", the same output after graphite.
--- 1 2009-03-02 12:20:03.000000000 -0600 +++ 2 2009-03-02 12:20:18.000000000 -0600 @@ -23,6 +23,8 @@ bb_4 (preds = {bb_3 }, succs = {bb_10 }) { <bb 4>: + D.1655_27 = (unsigned int) width_12(D); + D.1656_25 = D.1655_27 + 4294967295; goto <bb 10>; } @@ -90,6 +92,8 @@ bb_11 (preds = {bb_10 }, succs = {bb_6 }) { <bb 11>: + D.1652_24 = (unsigned int) num_comp_14(D); + D.1653_6 = D.1652_24 + 4294967295; goto <bb 6>; } @@ -98,16 +102,18 @@ bb_5 (preds = {bb_6 }, succs = {bb_6 }) { <bb 5>: + ivtmp.25_2 = ivtmp.25_4 + 1; } bb_6 (preds = {bb_5 bb_11 }, succs = {bb_5 bb_7 }) { <bb 6>: - # in_47 = PHI <in_17(5), in_21(11)> - # out_48 = PHI <out_16(5), out_32(11)> - # i_49 = PHI <i_18(5), 0(11)> # SMT.10_50 = PHI <SMT.10_30(5), SMT.10_34(11)> # SMT.11_52 = PHI <SMT.11_31(5), SMT.11_35(11)> + # ivtmp.25_4 = PHI <ivtmp.25_2(5), 0(11)> + in_47 = in_21 + ivtmp.25_4; + out_48 = out_32 + ivtmp.25_4; + i_49 = (int) ivtmp.25_4; # VUSE <SMT.10_50, SMT.11_52> { SMT.10 SMT.11 } D.1617_15 = *in_47; # SMT.10_30 = VDEF <SMT.10_50> @@ -116,7 +122,7 @@ out_16 = out_48 + 1; in_17 = in_47 + 1; i_18 = i_49 + 1; - if (num_comp_14(D) > i_18) + if (ivtmp.25_4 < D.1653_6) goto <bb 5>; else goto <bb 7>;
The failure is in RTL expansion, in copy_to_mode_reg: gcc_assert (GET_MODE (x) == mode || GET_MODE (x) == VOIDmode); (gdb) p x->mode $16 = SImode (gdb) p mode $17 = DImode
It looks like a type problem in the condition being expanded: (gdb) p exp $18 = (tree) 0x7fa9967d2e00 (gdb) pgs ivtmp.25 < D.1653;
The cause is the following. canonicalize_loop_ivs computes the largest precision over all the phi nodes of the loop, so that the new induction variable can represent all the values of the old IVs, i.e.: for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi)) { phi = gsi_stmt (psi); res = PHI_RESULT (phi); if (is_gimple_reg (res) && TYPE_PRECISION (TREE_TYPE (res)) > precision) precision = TYPE_PRECISION (TREE_TYPE (res)); } type = lang_hooks.types.type_for_size (precision, 1);
However, it does not fold_convert the number of iterations to this new type, and thus we end up building a condition whose two operands have types of different precision: 32 bits for niter and 64 bits for the new IV.
Attached is a fix for this problem. With the fix applied, the diff between "1" (before graphite) and "5" (after graphite) looks like this:
--- 1 2009-03-02 12:20:03.000000000 -0600 +++ 5 2009-03-02 12:54:27.000000000 -0600 @@ -23,6 +23,8 @@ bb_4 (preds = {bb_3 }, succs = {bb_10 }) { <bb 4>: + D.1656_25 = (unsigned int) width_12(D); + D.1657_5 = D.1656_25 + 4294967295; goto <bb 10>; } @@ -90,6 +92,9 @@ bb_11 (preds = {bb_10 }, succs = {bb_6 }) { <bb 11>: + D.1652_24 = (unsigned int) num_comp_14(D); + D.1653_6 = D.1652_24 + 4294967295; + D.1654_4 = (long unsigned int) D.1653_6; goto <bb 6>; } @@ -98,16 +103,18 @@ bb_5 (preds = {bb_6 }, succs = {bb_6 }) { <bb 5>: + ivtmp.25_27 = ivtmp.25_2 + 1; } bb_6 (preds = {bb_5 bb_11 }, succs = {bb_5 bb_7 }) { <bb 6>: - # in_47 = PHI <in_17(5), in_21(11)> - # out_48 = PHI <out_16(5), out_32(11)> - # i_49 = PHI <i_18(5), 0(11)> # SMT.10_50 = PHI <SMT.10_30(5), SMT.10_34(11)> # SMT.11_52 = PHI <SMT.11_31(5), SMT.11_35(11)> + # ivtmp.25_2 = PHI <ivtmp.25_27(5), 0(11)> + in_47 = in_21 + ivtmp.25_2; + out_48 = out_32 + ivtmp.25_2; + i_49 = (int) ivtmp.25_2; # VUSE <SMT.10_50, SMT.11_52> { SMT.10 SMT.11 } D.1617_15 = *in_47; # SMT.10_30 = VDEF <SMT.10_50> @@ -116,7 +123,7 @@ out_16 = out_48 + 1; in_17 = in_47 + 1; i_18 = i_49 + 1; - if (num_comp_14(D) > i_18) + if (ivtmp.25_2 < D.1654_4) goto <bb 5>; else goto <bb 7>;
Sebastian Pop -- AMD - GNU Tools
Index: tree-parloops.c =================================================================== --- tree-parloops.c (revision 144544) +++ tree-parloops.c (working copy) @@ -1338,6 +1338,7 @@ canonicalize_loop_ivs (struct loop *loop affine_iv iv; edge exit = single_dom_exit (loop); struct reduction_info *red; + gimple_seq stmts; for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi)) @@ -1351,6 +1352,11 @@ canonicalize_loop_ivs (struct loop *loop type = lang_hooks.types.type_for_size (precision, 1); + nit = fold_convert (type, nit); + nit = force_gimple_operand (nit, &stmts, true, NULL_TREE); + if (stmts) + gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); + gsi = gsi_last_bb (loop->latch); create_iv (build_int_cst_type (type, 0), build_int_cst (type, 1), NULL_TREE, loop, &gsi, true, &var_before, NULL);