Hi! The following testcase is miscompiled, because prepare_shrink_wrap attempts to copyprop_hardreg_forward_1 the first bb. We see DImode rbx being copied to DImode r11, and then we have (dead since postreload) an assignment of SImode r11d to SImode ebx, and later on some uses of DImode r11. copyprop_hardreg_forward_1 notes the oldest reg is rbx, and replaces in the SImode move r11d with ebx (that is fine), and later on the DImode uses of r11 with rbx. The problem is that we now have a DImode rbx setter, then SImode noop move of ebx to ebx (which by definition means the upper bits are undefined), and then DImode rbx use. If the noop move is DCEd soon enough, that wouldn't be a problem, but before that happens, regrename is performed and we get the last use of DImode rbx replaced with rcx and the ebx, ebx noop move changed int ecx = ebx SImode move. That of course doesn't work.
The problem is that copyprop_hardreg_forward_1 ignores noop_p moves (most likely in the hope that they will be soon optimized away). That is fine if they use as wide mode as we have recorded for the register, but if it is narrower, we can choose to either remove the noop move, or adjust the regcprop data structure. The patch below chooses to do both of these, the first one if DCE would remove the noop move, the latter otherwise. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2016-02-22 Jakub Jelinek <ja...@redhat.com> PR rtl-optimization/69896 * regcprop.c: Include cfgrtl.h. (copyprop_hardreg_forward_1): If noop_p insn uses narrower than remembered mode, either delete it (if noop_move_p), or treat like copy_p but not noop_p instruction. * gcc.dg/pr69896.c: New test. --- gcc/regcprop.c.jj 2016-01-04 14:55:53.000000000 +0100 +++ gcc/regcprop.c 2016-02-22 16:47:49.587425032 +0100 @@ -32,6 +32,7 @@ #include "addresses.h" #include "tree-pass.h" #include "rtl-iter.h" +#include "cfgrtl.h" /* The following code does forward propagation of hard register copies. The object is to eliminate as many dependencies as possible, so that @@ -739,9 +740,9 @@ static bool copyprop_hardreg_forward_1 (basic_block bb, struct value_data *vd) { bool anything_changed = false; - rtx_insn *insn; + rtx_insn *insn, *next; - for (insn = BB_HEAD (bb); ; insn = NEXT_INSN (insn)) + for (insn = BB_HEAD (bb); ; insn = next) { int n_ops, i, predicated; bool is_asm, any_replacements; @@ -751,6 +752,7 @@ copyprop_hardreg_forward_1 (basic_block bool changed = false; struct kill_set_value_data ksvd; + next = NEXT_INSN (insn); if (!NONDEBUG_INSN_P (insn)) { if (DEBUG_INSN_P (insn)) @@ -1042,6 +1044,23 @@ copyprop_hardreg_forward_1 (basic_block bool noop_p = (copy_p && rtx_equal_p (SET_DEST (set), SET_SRC (set))); + /* If a noop move is using narrower mode than we have recorded, + we need to either remove the noop move, or kill_set_value. */ + if (noop_p + && (GET_MODE_BITSIZE (GET_MODE (SET_DEST (set))) + < GET_MODE_BITSIZE (vd->e[REGNO (SET_DEST (set))].mode))) + { + if (noop_move_p (insn)) + { + bool last = insn == BB_END (bb); + delete_insn (insn); + if (last) + break; + } + else + noop_p = false; + } + if (!noop_p) { /* Notice stores. */ --- gcc/testsuite/gcc.dg/pr69896.c.jj 2016-02-22 17:02:53.219068093 +0100 +++ gcc/testsuite/gcc.dg/pr69896.c 2016-02-22 16:59:43.000000000 +0100 @@ -0,0 +1,33 @@ +/* PR rtl-optimization/69896 */ +/* { dg-do run { target int128 } } */ +/* { dg-options "-w -O -fcaller-saves -fno-dse -frename-registers -fno-tree-ter" } */ +/* { dg-additional-options "-mno-sse" { target x86_64-*-* i?86-*-* } } */ + +typedef unsigned short A; +typedef unsigned short B __attribute__ ((vector_size (32))); +typedef unsigned int C; +typedef unsigned int D __attribute__ ((vector_size (32))); +typedef unsigned long long E; +typedef unsigned long long F __attribute__ ((vector_size (32))); +typedef unsigned __int128 G; +typedef unsigned __int128 H __attribute__ ((vector_size (32))); + +G __attribute__ ((noinline, noclone)) +foo (A a, C b, E c, G d, A e, C f, E g, G h, B i, D j, F k, H l, B m, D n, F o, H p) +{ + j /= (D) { -c, -c, ~h, 1, ~l[0], -m[0], p[0], 1} | 1; + l %= (H) o | 1; + l[1] = (l[1] << (e & 127)) | (l[1] >> (e & 127)); + return j[6] + l[0] + l[1] + n[7] + o[0] + o[2] + o[3] + p[0] + p[1]; +} + +int +main () +{ + if (__CHAR_BIT__ != 8 || sizeof (A) != 2 || sizeof (C) != 4 || sizeof (E) != 8 || sizeof (G) != 16) + return 0; + G x = foo (0, 1, 2, 3, 4, 5, 6, 7, (B) {}, (D) {}, (F) {}, (H) {}, (B) {}, (D) {}, (F) {}, (H) { 0xffffffffffffffffULL, 0x74a3e4aULL }); + if ((E) x != 0x00000000074a3e49ULL || (E) (x >> 64) != 1) + __builtin_abort (); + return 0; +} Jakub