https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101445
--- Comment #4 from Richard Biener <rguenth at gcc dot gnu.org> ---

Simplified testcase, fails at -O2 -ftree-loop-vectorize:

int a[35] = {1, 1, 3};

void __attribute__((noipa))
foo ()
{
  for (int b = 4; b >= 0; b--)
    {
      int tem = a[b * 5 + 3 + 1];
      a[b * 5 + 3] = tem;
      a[b * 5 + 2] = tem;
      a[b * 5 + 1] = tem;
      a[b * 5 + 0] = tem;
    }
}

int main()
{
  foo ();
  for (int d = 0; d < 25; d++)
    if (a[d] != 0)
      __builtin_abort ();
  return 0;
}

The load is vectorized in an odd way, but "correct" - but the final IV update(s) are bogus.

  <bb 2>
  vectp_a.7_34 = &a + 84;   // &a[21]

  <bb 3>
  # vectp_a.6_35 = PHI <vectp_a.6_36(5), vectp_a.7_34(2)>
  ...
  vect_tem_9.8_37 = MEM <vector(4) int> [(int *)vectp_a.6_35];
  vect_tem_9.9_38 = VEC_PERM_EXPR <vect_tem_9.8_37, vect_tem_9.8_37, { 3, 2, 1, 0 }>;
  vectp_a.6_39 = vectp_a.6_35 + 18446744073709551600;   // -16
  vect_tem_9.10_40 = MEM <vector(4) int> [(int *)vectp_a.6_39];
  vect_tem_9.11_41 = VEC_PERM_EXPR <vect_tem_9.10_40, vect_tem_9.10_40, { 3, 2, 1, 0 }>;
  vectp_a.6_42 = vectp_a.6_39 + 18446744073709551604;   // -12
  vect_tem_9.12_43 = VEC_PERM_EXPR <vect_tem_9.9_38, vect_tem_9.9_38, { 0, 0, 0, 0 }>;
  ...
  vectp_a.6_36 = vectp_a.6_42 + 18446744073709551600;   // -16

We're doing VMAT_CONTIGUOUS_REVERSE but the group has gaps and we fail to account for the reverse when computing group_gap_adj (which should have been +12, not -12).