https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113590
--- Comment #2 from Richard Biener <rguenth at gcc dot gnu.org> ---
Move update to the latch:

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index edd7d4d8763..8b282019840 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -10808,7 +10808,9 @@ vectorizable_induction (loop_vec_info loop_vinfo,
          vec_def = gimple_build (&stmts,
                                  PLUS_EXPR, step_vectype, vec_def, up);
          vec_def = gimple_convert (&stmts, vectype, vec_def);
-         gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
+         gimple_stmt_iterator si2
+           = gsi_after_labels (loop_latch_edge (iv_loop)->src);
+         gsi_insert_seq_before (&si2, stmts, GSI_SAME_STMT);
          add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
                       UNKNOWN_LOCATION);

generates

        jmp     .L6
        .p2align 5
        .p2align 4,,10
        .p2align 3
.L4:
        paddd   %xmm3, %xmm0
.L6:
        movdqa  %xmm0, %xmm1
        addq    $16, %rax
        paddd   %xmm2, %xmm1
        movups  %xmm1, -16(%rax)
        cmpq    %rdx, %rax
        jne     .L4

instead of

        .p2align 5
        .p2align 4
        .p2align 3
.L4:
        movdqa  %xmm0, %xmm1
        addq    $16, %rax
        paddd   %xmm2, %xmm0
        paddd   %xmm3, %xmm1
        movups  %xmm1, -16(%rax)
        cmpq    %rdx, %rax
        jne     .L4