https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113590

--- Comment #2 from Richard Biener <rguenth at gcc dot gnu.org> ---
The patch below moves the vector induction update into the loop latch block:

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index edd7d4d8763..8b282019840 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -10808,7 +10808,9 @@ vectorizable_induction (loop_vec_info loop_vinfo,
          vec_def = gimple_build (&stmts,
                                  PLUS_EXPR, step_vectype, vec_def, up);
          vec_def = gimple_convert (&stmts, vectype, vec_def);
-         gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
+         gimple_stmt_iterator si2
+           = gsi_after_labels (loop_latch_edge (iv_loop)->src);
+         gsi_insert_seq_before (&si2, stmts, GSI_SAME_STMT);
          add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
                       UNKNOWN_LOCATION);


With this patch applied, GCC generates

        jmp     .L6
        .p2align 5
        .p2align 4,,10
        .p2align 3
.L4:
        paddd   %xmm3, %xmm0
.L6:
        movdqa  %xmm0, %xmm1
        addq    $16, %rax
        paddd   %xmm2, %xmm1
        movups  %xmm1, -16(%rax)
        cmpq    %rdx, %rax
        jne     .L4

instead of

        .p2align 5
        .p2align 4
        .p2align 3
.L4:
        movdqa  %xmm0, %xmm1
        addq    $16, %rax
        paddd   %xmm2, %xmm0
        paddd   %xmm3, %xmm1
        movups  %xmm1, -16(%rax)
        cmpq    %rdx, %rax
        jne     .L4

Reply via email to