https://bugs.llvm.org/show_bug.cgi?id=49296

            Bug ID: 49296
           Summary: [Regression] Suboptimal loop exit codegen
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: Linux
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: Loop Optimizer
          Assignee: unassignedb...@nondot.org
          Reporter: david.bolvan...@gmail.com
                CC: llvm-bugs@lists.llvm.org

#include <immintrin.h>
#include <stdint.h>

typedef uint32_t u32v8 __attribute__((vector_size(32)));
/*
 * Reduced reproducer for the loop-exit codegen regression reported here.
 *
 * Reads 32 consecutive u32v8 vectors (32 bytes each, 1024 bytes in total)
 * from `input` and, for every vector v, stores `v | (v >> 4)` to the
 * corresponding position in `out`. The shift is applied element-wise to the
 * uint32_t lanes of the vector type.
 *
 * out   - destination buffer; 1024 bytes are written through it.
 * input - source buffer; 1024 bytes are read through it.
 *
 * Both pointers are `__restrict`-qualified, i.e. the function is written
 * under the promise that the two buffers do not overlap.
 *
 * NOTE(review): the buffers are accessed through u32v8 pointers, so they
 * presumably have to be 32-byte aligned (the quoted listings use vmovdqa) --
 * confirm against callers.
 */
void double_load(int8_t *__restrict out, const int8_t *__restrict input)
{
    /* Reinterpret the byte buffers as vector pointers; note that this cast
     * also drops the const qualifier from `input`. */
    u32v8 *vin = (u32v8 *)input;
    u32v8 *vout = (u32v8 *)out;
    /* `i` counts bytes (step 32, bound 1024 => 32 iterations) and is used
     * only for the exit test; addressing is done with the separately
     * incremented vin/vout pointers. */
    for (unsigned i=0 ; i<1024 ; i+=32){
        u32v8 in = *vin++;
        /* OR each lane with itself shifted right by 4 bits. */
        *vout++ = in | (in >> 4);
    }
}


Flags: -O3 -mavx2 -fno-unroll-loops

LLVM 10 and newer:
double_load(signed char*, signed char const*):                   # @double_load(signed char*, signed char const*)
        xorl    %eax, %eax
.LBB0_1:                                # =>This Inner Loop Header: Depth=1
        vmovdqa (%rsi,%rax), %ymm0
        vpsrld  $4, %ymm0, %ymm1
        vpor    %ymm0, %ymm1, %ymm0
        vmovdqa %ymm0, (%rdi,%rax)
        addq    $32, %rax
        leal    -32(%rax), %ecx
        cmpl    $992, %ecx              # imm = 0x3E0
        jb      .LBB0_1
        vzeroupper
        retq


LLVM 9:
double_load(signed char*, signed char const*):                   # @double_load(signed char*, signed char const*)
        xorl    %eax, %eax
.LBB0_1:                                # =>This Inner Loop Header: Depth=1
        vmovdqa (%rsi,%rax), %ymm0
        vpsrld  $4, %ymm0, %ymm1
        vpor    %ymm0, %ymm1, %ymm0
        vmovdqa %ymm0, (%rdi,%rax)
        addq    $32, %rax
        cmpl    $1024, %eax             # imm = 0x400
        jb      .LBB0_1
        vzeroupper
        retq

LLVM IR comparison:
LLVM 10+:
  %15 = add nuw nsw i32 %8, 32
  %16 = icmp ult i32 %8, 992

LLVM 9:
  %15 = add nuw nsw i32 %8, 32
  %16 = icmp ult i32 %15, 1024



Codegen: https://godbolt.org/z/GzaTPj

-- 
You are receiving this mail because:
You are on the CC list for the bug.
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to