[llvm-bugs] [Bug 27988] New: Performance regression with r271410

via llvm-bugs Fri, 03 Jun 2016 04:02:27 -0700

https://llvm.org/bugs/show_bug.cgi?id=27988


            Bug ID: 27988
           Summary: Performance regression with r271410
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: Linux
            Status: NEW
          Severity: normal
          Priority: P
         Component: Loop Optimizer
          Assignee: unassignedb...@nondot.org
          Reporter: arnaud.degrandmai...@arm.com
                CC: llvm-bugs@lists.llvm.org
    Classification: Unclassified

Created attachment 16458
  --> https://llvm.org/bugs/attachment.cgi?id=16458&action=edit
Reproducer

Commit r271410 significantly regresses some common industry benchmarks, at
least on ARM platforms, and the reproducer below shows that codegen for x86
does not look any better either.

With the following testcase:

> cat test.c
#define LEN 10000
#define ALIGNMENT 16

__attribute__((aligned(ALIGNMENT))) float a[LEN];
__attribute__((aligned(ALIGNMENT))) float b[LEN];

extern int dummy(float *, float *);

int s173() {

  int k = LEN / 2;
  for (int i = 0; i < LEN / 2; i++) {
    a[i + k] = a[i] + b[i];
  }
  return dummy(a, b);
}

On the AArch64 target, we get:
(with r271410)
> clang -target arm64-linux-gnu -O2 -S -o - test.c
...
.LBB0_1:
    fmov    x11, d2
    lsl    x11, x11, #2
    add    x12, x9, x11
    add    v3.2d, v2.2d, v1.2d
    ldr    q2, [x12]
    ldr    q4, [x10, x11]
    add    v0.2d, v0.2d, v1.2d
    sub    x8, x8, #4
    fadd    v2.4s, v2.4s, v4.4s
    str    q2, [x12, #20000]
    mov    v2.16b, v3.16b
    cbnz    x8, .LBB0_1
...

(with r271410 reverted)
> clang -target arm64-linux-gnu -O2 -S -o - test.c
...
.LBB0_1:
    add    x11, x9, x8
    add    x12, x10, x8
    ldr    q0, [x11, #20000]
    ldr    q1, [x12, #20000]
    add    x8, x8, #16
    fadd    v0.4s, v0.4s, v1.4s
    str    q0, [x11, #40000]
    cbnz    x8, .LBB0_1
...


It seems the generated code has also regressed on x86:
(with r271410)
> clang -O2 -S -o - test.c
...
.LBB0_1:
    movdqa    %xmm0, %xmm4
    paddq    %xmm2, %xmm4
    movd    %xmm0, %rcx
    movups    a(,%rcx,4), %xmm5
    movups    b(,%rcx,4), %xmm6
    addps    %xmm5, %xmm6
    movups    %xmm6, a+20000(,%rcx,4)
    paddq    %xmm3, %xmm0
    paddq    %xmm3, %xmm1
    movd    %xmm4, %rcx
    movups    a(,%rcx,4), %xmm4
    movups    b(,%rcx,4), %xmm5
    addps    %xmm4, %xmm5
    movups    %xmm5, a+20000(,%rcx,4)
    addq    $-8, %rax
    jne    .LBB0_1
...

(with r271410 reverted)
> clang -O2 -S -o - test.c
...
.LBB0_1:
    movaps    a+20000(%rax), %xmm0
    addps    b+20000(%rax), %xmm0
    movaps    %xmm0, a+40000(%rax)
    movaps    a+20016(%rax), %xmm0
    addps    b+20016(%rax), %xmm0
    movaps    %xmm0, a+40016(%rax)
    addq    $32, %rax
    jne    .LBB0_1
...

-- 
You are receiving this mail because:
You are on the CC list for the bug.

_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

[llvm-bugs] [Bug 27988] New: Performance regression with r271410

Reply via email to