Issue 142961
Summary [AArch64] GCC generates better code than LLVM for a sum of abs() differences
Labels NEON
Assignees
Reporter vfdff
    * test: https://godbolt.org/z/cTcjvPE1z
```
#include <stdlib.h>

/*
 * Reproducer: sum of absolute differences over 16 byte pairs.
 *
 * Despite the name, nothing is squared: the function returns
 * sum(|pix1[i] - pix2[i]|) for i in [0, 16).
 *
 * pix1, pix2: each must point to at least 16 readable bytes.
 * Returns the accumulated sum as an int (at most 16 * 255 = 4080 when
 * unsigned char is 8 bits wide).
 */
int square (unsigned char *pix1, unsigned char *pix2) {
    int sum=0;
    /* Fixed trip count of 16 == one 128-bit vector of bytes. */
    for (int i=0; i< 16; i++)
       /* Both operands are promoted to int before the subtraction, so the
          difference may be negative and abs() operates on an int. */
       sum += abs (pix1[i]-pix2[i]);
    return sum;
}

```

* gcc: uabdl2 + uabal + uaddlv
```
// GCC output for: int square(unsigned char *pix1, unsigned char *pix2)
// In:  x0 = pix1, x1 = pix2
// Out: w0 = sum of |pix1[i] - pix2[i]| for i in [0, 16)
// Uses v29-v31 as scratch.
square:
        ldr q31, [x0]                      // v31 = 16 bytes loaded from pix1
        ldr     q30, [x1]                  // v30 = 16 bytes loaded from pix2
        uabdl2  v29.8h, v31.16b, v30.16b   // v29 = |v31 - v30| for the upper 8 bytes, widened to 16-bit lanes
        uabal   v29.8h, v31.8b, v30.8b     // v29 += |v31 - v30| for the lower 8 bytes, widened to 16-bit lanes
        uaddlv  s31, v29.8h                // s31 = sum across the eight 16-bit lanes, widened to 32 bits
        fmov    w0, s31                    // move the scalar result into the integer return register
        ret
```

* llvm: uabd + ushll2 + ushll + uaddl2 + uaddl + add + addv
```
// LLVM output for: int square(unsigned char *pix1, unsigned char *pix2)
// In:  x0 = pix1, x1 = pix2
// Out: w0 = sum of |pix1[i] - pix2[i]| for i in [0, 16)
// Uses v0-v2 as scratch.
square:
 ldr     q0, [x0]                          // v0 = 16 bytes loaded from pix1
        ldr     q1, [x1]                   // v1 = 16 bytes loaded from pix2
        uabd    v0.16b, v0.16b, v1.16b     // v0 = per-byte |v0 - v1|, still in 8-bit lanes
        ushll2  v1.8h, v0.16b, #0          // v1 = upper 8 differences zero-extended to 16-bit lanes
        ushll   v0.8h, v0.8b, #0           // v0 = lower 8 differences zero-extended to 16-bit lanes
        uaddl2  v2.4s, v0.8h, v1.8h        // v2 = upper 4 lanes of v0 + upper 4 lanes of v1, widened to 32 bits
        uaddl   v0.4s, v0.4h, v1.4h        // v0 = lower 4 lanes of v0 + lower 4 lanes of v1, widened to 32 bits
        add     v0.4s, v0.4s, v2.4s        // combine the two partial-sum vectors
        addv    s0, v0.4s                  // s0 = sum across the four 32-bit lanes
 fmov    w0, s0                            // move the scalar result into the integer return register
        ret
```

_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to