Issue 120034
Summary [NEON] Load Multiple Increment After Skipped in Inlined Code
Labels new issue
Assignees
Reporter ytw16
    When add_arrays() is compiled as a standalone function, its loop uses the Load/Store Multiple Increment After instructions with writeback (vldmia and vstmia). However, when add_arrays() is inlined into its caller during compilation, the corresponding loop in the caller does not use these instructions. Instead, the compiler generates a separate adds for each address computation followed by a plain vldr/vstr, resulting in less efficient code.

Example:

```
/* Number of elements in each array used by this reproducer. */
#define SIZE 1000000

/* File-scope output array; main() passes it as the destination of add_arrays(). */
double c[SIZE];

/*
 * Element-wise addition: stores a[i] + b[i] into c[i] for every i in
 * [0, size). The loop body never runs when size <= 0.
 *
 * a, b  - input arrays, each read at indices 0..size-1
 * c     - output array, written at indices 0..size-1; this parameter
 *         shadows the file-scope array of the same name
 * size  - number of elements to process
 */
void add_arrays(double *a, double *b, double *c, int size)  {
    for (int i = 0; i < size; i++) {
        c[i] = a[i] + b[i];
    }
}

/*
 * Reproducer driver: fills two local input arrays, then sums them into the
 * file-scope array c through add_arrays(). Always returns 0.
 */
int main() {
    /* Both inputs have automatic storage and hold SIZE doubles each. */
 double a[SIZE];
    double b[SIZE];
    /* Initialize the inputs: a[i] = i and b[i] = 2 * i, as doubles. */
    for (int i = 0; i < SIZE; i++) {
 a[i] = i * 1.0;
        b[i] = i * 2.0;
    }

    /*
     * Per the accompanying report, this is the call that gets inlined into
     * main; the resulting loop is the one that uses adds + vldr/vstr
     * instead of vldmia/vstmia.
     */
    add_arrays(a, b, c, SIZE);
    return 0;
}

```

The disassembly:
```
00000000 <add_arrays>:
   0:   2b01            cmp     r3, #1
   2:   bfb8 it      lt
   4:   4770            bxlt    lr
   6:   ecf1 0b02 vldmia  r1!, {d16}
   a:   3b01            subs    r3, #1
   c:   ecf0 1b02 vldmia  r0!, {d17}
  10:   ee71 0ba0       vadd.f64        d16, d17, d16
  14:   ece2 0b02       vstmia  r2!, {d16}
  18:   d1f5 bne.n   6 <add_arrays+0x6>
  1a:   4770            bx      lr

0000001c <main>:
  1c:   b5b0            push    {r4, r5, r7, lr}
  1e:   af02 add     r7, sp, #8
  20:   f5ad 0d74       sub.w   sp, sp, #15990784 @ 0xf40000
  24:   f5ad 5d10       sub.w   sp, sp, #9216   @ 0x2400
 28:   efc0 1010       vmov.i32        d17, #0 @ 0x00000000
  2c:   f241 2e00 movw    lr, #4608       @ 0x1200
  30:   f50d 5090       add.w   r0, sp, #4608   @ 0x1200
  34:   2300            movs    r3, #0
  36:   f2c0 0e7a       movt    lr, #122        @ 0x7a
  3a:   f500 01f4       add.w r1, r0, #7995392        @ 0x7a0000
  3e:   466a            mov     r2, sp
 40:   eef7 0b00       vmov.f64        d16, #112       @ 0x3f800000  1.0
 44:   ee71 3ba0       vadd.f64        d19, d17, d16
  48:   18c8 adds    r0, r1, r3
  4a:   ee71 2ba1       vadd.f64        d18, d17, d17
 4e:   edc0 1b00       vstr    d17, [r0]
  52:   18d0            adds    r0, r2, r3
  54:   3308            adds    r3, #8
  56:   459e            cmp lr, r3
  58:   eef0 1b63       vmov.f64        d17, d19
  5c:   edc0 2b00 vstr    d18, [r0]
  60:   d1f0            bne.n   44 <main+0x28>
 62:   f240 0c00       movw    ip, #0
  66:   2300            movs    r3, #0
  68:   f2c0 0c00       movt    ip, #0
  6c:   18d0            adds r0, r2, r3
  6e:   edd0 0b00       vldr    d16, [r0]
  72:   18c8 adds    r0, r1, r3
  74:   edd0 1b00       vldr    d17, [r0]
  78:   eb0c 0003       add.w   r0, ip, r3
  7c:   3308            adds    r3, #8
  7e: ee71 0ba0       vadd.f64        d16, d17, d16
  82:   459e            cmp lr, r3
  84:   edc0 0b00       vstr    d16, [r0]
  88:   d1f0 bne.n   6c <main+0x50>
  8a:   2000            movs    r0, #0
  8c:   f50d 0d74       add.w   sp, sp, #15990784       @ 0xf40000
  90:   f50d 5d10 add.w   sp, sp, #9216   @ 0x2400
  94:   bdb0            pop     {r4, r5, r7, pc}
```
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to