Issue |
149155
|
Summary |
Missed optimization when copying large object to multiple targets
|
Labels |
new issue
|
Assignees |
|
Reporter |
Alcaro
|
```c
#include <string.h>
/* Test object: a 176-byte char buffer whose `data` member is declared with
   32-byte alignment. */
/* NOTE(review): `alignas` is used here without <stdalign.h>; presumably the
   reproducer was built as C23, where it is a keyword — confirm the -std flag. */
struct S
{
alignas(32) char data[176]; /* 176 bytes, aligned to 32 */
};
typedef struct S S; /* allows plain `S` instead of `struct S` */
/* Copies the first byte of src->data into dest1->data and then into
   dest2->data, using two separate memcpy calls. */
void copy1(S* dest1, S* dest2, const S* src)
{
memcpy(dest1->data, src->data, 1); /* first copy: src -> dest1 */
memcpy(dest2->data, src->data, 1); /* second copy reads src->data again: src -> dest2 */
}
/* Copies the first 8 bytes of src->data into dest1->data and then into
   dest2->data, using two separate memcpy calls. */
void copy8(S* dest1, S* dest2, const S* src)
{
memcpy(dest1->data, src->data, 8); /* first copy: src -> dest1 */
memcpy(dest2->data, src->data, 8); /* second copy reads src->data again: src -> dest2 */
}
/* Copies the first 16 bytes of src->data into dest1->data and then into
   dest2->data, using two separate memcpy calls. The pointers are not
   restrict-qualified, so dest1, dest2 and src may refer to the same object. */
void copy16(S* dest1, S* dest2, const S* src)
{
memcpy(dest1->data, src->data, 16); /* first copy: src -> dest1 */
memcpy(dest2->data, src->data, 16); /* second copy reads src->data again: src -> dest2 */
}
/* Same 16-byte double copy as copy16, but all three pointers are
   restrict-qualified. */
void copy16b(S* restrict dest1, S* restrict dest2, const S* restrict src)
{
memcpy(dest1->data, src->data, 16); /* first copy: src -> dest1 */
memcpy(dest2->data, src->data, 16); /* second copy reads src->data again: src -> dest2 */
}
```
Compiled with `-O2`.
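The pointers can't partially overlap: the `data` member is aligned to more than the copy's size, so any two of these pointers are either identical or completely disjoint over the copied range (and in `copy16b` the pointers are additionally tagged `restrict`). The first store therefore either leaves `src->data` untouched or rewrites it with the same bytes, so a single appropriately-sized read of `src->data` is sufficient.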
Expected:
```
# Expected copy1: rdi/rsi/rdx correspond to dest1/dest2/src (C parameter order
# under the SysV AMD64 convention). One byte load feeds both stores.
copy1:
movzx eax, byte ptr [rdx]   # load the source byte once
mov byte ptr [rdi], al      # store to dest1
mov byte ptr [rsi], al      # store to dest2, reusing the loaded value
ret
# Expected copy8: rdi/rsi/rdx correspond to dest1/dest2/src (C parameter order
# under the SysV AMD64 convention). One 8-byte load feeds both stores.
copy8:
mov rax, qword ptr [rdx]    # load 8 source bytes once
mov qword ptr [rdi], rax    # store to dest1
mov qword ptr [rsi], rax    # store to dest2, reusing the loaded value
ret
# Expected copy16: rdi/rsi/rdx correspond to dest1/dest2/src (C parameter order
# under the SysV AMD64 convention). One 16-byte load feeds both stores.
copy16:
movaps xmm0, xmmword ptr [rdx]  # load 16 source bytes once
movaps xmmword ptr [rdi], xmm0  # store to dest1
movaps xmmword ptr [rsi], xmm0  # store to dest2, reusing xmm0 without reloading
ret
# Expected copy16b (restrict-qualified variant): rdi/rsi/rdx correspond to
# dest1/dest2/src. One 16-byte load feeds both stores.
copy16b:
movaps xmm0, xmmword ptr [rdx]  # load 16 source bytes once
movaps xmmword ptr [rdi], xmm0  # store to dest1
movaps xmmword ptr [rsi], xmm0  # store to dest2, reusing xmm0 without reloading
ret
```
Actual:
```
# Actual copy1: identical to the expected output; the byte is loaded once.
copy1:
movzx eax, byte ptr [rdx]   # single load of the source byte
mov byte ptr [rdi], al      # store to dest1
mov byte ptr [rsi], al      # store to dest2
ret
# Actual copy8: identical to the expected output; the 8 bytes are loaded once.
copy8:
mov rax, qword ptr [rdx]    # single load of the 8 source bytes
mov qword ptr [rdi], rax    # store to dest1
mov qword ptr [rsi], rax    # store to dest2
ret
# Actual copy16: the source is loaded twice, unlike the expected output.
copy16:
movaps xmm0, xmmword ptr [rdx]  # first 16-byte load from src
movaps xmmword ptr [rdi], xmm0  # store to dest1
movaps xmm0, xmmword ptr [rdx]  # second load of the same 16 bytes from src (the reload this report calls unnecessary)
movaps xmmword ptr [rsi], xmm0  # store to dest2
ret
# Actual copy16b: the source is loaded twice even though the C pointers are
# restrict-qualified.
copy16b:
movaps xmm0, xmmword ptr [rdx]  # first 16-byte load from src
movaps xmmword ptr [rdi], xmm0  # store to dest1
movaps xmm0, xmmword ptr [rdx]  # second load of the same 16 bytes from src (the reload this report calls unnecessary)
movaps xmmword ptr [rsi], xmm0  # store to dest2
ret
```
https://godbolt.org/z/fKr8E16zd
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs