https://bugs.llvm.org/show_bug.cgi?id=43899
Bug ID: 43899
Summary: Incomplete optimization during loop vectorization on
large arrays.
Product: libraries
Version: trunk
Hardware: PC
OS: All
Status: NEW
Severity: enhancement
Priority: P
Component: Loop Optimizer
Assignee: unassignedb...@nondot.org
Reporter: a.rain...@gmail.com
CC: llvm-bugs@lists.llvm.org
I am using the x86-64 trunk build or version 9.0, with -Ofast.
The example functions below generate different code for the same array and loop
size. This is a bug, because the loops are identical.
To compare the asm output I created this example: https://godbolt.org/z/YgKRZO
#include <cstddef>
#include <cstdint>
#include <array>
// Built-in array type holding 1024 * 1024 * 1024 elements of int64_t.
typedef
int64_t
my_c_arr[1024 * 1024 * 1024];
// std::array counterpart with the same element type and element count.
typedef
std::array<int64_t, 1024 * 1024 * 1024>
my_arr;
// Reproduction case: range-based for over the built-in array `input`.
// `auto i` receives a copy of each element's *value* (not its position), and
// that value is then used as the subscript in `input[i]`.
// NOTE(review): this looks unintended. compute_10/compute_11 subscript by
// position, so these loops are not equivalent to them, and the subscript here
// is whatever the data holds (it may be out of range). Confirm with the reporter.
void compute_1(my_c_arr& input)
{
for (auto i: input)
{
// The updated element is selected by the value i, not by the iteration position.
input[i] = (input[i] + 3254) * 3;
}
}
// Reproduction case: same loop body as compute_1, but `input` is the
// std::array type `my_arr`.
// `auto i` is a copy of each element's value and is used as the subscript.
// NOTE(review): as in compute_1, indexing by element value looks unintended;
// confirm with the reporter.
void compute_2(my_arr& input)
{
for (auto i: input)
{
// The updated element is selected by the value i, not by the iteration position.
input[i] = (input[i] + 3254) * 3;
}
}
// Reproduction case: explicit iterator loop over the std::array `input`.
// Walks from input.begin() until the iterator equals input.cend() and rewrites
// each visited element in place as (value + 3254) * 3.
void compute_3(my_arr& input)
{
for (auto i = input.begin(); i != input.cend(); ++i)
{
// Unlike compute_1/compute_2, the element is reached through the iterator itself.
*i = (*i + 3254) * 3;
}
}
All of the compute_1... functions above generate this asm:
compute_1(long (&) [1073741824]): # @compute_1(long (&) [1073741824])
movabs rax, 8589934592
add rax, rdi
mov rcx, rdi
.LBB0_1: # =>This Inner Loop Header: Depth=1
mov rdx, qword ptr [rcx]
mov rsi, qword ptr [rdi + 8*rdx]
lea rsi, [rsi + 2*rsi]
add rsi, 9762
mov qword ptr [rdi + 8*rdx], rsi
mov rdx, qword ptr [rcx + 8]
mov rsi, qword ptr [rdi + 8*rdx]
lea rsi, [rsi + 2*rsi + 9762]
mov qword ptr [rdi + 8*rdx], rsi
mov rdx, qword ptr [rcx + 16]
mov rsi, qword ptr [rdi + 8*rdx]
lea rsi, [rsi + 2*rsi + 9762]
mov qword ptr [rdi + 8*rdx], rsi
mov rdx, qword ptr [rcx + 24]
mov rsi, qword ptr [rdi + 8*rdx]
lea rsi, [rsi + 2*rsi]
add rsi, 9762
mov qword ptr [rdi + 8*rdx], rsi
add rcx, 32
cmp rcx, rax
jne .LBB0_1
ret
// Reproduction case: position-indexed loop over the built-in array `input`.
// Rewrites every element in place as (value + 3254) * 3.
void compute_10(my_c_arr& input)
{
// `auto i = 0` deduces int; the bound is the array's element count computed
// with sizeof, so the comparison mixes a signed counter with an unsigned bound.
for (auto i = 0; i != sizeof(input) / sizeof(input[0]); ++i)
{
input[i] = (input[i] + 3254) * 3;
}
}
// Reproduction case: position-indexed loop over the std::array `input`.
// Rewrites every element in place as (value + 3254) * 3.
void compute_11(my_arr& input)
{
// `auto i = 0` deduces int; the bound comes from input.size().
for (auto i = 0; i != input.size(); ++i)
{
input[i] = (input[i] + 3254) * 3;
}
}
All of the compute_1* functions above generate this asm code:
compute_10(long (&) [1073741824]): # @compute_10(long (&) [1073741824])
xor eax, eax
.LBB0_1: # =>This Inner Loop Header: Depth=1
mov rcx, qword ptr [rdi + 8*rax]
mov rdx, qword ptr [rdi + 8*rax + 8]
lea rcx, [rcx + 2*rcx]
add rcx, 9762
mov qword ptr [rdi + 8*rax], rcx
lea rcx, [rdx + 2*rdx + 9762]
mov qword ptr [rdi + 8*rax + 8], rcx
mov rcx, qword ptr [rdi + 8*rax + 16]
lea rcx, [rcx + 2*rcx + 9762]
mov qword ptr [rdi + 8*rax + 16], rcx
mov rcx, qword ptr [rdi + 8*rax + 24]
lea rcx, [rcx + 2*rcx]
add rcx, 9762
mov qword ptr [rdi + 8*rax + 24], rcx
add rax, 4
cmp rax, 1073741824
jne .LBB0_1
ret
--
You are receiving this mail because:
You are on the CC list for the bug.
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs