Issue 122689
Summary Unnecessary call to `memset` when initializing an array of structs with non-zero default member initializers.
Labels new issue
Assignees
Reporter gchatelet
    [godbolt link](https://godbolt.org/z/h39sa6fnv)

```
struct T {
  int a = 1;
 int b = 0;
};

template <int n>
struct S { T a[n]; };

S<75> F() { return {}; }
```

Compiling with `-O3 -std=c++20 -DNDEBUG -fno-exceptions -march=skylake` generates the following assembly:
```
.LCPI0_0:
 .long   1
        .long   0
F():
        pushq   %rbx
        movq %rdi, %rbx
        movl    $600, %edx
        xorl    %esi, %esi
 callq   memset@PLT
        vbroadcastsd    .LCPI0_0(%rip), %ymm0
 vmovups %ymm0, 32(%rbx)
        vmovups %ymm0, (%rbx)
        vmovups %ymm0, 96(%rbx)
        vmovups %ymm0, 64(%rbx)
        vmovups %ymm0, 160(%rbx)
        vmovups %ymm0, 128(%rbx)
        vmovups %ymm0, 224(%rbx)
        vmovups %ymm0, 192(%rbx)
        vmovups %ymm0, 288(%rbx)
        vmovups %ymm0, 256(%rbx)
        vmovups %ymm0, 352(%rbx)
        vmovups %ymm0, 320(%rbx)
        vmovups %ymm0, 416(%rbx)
        vmovups %ymm0, 384(%rbx)
        vmovups %ymm0, 480(%rbx)
        vmovups %ymm0, 448(%rbx)
        vmovups %ymm0, 544(%rbx)
        vmovups %ymm0, 512(%rbx)
        vmovups %xmm0, 576(%rbx)
        movq    $1, 592(%rbx)
        movq    %rbx, %rax
 popq    %rbx
        vzeroupper
        retq
```

The compiler first clears the contents of `S<75>` with a call to `memset` and then sets them through an unrolled sequence of YMM stores. Those stores overwrite all 600 bytes of the object, so the preceding `memset` is redundant.
With `S<74>` instead of `S<75>`, the call to `memset` goes away.

Apparently the clearing is emitted by the frontend (clang); here is the LLVM IR at `-O0`:
```
%struct.S = type { [75 x %struct.T] }
%struct.T = type { i32, i32 }

define dso_local void @F()(ptr dead_on_unwind noalias writable sret(%struct.S) align 4 %agg.result) {
entry:
  call void @llvm.memset.p0.i64(ptr align 4 %agg.result, i8 0, i64 592, i1 false)
  %a = getelementptr inbounds nuw %struct.S, ptr %agg.result, i32 0, i32 0
  %arrayinit.end = getelementptr inbounds %struct.T, ptr %a, i64 75
  br label %arrayinit.body

arrayinit.body:
  %arrayinit.cur = phi ptr [ %a, %entry ], [ %arrayinit.next, %arrayinit.body ]
  %a1 = getelementptr inbounds nuw %struct.T, ptr %arrayinit.cur, i32 0, i32 0
  store i32 1, ptr %a1, align 4
  %b = getelementptr inbounds nuw %struct.T, ptr %arrayinit.cur, i32 0, i32 1
  store i32 0, ptr %b, align 4
  %arrayinit.next = getelementptr inbounds %struct.T, ptr %arrayinit.cur, i64 1
  %arrayinit.done = icmp eq ptr %arrayinit.next, %arrayinit.end
  br i1 %arrayinit.done, label %arrayinit.end2, label %arrayinit.body

arrayinit.end2:
  ret void
}

declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #1
```
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to