https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112992

--- Comment #5 from Hongtao Liu <liuhongt at gcc dot gnu.org> ---
(In reply to Roger Sayle from comment #0)
> The following four functions should in theory all produce the same code:
> 
> typedef unsigned long long v4di __attribute((vector_size(32)));
> typedef unsigned int v8si __attribute((vector_size(32)));
> typedef unsigned short v16hi __attribute((vector_size(32)));
> typedef unsigned char v32qi __attribute((vector_size(32)));
> 
> #define MASK  0x01010101
> #define MASKL 0x0101010101010101ULL
> #define MASKS 0x0101
> 
> v4di fooq() {
>   return (v4di){MASKL,MASKL,MASKL,MASKL};
> }
> 
> v8si food() {
>   return (v8si){MASK,MASK,MASK,MASK,MASK,MASK,MASK,MASK};
> }
> 
> v16hi foow() {
>   return (v16hi){MASKS,MASKS,MASKS,MASKS,MASKS,MASKS,MASKS,MASKS,
>                  MASKS,MASKS,MASKS,MASKS,MASKS,MASKS,MASKS,MASKS};
> }
> 
> v32qi foob() {
>   return (v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
>                  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
> }
> 
> On x86_64 with -mavx, we currently produce very different implementations:
> 
> fooq:
>         movabs  rax, 72340172838076673
>         push    rbp
>         mov     rbp, rsp
>         and     rsp, -32
>         mov     QWORD PTR [rsp-8], rax
>         vbroadcastsd    ymm0, QWORD PTR [rsp-8]
>         leave
>         ret
> food:
>         vbroadcastss    ymm0, DWORD PTR .LC2[rip]
>         ret
> foow:
>         vmovdqa ymm0, YMMWORD PTR .LC3[rip]
>         ret
> foob:
>         vmovdqa ymm0, YMMWORD PTR .LC4[rip]
>         ret
> 
> clang currently produces the vbroadcastss for all four.
I guess you mean the .rodata optimization here; I'm not sure about that part. With
the fix we now generate:

        # GCC output for test.c, AT&T (GAS) syntax; code goes in .text.
        .file   "test.c"
        .text
        # fooq: returns its 256-bit vector result in %ymm0.
        .p2align 4
        .globl  fooq
        .type   fooq, @function
fooq:
.LFB0:
        .cfi_startproc
        # Load a single 64-bit element from .LC1 and replicate it into all
        # four quadword lanes of %ymm0.  .LC1 is not a separate constant:
        # a .set later in this listing makes it an alias of .LC4, so this
        # reads the first 8 bytes of that 32-byte constant.
        vbroadcastsd    .LC1(%rip), %ymm0
        ret
        .cfi_endproc
.LFE0:
        .size   fooq, .-fooq
        # food: returns its 256-bit vector result in %ymm0.
        .p2align 4
        .globl  food
        .type   food, @function
food:
.LFB1:
        .cfi_startproc
        # Load a single 32-bit element from .LC3 and replicate it into all
        # eight doubleword lanes of %ymm0.  .LC3 is an alias of .LC4 (see
        # the .set later in this listing), so this reads the first 4 bytes
        # of that 32-byte constant.
        vbroadcastss    .LC3(%rip), %ymm0
        ret
        .cfi_endproc
.LFE1:
        .size   food, .-food
        # foow: returns its 256-bit vector result in %ymm0.
        .p2align 4
        .globl  foow
        .type   foow, @function
foow:
.LFB2:
        .cfi_startproc
        # Unlike fooq/food, no broadcast is used here: the whole 32-byte
        # constant .LC4 is loaded with an aligned move (.LC4 is emitted
        # with .align 32 further down in this listing).
        vmovdqa .LC4(%rip), %ymm0
        ret
        .cfi_endproc
.LFE2:
        .size   foow, .-foow
        # foob: returns its 256-bit vector result in %ymm0.
        .p2align 4
        .globl  foob
        .type   foob, @function
foob:
.LFB3:
        .cfi_startproc
        # Full 32-byte aligned load, as in foow.  .LC5 is an alias of .LC4
        # (see the .set at the end of the constant pool), so both functions
        # read the same 32 bytes.
        vmovdqa .LC5(%rip), %ymm0
        ret
        .cfi_endproc
.LFE3:
        .size   foob, .-foob
        .set    .LC1,.LC4
        .set    .LC3,.LC4
        .section        .rodata.cst32,"aM",@progbits,32
        .align 32
.LC4:
        .value  257
        .value  257
        .value  257
        .value  257
        .value  257
        .value  257
        .value  257
        .value  257
        .value  257
        .value  257
        .value  257
        .value  257
        .value  257
        .value  257
        .value  257
        .value  257
        .set    .LC5,.LC4
        .ident  "GCC: (GNU) 14.0.0 20231212 (experimental)"
        .section        .note.GNU-stack,"",@progbits

Reply via email to