https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112992
--- Comment #5 from Hongtao Liu <liuhongt at gcc dot gnu.org> ---
(In reply to Roger Sayle from comment #0)
> The following four functions should in theory all produce the same code:
>
> typedef unsigned long long v4di __attribute((vector_size(32)));
> typedef unsigned int v8si __attribute((vector_size(32)));
> typedef unsigned short v16hi __attribute((vector_size(32)));
> typedef unsigned char v32qi __attribute((vector_size(32)));
>
> #define MASK 0x01010101
> #define MASKL 0x0101010101010101ULL
> #define MASKS 0x0101
>
> v4di fooq() {
>   return (v4di){MASKL,MASKL,MASKL,MASKL};
> }
>
> v8si food() {
>   return (v8si){MASK,MASK,MASK,MASK,MASK,MASK,MASK,MASK};
> }
>
> v16hi foow() {
>   return (v16hi){MASKS,MASKS,MASKS,MASKS,MASKS,MASKS,MASKS,MASKS,
>                  MASKS,MASKS,MASKS,MASKS,MASKS,MASKS,MASKS,MASKS};
> }
>
> v32qi foob() {
>   return (v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
>                  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
> }
>
> On x86_64 with -mavx, we currently produce very different implementations:
>
> fooq:
>         movabs rax, 72340172838076673
>         push rbp
>         mov rbp, rsp
>         and rsp, -32
>         mov QWORD PTR [rsp-8], rax
>         vbroadcastsd ymm0, QWORD PTR [rsp-8]
>         leave
>         ret
> food:
>         vbroadcastss ymm0, DWORD PTR .LC2[rip]
>         ret
> foow:
>         vmovdqa ymm0, YMMWORD PTR .LC3[rip]
>         ret
> foob:
>         vmovdqa ymm0, YMMWORD PTR .LC4[rip]
>         ret
>
> clang currently produces the vbroadcastss for all four.
I guess here, you mean .rodata optimization, not sure about this part, with the fix we now generate .file "test.c" .text .p2align 4 .globl fooq .type fooq, @function fooq: .LFB0: .cfi_startproc vbroadcastsd .LC1(%rip), %ymm0 ret .cfi_endproc .LFE0: .size fooq, .-fooq .p2align 4 .globl food .type food, @function food: .LFB1: .cfi_startproc vbroadcastss .LC3(%rip), %ymm0 ret .cfi_endproc .LFE1: .size food, .-food .p2align 4 .globl foow .type foow, @function foow: .LFB2: .cfi_startproc vmovdqa .LC4(%rip), %ymm0 ret .cfi_endproc .LFE2: .size foow, .-foow .p2align 4 .globl foob .type foob, @function foob: .LFB3: .cfi_startproc vmovdqa .LC5(%rip), %ymm0 ret .cfi_endproc .LFE3: .size foob, .-foob .set .LC1,.LC4 .set .LC3,.LC4 .section .rodata.cst32,"aM",@progbits,32 .align 32 .LC4: .value 257 .value 257 .value 257 .value 257 .value 257 .value 257 .value 257 .value 257 .value 257 .value 257 .value 257 .value 257 .value 257 .value 257 .value 257 .value 257 .set .LC5,.LC4 .ident "GCC: (GNU) 14.0.0 20231212 (experimental)" .section .note.GNU-stack,"",@progbits