https://llvm.org/bugs/show_bug.cgi?id=27100
Bug ID: 27100 Summary: Optimize memset for AVX2 Product: new-bugs Version: trunk Hardware: PC OS: Linux Status: NEW Severity: normal Priority: P Component: new bugs Assignee: unassignedb...@nondot.org Reporter: hjl.to...@gmail.com CC: david.l.kreit...@intel.com, llvm-bugs@lists.llvm.org, zia.ans...@intel.com Classification: Unclassified For the following input: [hjl@gnu-6 memcpy-3]$ cat c.i extern char *src, *dst; void foo3 (int x) { __builtin_memset (dst, x, 64); } void foo4 (int x) { __builtin_memset (dst, x, 32); } void foo5 (int x) { __builtin_memset (dst, x, 16); } [hjl@gnu-6 memcpy-3]$ clang trunk with -O2 -mavx2 generates: [hjl@gnu-6 memcpy-3]$ cat c.s .text .file "c.i" .globl foo3 .p2align 4, 0x90 .type foo3,@function foo3: # @foo3 .cfi_startproc # BB#0: movl dst(%rip), %eax movzbl %dil, %ecx movabsq $72340172838076673, %rdx # imm = 0x101010101010101 imulq %rcx, %rdx movq %rdx, 56(%eax) movq %rdx, 48(%eax) movq %rdx, 40(%eax) movq %rdx, 32(%eax) movq %rdx, 24(%eax) movq %rdx, 16(%eax) movq %rdx, 8(%eax) movq %rdx, (%eax) retq .Lfunc_end0: .size foo3, .Lfunc_end0-foo3 .cfi_endproc .globl foo4 .p2align 4, 0x90 .type foo4,@function foo4: # @foo4 .cfi_startproc # BB#0: movl dst(%rip), %eax movzbl %dil, %ecx movabsq $72340172838076673, %rdx # imm = 0x101010101010101 imulq %rcx, %rdx movq %rdx, 24(%eax) movq %rdx, 16(%eax) movq %rdx, 8(%eax) movq %rdx, (%eax) retq .Lfunc_end1: .size foo4, .Lfunc_end1-foo4 .cfi_endproc .globl foo5 .p2align 4, 0x90 .type foo5,@function foo5: # @foo5 .cfi_startproc # BB#0: movl dst(%rip), %eax movzbl %dil, %ecx movabsq $72340172838076673, %rdx # imm = 0x101010101010101 imulq %rcx, %rdx movq %rdx, 8(%eax) movq %rdx, (%eax) retq .Lfunc_end2: .size foo5, .Lfunc_end2-foo5 .cfi_endproc Would the following code be better? .text .p2align 4,,15 .globl foo3 .type foo3, @function foo3: .LFB0: .cfi_startproc movq dst(%rip), %rax vmovd %edi, %xmm0 pushq %rbp .cfi_def_cfa_offset 16 .cfi_offset 6, -16 vpbroadcastb %xmm0, %ymm0 vmovdqu %ymm0, (%rax) movq %rsp, %rbp .cfi_def_cfa_register 6 vmovdqu %ymm0, 
32(%rax) vzeroupper popq %rbp .cfi_def_cfa 7, 8 ret .cfi_endproc .LFE0: .size foo3, .-foo3 .p2align 4,,15 .globl foo4 .type foo4, @function foo4: .LFB1: .cfi_startproc movq dst(%rip), %rax vmovd %edi, %xmm0 pushq %rbp .cfi_def_cfa_offset 16 .cfi_offset 6, -16 vpbroadcastb %xmm0, %ymm0 vmovdqu %ymm0, (%rax) movq %rsp, %rbp .cfi_def_cfa_register 6 vzeroupper popq %rbp .cfi_def_cfa 7, 8 ret .cfi_endproc .LFE1: .size foo4, .-foo4 .p2align 4,,15 .globl foo5 .type foo5, @function foo5: .LFB2: .cfi_startproc movq dst(%rip), %rax vmovd %edi, %xmm0 vpbroadcastb %xmm0, %xmm0 vmovdqu %xmm0, (%rax) ret .cfi_endproc .LFE2: .size foo5, .-foo5 -- You are receiving this mail because: You are on the CC list for the bug.
_______________________________________________ llvm-bugs mailing list llvm-bugs@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs