On 2025/6/26 02:17, Linus Torvalds wrote:
> On Wed, 25 Jun 2025 at 05:15, Johannes Berg <johan...@sipsolutions.net> wrote:
>>
>> - reduce stack use with clang 19
>
> Interesting. The patch looks fine, I'm wondering if people made a
> clang bug report about this behavior with structure assignments?
>
> Even if most other projects likely don't have issues with stack size,
> it looks very non-optimal from a performance standpoint too to create
> a pointless temporary copy on the stack.
>
> I assume - but didn't check - that gcc didn't do the same stupid thing
> for that code?
The behavior of gcc and clang differs. Clang's behavior appears to be related to the volatile qualifier in arch_spinlock_t: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/linux/spinlock_types_up.h?id=b555cb66583e99158cfef8e91c025252cefae55b#n18 It can be reproduced with this code snippet: ```c struct foo { char a; volatile char b; char c[512]; }; char bar(void); void baz(struct foo *p); void baz(struct foo *p) { *p = (struct foo) { .a = bar() }; } ``` $ clang-19 --version Ubuntu clang version 19.1.7 (++20250114103320+cd708029e0b2-1~exp1~20250114103432.75) Target: x86_64-pc-linux-gnu Thread model: posix InstalledDir: /usr/lib/llvm-19/bin $ clang-19 -O2 -c test.c $ objdump -dr ./test.o ./test.o: file format elf64-x86-64 Disassembly of section .text: 0000000000000000 <baz>: 0: 41 56 push %r14 2: 53 push %rbx 3: 48 81 ec 18 02 00 00 sub $0x218,%rsp a: 48 89 fb mov %rdi,%rbx d: 4c 8d 74 24 10 lea 0x10(%rsp),%r14 12: ba 01 02 00 00 mov $0x201,%edx 17: 4c 89 f7 mov %r14,%rdi 1a: 31 f6 xor %esi,%esi 1c: e8 00 00 00 00 call 21 <baz+0x21> 1d: R_X86_64_PLT32 memset-0x4 21: e8 00 00 00 00 call 26 <baz+0x26> 22: R_X86_64_PLT32 bar-0x4 26: 88 44 24 0f mov %al,0xf(%rsp) 2a: 0f b6 44 24 0f movzbl 0xf(%rsp),%eax 2f: 88 03 mov %al,(%rbx) 31: 48 ff c3 inc %rbx 34: ba 01 02 00 00 mov $0x201,%edx 39: 48 89 df mov %rbx,%rdi 3c: 4c 89 f6 mov %r14,%rsi 3f: e8 00 00 00 00 call 44 <baz+0x44> 40: R_X86_64_PLT32 memcpy-0x4 44: 48 81 c4 18 02 00 00 add $0x218,%rsp 4b: 5b pop %rbx 4c: 41 5e pop %r14 4e: c3 ret $ clang --version Ubuntu clang version 14.0.0-1ubuntu1.1 Target: x86_64-pc-linux-gnu Thread model: posix InstalledDir: /usr/bin $ clang -O2 -c test.c $ objdump -dr ./test.o ./test.o: file format elf64-x86-64 Disassembly of section .text: 0000000000000000 <baz>: 0: 41 56 push %r14 2: 53 push %rbx 3: 48 81 ec 18 02 00 00 sub $0x218,%rsp a: 48 89 fb mov %rdi,%rbx d: 4c 8d 74 24 10 lea 0x10(%rsp),%r14 12: ba 01 02 00 00 mov $0x201,%edx 17: 4c 89 f7 mov 
%r14,%rdi 1a: 31 f6 xor %esi,%esi 1c: e8 00 00 00 00 call 21 <baz+0x21> 1d: R_X86_64_PLT32 memset-0x4 21: e8 00 00 00 00 call 26 <baz+0x26> 22: R_X86_64_PLT32 bar-0x4 26: 88 44 24 0f mov %al,0xf(%rsp) 2a: 8a 44 24 0f mov 0xf(%rsp),%al 2e: 88 03 mov %al,(%rbx) 30: 48 83 c3 01 add $0x1,%rbx 34: ba 01 02 00 00 mov $0x201,%edx 39: 48 89 df mov %rbx,%rdi 3c: 4c 89 f6 mov %r14,%rsi 3f: e8 00 00 00 00 call 44 <baz+0x44> 40: R_X86_64_PLT32 memcpy-0x4 44: 48 81 c4 18 02 00 00 add $0x218,%rsp 4b: 5b pop %rbx 4c: 41 5e pop %r14 4e: c3 ret $ gcc --version gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 Copyright (C) 2021 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. $ gcc -O2 -c test.c $ objdump -dr ./test.o ./test.o: file format elf64-x86-64 Disassembly of section .text: 0000000000000000 <baz>: 0: f3 0f 1e fa endbr64 4: 53 push %rbx 5: 48 89 fb mov %rdi,%rbx 8: e8 00 00 00 00 call d <baz+0xd> 9: R_X86_64_PLT32 bar-0x4 d: 48 8d 7b 08 lea 0x8(%rbx),%rdi 11: 48 89 d9 mov %rbx,%rcx 14: 48 c7 03 00 00 00 00 movq $0x0,(%rbx) 1b: 48 83 e7 f8 and $0xfffffffffffffff8,%rdi 1f: 41 89 c0 mov %eax,%r8d 22: 31 c0 xor %eax,%eax 24: 48 c7 83 fa 01 00 00 movq $0x0,0x1fa(%rbx) 2b: 00 00 00 00 2f: 48 29 f9 sub %rdi,%rcx 32: 81 c1 02 02 00 00 add $0x202,%ecx 38: c1 e9 03 shr $0x3,%ecx 3b: f3 48 ab rep stos %rax,%es:(%rdi) 3e: 44 88 03 mov %r8b,(%rbx) 41: 5b pop %rbx 42: c3 ret After 's/volatile char b;/char b;/', clang-19 produces: ./test.o: file format elf64-x86-64 Disassembly of section .text: 0000000000000000 <baz>: 0: 53 push %rbx 1: 48 89 fb mov %rdi,%rbx 4: e8 00 00 00 00 call 9 <baz+0x9> 5: R_X86_64_PLT32 bar-0x4 9: 88 03 mov %al,(%rbx) b: 48 ff c3 inc %rbx e: ba 01 02 00 00 mov $0x201,%edx 13: 48 89 df mov %rbx,%rdi 16: 31 f6 xor %esi,%esi 18: 5b pop %rbx 19: e9 00 00 00 00 jmp 1e <baz+0x1e> 1a: R_X86_64_PLT32 memset-0x4 Regards, Tiwei