On 2025/6/26 02:17, Linus Torvalds wrote:
> On Wed, 25 Jun 2025 at 05:15, Johannes Berg <johan...@sipsolutions.net> wrote:
>>
>>  - reduce stack use with clang 19
> 
> Interesting. The patch looks fine, I'm wondering if people made a
> clang bug report about this behavior with structure assignments?
> 
> Even if most other projects likely don't have issues with stack size,
> it looks very non-optimal from a performance standpoint too to create
> a pointless temporary copy on the stack.
> 
> I assume - but didn't check - that gcc didn't do the same stupid thing
> for that code?

The behavior of gcc and clang differs. Clang's behavior appears to be
related to the volatile qualifier in arch_spinlock_t:

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/linux/spinlock_types_up.h?id=b555cb66583e99158cfef8e91c025252cefae55b#n18

It can be reproduced with this code snippet:

```c
/*
 * Reproducer type: one plain char, one volatile-qualified char, and a
 * 512-byte array (513 bytes follow member 'a').
 */
struct foo {
        char a;
        volatile char b;        /* removing 'volatile' here changes clang's output */
        char c[512];
};

/* Declared only; no definition in this snippet, so the call cannot be folded away. */
char bar(void);
void baz(struct foo *p);

/*
 * Overwrite *p with a compound literal in which only .a is given
 * explicitly (from bar()); members not named in the initializer are
 * zero-initialized per the C rules for designated initializers.
 */
void baz(struct foo *p)
{
        *p = (struct foo) { .a = bar() };
}
```

$ clang-19 --version
Ubuntu clang version 19.1.7 (++20250114103320+cd708029e0b2-1~exp1~20250114103432.75)
Target: x86_64-pc-linux-gnu
Thread model: posix
InstalledDir: /usr/lib/llvm-19/bin
$ clang-19 -O2 -c test.c
$ objdump -dr ./test.o

./test.o:     file format elf64-x86-64


Disassembly of section .text:

0000000000000000 <baz>:
   0:   41 56                   push   %r14
   2:   53                      push   %rbx
   3:   48 81 ec 18 02 00 00    sub    $0x218,%rsp
   a:   48 89 fb                mov    %rdi,%rbx
   d:   4c 8d 74 24 10          lea    0x10(%rsp),%r14
  12:   ba 01 02 00 00          mov    $0x201,%edx
  17:   4c 89 f7                mov    %r14,%rdi
  1a:   31 f6                   xor    %esi,%esi
  1c:   e8 00 00 00 00          call   21 <baz+0x21>
                        1d: R_X86_64_PLT32      memset-0x4
  21:   e8 00 00 00 00          call   26 <baz+0x26>
                        22: R_X86_64_PLT32      bar-0x4
  26:   88 44 24 0f             mov    %al,0xf(%rsp)
  2a:   0f b6 44 24 0f          movzbl 0xf(%rsp),%eax
  2f:   88 03                   mov    %al,(%rbx)
  31:   48 ff c3                inc    %rbx
  34:   ba 01 02 00 00          mov    $0x201,%edx
  39:   48 89 df                mov    %rbx,%rdi
  3c:   4c 89 f6                mov    %r14,%rsi
  3f:   e8 00 00 00 00          call   44 <baz+0x44>
                        40: R_X86_64_PLT32      memcpy-0x4
  44:   48 81 c4 18 02 00 00    add    $0x218,%rsp
  4b:   5b                      pop    %rbx
  4c:   41 5e                   pop    %r14
  4e:   c3                      ret    


$ clang --version
Ubuntu clang version 14.0.0-1ubuntu1.1
Target: x86_64-pc-linux-gnu
Thread model: posix
InstalledDir: /usr/bin
$ clang -O2 -c test.c
$ objdump -dr ./test.o

./test.o:     file format elf64-x86-64


Disassembly of section .text:

0000000000000000 <baz>:
   0:   41 56                   push   %r14
   2:   53                      push   %rbx
   3:   48 81 ec 18 02 00 00    sub    $0x218,%rsp
   a:   48 89 fb                mov    %rdi,%rbx
   d:   4c 8d 74 24 10          lea    0x10(%rsp),%r14
  12:   ba 01 02 00 00          mov    $0x201,%edx
  17:   4c 89 f7                mov    %r14,%rdi
  1a:   31 f6                   xor    %esi,%esi
  1c:   e8 00 00 00 00          call   21 <baz+0x21>
                        1d: R_X86_64_PLT32      memset-0x4
  21:   e8 00 00 00 00          call   26 <baz+0x26>
                        22: R_X86_64_PLT32      bar-0x4
  26:   88 44 24 0f             mov    %al,0xf(%rsp)
  2a:   8a 44 24 0f             mov    0xf(%rsp),%al
  2e:   88 03                   mov    %al,(%rbx)
  30:   48 83 c3 01             add    $0x1,%rbx
  34:   ba 01 02 00 00          mov    $0x201,%edx
  39:   48 89 df                mov    %rbx,%rdi
  3c:   4c 89 f6                mov    %r14,%rsi
  3f:   e8 00 00 00 00          call   44 <baz+0x44>
                        40: R_X86_64_PLT32      memcpy-0x4
  44:   48 81 c4 18 02 00 00    add    $0x218,%rsp
  4b:   5b                      pop    %rbx
  4c:   41 5e                   pop    %r14
  4e:   c3                      ret    


$ gcc --version
gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0
Copyright (C) 2021 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
$ gcc -O2 -c test.c
$ objdump -dr ./test.o

./test.o:     file format elf64-x86-64


Disassembly of section .text:

0000000000000000 <baz>:
   0:   f3 0f 1e fa             endbr64 
   4:   53                      push   %rbx
   5:   48 89 fb                mov    %rdi,%rbx
   8:   e8 00 00 00 00          call   d <baz+0xd>
                        9: R_X86_64_PLT32       bar-0x4
   d:   48 8d 7b 08             lea    0x8(%rbx),%rdi
  11:   48 89 d9                mov    %rbx,%rcx
  14:   48 c7 03 00 00 00 00    movq   $0x0,(%rbx)
  1b:   48 83 e7 f8             and    $0xfffffffffffffff8,%rdi
  1f:   41 89 c0                mov    %eax,%r8d
  22:   31 c0                   xor    %eax,%eax
  24:   48 c7 83 fa 01 00 00    movq   $0x0,0x1fa(%rbx)
  2b:   00 00 00 00 
  2f:   48 29 f9                sub    %rdi,%rcx
  32:   81 c1 02 02 00 00       add    $0x202,%ecx
  38:   c1 e9 03                shr    $0x3,%ecx
  3b:   f3 48 ab                rep stos %rax,%es:(%rdi)
  3e:   44 88 03                mov    %r8b,(%rbx)
  41:   5b                      pop    %rbx
  42:   c3                      ret    


After 's/volatile char b;/char b;/', clang-19 produces:

./test.o:     file format elf64-x86-64


Disassembly of section .text:

0000000000000000 <baz>:
   0:   53                      push   %rbx
   1:   48 89 fb                mov    %rdi,%rbx
   4:   e8 00 00 00 00          call   9 <baz+0x9>
                        5: R_X86_64_PLT32       bar-0x4
   9:   88 03                   mov    %al,(%rbx)
   b:   48 ff c3                inc    %rbx
   e:   ba 01 02 00 00          mov    $0x201,%edx
  13:   48 89 df                mov    %rbx,%rdi
  16:   31 f6                   xor    %esi,%esi
  18:   5b                      pop    %rbx
  19:   e9 00 00 00 00          jmp    1e <baz+0x1e>
                        1a: R_X86_64_PLT32      memset-0x4

Regards,
Tiwei

Reply via email to