https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114169
Bug ID: 114169 Summary: miss optimization of repeat load&store in place Product: gcc Version: 13.2.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: rtl-optimization Assignee: unassigned at gcc dot gnu.org Reporter: absoler at smail dot nju.edu.cn Target Milestone: --- Hi, here's the code: ``` typedef signed char int8_t; typedef unsigned char uint8_t; typedef signed short int int16_t; typedef unsigned short int uint16_t; typedef signed int int32_t; typedef unsigned int uint32_t; typedef signed long int int64_t; typedef unsigned long int uint64_t; #include<stdlib.h> #include<signal.h> #include<stdio.h> #include<string.h> /* --- Struct/Union Declarations --- */ struct S0 { int32_t f0; uint16_t f1; int32_t f2; int32_t f3; uint16_t f4; uint16_t f5; int32_t f6; int16_t f7; }; /* --- GLOBAL VARIABLES --- */ struct S0 g_2 = {1L,0xF8C7L,0x5C6EFF3DL,0x0369BD69L,65535UL,0x0CA9L,-9L,0x9C92L}; struct S0 g_4 = {-1L,0UL,0x314A5EA9L,0x4A90C6D2L,0xCD43L,65528UL,0x2E40C18AL,0x9C27L}; int16_t g_17 = 0xF85AL; uint16_t g_18 = 0xA88AL; const uint64_t g_19 = 0UL; /* --- FORWARD DECLARATIONS --- */ struct S0 func_1(void); void func_10(struct S0 p_13); struct S0 func_1() { int32_t a; g_4 = g_2; func_10(g_4); } void func_10(struct S0 b) { int32_t c = 0; int32_t *d = &g_4.f0; struct S0 *e = &g_2; *d = c; *e = b; } ``` compiled with gcc-13.2.0 -O3, it generates: https://godbolt.org/z/4d9roGWTz ``` 0000000000401630 <func_1>: func_1(): /root/loadtest3/test/output2.c:49 401630: movdqa 0x2a58(%rip),%xmm0 # 404090 <g_2> 401638: mov 0x2a52(%rip),%eax # 404090 <g_2> # load 40163e: movdqu 0x2a56(%rip),%xmm1 # 40409c <g_2+0xc> 401646: movaps %xmm0,0x2a23(%rip) # 404070 <g_4> func_10(): /root/loadtest3/test/output2.c:57 40164d: mov %eax,0x2a3d(%rip) # 404090 <g_2> # store 401653: movzwl 0x2a1a(%rip),%eax # 404074 <g_4+0x4> func_1(): /root/loadtest3/test/output2.c:49 40165a: movups %xmm1,0x2a1b(%rip) # 40407c <g_4+0xc> func_10(): /root/loadtest3/test/output2.c:57 401661: mov 
%ax,0x2a2c(%rip) # 404094 <g_2+0x4> 401668: mov 0x2a09(%rip),%rax # 404078 <g_4+0x8> /root/loadtest3/test/output2.c:56 40166f: movl $0x0,0x29f7(%rip) # 404070 <g_4> /root/loadtest3/test/output2.c:57 401679: mov %rax,0x2a18(%rip) # 404098 <g_2+0x8> 401680: mov 0x29f9(%rip),%rax # 404080 <g_4+0x10> 401687: mov %rax,0x2a12(%rip) # 4040a0 <g_2+0x10> 40168e: movzwl 0x29f3(%rip),%eax # 404088 <g_4+0x18> 401695: mov %ax,0x2a0c(%rip) # 4040a8 <g_2+0x18> func_1(): /root/loadtest3/test/output2.c:51 40169c: mov %rdi,%rax 40169f: retq ``` We can see that the load/store pair at addresses 0x401638 and 0x40164d is unnecessary: the load reads the first 4 bytes of g_2 into %eax, and the store writes that same value back to the same location in g_2.