https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105830
Bug ID: 105830
Summary: call to memcpy when -nostdlib -nodefaultlibs flags provided
Product: gcc
Version: 12.1.1
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: tree-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: hiraditya at msn dot com
Target Milestone: ---

https://godbolt.org/z/jTEa6ajn3

```
// test.c

/* Nonzero if either X or Y is not aligned on a "long" boundary.  */
#define UNALIGNED(X, Y) \
  (((unsigned long)X & (sizeof (unsigned long) - 1)) | \
   ((unsigned long)Y & (sizeof (unsigned long) - 1)))

#define UNALIGNED1(a) \
  ((unsigned long)(a) & (sizeof (unsigned long) - 1))

/* How many bytes are copied each iteration of the 4X unrolled loop.  */
#define BIGBLOCKSIZE (sizeof (unsigned long) * 4)

/* How many bytes are copied each iteration of the word copy loop.  */
#define LITTLEBLOCKSIZE (sizeof (unsigned long))

/* Threshold for punting to the byte copier.  */
#define TOO_SMALL(LEN) ((LEN) < BIGBLOCKSIZE)

void *
memcpy (void *__restrict dst0, const void *__restrict src0,
        unsigned long len0)
{
  unsigned char *dst = dst0;
  const unsigned char *src = src0;

  /* If the size is small, or either SRC or DST is unaligned,
     then punt into the byte copy loop.  This should be rare.  */
  if (len0 >= LITTLEBLOCKSIZE && !UNALIGNED (src, dst))
    {
      unsigned long *aligned_dst;
      const unsigned long *aligned_src;

      aligned_dst = (unsigned long *)dst;
      aligned_src = (const unsigned long *)src;

      /* Copy one long word at a time if possible.  */
      do
        {
          *aligned_dst++ = *aligned_src++;
          len0 -= LITTLEBLOCKSIZE;
        }
      while (len0 >= LITTLEBLOCKSIZE);

      /* Pick up any residual with a byte copier.  */
      dst = (unsigned char *)aligned_dst;
      src = (const unsigned char *)aligned_src;
    }

  for (; len0; len0--)
    *dst++ = *src++;

  return dst0;
}
```

ARM gcc trunk, gcc -O3 -nostdlib -nodefaultlibs -S -o -:

```
memcpy:
        push    {r3, r4, r5, r6, r7, lr}
        cmp     r2, #3
        mov     r4, r2
        mov     r5, r0
        mov     r6, r1
        bls     .L5
        orr     r3, r0, r1
        lsls    r3, r3, #30
        beq     .L9
.L3:
        mov     r2, r4
        mov     r1, r6
        bl      memcpy          ; <------------- call to memcpy
        mov     r0, r5
        pop     {r3, r4, r5, r6, r7, pc}
.L9:
        subs    r7, r2, #4
        and     r4, r2, #3
        bic     r7, r7, #3
        adds    r7, r7, #4
        mov     r2, r7
        add     r6, r6, r7
        bl      memcpy          ; <------------- call to memcpy
        adds    r0, r5, r7
.L5:
        cmp     r4, #0
        bne     .L3
        mov     r0, r5
        pop     {r3, r4, r5, r6, r7, pc}
```
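The generated code calls memcpy from inside memcpy itself: at this optimization level GCC's loop-distribution pass (-ftree-loop-distribute-patterns, on by default at -O2 and above) recognizes both copy loops and replaces them with library calls, which here recurse infinitely. -nostdlib and -nodefaultlibs are link-time options and do not stop the compiler from synthesizing such calls. A minimal sketch of the usual workaround when compiling a libc's own string routines, assuming an invocation like the one above (the arm-none-eabi-gcc driver name is illustrative):

```
$ arm-none-eabi-gcc -O3 -nostdlib -nodefaultlibs \
      -fno-tree-loop-distribute-patterns -S -o - test.c
```

The same effect can presumably be scoped to a single definition with GCC's optimize attribute, e.g. __attribute__((optimize("no-tree-loop-distribute-patterns"))) on the memcpy function. Note that -ffreestanding alone is not enough: GCC documents that even freestanding environments must provide memcpy, memmove, memset, and memcmp, and it may still emit calls to them.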