The branch main has been updated by fuz: URL: https://cgit.FreeBSD.org/src/commit/?id=2f83319214d9adb8ab7a77e35d1014658b3f9cae
commit 2f83319214d9adb8ab7a77e35d1014658b3f9cae Author: Robert Clausecker <[email protected]> AuthorDate: 2025-12-18 23:37:33 +0000 Commit: Robert Clausecker <[email protected]> CommitDate: 2026-01-04 13:21:41 +0000 libc/amd64: fix stpncpy.S again The previous fix introduced a regression on machines without the BMI1 instruction set extension. The TZCNT instruction used in this function behaves differently on old machines when the source operand is zero, but the code was originally designed to never trigger this case. The bug fix caused this case to be possible, leading to a regression on sufficiently old hardware. Fix the code by messing with things such that the source operand is never zero. PR: 291720 Fixes: 66eb78377bf109af1d9e25626bf254b4369436ec Tested by: cy Approved by: markj (mentor) Differential Revision: https://reviews.freebsd.org/D54303 --- lib/libc/amd64/string/stpncpy.S | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/lib/libc/amd64/string/stpncpy.S b/lib/libc/amd64/string/stpncpy.S index 764ee1d4008c..2efadc97a435 100644 --- a/lib/libc/amd64/string/stpncpy.S +++ b/lib/libc/amd64/string/stpncpy.S @@ -36,9 +36,7 @@ .set stpncpy, __stpncpy ARCHFUNCS(__stpncpy) ARCHFUNC(__stpncpy, scalar) -#if 0 /* temporarily disabled cf. PR 291720 */ ARCHFUNC(__stpncpy, baseline) -#endif ENDARCHFUNCS(__stpncpy) ARCHENTRY(__stpncpy, scalar) @@ -93,7 +91,6 @@ ARCHEND(__stpncpy, scalar) /* stpncpy(char *restrict rdi, const char *rsi, size_t rdx) */ ARCHENTRY(__stpncpy, baseline) #define bounce (-3*16-8) /* location of on-stack bounce buffer */ - test %rdx, %rdx # no bytes to copy? 
jz .L0 @@ -225,8 +222,8 @@ ARCHENTRY(__stpncpy, baseline) /* 1--32 bytes to copy, bounce through the stack */ .Lrunt: movdqa %xmm1, bounce+16(%rsp) # clear out rest of on-stack copy - bts %r10, %r8 # treat end of buffer as end of string and %r9d, %r8d # mask out head before string + bts %r10, %r8 # treat end of buffer as end of string test $0x1ffff, %r8d # end of string within first chunk or right after? jnz 0f # if yes, do not inspect second buffer @@ -235,10 +232,10 @@ ARCHENTRY(__stpncpy, baseline) pcmpeqb %xmm1, %xmm0 # NUL in second chunk? pmovmskb %xmm0, %r9d shl $16, %r9d - or %r9d, %r8d # merge found NUL bytes into NUL mask + or %r9, %r8 # merge found NUL bytes into NUL mask /* end of string after one buffer */ -0: tzcnt %r8d, %r8d # location of last char in string +0: tzcnt %r8, %r8 # location of last char in string movdqu %xmm1, bounce(%rsp, %r8, 1) # clear bytes behind string lea bounce(%rsp, %rcx, 1), %rsi # start of string copy on stack lea (%rdi, %r8, 1), %rax # return pointer to NUL byte
