The branch stable/14 has been updated by fuz:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=a5c6c95631195181bd3707d96ab79184235df6a6

commit a5c6c95631195181bd3707d96ab79184235df6a6
Author:     Robert Clausecker <[email protected]>
AuthorDate: 2025-12-18 23:37:33 +0000
Commit:     Robert Clausecker <[email protected]>
CommitDate: 2026-01-04 13:25:33 +0000

    libc/amd64: fix stpncpy.S again
    
    The previous fix introduced a regression on machines without the BMI1
    instruction set extension.  The TZCNT instruction used in this function
    behaves differently on old machines when the source operand is zero, but
    the code was originally designed to never trigger this case.  The bug
    fix caused this case to be possible, leading to a regression on
    sufficiently old hardware.
    
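    Fix the code by setting the end-of-buffer bit only after masking out the
    head of the string and by widening the NUL mask operations to 64 bits,
    such that the source operand of TZCNT is never zero.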
    
    PR:             291720
    Fixes:          66eb78377bf109af1d9e25626bf254b4369436ec
    Tested by:      cy
    Approved by:    markj (mentor)
    Differential Revision:  https://reviews.freebsd.org/D54303
    
    (cherry picked from commit 2f83319214d9adb8ab7a77e35d1014658b3f9cae)
---
 lib/libc/amd64/string/stpncpy.S | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/lib/libc/amd64/string/stpncpy.S b/lib/libc/amd64/string/stpncpy.S
index 764ee1d4008c..2efadc97a435 100644
--- a/lib/libc/amd64/string/stpncpy.S
+++ b/lib/libc/amd64/string/stpncpy.S
@@ -36,9 +36,7 @@
        .set stpncpy, __stpncpy
 ARCHFUNCS(__stpncpy)
        ARCHFUNC(__stpncpy, scalar)
-#if 0 /* temporarily disabled cf. PR 291720 */
        ARCHFUNC(__stpncpy, baseline)
-#endif
 ENDARCHFUNCS(__stpncpy)
 
 ARCHENTRY(__stpncpy, scalar)
@@ -93,7 +91,6 @@ ARCHEND(__stpncpy, scalar)
 /* stpncpy(char *restrict rdi, const char *rsi, size_t rdx) */
 ARCHENTRY(__stpncpy, baseline)
 #define bounce         (-3*16-8)               /* location of on-stack bounce buffer */
-
        test            %rdx, %rdx              # no bytes to copy?
        jz              .L0
 
@@ -225,8 +222,8 @@ ARCHENTRY(__stpncpy, baseline)
 
        /* 1--32 bytes to copy, bounce through the stack */
 .Lrunt:        movdqa          %xmm1, bounce+16(%rsp)  # clear out rest of on-stack copy
-       bts             %r10, %r8               # treat end of buffer as end of string
        and             %r9d, %r8d              # mask out head before string
+       bts             %r10, %r8               # treat end of buffer as end of string
        test            $0x1ffff, %r8d          # end of string within first chunk or right after?
        jnz             0f                      # if yes, do not inspect second buffer
 
@@ -235,10 +232,10 @@ ARCHENTRY(__stpncpy, baseline)
        pcmpeqb         %xmm1, %xmm0            # NUL in second chunk?
        pmovmskb        %xmm0, %r9d
        shl             $16, %r9d
-       or              %r9d, %r8d              # merge found NUL bytes into NUL mask
+       or              %r9, %r8                # merge found NUL bytes into NUL mask
 
        /* end of string after one buffer */
-0:     tzcnt           %r8d, %r8d              # location of last char in string
+0:     tzcnt           %r8, %r8                # location of last char in string
        movdqu          %xmm1, bounce(%rsp, %r8, 1) # clear bytes behind string
        lea             bounce(%rsp, %rcx, 1), %rsi # start of string copy on stack
        lea             (%rdi, %r8, 1), %rax    # return pointer to NUL byte

Reply via email to