The branch main has been updated by fuz:

URL: https://cgit.FreeBSD.org/src/commit/?id=30acc84270266e41f66cf572f67c3290d923da2f

commit 30acc84270266e41f66cf572f67c3290d923da2f
Author:     Robert Clausecker <f...@freebsd.org>
AuthorDate: 2025-07-29 20:12:11 +0000
Commit:     Robert Clausecker <f...@freebsd.org>
CommitDate: 2025-08-09 20:13:27 +0000

    libc/amd64: rewrite memrchr() scalar impl. to read the string from the back
    
    A very simple implementation as I don't have the patience right now
    to write a full SWAR kernel.  Should still do the trick if you wish
    to opt out of SSE for some reason.
    
    Reported by:    Mikael Simonsson <m...@mikaelsimonsson.com>
    Reviewed by:    strajabot
    PR:             288321
    MFC after:      1 month
---
 lib/libc/amd64/string/memrchr.S | 72 +++++++++++++++++++----------------------
 1 file changed, 34 insertions(+), 38 deletions(-)

diff --git a/lib/libc/amd64/string/memrchr.S b/lib/libc/amd64/string/memrchr.S
index f1ba48d6bb41..80fb306af2a3 100644
--- a/lib/libc/amd64/string/memrchr.S
+++ b/lib/libc/amd64/string/memrchr.S
@@ -16,58 +16,54 @@ ARCHFUNCS(memrchr)
 ENDARCHFUNCS(memrchr)
 
 ARCHENTRY(memrchr, scalar)
-       xor     %eax, %eax              # prospective return value
-       sub     $4, %rdx                # 4 bytes left to process?
-       jb      1f
+       lea             -1(%rdi, %rdx, 1), %rax # point to last char in buffer
+       sub             $4, %rdx                # 4 bytes left to process?
+       jb              .Ltail
 
        ALIGN_TEXT
-0:     xor     %r8, %r8
-       lea     2(%rdi), %r10
-       cmp     %sil, 2(%rdi)
-       cmovne  %r8, %r10               # point to null if no match
+0:     cmp             %sil, (%rax)            # match at last entry?
+       je              1f
 
-       cmp     %sil, (%rdi)
-       cmove   %rdi, %r8               # point to first char if match
+       cmp             %sil, -1(%rax)          # match at second to last entry?
+       je              2f
 
-       lea     1(%rdi), %r9
-       cmp     %sil, 1(%rdi)
-       cmovne  %r8, %r9                # point to first result if no match in second
+       cmp             %sil, -2(%rax)          # match at third to last entry?
+       je              3f
 
-       lea     3(%rdi), %r11
-       cmp     %sil, 3(%rdi)
-       cmovne  %r10, %r11
+       cmp             %sil, -3(%rax)          # match at fourth to last entry?
+       je              4f
 
-       test    %r11, %r11
-       cmovz   %r9, %r11               # take first pair match if none in second
+       sub             $4, %rax
+       sub             $4, %rdx
+       jae             0b
 
-       test    %r11, %r11
-       cmovnz  %r11, %rax              # take match in current set if any
+.Ltail:        cmp             $-3, %edx               # at least one character left to process?
+       jb              .Lnotfound
 
-       add     $4, %rdi
-       sub     $4, %rdx
-       jae     0b
+       cmp             %sil, (%rax)
+       je              1f
 
-1:     cmp     $-3, %edx               # a least one character left to process?
-       jb      2f
+       cmp             $-2, %edx               # at least two characters left to process?
+       jb              .Lnotfound
 
-       cmp     %sil, (%rdi)
-       cmove   %rdi, %rax
+       cmp             %sil, -1(%rax)
+       je              2f
 
-       lea     1(%rdi), %rcx
-       cmp     $-2, %edx               # at least two characters left to process?
-       jb      2f
+       cmp             $-1, %edx               # at least three characters left to process?
+       jb              .Lnotfound
 
-       cmp     %sil, 1(%rdi)
-       cmove   %rcx, %rax
+       cmp             %sil, -2(%rax)
+       je              3f
 
-       lea     2(%rdi), %rcx
-       cmp     $-1, %edx               # at least three character left to process?
-       jb      2f
-
-       cmp     %sil, 2(%rdi)
-       cmove   %rcx, %rax
+.Lnotfound:
+       xor             %eax, %eax
+       ret
 
-2:     ret
+       /* match found -- adjust rax to point to matching byte */
+4:     dec             %rax
+3:     dec             %rax
+2:     dec             %rax
+1:     ret
 ARCHEND(memrchr, scalar)
 
 ARCHENTRY(memrchr, baseline)

Reply via email to