Hello FreeBSD devs,
I have found a performance improvement to libc's strnlen().
lib/libc/string/strnlen.c is a trivial byte-by-byte implementation,
where strlen.c has a smarter word-by-word implementation. I have
implemented strnlen similarly to strlen. It runs about 4x as fast, at
the cost of a binary codesize increase from 30 bytes to 221.
In writing this I needed a test, and there isn't one in
tools/regression/lib/libc/string, so I wrote a unit test for strnlen.
This really only makes sense for a word-by-word implementation, as it
tests each combination of string and limit length, overread characters,
and starting alignment.
Could someone please review these patches? I am not very experienced in
C, and am even less experienced with FreeBSD's style guidelines, so they
likely have a bunch of style and idiom issues. Even if one or both of
them prove not worth committing, it would still be useful for my
learning.
If/When these patches prove worthy of submitting, what is the next step
after that? Should I submit a PR, or is there some other process? This
code is quite similar to the existing strlen.c, and doesn't do anything
fancy with e.g. endianness, but I haven't tested it for correctness or
performance on anything other than x86...
And finally, there is some other low-hanging fruit in the other strn*
functions. Would it be worth it for me to give those the same treatment?
Thanks,
Lee Thomas
Test platform:
$uname -a
FreeBSD LeeDesktop 9.1-STABLE FreeBSD 9.1-STABLE #15 r250831: Mon May
20 17:31:28 EDT 2013
lthomas@LeeDesktop:/usr/obj/usr/src/sys/NOSOUND amd64
$dmesg | grep CPU:
CPU: Intel(R) Xeon(R) CPU X5650 @ 2.67GHz (2666.81-MHz
K8-class CPU)
$clang --version
FreeBSD clang version 3.2 (tags/RELEASE_32/final 170710)
20121221
Target: x86_64-unknown-freebsd9.1
Thread model: posix
My timing test was a file of 10000 strings, of uniformly random length,
50% between 0 and 20 bytes long, and 50% between 21 and 1000 bytes long.
Each was filled with random generated bytes in the range [1, 255]. The
test executables run their strlen on each string in the file 1000 times,
and a shell script ran the test programs alternately 100 times. Here are
the clang results; gcc results are roughly the same. I will share the
actual timing code if someone wants it, but it is pretty ugly C++ and
shell and I don't guarantee it working :-).
Results:
x ./times_bsd_strnlen.txt
+ ./times_my_strnlen.txt
+--------------------------------------------------------------------------+
|+
x|
|+
x|
|+
x|
|+
x|
|+
x|
|+
x|
|+
x|
|+
x|
|+
x|
|+
x|
|+
x|
|+
x|
|+
x|
|+
x|
|+
x|
|+
x|
|+
x|
|A
A|
+--------------------------------------------------------------------------+
N Min Max Median Avg
Stddev
x 101 1.8685486 1.8693889 1.8687506 1.8687894
0.0001484903
+ 101 0.42704683 0.42779486 0.42712813 0.42715597
0.00010681494
Difference at 95.0% confidence
-1.44163 +/- 3.56739e-05
-77.1426% +/- 0.00190893%
(Student's t, pooled s = 0.000129342)
Note: I manually shortened the histogram, as it was 100 lines long of
exactly the same.
Index: strnlen.c
===================================================================
diff --git a/head/lib/libc/string/strnlen.c b/head/lib/libc/string/strnlen.c
--- a/head/lib/libc/string/strnlen.c (revision 250951)
+++ b/head/lib/libc/string/strnlen.c (working copy)
@@ -1,5 +1,6 @@
/*-
- * Copyright (c) 2009 David Schultz <d...@freebsd.org>
+ * Copyright (c) 2009, 2010 Xin LI <delp...@freebsd.org>
+ * Copyright (c) 2013 Lee Thomas <lee_tho...@aslantools.com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -27,16 +28,91 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include <sys/limits.h>
+#include <sys/types.h>
#include <string.h>
+/*
+ * Portable strnlen() for 32-bit and 64-bit systems.
+ *
+ * Rationale: it is generally much more efficient to do word length
+ * operations and avoid branches on modern computer systems, as
+ * compared to byte-length operations with a lot of branches.
+ *
+ * The expression:
+ *
+ * ((x - 0x01....01) & ~x & 0x80....80)
+ *
+ * would evaluate to a non-zero value iff any of the bytes in the
+ * original word is zero.
+ *
+ * On multi-issue processors, we can divide the above expression into:
+ * a) (x - 0x01....01)
+ * b) (~x & 0x80....80)
+ * c) a & b
+ *
+ * Where, a) and b) can be partially computed in parallel.
+ *
+ * The algorithm above is found on "Hacker's Delight" by
+ * Henry S. Warren, Jr.
+ */
+
+/* Magic numbers for the algorithm */
+#if LONG_BIT == 32
+static const unsigned long mask01 = 0x01010101;
+static const unsigned long mask80 = 0x80808080;
+#elif LONG_BIT == 64
+static const unsigned long mask01 = 0x0101010101010101;
+static const unsigned long mask80 = 0x8080808080808080;
+#else
+#error Unsupported word size
+#endif
+
+#define LONGPTR_MASK (sizeof(long) - 1)
+
size_t
-strnlen(const char *s, size_t maxlen)
+strnlen(const char *str, size_t maxlen)
{
- size_t len;
+ const char *stop, *short_stop;
+ const char *p;
+ const unsigned long *lp;
+ long va, vb;
- for (len = 0; len < maxlen; len++, s++) {
- if (!*s)
- break;
+ if (maxlen==0) return 0;
+
+ stop=str+maxlen;
+ /*
+ * Before trying the hard (unaligned byte-by-byte access) way
+ * to figure out whether there is a nul character, try to see
+ * if there is a nul character is within this accessible word
+ * first.
+ *
+ * p and (p & ~LONGPTR_MASK) must be equally accessible since
+ * they always fall in the same memory page, as long as page
+ * boundaries is integral multiple of word size.
+ */
+ lp = (const unsigned long *)((uintptr_t)str & ~LONGPTR_MASK);
+ va = (*lp - mask01);
+ vb = ((~*lp) & mask80);
+ lp++;
+ if (va & vb) {
+ /* Check if we have \0 in the first part */
+ short_stop=(const char *)lp;
+ if (stop<short_stop) short_stop=stop;
+ for (p=str; p != short_stop; p++)
+ if (*p == '\0')
+ return (p-str);
}
- return (len);
+ /* Scan the rest of the string using word sized operation */
+ for (; (const char *)lp < stop; lp++) {
+ va = (*lp - mask01);
+ vb = ((~*lp) & mask80);
+ if (va & vb) {
+ for (p=(const char *)lp; p != stop; p++)
+ if (*p == '\0')
+ break;
+ return (p-str);
+ }
+ }
+ return (maxlen);
}
Index: Makefile
===================================================================
diff --git a/head/tools/regression/lib/libc/string/Makefile
b/head/tools/regression/lib/libc/string/Makefile
--- a/head/tools/regression/lib/libc/string/Makefile (revision 250951)
+++ b/head/tools/regression/lib/libc/string/Makefile (working copy)
@@ -4,7 +4,7 @@
LDFLAGS+= -L/usr/local/lib
LDLIBS= -ltap
-TESTS= test-stpncpy test-strerror test-wcscasecmp test-wcsnlen
+TESTS= test-stpncpy test-strerror test-strnlen test-wcscasecmp test-wcsnlen
.PHONY: tests
tests: ${TESTS}
Index: test-strnlen.c
===================================================================
diff --git a/head/tools/regression/lib/libc/string/test-strnlen.c
b/head/tools/regression/lib/libc/string/test-strnlen.c
new file mode 10644
--- /dev/null (revision 0)
+++ b/head/tools/regression/lib/libc/string/test-strnlen.c (working copy)
@@ -0,0 +1,83 @@
+/*-
+ * Copyright (c) 2013 Lee Thomas <lee_tho...@aslantools.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define MAX_LEN (sizeof(long) * 2)
+static void test_strnlen(char junk_fill, size_t str_len, size_t al);
+
+/*
+ * Tests strnlen for various lengths, buffer restrictions, and alignments.
+ * Since strnlen potentially reads (and branches) on all of the bytes in the
+ * aligned words containing the start and end of the unaligned string, we
+ * need to test both zero and nonzero values of those read-but-disregarded
bytes.
+ * Note that the reads themselves are safe because the page size is an integer
+ * multiple of the wordsize.
+ */
+int
+main(int argc, char **argv)
+{
+ size_t al, str_len, runs;
+ char junk_fill;
+ printf("1..1\n");
+ for (junk_fill = 0; junk_fill <= 1; junk_fill++) {
+ for (str_len = 0; str_len < MAX_LEN; str_len++) {
+ for (al = sizeof(long); al < 2 * sizeof(long); al++) {
+ test_strnlen(junk_fill, str_len, al);
+ }
+ }
+ }
+ printf("ok 1 - strnlen\n");
+ return 0;
+}
+static void
+test_strnlen(char junk_fill, size_t str_len, size_t al)
+{
+ /*
+ * Required size is 2*sizeof(long) junk, MAX_LEN chars, 1 nul, and
+ * then sizeof(long) junk
+ */
+ char buf[MAX_LEN + 1 + 3 * sizeof(long)];
+ char * str;
+ size_t limit_len, correct_result;
+
+ str = buf + al;
+ memset(buf, junk_fill, al);
+ memset(str, 'a', str_len);
+ str[str_len] = '\0';
+ memset(str + str_len + 1, junk_fill, sizeof(long));
+ for (limit_len = 0; limit_len < MAX_LEN + 2; limit_len++) {
+ correct_result = limit_len < str_len ? limit_len : str_len;
+ if (strnlen(str, limit_len) != correct_result) {
+ printf("not ok - strnlen\n");
+ exit(1);
+ }
+ }
+}
Property changes on: head/tools/regression/lib/libc/string/test-strnlen.c
___________________________________________________________________
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
_______________________________________________
freebsd-hackers@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-hackers
To unsubscribe, send any mail to "freebsd-hackers-unsubscr...@freebsd.org"