On 2023-07-10 07:58, Bruno Haible wrote:
> - The rationale for defining and initializing the mbstate_t at the function
> scope was that on BSD and macOS systems, an mbstate_t is 128 bytes large, [...]
We can improve on that. I installed the attached two performance tweaks;
the second tweak cuts that initialization from 128 down to at most 12
bytes on those platforms. On x86-64 GNU/Linux these patches barely make
a difference (one insn smaller). On the BSDs the patch is a bigger deal.
Even on the rare untweaked platform I expect (though I haven't measured)
that mbcel's speed is competitive.

From d28395ef8d06a3567a9bcb9e968d7c7ba4982396 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Mon, 10 Jul 2023 10:14:10 -0700
Subject: [PATCH 1/2] diff: add mbcel checks, compiler advice
* lib/mbcel.h: Include limits.h, stddef.h.
Add static assertions that MB_LEN_MAX has a sane value,
as the code relies on this. Help GCC by advising
it that mbrtoc32 never returns a value between
MB_LEN_MAX + 1 and (size_t) -1 / 2 inclusive.
---
lib/mbcel.h | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/lib/mbcel.h b/lib/mbcel.h
index 173153a..0406c00 100644
--- a/lib/mbcel.h
+++ b/lib/mbcel.h
@@ -51,6 +51,8 @@
#error "Please include config.h first."
#endif
+#include <limits.h>
+#include <stddef.h>
#include <uchar.h>
/* mbcel_t is a type representing a character CH or an encoding error byte ERR,
@@ -65,6 +67,10 @@ typedef struct
unsigned char len;
} mbcel_t;
+/* On all known platforms, every multi-byte character length fits in
+ mbcel_t's LEN. Check this. */
+static_assert (MB_LEN_MAX <= UCHAR_MAX);
+
/* Pacify GCC re '*p <= 0x7f' below. */
#if defined __GNUC__ && 4 < __GNUC__ + (3 <= __GNUC_MINOR__)
# pragma GCC diagnostic ignored "-Wtype-limits"
@@ -100,9 +106,15 @@ mbcel_scan (char const *p, char const *lim)
/* Any LEN with top bit set is an encoding error, as LEN == (size_t) -3
is not supported and MB_LEN_MAX <= (size_t) -1 / 2 on all platforms. */
+ static_assert (MB_LEN_MAX <= (size_t) -1 / 2);
if ((size_t) -1 / 2 < len)
return (mbcel_t) { .err = *p, .len = 1 };
+ /* Tell the compiler LEN is at most MB_LEN_MAX,
+ as this can help GCC generate better code. */
+ if (! (len <= MB_LEN_MAX))
+ unreachable ();
+
/* A multi-byte character. LEN must be positive,
as *P != '\0' and shift sequences are not supported. */
return (mbcel_t) { .ch = ch, .len = len };
--
2.39.2
From 5d0554f0a1428154641ec51dbb732e30172bc503 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Tue, 11 Jul 2023 00:50:57 -0700
Subject: [PATCH 2/2] diff: tweak mbstate_t performance
* lib/mbcel.h (mbcel_scan): Improve performance when initializing
an mbstate_t.
---
lib/mbcel.h | 19 ++++++++++++++++++-
1 file changed, 18 insertions(+), 1 deletion(-)
diff --git a/lib/mbcel.h b/lib/mbcel.h
index 0406c00..00c5fe0 100644
--- a/lib/mbcel.h
+++ b/lib/mbcel.h
@@ -100,8 +100,25 @@ mbcel_scan (char const *p, char const *lim)
if (0 <= *p && *p <= 0x7f)
return (mbcel_t) { .ch = *p, .len = 1 };
- char32_t ch;
+ /* An initial mbstate_t; initialization optimized for some platforms. */
+#if defined __GLIBC__ && 2 < __GLIBC__ + (2 <= __GLIBC_MINOR__)
+ mbstate_t mbs; mbs.__count = 0;
+#elif (defined __FreeBSD__ || defined __DragonFly__ || defined __OpenBSD__ \
+ || (defined __APPLE__ && defined __MACH__))
+ /* Initialize for all encodings: UTF-8, EUC, etc. */
+ union { mbstate_t m; struct { uchar_t ch; int utf8_want, euc_want; } s; } u;
+ u.s.ch = u.s.utf8_want = u.s.euc_want = 0;
+# define mbs u.m
+#elif defined __NetBSD__
+ union { mbstate_t m; struct _RuneLocale *s; } u;
+ u.s = nullptr;
+# define mbs u.m
+#else
+ /* mbstate_t has unknown structure or is not worth optimizing. */
mbstate_t mbs = {0};
+#endif
+
+ char32_t ch;
size_t len = mbrtoc32 (&ch, p, lim - p, &mbs);
/* Any LEN with top bit set is an encoding error, as LEN == (size_t) -3
--
2.39.2