On 2023-07-10 07:58, Bruno Haible wrote:

   - The rationale for defining and initializing the mbstate_t at the function
     scope was that on BSD and macOS systems, an mbstate_t is 128 bytes large,

We can improve on that. I installed the two attached performance tweaks; the second tweak cuts that initialization from 128 bytes down to at most 12 bytes on those platforms. On x86-64 GNU/Linux these patches barely make a difference (the generated code is one instruction smaller). On the BSDs the patch is a bigger deal.

Even on the rare untweaked platform I expect (though I haven't measured) that mbcel's speed is competitive.
From d28395ef8d06a3567a9bcb9e968d7c7ba4982396 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Mon, 10 Jul 2023 10:14:10 -0700
Subject: [PATCH 1/2] diff: add mbcel checks, compiler advice

* lib/mbcel.h: Include limits.h, stddef.h.
Add static assertions that MB_LEN_MAX has a sane value,
as the code relies on this.  Help GCC by advising
it that mbrtoc32 never returns a value between
MB_LEN_MAX + 1 and (size_t) -1 / 2 inclusive.
---
 lib/mbcel.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/lib/mbcel.h b/lib/mbcel.h
index 173153a..0406c00 100644
--- a/lib/mbcel.h
+++ b/lib/mbcel.h
@@ -51,6 +51,8 @@
  #error "Please include config.h first."
 #endif
 
+#include <limits.h>
+#include <stddef.h>
 #include <uchar.h>
 
 /* mbcel_t is a type representing a character CH or an encoding error byte ERR,
@@ -65,6 +67,10 @@ typedef struct
   unsigned char len;
 } mbcel_t;
 
+/* On all known platforms, every multi-byte character length fits in
+   mbcel_t's LEN.  Check this.  */
+static_assert (MB_LEN_MAX <= UCHAR_MAX);
+
 /* Pacify GCC re '*p <= 0x7f' below.  */
 #if defined __GNUC__ && 4 < __GNUC__ + (3 <= __GNUC_MINOR__)
 # pragma GCC diagnostic ignored "-Wtype-limits"
@@ -100,9 +106,15 @@ mbcel_scan (char const *p, char const *lim)
 
   /* Any LEN with top bit set is an encoding error, as LEN == (size_t) -3
      is not supported and MB_LEN_MAX <= (size_t) -1 / 2 on all platforms.  */
+  static_assert (MB_LEN_MAX <= (size_t) -1 / 2);
   if ((size_t) -1 / 2 < len)
     return (mbcel_t) { .err = *p, .len = 1 };
 
+  /* Tell the compiler LEN is at most MB_LEN_MAX,
+     as this can help GCC generate better code.  */
+  if (! (len <= MB_LEN_MAX))
+    unreachable ();
+
   /* A multi-byte character.  LEN must be positive,
      as *P != '\0' and shift sequences are not supported.  */
   return (mbcel_t) { .ch = ch, .len = len };
-- 
2.39.2

From 5d0554f0a1428154641ec51dbb732e30172bc503 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Tue, 11 Jul 2023 00:50:57 -0700
Subject: [PATCH 2/2] diff: tweak mbstate_t performance

* lib/mbcel.h (mbcel_scan): Improve performance when initializing
an mbstate_t.
---
 lib/mbcel.h | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/lib/mbcel.h b/lib/mbcel.h
index 0406c00..00c5fe0 100644
--- a/lib/mbcel.h
+++ b/lib/mbcel.h
@@ -100,8 +100,25 @@ mbcel_scan (char const *p, char const *lim)
   if (0 <= *p && *p <= 0x7f)
     return (mbcel_t) { .ch = *p, .len = 1 };
 
-  char32_t ch;
+  /* An initial mbstate_t; initialization optimized for some platforms.  */
+#if defined __GLIBC__ && 2 < __GLIBC__ + (2 <= __GLIBC_MINOR__)
+  mbstate_t mbs; mbs.__count = 0;
+#elif (defined __FreeBSD__ || defined __DragonFly__ || defined __OpenBSD__ \
+       || (defined __APPLE__ && defined __MACH__))
+  /* Initialize for all encodings: UTF-8, EUC, etc.  */
+  union { mbstate_t m; struct { uchar_t ch; int utf8_want, euc_want; } s; } u;
+  u.s.ch = u.s.utf8_want = u.s.euc_want = 0;
+# define mbs u.m
+#elif defined __NetBSD__
+  union { mbstate_t m; struct _RuneLocale *s; } u;
+  u.s = nullptr;
+# define mbs u.m
+#else
+  /* mbstate_t has unknown structure or is not worth optimizing.  */
   mbstate_t mbs = {0};
+#endif
+
+  char32_t ch;
   size_t len = mbrtoc32 (&ch, p, lim - p, &mbs);
 
   /* Any LEN with top bit set is an encoding error, as LEN == (size_t) -3
-- 
2.39.2

Reply via email to