Hi.

Here is my proposed patch for merging the byte-to-wide-character caches
in gawk by using the one in dfa.c.

I renamed the one in dfa to 'btowc_cache' since it caches bytes,
not multibyte characters.   This compiles and gets through the test
suite.

I also changed the check on the return value of mbrtowc, since it returns
an unsigned type (size_t): errors are now tested explicitly as (size_t) -1
and (size_t) -2.

Thanks,

Arnold
diff --git a/awk.h b/awk.h
index 86c8883..636be96 100644
--- a/awk.h
+++ b/awk.h
@@ -1591,10 +1591,6 @@ extern const wchar_t *wcasestrstr(const wchar_t *haystack, size_t hs_len,
 		const wchar_t *needle, size_t needle_len);
 extern void r_free_wstr(NODE *n);
 #define free_wstr(n)	do { if ((n)->flags & WSTRCUR) r_free_wstr(n); } while(0)
-extern wint_t btowc_cache[];
-#define btowc_cache(x) btowc_cache[(x)&0xFF]
-extern void init_btowc_cache();
-#define is_valid_character(b)	(btowc_cache[(b)&0xFF] != WEOF)
 /* re.c */
 extern Regexp *make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal);
 extern int research(Regexp *rp, char *str, int start, size_t len, int flags);
diff --git a/dfa.c b/dfa.c
index fff4599..a2c73b1 100644
--- a/dfa.c
+++ b/dfa.c
@@ -464,10 +464,10 @@ static void regexp (void);
 /* A table indexed by byte values that contains the corresponding wide
    character (if any) for that byte.  WEOF means the byte is not a
    valid single-byte character.  */
-static wint_t mbrtowc_cache[NOTCHAR];
+wint_t btowc_cache[NOTCHAR];
 
 /* Store into *PWC the result of converting the leading bytes of the
-   multibyte buffer S of length N bytes, using the mbrtowc_cache in *D
+   multibyte buffer S of length N bytes, using the btowc_cache in *D
    and updating the conversion state in *D.  On conversion error,
    convert just a single byte, to WEOF.  Return the number of bytes
    converted.
@@ -476,7 +476,7 @@ static wint_t mbrtowc_cache[NOTCHAR];
 
    * PWC points to wint_t, not to wchar_t.
    * The last arg is a dfa *D instead of merely a multibyte conversion
-     state D->mbs.  D also contains an mbrtowc_cache for speed.
+     state D->mbs.  D also contains a btowc_cache for speed.
    * N must be at least 1.
    * S[N - 1] must be a sentinel byte.
    * Shift encodings are not supported.
@@ -487,7 +487,7 @@ static size_t
 mbs_to_wchar (wint_t *pwc, char const *s, size_t n, struct dfa *d)
 {
   unsigned char uc = s[0];
-  wint_t wc = mbrtowc_cache[uc];
+  wint_t wc = btowc_cache[uc];
 
   if (wc == WEOF)
     {
@@ -695,7 +695,7 @@ static charclass newline;
 static bool
 unibyte_word_constituent (unsigned char c)
 {
-  return mbrtowc_cache[c] != WEOF && (isalnum (c) || (c) == '_');
+  return btowc_cache[c] != WEOF && (isalnum (c) || (c) == '_');
 }
 
 static int
@@ -718,25 +718,44 @@ wchar_context (wint_t wc)
   return CTX_NONE;
 }
 
+void init_btowc_cache(void)
+{
+  static bool inited = false;
+  int i;
+
+  if (inited)
+    return;
+
+  for (i = CHAR_MIN; i <= CHAR_MAX; ++i)
+    {
+      char c = i;
+      unsigned char uc = i;
+      mbstate_t s = { 0 };
+      wchar_t wc;
+      size_t ret = mbrtowc (&wc, &c, 1, &s);
+      btowc_cache[uc] = (ret == (size_t)-1 || ret == (size_t) -2) ? WEOF : wc;
+    }
+
+  inited = true;
+}
+
 /* Entry point to set syntax options.  */
 void
 dfasyntax (reg_syntax_t bits, int fold, unsigned char eol)
 {
   int i;
+
   syntax_bits_set = 1;
   syntax_bits = bits;
   case_fold = fold != 0;
   eolbyte = eol;
+  init_btowc_cache();
 
+  /* Now that btowc_cache[uc] is set, use it to calculate sbit.  */
   for (i = CHAR_MIN; i <= CHAR_MAX; ++i)
     {
-      char c = i;
       unsigned char uc = i;
-      mbstate_t s = { 0 };
-      wchar_t wc;
-      mbrtowc_cache[uc] = mbrtowc (&wc, &c, 1, &s) <= 1 ? wc : WEOF;
 
-      /* Now that mbrtowc_cache[uc] is set, use it to calculate sbit.  */
       sbit[uc] = char_context (uc);
       switch (sbit[uc])
         {
diff --git a/dfa.h b/dfa.h
index 18be7f5..f2dd656 100644
--- a/dfa.h
+++ b/dfa.h
@@ -120,4 +120,15 @@ extern void dfawarn (const char *);
    The user must supply a dfaerror.  */
 extern _Noreturn void dfaerror (const char *);
 
+/* General support routines. */
+
+/* using_utf8() lets us know if our locale is one based on UTF-8.  */
 extern int using_utf8 (void);
+
+/* init_btowc_cache() initializes the cache that maps bytes to wide characters.  */
+extern void init_btowc_cache(void);
+
+/* is_valid_character() tells us if a byte is also a valid single-byte character. */
+extern wint_t btowc_cache[];
+#define is_valid_character(byte)  (btowc_cache[(byte)&0xFF] != WEOF)
+#define btowc_cache(x) btowc_cache[(x)&0xFF]
diff --git a/node.c b/node.c
index a7c19db..22119d2 100644
--- a/node.c
+++ b/node.c
@@ -949,19 +949,6 @@ get_ieee_magic_val(const char *val)
 	return v;
 }
 
-wint_t btowc_cache[256];
-
-/* init_btowc_cache --- initialize the cache */
-
-void init_btowc_cache()
-{
-	int i;
-
-	for (i = 0; i < 255; i++) {
-		btowc_cache[i] = btowc(i);
-	}
-}
-
 #define BLOCKCHUNK 100
 
 BLOCK nextfree[BLOCK_MAX] = {

Reply via email to