Hello, I'm adding multibyte character support to the info viewer using the mbiter and mbuiter modules. Those modules currently depend on mbrtowc () and friends, so they cannot be used on systems that lack mbrtowc ().
The mbs* functions in gnulib currently have two separate implementations of most algorithms: one for HAVE_MBRTOWC, using mbchar, and one for !HAVE_MBRTOWC. To avoid having to duplicate the info viewer's large amount of text-processing code in the same way, the attached patches add a !HAVE_MBRTOWC implementation of the mbiter, mbuiter and mbchar interfaces.

* lib/mbchar.h: #include <stddef.h> for ptrdiff_t.
* lib/mbchar.h
* lib/mbiter.h
* lib/mbuiter.h: Add an implementation for !HAVE_MBRTOWC.
* m4/mbchar.m4: Require gl_FUNC_MBRTOWC.
* m4/mbiter.m4: Remove an obsolete comment.
* modules/mbchar: Include m4/mbrtowc.m4.
* modules/mbiter
* modules/mbuiter: Don't suggest #if HAVE_MBRTOWC around header #includes.

Thanks,
Mirek
Index: lib/mbchar.h =================================================================== RCS file: /sources/gnulib/gnulib/lib/mbchar.h,v retrieving revision 1.10 diff -u -r1.10 mbchar.h --- lib/mbchar.h 27 Dec 2006 19:54:25 -0000 1.10 +++ lib/mbchar.h 14 Feb 2007 16:59:55 -0000 @@ -146,8 +146,15 @@ #define _MBCHAR_H 1 #include <stdbool.h> +#include <stddef.h> #include <string.h> +/* Multibyte characters could in principle be handled without mbrtowc (), but + all current users of mbchar (mbfile, mbiter and mbuiter) need mbrtowc (), + so their !HAVE_MBRTOWC fallbacks need a non-multibyte mbchar + implementation. */ +#if HAVE_MBRTOWC + /* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before <wchar.h>. BSD/OS 4.1 has a bug: <stdio.h> and <time.h> must be included before @@ -274,6 +281,110 @@ new_mbc->wc = old_mbc->wc; } +#else /* not HAVE_MBRTOWC */ + +#include <ctype.h> + +struct mbchar +{ + const char *ptr; /* pointer to current character */ + bool c_valid; /* true if c is a valid character */ + unsigned char c; /* if c_valid: the current character */ + char buf[1]; /* room for the character, used for file input only */ +}; + +/* EOF (not a real character) is represented with c_valid = false. */ + +typedef struct mbchar mbchar_t; + +/* Access the current character. */ +#define mb_ptr(mbc) ((mbc).ptr) +#define mb_len(mbc) ((mbc).c_valid ? 1 : 0) + +/* Comparison of characters. */ +#define mb_iseq(mbc, sc) ((mbc).c_valid && (mbc).c == (sc)) +#define mb_isnul(mbc) ((mbc).c_valid && (mbc).c == 0) +#define mb_cmp(mbc1, mbc2) \ + ((mbc1).c_valid \ + ? ((mbc2).c_valid \ + ? (int) (mbc1).c - (int) (mbc2).c \ + : -1) \ + : ((mbc2).c_valid \ + ? 1 \ + : 0)) +#define mb_casecmp(mbc1, mbc2) \ + ((mbc1).c_valid \ + ? ((mbc2).c_valid \ + ? ((int) (unsigned char) tolower ((mbc1).c) \ + - (int) (unsigned char) tolower ((mbc2).c)) \ + : -1) \ + : ((mbc2).c_valid \ + ? 1 \ + : 0)) +#define mb_equal(mbc1, mbc2) \ + ((mbc1).c_valid && (mbc2).c_valid \ + ? 
(mbc1).c == (mbc2).c \ + : !(mbc1).c_valid && !(mbc2).c_valid) +#define mb_caseequal(mbc1, mbc2) \ + ((mbc1).c_valid && (mbc2).c_valid \ + ? tolower ((mbc1).c) == tolower ((mbc2).c) \ + : !(mbc1).c_valid && !(mbc2).c_valid) + +/* <ctype.h>, <wctype.h> classification. */ +#define mb_isascii(mbc) ((mbc).c_valid && (mbc).c <= 127) +#define mb_isalnum(mbc) ((mbc).c_valid && isalnum ((mbc).c)) +#define mb_isalpha(mbc) ((mbc).c_valid && isalpha ((mbc).c)) +#define mb_isblank(mbc) ((mbc).c_valid && isblank ((mbc).c)) +#define mb_iscntrl(mbc) ((mbc).c_valid && iscntrl ((mbc).c)) +#define mb_isdigit(mbc) ((mbc).c_valid && isdigit ((mbc).c)) +#define mb_isgraph(mbc) ((mbc).c_valid && isgraph ((mbc).c)) +#define mb_islower(mbc) ((mbc).c_valid && islower ((mbc).c)) +#define mb_isprint(mbc) ((mbc).c_valid && isprint ((mbc).c)) +#define mb_ispunct(mbc) ((mbc).c_valid && ispunct ((mbc).c)) +#define mb_isspace(mbc) ((mbc).c_valid && isspace ((mbc).c)) +#define mb_isupper(mbc) ((mbc).c_valid && isupper ((mbc).c)) +#define mb_isxdigit(mbc) ((mbc).c_valid && isxdigit ((mbc).c)) + +/* Extra <wchar.h> function. */ + +/* Unprintable characters appear as a small box of width 1. */ +#define MB_UNPRINTABLE_WIDTH 1 + +static inline int +mb_width_aux (int c) +{ + /* For unprintable characters, arbitrarily return 0 for control characters + and MB_UNPRINTABLE_WIDTH otherwise. */ + return isprint (c) ? 1 : iscntrl (c) ? 0 : MB_UNPRINTABLE_WIDTH; +} + +#define mb_width(mbc) \ + ((mbc).c_valid ? mb_width_aux ((mbc).c) : MB_UNPRINTABLE_WIDTH) + +/* Output. */ +#define mb_putc(mbc, stream) fwrite ((mbc).ptr, 1, mb_len (mbc), (stream)) + +/* Assignment. */ +#define mb_setascii(mbc, sc) \ + ((mbc)->ptr = (mbc)->buf, (mbc)->c_valid = 1, \ + (mbc)->c = (mbc)->buf[0] = (sc)) + +/* Copying a character. 
*/ +static inline void +mb_copy (mbchar_t *new_mbc, const mbchar_t *old_mbc) +{ + if (old_mbc->ptr == &old_mbc->buf[0]) + { + new_mbc->buf[0] = old_mbc->buf[0]; + new_mbc->ptr = &new_mbc->buf[0]; + } + else + new_mbc->ptr = old_mbc->ptr; + if ((new_mbc->c_valid = old_mbc->c_valid)) + new_mbc->c = old_mbc->c; +} + +#endif /* not HAVE_MBRTOWC */ /* is_basic(c) tests whether the single-byte character c is in the ISO C "basic character set". Index: lib/mbiter.h =================================================================== RCS file: /sources/gnulib/gnulib/lib/mbiter.h,v retrieving revision 1.3 diff -u -r1.3 mbiter.h --- lib/mbiter.h 11 Feb 2007 17:17:09 -0000 1.3 +++ lib/mbiter.h 14 Feb 2007 16:59:55 -0000 @@ -87,6 +87,8 @@ #include <stdbool.h> #include <string.h> +#ifdef HAVE_MBRTOWC + /* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before <wchar.h>. BSD/OS 4.1 has a bug: <stdio.h> and <time.h> must be included before @@ -173,13 +175,6 @@ } static inline void -mbiter_multi_reloc (struct mbiter_multi *iter, ptrdiff_t ptrdiff) -{ - iter->cur.ptr += ptrdiff; - iter->limit += ptrdiff; -} - -static inline void mbiter_multi_copy (struct mbiter_multi *new_iter, const struct mbiter_multi *old_iter) { new_iter->limit = old_iter->limit; @@ -202,6 +197,59 @@ #define mbi_advance(iter) \ ((iter).cur.ptr += (iter).cur.bytes, (iter).next_done = false) +#else /* not HAVE_MBRTOWC */ + +#include "mbchar.h" + +struct mbiter_multi +{ + const char *limit; /* pointer to end of string */ + bool next_done; /* true if mbi_avail has already filled the following */ + struct mbchar cur; /* the current character: + const char *cur.ptr pointer to current character + The following are only valid after mbi_avail. 
+ bool cur.c_valid true if c is a valid wide character + unsigned char cur.c if c_valid: the current character + */ +}; + +static inline void +mbiter_multi_next (struct mbiter_multi *iter) +{ + if (iter->next_done) + return; + iter->cur.c = *iter->cur.ptr; + iter->cur.c_valid = true; + iter->next_done = true; +} + +static inline void +mbiter_multi_copy (struct mbiter_multi *new_iter, const struct mbiter_multi *old_iter) +{ + new_iter->limit = old_iter->limit; + new_iter->next_done = old_iter->next_done; + mb_copy (&new_iter->cur, &old_iter->cur); +} + +/* Iteration macros. */ +typedef struct mbiter_multi mbi_iterator_t; +#define mbi_init(iter, startptr, length) \ + ((iter).cur.ptr = (startptr), (iter).limit = (iter).cur.ptr + (length), \ + (iter).next_done = false) +#define mbi_avail(iter) \ + ((iter).cur.ptr < (iter).limit && (mbiter_multi_next (&(iter)), true)) +#define mbi_advance(iter) \ + ((iter).cur.ptr++, (iter).next_done = false) + +#endif /* not HAVE_MBRTOWC */ + +static inline void +mbiter_multi_reloc (struct mbiter_multi *iter, ptrdiff_t ptrdiff) +{ + iter->cur.ptr += ptrdiff; + iter->limit += ptrdiff; +} + /* Access to the current character. */ #define mbi_cur(iter) (iter).cur #define mbi_cur_ptr(iter) (iter).cur.ptr Index: lib/mbuiter.h =================================================================== RCS file: /sources/gnulib/gnulib/lib/mbuiter.h,v retrieving revision 1.2 diff -u -r1.2 mbuiter.h --- lib/mbuiter.h 11 Feb 2007 17:17:09 -0000 1.2 +++ lib/mbuiter.h 14 Feb 2007 16:59:55 -0000 @@ -95,6 +95,8 @@ #include <stdlib.h> #include <string.h> +#ifdef HAVE_MBRTOWC + /* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before <wchar.h>. 
BSD/OS 4.1 has a bug: <stdio.h> and <time.h> must be included before @@ -182,12 +184,6 @@ } static inline void -mbuiter_multi_reloc (struct mbuiter_multi *iter, ptrdiff_t ptrdiff) -{ - iter->cur.ptr += ptrdiff; -} - -static inline void mbuiter_multi_copy (struct mbuiter_multi *new_iter, const struct mbuiter_multi *old_iter) { if ((new_iter->in_shift = old_iter->in_shift)) @@ -209,6 +205,56 @@ #define mbui_advance(iter) \ ((iter).cur.ptr += (iter).cur.bytes, (iter).next_done = false) +#else /* not HAVE_MBRTOWC */ + +#include "mbchar.h" + +struct mbuiter_multi +{ + bool next_done; /* true if mbui_avail has already filled the following */ + struct mbchar cur; /* the current character: + const char *cur.ptr pointer to current character + The following are only valid after mbui_avail. + bool cur.c_valid true if wc is a valid wide character + unsigned char cur.c if c_valid: the current character + */ +}; + +static inline void +mbuiter_multi_next (struct mbuiter_multi *iter) +{ + if (iter->next_done) + return; + iter->cur.c = *iter->cur.ptr; + iter->cur.c_valid = true; + iter->next_done = true; +} + +static inline void +mbuiter_multi_copy (struct mbuiter_multi *new_iter, const struct mbuiter_multi *old_iter) +{ + new_iter->next_done = old_iter->next_done; + mb_copy (&new_iter->cur, &old_iter->cur); +} + +/* Iteration macros. */ +typedef struct mbuiter_multi mbui_iterator_t; +#define mbui_init(iter, startptr) \ + ((iter).cur.ptr = (startptr), \ + (iter).next_done = false) +#define mbui_avail(iter) \ + (mbuiter_multi_next (&(iter)), !mb_isnul ((iter).cur)) +#define mbui_advance(iter) \ + ((iter).cur.ptr++, (iter).next_done = false) + +#endif /* not HAVE_MBRTOWC */ + +static inline void +mbuiter_multi_reloc (struct mbuiter_multi *iter, ptrdiff_t ptrdiff) +{ + iter->cur.ptr += ptrdiff; +} + /* Access to the current character. 
*/ #define mbui_cur(iter) (iter).cur #define mbui_cur_ptr(iter) (iter).cur.ptr Index: m4/mbchar.m4 =================================================================== RCS file: /sources/gnulib/gnulib/m4/mbchar.m4,v retrieving revision 1.7 diff -u -r1.7 mbchar.m4 --- m4/mbchar.m4 28 Jan 2007 16:00:03 -0000 1.7 +++ m4/mbchar.m4 14 Feb 2007 16:59:55 -0000 @@ -10,4 +10,5 @@ AC_DEFUN([gl_MBCHAR], [ AC_REQUIRE([AC_GNU_SOURCE]) + AC_REQUIRE([gl_FUNC_MBRTOWC]) ]) Index: m4/mbiter.m4 =================================================================== RCS file: /sources/gnulib/gnulib/m4/mbiter.m4,v retrieving revision 1.2 diff -u -r1.2 mbiter.m4 --- m4/mbiter.m4 26 Sep 2005 13:58:51 -0000 1.2 +++ m4/mbiter.m4 14 Feb 2007 16:59:55 -0000 @@ -10,8 +10,6 @@ AC_DEFUN([gl_MBITER], [ AC_REQUIRE([AC_TYPE_MBSTATE_T]) - dnl The following line is that so the user can test HAVE_MBRTOWC before - dnl #include "mbiter.h" or "mbuiter.h". AC_REQUIRE([gl_FUNC_MBRTOWC]) : ]) Index: modules/mbchar =================================================================== RCS file: /sources/gnulib/gnulib/modules/mbchar,v retrieving revision 1.10 diff -u -r1.10 mbchar --- modules/mbchar 28 Jan 2007 16:00:02 -0000 1.10 +++ modules/mbchar 14 Feb 2007 16:59:55 -0000 @@ -5,6 +5,7 @@ lib/mbchar.h lib/mbchar.c m4/mbchar.m4 +m4/mbrtowc.m4 Depends-on: stdbool Index: modules/mbiter =================================================================== RCS file: /sources/gnulib/gnulib/modules/mbiter,v retrieving revision 1.2 diff -u -r1.2 mbiter --- modules/mbiter 26 Sep 2005 13:58:51 -0000 1.2 +++ modules/mbiter 14 Feb 2007 16:59:55 -0000 @@ -17,9 +17,7 @@ lib_SOURCES += mbiter.h Include: -#if HAVE_MBRTOWC #include "mbiter.h" -#endif License: LGPL Index: modules/mbuiter =================================================================== RCS file: /sources/gnulib/gnulib/modules/mbuiter,v retrieving revision 1.2 diff -u -r1.2 mbuiter --- modules/mbuiter 26 Sep 2005 13:58:51 -0000 1.2 +++ modules/mbuiter 14 Feb 2007 
16:59:55 -0000 @@ -18,9 +18,7 @@ lib_SOURCES += mbuiter.h Include: -#if HAVE_MBRTOWC #include "mbuiter.h" -#endif License: LGPL