Jim Meyering wrote: > Jim Meyering wrote: >> In comparing regcomp.c from gnulib with the one in glibc, >> I found numerous important differences: >> >> - one bug that's been fixed in gnulib for 5 years, yet not in glibc. >> I reported it here: >> http://sourceware.org/bugzilla/show_bug.cgi?id=11127 >> >> - numerous bugs fixed in glibc but not in gnulib: >> regcomp: sync from glibc; always use nl_langinfo > ... > > I nearly forgot to ask for opinions on this first change, in particular. > nl_langinfo is new, but I'll be sure to test this before pushing anything. > >> Subject: [PATCH 1/6] regcomp: sync from glibc; always use nl_langinfo > ... >> + regcomp: sync from glibc; always use nl_langinfo >> + * lib/regcomp.c (init_dfa) [!LIBC]: Always use nl_langinfo (CODESET), >> + now that gnulib provides it. Recognize UTF8 as well as UTF-8. >> + * modules/regex (Depends-on): Add nl_langinfo. > ...
One tweak required: include <langinfo.h> unconditionally: From 0cfc3b87f0c3be63db1075ed465443c4b3c4cec2 Mon Sep 17 00:00:00 2001 From: Jim Meyering <meyer...@redhat.com> Date: Mon, 4 Jan 2010 09:07:52 +0100 Subject: [PATCH 1/7] regcomp: sync from glibc; always use nl_langinfo * lib/regcomp.c (init_dfa) [!LIBC]: Always use nl_langinfo (CODESET), now that gnulib provides it. Recognize UTF8 as well as UTF-8. * lib/regex_internal.h: Always include <langinfo.h>, now. * modules/regex (Depends-on): Add nl_langinfo. --- ChangeLog | 7 +++++++ lib/regcomp.c | 7 ++++++- lib/regex_internal.h | 5 ++--- modules/regex | 1 + 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index 272c77f..a69a14c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +2010-01-04 Jim Meyering <meyer...@redhat.com> + + regcomp: sync from glibc; always use nl_langinfo + * lib/regcomp.c (init_dfa) [!LIBC]: Always use nl_langinfo (CODESET), + now that gnulib provides it. Recognize UTF8 as well as UTF-8. + * modules/regex (Depends-on): Add nl_langinfo. 
+ 2010-01-04 Eric Blake <e...@byu.net> fdopendir: fix configure test diff --git a/lib/regcomp.c b/lib/regcomp.c index 9745bea..3f21722 100644 --- a/lib/regcomp.c +++ b/lib/regcomp.c @@ -850,6 +850,9 @@ static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len) { __re_size_t table_size; +#ifndef _LIBC + char *codeset_name; +#endif #ifdef RE_ENABLE_I18N size_t max_i18n_object_size = MAX (sizeof (wchar_t), sizeof (wctype_t)); #else @@ -893,7 +896,9 @@ init_dfa (re_dfa_t *dfa, size_t pat_len) dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII) != 0); #else - if (strcmp (locale_charset (), "UTF-8") == 0) + codeset_name = nl_langinfo (CODESET); + if (strcasecmp (codeset_name, "UTF-8") == 0 + || strcasecmp (codeset_name, "UTF8") == 0) dfa->is_utf8 = 1; /* We check exhaustively in the loop below if this charset is a diff --git a/lib/regex_internal.h b/lib/regex_internal.h index 1f39507..f5c3125 100644 --- a/lib/regex_internal.h +++ b/lib/regex_internal.h @@ -28,9 +28,8 @@ #include <stdlib.h> #include <string.h> -#ifdef _LIBC -# include <langinfo.h> -#else +#include <langinfo.h> +#ifndef _LIBC # include "localcharset.h" #endif #if defined HAVE_LOCALE_H || defined _LIBC diff --git a/modules/regex b/modules/regex index c6a1235..f516406 100644 --- a/modules/regex +++ b/modules/regex @@ -22,6 +22,7 @@ memcmp memmove mbrtowc mbsinit +nl_langinfo stdbool stdint ssize_t -- 1.6.6.384.g14e6a