These are follow-up patches for "crt: add internal function __mingw_mbrtowc_cp" which make similar changes for the wcrtomb and wcsrtombs functions.
- Kirill Makurin
From bcd1cbf6b65714788442dc0d7c9950b459162df6 Mon Sep 17 00:00:00 2001 From: Kirill Makurin <[email protected]> Date: Sat, 4 Oct 2025 23:24:32 +0900 Subject: [PATCH 1/2] crt: add internal function __mingw_wcrtomb_cp This function is called internally by the wcrtomb and wcsrtombs functions. The previous implementation of wcsrtombs called wcrtomb for each character, which in turn called ___lc_codepage_func every time. The implementation of ___lc_codepage_func for crtdll.dll is quite expensive as it parses the return value of setlocale(LC_ALL, NULL). Signed-off-by: Kirill Makurin <[email protected]> --- mingw-w64-crt/include/mingw-wchar.h | 1 + mingw-w64-crt/misc/wcrtomb.c | 46 ++++++++++++++++++++++++----- mingw-w64-crt/misc/wcsrtombs.c | 12 +++++++- 3 files changed, 50 insertions(+), 9 deletions(-) diff --git a/mingw-w64-crt/include/mingw-wchar.h b/mingw-w64-crt/include/mingw-wchar.h index 902221083..e372a74ea 100644 --- a/mingw-w64-crt/include/mingw-wchar.h +++ b/mingw-w64-crt/include/mingw-wchar.h @@ -9,4 +9,5 @@ #include <wchar.h> size_t __cdecl __mingw_mbrtowc_cp(wchar_t * __restrict__ _DstCh,const char * __restrict__ _SrcCh,size_t _SizeInBytes,mbstate_t * __restrict__ _State, unsigned _Cp, int _MbCurMax); +size_t __cdecl __mingw_wcrtomb_cp(char * __restrict__ _Dest,wchar_t _Source,mbstate_t * __restrict__ _State, unsigned cp, int mb_cur_max); #endif diff --git a/mingw-w64-crt/misc/wcrtomb.c b/mingw-w64-crt/misc/wcrtomb.c index 552d6bd0a..55f37cbf2 100644 --- a/mingw-w64-crt/misc/wcrtomb.c +++ b/mingw-w64-crt/misc/wcrtomb.c @@ -11,10 +11,32 @@ #define WIN32_LEAN_AND_MEAN #include <windows.h> -size_t wcrtomb ( +#include "mingw-wchar.h" + +/** + * __mingw_wcrtomb_cp is internal implementation for C95 functions wcrtomb and + * wcsrtombs. 
+ * + * In order to perform conversion we need the following information: + * + * - code page used by active locale (which can be a thread locale for + * msvcr80.dll and later); obtained by calling ___lc_codepage_func + * + * - maximum character length in used code page; obtained by calling + * ___mb_cur_max_func + * + * crtdll.dll's ___lc_codepage_func is quite expensive as it obtains this + * information by parsing return value of setlocale(LC_CTYPE, NULL). Using + * __mingw_wcrtomb_cp allows wcsrtombs call both ___lc_codepage_func and + * ___mb_cur_max_func only once. + */ + +size_t __mingw_wcrtomb_cp ( char *__restrict__ mbc, wchar_t wc, - mbstate_t *__restrict__ state + mbstate_t *__restrict__ state, + unsigned cp, + int mb_cur_max ) { /* Set `state` to initial state */ if (mbc == NULL) { @@ -38,12 +60,6 @@ size_t wcrtomb ( return 1; } - /* Code page used by current locale */ - unsigned cp = ___lc_codepage_func (); - - /* Maximum character length in `cp` */ - int mb_cur_max = ___mb_cur_max_func (); - /* Handle "C" locale */ if (cp == 0) { if (wc > 0xFF) { @@ -74,3 +90,17 @@ eilseq: errno = EILSEQ; return (size_t) -1; } + +size_t wcrtomb ( + char *__restrict__ mbc, + wchar_t wc, + mbstate_t *__restrict__ state +) { + /* Code page used by current locale */ + unsigned cp = ___lc_codepage_func (); + + /* Maximum character length in `cp` */ + int mb_cur_max = ___mb_cur_max_func (); + + return __mingw_wcrtomb_cp (mbc, wc, state, cp, mb_cur_max); +} diff --git a/mingw-w64-crt/misc/wcsrtombs.c b/mingw-w64-crt/misc/wcsrtombs.c index 7dfc5e88d..92538d03a 100644 --- a/mingw-w64-crt/misc/wcsrtombs.c +++ b/mingw-w64-crt/misc/wcsrtombs.c @@ -3,9 +3,13 @@ * This file is part of the mingw-w64 runtime package. * No warranty is given; refer to the file DISCLAIMER.PD within this package. 
*/ +#include <locale.h> +#include <stdlib.h> #include <string.h> #include <wchar.h> +#include "mingw-wchar.h" + size_t wcsrtombs ( char *__restrict__ mbs, const wchar_t **__restrict__ wcs, @@ -20,8 +24,14 @@ size_t wcsrtombs ( /* Next wide character to convert */ const wchar_t *wc = *wcs; + /* Code page used by current locale */ + unsigned cp = ___lc_codepage_func (); + + /* Maximum character length in `cp` */ + int mb_cur_max = ___mb_cur_max_func (); + while (1) { - const size_t length = wcrtomb (mbc, *wc, state); + const size_t length = __mingw_wcrtomb_cp (mbc, *wc, state, cp, mb_cur_max); /* Conversion failed */ if (length == (size_t) -1) { -- 2.51.0.windows.1
From cbae6ea51b58b1d350da538a0b422e387cc877bb Mon Sep 17 00:00:00 2001 From: Kirill Makurin <[email protected]> Date: Sat, 4 Oct 2025 23:31:58 +0900 Subject: [PATCH 2/2] crt: move definition of __mingw_wcrtomb_cp to a separate file Signed-off-by: Kirill Makurin <[email protected]> --- mingw-w64-crt/Makefile.am | 1 + mingw-w64-crt/misc/__mingw_wcrtomb_cp.c | 92 +++++++++++++++++++++++++ mingw-w64-crt/misc/wcrtomb.c | 82 ---------------------- 3 files changed, 93 insertions(+), 82 deletions(-) create mode 100644 mingw-w64-crt/misc/__mingw_wcrtomb_cp.c diff --git a/mingw-w64-crt/Makefile.am b/mingw-w64-crt/Makefile.am index cae1bf309..6130ddb83 100644 --- a/mingw-w64-crt/Makefile.am +++ b/mingw-w64-crt/Makefile.am @@ -168,6 +168,7 @@ src_libws2_32=libsrc/ws2_32.c \ # Files included in all libmsvcr*.a src_msvcrt_common=\ misc/__mingw_mbrtowc_cp.c \ + misc/__mingw_wcrtomb_cp.c \ misc/_onexit.c \ misc/mbrlen.c \ misc/mbrtowc.c \ diff --git a/mingw-w64-crt/misc/__mingw_wcrtomb_cp.c b/mingw-w64-crt/misc/__mingw_wcrtomb_cp.c new file mode 100644 index 000000000..2eb4abaed --- /dev/null +++ b/mingw-w64-crt/misc/__mingw_wcrtomb_cp.c @@ -0,0 +1,92 @@ +/** + * This file has no copyright assigned and is placed in the Public Domain. + * This file is part of the mingw-w64 runtime package. + * No warranty is given; refer to the file DISCLAIMER.PD within this package. + */ +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <wchar.h> + +#define WIN32_LEAN_AND_MEAN +#include <windows.h> + +#include "mingw-wchar.h" + +/** + * __mingw_wcrtomb_cp is internal implementation for C95 functions wcrtomb and + * wcsrtombs. 
+ * + * In order to perform conversion we need the following information: + * + * - code page used by active locale (which can be a thread locale for + * msvcr80.dll and later); obtained by calling ___lc_codepage_func + * + * - maximum character length in used code page; obtained by calling + * ___mb_cur_max_func + * + * crtdll.dll's ___lc_codepage_func is quite expensive as it obtains this + * information by parsing return value of setlocale(LC_CTYPE, NULL). Using + * __mingw_wcrtomb_cp allows wcsrtombs call both ___lc_codepage_func and + * ___mb_cur_max_func only once. + */ + +size_t __mingw_wcrtomb_cp ( + char *__restrict__ mbc, + wchar_t wc, + mbstate_t *__restrict__ state, + unsigned cp, + int mb_cur_max +) { + /* Set `state` to initial state */ + if (mbc == NULL) { + if (state != NULL) { + *state = 0; + } + return 1; + } + + /* Detect invalid conversion state */ + if (state != NULL && *state) { + errno = EINVAL; + return (size_t) -1; + } + + /* Store terminating L'\0' */ + if (wc == L'\0') { + if (mbc != NULL) { + mbc[0] = '\0'; + } + return 1; + } + + /* Handle "C" locale */ + if (cp == 0) { + if (wc > 0xFF) { + goto eilseq; + } + if (mbc != NULL) { + mbc[0] = (char) wc; + } + return 1; + } + + BOOL defaultCharacterUsed = FALSE; + char buffer[2] = {0, 0}; + + /* For consistency with CRT, we do not use WC_NO_BEST_FIT_CHARS */ + int ret = WideCharToMultiByte ( + cp, 0, &wc, 1, buffer, mb_cur_max, NULL, &defaultCharacterUsed + ); + + if (ret == 0 || ret > mb_cur_max || defaultCharacterUsed) { + goto eilseq; + } + + memcpy (mbc, buffer, ret); + return ret; + +eilseq: + errno = EILSEQ; + return (size_t) -1; +} diff --git a/mingw-w64-crt/misc/wcrtomb.c b/mingw-w64-crt/misc/wcrtomb.c index 55f37cbf2..10c7bbae5 100644 --- a/mingw-w64-crt/misc/wcrtomb.c +++ b/mingw-w64-crt/misc/wcrtomb.c @@ -3,94 +3,12 @@ * This file is part of the mingw-w64 runtime package. * No warranty is given; refer to the file DISCLAIMER.PD within this package. 
*/ -#include <errno.h> #include <locale.h> #include <stdlib.h> #include <wchar.h> -#define WIN32_LEAN_AND_MEAN -#include <windows.h> - #include "mingw-wchar.h" -/** - * __mingw_wcrtomb_cp is internal implementation for C95 functions wcrtomb and - * wcsrtombs. - * - * In order to perform conversion we need the following information: - * - * - code page used by active locale (which can be a thread locale for - * msvcr80.dll and later); obtained by calling ___lc_codepage_func - * - * - maximum character length in used code page; obtained by calling - * ___mb_cur_max_func - * - * crtdll.dll's ___lc_codepage_func is quite expensive as it obtains this - * information by parsing return value of setlocale(LC_CTYPE, NULL). Using - * __mingw_wcrtomb_cp allows wcsrtombs call both ___lc_codepage_func and - * ___mb_cur_max_func only once. - */ - -size_t __mingw_wcrtomb_cp ( - char *__restrict__ mbc, - wchar_t wc, - mbstate_t *__restrict__ state, - unsigned cp, - int mb_cur_max -) { - /* Set `state` to initial state */ - if (mbc == NULL) { - if (state != NULL) { - *state = 0; - } - return 1; - } - - /* Detect invalid conversion state */ - if (state != NULL && *state) { - errno = EINVAL; - return (size_t) -1; - } - - /* Store terminating L'\0' */ - if (wc == L'\0') { - if (mbc != NULL) { - mbc[0] = '\0'; - } - return 1; - } - - /* Handle "C" locale */ - if (cp == 0) { - if (wc > 0xFF) { - goto eilseq; - } - if (mbc != NULL) { - mbc[0] = (char) wc; - } - return 1; - } - - BOOL defaultCharacterUsed = FALSE; - char buffer[2] = {0, 0}; - - /* For consistency with CRT, we do not use WC_NO_BEST_FIT_CHARS */ - int ret = WideCharToMultiByte ( - cp, 0, &wc, 1, buffer, mb_cur_max, NULL, &defaultCharacterUsed - ); - - if (ret == 0 || ret > mb_cur_max || defaultCharacterUsed) { - goto eilseq; - } - - memcpy (mbc, buffer, ret); - return ret; - -eilseq: - errno = EILSEQ; - return (size_t) -1; -} - size_t wcrtomb ( char *__restrict__ mbc, wchar_t wc, -- 2.51.0.windows.1
_______________________________________________ Mingw-w64-public mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/mingw-w64-public
