The current implementation of mbrtowc incorrectly handles the cases where its first or second argument is NULL. See the POSIX specification [1].
When the first argument is NULL, it must behave as usual, except that it produces no output. Currently, it does not update the conversion state in this case. When the second argument is NULL, it must act as if it were called as mbrtowc (NULL, "", 1, state). Currently, it simply resets `state` to the initial conversion state. The fourth patch in this series makes detection of an invalid conversion state (an optional POSIX feature) more robust. This may be a little overkill, but I see no harm in doing so. Pali, remember there previously was mbrtowc_cp? I think we could bring it back. crtdll.dll's ___lc_codepage_func seems to do the trick with strchr (setlocale (LC_CTYPE, NULL), '.') and atoi to convert the code page to an int. I find it unnecessarily expensive to do this on each call to mbrtowc from mbsrtowcs. - Kirill Makurin [1] https://pubs.opengroup.org/onlinepubs/9799919799/functions/mbrtowc.html
From d1d0bc3442b3f91b4a12e42e2ab3d84409052bfd Mon Sep 17 00:00:00 2001 From: Kirill Makurin <[email protected]> Date: Wed, 24 Sep 2025 20:31:29 +0900 Subject: [PATCH 1/4] crt: correctly handle NULL arguments to mbrtowc When first argument to mbrtowc is NULL, mbrtowc must behave as usual except that it produces no output. Before this commit, it would not update conversion state. When second argument to mbrtowc is NULL, it must behave if it has been called as mbrtowc (NULL, "", 1, state). Before this commit it would simply set `state` to initial conversion state. Signed-off-by: Kirill Makurin <[email protected]> --- mingw-w64-crt/misc/mbrtowc.c | 20 +++++++++++++++----- mingw-w64-crt/testcases/t_mbrlen.c | 9 ++------- mingw-w64-crt/testcases/t_mbrtowc.c | 11 ++--------- 3 files changed, 19 insertions(+), 21 deletions(-) diff --git a/mingw-w64-crt/misc/mbrtowc.c b/mingw-w64-crt/misc/mbrtowc.c index ba0b9c09e..3c079aaa9 100644 --- a/mingw-w64-crt/misc/mbrtowc.c +++ b/mingw-w64-crt/misc/mbrtowc.c @@ -24,10 +24,15 @@ size_t mbrtowc ( state = &state_mbrtowc; } - /* Set `state` to initial state */ + /** + * Calling mbrtowc (..., NULL, ..., state) is equivalent to + * + * mbrtowc (NULL, "", 1, state) + */ if (mbs == NULL) { - *state = 0; - return 0; + wc = NULL; + mbs = ""; + count = 1; } /* Detect invalid conversion state */ @@ -96,8 +101,11 @@ size_t mbrtowc ( if (conversion_state.bytes[0] == '\0') { if (wc != NULL) { *wc = L'\0'; - *state = 0; } + + /* Set `state` to initial conversion state */ + *state = 0; + return 0; } @@ -119,9 +127,11 @@ size_t mbrtowc ( if (wc != NULL) { *wc = wcOut; - *state = 0; } + /* Set `state` to initial conversion state */ + *state = 0; + return bytes_consumed; eilseq: diff --git a/mingw-w64-crt/testcases/t_mbrlen.c b/mingw-w64-crt/testcases/t_mbrlen.c index 4b1c46ce9..bd7b19635 100644 --- a/mingw-w64-crt/testcases/t_mbrlen.c +++ b/mingw-w64-crt/testcases/t_mbrlen.c @@ -72,13 +72,8 @@ int main (void) { // reset errno _set_errno (0); - /** - 
* Set conversion state to initial state - */ - - assert (mbrlen (NULL, 0, &state) == 0); - assert (mbsinit (&state)); - assert (errno == 0); + // reset `state` + set_conversion_state (&state, 0); /** * Test SBCS code page diff --git a/mingw-w64-crt/testcases/t_mbrtowc.c b/mingw-w64-crt/testcases/t_mbrtowc.c index 4d97b9095..d13a19465 100644 --- a/mingw-w64-crt/testcases/t_mbrtowc.c +++ b/mingw-w64-crt/testcases/t_mbrtowc.c @@ -76,15 +76,8 @@ int main (void) { // reset errno _set_errno (0); - /** - * Set conversion state to initial state - */ - wc = WEOF; - - assert (mbrtowc (&wc, NULL, 0, &state) == 0); - assert (wc == WEOF); - assert (mbsinit (&state)); - assert (errno == 0); + // reset `state` + set_conversion_state (&state, 0); /** * Test SBCS code page -- 2.51.0.windows.1
From 70c3eb86b69858998b796f85d00e9df5caead8c7 Mon Sep 17 00:00:00 2001 From: Kirill Makurin <[email protected]> Date: Wed, 24 Sep 2025 20:35:44 +0900 Subject: [PATCH 2/4] crt: call mbrtowc with NULL first argument from mbrlen mbrlen (mbs, count, state) is equivalent to mbrtowc (NULL, mbs, count, state), except that it uses its own private mbstate_t object if `state` is NULL. Signed-off-by: Kirill Makurin <[email protected]> --- mingw-w64-crt/misc/mbrlen.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mingw-w64-crt/misc/mbrlen.c b/mingw-w64-crt/misc/mbrlen.c index 19afda540..7b57f5753 100644 --- a/mingw-w64-crt/misc/mbrlen.c +++ b/mingw-w64-crt/misc/mbrlen.c @@ -15,6 +15,5 @@ size_t mbrlen ( static mbstate_t state_mbrlen = {0}; state = &state_mbrlen; } - wchar_t wc = WEOF; - return mbrtowc (&wc, mbs, count, state); + return mbrtowc (NULL, mbs, count, state); } -- 2.51.0.windows.1
From cd93573020dd78b52a109b586b9ac48ac653c974 Mon Sep 17 00:00:00 2001 From: Kirill Makurin <[email protected]> Date: Wed, 24 Sep 2025 20:39:19 +0900 Subject: [PATCH 3/4] crt: reorder includes in misc/mbrtowc.c Signed-off-by: Kirill Makurin <[email protected]> --- mingw-w64-crt/misc/mbrtowc.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/mingw-w64-crt/misc/mbrtowc.c b/mingw-w64-crt/misc/mbrtowc.c index 3c079aaa9..a6933a0cb 100644 --- a/mingw-w64-crt/misc/mbrtowc.c +++ b/mingw-w64-crt/misc/mbrtowc.c @@ -3,13 +3,12 @@ * This file is part of the mingw-w64 runtime package. * No warranty is given; refer to the file DISCLAIMER.PD within this package. */ -#ifndef WIN32_LEAN_AND_MEAN -#define WIN32_LEAN_AND_MEAN -#endif +#include <errno.h> #include <locale.h> -#include <wchar.h> #include <stdlib.h> -#include <errno.h> +#include <wchar.h> + +#define WIN32_LEAN_AND_MEAN #include <windows.h> size_t mbrtowc ( -- 2.51.0.windows.1
From 823ccd02cf8b0026af4954198e5e6c343fa96606 Mon Sep 17 00:00:00 2001 From: Kirill Makurin <[email protected]> Date: Wed, 24 Sep 2025 21:04:01 +0900 Subject: [PATCH 4/4] crt: make detection of invalid conversion state by mbrtowc more robust In addition to storing consumed lead byte, store value of corresponding DBCS code page in mbstate_t object. This allows us to detect cases when `mbstate_t` object is reused with different code pages. Signed-off-by: Kirill Makurin <[email protected]> --- mingw-w64-crt/misc/mbrtowc.c | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/mingw-w64-crt/misc/mbrtowc.c b/mingw-w64-crt/misc/mbrtowc.c index a6933a0cb..594fc956e 100644 --- a/mingw-w64-crt/misc/mbrtowc.c +++ b/mingw-w64-crt/misc/mbrtowc.c @@ -34,11 +34,6 @@ size_t mbrtowc ( count = 1; } - /* Detect invalid conversion state */ - if ((unsigned) *state > 0xFF) { - goto einval; - } - /* Both ISO C and POSIX do not mention this case */ if (count == 0) { return (size_t) -2; @@ -53,12 +48,32 @@ size_t mbrtowc ( /* Treat `state` as an array of bytes */ union { mbstate_t state; - char bytes[4]; + struct { + char bytes[2]; + unsigned short cp; + }; } conversion_state = {.state = *state}; - /* For SBCS code pages `state` must always be in initial state */ - if (mb_cur_max == 1 && conversion_state.bytes[0]) { - goto einval; + if (!mbsinit (&conversion_state.state)) { + /* For SBCS code pages `state` must always be in initial state */ + if (mb_cur_max == 1) { + goto einval; + } + + /* `state` was set with a different code page */ + if (conversion_state.cp != cp) { + goto einval; + } + + /* conversion_state.bytes[0] must contain a lead byte used by `cp` */ + if (!isleadbyte ((unsigned char) conversion_state.bytes[0])) { + goto einval; + } + + /* conversion_state.bytes[1] must be 0 */ + if (conversion_state.bytes[1] != 0) { + goto einval; + } } /* Handle "C" locale */ @@ -81,6 +96,7 @@ size_t mbrtowc ( length = 2; } else if (mb_cur_max == 
2 && isleadbyte ((unsigned char) mbs[0])) { conversion_state.bytes[0] = mbs[0]; + conversion_state.cp = cp; /* We need to examine mbs[1] */ if (count < 2) { -- 2.51.0.windows.1
_______________________________________________ Mingw-w64-public mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/mingw-w64-public
