After mbrtowc or mbrtoc32 has failed with return value (size_t) -1, we don't know what state the mbstate_t is in: the C standard leaves the conversion state unspecified after an encoding error. Therefore it's best to clear it before potentially calling mbrtowc or mbrtoc32 again.
2023-07-04 Bruno Haible <br...@clisp.org> mbiter, mbuiter, mbfile: Improve state handling after invalid input. * lib/mbiter.h (mbiter_multi_next): After an invalid multibyte sequence was encountered, clear the state. * lib/mbuiter.h (mbuiter_multi_next): Likewise. * lib/mbfile.h (mbfile_multi_getc): Likewise. diff --git a/lib/mbfile.h b/lib/mbfile.h index 9a2532992e..7670dabfbb 100644 --- a/lib/mbfile.h +++ b/lib/mbfile.h @@ -147,6 +147,8 @@ mbfile_multi_getc (struct mbchar *mbc, struct mbfile_multi *mbf) /* Return a single byte. */ bytes = 1; mbc->wc_valid = false; + /* Allow the next invocation to continue from a sane state. */ + memset (&mbf->state, '\0', sizeof (mbstate_t)); break; } else if (bytes == (size_t) -2) diff --git a/lib/mbiter.h b/lib/mbiter.h index bc88b4f3a2..e4963010f3 100644 --- a/lib/mbiter.h +++ b/lib/mbiter.h @@ -146,8 +146,9 @@ mbiter_multi_next (struct mbiter_multi *iter) /* An invalid multibyte sequence was encountered. */ iter->cur.bytes = 1; iter->cur.wc_valid = false; - /* Whether to set iter->in_shift = false and reset iter->state - or not is not very important; the string is bogus anyway. */ + /* Allow the next invocation to continue from a sane state. */ + iter->in_shift = false; + memset (&iter->state, '\0', sizeof (mbstate_t)); } else if (iter->cur.bytes == (size_t) -2) { diff --git a/lib/mbuiter.h b/lib/mbuiter.h index 93dec81603..e0fcd03d3d 100644 --- a/lib/mbuiter.h +++ b/lib/mbuiter.h @@ -155,8 +155,9 @@ mbuiter_multi_next (struct mbuiter_multi *iter) /* An invalid multibyte sequence was encountered. */ iter->cur.bytes = 1; iter->cur.wc_valid = false; - /* Whether to set iter->in_shift = false and reset iter->state - or not is not very important; the string is bogus anyway. */ + /* Allow the next invocation to continue from a sane state. */ + iter->in_shift = false; + memset (&iter->state, '\0', sizeof (mbstate_t)); } else if (iter->cur.bytes == (size_t) -2) {