Module Name: src Committed By: riastradh Date: Sun Aug 18 04:51:16 UTC 2024
Modified Files: src/tests/lib/libc/locale: t_c16rtomb.c t_c8rtomb.c Log Message: c8rtomb(3), c16rtomb(3), c32rtomb(3): Test stateful shift sequences. PR lib/58612: c8rtomb/c16rtomb/c32rtomb yield suboptimal shift sequences To generate a diff of this commit: cvs rdiff -u -r1.3 -r1.4 src/tests/lib/libc/locale/t_c16rtomb.c \ src/tests/lib/libc/locale/t_c8rtomb.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/tests/lib/libc/locale/t_c16rtomb.c diff -u src/tests/lib/libc/locale/t_c16rtomb.c:1.3 src/tests/lib/libc/locale/t_c16rtomb.c:1.4 --- src/tests/lib/libc/locale/t_c16rtomb.c:1.3 Sun Aug 18 02:19:35 2024 +++ src/tests/lib/libc/locale/t_c16rtomb.c Sun Aug 18 04:51:16 2024 @@ -1,4 +1,4 @@ -/* $NetBSD: t_c16rtomb.c,v 1.3 2024/08/18 02:19:35 riastradh Exp $ */ +/* $NetBSD: t_c16rtomb.c,v 1.4 2024/08/18 04:51:16 riastradh Exp $ */ /*- * Copyright (c) 2002 Tim J. Robbins @@ -33,7 +33,7 @@ */ #include <sys/cdefs.h> -__RCSID("$NetBSD: t_c16rtomb.c,v 1.3 2024/08/18 02:19:35 riastradh Exp $"); +__RCSID("$NetBSD: t_c16rtomb.c,v 1.4 2024/08/18 04:51:16 riastradh Exp $"); #include <errno.h> #include <limits.h> @@ -59,7 +59,7 @@ require_lc_ctype(const char *locale_name } static mbstate_t s; -static char buf[MB_LEN_MAX + 1]; +static char buf[7*MB_LEN_MAX + 1]; ATF_TC_WITHOUT_HEAD(c16rtomb_c_locale_test); ATF_TC_BODY(c16rtomb_c_locale_test, tc) @@ -114,6 +114,88 @@ ATF_TC_BODY(c16rtomb_c_locale_test, tc) "buf=[%02x %02x]", buf[0], buf[1]); } +ATF_TC_WITHOUT_HEAD(c16rtomb_iso2022jp_locale_test); +ATF_TC_BODY(c16rtomb_iso2022jp_locale_test, tc) +{ + char *p; + size_t n; + + require_lc_ctype("ja_JP.ISO-2022-JP"); + + /* + * If the buffer argument is NULL, c16 is implicitly 0, + * c16rtomb() resets its internal state. + */ + ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, L'\0', NULL)), 1, "n=%zu", n); + ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, 0xdc00, NULL)), 1, "n=%zu", n); + + /* Null wide character. */ + memset(&s, 0, sizeof(s)); + memset(buf, 0xcc, sizeof(buf)); + ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0, &s)), 1, "n=%zu", n); + ATF_CHECK_MSG(((unsigned char)buf[0] == 0 && + (unsigned char)buf[1] == 0xcc), + "buf=[%02x %02x]", buf[0], buf[1]); + + /* Latin letter A, internal state. */ + ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, L'\0', NULL)), 1, "n=%zu", n); + ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, L'A', NULL)), 1, "n=%zu", n); + + /* + * 1. U+0041 LATIN CAPITAL LETTER A + * 2. 
U+00A5 YEN SIGN + * 3. U+00A5 YEN SIGN (again, no shift needed) + * 4. U+30A2 KATAKANA LETTER A + * 5. U+30A2 KATAKANA LETTER A (again, no shift needed) + * 6. incomplete UTF-16 surrogate pair -- no output + * 7. U+0000 NUL (plus shift sequence to initial state) + */ + memset(&s, 0, sizeof(s)); + memset(buf, 0xcc, sizeof(buf)); + p = buf; + ATF_CHECK_EQ_MSG((n = c16rtomb(p, L'A', &s)), 1, "n=%zu", n); /* 1 */ + p += 1; + atf_tc_expect_fail("PR lib/58612:" + " c8rtomb/c16rtomb/c32rtomb yield suboptimal shift sequences"); + ATF_CHECK_EQ_MSG((n = c16rtomb(p, 0xa5, &s)), 4, "n=%zu", n); /* 2 */ + p += 4; + ATF_CHECK_EQ_MSG((n = c16rtomb(p, 0xa5, &s)), 1, "n=%zu", n); /* 3 */ + p += 1; + ATF_CHECK_EQ_MSG((n = c16rtomb(p, 0x30a2, &s)), 5, "n=%zu", n); /* 4 */ + p += 5; + ATF_CHECK_EQ_MSG((n = c16rtomb(p, 0x30a2, &s)), 2, "n=%zu", n); /* 5 */ + p += 2; + ATF_CHECK_EQ_MSG((n = c16rtomb(p, 0xd800, &s)), 0, "n=%zu", n); /* 6 */ + ATF_CHECK_EQ_MSG((n = c16rtomb(p, L'\0', &s)), 4, "n=%zu", n); /* 7 */ + p += 4; + ATF_CHECK_MSG(((unsigned char)buf[0] == 'A' && + (unsigned char)buf[1] == 0x1b && /* shift ISO/IEC 646:JP */ + (unsigned char)buf[2] == '(' && + (unsigned char)buf[3] == 'J' && + (unsigned char)buf[4] == 0x5c && /* YEN SIGN */ + (unsigned char)buf[5] == 0x5c && /* YEN SIGN */ + (unsigned char)buf[6] == 0x1b && /* shift JIS X 0208-1983 */ + (unsigned char)buf[7] == '$' && + (unsigned char)buf[8] == 'B' && + (unsigned char)buf[9] == 0x25 && /* KATAKANA LETTER A */ + (unsigned char)buf[10] == 0x22 && + (unsigned char)buf[11] == 0x25 && /* KATAKANA LETTER A */ + (unsigned char)buf[12] == 0x22 && + (unsigned char)buf[13] == 0x1b && /* shift US-ASCII */ + (unsigned char)buf[14] == '(' && + (unsigned char)buf[15] == 'B' && + (unsigned char)buf[16] == '\0' && + (unsigned char)buf[17] == 0xcc), + "buf=[%02x %02x %02x %02x %02x %02x %02x %02x " + " %02x %02x %02x %02x %02x %02x %02x %02x " + " %02x %02x]", + buf[0], buf[1], buf[2], buf[3], + buf[4], buf[5], buf[6], buf[7], + 
buf[8], buf[9], buf[10], buf[11], + buf[12], buf[13], buf[14], buf[15], + buf[16], buf[17]); +} + ATF_TC_WITHOUT_HEAD(c16rtomb_iso_8859_1_test); ATF_TC_BODY(c16rtomb_iso_8859_1_test, tc) { @@ -198,6 +280,7 @@ ATF_TP_ADD_TCS(tp) { ATF_TP_ADD_TC(tp, c16rtomb_c_locale_test); + ATF_TP_ADD_TC(tp, c16rtomb_iso2022jp_locale_test); ATF_TP_ADD_TC(tp, c16rtomb_iso_8859_1_test); ATF_TP_ADD_TC(tp, c16rtomb_iso_8859_15_test); ATF_TP_ADD_TC(tp, c16rtomb_utf_8_test); Index: src/tests/lib/libc/locale/t_c8rtomb.c diff -u src/tests/lib/libc/locale/t_c8rtomb.c:1.3 src/tests/lib/libc/locale/t_c8rtomb.c:1.4 --- src/tests/lib/libc/locale/t_c8rtomb.c:1.3 Sun Aug 18 02:19:35 2024 +++ src/tests/lib/libc/locale/t_c8rtomb.c Sun Aug 18 04:51:16 2024 @@ -1,4 +1,4 @@ -/* $NetBSD: t_c8rtomb.c,v 1.3 2024/08/18 02:19:35 riastradh Exp $ */ +/* $NetBSD: t_c8rtomb.c,v 1.4 2024/08/18 04:51:16 riastradh Exp $ */ /*- * Copyright (c) 2002 Tim J. Robbins @@ -33,7 +33,7 @@ */ #include <sys/cdefs.h> -__RCSID("$NetBSD: t_c8rtomb.c,v 1.3 2024/08/18 02:19:35 riastradh Exp $"); +__RCSID("$NetBSD: t_c8rtomb.c,v 1.4 2024/08/18 04:51:16 riastradh Exp $"); #include <errno.h> #include <limits.h> @@ -59,7 +59,7 @@ require_lc_ctype(const char *locale_name } static mbstate_t s; -static char buf[MB_LEN_MAX + 1]; +static char buf[7*MB_LEN_MAX + 1]; ATF_TC_WITHOUT_HEAD(c8rtomb_c_locale_test); ATF_TC_BODY(c8rtomb_c_locale_test, tc) @@ -142,6 +142,102 @@ ATF_TC_BODY(c8rtomb_c_locale_test, tc) "buf=[%02x %02x]", buf[0], buf[1]); } +ATF_TC_WITHOUT_HEAD(c8rtomb_iso2022jp_locale_test); +ATF_TC_BODY(c8rtomb_iso2022jp_locale_test, tc) +{ + char *p; + size_t n; + + require_lc_ctype("ja_JP.ISO-2022-JP"); + + /* + * If the buffer argument is NULL, c8 is implicitly 0, + * c8rtomb() resets its internal state. 
+ */ + ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, '\0', NULL)), 1, "n=%zu", n); + ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0x80, NULL)), 1, "n=%zu", n); + ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xc0, NULL)), 1, "n=%zu", n); + ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xe0, NULL)), 1, "n=%zu", n); + ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xf0, NULL)), 1, "n=%zu", n); + ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xf8, NULL)), 1, "n=%zu", n); + ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xfc, NULL)), 1, "n=%zu", n); + ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xfe, NULL)), 1, "n=%zu", n); + ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xff, NULL)), 1, "n=%zu", n); + + /* Null wide character. */ + memset(&s, 0, sizeof(s)); + memset(buf, 0xcc, sizeof(buf)); + ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0, &s)), 1, "n=%zu", n); + ATF_CHECK_MSG(((unsigned char)buf[0] == 0 && + (unsigned char)buf[1] == 0xcc), + "buf=[%02x %02x]", buf[0], buf[1]); + + /* Latin letter A, internal state. */ + ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, '\0', NULL)), 1, "n=%zu", n); + ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 'A', NULL)), 1, "n=%zu", n); + + /* + * 1. U+0041 LATIN CAPITAL LETTER A + * 2. U+00A5 YEN SIGN + * 3. U+00A5 YEN SIGN (again, no shift needed) + * 4. U+30A2 KATAKANA LETTER A + * 5. U+30A2 KATAKANA LETTER A (again, no shift needed) + * 6. incomplete UTF-8 multibyte sequence -- no output + * 7. 
U+0000 NUL (plus shift sequence to initial state) + */ + memset(&s, 0, sizeof(s)); + memset(buf, 0xcc, sizeof(buf)); + p = buf; + ATF_CHECK_EQ_MSG((n = c8rtomb(p, 'A', &s)), 1, "n=%zu", n); /* 1 */ + p += 1; + ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xc2, &s)), 0, "n=%zu", n); /* 2 */ + atf_tc_expect_fail("PR lib/58612:" + " c8rtomb/c16rtomb/c32rtomb yield suboptimal shift sequences"); + ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xa5, &s)), 4, "n=%zu", n); + p += 4; + ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xc2, &s)), 0, "n=%zu", n); /* 3 */ + ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xa5, &s)), 1, "n=%zu", n); + p += 1; + ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xe3, &s)), 0, "n=%zu", n); /* 4 */ + ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0x82, &s)), 0, "n=%zu", n); + ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xa2, &s)), 4, "n=%zu", n); + p += 5; + ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xe3, &s)), 0, "n=%zu", n); /* 5 */ + ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0x82, &s)), 0, "n=%zu", n); + ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xa2, &s)), 2, "n=%zu", n); + p += 2; + ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xe3, &s)), 0, "n=%zu", n); /* 6 */ + ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0x82, &s)), 0, "n=%zu", n); + ATF_CHECK_EQ_MSG((n = c8rtomb(p, '\0', &s)), 4, "n=%zu", n); /* 7 */ + p += 4; + ATF_CHECK_MSG(((unsigned char)buf[0] == 'A' && + (unsigned char)buf[1] == 0x1b && /* shift ISO/IEC 646:JP */ + (unsigned char)buf[2] == '(' && + (unsigned char)buf[3] == 'J' && + (unsigned char)buf[4] == 0x5c && /* YEN SIGN */ + (unsigned char)buf[5] == 0x5c && /* YEN SIGN */ + (unsigned char)buf[6] == 0x1b && /* shift JIS X 0208-1983 */ + (unsigned char)buf[7] == '$' && + (unsigned char)buf[8] == 'B' && + (unsigned char)buf[9] == 0x25 && /* KATAKANA LETTER A */ + (unsigned char)buf[10] == 0x22 && + (unsigned char)buf[11] == 0x25 && /* KATAKANA LETTER A */ + (unsigned char)buf[12] == 0x22 && + (unsigned char)buf[13] == 0x1b && /* shift US-ASCII */ + (unsigned char)buf[14] == '(' && + (unsigned char)buf[15] == 'B' && + (unsigned char)buf[16] == '\0' && + 
(unsigned char)buf[17] == 0xcc), + "buf=[%02x %02x %02x %02x %02x %02x %02x %02x " + " %02x %02x %02x %02x %02x %02x %02x %02x " + " %02x %02x]", + buf[0], buf[1], buf[2], buf[3], + buf[4], buf[5], buf[6], buf[7], + buf[8], buf[9], buf[10], buf[11], + buf[12], buf[13], buf[14], buf[15], + buf[16], buf[17]); +} + ATF_TC_WITHOUT_HEAD(c8rtomb_iso_8859_1_test); ATF_TC_BODY(c8rtomb_iso_8859_1_test, tc) { @@ -252,6 +348,7 @@ ATF_TP_ADD_TCS(tp) { ATF_TP_ADD_TC(tp, c8rtomb_c_locale_test); + ATF_TP_ADD_TC(tp, c8rtomb_iso2022jp_locale_test); ATF_TP_ADD_TC(tp, c8rtomb_iso_8859_1_test); ATF_TP_ADD_TC(tp, c8rtomb_iso_8859_15_test); ATF_TP_ADD_TC(tp, c8rtomb_utf_8_test);