Module Name: src
Committed By: riastradh
Date: Fri Mar 28 22:52:35 UTC 2025
Modified Files:
src/tests/lib/libc/gen: t_ctype.c
Log Message:
t_ctype: Test some more code points with potential for EOF confusion.
PR lib/58208: ctype(3) provides poor runtime feedback of abuse
To generate a diff of this commit:
cvs rdiff -u -r1.5 -r1.6 src/tests/lib/libc/gen/t_ctype.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/tests/lib/libc/gen/t_ctype.c
diff -u src/tests/lib/libc/gen/t_ctype.c:1.5 src/tests/lib/libc/gen/t_ctype.c:1.6
--- src/tests/lib/libc/gen/t_ctype.c:1.5 Fri Mar 28 22:51:58 2025
+++ src/tests/lib/libc/gen/t_ctype.c Fri Mar 28 22:52:35 2025
@@ -1,4 +1,4 @@
-/* $NetBSD: t_ctype.c,v 1.5 2025/03/28 22:51:58 riastradh Exp $ */
+/* $NetBSD: t_ctype.c,v 1.6 2025/03/28 22:52:35 riastradh Exp $ */
/*-
* Copyright (c) 2025 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
*/
#include <sys/cdefs.h>
-__RCSID("$NetBSD: t_ctype.c,v 1.5 2025/03/28 22:51:58 riastradh Exp $");
+__RCSID("$NetBSD: t_ctype.c,v 1.6 2025/03/28 22:52:35 riastradh Exp $");
#include <atf-c.h>
#include <ctype.h>
@@ -931,6 +931,114 @@ DEF_TEST_USE(isblank)
DEF_TEST_USE(toupper)
DEF_TEST_USE(tolower)
+ATF_TC(eof_confusion_iso8859_1);
+ATF_TC_HEAD(eof_confusion_iso8859_1, tc)
+{
+ atf_tc_set_md_var(tc, "descr",
+ "Test potential confusion with EOF in ISO-8859-1");
+}
+ATF_TC_BODY(eof_confusion_iso8859_1, tc)
+{
+ int ydots = 0xff; /* ÿ, LATIN SMALL LETTER Y WITH DIAERESIS */
+ int ch; /* toupper/tolower result, kept so a failure can print it */
+
+ /*
+ * The LATIN SMALL LETTER Y WITH DIAERESIS code point 0xff in
+ * ISO-8859-1 is curious primarily because its bit pattern
+ * coincides with an 8-bit signed -1, which is to say, EOF as
+ * an 8-bit quantity; of course, for EOF, all of the is*
+ * functions are supposed to return false (as we test above).
+ * It also has the curious property that it lacks any
+ * corresponding uppercase code point in ISO-8859-1, so we
+ * can't distinguish it from EOF by tolower/toupper.
+ */
+ ATF_REQUIRE(setlocale(LC_CTYPE, "fr_FR.ISO8859-1") != NULL); /* NULL: locale could not be selected */
+ ATF_CHECK(isalpha(ydots)); /* passed as the int 0xff, not as (char)-1 */
+ ATF_CHECK(!isupper(ydots));
+ ATF_CHECK(islower(ydots));
+ ATF_CHECK(!isdigit(ydots));
+ ATF_CHECK(!isxdigit(ydots));
+ ATF_CHECK(isalnum(ydots));
+ ATF_CHECK(!isspace(ydots));
+ ATF_CHECK(!ispunct(ydots));
+ ATF_CHECK(isprint(ydots));
+ ATF_CHECK(isgraph(ydots));
+ ATF_CHECK(!iscntrl(ydots));
+ ATF_CHECK(!isblank(ydots));
+ ATF_CHECK_MSG((ch = toupper(ydots)) == ydots, "ch=0x%x", ch); /* no uppercase form: unchanged */
+ ATF_CHECK_MSG((ch = tolower(ydots)) == ydots, "ch=0x%x", ch); /* already lowercase: unchanged */
+}
+
+ATF_TC(eof_confusion_koi8_u);
+ATF_TC_HEAD(eof_confusion_koi8_u, tc)
+{
+ atf_tc_set_md_var(tc, "descr",
+ "Test potential confusion with EOF in KOI8-U");
+}
+ATF_TC_BODY(eof_confusion_koi8_u, tc)
+{
+ int Hard = 0xff; /* Ъ, CYRILLIC CAPITAL LETTER HARD SIGN */
+ int hard = 0xdf; /* ъ, CYRILLIC SMALL LETTER HARD SIGN */
+ int ch; /* toupper/tolower result, kept so a failure can print it */
+
+ /*
+ * The CYRILLIC CAPITAL LETTER HARD SIGN code point 0xff in
+ * KOI8-U (and KOI8-R) also coincides with the bit pattern of
+ * an 8-bit signed -1. Unlike LATIN SMALL LETTER Y WITH
+ * DIAERESIS, it has a lowercase equivalent in KOI8-U.
+ */
+ ATF_REQUIRE(setlocale(LC_CTYPE, "uk_UA.KOI8-U") != NULL); /* NULL: locale could not be selected */
+ ATF_CHECK(isalpha(Hard)); /* passed as the int 0xff, not as (char)-1 */
+ ATF_CHECK(isupper(Hard));
+ ATF_CHECK(!islower(Hard));
+ ATF_CHECK(!isdigit(Hard));
+ ATF_CHECK(!isxdigit(Hard));
+ ATF_CHECK(isalnum(Hard));
+ ATF_CHECK(!isspace(Hard));
+ ATF_CHECK(!ispunct(Hard));
+ ATF_CHECK(isprint(Hard));
+ ATF_CHECK(isgraph(Hard));
+ ATF_CHECK(!iscntrl(Hard));
+ ATF_CHECK(!isblank(Hard));
+ ATF_CHECK_MSG((ch = toupper(Hard)) == Hard, "ch=0x%x", ch); /* already uppercase: unchanged */
+ ATF_CHECK_MSG((ch = tolower(Hard)) == hard, "ch=0x%x", ch); /* expect 0xdf, the small hard sign */
+}
+
+ATF_TC(eof_confusion_pt154);
+ATF_TC_HEAD(eof_confusion_pt154, tc)
+{
+ atf_tc_set_md_var(tc, "descr",
+ "Test potential confusion with EOF in PT154");
+}
+ATF_TC_BODY(eof_confusion_pt154, tc)
+{
+ int ya = 0xff; /* я, CYRILLIC SMALL LETTER YA */
+ int Ya = 0xdf; /* Я, CYRILLIC CAPITAL LETTER YA */
+ int ch; /* toupper/tolower result, kept so a failure can print it */
+
+ /*
+ * The CYRILLIC SMALL LETTER YA code point 0xff in PT154 also
+ * coincides with the bit pattern of an 8-bit signed -1, and is
+ * lowercase with a corresponding uppercase code point in
+ * PT154.
+ */
+ ATF_REQUIRE(setlocale(LC_CTYPE, "kk_KZ.PT154") != NULL); /* NULL: locale could not be selected */
+ ATF_CHECK(isalpha(ya)); /* passed as the int 0xff, not as (char)-1 */
+ ATF_CHECK(!isupper(ya));
+ ATF_CHECK(islower(ya));
+ ATF_CHECK(!isdigit(ya));
+ ATF_CHECK(!isxdigit(ya));
+ ATF_CHECK(isalnum(ya));
+ ATF_CHECK(!isspace(ya));
+ ATF_CHECK(!ispunct(ya));
+ ATF_CHECK(isprint(ya));
+ ATF_CHECK(isgraph(ya));
+ ATF_CHECK(!iscntrl(ya));
+ ATF_CHECK(!isblank(ya));
+ ATF_CHECK_MSG((ch = toupper(ya)) == Ya, "ch=0x%x", ch); /* expect 0xdf, the capital YA */
+ ATF_CHECK_MSG((ch = tolower(ya)) == ya, "ch=0x%x", ch); /* already lowercase: unchanged */
+}
+
ATF_TP_ADD_TCS(tp)
{
@@ -964,5 +1072,9 @@ ATF_TP_ADD_TCS(tp)
ADD_TEST_USE(tp, toupper);
ADD_TEST_USE(tp, tolower);
+ ATF_TP_ADD_TC(tp, eof_confusion_iso8859_1);
+ ATF_TP_ADD_TC(tp, eof_confusion_koi8_u);
+ ATF_TP_ADD_TC(tp, eof_confusion_pt154);
+
return atf_no_error();
}