Module Name:    src
Committed By:   riastradh
Date:           Fri Mar 28 22:52:35 UTC 2025

Modified Files:
        src/tests/lib/libc/gen: t_ctype.c

Log Message:
t_ctype: Test some more code points with potential for EOF confusion.

PR lib/58208: ctype(3) provides poor runtime feedback of abuse


To generate a diff of this commit:
cvs rdiff -u -r1.5 -r1.6 src/tests/lib/libc/gen/t_ctype.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/tests/lib/libc/gen/t_ctype.c
diff -u src/tests/lib/libc/gen/t_ctype.c:1.5 src/tests/lib/libc/gen/t_ctype.c:1.6
--- src/tests/lib/libc/gen/t_ctype.c:1.5	Fri Mar 28 22:51:58 2025
+++ src/tests/lib/libc/gen/t_ctype.c	Fri Mar 28 22:52:35 2025
@@ -1,4 +1,4 @@
-/*	$NetBSD: t_ctype.c,v 1.5 2025/03/28 22:51:58 riastradh Exp $	*/
+/*	$NetBSD: t_ctype.c,v 1.6 2025/03/28 22:52:35 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2025 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__RCSID("$NetBSD: t_ctype.c,v 1.5 2025/03/28 22:51:58 riastradh Exp $");
+__RCSID("$NetBSD: t_ctype.c,v 1.6 2025/03/28 22:52:35 riastradh Exp $");
 
 #include <atf-c.h>
 #include <ctype.h>
@@ -931,6 +931,114 @@ DEF_TEST_USE(isblank)
 DEF_TEST_USE(toupper)
 DEF_TEST_USE(tolower)
 
+ATF_TC(eof_confusion_iso8859_1);
+ATF_TC_HEAD(eof_confusion_iso8859_1, tc)
+{
+	atf_tc_set_md_var(tc, "descr",
+	    "Test potential confusion with EOF in ISO-8859-1");
+}
+ATF_TC_BODY(eof_confusion_iso8859_1, tc)
+{
+	int ydots = 0xff;	/* ÿ, LATIN SMALL LETTER Y WITH DIAERESIS */
+	int ch;
+
+	/*
+	 * The LATIN SMALL LETTER Y WITH DIAERESIS code point 0xff in
+	 * ISO-8859-1 is curious primarily because its bit pattern
+	 * coincides with an 8-bit signed -1, which is to say, EOF as
+	 * an 8-bit quantity; of course, for EOF, all of the is*
+	 * functions are supposed to return false (as we test above).
+	 * It also has the curious property that it lacks any
+	 * corresponding uppercase code point in ISO-8859-1, so we
+	 * can't distinguish it from EOF by tolower/toupper.
+	 */
+	ATF_REQUIRE(setlocale(LC_CTYPE, "fr_FR.ISO8859-1") != NULL);
+	ATF_CHECK(isalpha(ydots));
+	ATF_CHECK(!isupper(ydots));
+	ATF_CHECK(islower(ydots));
+	ATF_CHECK(!isdigit(ydots));
+	ATF_CHECK(!isxdigit(ydots));
+	ATF_CHECK(isalnum(ydots));
+	ATF_CHECK(!isspace(ydots));
+	ATF_CHECK(!ispunct(ydots));
+	ATF_CHECK(isprint(ydots));
+	ATF_CHECK(isgraph(ydots));
+	ATF_CHECK(!iscntrl(ydots));
+	ATF_CHECK(!isblank(ydots));
+	ATF_CHECK_MSG((ch = toupper(ydots)) == ydots, "ch=0x%x", ch);
+	ATF_CHECK_MSG((ch = tolower(ydots)) == ydots, "ch=0x%x", ch);
+}
+
+ATF_TC(eof_confusion_koi8_u);
+ATF_TC_HEAD(eof_confusion_koi8_u, tc)
+{
+	atf_tc_set_md_var(tc, "descr",
+	    "Test potential confusion with EOF in KOI8-U");
+}
+ATF_TC_BODY(eof_confusion_koi8_u, tc)
+{
+	int Hard = 0xff;	/* Ъ, CYRILLIC CAPITAL LETTER HARD SIGN */
+	int hard = 0xdf;	/* ъ, CYRILLIC SMALL LETTER HARD SIGN */
+	int ch;
+
+	/*
+	 * The CYRILLIC CAPITAL LETTER HARD SIGN code point 0xff in
+	 * KOI8-U (and KOI8-R) also coincides with the bit pattern of
+	 * an 8-bit signed -1.  Unlike LATIN SMALL LETTER Y WITH
+	 * DIAERESIS, it has a lowercase equivalent in KOI8-U.
+	 */
+	ATF_REQUIRE(setlocale(LC_CTYPE, "uk_UA.KOI8-U") != NULL);
+	ATF_CHECK(isalpha(Hard));
+	ATF_CHECK(isupper(Hard));
+	ATF_CHECK(!islower(Hard));
+	ATF_CHECK(!isdigit(Hard));
+	ATF_CHECK(!isxdigit(Hard));
+	ATF_CHECK(isalnum(Hard));
+	ATF_CHECK(!isspace(Hard));
+	ATF_CHECK(!ispunct(Hard));
+	ATF_CHECK(isprint(Hard));
+	ATF_CHECK(isgraph(Hard));
+	ATF_CHECK(!iscntrl(Hard));
+	ATF_CHECK(!isblank(Hard));
+	ATF_CHECK_MSG((ch = toupper(Hard)) == Hard, "ch=0x%x", ch);
+	ATF_CHECK_MSG((ch = tolower(Hard)) == hard, "ch=0x%x", ch);
+}
+
+ATF_TC(eof_confusion_pt154);
+ATF_TC_HEAD(eof_confusion_pt154, tc)
+{
+	atf_tc_set_md_var(tc, "descr",
+	    "Test potential confusion with EOF in PT154");
+}
+ATF_TC_BODY(eof_confusion_pt154, tc)
+{
+	int ya = 0xff;		/* я, CYRILLIC SMALL LETTER YA */
+	int Ya = 0xdf;		/* Я, CYRILLIC CAPITAL LETTER YA */
+	int ch;
+
+	/*
+	 * The CYRILLIC SMALL LETTER YA code point 0xff in PT154 also
+	 * coincides with the bit pattern of an 8-bit signed -1, and is
+	 * lowercase with a corresponding uppercase code point in
+	 * PT154.
+	 */
+	ATF_REQUIRE(setlocale(LC_CTYPE, "kk_KZ.PT154") != NULL);
+	ATF_CHECK(isalpha(ya));
+	ATF_CHECK(!isupper(ya));
+	ATF_CHECK(islower(ya));
+	ATF_CHECK(!isdigit(ya));
+	ATF_CHECK(!isxdigit(ya));
+	ATF_CHECK(isalnum(ya));
+	ATF_CHECK(!isspace(ya));
+	ATF_CHECK(!ispunct(ya));
+	ATF_CHECK(isprint(ya));
+	ATF_CHECK(isgraph(ya));
+	ATF_CHECK(!iscntrl(ya));
+	ATF_CHECK(!isblank(ya));
+	ATF_CHECK_MSG((ch = toupper(ya)) == Ya, "ch=0x%x", ch);
+	ATF_CHECK_MSG((ch = tolower(ya)) == ya, "ch=0x%x", ch);
+}
+
 ATF_TP_ADD_TCS(tp)
 {
 
@@ -964,5 +1072,9 @@ ATF_TP_ADD_TCS(tp)
 	ADD_TEST_USE(tp, toupper);
 	ADD_TEST_USE(tp, tolower);
 
+	ATF_TP_ADD_TC(tp, eof_confusion_iso8859_1);
+	ATF_TP_ADD_TC(tp, eof_confusion_koi8_u);
+	ATF_TP_ADD_TC(tp, eof_confusion_pt154);
+
 	return atf_no_error();
 }

Reply via email to