Module Name:    src
Committed By:   riastradh
Date:           Sat Mar 29 01:06:37 UTC 2025

Modified Files:
        src/lib/libc/gen: isctype.c
        src/tests/lib/libc/gen: t_ctype.c

Log Message:
ctype(3): Summon a demon from caller's nose if abused out-of-line.

This way, applications which opt for the out-of-line functions will
crash with a potentially meaningful message to stderr if they pass
inputs on which the ctype(3) functions are undefined.  (If fd 2 is
something else, tough -- this is nasal demon country, and they fly
where they please, application intent be damned.)

This probably won't catch many applications -- but it might catch C++
applications at runtime that can't be caught at build-time because
they eschew the macros.

The cost is a single predicted-not-taken branch -- it's checking c
for membership in the interval [-1,0xff] when we're already computing
c + 1, so it can be a single unsigned-compare.  By deferring the
message and abort to an out-of-line function we avoid a stack frame
in the good case.  And this is for the unlikely, out-of-line versions
of the ctype(3) functions -- most applications get the inline macros.
So I'm not concerned by the prospect of a performance impact.

Update the tests so they recognize SIGABRT as a noisy failure too, not
just SIGSEGV.

PR lib/58208: ctype(3) provides poor runtime feedback of abuse


To generate a diff of this commit:
cvs rdiff -u -r1.26 -r1.27 src/lib/libc/gen/isctype.c
cvs rdiff -u -r1.8 -r1.9 src/tests/lib/libc/gen/t_ctype.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/lib/libc/gen/isctype.c
diff -u src/lib/libc/gen/isctype.c:1.26 src/lib/libc/gen/isctype.c:1.27
--- src/lib/libc/gen/isctype.c:1.26	Fri Jun  7 13:53:22 2024
+++ src/lib/libc/gen/isctype.c	Sat Mar 29 01:06:36 2025
@@ -1,4 +1,4 @@
-/* $NetBSD: isctype.c,v 1.26 2024/06/07 13:53:22 riastradh Exp $ */
+/* $NetBSD: isctype.c,v 1.27 2025/03/29 01:06:36 riastradh Exp $ */
 
 /*-
  * Copyright (c)2008 Citrus Project,
@@ -28,18 +28,23 @@
 
 #include <sys/cdefs.h>
 #if defined(LIBC_SCCS) && !defined(lint)
-__RCSID("$NetBSD: isctype.c,v 1.26 2024/06/07 13:53:22 riastradh Exp $");
+__RCSID("$NetBSD: isctype.c,v 1.27 2025/03/29 01:06:36 riastradh Exp $");
 #endif /* LIBC_SCCS and not lint */
 
 #include "namespace.h"
 #include <sys/types.h>
 #include <sys/ctype_bits.h>
 #define _CTYPE_NOINLINE
+#include <assert.h>
 #include <ctype.h>
 #include <langinfo.h>
 #define __SETLOCALE_SOURCE__
 #include <locale.h>
 #include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
 #if EOF != -1
 #error "EOF != -1"
 #endif
@@ -50,15 +55,38 @@ __RCSID("$NetBSD: isctype.c,v 1.26 2024/
 #define _RUNE_LOCALE(loc) \
     ((_RuneLocale *)((loc)->part_impl[LC_CTYPE]))
 
+static void __noinline __dead
+ctype_nasaldemon(const char *func, int c)
+{
+	char buf[128];
+
+	snprintf_ss(buf, sizeof(buf), "ctype(3) %s: invalid input: %d\n", func,
+	    c);
+	(void)write(STDERR_FILENO, buf, strlen(buf));
+	abort();
+}
+
+static inline void
+ctype_check(const char *func, int c)
+{
+
+	if (__predict_false((c != EOF && c < 0) || c > UCHAR_MAX))
+		ctype_nasaldemon(func, c);
+}
+
+#define	CTYPE_CHECK(c)	ctype_check(__func__, c)
+
 #define _ISCTYPE_FUNC(name, bit) \
 int \
 is##name(int c) \
 { \
+	CTYPE_CHECK(c); \
 	return (int)_ctype_tab_[c + 1] & (bit); \
 } \
 int \
 is##name ## _l(int c, locale_t loc) \
 { \
+	CTYPE_CHECK(c); \
 	return (int)((_RUNE_LOCALE(loc)->rl_ctype_tab[c + 1]) & (bit)); \
 }
 
@@ -78,24 +106,28 @@ _ISCTYPE_FUNC(xdigit, _CTYPE_X)
 int
 toupper(int c)
 {
+	CTYPE_CHECK(c);
 	return (int)_toupper_tab_[c + 1];
 }
 
 int
 toupper_l(int c, locale_t loc)
 {
+	CTYPE_CHECK(c);
 	return (int)(_RUNE_LOCALE(loc)->rl_toupper_tab[c + 1]);
 }
 
 int
 tolower(int c)
 {
+	CTYPE_CHECK(c);
 	return (int)_tolower_tab_[c + 1];
 }
 
 int
 tolower_l(int c, locale_t loc)
 {
+	CTYPE_CHECK(c);
 	return (int)(_RUNE_LOCALE(loc)->rl_tolower_tab[c + 1]);
 }
 

Index: src/tests/lib/libc/gen/t_ctype.c
diff -u src/tests/lib/libc/gen/t_ctype.c:1.8 src/tests/lib/libc/gen/t_ctype.c:1.9
--- src/tests/lib/libc/gen/t_ctype.c:1.8	Fri Mar 28 23:30:34 2025
+++ src/tests/lib/libc/gen/t_ctype.c	Sat Mar 29 01:06:36 2025
@@ -1,4 +1,4 @@
-/*	$NetBSD: t_ctype.c,v 1.8 2025/03/28 23:30:34 riastradh Exp $	*/
+/*	$NetBSD: t_ctype.c,v 1.9 2025/03/29 01:06:36 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2025 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__RCSID("$NetBSD: t_ctype.c,v 1.8 2025/03/28 23:30:34 riastradh Exp $");
+__RCSID("$NetBSD: t_ctype.c,v 1.9 2025/03/29 01:06:36 riastradh Exp $");
 
 #include <atf-c.h>
 #include <ctype.h>
@@ -65,7 +65,7 @@ static int tolower_wrapper(int ch) { ret
 jmp_buf env;
 
 static void
-handle_sigsegv(int signo)
+handle_signal(int signo)
 {
 
 	longjmp(env, 1);
@@ -77,22 +77,23 @@ test_abuse(const char *name, int (*ctype
 	volatile int ch;	/* for longjmp */
 
 	for (ch = CHAR_MIN; ch < 0; ch++) {
-		void (*h)(int) = SIG_DFL;
 		volatile int result;
 
 		if (ch == EOF)
 			continue;
 		ATF_REQUIRE_MSG(ch != (int)(unsigned char)ch, "ch=%d", ch);
 		if (setjmp(env) == 0) {
-			REQUIRE_LIBC(h = signal(SIGSEGV, &handle_sigsegv),
-			    SIG_ERR);
+			REQUIRE_LIBC(signal(SIGABRT, &handle_signal), SIG_ERR);
+			REQUIRE_LIBC(signal(SIGSEGV, &handle_signal), SIG_ERR);
 			result = (*ctypefn)(ch);
-			REQUIRE_LIBC(signal(SIGSEGV, h), SIG_ERR);
+			REQUIRE_LIBC(signal(SIGABRT, SIG_DFL), SIG_ERR);
+			REQUIRE_LIBC(signal(SIGSEGV, SIG_DFL), SIG_ERR);
 			atf_tc_fail_nonfatal("%s failed to detect invalid %d,"
 			    " returned %d",
 			    name, ch, result);
 		} else {
-			REQUIRE_LIBC(signal(SIGSEGV, h), SIG_ERR);
+			REQUIRE_LIBC(signal(SIGABRT, SIG_DFL), SIG_ERR);
+			REQUIRE_LIBC(signal(SIGSEGV, SIG_DFL), SIG_ERR);
 		}
 	}
 
@@ -101,7 +102,7 @@ test_abuse(const char *name, int (*ctype
 }
 
 static void
-test_abuse_in_locales(const char *name, int (*ctypefn)(int))
+test_abuse_in_locales(const char *name, int (*ctypefn)(int), bool macro)
 {
 	size_t i;
 
@@ -111,7 +112,7 @@ test_abuse_in_locales(const char *name, 
 		ATF_REQUIRE_MSG(setlocale(LC_CTYPE, locales[i]) != NULL,
 		    "locales[i]=%s", locales[i]);
 		snprintf(buf, sizeof(buf), "[%s]%s", locales[i], name);
-		if (strcmp(locales[i], "C") == 0) {
+		if (macro && strcmp(locales[i], "C") == 0) {
 			atf_tc_expect_fail("PR lib/58208: ctype(3)"
 			    " provides poor runtime feedback of abuse");
 		}
@@ -127,19 +128,20 @@ test_use(const char *name, int (*ctypefn
 	volatile int ch;	/* for longjmp */
 
 	for (ch = EOF; ch <= CHAR_MAX; ch = (ch == EOF ? 0 : ch + 1)) {
-		void (*h)(int) = SIG_DFL;
 		volatile int result;
 
 		if (setjmp(env) == 0) {
-			REQUIRE_LIBC(h = signal(SIGSEGV, &handle_sigsegv),
-			    SIG_ERR);
+			REQUIRE_LIBC(signal(SIGABRT, &handle_signal), SIG_ERR);
+			REQUIRE_LIBC(signal(SIGSEGV, &handle_signal), SIG_ERR);
 			result = (*ctypefn)(ch);
-			REQUIRE_LIBC(signal(SIGSEGV, h), SIG_ERR);
+			REQUIRE_LIBC(signal(SIGABRT, SIG_DFL), SIG_ERR);
+			REQUIRE_LIBC(signal(SIGSEGV, SIG_DFL), SIG_ERR);
 			(void)result;
 		} else {
+			REQUIRE_LIBC(signal(SIGABRT, SIG_DFL), SIG_ERR);
+			REQUIRE_LIBC(signal(SIGSEGV, SIG_DFL), SIG_ERR);
 			atf_tc_fail_nonfatal("%s(%d) raised SIGSEGV",
 			    name, ch);
-			REQUIRE_LIBC(signal(SIGSEGV, h), SIG_ERR);
 		}
 	}
 }
@@ -803,8 +805,6 @@ ATF_TC_BODY(abuse_##FN##_function_c, tc)
 		atf_tc_skip("runtime ctype(3) abuse is impossible with"	      \
 		    " unsigned char");					      \
 	}								      \
-	atf_tc_expect_fail("PR lib/58208:"				      \
-	    " ctype(3) provides poor runtime feedback of abuse");	      \
 	test_abuse(#FN, &FN);						      \
 }									      \
 ATF_TC(abuse_##FN##_macro_locale);					      \
@@ -819,7 +819,7 @@ ATF_TC_BODY(abuse_##FN##_macro_locale, t
 		atf_tc_skip("runtime ctype(3) abuse is impossible with"	      \
 		    " unsigned char");					      \
 	}								      \
-	test_abuse_in_locales(#FN, &FN##_wrapper);			      \
+	test_abuse_in_locales(#FN, &FN##_wrapper, /*macro*/true);	      \
 }									      \
 ATF_TC(abuse_##FN##_function_locale);					      \
 ATF_TC_HEAD(abuse_##FN##_function_locale, tc)				      \
@@ -833,7 +833,7 @@ ATF_TC_BODY(abuse_##FN##_function_locale
 		atf_tc_skip("runtime ctype(3) abuse is impossible with"	      \
 		    " unsigned char");					      \
 	}								      \
-	test_abuse_in_locales(#FN, &FN);				      \
+	test_abuse_in_locales(#FN, &FN, /*macro*/false);		      \
 }
 
 #define	ADD_TEST_USE(TP, FN) do						      \

Reply via email to