Module Name:	src
Committed By:	riastradh
Date:		Sat Mar 29 01:06:37 UTC 2025

Modified Files:
	src/lib/libc/gen: isctype.c
	src/tests/lib/libc/gen: t_ctype.c

Log Message:
ctype(3): Summon a demon from caller's nose if abused out-of-line.

This way, applications which opt for the out-of-line functions will
crash with a potentially meaningful message to stderr if they pass
inputs on which the ctype(3) functions are undefined. (If fd 2 is
something else, tough -- this is nasal demon country, and they fly
where they please, application intent be damned.)

This probably won't catch many applications -- but it might catch C++
applications at runtime that can't be caught at build-time because
they eschew the macros.

The cost is a single predicted-not-taken branch -- it's checking c
for membership in the interval [-1,0xff] when we're already computing
c + 1, so it can be a single unsigned-compare. By deferring the
message and abort to an out-of-line function we avoid a stack frame
in the good case.

And this is for the unlikely, out-of-line versions of the ctype(3)
functions -- most applications get the inline macros. So I'm not
concerned by the prospect of a performance impact.

Update the tests so they recognize SIGABRT as noisy failure too, not
just SIGSEGV.

PR lib/58208: ctype(3) provides poor runtime feedback of abuse

To generate a diff of this commit:
cvs rdiff -u -r1.26 -r1.27 src/lib/libc/gen/isctype.c
cvs rdiff -u -r1.8 -r1.9 src/tests/lib/libc/gen/t_ctype.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files: Index: src/lib/libc/gen/isctype.c diff -u src/lib/libc/gen/isctype.c:1.26 src/lib/libc/gen/isctype.c:1.27 --- src/lib/libc/gen/isctype.c:1.26 Fri Jun 7 13:53:22 2024 +++ src/lib/libc/gen/isctype.c Sat Mar 29 01:06:36 2025 @@ -1,4 +1,4 @@ -/* $NetBSD: isctype.c,v 1.26 2024/06/07 13:53:22 riastradh Exp $ */ +/* $NetBSD: isctype.c,v 1.27 2025/03/29 01:06:36 riastradh Exp $ */ /*- * Copyright (c)2008 Citrus Project, @@ -28,18 +28,23 @@ #include <sys/cdefs.h> #if defined(LIBC_SCCS) && !defined(lint) -__RCSID("$NetBSD: isctype.c,v 1.26 2024/06/07 13:53:22 riastradh Exp $"); +__RCSID("$NetBSD: isctype.c,v 1.27 2025/03/29 01:06:36 riastradh Exp $"); #endif /* LIBC_SCCS and not lint */ #include "namespace.h" #include <sys/types.h> #include <sys/ctype_bits.h> #define _CTYPE_NOINLINE +#include <assert.h> #include <ctype.h> #include <langinfo.h> #define __SETLOCALE_SOURCE__ #include <locale.h> #include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + #if EOF != -1 #error "EOF != -1" #endif @@ -50,15 +55,38 @@ __RCSID("$NetBSD: isctype.c,v 1.26 2024/ #define _RUNE_LOCALE(loc) \ ((_RuneLocale *)((loc)->part_impl[LC_CTYPE])) +static void __noinline __dead +ctype_nasaldemon(const char *func, int c) +{ + char buf[128]; + + snprintf_ss(buf, sizeof(buf), "ctype(3) %s: invalid input: %d\n", func, + c); + (void)write(STDERR_FILENO, buf, strlen(buf)); + abort(); +} + +static inline void +ctype_check(const char *func, int c) +{ + + if (__predict_false((c != EOF && c < 0) || c > UCHAR_MAX)) + ctype_nasaldemon(func, c); +} + +#define CTYPE_CHECK(c) ctype_check(__func__, c) + #define _ISCTYPE_FUNC(name, bit) \ int \ is##name(int c) \ { \ + CTYPE_CHECK(c); \ return (int)_ctype_tab_[c + 1] & (bit); \ } \ int \ is##name ## _l(int c, locale_t loc) \ { \ + CTYPE_CHECK(c); \ return (int)((_RUNE_LOCALE(loc)->rl_ctype_tab[c + 1]) & (bit)); \ } @@ -78,24 +106,28 @@ _ISCTYPE_FUNC(xdigit, _CTYPE_X) int toupper(int c) { + CTYPE_CHECK(c); return 
(int)_toupper_tab_[c + 1]; } int toupper_l(int c, locale_t loc) { + CTYPE_CHECK(c); return (int)(_RUNE_LOCALE(loc)->rl_toupper_tab[c + 1]); } int tolower(int c) { + CTYPE_CHECK(c); return (int)_tolower_tab_[c + 1]; } int tolower_l(int c, locale_t loc) { + CTYPE_CHECK(c); return (int)(_RUNE_LOCALE(loc)->rl_tolower_tab[c + 1]); } Index: src/tests/lib/libc/gen/t_ctype.c diff -u src/tests/lib/libc/gen/t_ctype.c:1.8 src/tests/lib/libc/gen/t_ctype.c:1.9 --- src/tests/lib/libc/gen/t_ctype.c:1.8 Fri Mar 28 23:30:34 2025 +++ src/tests/lib/libc/gen/t_ctype.c Sat Mar 29 01:06:36 2025 @@ -1,4 +1,4 @@ -/* $NetBSD: t_ctype.c,v 1.8 2025/03/28 23:30:34 riastradh Exp $ */ +/* $NetBSD: t_ctype.c,v 1.9 2025/03/29 01:06:36 riastradh Exp $ */ /*- * Copyright (c) 2025 The NetBSD Foundation, Inc. @@ -27,7 +27,7 @@ */ #include <sys/cdefs.h> -__RCSID("$NetBSD: t_ctype.c,v 1.8 2025/03/28 23:30:34 riastradh Exp $"); +__RCSID("$NetBSD: t_ctype.c,v 1.9 2025/03/29 01:06:36 riastradh Exp $"); #include <atf-c.h> #include <ctype.h> @@ -65,7 +65,7 @@ static int tolower_wrapper(int ch) { ret jmp_buf env; static void -handle_sigsegv(int signo) +handle_signal(int signo) { longjmp(env, 1); @@ -77,22 +77,23 @@ test_abuse(const char *name, int (*ctype volatile int ch; /* for longjmp */ for (ch = CHAR_MIN; ch < 0; ch++) { - void (*h)(int) = SIG_DFL; volatile int result; if (ch == EOF) continue; ATF_REQUIRE_MSG(ch != (int)(unsigned char)ch, "ch=%d", ch); if (setjmp(env) == 0) { - REQUIRE_LIBC(h = signal(SIGSEGV, &handle_sigsegv), - SIG_ERR); + REQUIRE_LIBC(signal(SIGABRT, &handle_signal), SIG_ERR); + REQUIRE_LIBC(signal(SIGSEGV, &handle_signal), SIG_ERR); result = (*ctypefn)(ch); - REQUIRE_LIBC(signal(SIGSEGV, h), SIG_ERR); + REQUIRE_LIBC(signal(SIGABRT, SIG_DFL), SIG_ERR); + REQUIRE_LIBC(signal(SIGSEGV, SIG_DFL), SIG_ERR); atf_tc_fail_nonfatal("%s failed to detect invalid %d," " returned %d", name, ch, result); } else { - REQUIRE_LIBC(signal(SIGSEGV, h), SIG_ERR); + REQUIRE_LIBC(signal(SIGABRT, SIG_DFL), 
SIG_ERR); + REQUIRE_LIBC(signal(SIGSEGV, SIG_DFL), SIG_ERR); } } @@ -101,7 +102,7 @@ test_abuse(const char *name, int (*ctype } static void -test_abuse_in_locales(const char *name, int (*ctypefn)(int)) +test_abuse_in_locales(const char *name, int (*ctypefn)(int), bool macro) { size_t i; @@ -111,7 +112,7 @@ test_abuse_in_locales(const char *name, ATF_REQUIRE_MSG(setlocale(LC_CTYPE, locales[i]) != NULL, "locales[i]=%s", locales[i]); snprintf(buf, sizeof(buf), "[%s]%s", locales[i], name); - if (strcmp(locales[i], "C") == 0) { + if (macro && strcmp(locales[i], "C") == 0) { atf_tc_expect_fail("PR lib/58208: ctype(3)" " provides poor runtime feedback of abuse"); } @@ -127,19 +128,20 @@ test_use(const char *name, int (*ctypefn volatile int ch; /* for longjmp */ for (ch = EOF; ch <= CHAR_MAX; ch = (ch == EOF ? 0 : ch + 1)) { - void (*h)(int) = SIG_DFL; volatile int result; if (setjmp(env) == 0) { - REQUIRE_LIBC(h = signal(SIGSEGV, &handle_sigsegv), - SIG_ERR); + REQUIRE_LIBC(signal(SIGABRT, &handle_signal), SIG_ERR); + REQUIRE_LIBC(signal(SIGSEGV, &handle_signal), SIG_ERR); result = (*ctypefn)(ch); - REQUIRE_LIBC(signal(SIGSEGV, h), SIG_ERR); + REQUIRE_LIBC(signal(SIGABRT, SIG_DFL), SIG_ERR); + REQUIRE_LIBC(signal(SIGSEGV, SIG_DFL), SIG_ERR); (void)result; } else { + REQUIRE_LIBC(signal(SIGABRT, SIG_DFL), SIG_ERR); + REQUIRE_LIBC(signal(SIGSEGV, SIG_DFL), SIG_ERR); atf_tc_fail_nonfatal("%s(%d) raised SIGSEGV", name, ch); - REQUIRE_LIBC(signal(SIGSEGV, h), SIG_ERR); } } } @@ -803,8 +805,6 @@ ATF_TC_BODY(abuse_##FN##_function_c, tc) atf_tc_skip("runtime ctype(3) abuse is impossible with" \ " unsigned char"); \ } \ - atf_tc_expect_fail("PR lib/58208:" \ - " ctype(3) provides poor runtime feedback of abuse"); \ test_abuse(#FN, &FN); \ } \ ATF_TC(abuse_##FN##_macro_locale); \ @@ -819,7 +819,7 @@ ATF_TC_BODY(abuse_##FN##_macro_locale, t atf_tc_skip("runtime ctype(3) abuse is impossible with" \ " unsigned char"); \ } \ - test_abuse_in_locales(#FN, &FN##_wrapper); \ + 
test_abuse_in_locales(#FN, &FN##_wrapper, /*macro*/true); \ } \ ATF_TC(abuse_##FN##_function_locale); \ ATF_TC_HEAD(abuse_##FN##_function_locale, tc) \ @@ -833,7 +833,7 @@ ATF_TC_BODY(abuse_##FN##_function_locale atf_tc_skip("runtime ctype(3) abuse is impossible with" \ " unsigned char"); \ } \ - test_abuse_in_locales(#FN, &FN); \ + test_abuse_in_locales(#FN, &FN, /*macro*/false); \ } #define ADD_TEST_USE(TP, FN) do \