Module Name: src Committed By: rin Date: Fri Jan 5 02:38:06 UTC 2024
Modified Files: src/usr.bin/mklocale: mklocale.1 yacc.y Log Message: mklocale(1): Add range check for TODIGIT, rather than disabling it PR lib/57798 The digit value specified by TODIGIT is stored in the lowest 8 bits of _RuneType, see lib/libc/locale/runetype_file.h: https://nxr.netbsd.org/xref/src/lib/libc/locale/runetype_file.h#56 The symptom reported in the PR is due to a missing range check for this value; values of 256 and above were mistakenly treated as other flag bits in _RuneType. For example, U+5146 has the numerical value 1,000,000,000,000 == 0xe8d4a51000, in which bits __BITS(30, 31) == _RUNETYPE_SW3 are set. This is why wcwidth(3) returned 3 for this character. This apparently affected not only character width, but also other attributes stored in _RuneType. IIUC, digit value attributes in _RuneType have never been utilized until now, but this change preserves them if the digit value lies in the open interval (0, 256), i.e. 1 through 255. This should be safer for pulling this up into netbsd-10. Also, these attributes may be useful to implement some I18N features as suggested by uwe@ in the PR. netbsd-[98] are not affected as they use old UTF-8 ctype definitions. To generate a diff of this commit: cvs rdiff -u -r1.18 -r1.19 src/usr.bin/mklocale/mklocale.1 cvs rdiff -u -r1.35 -r1.36 src/usr.bin/mklocale/yacc.y Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/usr.bin/mklocale/mklocale.1 diff -u src/usr.bin/mklocale/mklocale.1:1.18 src/usr.bin/mklocale/mklocale.1:1.19 --- src/usr.bin/mklocale/mklocale.1:1.18 Thu Dec 28 03:49:35 2023 +++ src/usr.bin/mklocale/mklocale.1 Fri Jan 5 02:38:06 2024 @@ -1,4 +1,4 @@ -.\" $NetBSD: mklocale.1,v 1.18 2023/12/28 03:49:35 rin Exp $ +.\" $NetBSD: mklocale.1,v 1.19 2024/01/05 02:38:06 rin Exp $ .\" FreeBSD: src/usr.bin/mklocale/mklocale.1,v 1.6 1999/09/20 09:15:21 phantom Exp .\" .\" Copyright (c) 1993, 1994 @@ -33,7 +33,7 @@ .\" .\" @(#)mklocale.1 8.2 (Berkeley) 4/18/94 .\" -.Dd December 28, 2023 +.Dd January 5, 2024 .Dt MKLOCALE 1 .Os .Sh NAME @@ -212,7 +212,9 @@ For example, the ASCII character would map to the decimal value 0. On .Nx , -this information is ignored and not put into the binary output file. +this information has never been used until now. +Only values up to 255 are allowed, and mapping to 256 and above is +silently ignored. .El .Pp The following keywords may appear multiple times and have the following Index: src/usr.bin/mklocale/yacc.y diff -u src/usr.bin/mklocale/yacc.y:1.35 src/usr.bin/mklocale/yacc.y:1.36 --- src/usr.bin/mklocale/yacc.y:1.35 Thu Dec 28 03:49:35 2023 +++ src/usr.bin/mklocale/yacc.y Fri Jan 5 02:38:06 2024 @@ -1,4 +1,4 @@ -/* $NetBSD: yacc.y,v 1.35 2023/12/28 03:49:35 rin Exp $ */ +/* $NetBSD: yacc.y,v 1.36 2024/01/05 02:38:06 rin Exp $ */ %{ /*- @@ -43,7 +43,7 @@ static char sccsid[] = "@(#)yacc.y 8.1 (Berkeley) 6/6/93"; static char rcsid[] = "$FreeBSD$"; #else -__RCSID("$NetBSD: yacc.y,v 1.35 2023/12/28 03:49:35 rin Exp $"); +__RCSID("$NetBSD: yacc.y,v 1.36 2024/01/05 02:38:06 rin Exp $"); #endif #endif /* not lint */ @@ -82,9 +82,7 @@ __nbrune_t charsetmask = (__nbrune_t)0x0 __nbrune_t charsetmask = (__nbrune_t)0xffffffff; void set_map(rune_map *, rune_list *, u_int32_t); -#if 0 void set_digitmap(rune_map *, rune_list *); -#endif void add_map(rune_map *, rune_list *, u_int32_t); __dead void usage(void); @@ -189,19 +187,8 
@@ entry : ENCODING STRING { set_map(&maplower, $2, 0); } | MAPUPPER map { set_map(&mapupper, $2, 0); } -/* - * XXX PR lib/57798 - * set_digitmap() was implemented with an assumption that - * all characters are mapped to numerical values <= 255. - * This is no longer true for Unicode, and results in, e.g., - * wrong return values of wcwidth(3) for U+5146 or U+16B60. - * - * | DIGITMAP map - * { set_digitmap(&types, $2); } - * - */ - | DIGITMAP mapignore - { } + | DIGITMAP map + { set_digitmap(&types, $2); } ; list : RUNE @@ -267,12 +254,6 @@ map : LBRK RUNE RUNE RBRK $$->next = $1; } ; - -mapignore : LBRK RUNE RUNE RBRK { } - | map LBRK RUNE RUNE RBRK { } - | LBRK RUNE THRU RUNE ':' RUNE RBRK { } - | map LBRK RUNE THRU RUNE ':' RUNE RBRK { } - ; %% int debug = 0; @@ -401,7 +382,6 @@ set_map(rune_map *map, rune_list *list, } } -#if 0 void set_digitmap(rune_map *map, rune_list *list) { @@ -410,18 +390,24 @@ set_digitmap(rune_map *map, rune_list *l while (list) { rune_list *nlist = list->next; for (i = list->min; i <= list->max; ++i) { - if (list->map + (i - list->min)) { + /* + * XXX PR lib/57798 + * Currently, we support mapping up to 255. Attempts to map + * 256 (== _RUNETYPE_A) and above are silently ignored. + */ + _RuneType digit = list->map + (i - list->min); + if (digit > 0 && digit <= 0xff) { rune_list *tmp = (rune_list *)xmalloc(sizeof(rune_list)); + memset(tmp, 0, sizeof(*tmp)); tmp->min = i; tmp->max = i; - add_map(map, tmp, list->map + (i - list->min)); + add_map(map, tmp, digit); } } free(list); list = nlist; } } -#endif void add_map(rune_map *map, rune_list *list, u_int32_t flag)