Module Name:    src
Committed By:   rin
Date:           Thu Dec 28 03:49:35 UTC 2023

Modified Files:
        src/usr.bin/mklocale: mklocale.1 yacc.y

Log Message:
mklocale: XXX: Neglect TODIGIT at the moment

PR lib/57798

TODIGIT was implemented on the assumption that every digit character
maps to a numerical value <= 255.

This is no longer true for Unicode, and results in, e.g., wrong
return values of wcwidth(3) for U+5146 or U+16B60.

As a workaround, ignore TODIGIT for now, as OpenBSD does:
https://github.com/OpenBSD/src/commit/4efe9bdeb34

XXX
At least netbsd-10 should also be fixed, but that requires some testing first.


To generate a diff of this commit:
cvs rdiff -u -r1.17 -r1.18 src/usr.bin/mklocale/mklocale.1
cvs rdiff -u -r1.34 -r1.35 src/usr.bin/mklocale/yacc.y

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/usr.bin/mklocale/mklocale.1
diff -u src/usr.bin/mklocale/mklocale.1:1.17 src/usr.bin/mklocale/mklocale.1:1.18
--- src/usr.bin/mklocale/mklocale.1:1.17	Mon Jul  3 21:34:20 2017
+++ src/usr.bin/mklocale/mklocale.1	Thu Dec 28 03:49:35 2023
@@ -1,4 +1,4 @@
-.\" $NetBSD: mklocale.1,v 1.17 2017/07/03 21:34:20 wiz Exp $
+.\" $NetBSD: mklocale.1,v 1.18 2023/12/28 03:49:35 rin Exp $
 .\" FreeBSD: src/usr.bin/mklocale/mklocale.1,v 1.6 1999/09/20 09:15:21 phantom Exp
 .\"
 .\" Copyright (c) 1993, 1994
@@ -33,7 +33,7 @@
 .\"
 .\"	@(#)mklocale.1	8.2 (Berkeley) 4/18/94
 .\"
-.Dd July 15, 2013
+.Dd December 28, 2023
 .Dt MKLOCALE 1
 .Os
 .Sh NAME
@@ -210,7 +210,9 @@ is the integer value represented by
 For example, the ASCII character
 .Sq 0
 would map to the decimal value 0.
-Only values up to 255 are allowed.
+On
+.Nx ,
+this information is ignored and not put into the binary output file.
 .El
 .Pp
 The following keywords may appear multiple times and have the following

Index: src/usr.bin/mklocale/yacc.y
diff -u src/usr.bin/mklocale/yacc.y:1.34 src/usr.bin/mklocale/yacc.y:1.35
--- src/usr.bin/mklocale/yacc.y:1.34	Sun Oct 13 21:12:32 2019
+++ src/usr.bin/mklocale/yacc.y	Thu Dec 28 03:49:35 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: yacc.y,v 1.34 2019/10/13 21:12:32 christos Exp $	*/
+/*	$NetBSD: yacc.y,v 1.35 2023/12/28 03:49:35 rin Exp $	*/
 
 %{
 /*-
@@ -43,7 +43,7 @@
 static char sccsid[] = "@(#)yacc.y	8.1 (Berkeley) 6/6/93";
 static char rcsid[] = "$FreeBSD$";
 #else
-__RCSID("$NetBSD: yacc.y,v 1.34 2019/10/13 21:12:32 christos Exp $");
+__RCSID("$NetBSD: yacc.y,v 1.35 2023/12/28 03:49:35 rin Exp $");
 #endif
 #endif /* not lint */
 
@@ -82,7 +82,9 @@ __nbrune_t	charsetmask = (__nbrune_t)0x0
 __nbrune_t	charsetmask = (__nbrune_t)0xffffffff;
 
 void set_map(rune_map *, rune_list *, u_int32_t);
+#if 0
 void set_digitmap(rune_map *, rune_list *);
+#endif
 void add_map(rune_map *, rune_list *, u_int32_t);
 
 __dead void	usage(void);
@@ -187,8 +189,19 @@ entry	:	ENCODING STRING
 		{ set_map(&maplower, $2, 0); }
 	|	MAPUPPER map
 		{ set_map(&mapupper, $2, 0); }
-	|	DIGITMAP map
-		{ set_digitmap(&types, $2); }
+/*
+ * XXX PR lib/57798
+ * set_digitmap() was implemented with an assumption that
+ * all characters are mapped to numerical values <= 255.
+ * This is no longer true for Unicode, and results in, e.g.,
+ * wrong return values of wcwidth(3) for U+5146 or U+16B60.
+ *
+ *	|	DIGITMAP map
+ *		{ set_digitmap(&types, $2); }
+ *
+ */
+	|	DIGITMAP mapignore
+		{ }
 	;
 
 list	:	RUNE
@@ -254,6 +267,12 @@ map	:	LBRK RUNE RUNE RBRK
 		    $$->next = $1;
 		}
 	;
+
+mapignore :	LBRK RUNE RUNE RBRK { }
+	|	map LBRK RUNE RUNE RBRK { }
+	|	LBRK RUNE THRU RUNE ':' RUNE RBRK { }
+	|	map LBRK RUNE THRU RUNE ':' RUNE RBRK { }
+	;
 %%
 
 int debug = 0;
@@ -382,6 +401,7 @@ set_map(rune_map *map, rune_list *list, 
     }
 }
 
+#if 0
 void
 set_digitmap(rune_map *map, rune_list *list)
 {
@@ -401,6 +421,7 @@ set_digitmap(rune_map *map, rune_list *l
 	list = nlist;
     }
 }
+#endif
 
 void
 add_map(rune_map *map, rune_list *list, u_int32_t flag)

Reply via email to