By default, nvi uses its bundled regex, which handles wchar_t strings. However, it is still buggy for wide characters. On the other hand, tre also provides wchar_t versions of the regex routines, which are "much better" as far as I can see. For example, bracket expressions work fine at least for the ja_JP.UTF-8 and ja_JP.eucJP locales, whereas they do not work with the nvi-bundled regex. I therefore propose to switch nvi from the bundled regex to tre. For this purpose,
- Install headers from tre into /usr/include/tre, which may also be useful for third-party software. (Alternatively: do not install them, and have nvi use them directly from somewhere under external/bsd/tre.)
- Build /rescue/vi (and other crunched-binary versions of nvi in miniroot) with USE_WIDECHAR == "no" in order not to link the extra library into them. Actually, this is not a real problem; they cannot handle wide characters even if they are built with USE_WIDECHAR == "yes", because the locale support in libc does not work for statically linked binaries.

Any comments or suggestions?

rin
Index: distrib/sets/lists/base/mi =================================================================== RCS file: /cvsroot/src/distrib/sets/lists/base/mi,v retrieving revision 1.1164 diff -u -r1.1164 mi --- distrib/sets/lists/base/mi 24 Oct 2017 02:22:09 -0000 1.1164 +++ distrib/sets/lists/base/mi 12 Nov 2017 12:09:33 -0000 @@ -1219,6 +1219,7 @@ ./usr/include/ss base-obsolete obsolete ./usr/include/ssp base-c-usr ./usr/include/sys base-c-usr +./usr/include/tre base-c-usr ./usr/include/trousers base-c-usr ./usr/include/tss base-c-usr ./usr/include/ufs base-c-usr Index: distrib/sets/lists/comp/mi =================================================================== RCS file: /cvsroot/src/distrib/sets/lists/comp/mi,v retrieving revision 1.2159 diff -u -r1.2159 mi --- distrib/sets/lists/comp/mi 7 Nov 2017 22:20:05 -0000 1.2159 +++ distrib/sets/lists/comp/mi 12 Nov 2017 12:09:46 -0000 @@ -3024,6 +3024,9 @@ ./usr/include/termios.h comp-c-include ./usr/include/threadlib.h comp-obsolete obsolete ./usr/include/time.h comp-c-include +./usr/include/tre/regex.h comp-c-include +./usr/include/tre/tre-config.h comp-c-include +./usr/include/tre/tre.h comp-c-include ./usr/include/trousers/trousers.h comp-c-include tpm ./usr/include/trousers/tss.h comp-c-include tpm ./usr/include/tss/compat11b.h comp-c-include tpm Index: etc/mtree/NetBSD.dist.base =================================================================== RCS file: /cvsroot/src/etc/mtree/NetBSD.dist.base,v retrieving revision 1.163 diff -u -r1.163 NetBSD.dist.base --- etc/mtree/NetBSD.dist.base 21 Oct 2017 05:30:48 -0000 1.163 +++ etc/mtree/NetBSD.dist.base 12 Nov 2017 12:09:46 -0000 @@ -235,6 +235,7 @@ ./usr/include/security ./usr/include/ssp ./usr/include/sys +./usr/include/tre ./usr/include/trousers ./usr/include/tss ./usr/include/ufs Index: external/bsd/nvi/dist/common/multibyte.h =================================================================== RCS file: /home/netbsd/src/external/bsd/nvi/dist/common/multibyte.h,v 
retrieving revision 1.4 diff -u -r1.4 multibyte.h --- external/bsd/nvi/dist/common/multibyte.h 13 Nov 2017 01:34:59 -0000 1.4 +++ external/bsd/nvi/dist/common/multibyte.h 13 Nov 2017 02:42:49 -0000 @@ -113,4 +113,12 @@ ((void *)((char *)MEMCPY(p, t, len) + (len) * sizeof(*(p)))) #define SIZE(w) (sizeof(w)/sizeof(*w)) +#if defined(USE_WIDECHAR) && defined(HAVE_TRE) +#define REGCOMP regwcomp +#define REGEXEC regwexec +#else +#define REGCOMP regcomp +#define REGEXEC regexec +#endif + #endif Index: external/bsd/nvi/dist/common/search.c =================================================================== RCS file: /home/netbsd/src/external/bsd/nvi/dist/common/search.c,v retrieving revision 1.3 diff -u -r1.3 search.c --- external/bsd/nvi/dist/common/search.c 26 Jan 2014 21:43:45 -0000 1.3 +++ external/bsd/nvi/dist/common/search.c 13 Nov 2017 02:42:49 -0000 @@ -237,7 +237,7 @@ lno, coff, len != 0 ? len - 1 : len); #endif /* Search the line. */ - eval = regexec(&sp->re_c, l, 1, match, + eval = REGEXEC(&sp->re_c, l, 1, match, (match[0].rm_so == 0 ? 0 : REG_NOTBOL) | REG_STARTEND); if (eval == REG_NOMATCH) continue; @@ -374,7 +374,7 @@ "B search: %lu from 0 to %qu\n", lno, match[0].rm_eo); #endif /* Search the line. */ - eval = regexec(&sp->re_c, l, 1, match, + eval = REGEXEC(&sp->re_c, l, 1, match, ((size_t)match[0].rm_eo == len ? 0 : REG_NOTEOL) | REG_STARTEND); if (eval == REG_NOMATCH) continue; @@ -409,7 +409,7 @@ if ((size_t)match[0].rm_so >= len) break; match[0].rm_eo = len; - eval = regexec(&sp->re_c, l, 1, match, + eval = REGEXEC(&sp->re_c, l, 1, match, (match[0].rm_so == 0 ? 
0 : REG_NOTBOL) | REG_STARTEND); if (eval == REG_NOMATCH) Index: external/bsd/nvi/dist/ex/ex_global.c =================================================================== RCS file: /home/netbsd/src/external/bsd/nvi/dist/ex/ex_global.c,v retrieving revision 1.5 diff -u -r1.5 ex_global.c --- external/bsd/nvi/dist/ex/ex_global.c 26 Jan 2014 21:43:45 -0000 1.5 +++ external/bsd/nvi/dist/ex/ex_global.c 13 Nov 2017 02:42:49 -0000 @@ -216,7 +216,7 @@ match[0].rm_so = 0; match[0].rm_eo = len; switch (eval = - regexec(&sp->re_c, dbp, 0, match, REG_STARTEND)) { + REGEXEC(&sp->re_c, dbp, 0, match, REG_STARTEND)) { case 0: if (cmd == V) continue; Index: external/bsd/nvi/dist/ex/ex_subst.c =================================================================== RCS file: /home/netbsd/src/external/bsd/nvi/dist/ex/ex_subst.c,v retrieving revision 1.4 diff -u -r1.4 ex_subst.c --- external/bsd/nvi/dist/ex/ex_subst.c 26 Jan 2014 21:43:45 -0000 1.4 +++ external/bsd/nvi/dist/ex/ex_subst.c 13 Nov 2017 02:42:49 -0000 @@ -557,7 +557,7 @@ match[0].rm_eo = len; /* Get the next match. */ - eval = regexec(re, st + offset, 10, match, eflags); + eval = REGEXEC(re, st + offset, 10, match, eflags); /* * There wasn't a match or if there was an error, deal with @@ -992,7 +992,7 @@ * Regcomp isn't 8-bit clean, so we just lost if the pattern * contained a nul. Bummer! 
*/ - if ((rval = regcomp(rep, ptrn, /* plen, */ reflags)) != 0) { + if ((rval = REGCOMP(rep, ptrn, /* plen, */ reflags)) != 0) { if (LF_ISSET(SEARCH_MSG)) re_error(sp, rval, rep); return (1); Index: external/bsd/nvi/usr.bin/nvi/Makefile =================================================================== RCS file: /cvsroot/src/external/bsd/nvi/usr.bin/nvi/Makefile,v retrieving revision 1.12 diff -u -r1.12 Makefile --- external/bsd/nvi/usr.bin/nvi/Makefile 13 Nov 2017 04:09:41 -0000 1.12 +++ external/bsd/nvi/usr.bin/nvi/Makefile 13 Nov 2017 04:10:29 -0000 @@ -1,8 +1,12 @@ -# $NetBSD: Makefile,v 1.12 2017/11/13 04:09:41 rin Exp $ +# $NetBSD: Makefile,v 1.11 2017/11/13 02:33:13 rin Exp $ .include <bsd.own.mk> +.ifndef SMALLPROG USE_WIDECHAR?=yes +.else +USE_WIDECHAR=no +.endif CWARNFLAGS.clang+= -Wno-uninitialized -Wno-format-security .if ${USE_WIDECHAR} != "yes" @@ -57,8 +61,12 @@ # For wide char support .if ${USE_WIDECHAR} == "yes" -SRCS+= regcomp.c regerror.c regexec.c regfree.c -CPPFLAGS+=-I${DIST}/regex -D__REGEX_PRIVATE -DUSE_WIDECHAR +CPPFLAGS+=-DUSE_WIDECHAR +CPPFLAGS+=-I${DESTDIR}/usr/include/tre -DHAVE_TRE +LDADD+= -ltre +DPADD+= ${LIBTRE} +#SRCS+= regcomp.c regerror.c regexec.c regfree.c +#CPPFLAGS+=-I${DIST}/regex -D__REGEX_PRIVATE .endif # For db3 db1 emulation Index: external/bsd/tre/Makefile.inc =================================================================== RCS file: /cvsroot/src/external/bsd/tre/Makefile.inc,v retrieving revision 1.2 diff -u -r1.2 Makefile.inc --- external/bsd/tre/Makefile.inc 5 Nov 2011 22:39:12 -0000 1.2 +++ external/bsd/tre/Makefile.inc 12 Nov 2017 12:09:46 -0000 @@ -7,7 +7,5 @@ CPPFLAGS+= -I${TREDIST}/lib -I${.CURDIR}/../include CPPFLAGS+= -DHAVE_CONFIG_H=1 CPPFLAGS+= -DTRE_SYSTEM_REGEX_H_PATH=\"${NETBSDSRCDIR}/include/regex.h\" -CPPFLAGS+= -DTRE_USE_SYSTEM_REGEX_H=1 - WARNS= 4 Index: external/bsd/tre/include/tre-config.h =================================================================== RCS file: 
/cvsroot/src/external/bsd/tre/include/tre-config.h,v retrieving revision 1.1 diff -u -r1.1 tre-config.h --- external/bsd/tre/include/tre-config.h 5 Nov 2011 22:39:13 -0000 1.1 +++ external/bsd/tre/include/tre-config.h 12 Nov 2017 12:09:46 -0000 @@ -23,10 +23,12 @@ #define TRE_MULTIBYTE 1 /* Define to the absolute path to the system tre.h */ -/* #undef TRE_SYSTEM_REGEX_H_PATH */ +#ifndef TRE_SYSTEM_REGEX_H_PATH +#define TRE_SYSTEM_REGEX_H_PATH "/usr/include/regex.h" +#endif /* Define to include the system regex.h from tre.h */ -/* #undef TRE_USE_SYSTEM_REGEX_H */ +#define TRE_USE_SYSTEM_REGEX_H /* Define to enable wide character (wchar_t) support. */ #define TRE_WCHAR 1 Index: external/bsd/tre/lib/Makefile =================================================================== RCS file: /cvsroot/src/external/bsd/tre/lib/Makefile,v retrieving revision 1.2 diff -u -r1.2 Makefile --- external/bsd/tre/lib/Makefile 6 Nov 2011 10:55:27 -0000 1.2 +++ external/bsd/tre/lib/Makefile 12 Nov 2017 12:09:46 -0000 @@ -5,10 +5,11 @@ # ./configure --prefix=/usr --without-alloca .include <bsd.own.mk> -TREDIST= ${.CURDIR}/../dist +TREDIR= ${.CURDIR}/.. 
# external tre sources -.PATH: ${TREDIST}/lib +.PATH: ${TREDIR}/dist/lib +.PATH: ${TREDIR}/include CPPFLAGS+= -I${.CURDIR} @@ -19,6 +20,9 @@ SRCS+= tre-match-backtrack.c tre-match-parallel.c tre-mem.c SRCS+= tre-parse.c tre-stack.c xmalloc.c +INCS= regex.h tre-config.h tre.h +INCSDIR=/usr/include/tre + WARNS= 4 .include <bsd.lib.mk> Index: rescue/Makefile =================================================================== RCS file: /home/netbsd/src/rescue/Makefile,v retrieving revision 1.33 diff -u -r1.33 Makefile --- rescue/Makefile 8 Oct 2017 15:02:33 -0000 1.33 +++ rescue/Makefile 12 Nov 2017 15:06:26 -0000 @@ -20,10 +20,12 @@ CRUNCHBIN= rescue CRUNCHENV= RESCUEDIR=${RESCUEDIR} SMALLPROG= 0 -LISTS= ${.CURDIR}/list TARGETDIR= ${DESTDIR}/rescue PARSELISTENV+= TARGETDIR=${TARGETDIR:Q} +LISTS= ${.CURDIR}/list +CRUNCHENV+= USE_WIDECHAR=no # for nvi + .for f in ldconfig PROG_${f}!= cd ${NETBSDSRCDIR}/sbin/${f} && ${MAKE} -V PROG .if (${PROG_${f}} != "")