By default, nvi uses its bundled regex, which handles wchar_t strings.
However, it is still buggy for wide chars. On the other hand, tre also
provides wchar_t versions of the regex routines, which are "much better"
as far as I can see. For example, bracket expressions work fine, at least
for the ja_JP.UTF-8 and ja_JP.eucJP locales, whereas they do not with the
nvi-bundled regex. I therefore propose to switch nvi from the bundled
regex to tre. For this purpose,

- Install the headers from tre into /usr/include/tre, which may also be
  useful for third-party software.
  (Alternatively: do not install them, and have nvi use them directly
  from somewhere under external/bsd/tre.)

- Build /rescue/vi (and other crunched-binary versions of nvi in
  miniroot) with USE_WIDECHAR == "no" so that the extra library is not
  linked into them. Actually, this is not a real problem; they cannot
  handle wide chars even if they are built with USE_WIDECHAR == "yes",
  because the locale support in libc does not work for statically
  linked binaries.

Any comments or suggestions?

rin
Index: distrib/sets/lists/base/mi
===================================================================
RCS file: /cvsroot/src/distrib/sets/lists/base/mi,v
retrieving revision 1.1164
diff -u -r1.1164 mi
--- distrib/sets/lists/base/mi  24 Oct 2017 02:22:09 -0000      1.1164
+++ distrib/sets/lists/base/mi  12 Nov 2017 12:09:33 -0000
@@ -1219,6 +1219,7 @@
 ./usr/include/ss                               base-obsolete           obsolete
 ./usr/include/ssp                              base-c-usr
 ./usr/include/sys                              base-c-usr
+./usr/include/tre                              base-c-usr
 ./usr/include/trousers                         base-c-usr
 ./usr/include/tss                              base-c-usr
 ./usr/include/ufs                              base-c-usr
Index: distrib/sets/lists/comp/mi
===================================================================
RCS file: /cvsroot/src/distrib/sets/lists/comp/mi,v
retrieving revision 1.2159
diff -u -r1.2159 mi
--- distrib/sets/lists/comp/mi  7 Nov 2017 22:20:05 -0000       1.2159
+++ distrib/sets/lists/comp/mi  12 Nov 2017 12:09:46 -0000
@@ -3024,6 +3024,9 @@
 ./usr/include/termios.h                                comp-c-include
 ./usr/include/threadlib.h                      comp-obsolete           obsolete
 ./usr/include/time.h                           comp-c-include
+./usr/include/tre/regex.h                      comp-c-include
+./usr/include/tre/tre-config.h                 comp-c-include
+./usr/include/tre/tre.h                                comp-c-include
 ./usr/include/trousers/trousers.h              comp-c-include          tpm
 ./usr/include/trousers/tss.h                   comp-c-include          tpm
 ./usr/include/tss/compat11b.h                  comp-c-include          tpm
Index: etc/mtree/NetBSD.dist.base
===================================================================
RCS file: /cvsroot/src/etc/mtree/NetBSD.dist.base,v
retrieving revision 1.163
diff -u -r1.163 NetBSD.dist.base
--- etc/mtree/NetBSD.dist.base  21 Oct 2017 05:30:48 -0000      1.163
+++ etc/mtree/NetBSD.dist.base  12 Nov 2017 12:09:46 -0000
@@ -235,6 +235,7 @@
 ./usr/include/security
 ./usr/include/ssp
 ./usr/include/sys
+./usr/include/tre
 ./usr/include/trousers
 ./usr/include/tss
 ./usr/include/ufs
Index: external/bsd/nvi/dist/common/multibyte.h
===================================================================
RCS file: /home/netbsd/src/external/bsd/nvi/dist/common/multibyte.h,v
retrieving revision 1.4
diff -u -r1.4 multibyte.h
--- external/bsd/nvi/dist/common/multibyte.h    13 Nov 2017 01:34:59 -0000      1.4
+++ external/bsd/nvi/dist/common/multibyte.h    13 Nov 2017 02:42:49 -0000
@@ -113,4 +113,12 @@
        ((void *)((char *)MEMCPY(p, t, len) + (len) * sizeof(*(p))))
 #define SIZE(w)                (sizeof(w)/sizeof(*w))
 
+#if defined(USE_WIDECHAR) && defined(HAVE_TRE)
+#define        REGCOMP regwcomp
+#define        REGEXEC regwexec
+#else
+#define        REGCOMP regcomp
+#define        REGEXEC regexec
+#endif
+
 #endif
Index: external/bsd/nvi/dist/common/search.c
===================================================================
RCS file: /home/netbsd/src/external/bsd/nvi/dist/common/search.c,v
retrieving revision 1.3
diff -u -r1.3 search.c
--- external/bsd/nvi/dist/common/search.c       26 Jan 2014 21:43:45 -0000      1.3
+++ external/bsd/nvi/dist/common/search.c       13 Nov 2017 02:42:49 -0000
@@ -237,7 +237,7 @@
                    lno, coff, len != 0 ? len - 1 : len);
 #endif
                /* Search the line. */
-               eval = regexec(&sp->re_c, l, 1, match,
+               eval = REGEXEC(&sp->re_c, l, 1, match,
                    (match[0].rm_so == 0 ? 0 : REG_NOTBOL) | REG_STARTEND);
                if (eval == REG_NOMATCH)
                        continue;
@@ -374,7 +374,7 @@
                    "B search: %lu from 0 to %qu\n", lno, match[0].rm_eo);
 #endif
                /* Search the line. */
-               eval = regexec(&sp->re_c, l, 1, match,
+               eval = REGEXEC(&sp->re_c, l, 1, match,
                    ((size_t)match[0].rm_eo == len ? 0 : REG_NOTEOL) | REG_STARTEND);
                if (eval == REG_NOMATCH)
                        continue;
@@ -409,7 +409,7 @@
                        if ((size_t)match[0].rm_so >= len)
                                break;
                        match[0].rm_eo = len;
-                       eval = regexec(&sp->re_c, l, 1, match,
+                       eval = REGEXEC(&sp->re_c, l, 1, match,
                            (match[0].rm_so == 0 ? 0 : REG_NOTBOL) |
                            REG_STARTEND);
                        if (eval == REG_NOMATCH)
Index: external/bsd/nvi/dist/ex/ex_global.c
===================================================================
RCS file: /home/netbsd/src/external/bsd/nvi/dist/ex/ex_global.c,v
retrieving revision 1.5
diff -u -r1.5 ex_global.c
--- external/bsd/nvi/dist/ex/ex_global.c        26 Jan 2014 21:43:45 -0000      1.5
+++ external/bsd/nvi/dist/ex/ex_global.c        13 Nov 2017 02:42:49 -0000
@@ -216,7 +216,7 @@
                match[0].rm_so = 0;
                match[0].rm_eo = len;
                switch (eval =
-                   regexec(&sp->re_c, dbp, 0, match, REG_STARTEND)) {
+                   REGEXEC(&sp->re_c, dbp, 0, match, REG_STARTEND)) {
                case 0:
                        if (cmd == V)
                                continue;
Index: external/bsd/nvi/dist/ex/ex_subst.c
===================================================================
RCS file: /home/netbsd/src/external/bsd/nvi/dist/ex/ex_subst.c,v
retrieving revision 1.4
diff -u -r1.4 ex_subst.c
--- external/bsd/nvi/dist/ex/ex_subst.c 26 Jan 2014 21:43:45 -0000      1.4
+++ external/bsd/nvi/dist/ex/ex_subst.c 13 Nov 2017 02:42:49 -0000
@@ -557,7 +557,7 @@
                match[0].rm_eo = len;
 
                /* Get the next match. */
-               eval = regexec(re, st + offset, 10, match, eflags);
+               eval = REGEXEC(re, st + offset, 10, match, eflags);
 
                /*
                 * There wasn't a match or if there was an error, deal with
@@ -992,7 +992,7 @@
         * Regcomp isn't 8-bit clean, so we just lost if the pattern
         * contained a nul.  Bummer!
         */
-       if ((rval = regcomp(rep, ptrn, /* plen, */ reflags)) != 0) {
+       if ((rval = REGCOMP(rep, ptrn, /* plen, */ reflags)) != 0) {
                if (LF_ISSET(SEARCH_MSG))
                        re_error(sp, rval, rep); 
                return (1);
Index: external/bsd/nvi/usr.bin/nvi/Makefile
===================================================================
RCS file: /cvsroot/src/external/bsd/nvi/usr.bin/nvi/Makefile,v
retrieving revision 1.12
diff -u -r1.12 Makefile
--- external/bsd/nvi/usr.bin/nvi/Makefile       13 Nov 2017 04:09:41 -0000      1.12
+++ external/bsd/nvi/usr.bin/nvi/Makefile       13 Nov 2017 04:10:29 -0000
@@ -1,8 +1,12 @@
-#      $NetBSD: Makefile,v 1.12 2017/11/13 04:09:41 rin Exp $
+#      $NetBSD: Makefile,v 1.11 2017/11/13 02:33:13 rin Exp $
 
 .include <bsd.own.mk>
 
+.ifndef SMALLPROG
 USE_WIDECHAR?=yes
+.else
+USE_WIDECHAR=no
+.endif
 
 CWARNFLAGS.clang+=     -Wno-uninitialized -Wno-format-security
 .if ${USE_WIDECHAR} != "yes"
@@ -57,8 +61,12 @@
 
 # For wide char support
 .if ${USE_WIDECHAR} == "yes"
-SRCS+= regcomp.c regerror.c regexec.c regfree.c
-CPPFLAGS+=-I${DIST}/regex -D__REGEX_PRIVATE -DUSE_WIDECHAR
+CPPFLAGS+=-DUSE_WIDECHAR
+CPPFLAGS+=-I${DESTDIR}/usr/include/tre -DHAVE_TRE
+LDADD+=        -ltre
+DPADD+=        ${LIBTRE}
+#SRCS+=        regcomp.c regerror.c regexec.c regfree.c
+#CPPFLAGS+=-I${DIST}/regex -D__REGEX_PRIVATE
 .endif
 
 # For db3 db1 emulation
Index: external/bsd/tre/Makefile.inc
===================================================================
RCS file: /cvsroot/src/external/bsd/tre/Makefile.inc,v
retrieving revision 1.2
diff -u -r1.2 Makefile.inc
--- external/bsd/tre/Makefile.inc       5 Nov 2011 22:39:12 -0000       1.2
+++ external/bsd/tre/Makefile.inc       12 Nov 2017 12:09:46 -0000
@@ -7,7 +7,5 @@
 CPPFLAGS+=     -I${TREDIST}/lib -I${.CURDIR}/../include
 CPPFLAGS+=     -DHAVE_CONFIG_H=1
 CPPFLAGS+=     -DTRE_SYSTEM_REGEX_H_PATH=\"${NETBSDSRCDIR}/include/regex.h\"
-CPPFLAGS+=     -DTRE_USE_SYSTEM_REGEX_H=1
-
 
 WARNS= 4
Index: external/bsd/tre/include/tre-config.h
===================================================================
RCS file: /cvsroot/src/external/bsd/tre/include/tre-config.h,v
retrieving revision 1.1
diff -u -r1.1 tre-config.h
--- external/bsd/tre/include/tre-config.h       5 Nov 2011 22:39:13 -0000       1.1
+++ external/bsd/tre/include/tre-config.h       12 Nov 2017 12:09:46 -0000
@@ -23,10 +23,12 @@
 #define TRE_MULTIBYTE 1
 
 /* Define to the absolute path to the system tre.h */
-/* #undef TRE_SYSTEM_REGEX_H_PATH */
+#ifndef TRE_SYSTEM_REGEX_H_PATH
+#define TRE_SYSTEM_REGEX_H_PATH "/usr/include/regex.h"
+#endif
 
 /* Define to include the system regex.h from tre.h */
-/* #undef TRE_USE_SYSTEM_REGEX_H */
+#define TRE_USE_SYSTEM_REGEX_H
 
 /* Define to enable wide character (wchar_t) support. */
 #define TRE_WCHAR 1
Index: external/bsd/tre/lib/Makefile
===================================================================
RCS file: /cvsroot/src/external/bsd/tre/lib/Makefile,v
retrieving revision 1.2
diff -u -r1.2 Makefile
--- external/bsd/tre/lib/Makefile       6 Nov 2011 10:55:27 -0000       1.2
+++ external/bsd/tre/lib/Makefile       12 Nov 2017 12:09:46 -0000
@@ -5,10 +5,11 @@
 #      ./configure --prefix=/usr --without-alloca
 
 .include <bsd.own.mk>
-TREDIST=       ${.CURDIR}/../dist
+TREDIR=        ${.CURDIR}/..
 
 # external tre sources
-.PATH: ${TREDIST}/lib
+.PATH: ${TREDIR}/dist/lib
+.PATH: ${TREDIR}/include
 
 CPPFLAGS+=     -I${.CURDIR}
 
@@ -19,6 +20,9 @@
 SRCS+= tre-match-backtrack.c tre-match-parallel.c tre-mem.c
 SRCS+= tre-parse.c tre-stack.c xmalloc.c
 
+INCS=  regex.h tre-config.h tre.h
+INCSDIR=/usr/include/tre
+
 WARNS= 4
 
 .include <bsd.lib.mk>
Index: rescue/Makefile
===================================================================
RCS file: /home/netbsd/src/rescue/Makefile,v
retrieving revision 1.33
diff -u -r1.33 Makefile
--- rescue/Makefile     8 Oct 2017 15:02:33 -0000       1.33
+++ rescue/Makefile     12 Nov 2017 15:06:26 -0000
@@ -20,10 +20,12 @@
 CRUNCHBIN=     rescue
 CRUNCHENV=     RESCUEDIR=${RESCUEDIR}
 SMALLPROG=     0
-LISTS=         ${.CURDIR}/list
 TARGETDIR=     ${DESTDIR}/rescue
 PARSELISTENV+=  TARGETDIR=${TARGETDIR:Q}
 
+LISTS=         ${.CURDIR}/list
+CRUNCHENV+=    USE_WIDECHAR=no         # for nvi
+
 .for f in ldconfig
 PROG_${f}!=    cd ${NETBSDSRCDIR}/sbin/${f} && ${MAKE} -V PROG
 .if (${PROG_${f}} != "")

Reply via email to