git: 0bb4c188d363 - stable/14 - MFC: MFV: less v668.

Xin LI Sat, 21 Dec 2024 23:47:20 -0800

The branch stable/14 has been updated by delphij:

URL: https://cgit.FreeBSD.org/src/commit/?id=0bb4c188d363a24b856b9b885717a0dd15c5b2de


commit 0bb4c188d363a24b856b9b885717a0dd15c5b2de
Author:     Xin LI <delp...@freebsd.org>
AuthorDate: 2024-12-09 04:50:00 +0000
Commit:     Xin LI <delp...@freebsd.org>
CommitDate: 2024-12-22 07:46:59 +0000

    MFC: MFV: less v668.
    
    (cherry picked from commit c77c488926555ca344ae3a417544cf7a720e1de1)
---
 contrib/less/LICENSE         |   2 +-
 contrib/less/NEWS            | 134 ++++++++
 contrib/less/brac.c          |   2 +-
 contrib/less/ch.c            | 131 ++++----
 contrib/less/charset.c       | 181 +++++-----
 contrib/less/charset.h       |   2 +-
 contrib/less/cmd.h           |   6 +-
 contrib/less/cmdbuf.c        | 217 ++++++------
 contrib/less/command.c       | 392 ++++++++++++++--------
 contrib/less/compose.uni     |   2 +-
 contrib/less/cvt.c           |  33 +-
 contrib/less/decode.c        | 376 +++++++++++++++------
 contrib/less/edit.c          | 362 ++++++++++----------
 contrib/less/evar.c          | 192 +++++++++++
 contrib/less/filename.c      | 430 ++++++++++++------------
 contrib/less/fmt.uni         |   2 +-
 contrib/less/forwback.c      |  84 +++--
 contrib/less/funcs.h         | 387 ++++++++++++----------
 contrib/less/help.c          |  54 +--
 contrib/less/ifile.c         |  18 +-
 contrib/less/input.c         | 109 +++---
 contrib/less/jump.c          |  51 ++-
 contrib/less/lang.h          |  57 ++++
 contrib/less/less.h          | 125 ++++---
 contrib/less/less.hlp        |  52 ++-
 contrib/less/less.nro        | 389 +++++++++++++++++-----
 contrib/less/lessecho.c      |  15 +-
 contrib/less/lessecho.nro    |   6 +-
 contrib/less/lesskey.c       |  73 ++--
 contrib/less/lesskey.h       |  24 +-
 contrib/less/lesskey.nro     |  38 ++-
 contrib/less/lesskey_parse.c | 114 +++++--
 contrib/less/lglob.h         |   2 +-
 contrib/less/line.c          | 440 ++++++++++++++++--------
 contrib/less/linenum.c       |  83 +++--
 contrib/less/lsystem.c       |  17 +-
 contrib/less/main.c          | 272 ++++++++++++---
 contrib/less/mark.c          |  36 +-
 contrib/less/optfunc.c       | 533 ++++++++++++++++++-----------
 contrib/less/option.c        | 262 ++++++++++-----
 contrib/less/option.h        |   9 +-
 contrib/less/opttbl.c        | 113 ++++---
 contrib/less/os.c            |  64 ++--
 contrib/less/output.c        | 501 ++++++++++++++++------------
 contrib/less/pattern.c       |  40 +--
 contrib/less/pattern.h       |   6 +-
 contrib/less/pckeys.h        |   2 +-
 contrib/less/position.c      |  70 +++-
 contrib/less/position.h      |   2 +-
 contrib/less/prompt.c        | 112 ++++---
 contrib/less/regexp.c        | 120 +++----
 contrib/less/regexp.h        |  14 +-
 contrib/less/screen.c        | 744 ++++++++++++++++++++++++++---------------
 contrib/less/scrsize.c       |   2 +-
 contrib/less/search.c        | 774 +++++++++++++++++++++++++++++++++++++------
 contrib/less/signal.c        |  22 +-
 contrib/less/tags.c          |  79 ++---
 contrib/less/ttyin.c         |  47 ++-
 contrib/less/ubin.uni        |   2 +-
 contrib/less/version.c       |  47 ++-
 contrib/less/wide.uni        |   6 +-
 contrib/less/xbuf.c          |  64 +++-
 contrib/less/xbuf.h          |  13 +-
 usr.bin/less/Makefile        |   2 +-
 usr.bin/less/defines.h       |  62 ++--
 65 files changed, 5718 insertions(+), 2904 deletions(-)

diff --git a/contrib/less/LICENSE b/contrib/less/LICENSE
index d22cc6070c51..d4f3bf6d1ce6 100644
--- a/contrib/less/LICENSE
+++ b/contrib/less/LICENSE
@@ -2,7 +2,7 @@
                           ------------
 
 Less
-Copyright (C) 1984-2023  Mark Nudelman
+Copyright (C) 1984-2024  Mark Nudelman
 
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
diff --git a/contrib/less/NEWS b/contrib/less/NEWS
index b1f8771c6c31..e869439ea4ee 100644
--- a/contrib/less/NEWS
+++ b/contrib/less/NEWS
@@ -9,6 +9,140 @@
   Report bugs, suggestions or comments at 
   https://github.com/gwsw/less/issues.
 
+======================================================================
+
+       Major changes between "less" versions 661 and 668
+
+* Make 256/true colors work better on Windows without -Da
+  (github #539, github #546, github #562).
+
+* Fix build using --with-secure (github #544).
+
+* Fix crash when using --header on command line (github #545).
+
+* Fix possible crash when scrolling left/right or toggling -S (github #547).
+
+* Fix bug when using #stop in a lesskey file (github #551).
+
+* Fix bug when using --shift or --match-shift on command line with
+  a parameter starting with '.' (github #554).
+
+* Fix bug in R command when file size changes (github #553).
+
+* Fix bug using --header when file does not fill screen (github #556).
+
+* Fix ^X bug when output is not a terminal (github #558).
+
+* Fix bug where ^Z is not handled immediately (github #563).
+
+* Fix bug where first byte from a LESSOPEN filter is deleted if it is
+  greater than 0x7F (github #568).
+
+* Fix uninitialized variable in edit_ifile (github #573).
+
+* Fix incorrect handling of UTF-8 chars in prompts (github #576).
+
+======================================================================
+
+       Major changes between "less" versions 643 and 661
+
+* Add ^O^N, ^O^P, ^O^L and ^O^O commands and mouse clicks (with --mouse)
+  to find and open OSC8 hyperlinks (github #251).
+
+* Add --match-shift option.
+
+* Add --lesskey-content option (github #447).
+
+* Add LESSKEY_CONTENT environment variable (github #447).
+
+* Add --no-search-header-lines and --no-search-header-columns options
+  (github #397).
+
+* Add ctrl-L search modifier (github #367).
+
+* A ctrl-P at the start of a shell command suppresses the "done" 
+  message (github #462).
+
+* Add attribute characters ('*', '~', '_', '&') to --color
+  parameter (github #471).
+
+* Allow expansion of environment variables in lesskey files.
+
+* Add LESSSECURE_ALLOW environment variable (github #449).
+
+* Add LESS_UNSUPPORT environment variable.
+
+* Add line number parameter to --header option (github #436).
+
+* Mouse right-click jumps to position marked by left-click (github #390).
+
+* Ensure that the target line is not obscured by a header line 
+  set by --header (github #444).
+
+* Change default character set to "utf-8", except remains "dos" on MS-DOS.
+
+* Add message when search with ^W wraps (github #459).
+
+* UCRT builds on Windows 10 and later now support Unicode file names
+  (github #438).
+
+* Improve behavior of interrupt while reading non-terminated pipe
+  (github #414).
+
+* Improve parsing of -j, -x and -# options (github #393).
+
+* Support files larger than 4GB on Windows (github #417).
+
+* Support entry of Unicode chars larger than U+FFFF on Windows (github #391).
+
+* Improve colors of bold, underline and standout text on Windows.
+
+* Allow --rscroll to accept non-ASCII characters (github #483).
+
+* Allow the parameter to certain options to be terminated with a
+  space (--color, --quotes, --rscroll, --search-options 
+  and --intr) (github #495).
+
+* Fix bug where # substitution failed after viewing help (github #420).
+
+* Fix crash if files are deleted while less is viewing them (github #404).
+
+* Workaround unreliable ReadConsoleInputW behavior on Windows
+  with non-ASCII input.
+
+* Fix -J display when searching for non-ASCII characters (github #422).
+
+* Don't filter header lines via the & command (github #423).
+
+* Fix bug when horizontally shifting long lines (github #425).
+
+* Add -x and -D options to lesstest, to make it easier to diagnose
+  a failed lesstest run.
+
+* Fix bug searching long lines with --incsearch and -S (github #428).
+
+* Fix bug that made ESC-} fail if top line on screen was empty (github #429).
+
+* Fix bug with --mouse on Windows when used with pipes (github #440).
+
+* Fix bug in --+OPTION command line syntax.
+
+* Fix display bug when using -w with an empty line with a CR/LF
+  line ending (github #474).
+
+* When substituting '#' or '%' with a filename, quote the filename
+  if it contains a space (github #480).
+
+* Fix wrong sleep time when system has usleep but not nanosleep (github #489).
+
+* Fix bug when file name contains a newline.
+
+* Fix bug when file name contains nonprintable characters (github #503).
+
+* Fix DJGPP build (github #497).
+
+* Update Unicode tables.
+
 ======================================================================
 
        Major changes between "less" versions 633 and 643
diff --git a/contrib/less/brac.c b/contrib/less/brac.c
index da4efab8605e..170cd73da516 100644
--- a/contrib/less/brac.c
+++ b/contrib/less/brac.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 1984-2023  Mark Nudelman
+ * Copyright (C) 1984-2024  Mark Nudelman
  *
  * You may distribute under the terms of either the GNU General Public
  * License or the Less License, as specified in the README file.
diff --git a/contrib/less/ch.c b/contrib/less/ch.c
index 51045438a296..336af7792120 100644
--- a/contrib/less/ch.c
+++ b/contrib/less/ch.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 1984-2023  Mark Nudelman
+ * Copyright (C) 1984-2024  Mark Nudelman
  *
  * You may distribute under the terms of either the GNU General Public
  * License or the Less License, as specified in the README file.
@@ -20,13 +20,6 @@
 #include <windows.h>
 #endif
 
-#if HAVE_PROCFS
-#include <sys/statfs.h>
-#if HAVE_LINUX_MAGIC_H
-#include <linux/magic.h>
-#endif
-#endif
-
 typedef POSITION BLOCKNUM;
 
 public int ignore_eoi;
@@ -46,7 +39,7 @@ struct bufnode {
 struct buf {
        struct bufnode node;
        BLOCKNUM block;
-       unsigned int datasize;
+       size_t datasize;
        unsigned char data[LBUFSIZE];
 };
 #define bufnode_buf(bn)  ((struct buf *) bn)
@@ -64,7 +57,7 @@ struct filestate {
        POSITION fpos;
        int nbufs;
        BLOCKNUM block;
-       unsigned int offset;
+       size_t offset;
        POSITION fsize;
 };
 
@@ -122,15 +115,14 @@ struct filestate {
        thisfile->hashtbl[h].hnext = (bn);
 
 static struct filestate *thisfile;
-static int ch_ungotchar = -1;
+static unsigned char ch_ungotchar;
+static lbool ch_have_ungotchar = FALSE;
 static int maxbufs = -1;
 
 extern int autobuf;
 extern int sigs;
-extern int secure;
-extern int screen_trashed;
 extern int follow_mode;
-extern int waiting_for_data;
+extern lbool waiting_for_data;
 extern constant char helpdata[];
 extern constant int size_helpdata;
 extern IFILE curr_ifile;
@@ -141,6 +133,13 @@ extern char *namelogfile;
 
 static int ch_addbuf();
 
+/*
+ * Return the file position corresponding to an offset within a block.
+ */
+static POSITION ch_position(BLOCKNUM block, size_t offset)
+{
+       return (block * LBUFSIZE) + (POSITION) offset;
+}
 
 /*
  * Get the character pointed to by the read pointer.
@@ -149,8 +148,8 @@ static int ch_get(void)
 {
        struct buf *bp;
        struct bufnode *bn;
-       int n;
-       int read_again;
+       ssize_t n;
+       lbool read_again;
        int h;
        POSITION pos;
        POSITION len;
@@ -187,6 +186,8 @@ static int ch_get(void)
                        goto found;
                }
        }
+       if (ABORT_SIGS())
+               return (EOI);
        if (bn == END_OF_HCHAIN(h))
        {
                /*
@@ -223,7 +224,7 @@ static int ch_get(void)
 
        for (;;)
        {
-               pos = (ch_block * LBUFSIZE) + bp->datasize;
+               pos = ch_position(ch_block, bp->datasize);
                if ((len = ch_length()) != NULL_POSITION && pos >= len)
                        /*
                         * At end of file.
@@ -239,7 +240,7 @@ static int ch_get(void)
                         */
                        if (!(ch_flags & CH_CANSEEK))
                                return ('?');
-                       if (lseek(ch_file, (off_t)pos, SEEK_SET) == BAD_LSEEK)
+                       if (less_lseek(ch_file, (less_off_t)pos, SEEK_SET) == BAD_LSEEK)
                        {
                                error("seek error", NULL_PARG);
                                clear_eol();
@@ -253,19 +254,18 @@ static int ch_get(void)
                 * If we read less than a full block, that's ok.
                 * We use partial block and pick up the rest next time.
                 */
-               if (ch_ungotchar != -1)
+               if (ch_have_ungotchar)
                {
                        bp->data[bp->datasize] = ch_ungotchar;
                        n = 1;
-                       ch_ungotchar = -1;
+                       ch_have_ungotchar = FALSE;
                } else if (ch_flags & CH_HELPFILE)
                {
-                       bp->data[bp->datasize] = helpdata[ch_fpos];
+                       bp->data[bp->datasize] = (unsigned char) helpdata[ch_fpos];
                        n = 1;
                } else
                {
-                       n = iread(ch_file, &bp->data[bp->datasize], 
-                               (unsigned int)(LBUFSIZE - bp->datasize));
+                       n = iread(ch_file, &bp->data[bp->datasize], LBUFSIZE - bp->datasize);
                }
 
                read_again = FALSE;
@@ -295,12 +295,15 @@ static int ch_get(void)
                /*
                 * If we have a log file, write the new data to it.
                 */
-               if (!secure && logfile >= 0 && n > 0)
-                       write(logfile, (char *) &bp->data[bp->datasize], n);
+               if (secure_allow(SF_LOGFILE))
+               {
+                       if (logfile >= 0 && n > 0)
+                               write(logfile, &bp->data[bp->datasize], (size_t) n);
+               }
 #endif
 
                ch_fpos += n;
-               bp->datasize += n;
+               bp->datasize += (size_t) n;
 
                if (n == 0)
                {
@@ -323,7 +326,7 @@ static int ch_get(void)
                        if (ignore_eoi && follow_mode == FOLLOW_NAME && curr_ifile_changed())
                        {
                                /* screen_trashed=2 causes make_display to reopen the file. */
-                               screen_trashed = 2;
+                               screen_trashed_num(2);
                                return (EOI);
                        }
                        if (sigs)
@@ -363,9 +366,15 @@ static int ch_get(void)
  */
 public void ch_ungetchar(int c)
 {
-       if (c != -1 && ch_ungotchar != -1)
-               error("ch_ungetchar overrun", NULL_PARG);
-       ch_ungotchar = c;
+       if (c < 0)
+               ch_have_ungotchar = FALSE;
+       else
+       {
+               if (ch_have_ungotchar)
+                       error("ch_ungetchar overrun", NULL_PARG);
+               ch_ungotchar = (unsigned char) c;
+               ch_have_ungotchar = TRUE;
+       }
 }
 
 #if LOGFILE
@@ -375,7 +384,7 @@ public void ch_ungetchar(int c)
  */
 public void end_logfile(void)
 {
-       static int tried = FALSE;
+       static lbool tried = FALSE;
 
        if (logfile < 0)
                return;
@@ -402,7 +411,7 @@ public void sync_logfile(void)
 {
        struct buf *bp;
        struct bufnode *bn;
-       int warned = FALSE;
+       lbool warned = FALSE;
        BLOCKNUM block;
        BLOCKNUM nblocks;
 
@@ -411,13 +420,13 @@ public void sync_logfile(void)
        nblocks = (ch_fpos + LBUFSIZE - 1) / LBUFSIZE;
        for (block = 0;  block < nblocks;  block++)
        {
-               int wrote = FALSE;
+               lbool wrote = FALSE;
                FOR_BUFS(bn)
                {
                        bp = bufnode_buf(bn);
                        if (bp->block == block)
                        {
-                               write(logfile, (char *) bp->data, bp->datasize);
+                               write(logfile, bp->data, bp->datasize);
                                wrote = TRUE;
                                break;
                        }
@@ -436,7 +445,7 @@ public void sync_logfile(void)
 /*
  * Determine if a specific block is currently in one of the buffers.
  */
-static int buffered(BLOCKNUM block)
+static lbool buffered(BLOCKNUM block)
 {
        struct buf *bp;
        struct bufnode *bn;
@@ -486,7 +495,7 @@ public int ch_seek(POSITION pos)
         * Set read pointer.
         */
        ch_block = new_block;
-       ch_offset = pos % LBUFSIZE;
+       ch_offset = (size_t) (pos % LBUFSIZE);
        return (0);
 }
 
@@ -533,7 +542,7 @@ public int ch_end_buffer_seek(void)
        FOR_BUFS(bn)
        {
                bp = bufnode_buf(bn);
-               buf_pos = (bp->block * LBUFSIZE) + bp->datasize;
+               buf_pos = ch_position(bp->block, bp->datasize);
                if (buf_pos > end_pos)
                        end_pos = buf_pos;
        }
@@ -597,7 +606,7 @@ public POSITION ch_tell(void)
 {
        if (thisfile == NULL)
                return (NULL_POSITION);
-       return (ch_block * LBUFSIZE) + ch_offset;
+       return ch_position(ch_block, ch_offset);
 }
 
 /*
@@ -647,14 +656,14 @@ public int ch_back_get(void)
  * Set max amount of buffer space.
  * bufspace is in units of 1024 bytes.  -1 mean no limit.
  */
-public void ch_setbufspace(int bufspace)
+public void ch_setbufspace(ssize_t bufspace)
 {
        if (bufspace < 0)
                maxbufs = -1;
        else
        {
-               int lbufk = LBUFSIZE / 1024;
-               maxbufs = bufspace / lbufk + (bufspace % lbufk != 0);
+               size_t lbufk = LBUFSIZE / 1024;
+               maxbufs = (int) (bufspace / lbufk + (bufspace % lbufk != 0));
                if (maxbufs < 1)
                        maxbufs = 1;
        }
@@ -688,11 +697,6 @@ public void ch_flush(void)
                bufnode_buf(bn)->block = -1;
        }
 
-       /*
-        * Figure out the size of the file, if we can.
-        */
-       ch_fsize = filesize(ch_file);
-
        /*
         * Seek to a known position: the beginning of the file.
         */
@@ -700,19 +704,16 @@ public void ch_flush(void)
        ch_block = 0; /* ch_fpos / LBUFSIZE; */
        ch_offset = 0; /* ch_fpos % LBUFSIZE; */
 
-       /*
-        * This is a kludge to workaround a Linux kernel bug: files in
-        * /proc have a size of 0 according to fstat() but have readable 
-        * data.  They are sometimes, but not always, seekable.
-        * Force them to be non-seekable here.
-        */
-       if (ch_fsize == 0)
+       if (ch_flags & CH_NOTRUSTSIZE)
        {
                ch_fsize = NULL_POSITION;
                ch_flags &= ~CH_CANSEEK;
+       } else
+       {
+               ch_fsize = (ch_flags & CH_HELPFILE) ? size_helpdata : filesize(ch_file);
        }
 
-       if (lseek(ch_file, (off_t)0, SEEK_SET) == BAD_LSEEK)
+       if (less_lseek(ch_file, (less_off_t)0, SEEK_SET) == BAD_LSEEK)
        {
                /*
                 * Warning only; even if the seek fails for some reason,
@@ -795,7 +796,7 @@ public int seekable(int f)
                return (0);
        }
 #endif
-       return (lseek(f, (off_t)1, SEEK_SET) != BAD_LSEEK);
+       return (less_lseek(f, (less_off_t)1, SEEK_SET) != BAD_LSEEK);
 }
 
 /*
@@ -812,7 +813,7 @@ public void ch_set_eof(void)
 /*
  * Initialize file state for a new file.
  */
-public void ch_init(int f, int flags)
+public void ch_init(int f, int flags, ssize_t nread)
 {
        /*
         * See if we already have a filestate for this file.
@@ -843,6 +844,22 @@ public void ch_init(int f, int flags)
        }
        if (thisfile->file == -1)
                thisfile->file = f;
+
+       /*
+        * Figure out the size of the file, if we can.
+        */
+       ch_fsize = (flags & CH_HELPFILE) ? size_helpdata : filesize(ch_file);
+
+       /*
+        * This is a kludge to workaround a Linux kernel bug: files in some
+        * pseudo filesystems like /proc and tracefs have a size of 0 according
+        * to fstat() but have readable data.
+        */
+       if (ch_fsize == 0 && nread > 0)
+       {
+               ch_flags |= CH_NOTRUSTSIZE;
+       }
+
        ch_flush();
 }
 
@@ -851,7 +868,7 @@ public void ch_init(int f, int flags)
  */
 public void ch_close(void)
 {
-       int keepstate = FALSE;
+       lbool keepstate = FALSE;
 
        if (thisfile == NULL)
                return;
diff --git a/contrib/less/charset.c b/contrib/less/charset.c
index 881ebafd02cf..3e8fb0fa9354 100644
--- a/contrib/less/charset.c
+++ b/contrib/less/charset.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 1984-2023  Mark Nudelman
+ * Copyright (C) 1984-2024  Mark Nudelman
  *
  * You may distribute under the terms of either the GNU General Public
  * License or the Less License, as specified in the README file.
@@ -120,8 +120,8 @@ struct cs_alias {
 #define IS_CONTROL_CHAR 02
 
 static char chardef[256];
-static char *binfmt = NULL;
-static char *utfbinfmt = NULL;
+static constant char *binfmt = NULL;
+static constant char *utfbinfmt = NULL;
 public int binattr = AT_STANDOUT|AT_COLOR_BIN;
 
 static struct xbuffer user_wide_array;
@@ -139,13 +139,13 @@ static struct wchar_range_table user_prt_table;
 static void wchar_range_table_set(struct wchar_range_table *tbl, struct xbuffer *arr)
 {
        tbl->table = (struct wchar_range *) arr->data;
-       tbl->count = arr->end / sizeof(struct wchar_range);
+       tbl->count = (unsigned int) (arr->end / sizeof(struct wchar_range));
 }
 
 /*
  * Skip over a "U" or "U+" prefix before a hex codepoint.
  */
-static char * skip_uprefix(char *s)
+static constant char * skip_uprefix(constant char *s)
 {
        if (*s == 'U' || *s == 'u')
                if (*++s == '+') ++s;
@@ -155,14 +155,14 @@ static char * skip_uprefix(char *s)
 /*
  * Parse a dash-separated range of hex values.
  */
-static void wchar_range_get(char **ss, struct wchar_range *range)
+static void wchar_range_get(constant char **ss, struct wchar_range *range)
 {
-       char *s = skip_uprefix(*ss);
-       range->first = lstrtoul(s, &s, 16);
+       constant char *s = skip_uprefix(*ss);
+       range->first = lstrtoulc(s, &s, 16);
        if (s[0] == '-')
        {
                s = skip_uprefix(&s[1]);
-               range->last = lstrtoul(s, &s, 16);
+               range->last = lstrtoulc(s, &s, 16);
        } else 
        {
                range->last = range->first;
@@ -173,7 +173,7 @@ static void wchar_range_get(char **ss, struct wchar_range *range)
 /*
  * Parse the LESSUTFCHARDEF variable.
  */
-static void ichardef_utf(char *s)
+static void ichardef_utf(constant char *s)
 {
        xbuf_init(&user_wide_array);
        xbuf_init(&user_ubin_array);
@@ -241,7 +241,7 @@ static void ichardef_utf(char *s)
  *      b binary character
  *      c control character
  */
-static void ichardef(char *s)
+static void ichardef(constant char *s)
 {
        char *cp;
        int n;
@@ -298,7 +298,7 @@ static void ichardef(char *s)
  * Define a charset, given a charset name.
  * The valid charset names are listed in the "charsets" array.
  */
-static int icharset(char *name, int no_error)
+static int icharset(constant char *name, int no_error)
 {
        struct charset *p;
        struct cs_alias *a;
@@ -363,23 +363,8 @@ static void ilocale(void)
 /*
  * Define the printing format for control (or binary utf) chars.
  */
-public void setfmt(char *s, char **fmtvarptr, int *attrptr, char *default_fmt, int for_printf)
+public void setfmt(constant char *s, constant char **fmtvarptr, int *attrptr, constant char *default_fmt, lbool for_printf)
 {
-       if (s && utf_mode)
-       {
-               /* It would be too hard to account for width otherwise.  */
-               char constant *t = s;
-               while (*t)
-               {
-                       if (*t < ' ' || *t > '~')
-                       {
-                               s = default_fmt;
-                               goto attr;
-                       }
-                       t++;
-               }
-       }
-
        if (s == NULL || *s == '\0')
                s = default_fmt;
        else if (for_printf &&
@@ -391,7 +376,6 @@ public void setfmt(char *s, char **fmtvarptr, int *attrptr, char *default_fmt, i
        /*
         * Select the attributes if it starts with "*".
         */
- attr:
        if (*s == '*' && s[1] != '\0')
        {
                switch (s[1])
@@ -412,16 +396,7 @@ public void setfmt(char *s, char **fmtvarptr, int *attrptr, char *default_fmt, i
  */
 static void set_charset(void)
 {
-       char *s;
-
-#if MSDOS_COMPILER==WIN32C
-       /*
-        * If the Windows console is using UTF-8, we'll use it too.
-        */
-       if (GetConsoleOutputCP() == CP_UTF8)
-               if (icharset("utf-8", 1))
-                       return;
-#endif
+       constant char *s;
 
        ichardef_utf(lgetenv("LESSUTFCHARDEF"));
 
@@ -476,15 +451,13 @@ static void set_charset(void)
        ilocale();
 #else
 #if MSDOS_COMPILER
-       /*
-        * Default to "dos".
-        */
+#if MSDOS_COMPILER==WIN32C
+       (void) icharset("utf-8", 1);
+#else
        (void) icharset("dos", 1);
+#endif
 #else
-       /*
-        * Default to "latin1".
-        */
-       (void) icharset("latin1", 1);
+       (void) icharset("utf-8", 1);
 #endif
 #endif
 }
@@ -494,7 +467,7 @@ static void set_charset(void)
  */
 public void init_charset(void)
 {
-       char *s;
+       constant char *s;
 
 #if HAVE_LOCALE
        setlocale(LC_ALL, "");
@@ -512,20 +485,22 @@ public void init_charset(void)
 /*
  * Is a given character a "binary" character?
  */
-public int binary_char(LWCHAR c)
+public lbool binary_char(LWCHAR c)
 {
        if (utf_mode)
                return (is_ubin_char(c));
-       c &= 0377;
-       return (chardef[c] & IS_BINARY_CHAR);
+       if (c >= sizeof(chardef))
+               return TRUE;
+       return ((chardef[c] & IS_BINARY_CHAR) != 0);
 }
 
 /*
  * Is a given character a "control" character?
  */
-public int control_char(LWCHAR c)
+public lbool control_char(LWCHAR c)
 {
-       c &= 0377;
+       if (c >= sizeof(chardef))
+               return TRUE;
        return (chardef[c] & IS_CONTROL_CHAR);
 }
 
@@ -533,12 +508,12 @@ public int control_char(LWCHAR c)
  * Return the printable form of a character.
  * For example, in the "ascii" charset '\3' is printed as "^C".
  */
-public char * prchar(LWCHAR c)
+public constant char * prchar(LWCHAR c)
 {
-       /* {{ This buffer can be overrun if LESSBINFMT is a long string. }} */
+       /* {{ Fixed buffer size means LESSBINFMT etc can be truncated. }} */
        static char buf[MAX_PRCHAR_LEN+1];
 
-       c &= 0377;
+       c &= 0377; /*{{type-issue}}*/
        if ((c < 128 || !utf_mode) && !control_char(c))
                SNPRINTF1(buf, sizeof(buf), "%c", (int) c);
        else if (c == ESC)
@@ -567,7 +542,7 @@ public char * prchar(LWCHAR c)
 /*
  * Return the printable form of a UTF-8 character.
  */
-public char * prutfchar(LWCHAR ch)
+public constant char * prutfchar(LWCHAR ch)
 {
        static char buf[MAX_PRCHAR_LEN+1];
 
@@ -596,7 +571,7 @@ public char * prutfchar(LWCHAR ch)
 /*
  * Get the length of a UTF-8 character in bytes.
  */
-public int utf_len(int ch)
+public int utf_len(char ch)
 {
        if ((ch & 0x80) == 0)
                return 1;
@@ -606,10 +581,12 @@ public int utf_len(int ch)
                return 3;
        if ((ch & 0xF8) == 0xF0)
                return 4;
+#if 0
        if ((ch & 0xFC) == 0xF8)
                return 5;
        if ((ch & 0xFE) == 0xFC)
                return 6;
+#endif
        /* Invalid UTF-8 encoding. */
        return 1;
 }
@@ -617,42 +594,41 @@ public int utf_len(int ch)
 /*
  * Does the parameter point to the lead byte of a well-formed UTF-8 character?
  */
-public int is_utf8_well_formed(char *ss, int slen)
+public lbool is_utf8_well_formed(constant char *ss, int slen)
 {
        int i;
        int len;
-       unsigned char *s = (unsigned char *) ss;
+       unsigned char s0 = (unsigned char) ss[0];
 
-       if (IS_UTF8_INVALID(s[0]))
-               return (0);
+       if (IS_UTF8_INVALID(s0))
+               return (FALSE);
 
-       len = utf_len(s[0]);
+       len = utf_len(ss[0]);
        if (len > slen)
-               return (0);
+               return (FALSE);
        if (len == 1)
-               return (1);
+               return (TRUE);
        if (len == 2)
        {
-               if (s[0] < 0xC2)
-                   return (0);
+               if (s0 < 0xC2)
+                       return (FALSE);
        } else
        {
-               unsigned char mask;
-               mask = (~((1 << (8-len)) - 1)) & 0xFF;
-               if (s[0] == mask && (s[1] & mask) == 0x80)
-                       return (0);
+               unsigned char mask = (unsigned char) (~((1 << (8-len)) - 1));
+               if (s0 == mask && (ss[1] & mask) == 0x80)
+                       return (FALSE);
        }
 
        for (i = 1;  i < len;  i++)
-               if (!IS_UTF8_TRAIL(s[i]))
-                       return (0);
-       return (1);
+               if (!IS_UTF8_TRAIL(ss[i]))
+                       return (FALSE);
+       return (TRUE);
 }
 
 /*
  * Skip bytes until a UTF-8 lead byte (11xxxxxx) or ASCII byte (0xxxxxxx) is found.
  */
-public void utf_skip_to_lead(char **pp, char *limit)
+public void utf_skip_to_lead(constant char **pp, constant char *limit)
 {
        do {
                ++(*pp);
@@ -663,9 +639,10 @@ public void utf_skip_to_lead(char **pp, char *limit)
 /*
  * Get the value of a UTF-8 character.
  */
-public LWCHAR get_wchar(constant char *p)
+public LWCHAR get_wchar(constant char *sp)
 {
-       switch (utf_len(p[0]))
+       constant unsigned char *p = (constant unsigned char *) sp;
+       switch (utf_len(sp[0]))
        {
        case 1:
        default:
@@ -690,6 +667,7 @@ public LWCHAR get_wchar(constant char *p)
                        ((p[1] & 0x3F) << 12) | 
                        ((p[2] & 0x3F) << 6) | 
                        (p[3] & 0x3F));
+#if 0
        case 5:
                /* 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */
                return (LWCHAR) (
@@ -707,13 +685,14 @@ public LWCHAR get_wchar(constant char *p)
                        ((p[3] & 0x3F) << 12) | 
                        ((p[4] & 0x3F) << 6) | 
                        (p[5] & 0x3F));
+#endif
        }
 }
 
 /*
  * Store a character into a UTF-8 string.
  */
-public void put_wchar(char **pp, LWCHAR ch)
+public void put_wchar(mutable char **pp, LWCHAR ch)
 {
        if (!utf_mode || ch < 0x80) 
        {
@@ -737,6 +716,7 @@ public void put_wchar(char **pp, LWCHAR ch)
                *(*pp)++ = (char) (0x80 | ((ch >> 12) & 0x3F));
                *(*pp)++ = (char) (0x80 | ((ch >> 6) & 0x3F));
                *(*pp)++ = (char) (0x80 | (ch & 0x3F));
+#if 0
        } else if (ch < 0x4000000)
        {
                /* 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */
@@ -754,17 +734,18 @@ public void put_wchar(char **pp, LWCHAR ch)
                *(*pp)++ = (char) (0x80 | ((ch >> 12) & 0x3F));
                *(*pp)++ = (char) (0x80 | ((ch >> 6) & 0x3F));
                *(*pp)++ = (char) (0x80 | (ch & 0x3F));
+#endif
        }
 }
 
 /*
  * Step forward or backward one character in a string.
  */
-public LWCHAR step_char(char **pp, signed int dir, constant char *limit)
+public LWCHAR step_charc(constant char **pp, signed int dir, constant char *limit)
 {
        LWCHAR ch;
        int len;
-       char *p = *pp;
+       constant char *p = *pp;
 
        if (!utf_mode)
        {
@@ -798,6 +779,14 @@ public LWCHAR step_char(char **pp, signed int dir, constant char *limit)
        return ch;
 }
 
+public LWCHAR step_char(char **pp, signed int dir, constant char *limit)
+{
+       constant char *p = (constant char *) *pp;
+       LWCHAR ch = step_charc(&p, dir, limit);
+       *pp = (char *) p;
+       return ch;
+}
+
*** 16754 LINES SKIPPED ***

git: 0bb4c188d363 - stable/14 - MFC: MFV: less v668.

Reply via email to