On Sat, Apr 05, 2025 at 09:36:07AM +0200, Walter Alejandro Iglesias wrote:
> Hi again Lucas,
> 
> This time I paid a little more attention :-).  Maybe I'm missing
> something, but it seems to me that, in your patch, the skip_utf8_cont
> variable is unnecessary.
> 
> Anyway, at first I'd also tried doing something similar to what you
> suggested, but I still think it doesn't require so much fuss.  Let's
> see if I don't make any stupid mistakes with this new version of mine:

anton pointed out the same thing in a private email and also requested
changes to the regress tests. The current patch is:


diff refs/heads/master 563a90a52e59962e09d4d2c0897c06024dab84be
commit - 58fd8d0bdc1e6222119987e7aaad111eae245668
commit + 563a90a52e59962e09d4d2c0897c06024dab84be
blob - cdda9cb24b1a4a395547e081ff3adca380d3b6c1
blob + 0b6459cb5f31c2a8e00de8fb837b4e7039d59214
--- bin/ksh/vi.c
+++ bin/ksh/vi.c
@@ -1590,15 +1590,18 @@ backword(int argcnt)
 static int
 endword(int argcnt)
 {
-       int ncursor, skip_space, want_letnum;
+       int ncursor, skip_space, skip_utf8_cont, want_letnum;
        unsigned char uc;
 
        ncursor = es->cursor;
        while (ncursor < es->linelen && argcnt--) {
-               skip_space = 1;
+               skip_space = skip_utf8_cont = 1;
                want_letnum = -1;
                while (++ncursor < es->linelen) {
                        uc = es->cbuf[ncursor];
+                       if (skip_utf8_cont && isu8cont(uc))
+                               continue;
+                       skip_utf8_cont = 0;
                        if (isspace(uc)) {
                                if (skip_space)
                                        continue;
@@ -1663,6 +1666,9 @@ Endword(int argcnt)
        ncursor = es->cursor;
        while (ncursor < es->linelen && argcnt--) {
                while (++ncursor < es->linelen &&
+                   isu8cont((unsigned char)es->cbuf[ncursor]))
+                       ;
+               while (++ncursor < es->linelen &&
                    isspace((unsigned char)es->cbuf[ncursor]))
                        ;
                while (ncursor < es->linelen &&
blob - 2c33d0005da16ffd525336ada48374de632235a9
blob + 348511d26252c3d5f1afe9b9cfecdf8da2bcc272
--- regress/bin/ksh/edit/vi.sh
+++ regress/bin/ksh/edit/vi.sh
@@ -87,6 +87,15 @@ testseq "1.00 two\00330ED" " # 1.00 two\b\r # 1.0     
 # e: Move to end of word.
 testseq "onex two\00330eD" " # onex two\b\r # one     \b\b\b\b\b\b"
 
+# No infinite loop moving to end of {,big} word for non-ASCII UTF-8-ending
+# words.
+# EURO SIGN U+20AC is encoded as bytes 0xe2 0x82 0xac = \0342\0202\0254
+euro='\0342\0202\0254'
+testseq "1.00$euro 2.00 three\00330EED" \
+    " # 1.00$euro 2.00 three\b\r # 1.00$euro 2.0       \b\b\b\b\b\b\b\b"
+testseq "one$euro twox three\00330eeD" \
+    " # one$euro twox three\b\r # one$euro two       \b\b\b\b\b\b\b\b"
+
 # F: Find character backward.
 # ;: Repeat last search.
 # ,: Repeat last search in opposite direction.

Reply via email to