This looks like an off-by-one error introduced in the UTF-8 conversion.
The following fixes the bug for me.  I will file a PR upstream.

 - todd

Index: usr.bin/awk/run.c
===================================================================
RCS file: /cvs/src/usr.bin/awk/run.c,v
retrieving revision 1.79
diff -u -p -u -r1.79 run.c
--- usr.bin/awk/run.c   6 Oct 2023 22:29:24 -0000       1.79
+++ usr.bin/awk/run.c   28 Oct 2023 16:32:18 -0000
@@ -1018,10 +1018,10 @@ Cell *substr(Node **a, int nnn)         /* subs
        mb = u8_char2byte(s, m-1); /* byte offset of start char in s */
        nb = u8_char2byte(s, m-1+n);  /* byte offset of end+1 char in s */
 
-       temp = s[nb];   /* with thanks to John Linderman */
-       s[nb] = '\0';
+       temp = s[nb-1]; /* with thanks to John Linderman */
+       s[nb-1] = '\0';
        setsval(y, s + mb);
-       s[nb] = temp;
+       s[nb-1] = temp;
        tempfree(x);
        return(y);
 }

Reply via email to