On Sat, Apr 29, 2023, 14:02 Chet Ramey <chet.ra...@case.edu> wrote: > On 4/28/23 9:28 PM, Grisha Levit wrote: > > Piping input that simply ends in an leading byte doesn't trigger the > issue > > -- that byte byte don't seem to make it into the input line. > > > > This is a bit off topic, but I don't really understand what happens with > > invalid input sequences in the input, see e.g.: > > They should be treated as individual bytes. >
I think I see what's happening now. Readline accumulates the bytes until a complete character is read. However, this buffer is not flushed when the reading of a multibyte character is interrupted by inserting a single byte character, or by any non-insertion command. So for example, the \317 byte never gets a chance to be inserted here: bash --norc -in <<<$':\317:' $ :: And inserting the byte is deferred until the next byte with the 8th bit set is read (which can be at some arbitrary future time) here: bash --norc -in <<<$':\317\n: \200' $ : $ : π You can also reproduce interactively by binding the above input to a macro. Attached is a patch that I think should address this.
From d7d1043e2bc2ad35ac2ef6a2cc45467085cf5835 Mon Sep 17 00:00:00 2001 From: Grisha Levit <grishale...@gmail.com> Date: Fri, 28 Apr 2023 23:24:34 -0400 Subject: [PATCH] fix invalid mb insert lib/readline/text.c - _rl_insert_char: return 1 when there are any pending bytes - _rl_insert_char: treat being called with a count of 0 as a signal to insert any pending bytes read so far. - rl_insert, rl_quoted_insert: if the final _rl_insert_char returns 1, call _rl_insert_char with a count of 0 to insert any pending bytes. --- lib/readline/text.c | 68 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 54 insertions(+), 14 deletions(-) diff --git a/lib/readline/text.c b/lib/readline/text.c index 356cac5f..27d9292b 100644 --- a/lib/readline/text.c +++ b/lib/readline/text.c @@ -704,7 +704,8 @@ static mbstate_t ps = {0}; /* Insert the character C at the current location, moving point forward. If C introduces a multibyte sequence, we read the whole sequence and - then insert the multibyte char into the line buffer. */ + then insert the multibyte char into the line buffer. + If COUNT is 0, just insert any accumulated multibyte sequence as is. */ int _rl_insert_char (int count, int c) { @@ -718,11 +719,32 @@ _rl_insert_char (int count, int c) static int stored_count = 0; #endif +#if !defined (HANDLE_MULTIBYTE) if (count <= 0) return 0; +#else + if (count < 0) + return 0; + if (count == 0) + { + /* If COUNT is 0, we were called because we are no longer reading + a multibyte character. If we were waiting to complete a character, + just insert any bytes we have read so far. */ + if (pending_bytes_length == 0) + return 0; -#if defined (HANDLE_MULTIBYTE) - if (MB_CUR_MAX == 1 || rl_byte_oriented) + if (stored_count <= 0) + stored_count = count; + else + count = stored_count; + + memcpy (incoming, pending_bytes, pending_bytes_length); + incoming[pending_bytes_length] = '\0'; + incoming_length = pending_bytes_length; + pending_bytes_length = 0; + memset (&ps, 0, sizeof (mbstate_t)); + } + else if (MB_CUR_MAX == 1 || rl_byte_oriented) { incoming[0] = c; incoming[1] = '\0'; @@ -730,6 +752,9 @@ _rl_insert_char (int count, int c) } else if (_rl_utf8locale && (c & 0x80) == 0) { + if (pending_bytes_length) + _rl_insert_char (0, 0); + incoming[0] = c; incoming[1] = '\0'; incoming_length = 1; @@ -827,7 +852,11 @@ _rl_insert_char (int count, int c) rl_insert_text (string); xfree (string); +#if defined (HANDLE_MULTIBYTE) + return pending_bytes_length ? 1 : 0; +#else return 0; +#endif } if (count > TEXT_COUNT_MAX) @@ -860,6 +889,8 @@ _rl_insert_char (int count, int c) xfree (string); incoming_length = 0; stored_count = 0; + + return pending_bytes_length ? 1 : 0; #else /* !HANDLE_MULTIBYTE */ char str[TEXT_COUNT_MAX+1]; @@ -873,9 +904,9 @@ _rl_insert_char (int count, int c) rl_insert_text (str); count -= decreaser; } -#endif /* !HANDLE_MULTIBYTE */ return 0; +#endif /* !HANDLE_MULTIBYTE */ } if (MB_CUR_MAX == 1 || rl_byte_oriented) @@ -903,9 +934,12 @@ _rl_insert_char (int count, int c) rl_insert_text (incoming); stored_count = 0; } -#endif + return pending_bytes_length ? 1 : 0; +#else /* !HANDLE_MULTIBYTE */ return 0; +#endif /* !HANDLE_MULTIBYTE */ + } /* Overwrite the character at point (or next COUNT characters) with C. @@ -955,7 +989,7 @@ rl_insert (int count, int c) r = (rl_insert_mode == RL_IM_INSERT) ? _rl_insert_char (count, c) : _rl_overwrite_char (count, c); /* XXX -- attempt to batch-insert pending input that maps to self-insert */ - x = 0; + r = x = 0; n = (unsigned short)-2; while (_rl_optimize_typeahead && rl_num_chars_to_read == 0 && @@ -983,6 +1017,10 @@ rl_insert (int count, int c) break; } + /* We are done with self-insert, make sure any pending bytes get inserted */ + if (r == 1 && rl_insert_mode == RL_IM_INSERT) + r = _rl_insert_char (0, 0); + if (n != (unsigned short)-2) /* -2 = sentinel value for having inserted N */ { /* setting rl_pending_input inhibits setting rl_last_func so we do it @@ -1054,6 +1092,8 @@ _rl_insert_next_callback (_rl_callback_generic_arg *data) int rl_quoted_insert (int count, int key) { + int r; + /* Let's see...should the callback interface futz with signal handling? */ #if defined (HANDLE_SIGNALS) if (RL_ISSTATE (RL_STATE_CALLBACK) == 0) @@ -1071,16 +1111,16 @@ rl_quoted_insert (int count, int key) /* A negative count means to quote the next -COUNT characters. */ if (count < 0) - { - int r; + do + r = _rl_insert_next (1); + while (r == 0 && ++count < 0); + else + r = _rl_insert_next (count); - do - r = _rl_insert_next (1); - while (r == 0 && ++count < 0); - return r; - } + if (r == 1) + _rl_insert_char (0, 0); - return _rl_insert_next (count); + return r; } /* Insert a tab character. */ -- 2.40.1