On 10.09.18 21:49, Heinrich Schuchardt wrote: > Up to now the EFI_TEXT_INPUT_PROTOCOL only supported ASCII characters. > With the patch it can consume UTF-8 from the console. > > Currently only the serial console and the console can deliver UTF-8. > Local consoles are restricted to ASCII. > > Signed-off-by: Heinrich Schuchardt <xypron.g...@gmx.de> > --- > v2: > drop support for German keyboard > move reading of Unicode code to charset.c > --- > include/charset.h | 10 +++ > lib/charset.c | 137 +++++++++++++++++++++++------------ > lib/efi_loader/efi_console.c | 11 ++- > test/unicode_ut.c | 8 +- > 4 files changed, 108 insertions(+), 58 deletions(-) > > diff --git a/include/charset.h b/include/charset.h > index 686db5a1fe..d1dc326c9b 100644 > --- a/include/charset.h > +++ b/include/charset.h > @@ -8,11 +8,21 @@ > #ifndef __CHARSET_H_ > #define __CHARSET_H_ > > +#include <efi.h> > #include <linux/kernel.h> > #include <linux/types.h> > > #define MAX_UTF8_PER_UTF16 3 > > +/** > + * console_read_unicode() - read Unicode code point from console > + * > + * @code: code point > + * Return: status code: > + * EFI_SUCCESS, EFI_NOT_READY, or EFI_INVALID_PARAMETER > + */ > +efi_status_t console_read_unicode(s32 *code); > + > /** > * utf8_get() - get next UTF-8 code point from buffer > * > diff --git a/lib/charset.c b/lib/charset.c > index 72c808ce64..2e582a51f7 100644 > --- a/lib/charset.c > +++ b/lib/charset.c > @@ -5,6 +5,7 @@ > * Copyright (c) 2017 Rob Clark > */ > > +#include <common.h> > #include <charset.h> > #include <capitalization.h> > #include <malloc.h> > @@ -18,67 +19,107 @@ static struct capitalization_table > capitalization_table[] = > CP437_CAPITALIZATION_TABLE; > #endif > > -s32 utf8_get(const char **src) > +/** > + * get_code() - read Unicode code point from UTF-8 stream > + * > + * @next: - stream reader > + * @src: - string buffer passed to stream reader, optional > + * Return: - Unicode code point > + */ > +static int get_code(s32 (*next)(const char **src), const char **src)
The stream reader should probably return a u8. Also, I think it makes sense to declare the argument as opaque so that a real stream reader can implement reads from a non-string later. With that I think it should look like this: static int get_code(u8 (*next)(void *opaque), void *opaque) > { > - s32 code = 0; > - unsigned char c; > + s32 ch = 0; > > - if (!src || !*src) > - return -1; > - if (!**src) > + ch = next(src); > + if (!ch) > return 0; > - c = **src; > - if (c >= 0x80) { > - ++*src; > - if (!**src) > - return -1; > - /* > - * We do not expect a continuation byte (0x80 - 0xbf). > - * 0x80 is coded as 0xc2 0x80, so we cannot have less then 0xc2 > - * here. > - * The highest code point is 0x10ffff which is coded as > - * 0xf4 0x8f 0xbf 0xbf. So we cannot have a byte above 0xf4. > - */ > - if (c < 0xc2 || code > 0xf4) > - return -1; > - if (c >= 0xe0) { > - if (c >= 0xf0) { > + if (ch >= 0xc2 && ch <= 0xf4) { > + int code = 0; > + > + if (ch >= 0xe0) { > + if (ch >= 0xf0) { > /* 0xf0 - 0xf4 */ > - c &= 0x07; > - code = c << 18; > - c = **src; > - ++*src; > - if (!**src) > - return -1; > - if (c < 0x80 || c > 0xbf) > - return -1; > - c &= 0x3f; > + ch &= 0x07; > + code = ch << 18; > + ch = next(src); > + if (ch < 0x80 || ch > 0xbf) > + goto error; > + ch &= 0x3f; > } else { > /* 0xe0 - 0xef */ > - c &= 0x0f; > + ch &= 0x0f; > } > - code += c << 12; > + code += ch << 12; > if ((code >= 0xD800 && code <= 0xDFFF) || > code >= 0x110000) > - return -1; > - c = **src; > - ++*src; > - if (!**src) > - return -1; > - if (c < 0x80 || c > 0xbf) > - return -1; > + goto error; > + ch = next(src); > + if (ch < 0x80 || ch > 0xbf) > + goto error; > } > /* 0xc0 - 0xdf or continuation byte (0x80 - 0xbf) */ > - c &= 0x3f; > - code += c << 6; > - c = **src; > - if (c < 0x80 || c > 0xbf) > - return -1; > - c &= 0x3f; > + ch &= 0x3f; > + code += ch << 6; > + ch = next(src); > + if (ch < 0x80 || ch > 0xbf) > + goto error; > + ch &= 0x3f; > + ch += code; > + } else if (ch >= 0x80) { > + goto error; > } > - code += c; > + return ch; > +error: > + return '?'; > +} > + > +/** > + * read_string() - read byte from character string > + * > + * @src: - pointer to string > + * Return: - byte read > + * > + * The string pointer is incremented if it does not point to '\0'. > + */ > +static s32 read_string(const char **src) Please be consistent in your naming. Either the stream reader is called "next" or "read" :). So either this function would be next_string/next_char or the function pointer above would be "read_char"/"read_u8". > +{ > + s32 c; > + > + if (!src || !*src || !**src) > + return 0; > + c = (unsigned char)**src; If you make c a u8 you can save yourself this cast too. > ++*src; > - return code; > + return c; > +} > + > +/** > + * read_console() - read byte from console > + * > + * @src - not used, needed to match interface > + * Return: - byte read > + */ > +static s32 read_console(const char **src) > +{ > + return getc(); > +} > + > +efi_status_t console_read_unicode(s32 *code) > +{ > + /* Check parameter */ > + if (!code) > + return EFI_INVALID_PARAMETER; We should not have any efi dependencies in charset.c. I think it's perfectly fine to just have this function and read_console() in efi_console.c. > + /* Check if input available */ > + if (!tstc()) > + return EFI_NOT_READY; > + /* Read Unicode code */ > + *code = get_code(read_console, NULL); > + > + return EFI_SUCCESS; > +} > + > +s32 utf8_get(const char **src) > +{ > + return get_code(read_string, src); > } > > int utf8_put(s32 code, char **dst) > diff --git a/lib/efi_loader/efi_console.c b/lib/efi_loader/efi_console.c > index 3ca6fe536c..a96eda1b22 100644 > --- a/lib/efi_loader/efi_console.c > +++ b/lib/efi_loader/efi_console.c > @@ -449,23 +449,22 @@ static efi_status_t EFIAPI efi_cin_read_key_stroke( > struct efi_simple_text_input_protocol *this, > struct efi_input_key *key) > { > + efi_status_t ret; > struct efi_input_key pressed_key = { > .scan_code = 0, > .unicode_char = 0, > }; > - char ch; > + s32 ch; > > EFI_ENTRY("%p, %p", this, key); > > /* We don't do interrupts, so check for timers cooperatively */ > efi_timer_check(); > > - if (!tstc()) { > - /* No key pressed */ > + ret = console_read_unicode(&ch); > + /* We do not support multi-word codes */ > + if (ret != EFI_SUCCESS || ch >= 0x10000) For ch > U16_MAX I'd just declare ch as '?'. /* * We can not handle multi-char UTF-16 yet, so ignore input * that would show up as multiple words. */ if (ch > U16_MAX) ch = '='; Alex > return EFI_EXIT(EFI_NOT_READY); > - } > - > - ch = getc(); > if (ch == cESC) { > /* > * Xterm Control Sequences > diff --git a/test/unicode_ut.c b/test/unicode_ut.c > index b94b4a651f..b115d18afd 100644 > --- a/test/unicode_ut.c > +++ b/test/unicode_ut.c > @@ -178,7 +178,7 @@ static int ut_utf8_utf16_strlen(struct unit_test_state > *uts) > > /* illegal utf-8 sequences */ > ut_asserteq(4, utf8_utf16_strlen(j1)); > - ut_asserteq(5, utf8_utf16_strlen(j2)); > + ut_asserteq(4, utf8_utf16_strlen(j2)); > ut_asserteq(3, utf8_utf16_strlen(j3)); > > return 0; > @@ -196,7 +196,7 @@ static int ut_utf8_utf16_strnlen(struct unit_test_state > *uts) > > /* illegal utf-8 sequences */ > ut_asserteq(4, utf8_utf16_strnlen(j1, 16)); > - ut_asserteq(5, utf8_utf16_strnlen(j2, 16)); > + ut_asserteq(4, utf8_utf16_strnlen(j2, 16)); > ut_asserteq(3, utf8_utf16_strnlen(j3, 16)); > > return 0; > @@ -255,8 +255,8 @@ static int ut_utf8_utf16_strcpy(struct unit_test_state > *uts) > > pos = buf; > utf8_utf16_strcpy(&pos, j2); > - ut_asserteq(5, pos - buf); > - ut_assert(!ut_u16_strcmp(buf, L"j2??l", SIZE_MAX)); > + ut_asserteq(4, pos - buf); > + ut_assert(!ut_u16_strcmp(buf, L"j2?l", SIZE_MAX)); > > pos = buf; > utf8_utf16_strcpy(&pos, j3); > _______________________________________________ U-Boot mailing list U-Boot@lists.denx.de https://lists.denx.de/listinfo/u-boot