On Sun, Jun 18, 2023 at 10:50:16AM +0900, Michael Paquier wrote: > The difference between scanner_isspace() and array_isspace() is that > the former matches with what scan.l stores as rules for whitespace > characters, but the latter works on values. For hstore, we want the > latter, with something that works on values. To keep the change > locale to hstore, I think that we should just introduce an > hstore_isspace() which is a copy of array_isspace. That's a > duplication, sure, but I think that we may want to think harder about > \v in the flex scanner, and that's just a few extra lines for > something that has not changed in 13 years for arrays. That's also > easier to think about for stable branches. If you can send a patch, > that helps a lot, for sure!
At the end, no need to do that. I have been able to hack the attached, that shows the difference of treatment for \v when running in macOS. Evan, what do you think? -- Michael
From 7ede07940011d08f8c0cf05d57b3782f367c0adf Mon Sep 17 00:00:00 2001 From: Michael Paquier <mich...@paquier.xyz> Date: Sun, 18 Jun 2023 17:29:37 +0900 Subject: [PATCH] More fixes for hstore and locales This is not really complete without the treatment of \v like array values. --- contrib/hstore/expected/hstore_utf8.out | 31 +++++++++++++++++++++++++ contrib/hstore/hstore_io.c | 30 ++++++++++++++++++++---- contrib/hstore/sql/hstore_utf8.sql | 7 ++++++ 3 files changed, 63 insertions(+), 5 deletions(-) diff --git a/contrib/hstore/expected/hstore_utf8.out b/contrib/hstore/expected/hstore_utf8.out index 4405824413..bbc885a181 100644 --- a/contrib/hstore/expected/hstore_utf8.out +++ b/contrib/hstore/expected/hstore_utf8.out @@ -34,3 +34,34 @@ SELECT 'keyąfoo=>valueą'::hstore; "keyąfoo"=>"valueą" (1 row) +-- More patterns that may depend on isspace() and locales, all discarded. +SELECT E'key\u000A=>value\u000A'::hstore; -- \n + hstore +---------------- + "key"=>"value" +(1 row) + +SELECT E'key\u0009=>value\u0009'::hstore; -- \t + hstore +---------------- + "key"=>"value" +(1 row) + +SELECT E'key\u000D=>value\u000D'::hstore; -- \r + hstore +---------------- + "key"=>"value" +(1 row) + +SELECT E'key\u000B=>value\u000B'::hstore; -- \v + hstore +---------------- + "key"=>"value" +(1 row) + +SELECT E'key\u000C=>value\u000C'::hstore; -- \f + hstore +---------------- + "key"=>"value" +(1 row) + diff --git a/contrib/hstore/hstore_io.c b/contrib/hstore/hstore_io.c index 999ddad76d..f33b241d54 100644 --- a/contrib/hstore/hstore_io.c +++ b/contrib/hstore/hstore_io.c @@ -13,7 +13,6 @@ #include "lib/stringinfo.h" #include "libpq/pqformat.h" #include "nodes/miscnodes.h" -#include "parser/scansup.h" #include "utils/builtins.h" #include "utils/json.h" #include "utils/jsonb.h" @@ -41,6 +40,7 @@ typedef struct int plen; } HSParser; +static bool hstore_isspace(char ch); static bool hstoreCheckKeyLength(size_t len, HSParser *state); static bool hstoreCheckValLength(size_t len, HSParser *state); @@ -82,6 +82,26 @@ prseof(HSParser *state) return false; } +/* + * hstore_isspace() --- a non-locale-dependent isspace() + * + * We used to use isspace() for parsing hstore values, but that has + * undesirable results: a hstore value might be silently interpreted + * differently depending on the locale setting. Now we just hard-wire + * the traditional ASCII definition of isspace(). + */ +static bool +hstore_isspace(char ch) +{ + if (ch == ' ' || + ch == '\t' || + ch == '\n' || + ch == '\r' || + ch == '\v' || + ch == '\f') + return true; + return false; +} #define GV_WAITVAL 0 #define GV_INVAL 1 @@ -119,7 +139,7 @@ get_val(HSParser *state, bool ignoreeq, bool *escaped) { st = GV_WAITESCIN; } - else if (!scanner_isspace((unsigned char) *(state->ptr))) + else if (!hstore_isspace((unsigned char) *(state->ptr))) { *(state->cur) = *(state->ptr); state->cur++; @@ -142,7 +162,7 @@ get_val(HSParser *state, bool ignoreeq, bool *escaped) state->ptr--; return true; } - else if (scanner_isspace((unsigned char) *(state->ptr))) + else if (hstore_isspace((unsigned char) *(state->ptr))) { return true; } @@ -256,7 +276,7 @@ parse_hstore(HSParser *state) { PRSEOF; } - else if (!scanner_isspace((unsigned char) *(state->ptr))) + else if (!hstore_isspace((unsigned char) *(state->ptr))) { PRSSYNTAXERROR; } @@ -310,7 +330,7 @@ parse_hstore(HSParser *state) { return true; } - else if (!scanner_isspace((unsigned char) *(state->ptr))) + else if (!hstore_isspace((unsigned char) *(state->ptr))) { PRSSYNTAXERROR; } diff --git a/contrib/hstore/sql/hstore_utf8.sql b/contrib/hstore/sql/hstore_utf8.sql index face878324..38c9481ee6 100644 --- a/contrib/hstore/sql/hstore_utf8.sql +++ b/contrib/hstore/sql/hstore_utf8.sql @@ -17,3 +17,10 @@ SELECT E'key\u0105=>value\u0105'::hstore; SELECT 'keyą=>valueą'::hstore; SELECT 'ą=>ą'::hstore; SELECT 'keyąfoo=>valueą'::hstore; + +-- More patterns that may depend on isspace() and locales, all discarded. +SELECT E'key\u000A=>value\u000A'::hstore; -- \n +SELECT E'key\u0009=>value\u0009'::hstore; -- \t +SELECT E'key\u000D=>value\u000D'::hstore; -- \r +SELECT E'key\u000B=>value\u000B'::hstore; -- \v +SELECT E'key\u000C=>value\u000C'::hstore; -- \f -- 2.40.1
signature.asc
Description: PGP signature