On Sun, Jun 18, 2023 at 10:50:16AM +0900, Michael Paquier wrote:
> The difference between scanner_isspace() and array_isspace() is that
> the former matches with what scan.l stores as rules for whitespace
> characters, but the latter works on values.  For hstore, we want the
> latter, with something that works on values.  To keep the change
> locale to hstore, I think that we should just introduce an
> hstore_isspace() which is a copy of array_isspace.  That's a
> duplication, sure, but I think that we may want to think harder about
> \v in the flex scanner, and that's just a few extra lines for 
> something that has not changed in 13 years for arrays.  That's also
> easier to think about for stable branches.  If you can send a patch,
> that helps a lot, for sure!

At the end, no need to do that.  I have been able to hack the
attached, that shows the difference of treatment for \v when running
in macOS.  Evan, what do you think?
--
Michael
From 7ede07940011d08f8c0cf05d57b3782f367c0adf Mon Sep 17 00:00:00 2001
From: Michael Paquier <mich...@paquier.xyz>
Date: Sun, 18 Jun 2023 17:29:37 +0900
Subject: [PATCH] More fixes for hstore and locales

This is not really complete without the treatment of \v like array
values.
---
 contrib/hstore/expected/hstore_utf8.out | 31 +++++++++++++++++++++++++
 contrib/hstore/hstore_io.c              | 30 ++++++++++++++++++++----
 contrib/hstore/sql/hstore_utf8.sql      |  7 ++++++
 3 files changed, 63 insertions(+), 5 deletions(-)

diff --git a/contrib/hstore/expected/hstore_utf8.out b/contrib/hstore/expected/hstore_utf8.out
index 4405824413..bbc885a181 100644
--- a/contrib/hstore/expected/hstore_utf8.out
+++ b/contrib/hstore/expected/hstore_utf8.out
@@ -34,3 +34,34 @@ SELECT 'keyąfoo=>valueą'::hstore;
  "keyąfoo"=>"valueą"
 (1 row)
 
+-- More patterns that may depend on isspace() and locales, all discarded.
+SELECT E'key\u000A=>value\u000A'::hstore; -- \n
+     hstore     
+----------------
+ "key"=>"value"
+(1 row)
+
+SELECT E'key\u0009=>value\u0009'::hstore; -- \t
+     hstore     
+----------------
+ "key"=>"value"
+(1 row)
+
+SELECT E'key\u000D=>value\u000D'::hstore; -- \r
+     hstore     
+----------------
+ "key"=>"value"
+(1 row)
+
+SELECT E'key\u000B=>value\u000B'::hstore; -- \v
+     hstore     
+----------------
+ "key"=>"value"
+(1 row)
+
+SELECT E'key\u000C=>value\u000C'::hstore; -- \f
+     hstore     
+----------------
+ "key"=>"value"
+(1 row)
+
diff --git a/contrib/hstore/hstore_io.c b/contrib/hstore/hstore_io.c
index 999ddad76d..f33b241d54 100644
--- a/contrib/hstore/hstore_io.c
+++ b/contrib/hstore/hstore_io.c
@@ -13,7 +13,6 @@
 #include "lib/stringinfo.h"
 #include "libpq/pqformat.h"
 #include "nodes/miscnodes.h"
-#include "parser/scansup.h"
 #include "utils/builtins.h"
 #include "utils/json.h"
 #include "utils/jsonb.h"
@@ -41,6 +40,7 @@ typedef struct
 	int			plen;
 } HSParser;
 
+static bool hstore_isspace(char ch);
 static bool hstoreCheckKeyLength(size_t len, HSParser *state);
 static bool hstoreCheckValLength(size_t len, HSParser *state);
 
@@ -82,6 +82,26 @@ prseof(HSParser *state)
 	return false;
 }
 
+/*
+ * hstore_isspace() --- a non-locale-dependent isspace()
+ *
+ * We used to use isspace() for parsing hstore values, but that has
+ * undesirable results: a hstore value might be silently interpreted
+ * differently depending on the locale setting.  Now we just hard-wire
+ * the traditional ASCII definition of isspace().
+ */
+static bool
+hstore_isspace(char ch)
+{
+	if (ch == ' ' ||
+		ch == '\t' ||
+		ch == '\n' ||
+		ch == '\r' ||
+		ch == '\v' ||
+		ch == '\f')
+		return true;
+	return false;
+}
 
 #define GV_WAITVAL 0
 #define GV_INVAL 1
@@ -119,7 +139,7 @@ get_val(HSParser *state, bool ignoreeq, bool *escaped)
 			{
 				st = GV_WAITESCIN;
 			}
-			else if (!scanner_isspace((unsigned char) *(state->ptr)))
+			else if (!hstore_isspace((unsigned char) *(state->ptr)))
 			{
 				*(state->cur) = *(state->ptr);
 				state->cur++;
@@ -142,7 +162,7 @@ get_val(HSParser *state, bool ignoreeq, bool *escaped)
 				state->ptr--;
 				return true;
 			}
-			else if (scanner_isspace((unsigned char) *(state->ptr)))
+			else if (hstore_isspace((unsigned char) *(state->ptr)))
 			{
 				return true;
 			}
@@ -256,7 +276,7 @@ parse_hstore(HSParser *state)
 			{
 				PRSEOF;
 			}
-			else if (!scanner_isspace((unsigned char) *(state->ptr)))
+			else if (!hstore_isspace((unsigned char) *(state->ptr)))
 			{
 				PRSSYNTAXERROR;
 			}
@@ -310,7 +330,7 @@ parse_hstore(HSParser *state)
 			{
 				return true;
 			}
-			else if (!scanner_isspace((unsigned char) *(state->ptr)))
+			else if (!hstore_isspace((unsigned char) *(state->ptr)))
 			{
 				PRSSYNTAXERROR;
 			}
diff --git a/contrib/hstore/sql/hstore_utf8.sql b/contrib/hstore/sql/hstore_utf8.sql
index face878324..38c9481ee6 100644
--- a/contrib/hstore/sql/hstore_utf8.sql
+++ b/contrib/hstore/sql/hstore_utf8.sql
@@ -17,3 +17,10 @@ SELECT E'key\u0105=>value\u0105'::hstore;
 SELECT 'keyą=>valueą'::hstore;
 SELECT 'ą=>ą'::hstore;
 SELECT 'keyąfoo=>valueą'::hstore;
+
+-- More patterns that may depend on isspace() and locales, all discarded.
+SELECT E'key\u000A=>value\u000A'::hstore; -- \n
+SELECT E'key\u0009=>value\u0009'::hstore; -- \t
+SELECT E'key\u000D=>value\u000D'::hstore; -- \r
+SELECT E'key\u000B=>value\u000B'::hstore; -- \v
+SELECT E'key\u000C=>value\u000C'::hstore; -- \f
-- 
2.40.1

Attachment: signature.asc
Description: PGP signature

Reply via email to