Tom Lane wrote:
I think that suppressing unquoted trailing whitespace is probably
reasonable.  I'm much less enthusiastic about rejecting unquoted
embedded whitespace, though.  I think that's significantly likely
to break apps, and it doesn't seem like it's really needed to have
sane behavior.  The leading/trailing whitespace asymmetry is just
weird, but it doesn't follow that embedded whitespace is weird.

If we are going to make such changes, 8.0 is the right time to do it.

The attached patch suppresses trailing whitespace, but allows embedded whitespace in unquoted elements as discussed above. It also rejects some previously accepted cases that were just too strange to be correct:


-- Postgres 8.0, with the patch
-- none of these should be accepted
select '{{1,{2}},{2,3}}'::text[];
ERROR:  malformed array literal: "{{1,{2}},{2,3}}"
select '{{},{}}'::text[];
ERROR:  malformed array literal: "{{},{}}"
select '{{1,2},\\{2,3}}'::text[];
ERROR:  malformed array literal: "{{1,2},\{2,3}}"
select '{{"1 2" x},{3}}'::text[];
ERROR:  malformed array literal: "{{"1 2" x},{3}}"

The third case above does actually raise an ERROR in 7.4, but the error message itself looks suspicious (the echoed literal is truncated):


-- in Postgres 7.4
select '{{1,2},\\{2,3}}'::text[];
ERROR:  malformed array literal: "{{1"


More examples:

-- Postgres 8.0, with the patch
-- all of these should be accepted
select '{}'::text[];
 text
------
 {}
(1 row)

select '{0 second  ,0 second}'::interval[];
      interval
---------------------
 {00:00:00,00:00:00}
(1 row)

select '{ { "," } , { 3 } }'::text[];
    text
-------------
 {{","},{3}}
(1 row)

select '  {   {  "  0 second  "   ,  0 second  }   }'::text[];
             text
-------------------------------
 {{"  0 second  ","0 second"}}
(1 row)


If there are no objections, I'll update the docs and commit sometime tomorrow.


Thanks,

Joe
Index: src/backend/utils/adt/arrayfuncs.c
===================================================================
RCS file: /cvsroot/pgsql-server/src/backend/utils/adt/arrayfuncs.c,v
retrieving revision 1.106
diff -c -r1.106 arrayfuncs.c
*** src/backend/utils/adt/arrayfuncs.c	5 Aug 2004 03:29:37 -0000	1.106
--- src/backend/utils/adt/arrayfuncs.c	7 Aug 2004 01:34:02 -0000
***************
*** 351,368 ****
   *		 The syntax for array input is C-like nested curly braces
   *-----------------------------------------------------------------------------
   */
  static int
  ArrayCount(char *str, int *dim, char typdelim)
  {
! 	int			nest_level = 0,
! 				i;
! 	int			ndim = 1,
! 				temp[MAXDIM],
! 				nelems[MAXDIM],
! 				nelems_last[MAXDIM];
! 	bool		scanning_string = false;
! 	bool		eoArray = false;
! 	char	   *ptr;
  
  	for (i = 0; i < MAXDIM; ++i)
  	{
--- 351,382 ----
   *		 The syntax for array input is C-like nested curly braces
   *-----------------------------------------------------------------------------
   */
+ typedef enum
+ {
+ 	ARRAY_NO_LEVEL,
+ 	ARRAY_LEVEL_STARTED,
+ 	ARRAY_ELEM_STARTED,
+ 	ARRAY_ELEM_COMPLETED,
+ 	ARRAY_QUOTED_ELEM_STARTED,
+ 	ARRAY_QUOTED_ELEM_COMPLETED,
+ 	ARRAY_ELEM_DELIMITED,
+ 	ARRAY_LEVEL_COMPLETED,
+ 	ARRAY_LEVEL_DELIMITED
+ } ArrayParseState;
+ 
  static int
  ArrayCount(char *str, int *dim, char typdelim)
  {
! 	int				nest_level = 0,
! 					i;
! 	int				ndim = 1,
! 					temp[MAXDIM],
! 					nelems[MAXDIM],
! 					nelems_last[MAXDIM];
! 	bool			scanning_string = false;
! 	bool			eoArray = false;
! 	char		   *ptr;
! 	ArrayParseState	parse_state = ARRAY_NO_LEVEL;
  
  	for (i = 0; i < MAXDIM; ++i)
  	{
***************
*** 370,375 ****
--- 384,390 ----
  		nelems_last[i] = nelems[i] = 1;
  	}
  
+ 	/* special case for an empty array */
  	if (strncmp(str, "{}", 2) == 0)
  		return 0;
  
***************
*** 389,394 ****
--- 404,423 ----
  						errmsg("malformed array literal: \"%s\"", str)));
  					break;
  				case '\\':
+ 					/*
+ 					 * An escape must be after a level start, after an
+ 					 * element start, or after an element delimiter. In any
+ 					 * case we now must be past an element start.
+ 					 */
+ 					if (parse_state != ARRAY_LEVEL_STARTED &&
+ 						parse_state != ARRAY_ELEM_STARTED &&
+ 						parse_state != ARRAY_QUOTED_ELEM_STARTED &&
+ 						parse_state != ARRAY_ELEM_DELIMITED)
+ 						ereport(ERROR,
+ 							(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ 							errmsg("malformed array literal: \"%s\"", str)));
+ 					if (parse_state != ARRAY_QUOTED_ELEM_STARTED)
+ 						parse_state = ARRAY_ELEM_STARTED;
  					/* skip the escaped character */
  					if (*(ptr + 1))
  						ptr++;
***************
*** 398,408 ****
--- 427,464 ----
  						errmsg("malformed array literal: \"%s\"", str)));
  					break;
  				case '\"':
+ 					/*
+ 					 * A quote must be after a level start, after a quoted
+ 					 * element start, or after an element delimiter. In any
+ 					 * case we now must be past an element start.
+ 					 */
+ 					if (parse_state != ARRAY_LEVEL_STARTED &&
+ 						parse_state != ARRAY_QUOTED_ELEM_STARTED &&
+ 						parse_state != ARRAY_ELEM_DELIMITED)
+ 						ereport(ERROR,
+ 							(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ 							errmsg("malformed array literal: \"%s\"", str)));
  					scanning_string = !scanning_string;
+ 					if (scanning_string)
+ 						parse_state = ARRAY_QUOTED_ELEM_STARTED;
+ 					else
+ 						parse_state = ARRAY_QUOTED_ELEM_COMPLETED;
  					break;
  				case '{':
  					if (!scanning_string)
  					{
+ 						/*
+ 						 * A left brace can occur if no nesting has
+ 						 * occurred yet, after a level start, or
+ 						 * after a level delimiter.
+ 						 */
+ 						if (parse_state != ARRAY_NO_LEVEL &&
+ 							parse_state != ARRAY_LEVEL_STARTED &&
+ 							parse_state != ARRAY_LEVEL_DELIMITED)
+ 							ereport(ERROR,
+ 								(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ 								errmsg("malformed array literal: \"%s\"", str)));
+ 						parse_state = ARRAY_LEVEL_STARTED;
  						if (nest_level >= MAXDIM)
  							ereport(ERROR,
  								(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
***************
*** 417,422 ****
--- 473,491 ----
  				case '}':
  					if (!scanning_string)
  					{
+ 						/*
+ 						 * A right brace can occur after an element start,
+ 						 * an element completion, a quoted element completion,
+ 						 * or a level completion.
+ 						 */
+ 						if (parse_state != ARRAY_ELEM_STARTED &&
+ 							parse_state != ARRAY_ELEM_COMPLETED &&
+ 							parse_state != ARRAY_QUOTED_ELEM_COMPLETED &&
+ 							parse_state != ARRAY_LEVEL_COMPLETED)
+ 							ereport(ERROR,
+ 								(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ 								errmsg("malformed array literal: \"%s\"", str)));
+ 						parse_state = ARRAY_LEVEL_COMPLETED;
  						if (nest_level == 0)
  							ereport(ERROR,
  							(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
***************
*** 445,454 ****
  					}
  					break;
  				default:
! 					if (*ptr == typdelim && !scanning_string)
  					{
! 						itemdone = true;
! 						nelems[nest_level - 1]++;
  					}
  					break;
  			}
--- 514,558 ----
  					}
  					break;
  				default:
! 					if (!scanning_string)
  					{
! 						if (*ptr == typdelim)
! 						{
! 							/*
! 							* Delimiters can occur after an element start,
! 							* an element completion, a quoted element
! 							* completion, or a level completion.
! 							*/
! 							if (parse_state != ARRAY_ELEM_STARTED &&
! 								parse_state != ARRAY_ELEM_COMPLETED &&
! 								parse_state != ARRAY_QUOTED_ELEM_COMPLETED &&
! 								parse_state != ARRAY_LEVEL_COMPLETED)
! 								ereport(ERROR,
! 									(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
! 									errmsg("malformed array literal: \"%s\"", str)));
! 							if (parse_state == ARRAY_LEVEL_COMPLETED)
! 								parse_state = ARRAY_LEVEL_DELIMITED;
! 							else
! 								parse_state = ARRAY_ELEM_DELIMITED;
! 							itemdone = true;
! 							nelems[nest_level - 1]++;
! 						}
! 						else if (!isspace(*ptr))
! 						{
! 							/*
! 							* Other non-space characters must be after a level
! 							* start, after an element start, or after an element
! 							* delimiter. In any case we now must be past an
! 							* element start.
! 							*/
! 							if (parse_state != ARRAY_LEVEL_STARTED &&
! 								parse_state != ARRAY_ELEM_STARTED &&
! 								parse_state != ARRAY_ELEM_DELIMITED)
! 								ereport(ERROR,
! 									(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
! 									errmsg("malformed array literal: \"%s\"", str)));
! 							parse_state = ARRAY_ELEM_STARTED;
! 						}
  					}
  					break;
  			}
***************
*** 511,522 ****
--- 615,629 ----
  	while (!eoArray)
  	{
  		bool		itemdone = false;
+ 		bool		itemquoted = false;
  		int			i = -1;
  		char	   *itemstart;
+ 		char	   *eptr;
  
  		/* skip leading whitespace */
  		while (isspace((unsigned char) *ptr))
  			ptr++;
+ 
  		itemstart = ptr;
  
  		while (!itemdone)
***************
*** 547,557 ****
  						char	   *cptr;
  
  						scanning_string = !scanning_string;
! 						/* Crunch the string on top of the quote. */
! 						for (cptr = ptr; *cptr != '\0'; cptr++)
! 							*cptr = *(cptr + 1);
! 						/* Back up to not miss following character. */
! 						ptr--;
  						break;
  					}
  				case '{':
--- 654,668 ----
  						char	   *cptr;
  
  						scanning_string = !scanning_string;
! 						if (scanning_string)
! 						{
! 							itemquoted = true;
! 							/* Crunch the string on top of the first quote. */
! 							for (cptr = ptr; *cptr != '\0'; cptr++)
! 								*cptr = *(cptr + 1);
! 							/* Back up to not miss following character. */
! 							ptr--;
! 						}
  						break;
  					}
  				case '{':
***************
*** 615,620 ****
--- 726,750 ----
  					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
  				   errmsg("malformed array literal: \"%s\"", arrayStr)));
  
+ 		/*
+ 		 * skip trailing whitespace
+ 		 */
+ 		eptr = ptr - 1;
+ 		if (!itemquoted)
+ 		{
+ 			/* skip to last non-NULL, non-space, character */
+ 			while ((*eptr == '\0') || (isspace((unsigned char) *eptr)))
+ 				eptr--;
+ 			*(++eptr) = '\0';
+ 		}
+ 		else
+ 		{
+ 			/* skip to last quote character */
+ 			while (*eptr != '"')
+ 				eptr--;
+ 			*eptr = '\0';
+ 		}
+ 
  		values[i] = FunctionCall3(inputproc,
  								  CStringGetDatum(itemstart),
  								  ObjectIdGetDatum(typioparam),
---------------------------(end of broadcast)---------------------------
TIP 8: explain analyze is your friend

Reply via email to