Quoting Mike Fowler <m...@mlfowler.com>:


The following bug has been logged online:

Bug reference:      5534
Logged by:          Mike Fowler
Email address:      m...@mlfowler.com
PostgreSQL version: 9.0beta2
Operating system:   Linux 2.6.31-14-generic #48-Ubuntu SMP
Description:        IS DOCUMENT predicate errors instead of returning false
Details:

IS DOCUMENT should return false for a document that is not well-formed, and
indeed it is coded to do so. However, the conversion to the xml type, which
happens before the underlying xml_is_document function is even called, fails
and raises an error. I've mentioned this on -hackers with message ID
20100701172553.w5vdy1xbocos8...@www.mlfowler.com

--
Sent via pgsql-bugs mailing list (pgsql-bugs@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-bugs


The attached patch is a very small patch that changes parse_expr.c so that it no longer converts everything to xml. This means that when passed malformed XML, IS DOCUMENT will return false instead of throwing an exception. In my mind this is acceptable, as I don't see anywhere in the standard that mandates that:

xmlval IS NOT DOCUMENT == xmlval IS CONTENT

Regards,

--
Mike Fowler
Registered Linux user: 379787
*** a/src/backend/parser/parse_expr.c
--- b/src/backend/parser/parse_expr.c
***************
*** 1950,1956 **** transformXmlExpr(ParseState *pstate, XmlExpr *x)
  				Assert(false);
  				break;
  			case IS_DOCUMENT:
! 				newe = coerce_to_specific_type(pstate, newe, XMLOID,
  											   "IS DOCUMENT");
  				break;
  		}
--- 1950,1956 ----
  				Assert(false);
  				break;
  			case IS_DOCUMENT:
! 				newe = coerce_to_specific_type(pstate, newe, TEXTOID,
  											   "IS DOCUMENT");
  				break;
  		}
*** a/src/backend/utils/adt/xml.c
--- b/src/backend/utils/adt/xml.c
***************
*** 795,801 **** xmlvalidate(PG_FUNCTION_ARGS)
  
  
  bool
! xml_is_document(xmltype *arg)
  {
  #ifdef USE_LIBXML
  	bool		result;
--- 795,801 ----
  
  
  bool
! xml_is_document(text *arg)
  {
  #ifdef USE_LIBXML
  	bool		result;
***************
*** 805,811 **** xml_is_document(xmltype *arg)
  	/* We want to catch ereport(INVALID_XML_DOCUMENT) and return false */
  	PG_TRY();
  	{
! 		doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true,
  						GetDatabaseEncoding());
  		result = true;
  	}
--- 805,811 ----
  	/* We want to catch ereport(INVALID_XML_DOCUMENT) and return false */
  	PG_TRY();
  	{
! 		doc = xml_parse(arg, XMLOPTION_DOCUMENT, true,
  						GetDatabaseEncoding());
  		result = true;
  	}
*** a/src/include/utils/xml.h
--- b/src/include/utils/xml.h
***************
*** 70,76 **** extern xmltype *xmlelement(XmlExprState *xmlExpr, ExprContext *econtext);
  extern xmltype *xmlparse(text *data, XmlOptionType xmloption, bool preserve_whitespace);
  extern xmltype *xmlpi(char *target, text *arg, bool arg_is_null, bool *result_is_null);
  extern xmltype *xmlroot(xmltype *data, text *version, int standalone);
! extern bool xml_is_document(xmltype *arg);
  extern text *xmltotext_with_xmloption(xmltype *data, XmlOptionType xmloption_arg);
  extern char *escape_xml(const char *str);
  
--- 70,76 ----
  extern xmltype *xmlparse(text *data, XmlOptionType xmloption, bool preserve_whitespace);
  extern xmltype *xmlpi(char *target, text *arg, bool arg_is_null, bool *result_is_null);
  extern xmltype *xmlroot(xmltype *data, text *version, int standalone);
! extern bool xml_is_document(text *arg);
  extern text *xmltotext_with_xmloption(xmltype *data, XmlOptionType xmloption_arg);
  extern char *escape_xml(const char *str);
  
*** a/src/test/regress/expected/xml.out
--- b/src/test/regress/expected/xml.out
***************
*** 357,362 **** SELECT xml '<foo>bar</foo>' IS DOCUMENT;
--- 357,378 ----
   t
  (1 row)
  
+ SELECT xml '<foo>bar</foo' IS DOCUMENT;
+ ERROR:  invalid XML content
+ LINE 1: SELECT xml '<foo>bar</foo' IS DOCUMENT;
+                    ^
+ DETAIL:  Entity: line 1: parser error : expected '>'
+ <foo>bar</foo
+              ^
+ Entity: line 1: parser error : chunk is not well balanced
+ <foo>bar</foo
+              ^
+ SELECT '<foo>bar</foo' IS DOCUMENT;
+  ?column? 
+ ----------
+  f
+ (1 row)
+ 
  SELECT xml '<foo>bar</foo><bar>foo</bar>' IS DOCUMENT;
   ?column? 
  ----------
***************
*** 376,387 **** SELECT xml 'abc' IS NOT DOCUMENT;
  (1 row)
  
  SELECT '<>' IS NOT DOCUMENT;
  ERROR:  invalid XML content
! LINE 1: SELECT '<>' IS NOT DOCUMENT;
!                ^
! DETAIL:  Entity: line 1: parser error : StartTag: invalid element name
! <>
!  ^
  SELECT xmlagg(data) FROM xmltest;
                  xmlagg                
  --------------------------------------
--- 392,418 ----
  (1 row)
  
  SELECT '<>' IS NOT DOCUMENT;
+  ?column? 
+ ----------
+  t
+ (1 row)
+ 
+ SELECT xml '<foo>bar</foo' IS NOT DOCUMENT;
  ERROR:  invalid XML content
! LINE 1: SELECT xml '<foo>bar</foo' IS NOT DOCUMENT;
!                    ^
! DETAIL:  Entity: line 1: parser error : expected '>'
! <foo>bar</foo
!              ^
! Entity: line 1: parser error : chunk is not well balanced
! <foo>bar</foo
!              ^
! SELECT '<foo>bar</foo' IS NOT DOCUMENT;
!  ?column? 
! ----------
!  t
! (1 row)
! 
  SELECT xmlagg(data) FROM xmltest;
                  xmlagg                
  --------------------------------------
*** a/src/test/regress/sql/xml.sql
--- b/src/test/regress/sql/xml.sql
***************
*** 112,121 **** SELECT xmlserialize(document 'bad' as text);
--- 112,125 ----
  
  
  SELECT xml '<foo>bar</foo>' IS DOCUMENT;
+ SELECT xml '<foo>bar</foo' IS DOCUMENT;
+ SELECT '<foo>bar</foo' IS DOCUMENT;
  SELECT xml '<foo>bar</foo><bar>foo</bar>' IS DOCUMENT;
  SELECT xml '<abc/>' IS NOT DOCUMENT;
  SELECT xml 'abc' IS NOT DOCUMENT;
  SELECT '<>' IS NOT DOCUMENT;
+ SELECT xml '<foo>bar</foo' IS NOT DOCUMENT;
+ SELECT '<foo>bar</foo' IS NOT DOCUMENT;
  
  
  SELECT xmlagg(data) FROM xmltest;
-- 
Sent via pgsql-bugs mailing list (pgsql-bugs@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-bugs

Reply via email to