Quoting Mike Fowler <m...@mlfowler.com>:
The following bug has been logged online: Bug reference: 5534 Logged by: Mike Fowler Email address: m...@mlfowler.com PostgreSQL version: 9.0beta2 Operating system: Linux 2.6.31-14-generic #48-Ubuntu SMP Description: IS DOCUMENT predicate errors instead of returning false Details: IS DOCUMENT should return false for a non-well-formed document, and indeed is coded to do so. However, the conversion to the xml type, which happens before the underlying xml_is_document function is even called, fails and raises an error. I've mentioned this on -hackers with message ID 20100701172553.w5vdy1xbocos8...@www.mlfowler.com -- Sent via pgsql-bugs mailing list (pgsql-bugs@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-bugs
The attached patch is a very small patch that changes parse_expr.c to not convert everything to xml. This now means that when passed malformed XML, IS DOCUMENT will return false instead of throwing an exception. In my mind this is acceptable, as I don't see anywhere in the standard that mandates that:
xmlval IS NOT DOCUMENT == xmlval IS CONTENT Regards, -- Mike Fowler Registered Linux user: 379787
*** a/src/backend/parser/parse_expr.c --- b/src/backend/parser/parse_expr.c *************** *** 1950,1956 **** transformXmlExpr(ParseState *pstate, XmlExpr *x) Assert(false); break; case IS_DOCUMENT: ! newe = coerce_to_specific_type(pstate, newe, XMLOID, "IS DOCUMENT"); break; } --- 1950,1956 ---- Assert(false); break; case IS_DOCUMENT: ! newe = coerce_to_specific_type(pstate, newe, TEXTOID, "IS DOCUMENT"); break; } *** a/src/backend/utils/adt/xml.c --- b/src/backend/utils/adt/xml.c *************** *** 795,801 **** xmlvalidate(PG_FUNCTION_ARGS) bool ! xml_is_document(xmltype *arg) { #ifdef USE_LIBXML bool result; --- 795,801 ---- bool ! xml_is_document(text *arg) { #ifdef USE_LIBXML bool result; *************** *** 805,811 **** xml_is_document(xmltype *arg) /* We want to catch ereport(INVALID_XML_DOCUMENT) and return false */ PG_TRY(); { ! doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true, GetDatabaseEncoding()); result = true; } --- 805,811 ---- /* We want to catch ereport(INVALID_XML_DOCUMENT) and return false */ PG_TRY(); { ! doc = xml_parse(arg, XMLOPTION_DOCUMENT, true, GetDatabaseEncoding()); result = true; } *** a/src/include/utils/xml.h --- b/src/include/utils/xml.h *************** *** 70,76 **** extern xmltype *xmlelement(XmlExprState *xmlExpr, ExprContext *econtext); extern xmltype *xmlparse(text *data, XmlOptionType xmloption, bool preserve_whitespace); extern xmltype *xmlpi(char *target, text *arg, bool arg_is_null, bool *result_is_null); extern xmltype *xmlroot(xmltype *data, text *version, int standalone); ! extern bool xml_is_document(xmltype *arg); extern text *xmltotext_with_xmloption(xmltype *data, XmlOptionType xmloption_arg); extern char *escape_xml(const char *str); --- 70,76 ---- extern xmltype *xmlparse(text *data, XmlOptionType xmloption, bool preserve_whitespace); extern xmltype *xmlpi(char *target, text *arg, bool arg_is_null, bool *result_is_null); extern xmltype *xmlroot(xmltype *data, text *version, int standalone); ! 
extern bool xml_is_document(text *arg); extern text *xmltotext_with_xmloption(xmltype *data, XmlOptionType xmloption_arg); extern char *escape_xml(const char *str); *** a/src/test/regress/expected/xml.out --- b/src/test/regress/expected/xml.out *************** *** 357,362 **** SELECT xml '<foo>bar</foo>' IS DOCUMENT; --- 357,378 ---- t (1 row) + SELECT xml '<foo>bar</foo' IS DOCUMENT; + ERROR: invalid XML content + LINE 1: SELECT xml '<foo>bar</foo' IS DOCUMENT; + ^ + DETAIL: Entity: line 1: parser error : expected '>' + <foo>bar</foo + ^ + Entity: line 1: parser error : chunk is not well balanced + <foo>bar</foo + ^ + SELECT '<foo>bar</foo' IS DOCUMENT; + ?column? + ---------- + f + (1 row) + SELECT xml '<foo>bar</foo><bar>foo</bar>' IS DOCUMENT; ?column? ---------- *************** *** 376,387 **** SELECT xml 'abc' IS NOT DOCUMENT; (1 row) SELECT '<>' IS NOT DOCUMENT; ERROR: invalid XML content ! LINE 1: SELECT '<>' IS NOT DOCUMENT; ! ^ ! DETAIL: Entity: line 1: parser error : StartTag: invalid element name ! <> ! ^ SELECT xmlagg(data) FROM xmltest; xmlagg -------------------------------------- --- 392,418 ---- (1 row) SELECT '<>' IS NOT DOCUMENT; + ?column? + ---------- + t + (1 row) + + SELECT xml '<foo>bar</foo' IS NOT DOCUMENT; ERROR: invalid XML content ! LINE 1: SELECT xml '<foo>bar</foo' IS NOT DOCUMENT; ! ^ ! DETAIL: Entity: line 1: parser error : expected '>' ! <foo>bar</foo ! ^ ! Entity: line 1: parser error : chunk is not well balanced ! <foo>bar</foo ! ^ ! SELECT '<foo>bar</foo' IS NOT DOCUMENT; ! ?column? ! ---------- ! t ! (1 row) ! 
SELECT xmlagg(data) FROM xmltest; xmlagg -------------------------------------- *** a/src/test/regress/sql/xml.sql --- b/src/test/regress/sql/xml.sql *************** *** 112,121 **** SELECT xmlserialize(document 'bad' as text); --- 112,125 ---- SELECT xml '<foo>bar</foo>' IS DOCUMENT; + SELECT xml '<foo>bar</foo' IS DOCUMENT; + SELECT '<foo>bar</foo' IS DOCUMENT; SELECT xml '<foo>bar</foo><bar>foo</bar>' IS DOCUMENT; SELECT xml '<abc/>' IS NOT DOCUMENT; SELECT xml 'abc' IS NOT DOCUMENT; SELECT '<>' IS NOT DOCUMENT; + SELECT xml '<foo>bar</foo' IS NOT DOCUMENT; + SELECT '<foo>bar</foo' IS NOT DOCUMENT; SELECT xmlagg(data) FROM xmltest;
-- Sent via pgsql-bugs mailing list (pgsql-bugs@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-bugs