On 09.02.24 14:19, Jim Jones wrote: > v9 attached with rebase due to changes done to primnodes.h in 615f5f6 > v10 attached with rebase due to changes in primnodes, parsenodes.h, and gram.y
-- Jim
From fbd98149d50fe19b886b30ed49b9d553a18f30b4 Mon Sep 17 00:00:00 2001 From: Jim Jones <jim.jo...@uni-muenster.de> Date: Wed, 19 Jun 2024 10:22:10 +0200 Subject: [PATCH v10] Add CANONICAL output format to xmlserialize This patch introduces the CANONICAL option to xmlserialize, which serializes xml documents in their canonical form - as described in the W3C Canonical XML Version 1.1 specification. This option can be used with the additional parameter WITH [NO] COMMENTS to keep or remove xml comments from the canonical xml output. In case no parameter is provided, WITH COMMENTS will be used as default. This feature is based on the function xmlC14NDocDumpMemory from the C14N module of libxml2. This patch also includes regression tests and documentation. --- doc/src/sgml/datatype.sgml | 41 +++- src/backend/executor/execExprInterp.c | 2 +- src/backend/parser/gram.y | 21 +- src/backend/parser/parse_expr.c | 2 +- src/backend/utils/adt/xml.c | 272 ++++++++++++++++---------- src/include/nodes/parsenodes.h | 1 + src/include/nodes/primnodes.h | 10 + src/include/parser/kwlist.h | 1 + src/include/utils/xml.h | 2 +- src/test/regress/expected/xml.out | 114 +++++++++++ src/test/regress/expected/xml_1.out | 108 ++++++++++ src/test/regress/expected/xml_2.out | 114 +++++++++++ src/test/regress/sql/xml.sql | 63 ++++++ src/tools/pgindent/typedefs.list | 1 + 14 files changed, 642 insertions(+), 110 deletions(-) diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml index 6646820d6a..7c28d34866 100644 --- a/doc/src/sgml/datatype.sgml +++ b/doc/src/sgml/datatype.sgml @@ -4483,7 +4483,7 @@ xml '<foo>bar</foo>' <type>xml</type>, uses the function <function>xmlserialize</function>:<indexterm><primary>xmlserialize</primary></indexterm> <synopsis> -XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <replaceable>type</replaceable> [ [ NO ] INDENT ] ) +XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <replaceable>type</replaceable> [ 
{ [ NO ] INDENT | CANONICAL [ WITH [ NO ] COMMENTS ] } ] ) </synopsis> <replaceable>type</replaceable> can be <type>character</type>, <type>character varying</type>, or @@ -4500,6 +4500,45 @@ XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <repla type likewise produces the original string. </para> + <para> + The option <type>CANONICAL</type> converts a given + XML document to its <ulink url="https://www.w3.org/TR/xml-c14n11/#Terminology">canonical form</ulink> + based on the <ulink url="https://www.w3.org/TR/xml-c14n11/">W3C Canonical XML 1.1 Specification</ulink>. + It is designed to let applications compare XML documents or test whether they + have been changed. The optional parameters <type>WITH COMMENTS</type> (which is the default) or + <type>WITH NO COMMENTS</type>, respectively, keep or remove XML comments from the given document. + </para> + + <para> + Example: + +<screen><![CDATA[ +SELECT + xmlserialize(DOCUMENT + '<foo> + <!-- a comment --> + <bar c="3" b="2" a="1">42</bar> + <empty/> + </foo>'::xml AS text CANONICAL); + xmlserialize +----------------------------------------------------------------------------- + <foo><!-- a comment --><bar a="1" b="2" c="3">42</bar><empty></empty></foo> +(1 row) + +SELECT + xmlserialize(DOCUMENT + '<foo> + <!-- a comment --> + <bar c="3" b="2" a="1">42</bar> + <empty/> + </foo>'::xml AS text CANONICAL WITH NO COMMENTS); + xmlserialize +----------------------------------------------------------- + <foo><bar a="1" b="2" c="3">42</bar><empty></empty></foo> +(1 row) + +]]></screen> + </para> <para> When a character string value is cast to or from type <type>xml</type> without going through <type>XMLPARSE</type> or diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c index 852186312c..f14d7464ef 100644 --- a/src/backend/executor/execExprInterp.c +++ b/src/backend/executor/execExprInterp.c @@ -4053,7 +4053,7 @@ ExecEvalXmlExpr(ExprState *state, 
ExprEvalStep *op) *op->resvalue = PointerGetDatum(xmltotext_with_options(DatumGetXmlP(value), xexpr->xmloption, - xexpr->indent)); + xexpr->format)); *op->resnull = false; } break; diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 4d582950b7..ff1caa21f2 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -619,12 +619,13 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type <node> xml_root_version opt_xml_root_standalone %type <node> xmlexists_argument %type <ival> document_or_content -%type <boolean> xml_indent_option xml_whitespace_option +%type <boolean> xml_whitespace_option %type <list> xmltable_column_list xmltable_column_option_list %type <node> xmltable_column_el %type <defelt> xmltable_column_option_el %type <list> xml_namespace_list %type <target> xml_namespace_el +%type <ival> opt_xml_serialize_format %type <node> func_application func_expr_common_subexpr %type <node> func_expr func_expr_windowless @@ -712,7 +713,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); BACKWARD BEFORE BEGIN_P BETWEEN BIGINT BINARY BIT BOOLEAN_P BOTH BREADTH BY - CACHE CALL CALLED CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P + CACHE CALL CALLED CANONICAL CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE CLUSTER COALESCE COLLATE COLLATION COLUMN COLUMNS COMMENT COMMENTS COMMIT COMMITTED COMPRESSION CONCURRENTLY CONDITIONAL CONFIGURATION CONFLICT @@ -15965,14 +15966,14 @@ func_expr_common_subexpr: $$ = makeXmlExpr(IS_XMLROOT, NULL, NIL, list_make3($3, $5, $6), @1); } - | XMLSERIALIZE '(' document_or_content a_expr AS SimpleTypename xml_indent_option ')' + | XMLSERIALIZE '(' document_or_content a_expr AS SimpleTypename opt_xml_serialize_format ')' { XmlSerialize *n = makeNode(XmlSerialize); n->xmloption = $3; n->expr = $4; n->typeName = $6; - n->indent = $7; + n->format = $7; n->location = @1; $$ = (Node *) 
n; } @@ -16192,9 +16193,13 @@ document_or_content: DOCUMENT_P { $$ = XMLOPTION_DOCUMENT; } | CONTENT_P { $$ = XMLOPTION_CONTENT; } ; -xml_indent_option: INDENT { $$ = true; } - | NO INDENT { $$ = false; } - | /*EMPTY*/ { $$ = false; } +opt_xml_serialize_format: + INDENT { $$ = XMLSERIALIZE_INDENT; } + | NO INDENT { $$ = XMLSERIALIZE_NO_FORMAT; } + | CANONICAL { $$ = XMLSERIALIZE_CANONICAL; } + | CANONICAL WITH NO COMMENTS { $$ = XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS; } + | CANONICAL WITH COMMENTS { $$ = XMLSERIALIZE_CANONICAL; } + | /*EMPTY*/ { $$ = XMLSERIALIZE_NO_FORMAT; } ; xml_whitespace_option: PRESERVE WHITESPACE_P { $$ = true; } @@ -17591,6 +17596,7 @@ unreserved_keyword: | CACHE | CALL | CALLED + | CANONICAL | CASCADE | CASCADED | CATALOG_P @@ -18145,6 +18151,7 @@ bare_label_keyword: | CACHE | CALL | CALLED + | CANONICAL | CASCADE | CASCADED | CASE diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c index 00cd7358eb..b28a89f411 100644 --- a/src/backend/parser/parse_expr.c +++ b/src/backend/parser/parse_expr.c @@ -2490,7 +2490,7 @@ transformXmlSerialize(ParseState *pstate, XmlSerialize *xs) typenameTypeIdAndMod(pstate, xs->typeName, &targetType, &targetTypmod); xexpr->xmloption = xs->xmloption; - xexpr->indent = xs->indent; + xexpr->format = xs->format; xexpr->location = xs->location; /* We actually only need these to be able to parse back the expression. 
*/ xexpr->type = targetType; diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c index 3e4ca874d8..44738b09d2 100644 --- a/src/backend/utils/adt/xml.c +++ b/src/backend/utils/adt/xml.c @@ -58,6 +58,7 @@ #include <libxml/xmlwriter.h> #include <libxml/xpath.h> #include <libxml/xpathInternals.h> +#include <libxml/c14n.h> /* * We used to check for xmlStructuredErrorContext via a configure test; but @@ -653,7 +654,7 @@ xmltotext(PG_FUNCTION_ARGS) text * -xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent) +xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, XmlSerializeFormat format) { #ifdef USE_LIBXML text *volatile result; @@ -666,7 +667,7 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent) PgXmlErrorContext *xmlerrcxt; #endif - if (xmloption_arg != XMLOPTION_DOCUMENT && !indent) + if (xmloption_arg != XMLOPTION_DOCUMENT && format == XMLSERIALIZE_NO_FORMAT) { /* * We don't actually need to do anything, so just return the @@ -677,10 +678,23 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent) } #ifdef USE_LIBXML - /* Parse the input according to the xmloption */ - doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding(), - &parsed_xmloptiontype, &content_nodes, - (Node *) &escontext); + /* + * Parse the input according to the xmloption. Canonicalization expects + * well-formed XML input, so in case of XMLSERIALIZE_CANONICAL + * or XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS we force xml_parse() to parse + * 'data' as XMLOPTION_DOCUMENT regardless of the XmlOptionType given in + * 'xmloption_arg'. This enables the canonicalization of CONTENT fragments + * if they contain singly-rooted XML - xml_parse() will throw an error + * otherwise. 
+ */ + if(format == XMLSERIALIZE_CANONICAL || format == XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS) + doc = xml_parse(data, XMLOPTION_DOCUMENT, false, + GetDatabaseEncoding(), NULL, NULL, NULL); + else + doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding(), + &parsed_xmloptiontype, &content_nodes, + (Node *) &escontext); + if (doc == NULL || escontext.error_occurred) { if (doc) @@ -692,7 +706,7 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent) } /* If we weren't asked to indent, we're done. */ - if (!indent) + if (format == XMLSERIALIZE_NO_FORMAT) { xmlFreeDoc(doc); return (text *) data; @@ -701,128 +715,188 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent) /* Otherwise, we gotta spin up some error handling. */ xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL); - PG_TRY(); + if(format == XMLSERIALIZE_INDENT) { - size_t decl_len = 0; - - /* The serialized data will go into this buffer. */ - buf = xmlBufferCreate(); - - if (buf == NULL || xmlerrcxt->err_occurred) - xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, - "could not allocate xmlBuffer"); - - /* Detect whether there's an XML declaration */ - parse_xml_decl(xml_text2xmlChar(data), &decl_len, NULL, NULL, NULL); - - /* - * Emit declaration only if the input had one. Note: some versions of - * xmlSaveToBuffer leak memory if a non-null encoding argument is - * passed, so don't do that. We don't want any encoding conversion - * anyway. - */ - if (decl_len == 0) - ctxt = xmlSaveToBuffer(buf, NULL, - XML_SAVE_NO_DECL | XML_SAVE_FORMAT); - else - ctxt = xmlSaveToBuffer(buf, NULL, - XML_SAVE_FORMAT); - - if (ctxt == NULL || xmlerrcxt->err_occurred) - xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, - "could not allocate xmlSaveCtxt"); - - if (parsed_xmloptiontype == XMLOPTION_DOCUMENT) - { - /* If it's a document, saving is easy. 
*/ - if (xmlSaveDoc(ctxt, doc) == -1 || xmlerrcxt->err_occurred) - xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, - "could not save document to xmlBuffer"); - } - else if (content_nodes != NULL) + PG_TRY(); { - /* - * Deal with the case where we have non-singly-rooted XML. - * libxml's dump functions don't work well for that without help. - * We build a fake root node that serves as a container for the - * content nodes, and then iterate over the nodes. - */ - xmlNodePtr root; - xmlNodePtr newline; + size_t decl_len = 0; + + /* The serialized data will go into this buffer. */ + buf = xmlBufferCreate(); - root = xmlNewNode(NULL, (const xmlChar *) "content-root"); - if (root == NULL || xmlerrcxt->err_occurred) + if (buf == NULL || xmlerrcxt->err_occurred) xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, - "could not allocate xml node"); + "could not allocate xmlBuffer"); - /* This attaches root to doc, so we need not free it separately. */ - xmlDocSetRootElement(doc, root); - xmlAddChild(root, content_nodes); + /* Detect whether there's an XML declaration */ + parse_xml_decl(xml_text2xmlChar(data), &decl_len, NULL, NULL, NULL); /* - * We use this node to insert newlines in the dump. Note: in at - * least some libxml versions, xmlNewDocText would not attach the - * node to the document even if we passed it. Therefore, manage - * freeing of this node manually, and pass NULL here to make sure - * there's not a dangling link. + * Emit declaration only if the input had one. Note: some versions of + * xmlSaveToBuffer leak memory if a non-null encoding argument is + * passed, so don't do that. We don't want any encoding conversion + * anyway. 
*/ - newline = xmlNewDocText(NULL, (const xmlChar *) "\n"); - if (newline == NULL || xmlerrcxt->err_occurred) + if (decl_len == 0) + ctxt = xmlSaveToBuffer(buf, NULL, + XML_SAVE_NO_DECL | XML_SAVE_FORMAT); + else + ctxt = xmlSaveToBuffer(buf, NULL, + XML_SAVE_FORMAT); + + if (ctxt == NULL || xmlerrcxt->err_occurred) xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, - "could not allocate xml node"); + "could not allocate xmlSaveCtxt"); - for (xmlNodePtr node = root->children; node; node = node->next) + if (parsed_xmloptiontype == XMLOPTION_DOCUMENT) { - /* insert newlines between nodes */ - if (node->type != XML_TEXT_NODE && node->prev != NULL) + /* If it's a document, saving is easy. */ + if (xmlSaveDoc(ctxt, doc) == -1 || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, + "could not save document to xmlBuffer"); + } + else if (content_nodes != NULL) + { + /* + * Deal with the case where we have non-singly-rooted XML. + * libxml's dump functions don't work well for that without help. + * We build a fake root node that serves as a container for the + * content nodes, and then iterate over the nodes. + */ + xmlNodePtr root; + xmlNodePtr newline; + + root = xmlNewNode(NULL, (const xmlChar *) "content-root"); + if (root == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate xml node"); + + /* This attaches root to doc, so we need not free it separately. */ + xmlDocSetRootElement(doc, root); + xmlAddChild(root, content_nodes); + + /* + * We use this node to insert newlines in the dump. Note: in at + * least some libxml versions, xmlNewDocText would not attach the + * node to the document even if we passed it. Therefore, manage + * freeing of this node manually, and pass NULL here to make sure + * there's not a dangling link. 
+ */ + newline = xmlNewDocText(NULL, (const xmlChar *) "\n"); + if (newline == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate xml node"); + + for (xmlNodePtr node = root->children; node; node = node->next) { - if (xmlSaveTree(ctxt, newline) == -1 || xmlerrcxt->err_occurred) + /* insert newlines between nodes */ + if (node->type != XML_TEXT_NODE && node->prev != NULL) + { + if (xmlSaveTree(ctxt, newline) == -1 || xmlerrcxt->err_occurred) + { + xmlFreeNode(newline); + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, + "could not save newline to xmlBuffer"); + } + } + + if (xmlSaveTree(ctxt, node) == -1 || xmlerrcxt->err_occurred) { xmlFreeNode(newline); xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, - "could not save newline to xmlBuffer"); + "could not save content to xmlBuffer"); } } - if (xmlSaveTree(ctxt, node) == -1 || xmlerrcxt->err_occurred) - { - xmlFreeNode(newline); - xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, - "could not save content to xmlBuffer"); - } + xmlFreeNode(newline); } - xmlFreeNode(newline); - } + if (xmlSaveClose(ctxt) == -1 || xmlerrcxt->err_occurred) + { + ctxt = NULL; /* don't try to close it again */ + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, + "could not close xmlSaveCtxtPtr"); + } - if (xmlSaveClose(ctxt) == -1 || xmlerrcxt->err_occurred) + result = (text *) xmlBuffer_to_xmltype(buf); + } + PG_CATCH(); { - ctxt = NULL; /* don't try to close it again */ - xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, - "could not close xmlSaveCtxtPtr"); + if (ctxt) + xmlSaveClose(ctxt); + if (buf) + xmlBufferFree(buf); + if (doc) + xmlFreeDoc(doc); + + pg_xml_done(xmlerrcxt, true); + + PG_RE_THROW(); } + PG_END_TRY(); + + xmlBufferFree(buf); + xmlFreeDoc(doc); - result = (text *) xmlBuffer_to_xmltype(buf); + pg_xml_done(xmlerrcxt, false); } - PG_CATCH(); + else if (format == XMLSERIALIZE_CANONICAL || format == XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS) 
{ - if (ctxt) - xmlSaveClose(ctxt); - if (buf) - xmlBufferFree(buf); - if (doc) - xmlFreeDoc(doc); + xmlChar *xmlbuf = NULL; + int nbytes; + int with_comments = 0; /* 0 = no xml comments (default) */ - pg_xml_done(xmlerrcxt, true); + PG_TRY(); + { + /* 1 = keeps xml comments */ + if (format == XMLSERIALIZE_CANONICAL) + with_comments = 1; - PG_RE_THROW(); - } - PG_END_TRY(); + if (doc == NULL || escontext.error_occurred) + { + if (doc) + xmlFreeDoc(doc); + /* A soft error must be failure to conform to XMLOPTION_DOCUMENT */ + ereport(ERROR, + (errcode(ERRCODE_NOT_AN_XML_DOCUMENT), + errmsg("not an XML document"))); + } - xmlBufferFree(buf); - xmlFreeDoc(doc); + /* + * This dumps the canonicalized XML doc into the xmlChar* buffer. + * mode = 2 means the doc will be canonicalized using the C14N 1.1 standard. + */ + nbytes = xmlC14NDocDumpMemory(doc, NULL, 2, NULL, with_comments, &xmlbuf); - pg_xml_done(xmlerrcxt, false); + if(nbytes < 0 || escontext.error_occurred) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("could not canonicalize the given XML document"))); + + result = cstring_to_text_with_len((const char *) xmlbuf, nbytes); + } + PG_CATCH(); + { + if (ctxt) + xmlSaveClose(ctxt); + if (xmlbuf) + xmlFree(xmlbuf); + if (doc) + xmlFreeDoc(doc); + + pg_xml_done(xmlerrcxt, true); + + PG_RE_THROW(); + } + PG_END_TRY(); + + xmlFreeDoc(doc); + xmlFree(xmlbuf); + + pg_xml_done(xmlerrcxt, false); + } + else + elog(ERROR,"invalid xmlserialize option"); return result; #else diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 85a62b538e..47bffb7cd1 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -846,6 +846,7 @@ typedef struct XmlSerialize Node *expr; TypeName *typeName; bool indent; /* [NO] INDENT */ + XmlSerializeFormat format; /* serialization format */ ParseLoc location; /* token location, or -1 if unknown */ } XmlSerialize; diff --git a/src/include/nodes/primnodes.h 
b/src/include/nodes/primnodes.h index 4830efc573..236ace6857 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -1593,6 +1593,14 @@ typedef enum XmlOptionType XMLOPTION_CONTENT, } XmlOptionType; +typedef enum XmlSerializeFormat +{ + XMLSERIALIZE_INDENT, /* pretty-printed xml serialization */ + XMLSERIALIZE_CANONICAL, /* canonical form with xml comments */ + XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS, /* canonical form without xml comments */ + XMLSERIALIZE_NO_FORMAT /* unformatted xml representation */ +} XmlSerializeFormat; + typedef struct XmlExpr { Expr xpr; @@ -1615,6 +1623,8 @@ typedef struct XmlExpr int32 typmod pg_node_attr(query_jumble_ignore); /* token location, or -1 if unknown */ ParseLoc location; + /* serialization format: XMLSERIALIZE_INDENT, XMLSERIALIZE_CANONICAL, XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS, or XMLSERIALIZE_NO_FORMAT */ + XmlSerializeFormat format pg_node_attr(query_jumble_ignore); } XmlExpr; /* diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h index f7fe834cf4..68d939d754 100644 --- a/src/include/parser/kwlist.h +++ b/src/include/parser/kwlist.h @@ -68,6 +68,7 @@ PG_KEYWORD("by", BY, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("cache", CACHE, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("call", CALL, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("called", CALLED, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("canonical", CANONICAL, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("cascade", CASCADE, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("cascaded", CASCADED, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("case", CASE, RESERVED_KEYWORD, BARE_LABEL) diff --git a/src/include/utils/xml.h b/src/include/utils/xml.h index ed20e21375..05f11c0517 100644 --- a/src/include/utils/xml.h +++ b/src/include/utils/xml.h @@ -78,7 +78,7 @@ extern xmltype *xmlpi(const char *target, text *arg, bool arg_is_null, bool *res extern xmltype *xmlroot(xmltype *data, text *version, int standalone); extern bool xml_is_document(xmltype *arg); extern text *xmltotext_with_options(xmltype 
*data, XmlOptionType xmloption_arg, - bool indent); + XmlSerializeFormat format); extern char *escape_xml(const char *str); extern char *map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped, bool escape_period); diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out index 6500cff885..3f9fce8a77 100644 --- a/src/test/regress/expected/xml.out +++ b/src/test/regress/expected/xml.out @@ -672,6 +672,120 @@ SELECT xmlserialize(CONTENT '<foo><bar><val x="y">42</val></bar></foo>' AS text t (1 row) +-- xmlserialize: canonical +CREATE TABLE xmltest_serialize (id int, doc xml); +INSERT INTO xmltest_serialize VALUES + (1,'<?xml version="1.0" encoding="ISO-8859-1"?> + <!DOCTYPE doc SYSTEM "doc.dtd" [ + <!ENTITY val "42"> + <!ATTLIST xyz attr CDATA "default"> + ]> + + <!-- attributes and namespces will be sorted --> + <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am" + xmlns:b="http://www.ietf.org" + xmlns:a="http://www.w3.org" + xmlns="http://example.org"> + + <!-- Normalization of whitespace in start and end tags --> + <!-- Elimination of superfluous namespace declarations, as already declared in <foo> --> + <bar xmlns="" xmlns:a="http://www.w3.org" >&val;</bar > + + <!-- empty element will be converted to start-end tag pair --> + <empty/> + + <!-- text will be transcoded to UTF-8 --> + <transcode>1</transcode> + + <!-- default attribute will be added --> + <!-- whitespace inside tag will be preserved --> + <whitespace> 321 </whitespace> + + <!-- empty namespace will be removed of child tag --> + <emptyns xmlns="" > + <emptyns_child xmlns=""></emptyns_child> + </emptyns> + + <!-- CDATA section will be replaced by its value --> + <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute> + </foo> + <!-- comment outside doc -->'::xml), + (2,'<foo> + <bar> + <!-- important comment --> + <val x="y">42</val> + </bar> + </foo> '::xml); +SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id 
= 1; + xmlserialize +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + <!-- attributes and namespces will be sorted --> + + <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><!-- Normalization of whitespace in start and end tags --><!-- Elimination of superfluous namespace declarations, as already declared in <foo> --><bar xmlns="">42</bar><!-- empty element will be converted to start-end tag pair --><empty></empty><!-- text will be transcoded to UTF-8 --><transcode>1</transcode><!-- default attribute will be added --><!-- whitespace inside tag will be preserved --><whitespace> 321 </whitespace><!-- empty namespace will be removed of child tag --><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><!-- CDATA section will be replaced by its value --><compute>value>"0" && value<"10" ?"valid":"error"</compute></foo>+ + <!-- comment outside doc --> +(1 row) + +SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2; + xmlserialize +--------------------------------------------------------------------- + <foo><bar><!-- important comment --><val x="y">42</val></bar></foo> 
+(1 row) + +SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize; + ?column? +---------- + t + t +(2 rows) + +SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize WHERE id = 1; + xmlserialize +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value>"0" && value<"10" ?"valid":"error"</compute></foo> +(1 row) + +SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2; + xmlserialize +--------------------------------------------------------------------- + <foo><bar><!-- important comment --><val x="y">42</val></bar></foo> +(1 row) + +SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize; + ?column? 
+---------- + t + t +(2 rows) + +SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL); + xmlserialize +-------------- + +(1 row) + +SELECT xmlserialize(CONTENT NULL AS text CANONICAL); + xmlserialize +-------------- + +(1 row) + +\set VERBOSITY terse +SELECT xmlserialize(DOCUMENT '' AS text CANONICAL); +ERROR: invalid XML document +SELECT xmlserialize(DOCUMENT ' ' AS text CANONICAL); +ERROR: invalid XML document +SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL); +ERROR: invalid XML document +SELECT xmlserialize(CONTENT '' AS text CANONICAL); +ERROR: invalid XML document +SELECT xmlserialize(CONTENT ' ' AS text CANONICAL); +ERROR: invalid XML document +SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL); +ERROR: invalid XML document +SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT); +ERROR: syntax error at or near "INDENT" at character 75 +SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT); +ERROR: syntax error at or near "INDENT" at character 74 +\set VERBOSITY default SELECT xml '<foo>bar</foo>' IS DOCUMENT; ?column? ---------- diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out index 9323b84ae2..6a76a4fd9a 100644 --- a/src/test/regress/expected/xml_1.out +++ b/src/test/regress/expected/xml_1.out @@ -443,6 +443,114 @@ ERROR: unsupported XML feature LINE 1: SELECT xmlserialize(CONTENT '<foo><bar><val x="y">42</val><... ^ DETAIL: This functionality requires the server to be built with libxml support. 
+-- xmlserialize: canonical +CREATE TABLE xmltest_serialize (id int, doc xml); +INSERT INTO xmltest_serialize VALUES + (1,'<?xml version="1.0" encoding="ISO-8859-1"?> + <!DOCTYPE doc SYSTEM "doc.dtd" [ + <!ENTITY val "42"> + <!ATTLIST xyz attr CDATA "default"> + ]> + + <!-- attributes and namespces will be sorted --> + <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am" + xmlns:b="http://www.ietf.org" + xmlns:a="http://www.w3.org" + xmlns="http://example.org"> + + <!-- Normalization of whitespace in start and end tags --> + <!-- Elimination of superfluous namespace declarations, as already declared in <foo> --> + <bar xmlns="" xmlns:a="http://www.w3.org" >&val;</bar > + + <!-- empty element will be converted to start-end tag pair --> + <empty/> + + <!-- text will be transcoded to UTF-8 --> + <transcode>1</transcode> + + <!-- default attribute will be added --> + <!-- whitespace inside tag will be preserved --> + <whitespace> 321 </whitespace> + + <!-- empty namespace will be removed of child tag --> + <emptyns xmlns="" > + <emptyns_child xmlns=""></emptyns_child> + </emptyns> + + <!-- CDATA section will be replaced by its value --> + <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute> + </foo> + <!-- comment outside doc -->'::xml), + (2,'<foo> + <bar> + <!-- important comment --> + <val x="y">42</val> + </bar> + </foo> '::xml); +ERROR: unsupported XML feature +LINE 2: (1,'<?xml version="1.0" encoding="ISO-8859-1"?> + ^ +DETAIL: This functionality requires the server to be built with libxml support. +SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1; + xmlserialize +-------------- +(0 rows) + +SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2; + xmlserialize +-------------- +(0 rows) + +SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize; + ?column? 
+---------- +(0 rows) + +SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize WHERE id = 1; + xmlserialize +-------------- +(0 rows) + +SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2; + xmlserialize +-------------- +(0 rows) + +SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize; + ?column? +---------- +(0 rows) + +SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL); + xmlserialize +-------------- + +(1 row) + +SELECT xmlserialize(CONTENT NULL AS text CANONICAL); + xmlserialize +-------------- + +(1 row) + +\set VERBOSITY terse +SELECT xmlserialize(DOCUMENT '' AS text CANONICAL); +ERROR: unsupported XML feature at character 30 +SELECT xmlserialize(DOCUMENT ' ' AS text CANONICAL); +ERROR: unsupported XML feature at character 30 +SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL); +ERROR: unsupported XML feature at character 30 +SELECT xmlserialize(CONTENT '' AS text CANONICAL); +ERROR: unsupported XML feature at character 29 +SELECT xmlserialize(CONTENT ' ' AS text CANONICAL); +ERROR: unsupported XML feature at character 29 +SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL); +ERROR: unsupported XML feature at character 29 +SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT); +ERROR: syntax error at or near "INDENT" at character 75 +SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT); +ERROR: syntax error at or near "INDENT" at character 74 +\set VERBOSITY default SELECT xml '<foo>bar</foo>' IS DOCUMENT; ERROR: unsupported XML feature LINE 1: SELECT xml '<foo>bar</foo>' IS DOCUMENT; diff --git a/src/test/regress/expected/xml_2.out b/src/test/regress/expected/xml_2.out index e1d165c6c9..b47fb44b7a 100644 --- a/src/test/regress/expected/xml_2.out +++ b/src/test/regress/expected/xml_2.out @@ -652,6 +652,120 @@ SELECT 
xmlserialize(CONTENT '<foo><bar><val x="y">42</val></bar></foo>' AS text t (1 row) +-- xmlserialize: canonical +CREATE TABLE xmltest_serialize (id int, doc xml); +INSERT INTO xmltest_serialize VALUES + (1,'<?xml version="1.0" encoding="ISO-8859-1"?> + <!DOCTYPE doc SYSTEM "doc.dtd" [ + <!ENTITY val "42"> + <!ATTLIST xyz attr CDATA "default"> + ]> + + <!-- attributes and namespces will be sorted --> + <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am" + xmlns:b="http://www.ietf.org" + xmlns:a="http://www.w3.org" + xmlns="http://example.org"> + + <!-- Normalization of whitespace in start and end tags --> + <!-- Elimination of superfluous namespace declarations, as already declared in <foo> --> + <bar xmlns="" xmlns:a="http://www.w3.org" >&val;</bar > + + <!-- empty element will be converted to start-end tag pair --> + <empty/> + + <!-- text will be transcoded to UTF-8 --> + <transcode>1</transcode> + + <!-- default attribute will be added --> + <!-- whitespace inside tag will be preserved --> + <whitespace> 321 </whitespace> + + <!-- empty namespace will be removed of child tag --> + <emptyns xmlns="" > + <emptyns_child xmlns=""></emptyns_child> + </emptyns> + + <!-- CDATA section will be replaced by its value --> + <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute> + </foo> + <!-- comment outside doc -->'::xml), + (2,'<foo> + <bar> + <!-- important comment --> + <val x="y">42</val> + </bar> + </foo> '::xml); +SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1; + xmlserialize 
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + <!-- attributes and namespces will be sorted --> + + <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><!-- Normalization of whitespace in start and end tags --><!-- Elimination of superfluous namespace declarations, as already declared in <foo> --><bar xmlns="">42</bar><!-- empty element will be converted to start-end tag pair --><empty></empty><!-- text will be transcoded to UTF-8 --><transcode>1</transcode><!-- default attribute will be added --><!-- whitespace inside tag will be preserved --><whitespace> 321 </whitespace><!-- empty namespace will be removed of child tag --><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><!-- CDATA section will be replaced by its value --><compute>value>"0" && value<"10" ?"valid":"error"</compute></foo>+ + <!-- comment outside doc --> +(1 row) + +SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2; + xmlserialize +--------------------------------------------------------------------- + <foo><bar><!-- important comment --><val x="y">42</val></bar></foo> +(1 row) + +SELECT 
xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize; + ?column? +---------- + t + t +(2 rows) + +SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize WHERE id = 1; + xmlserialize +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value>"0" && value<"10" ?"valid":"error"</compute></foo> +(1 row) + +SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2; + xmlserialize +--------------------------------------------------------------------- + <foo><bar><!-- important comment --><val x="y">42</val></bar></foo> +(1 row) + +SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize; + ?column? 
+---------- + t + t +(2 rows) + +SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL); + xmlserialize +-------------- + +(1 row) + +SELECT xmlserialize(CONTENT NULL AS text CANONICAL); + xmlserialize +-------------- + +(1 row) + +\set VERBOSITY terse +SELECT xmlserialize(DOCUMENT '' AS text CANONICAL); +ERROR: invalid XML document +SELECT xmlserialize(DOCUMENT ' ' AS text CANONICAL); +ERROR: invalid XML document +SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL); +ERROR: invalid XML document +SELECT xmlserialize(CONTENT '' AS text CANONICAL); +ERROR: invalid XML document +SELECT xmlserialize(CONTENT ' ' AS text CANONICAL); +ERROR: invalid XML document +SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL); +ERROR: invalid XML document +SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT); +ERROR: syntax error at or near "INDENT" at character 75 +SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT); +ERROR: syntax error at or near "INDENT" at character 74 +\set VERBOSITY default SELECT xml '<foo>bar</foo>' IS DOCUMENT; ?column? 
---------- diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql index 953bac09e4..a2f1c3566d 100644 --- a/src/test/regress/sql/xml.sql +++ b/src/test/regress/sql/xml.sql @@ -168,6 +168,69 @@ SELECT xmlserialize(CONTENT '<foo><bar></bar></foo>' AS text INDENT); -- 'no indent' = not using 'no indent' SELECT xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val></bar></foo>' AS text) = xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val></bar></foo>' AS text NO INDENT); SELECT xmlserialize(CONTENT '<foo><bar><val x="y">42</val></bar></foo>' AS text) = xmlserialize(CONTENT '<foo><bar><val x="y">42</val></bar></foo>' AS text NO INDENT); +-- xmlserialize: canonical +CREATE TABLE xmltest_serialize (id int, doc xml); +INSERT INTO xmltest_serialize VALUES + (1,'<?xml version="1.0" encoding="ISO-8859-1"?> + <!DOCTYPE doc SYSTEM "doc.dtd" [ + <!ENTITY val "42"> + <!ATTLIST xyz attr CDATA "default"> + ]> + + <!-- attributes and namespces will be sorted --> + <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am" + xmlns:b="http://www.ietf.org" + xmlns:a="http://www.w3.org" + xmlns="http://example.org"> + + <!-- Normalization of whitespace in start and end tags --> + <!-- Elimination of superfluous namespace declarations, as already declared in <foo> --> + <bar xmlns="" xmlns:a="http://www.w3.org" >&val;</bar > + + <!-- empty element will be converted to start-end tag pair --> + <empty/> + + <!-- text will be transcoded to UTF-8 --> + <transcode>1</transcode> + + <!-- default attribute will be added --> + <!-- whitespace inside tag will be preserved --> + <whitespace> 321 </whitespace> + + <!-- empty namespace will be removed of child tag --> + <emptyns xmlns="" > + <emptyns_child xmlns=""></emptyns_child> + </emptyns> + + <!-- CDATA section will be replaced by its value --> + <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute> + </foo> + <!-- comment outside doc -->'::xml), + (2,'<foo> + <bar> + <!-- important comment --> + <val 
x="y">42</val> + </bar> + </foo> '::xml); + +SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1; +SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2; +SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize; +SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize WHERE id = 1; +SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2; +SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize; +SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL); +SELECT xmlserialize(CONTENT NULL AS text CANONICAL); +\set VERBOSITY terse +SELECT xmlserialize(DOCUMENT '' AS text CANONICAL); +SELECT xmlserialize(DOCUMENT ' ' AS text CANONICAL); +SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL); +SELECT xmlserialize(CONTENT '' AS text CANONICAL); +SELECT xmlserialize(CONTENT ' ' AS text CANONICAL); +SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL); +SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT); +SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT); +\set VERBOSITY default SELECT xml '<foo>bar</foo>' IS DOCUMENT; SELECT xml '<foo>bar</foo><bar>foo</bar>' IS DOCUMENT; diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 61ad417cde..df763d7fa5 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -3225,6 +3225,7 @@ XmlExpr XmlExprOp XmlOptionType XmlSerialize +XmlSerializeFormat XmlTableBuilderData YYLTYPE YYSTYPE -- 2.34.1