On 2025-07-24 05:12 +0200, Michael Paquier wrote:
> Switching back to the previous code, where we rely on
> xmlParseBalancedChunkMemory() fixes the issue.  A quick POC is
> attached.  It fails one case in check-world with SERIALIZE because I
> am not sure it is possible to pass down some options through
> xmlParseBalancedChunkMemory(), still the regression is gone, and I am
> wondering if there is not a better solution to be able to dodge the
> original problem and still accept this case.

The whitespace can be preserved by calling xmlKeepBlanksDefault() [1]
before parsing; see the attached v2.  That function is deprecated,
though.  But libxml2 uses thread-local globals, so changing the setting
should be safe.  Apart from that, I see no way to get the effect of
XML_PARSE_NOBLANKS with xmlParseBalancedChunkMemory().

[1] https://gitlab.gnome.org/GNOME/libxml2/-/blob/408bd0e18e6ddba5d18e51d52da0f7b3ca1b4421/parserInternals.c#L2833

-- 
Erik Wienhold
From ef277b72586c9df575c78df1eddf6b481fcfaad2 Mon Sep 17 00:00:00 2001
From: Michael Paquier <mich...@paquier.xyz>
Date: Thu, 24 Jul 2025 12:01:52 +0900
Subject: [PATCH v2] Fix xml2 regression

---
 src/backend/utils/adt/xml.c | 32 ++++++++++----------------------
 1 file changed, 10 insertions(+), 22 deletions(-)

diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index f7b731825fc..e405c042c4f 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -1795,6 +1795,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
        PgXmlErrorContext *xmlerrcxt;
        volatile xmlParserCtxtPtr ctxt = NULL;
        volatile xmlDocPtr doc = NULL;
+       int old_keep_blanks = -1;
 
        /*
         * This step looks annoyingly redundant, but we must do it to have a
@@ -1900,9 +1901,6 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
                }
                else
                {
-                       xmlNodePtr      root;
-                       xmlNodePtr      oldroot PG_USED_FOR_ASSERTS_ONLY;
-
                        /* set up document with empty root node to be the 
context node */
                        doc = xmlNewDoc(version);
                        if (doc == NULL || xmlerrcxt->err_occurred)
@@ -1916,31 +1914,18 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
                                                        "could not allocate XML 
document");
                        doc->standalone = standalone;
 
-                       root = xmlNewNode(NULL, (const xmlChar *) 
"content-root");
-                       if (root == NULL || xmlerrcxt->err_occurred)
-                               xml_ereport(xmlerrcxt, ERROR, 
ERRCODE_OUT_OF_MEMORY,
-                                                       "could not allocate xml 
node");
-
-                       /*
-                        * This attaches root to doc, so we need not free it 
separately;
-                        * and there can't yet be any old root to free.
-                        */
-                       oldroot = xmlDocSetRootElement(doc, root);
-                       Assert(oldroot == NULL);
-
                        /* allow empty content */
                        if (*(utf8string + count))
                        {
                                xmlNodePtr      node_list = NULL;
-                               xmlParserErrors res;
 
-                               res = xmlParseInNodeContext(root,
-                                                                               
        (char *) utf8string + count,
-                                                                               
        strlen((char *) utf8string + count),
-                                                                               
        options,
-                                                                               
        &node_list);
+                               old_keep_blanks = 
xmlKeepBlanksDefault(preserve_whitespace ? 1 : 0);
 
-                               if (res != XML_ERR_OK || 
xmlerrcxt->err_occurred)
+                               res_code = xmlParseBalancedChunkMemory(doc, 
NULL, NULL, 0,
+                                                                               
                           utf8string + count,
+                                                                               
                           &node_list);
+                               elog(NOTICE, "res_code = %d  err_occurred = 
%d", res_code, xmlerrcxt->err_occurred);
+                               if (res_code != 0 || xmlerrcxt->err_occurred)
                                {
                                        xmlFreeNodeList(node_list);
                                        xml_errsave(escontext, xmlerrcxt,
@@ -1975,6 +1960,9 @@ fail:
        if (ctxt != NULL)
                xmlFreeParserCtxt(ctxt);
 
+       if (old_keep_blanks != -1)
+               xmlKeepBlanksDefault(old_keep_blanks);
+
        pg_xml_done(xmlerrcxt, false);
 
        return doc;
-- 
2.50.1

Reply via email to