sw/qa/extras/indexing/IndexingExportTest.cxx |  167 ++++++++++++++-------------
 sw/source/filter/indexing/IndexingExport.cxx |   28 ++--
 2 files changed, 109 insertions(+), 86 deletions(-)

New commits:
commit 50f0e8c7880122a05585a2233f6f35d0dfee0385
Author:     Tomaž Vajngerl <tomaz.vajng...@collabora.co.uk>
AuthorDate: Sun Aug 22 11:43:10 2021 +0900
Commit:     Tomaž Vajngerl <qui...@gmail.com>
CommitDate: Mon Aug 23 02:51:23 2021 +0200

    indexing: make indexing XML flat and use simple element names
    
    This changes the indexing XML to be flatter and changes the element
    names to be just "paragraph" or "object", where an "object" has a
    "type" attribute that says exactly what kind of object it is.
    
    This makes it easier to convert the XML into a format accepted
    by an indexing engine.
    
    Change-Id: Ia8941cc9616a862c1bc980efea5ba2548217644e
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/120836
    Tested-by: Jenkins
    Reviewed-by: Tomaž Vajngerl <qui...@gmail.com>

diff --git a/sw/qa/extras/indexing/IndexingExportTest.cxx 
b/sw/qa/extras/indexing/IndexingExportTest.cxx
index 346ff783d2c4..9d40d887f30d 100644
--- a/sw/qa/extras/indexing/IndexingExportTest.cxx
+++ b/sw/qa/extras/indexing/IndexingExportTest.cxx
@@ -107,10 +107,12 @@ void IndexingExportTest::testIndexingExport_Images()
     CPPUNIT_ASSERT(pXmlDoc);
 
     assertXPath(pXmlDoc, "/indexing");
-    assertXPath(pXmlDoc, "/indexing/graphic[1]", "alt", "Image_NonCaption - 
Alternative text");
-    assertXPath(pXmlDoc, "/indexing/graphic[1]", "name", "Image_NonCaption");
-    assertXPath(pXmlDoc, "/indexing/graphic[2]", "alt", "Image_InCaption - 
Alternative text");
-    assertXPath(pXmlDoc, "/indexing/graphic[2]", "name", "Image_InCaption");
+    assertXPath(pXmlDoc, "/indexing/object[1]", "alt", "Image_NonCaption - 
Alternative text");
+    assertXPath(pXmlDoc, "/indexing/object[1]", "name", "Image_NonCaption");
+    assertXPath(pXmlDoc, "/indexing/object[1]", "type", "graphic");
+    assertXPath(pXmlDoc, "/indexing/object[2]", "alt", "Image_InCaption - 
Alternative text");
+    assertXPath(pXmlDoc, "/indexing/object[2]", "name", "Image_InCaption");
+    assertXPath(pXmlDoc, "/indexing/object[2]", "type", "graphic");
 }
 
 void IndexingExportTest::testIndexingExport_OLE()
@@ -127,8 +129,9 @@ void IndexingExportTest::testIndexingExport_OLE()
     CPPUNIT_ASSERT(pXmlDoc);
 
     assertXPath(pXmlDoc, "/indexing");
-    assertXPath(pXmlDoc, "/indexing/ole[1]", "name", "Object - Chart");
-    assertXPath(pXmlDoc, "/indexing/ole[1]", "alt", "Alt Text");
+    assertXPath(pXmlDoc, "/indexing/object[1]", "name", "Object - Chart");
+    assertXPath(pXmlDoc, "/indexing/object[1]", "alt", "Alt Text");
+    assertXPath(pXmlDoc, "/indexing/object[1]", "type", "ole");
 }
 
 void IndexingExportTest::testIndexingExport_Shapes()
@@ -145,18 +148,22 @@ void IndexingExportTest::testIndexingExport_Shapes()
     CPPUNIT_ASSERT(pXmlDoc);
 
     assertXPath(pXmlDoc, "/indexing");
-
-    assertXPath(pXmlDoc, "/indexing/shape[1]", "name", "Circle");
-    assertXPathContent(pXmlDoc, "/indexing/shape[1]/paragraph[1]", "This is a 
circle");
-    assertXPathContent(pXmlDoc, "/indexing/shape[1]/paragraph[2]", "This is a 
second paragraph");
-
-    assertXPath(pXmlDoc, "/indexing/shape[2]", "name", "Diamond");
-    assertXPathContent(pXmlDoc, "/indexing/shape[2]/paragraph[1]", "This is a 
diamond");
-
-    assertXPath(pXmlDoc, "/indexing/shape[3]", "name", "Text Frame 1");
-    assertXPathContent(pXmlDoc, "/indexing/shape[3]/paragraph[1]", "This is a 
TextBox - Para1");
-    assertXPathContent(pXmlDoc, "/indexing/shape[3]/paragraph[2]", "Para2");
-    assertXPathContent(pXmlDoc, "/indexing/shape[3]/paragraph[3]", "Para3");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[1]", "Drawing : Just a 
Diamond");
+
+    assertXPath(pXmlDoc, "/indexing/object[1]", "name", "Circle");
+    assertXPath(pXmlDoc, "/indexing/object[1]", "type", "shape");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[2]", "This is a circle");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[3]", "This is a second 
paragraph");
+
+    assertXPath(pXmlDoc, "/indexing/object[2]", "name", "Diamond");
+    assertXPath(pXmlDoc, "/indexing/object[2]", "type", "shape");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[4]", "This is a diamond");
+
+    assertXPath(pXmlDoc, "/indexing/object[3]", "name", "Text Frame 1");
+    assertXPath(pXmlDoc, "/indexing/object[3]", "type", "shape");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[5]", "This is a TextBox - 
Para1");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[6]", "Para2");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[7]", "Para3");
 }
 
 void IndexingExportTest::testIndexingExport_Tables()
@@ -174,51 +181,56 @@ void IndexingExportTest::testIndexingExport_Tables()
 
     assertXPath(pXmlDoc, "/indexing");
 
-    assertXPath(pXmlDoc, "/indexing/table[1]", "name", "Table1");
-    assertXPathContent(pXmlDoc, "/indexing/table[1]/paragraph[1]", "A");
-    assertXPathContent(pXmlDoc, "/indexing/table[1]/paragraph[2]", "B");
-    assertXPathContent(pXmlDoc, "/indexing/table[1]/paragraph[3]", "1");
-    assertXPathContent(pXmlDoc, "/indexing/table[1]/paragraph[4]", "2");
-
-    assertXPath(pXmlDoc, "/indexing/table[2]", "name", "Table2");
-    assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[1]", "A");
-    assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[2]", "B");
-    assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[3]", "C");
-    assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[4]", "1");
-    assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[5]", "10");
-    assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[6]", "100");
-    assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[7]", "2");
-    assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[8]", "20");
-    assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[9]", "200");
-    assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[10]", "3");
-    assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[11]", "30");
-    assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[12]", "300");
-    assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[13]", "4");
-    assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[14]", "40");
-    assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[15]", "400");
-
-    assertXPath(pXmlDoc, "/indexing/table[3]", "name", "WeirdTable");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[1]", "A1");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[2]", "B1");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[3]", "C1");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[4]", "D1");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[5]", "A2B2");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[6]", "C2D2");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[7]", "A3B3C3D3");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[8]", "A4-1");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[9]", "A4-2");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[10]", "B4-1");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[11]", "C4-1");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[12]", "D4-1");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[13]", "D4-2");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[14]", "");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[15]", "");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[16]", "B4-2");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[17]", "C4-2");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[18]", "");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[19]", "");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[20]", "A5B5C5");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[21]", "D5");
+    assertXPath(pXmlDoc, "/indexing/object[1]", "name", "Table1");
+    assertXPath(pXmlDoc, "/indexing/object[1]", "type", "table");
+    assertXPath(pXmlDoc, "/indexing/object[1]", "index", "9");
+    // Search paragraph with parent = 9
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=9][1]", "A");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=9][2]", "B");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=9][3]", "1");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=9][4]", "2");
+
+    assertXPath(pXmlDoc, "/indexing/object[2]", "name", "Table2");
+    assertXPath(pXmlDoc, "/indexing/object[2]", "type", "table");
+    assertXPath(pXmlDoc, "/indexing/object[2]", "index", "24");
+    // Search paragraph with parent = 24
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][1]", "A");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][2]", "B");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][3]", "C");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][4]", "1");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][5]", "10");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][6]", "100");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][7]", "2");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][8]", "20");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][9]", "200");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][10]", "3");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][11]", "30");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][12]", "300");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][13]", "4");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][14]", "40");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][15]", "400");
+
+    assertXPath(pXmlDoc, "/indexing/object[3]", "name", "WeirdTable");
+    assertXPath(pXmlDoc, "/indexing/object[3]", "type", "table");
+    assertXPath(pXmlDoc, "/indexing/object[3]", "index", "72");
+    // Search paragraph with parent = 72
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][1]", "A1");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][2]", "B1");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][3]", "C1");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][4]", "D1");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][5]", "A2B2");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][6]", "C2D2");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][7]", 
"A3B3C3D3");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][8]", "A4-1");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][9]", "A4-2");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][10]", "B4-1");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][11]", "C4-1");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][12]", "D4-1");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][13]", "D4-2");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][14]", "B4-2");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][15]", "C4-2");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][16]", 
"A5B5C5");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][17]", "D5");
 }
 
 void IndexingExportTest::testIndexingExport_Sections()
@@ -236,18 +248,20 @@ void IndexingExportTest::testIndexingExport_Sections()
 
     assertXPath(pXmlDoc, "/indexing");
 
-    assertXPath(pXmlDoc, "/indexing/section[1]", "name", "Section1");
-    assertXPathContent(pXmlDoc, "/indexing/section[1]/paragraph[1]",
-                       "This is a paragraph in a Section1");
-    assertXPathContent(pXmlDoc, "/indexing/section[1]/paragraph[2]", "Section1 
- Paragraph 2");
-    assertXPathContent(pXmlDoc, "/indexing/section[1]/paragraph[3]", "Section1 
- Paragraph 3");
+    assertXPath(pXmlDoc, "/indexing/object[1]", "name", "Section1");
+    assertXPath(pXmlDoc, "/indexing/object[1]", "type", "section");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[1]", "This is a paragraph 
in a Section1");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[2]", "Section1 - 
Paragraph 2");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[3]", "Section1 - 
Paragraph 3");
+
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[4]", "This is a paragraph 
outside sections");
 
-    assertXPath(pXmlDoc, "/indexing/section[2]", "name", "Section2");
-    assertXPathContent(pXmlDoc, "/indexing/section[2]/paragraph[1]", "Section2 
- Paragraph 1");
-    assertXPathContent(pXmlDoc, "/indexing/section[2]/paragraph[2]", "Section2 
- Paragraph 2");
+    assertXPath(pXmlDoc, "/indexing/object[2]", "name", "Section2");
+    assertXPath(pXmlDoc, "/indexing/object[2]", "type", "section");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[5]", "Section2 - 
Paragraph 1");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[6]", "Section2 - 
Paragraph 2");
 
-    assertXPathContent(pXmlDoc, "/indexing/paragraph[1]", "This is a paragraph 
outside sections");
-    assertXPathContent(pXmlDoc, "/indexing/paragraph[2]", "This is a paragraph 
outside sections");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[7]", "This is a paragraph 
outside sections");
 }
 
 void IndexingExportTest::testIndexingExport_Fontwork()
@@ -265,10 +279,11 @@ void IndexingExportTest::testIndexingExport_Fontwork()
 
     assertXPath(pXmlDoc, "/indexing");
 
-    assertXPath(pXmlDoc, "/indexing/shape[1]", "name", "Gray");
+    assertXPath(pXmlDoc, "/indexing/object[1]", "name", "Gray");
+    assertXPath(pXmlDoc, "/indexing/object[1]", "type", "shape");
 
-    assertXPathContent(pXmlDoc, "/indexing/shape[1]/paragraph[1]", "Fontwork 
Text 1");
-    assertXPathContent(pXmlDoc, "/indexing/shape[1]/paragraph[2]", "Fontwork 
Text 2");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[1]", "Fontwork Text 1");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[2]", "Fontwork Text 2");
 }
 
 void IndexingExportTest::testIndexingExport_Header_Footer()
diff --git a/sw/source/filter/indexing/IndexingExport.cxx 
b/sw/source/filter/indexing/IndexingExport.cxx
index 834111d88af7..946c4aaddc06 100644
--- a/sw/source/filter/indexing/IndexingExport.cxx
+++ b/sw/source/filter/indexing/IndexingExport.cxx
@@ -69,18 +69,20 @@ public:
     void handleOLENode(const SwOLENode* pOleNode)
     {
         auto pFrameFormat = pOleNode->GetFlyFormat();
-        m_rXmlWriter.startElement("ole");
+        m_rXmlWriter.startElement("object");
         m_rXmlWriter.attribute("alt", pOleNode->GetTitle());
         m_rXmlWriter.attribute("name", pFrameFormat->GetName());
+        m_rXmlWriter.attribute("type", "ole");
         m_rXmlWriter.endElement();
     }
 
     void handleGraphicNode(const SwGrfNode* pGraphicNode)
     {
         auto pFrameFormat = pGraphicNode->GetFlyFormat();
-        m_rXmlWriter.startElement("graphic");
+        m_rXmlWriter.startElement("object");
         m_rXmlWriter.attribute("alt", pGraphicNode->GetTitle());
         m_rXmlWriter.attribute("name", pFrameFormat->GetName());
+        m_rXmlWriter.attribute("type", "graphic");
         m_rXmlWriter.endElement();
     }
 
@@ -93,6 +95,8 @@ public:
         }
         const OUString& rString
             = 
pTextNode->GetText().replaceAll(OUStringChar(CH_TXTATR_BREAKWORD), "");
+        if (rString.isEmpty())
+            return;
         m_rXmlWriter.startElement("paragraph");
         m_rXmlWriter.attribute("index", pTextNode->GetIndex());
         m_rXmlWriter.attribute("type", "1");
@@ -106,11 +110,15 @@ public:
     {
         if (pObject->GetName().isEmpty())
             return;
-        m_rXmlWriter.startElement("shape");
+
+        m_rXmlWriter.startElement("object");
         m_rXmlWriter.attribute("name", pObject->GetName());
         m_rXmlWriter.attribute("alt", pObject->GetTitle());
+        m_rXmlWriter.attribute("type", "shape");
         m_rXmlWriter.attribute("description", pObject->GetDescription());
 
+        m_rXmlWriter.endElement();
+
         SdrTextObj* pTextObject = dynamic_cast<SdrTextObj*>(pObject);
         if (pTextObject)
         {
@@ -123,12 +131,11 @@ public:
                 m_rXmlWriter.startElement("paragraph");
                 m_rXmlWriter.attribute("index", nParagraph);
                 m_rXmlWriter.attribute("type", "2");
+                m_rXmlWriter.attribute("parent", pObject->GetName());
                 m_rXmlWriter.content(sText);
                 m_rXmlWriter.endElement();
             }
         }
-
-        m_rXmlWriter.endElement();
     }
 
     void handleTableNode(SwTableNode* pTableNode)
@@ -136,20 +143,22 @@ public:
         const SwTableFormat* pFormat = pTableNode->GetTable().GetFrameFormat();
         OUString sName = pFormat->GetName();
 
-        m_rXmlWriter.startElement("table");
+        m_rXmlWriter.startElement("object");
         m_rXmlWriter.attribute("index", pTableNode->GetIndex());
-        m_rXmlWriter.attribute("type", "1");
         m_rXmlWriter.attribute("name", sName);
+        m_rXmlWriter.attribute("type", "table");
+        m_rXmlWriter.endElement();
 
         maNodeStack.push_back(pTableNode);
     }
 
     void handleSectionNode(SwSectionNode* pSectionNode)
     {
-        m_rXmlWriter.startElement("section");
+        m_rXmlWriter.startElement("object");
         m_rXmlWriter.attribute("index", pSectionNode->GetIndex());
-        m_rXmlWriter.attribute("type", "1");
         m_rXmlWriter.attribute("name", 
pSectionNode->GetSection().GetSectionName());
+        m_rXmlWriter.attribute("type", "section");
+        m_rXmlWriter.endElement();
 
         maNodeStack.push_back(pSectionNode);
     }
@@ -159,7 +168,6 @@ public:
         if (!maNodeStack.empty() && pEndNode->StartOfSectionNode() == 
maNodeStack.back())
         {
             maNodeStack.pop_back();
-            m_rXmlWriter.endElement();
         }
     }
 };

Reply via email to