oox/source/docprop/docprophandler.cxx | 25 +++++ oox/source/docprop/docprophandler.hxx | 1 sax/source/fastparser/fastparser.cxx | 6 - sax/source/tools/fastserializer.cxx | 155 +++++++++++++++++++++++++++++++++- sax/source/tools/fastserializer.hxx | 9 + 5 files changed, 190 insertions(+), 6 deletions(-)
New commits: commit 3c45bfb0cabf206f6217f1de9eb5cfa12b78e46f Author: Michael Stahl <mst...@redhat.com> Date: Fri Jul 3 23:49:08 2015 +0200 sax: FastSaxSerializer well-formed element test in presence of ... ... the maMarkStack, which causes the order of calls to startFastElement()/endFastElement() to differ from the order of the tags that are written into the output. This is an attempt to improve the assertions, but if an assertion fails it's generally not obvious where the problem actually is since the unpredictable order may cause the problem to be detected and reported much later than its root cause. Let's see if this finds any new problems in export testing. Change-Id: I97699cc8ef9b18ea9f4f221d5210134feecf0336 diff --git a/sax/source/tools/fastserializer.cxx b/sax/source/tools/fastserializer.cxx index 10e7ab3..a2ba5c15 100644 --- a/sax/source/tools/fastserializer.cxx +++ b/sax/source/tools/fastserializer.cxx @@ -196,7 +196,10 @@ namespace sax_fastparser { } #ifdef DBG_UTIL - m_DebugStartedElements.push(Element); + if (mbMarkStackEmpty) + m_DebugStartedElements.push(Element); + else + maMarkStack.top()->m_DebugStartedElements.push_back(Element); #endif writeBytes(sOpeningBracket, N_CHARS(sOpeningBracket)); @@ -213,10 +216,30 @@ namespace sax_fastparser { void FastSaxSerializer::endFastElement( ::sal_Int32 Element ) { #ifdef DBG_UTIL - assert(!m_DebugStartedElements.empty()); // Well-formedness constraint: Element Type Match - assert(Element == m_DebugStartedElements.top()); - m_DebugStartedElements.pop(); + if (mbMarkStackEmpty) + { + assert(!m_DebugStartedElements.empty()); + assert(Element == m_DebugStartedElements.top()); + m_DebugStartedElements.pop(); + } + else + { + if (dynamic_cast<ForSort*>(maMarkStack.top().get())) + { + // Sort is always well-formed fragment + assert(!maMarkStack.top()->m_DebugStartedElements.empty()); + } + if (maMarkStack.top()->m_DebugStartedElements.empty()) + { + maMarkStack.top()->m_DebugEndedElements.push_back(Element); + } + else + { + assert(Element == maMarkStack.top()->m_DebugStartedElements.back()); + maMarkStack.top()->m_DebugStartedElements.pop_back(); + } + } #endif writeBytes(sOpeningBracketAndSlash, N_CHARS(sOpeningBracketAndSlash)); @@ -324,17 +347,98 @@ namespace sax_fastparser { mbMarkStackEmpty = false; } +#ifdef DBG_UTIL + static void lcl_DebugMergeAppend( + std::deque<sal_Int32> & rLeftEndedElements, + std::deque<sal_Int32> & rLeftStartedElements, + std::deque<sal_Int32> & rRightEndedElements, + std::deque<sal_Int32> & rRightStartedElements) + { + while (!rRightEndedElements.empty()) + { + if (rLeftStartedElements.empty()) + { + rLeftEndedElements.push_back(rRightEndedElements.front()); + } + else + { + assert(rLeftStartedElements.back() == rRightEndedElements.front()); + rLeftStartedElements.pop_back(); + } + rRightEndedElements.pop_front(); + } + while (!rRightStartedElements.empty()) + { + rLeftStartedElements.push_back(rRightStartedElements.front()); + rRightStartedElements.pop_front(); + } + } + + static void lcl_DebugMergePrepend( + std::deque<sal_Int32> & rLeftEndedElements, + std::deque<sal_Int32> & rLeftStartedElements, + std::deque<sal_Int32> & rRightEndedElements, + std::deque<sal_Int32> & rRightStartedElements) + { + while (!rLeftStartedElements.empty()) + { + if (rRightEndedElements.empty()) + { + rRightStartedElements.push_front(rLeftStartedElements.back()); + } + else + { + assert(rRightEndedElements.front() == rLeftStartedElements.back()); + rRightEndedElements.pop_front(); + } + rLeftStartedElements.pop_back(); + } + while (!rLeftEndedElements.empty()) + { + rRightEndedElements.push_front(rLeftEndedElements.back()); + rLeftEndedElements.pop_back(); + } + } +#endif + void FastSaxSerializer::mergeTopMarks( sax_fastparser::MergeMarksEnum eMergeType ) { SAL_WARN_IF(mbMarkStackEmpty, "sax", "Empty mark stack - nothing to merge"); if ( mbMarkStackEmpty ) return; +#ifdef DBG_UTIL + if (dynamic_cast<ForSort*>(maMarkStack.top().get())) + { + // Sort is always well-formed fragment + assert(maMarkStack.top()->m_DebugStartedElements.empty()); + assert(maMarkStack.top()->m_DebugEndedElements.empty()); + } + lcl_DebugMergeAppend( + maMarkStack.top()->m_DebugEndedElements, + maMarkStack.top()->m_DebugStartedElements, + maMarkStack.top()->m_DebugPostponedEndedElements, + maMarkStack.top()->m_DebugPostponedStartedElements); +#endif + // flush, so that we get everything in getData() maCachedOutputStream.flush(); if ( maMarkStack.size() == 1 && eMergeType != MERGE_MARKS_IGNORE) { +#if DBG_UTIL + while (!maMarkStack.top()->m_DebugEndedElements.empty()) + { + assert(maMarkStack.top()->m_DebugEndedElements.front() == m_DebugStartedElements.top()); + maMarkStack.top()->m_DebugEndedElements.pop_front(); + m_DebugStartedElements.pop(); + } + while (!maMarkStack.top()->m_DebugStartedElements.empty()) + { + m_DebugStartedElements.push(maMarkStack.top()->m_DebugStartedElements.front()); + maMarkStack.top()->m_DebugStartedElements.pop_front(); + } +#endif Sequence<sal_Int8> aSeq( maMarkStack.top()->getData() ); maMarkStack.pop(); mbMarkStackEmpty = true; @@ -343,8 +447,51 @@ namespace sax_fastparser { return; } +#if DBG_UTIL + ::std::deque<sal_Int32> topDebugStartedElements(maMarkStack.top()->m_DebugStartedElements); + ::std::deque<sal_Int32> topDebugEndedElements(maMarkStack.top()->m_DebugEndedElements); +#endif const Int8Sequence aMerge( maMarkStack.top()->getData() ); maMarkStack.pop(); +#if DBG_UTIL + switch (eMergeType) + { + case MERGE_MARKS_APPEND: + lcl_DebugMergeAppend( + maMarkStack.top()->m_DebugEndedElements, + maMarkStack.top()->m_DebugStartedElements, + topDebugEndedElements, + topDebugStartedElements); + break; + case MERGE_MARKS_PREPEND: + if (dynamic_cast<ForSort*>(maMarkStack.top().get())) // argh... + { + lcl_DebugMergeAppend( + maMarkStack.top()->m_DebugEndedElements, + maMarkStack.top()->m_DebugStartedElements, + topDebugEndedElements, + topDebugStartedElements); + } + else + { + lcl_DebugMergePrepend( + topDebugEndedElements, + topDebugStartedElements, + maMarkStack.top()->m_DebugEndedElements, + maMarkStack.top()->m_DebugStartedElements); + } + break; + case MERGE_MARKS_POSTPONE: + lcl_DebugMergeAppend( + maMarkStack.top()->m_DebugPostponedEndedElements, + maMarkStack.top()->m_DebugPostponedStartedElements, + topDebugEndedElements, + topDebugStartedElements); + break; + case MERGE_MARKS_IGNORE: + break; + } +#endif if (maMarkStack.empty()) { mbMarkStackEmpty = true; diff --git a/sax/source/tools/fastserializer.hxx b/sax/source/tools/fastserializer.hxx index 95e631c..4dc786b 100644 --- a/sax/source/tools/fastserializer.hxx +++ b/sax/source/tools/fastserializer.hxx @@ -161,6 +161,15 @@ private: Int8Sequence maPostponed; public: +#ifdef DBG_UTIL + // pending close tags, followed by pending open tags + ::std::deque<sal_Int32> m_DebugEndedElements; + ::std::deque<sal_Int32> m_DebugStartedElements; + // ... and another buffer for maPostponed ... + ::std::deque<sal_Int32> m_DebugPostponedEndedElements; + ::std::deque<sal_Int32> m_DebugPostponedStartedElements; +#endif + ForMerge() : maData(), maPostponed() {} virtual ~ForMerge() {} commit cf17befa261fd64f95bb4db932ce4d7e149f4a72 Author: Michael Stahl <mst...@redhat.com> Date: Fri Jul 3 22:28:18 2015 +0200 tdf#91378: sax, oox: avoid sending empty strings to character callbacks This reverts the changes in FastSaxParserImpl from commit 16e8ffbd5ec1fe7b81835ea6584547669d55d751 and instead fixes the problem of inserting string properties with empty value locally in OOXMLDocPropHandler. This change was not wrong in any obvious way, but it turns out there is one doc rhbz583386-4.docx that, when imported with this change and exported to DOCX again, results in a non-well-formed document because of some weird SDT stuff. That problem is rather baffling, but unfortunately the DocxAttributeOutput usage of FastSaxSerializer::mark() makes the DOCX export rather un-debuggable, so avoid that problem by reverting the import change for now. Change-Id: I0d874cbfe82d4f15d58b50116dda152341bdf7b0 diff --git a/oox/source/docprop/docprophandler.cxx b/oox/source/docprop/docprophandler.cxx index 1a5000b..c5d89b9 100644 --- a/oox/source/docprop/docprophandler.cxx +++ b/oox/source/docprop/docprophandler.cxx @@ -45,6 +45,7 @@ OOXMLDocPropHandler::OOXMLDocPropHandler( const uno::Reference< uno::XComponentC , m_nBlock( 0 ) , m_nType( 0 ) , m_nInBlock( 0 ) + , m_CustomStringPropertyState(NONE) { if ( !xContext.is() || !rDocProp.is() ) throw uno::RuntimeException(); @@ -61,6 +62,7 @@ void OOXMLDocPropHandler::InitNew() m_aCustomPropertyName.clear(); m_nType = 0; m_nInBlock = 0; + m_CustomStringPropertyState = NONE; } void OOXMLDocPropHandler::AddCustomProperty( const uno::Any& aAny ) @@ -345,7 +347,29 @@ void SAL_CALL OOXMLDocPropHandler::endFastElement( ::sal_Int32 ) m_aCustomPropertyName.clear(); } else if ( m_nInBlock == 2 ) + { + if ( m_nState == CUSTPR_TOKEN(Properties) + && m_nBlock == CUSTPR_TOKEN(property)) + { + switch (m_nType) + { + case VT_TOKEN(bstr): + case VT_TOKEN(lpstr): + case VT_TOKEN(lpwstr): + if (!m_aCustomPropertyName.isEmpty() && + INSERTED != m_CustomStringPropertyState) + { + // the property has string type, so it is valid + // even with an empty value - characters() has + // not been called in that case + AddCustomProperty(uno::makeAny(OUString())); + } + break; + } + } + m_CustomStringPropertyState = NONE; m_nType = 0; + } } } @@ -595,6 +619,7 @@ void SAL_CALL OOXMLDocPropHandler::characters( const OUString& aChars ) case VT_TOKEN( lpwstr ): // the property has string type AddCustomProperty( uno::makeAny( AttributeConversion::decodeXString( aChars ) ) ); + m_CustomStringPropertyState = INSERTED; break; case VT_TOKEN( date ): diff --git a/oox/source/docprop/docprophandler.hxx b/oox/source/docprop/docprophandler.hxx index f0d8049..daabc03 100644 --- a/oox/source/docprop/docprophandler.hxx +++ b/oox/source/docprop/docprophandler.hxx @@ -50,6 +50,7 @@ class OOXMLDocPropHandler : public ::cppu::WeakImplHelper1< ::com::sun::star::xm sal_Int32 m_nInBlock; + enum { NONE, INSERTED } m_CustomStringPropertyState; OUString m_aCustomPropertyName; public: diff --git a/sax/source/fastparser/fastparser.cxx b/sax/source/fastparser/fastparser.cxx index 79ab899..c866618 100644 --- a/sax/source/fastparser/fastparser.cxx +++ b/sax/source/fastparser/fastparser.cxx @@ -1037,7 +1037,8 @@ void FastSaxParserImpl::parse() void FastSaxParserImpl::callbackStartElement(const xmlChar *localName , const xmlChar* prefix, const xmlChar* URI, int numNamespaces, const xmlChar** namespaces, int numAttributes, int /*defaultedAttributes*/, const xmlChar **attributes) { - sendPendingCharacters(); + if (!pendingCharacters.isEmpty()) + sendPendingCharacters(); Entity& rEntity = getEntity(); if( rEntity.maNamespaceCount.empty() ) { @@ -1144,7 +1145,8 @@ void FastSaxParserImpl::callbackStartElement(const xmlChar *localName , const xm void FastSaxParserImpl::callbackEndElement( const xmlChar*, const xmlChar*, const xmlChar* ) { - sendPendingCharacters(); + if (!pendingCharacters.isEmpty()) + sendPendingCharacters(); Entity& rEntity = getEntity(); SAL_WARN_IF(rEntity.maNamespaceCount.empty(), "sax", "Empty NamespaceCount"); if( !rEntity.maNamespaceCount.empty() ) _______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits