src/lib/VSDContentCollector.cpp | 504 +++++++++++++++++++++++++++++----------- src/lib/VSDContentCollector.h | 2 src/lib/VSDTypes.h | 5 src/lib/VSDXMLParserBase.cpp | 1 src/lib/libvisio_utils.cpp | 1 5 files changed, 374 insertions(+), 139 deletions(-)
New commits: commit 94f36d00499808d7588a0970ce0dc7470d1245c7 Author: Fridrich Štrba <fridrich.st...@bluewin.ch> Date: Tue Jan 31 17:12:11 2017 +0100 Refactoring to get the text encoding from the font information Change-Id: I2261310cf4ef2635e44cc80db809e7ca9fc4409f diff --git a/src/lib/VSDContentCollector.cpp b/src/lib/VSDContentCollector.cpp index eb9a366..e201809 100644 --- a/src/lib/VSDContentCollector.cpp +++ b/src/lib/VSDContentCollector.cpp @@ -256,7 +256,14 @@ void libvisio::VSDContentCollector::_flushShape() if (m_currentForeignData.size() && m_currentForeignProps["librevenge:mime-type"] && m_foreignWidth != 0.0 && m_foreignHeight != 0.0) numForeignElements++; if (!m_currentText.empty()) - numTextElements++; + { + if ((m_currentText.m_format == VSD_TEXT_UTF16 + && (m_currentText.m_data.size() >= 2 && (m_currentText.m_data.getDataBuffer()[0] || m_currentText.m_data.getDataBuffer()[1]))) + || m_currentText.m_data.getDataBuffer()[0]) + { + numTextElements++; + } + } if (numPathElements+numForeignElements+numTextElements > 1) { @@ -591,6 +598,19 @@ void libvisio::VSDContentCollector::_flushText() /* Do not output empty text objects. */ if (m_currentText.empty() || m_misc.m_hideText) return; + else + // Check whether the buffer contains only the terminating NULL character + { + if (m_currentText.m_format == VSD_TEXT_UTF16) + { + if (m_currentText.m_data.size() < 2) + return; + else if (!(m_currentText.m_data.getDataBuffer()[0]) && !(m_currentText.m_data.getDataBuffer()[1])) + return; + } + else if (!(m_currentText.m_data.getDataBuffer()[0])) + return; + } /* Fill the text object/frame properties */ double xmiddle = m_txtxform ? 
m_txtxform->width / 2.0 : m_xform.width / 2.0; @@ -679,172 +699,381 @@ void libvisio::VSDContentCollector::_flushText() unsigned charNumRemaining(charIt->charCount); unsigned tabNumRemaining(tabIt->m_numChars); + std::vector<unsigned char> sOutputVector; librevenge::RVNGString sOutputText; - /* Iterate over the text character by character */ - librevenge::RVNGString::Iter textIt(m_currentText); - for (textIt.rewind(); textIt.next();) + // Unfortunately, we have to handle the unicode formats differently then the 8-bit formats + if (m_currentText.m_format == VSD_TEXT_UTF8 || m_currentText.m_format == VSD_TEXT_UTF16) { - /* Any character will cause a paragraph to open if it is not yet opened. */ - if (!isParagraphOpened) + std::vector<unsigned char> tmpBuffer(m_currentText.m_data.size()); + memcpy(&tmpBuffer[0], m_currentText.m_data.getDataBuffer(), m_currentText.m_data.size()); + librevenge::RVNGString textString; + appendCharacters(textString, tmpBuffer, m_currentText.m_format); + /* Iterate over the text character by character */ + librevenge::RVNGString::Iter textIt(textString); + for (textIt.rewind(); textIt.next();) { - librevenge::RVNGPropertyList paraProps; - _fillParagraphProperties(paraProps, *paraIt); + /* Any character will cause a paragraph to open if it is not yet opened. */ + if (!isParagraphOpened) + { + librevenge::RVNGPropertyList paraProps; + _fillParagraphProperties(paraProps, *paraIt); + + if (m_textBlockStyle.defaultTabStop > 0.0) + paraProps.insert("style:tab-stop-distance", m_textBlockStyle.defaultTabStop); + + _fillTabSet(paraProps, *tabIt); - if (m_textBlockStyle.defaultTabStop > 0.0) - paraProps.insert("style:tab-stop-distance", m_textBlockStyle.defaultTabStop); + VSDBullet bullet; + _bulletFromParaFormat(bullet, *paraIt); - _fillTabSet(paraProps, *tabIt); + /* Bullet definition changed with regard to the last paragraph style. */ + if (bullet != currentBullet) + { + /* If the previous paragraph style had a bullet, close the list level. 
*/ + if (!!currentBullet) + m_shapeOutputText->addCloseUnorderedListLevel(); - VSDBullet bullet; - _bulletFromParaFormat(bullet, *paraIt); + currentBullet = bullet; + /* If the current paragraph style has a bullet, open a new list level. */ + if (!!currentBullet) + { + librevenge::RVNGPropertyList bulletList; + _listLevelFromBullet(bulletList, currentBullet); + m_shapeOutputText->addOpenUnorderedListLevel(bulletList); + } + } - /* Bullet definition changed with regard to the last paragraph style. */ - if (bullet != currentBullet) + if (!currentBullet) + m_shapeOutputText->addOpenParagraph(paraProps); + else + m_shapeOutputText->addOpenListElement(paraProps); + isParagraphOpened = true; + isParagraphWithoutSpan = true; + } + + /* Any character will cause a span to open if it is not yet opened. + * The additional conditions aim to avoid superfluous empty span but + * also a paragraph without span at all. */ + if (!isSpanOpened && ((*(textIt()) != '\n') || isParagraphWithoutSpan)) { - /* If the previous paragraph style had a bullet, close the list level. */ - if (!!currentBullet) - m_shapeOutputText->addCloseUnorderedListLevel(); + librevenge::RVNGPropertyList textProps; + _fillCharProperties(textProps, *charIt); - currentBullet = bullet; - /* If the current paragraph style has a bullet, open a new list level. 
*/ - if (!!currentBullet) + // TODO: In draw, text span background cannot be specified the same way as in writer span + if (m_textBlockStyle.isTextBkgndFilled) { - librevenge::RVNGPropertyList bulletList; - _listLevelFromBullet(bulletList, currentBullet); - m_shapeOutputText->addOpenUnorderedListLevel(bulletList); + textProps.insert("fo:background-color", getColourString(m_textBlockStyle.textBkgndColour)); +#if 0 + if (m_textBlockStyle.textBkgndColour.a) + textProps.insert("fo:background-opacity", 1.0 - m_textBlockStyle.textBkgndColour.a/255.0, librevenge::RVNG_PERCENT); +#endif } + m_shapeOutputText->addOpenSpan(textProps); + isSpanOpened = true; + isParagraphWithoutSpan = false; } - if (!currentBullet) - m_shapeOutputText->addOpenParagraph(paraProps); - else - m_shapeOutputText->addOpenListElement(paraProps); - isParagraphOpened = true; - isParagraphWithoutSpan = true; - } - - /* Any character will cause a span to open if it is not yet opened. - * The additional conditions aim to avoid superfluous empty span but - * also a paragraph without span at all. */ - if (!isSpanOpened && ((*(textIt()) != '\n') || isParagraphWithoutSpan)) - { - librevenge::RVNGPropertyList textProps; - _fillCharProperties(textProps, *charIt); + /* Current character is a paragraph break, + * which will cause the paragraph to close. */ + if (*(textIt()) == '\n') + { + if (!sOutputText.empty()) + m_shapeOutputText->addInsertText(sOutputText); + sOutputText.clear(); + if (isSpanOpened) + { + m_shapeOutputText->addCloseSpan(); + isSpanOpened = false; + } - // TODO: In draw, text span background cannot be specified the same way as in writer span - if (m_textBlockStyle.isTextBkgndFilled) + if (isParagraphOpened) + { + if (!currentBullet) + m_shapeOutputText->addCloseParagraph(); + else + m_shapeOutputText->addCloseListElement(); + isParagraphOpened = false; + } + } + /* Current character is a tabulator. We have to output + * the current text buffer and insert the tab. 
*/ + else if (*(textIt()) == '\t') { - textProps.insert("fo:background-color", getColourString(m_textBlockStyle.textBkgndColour)); -#if 0 - if (m_textBlockStyle.textBkgndColour.a) - textProps.insert("fo:background-opacity", 1.0 - m_textBlockStyle.textBkgndColour.a/255.0, librevenge::RVNG_PERCENT); -#endif + if (!sOutputText.empty()) + m_shapeOutputText->addInsertText(sOutputText); + sOutputText.clear(); + m_shapeOutputText->addInsertTab(); + } + /* Current character is a field placeholder. We append + * to the current text buffer a text representation + * of the field. */ + else if (strlen(textIt()) == 3 && + textIt()[0] == '\xef' && + textIt()[1] == '\xbf' && + textIt()[2] == '\xbc') + _appendField(sOutputText); + /* We have a normal UTF8 character and we append it + * to the current text buffer. */ + else + sOutputText.append(textIt()); + + /* Decrease the count of remaining characters in the same paragraph, + * if it is possible. */ + if (paraNumRemaining) + paraNumRemaining--; + /* Fetch next paragraph style if it exists. If not, just use the + * last one. */ + if (!paraNumRemaining) + { + ++paraIt; + if (paraIt != m_paraFormats.end()) + paraNumRemaining = paraIt->charCount; + else + --paraIt; } - m_shapeOutputText->addOpenSpan(textProps); - isSpanOpened = true; - isParagraphWithoutSpan = false; - } - /* Current character is a paragraph break, - * which will cause the paragraph to close. */ - if (*(textIt()) == '\n') - { - if (!sOutputText.empty()) - m_shapeOutputText->addInsertText(sOutputText); - sOutputText.clear(); - if (isSpanOpened) + /* Decrease the count of remaining characters in the same span, + * if it is possible. */ + if (charNumRemaining) + charNumRemaining--; + /* Fetch next character style if it exists and close span, since + * the next span will have to use the new character style. + * If there is no more character style to fetch, just finish using + * the last one. 
*/ + if (!charNumRemaining) { - m_shapeOutputText->addCloseSpan(); - isSpanOpened = false; + ++charIt; + if (charIt != m_charFormats.end()) + { + charNumRemaining = charIt->charCount; + if (isSpanOpened) + { + if (!sOutputText.empty()) + m_shapeOutputText->addInsertText(sOutputText); + sOutputText.clear(); + m_shapeOutputText->addCloseSpan(); + isSpanOpened = false; + } + } + else + --charIt; } - if (isParagraphOpened) + /* Decrease the count of remaining characters using the same + * tab-set definition, if it is possible. */ + if (tabNumRemaining) + tabNumRemaining--; + /* Fetch next tab-set definition if it exists. If not, just use the + * last one. */ + if (!tabNumRemaining) { - if (!currentBullet) - m_shapeOutputText->addCloseParagraph(); + ++tabIt; + if (tabIt != m_tabSets.end()) + tabNumRemaining = tabIt->m_numChars; else - m_shapeOutputText->addCloseListElement(); - isParagraphOpened = false; + --tabIt; } } - /* Current character is a tabulator. We have to output - * the current text buffer and insert the tab. */ - else if (*(textIt()) == '\t') + } + else // 8-bit charsets + { + /* Iterate over the text character by character */ + const unsigned char *tmpBuffer = m_currentText.m_data.getDataBuffer(); + unsigned long tmpBufferLength = m_currentText.m_data.size(); + // Remove the terminating \0 character from the buffer + while (tmpBufferLength > 1 &&!tmpBuffer[tmpBufferLength-1]) { - if (!sOutputText.empty()) - m_shapeOutputText->addInsertText(sOutputText); - sOutputText.clear(); - m_shapeOutputText->addInsertTab(); - } - /* Current character is a field placeholder. We append - * to the current text buffer a text representation - * of the field. */ - else if (strlen(textIt()) == 3 && - textIt()[0] == '\xef' && - textIt()[1] == '\xbf' && - textIt()[2] == '\xbc') - _appendField(sOutputText); - /* We have a normal UTF8 character and we append it - * to the current text buffer. 
*/ - else - sOutputText.append(textIt()); - - /* Decrease the count of remaining characters in the same paragraph, - * if it is possible. */ - if (paraNumRemaining) - paraNumRemaining--; - /* Fetch next paragraph style if it exists. If not, just use the - * last one. */ - if (!paraNumRemaining) - { - ++paraIt; - if (paraIt != m_paraFormats.end()) - paraNumRemaining = paraIt->charCount; - else - --paraIt; + --tmpBufferLength; } - - /* Decrease the count of remaining characters in the same span, - * if it is possible. */ - if (charNumRemaining) - charNumRemaining--; - /* Fetch next character style if it exists and close span, since - * the next span will have to use the new character style. - * If there is no more character style to fetch, just finish using - * the last one. */ - if (!charNumRemaining) + for (unsigned long i = 0; i < tmpBufferLength; ++i) { - ++charIt; - if (charIt != m_charFormats.end()) + /* Any character will cause a paragraph to open if it is not yet opened. */ + if (!isParagraphOpened) { - charNumRemaining = charIt->charCount; - if (isSpanOpened) + librevenge::RVNGPropertyList paraProps; + _fillParagraphProperties(paraProps, *paraIt); + + if (m_textBlockStyle.defaultTabStop > 0.0) + paraProps.insert("style:tab-stop-distance", m_textBlockStyle.defaultTabStop); + + _fillTabSet(paraProps, *tabIt); + + VSDBullet bullet; + _bulletFromParaFormat(bullet, *paraIt); + + /* Bullet definition changed with regard to the last paragraph style. */ + if (bullet != currentBullet) { - if (!sOutputText.empty()) - m_shapeOutputText->addInsertText(sOutputText); + /* If the previous paragraph style had a bullet, close the list level. */ + if (!!currentBullet) + m_shapeOutputText->addCloseUnorderedListLevel(); + + currentBullet = bullet; + /* If the current paragraph style has a bullet, open a new list level. 
*/ + if (!!currentBullet) + { + librevenge::RVNGPropertyList bulletList; + _listLevelFromBullet(bulletList, currentBullet); + m_shapeOutputText->addOpenUnorderedListLevel(bulletList); + } + } + + if (!currentBullet) + m_shapeOutputText->addOpenParagraph(paraProps); + else + m_shapeOutputText->addOpenListElement(paraProps); + isParagraphOpened = true; + isParagraphWithoutSpan = true; + } + + /* Any character will cause a span to open if it is not yet opened. + * The additional conditions aim to avoid superfluous empty span but + * also a paragraph without span at all. */ + if (!isSpanOpened && ((tmpBuffer[i] != (unsigned char)'\n' && tmpBuffer[i] != 0x0d && tmpBuffer[i] != 0x0e) || isParagraphWithoutSpan)) + { + librevenge::RVNGPropertyList textProps; + _fillCharProperties(textProps, *charIt); + + // TODO: In draw, text span background cannot be specified the same way as in writer span + if (m_textBlockStyle.isTextBkgndFilled) + { + textProps.insert("fo:background-color", getColourString(m_textBlockStyle.textBkgndColour)); +#if 0 + if (m_textBlockStyle.textBkgndColour.a) + textProps.insert("fo:background-opacity", 1.0 - m_textBlockStyle.textBkgndColour.a/255.0, librevenge::RVNG_PERCENT); +#endif + } + m_shapeOutputText->addOpenSpan(textProps); + isSpanOpened = true; + isParagraphWithoutSpan = false; + } + + /* Current character is a paragraph break, + * which will cause the paragraph to close. 
*/ + if (tmpBuffer[i] == (unsigned char)'\n' || tmpBuffer[i] == 0x0d || tmpBuffer[i] == 0x0e) + { + if (!sOutputVector.empty()) + { + appendCharacters(sOutputText, sOutputVector, charIt->font.m_format); + sOutputVector.clear(); + } + if (!sOutputText.empty()) + { + m_shapeOutputText->addInsertText(sOutputText); sOutputText.clear(); + } + if (isSpanOpened) + { m_shapeOutputText->addCloseSpan(); isSpanOpened = false; } + + if (isParagraphOpened) + { + if (!currentBullet) + m_shapeOutputText->addCloseParagraph(); + else + m_shapeOutputText->addCloseListElement(); + isParagraphOpened = false; + } } + /* Current character is a tabulator. We have to output + * the current text buffer and insert the tab. */ + else if (tmpBuffer[i] == (unsigned char)'\t') + { + if (!sOutputVector.empty()) + { + appendCharacters(sOutputText, sOutputVector, charIt->font.m_format); + sOutputVector.clear(); + } + if (!sOutputText.empty()) + { + m_shapeOutputText->addInsertText(sOutputText); + sOutputText.clear(); + } + m_shapeOutputText->addInsertTab(); + } + /* Current character is a field placeholder. We append + * to the current text buffer a text representation + * of the field. */ + else if (tmpBuffer[i] == 0x1e) + { + if (!sOutputVector.empty()) + { + appendCharacters(sOutputText, sOutputVector, charIt->font.m_format); + sOutputVector.clear(); + } + _appendField(sOutputText); + } + /* We have a normal UTF8 character and we append it + * to the current text buffer. */ else - --charIt; - } + sOutputVector.push_back(tmpBuffer[i]); + + /* Decrease the count of remaining characters in the same paragraph, + * if it is possible. */ + if (paraNumRemaining) + paraNumRemaining--; + /* Fetch next paragraph style if it exists. If not, just use the + * last one. 
*/ + if (!paraNumRemaining) + { + ++paraIt; + if (paraIt != m_paraFormats.end()) + paraNumRemaining = paraIt->charCount; + else + --paraIt; + } - /* Decrease the count of remaining characters using the same - * tab-set definition, if it is possible. */ - if (tabNumRemaining) - tabNumRemaining--; - /* Fetch next tab-set definition if it exists. If not, just use the - * last one. */ - if (!tabNumRemaining) - { - ++tabIt; - if (tabIt != m_tabSets.end()) - tabNumRemaining = tabIt->m_numChars; - else - --tabIt; + /* Decrease the count of remaining characters in the same span, + * if it is possible. */ + if (charNumRemaining) + charNumRemaining--; + /* Fetch next character style if it exists and close span, since + * the next span will have to use the new character style. + * If there is no more character style to fetch, just finish using + * the last one. */ + if (!charNumRemaining) + { + ++charIt; + if (charIt != m_charFormats.end()) + { + charNumRemaining = charIt->charCount; + if (isSpanOpened) + { + if (!sOutputVector.empty()) + { + appendCharacters(sOutputText, sOutputVector, charIt->font.m_format); + sOutputVector.clear(); + } + if (!sOutputText.empty()) + { + m_shapeOutputText->addInsertText(sOutputText); + sOutputText.clear(); + } + m_shapeOutputText->addCloseSpan(); + isSpanOpened = false; + } + } + else + --charIt; + } + + /* Decrease the count of remaining characters using the same + * tab-set definition, if it is possible. */ + if (tabNumRemaining) + tabNumRemaining--; + /* Fetch next tab-set definition if it exists. If not, just use the + * last one. 
*/ + if (!tabNumRemaining) + { + ++tabIt; + if (tabIt != m_tabSets.end()) + tabNumRemaining = tabIt->m_numChars; + else + --tabIt; + } } } @@ -853,9 +1082,16 @@ void libvisio::VSDContentCollector::_flushText() { if (isSpanOpened) { + if (!sOutputVector.empty()) + { + appendCharacters(sOutputText, sOutputVector, charIt->font.m_format); + sOutputVector.clear(); + } if (!sOutputText.empty()) + { m_shapeOutputText->addInsertText(sOutputText); - sOutputText.clear(); + sOutputText.clear(); + } m_shapeOutputText->addCloseSpan(); isSpanOpened = false; } @@ -2434,11 +2670,7 @@ void libvisio::VSDContentCollector::collectText(unsigned level, const librevenge m_currentText.clear(); if (!textStream.empty()) - { - std::vector<unsigned char> tmpBuffer(textStream.size()); - memcpy(&tmpBuffer[0], textStream.getDataBuffer(), textStream.size()); - appendCharacters(m_currentText, tmpBuffer, format); - } + m_currentText = libvisio::VSDName(textStream, format); } void libvisio::VSDContentCollector::collectParaIX(unsigned /* id */ , unsigned level, unsigned charCount, const boost::optional<double> &indFirst, diff --git a/src/lib/VSDContentCollector.h b/src/lib/VSDContentCollector.h index 4460e35..2338e7a 100644 --- a/src/lib/VSDContentCollector.h +++ b/src/lib/VSDContentCollector.h @@ -278,7 +278,7 @@ private: std::map<unsigned, NURBSData> m_NURBSData; std::map<unsigned, PolylineData> m_polylineData; - librevenge::RVNGString m_currentText; + libvisio::VSDName m_currentText; std::map<unsigned, librevenge::RVNGString> m_names, m_stencilNames; std::vector<librevenge::RVNGString> m_fields; VSDFieldList m_stencilFields; diff --git a/src/lib/VSDTypes.h b/src/lib/VSDTypes.h index ae0abd9..a2e6e40 100644 --- a/src/lib/VSDTypes.h +++ b/src/lib/VSDTypes.h @@ -188,6 +188,11 @@ public: { return !m_data.size(); } + void clear() + { + m_data.clear(); + m_format = VSD_TEXT_ANSI; + } librevenge::RVNGBinaryData m_data; TextFormat m_format; }; diff --git a/src/lib/libvisio_utils.cpp 
b/src/lib/libvisio_utils.cpp index bd03e75..08b5b2d 100644 --- a/src/lib/libvisio_utils.cpp +++ b/src/lib/libvisio_utils.cpp @@ -112,7 +112,6 @@ const librevenge::RVNGString libvisio::getColourString(const Colour &c) void libvisio::appendUCS4(librevenge::RVNGString &text, UChar32 ucs4Character) { // Convert carriage returns to new line characters - // Writerperfect/LibreOffice will replace them by <text:line-break> if (ucs4Character == (UChar32) 0x0d || ucs4Character == (UChar32) 0x0e) ucs4Character = (UChar32) '\n'; commit dc7a4e0f70800fd747eaa7fef3e606d6bb9e5177 Author: Fridrich Štrba <fridrich.st...@bluewin.ch> Date: Tue Jan 31 12:53:11 2017 +0100 Remove stray debug output Change-Id: I97c9fd76eb48e944c60873f49ae215aa788db7b5 diff --git a/src/lib/VSDXMLParserBase.cpp b/src/lib/VSDXMLParserBase.cpp index 714399b..da5581f 100644 --- a/src/lib/VSDXMLParserBase.cpp +++ b/src/lib/VSDXMLParserBase.cpp @@ -2221,7 +2221,6 @@ void libvisio::VSDXMLParserBase::readTriggerId(unsigned &id, xmlTextReaderPtr re unsigned triggerId = MINUS_ONE; const boost::shared_ptr<xmlChar> triggerString(xmlTextReaderGetAttribute(reader, BAD_CAST("F")), xmlFree); - printf("Fridrich A %s\n", (const char *)triggerString.get()); if (triggerString) { if (parse((const char *)triggerString.get(),
_______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/libreoffice-commits