sw/qa/extras/rtfimport/data/fdo79384.rtf | 9 ++++++ sw/qa/extras/rtfimport/rtfimport.cxx | 8 +++++ writerfilter/source/rtftok/rtfdocumentimpl.cxx | 34 ++++++++++++++++++++++--- writerfilter/source/rtftok/rtfdocumentimpl.hxx | 2 - 4 files changed, 48 insertions(+), 5 deletions(-)
New commits: commit d15eb9f09c8854bd58fecd3dc6a31fa678e392a1 Author: Michael Stahl <mst...@redhat.com> Date: Mon Jun 2 23:57:13 2014 +0200 fdo#79384: RTF import: fix literal Shift-JIS text This is a variable-length encoding, and the second byte may be a RTF syntax character like \, {, }. (cherry picked from commit 061190a62fcdbfb3a0b266d5afffbd257a3e692e) Conflicts: writerfilter/source/rtftok/rtfdocumentimpl.cxx writerfilter/source/rtftok/rtfdocumentimpl.hxx Change-Id: I813ccafda18388af3bf05eb7ce9a0253c627b1c4 Reviewed-on: https://gerrit.libreoffice.org/9632 Reviewed-by: Miklos Vajna <vmik...@collabora.co.uk> Tested-by: Miklos Vajna <vmik...@collabora.co.uk> diff --git a/sw/qa/extras/rtfimport/data/fdo79384.rtf b/sw/qa/extras/rtfimport/data/fdo79384.rtf new file mode 100644 index 0000000..2a90085 --- /dev/null +++ b/sw/qa/extras/rtfimport/data/fdo79384.rtf @@ -0,0 +1,9 @@ +{\rtf1\ansi +{\fonttbl{\f5\fnil\fprq0\fcharset128 OpenSymbol{\*\falt Arial Unicode MS};}} + +\pard\plain + +\dbch\f5 Mp{u y{p +}\ + +\par } diff --git a/sw/qa/extras/rtfimport/rtfimport.cxx b/sw/qa/extras/rtfimport/rtfimport.cxx index 562b9df..e8f40c8 100644 --- a/sw/qa/extras/rtfimport/rtfimport.cxx +++ b/sw/qa/extras/rtfimport/rtfimport.cxx @@ -286,6 +286,14 @@ DECLARE_RTFIMPORT_TEST(testN751020, "n751020.rtf") CPPUNIT_ASSERT_EQUAL(sal_Int32(TWIP_TO_MM100(200)), getProperty<sal_Int32>(xParaEnum->nextElement(), "ParaBottomMargin")); } +DECLARE_RTFIMPORT_TEST(testFdo79384, "fdo79384.rtf") +{ + uno::Reference<text::XTextRange> xTextRange = getRun(getParagraph(1), 1); + + CPPUNIT_ASSERT_EQUAL(OUString("Маркеры спискамЫ", 31, RTL_TEXTENCODING_UTF8), + xTextRange->getString()); +} + DECLARE_RTFIMPORT_TEST(testFdo47326, "fdo47326.rtf") { // This was 15 only, as \super buffered text, then the contents of it got lost. 
diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.cxx b/writerfilter/source/rtftok/rtfdocumentimpl.cxx index e8316ff..9946de2 100644 --- a/writerfilter/source/rtftok/rtfdocumentimpl.cxx +++ b/writerfilter/source/rtftok/rtfdocumentimpl.cxx @@ -963,9 +963,33 @@ int RTFDocumentImpl::resolveChars(char ch) m_aStates.top().nCharsToSkip--; } } + // read a single char if we're in hex mode if (m_aStates.top().nInternalState == INTERNAL_HEX) break; + + if (RTFParserState::DBCH == m_aStates.top().eRunType && + RTL_TEXTENCODING_MS_932 == m_aStates.top().nCurrentEncoding) + { + unsigned char uch = ch; + if ((uch >= 0x80 && uch <= 0x9F) || uch >= 0xE0) + { + // read second byte of 2-byte Shift-JIS - may be \ { } + Strm() >> ch; + if (m_aStates.top().nCharsToSkip == 0) + { + assert(bUnicodeChecked); + aBuf.append(ch); + } + else + { + assert(bSkipped); + // anybody who uses \ucN with Shift-JIS is insane + m_aStates.top().nCharsToSkip--; + } + } + } + Strm() >> ch; } if (m_aStates.top().nInternalState != INTERNAL_HEX && !Strm().IsEof()) @@ -2747,12 +2771,13 @@ int RTFDocumentImpl::dispatchFlag(RTFKeyword nKeyword) break; case RTF_LOCH: // Noop, dmapper detects this automatically. + m_aStates.top().eRunType = RTFParserState::LOCH; break; case RTF_HICH: - m_aStates.top().bIsCjk = true; + m_aStates.top().eRunType = RTFParserState::HICH; break; case RTF_DBCH: - m_aStates.top().bIsCjk = false; + m_aStates.top().eRunType = RTFParserState::DBCH; break; case RTF_TITLEPG: { @@ -3165,7 +3190,8 @@ int RTFDocumentImpl::dispatchValue(RTFKeyword nKeyword, int nParam) if (nKeyword == RTF_F) nSprm = NS_sprm::LN_CRgFtc0; else - nSprm = (m_aStates.top().bIsCjk ? NS_sprm::LN_CRgFtc1 : NS_sprm::LN_CRgFtc2); + nSprm = (m_aStates.top().eRunType == RTFParserState::HICH + ? 
NS_sprm::LN_CRgFtc1 : NS_sprm::LN_CRgFtc2); if (m_aStates.top().nDestinationState == DESTINATION_FONTTABLE || m_aStates.top().nDestinationState == DESTINATION_FONTENTRY) { m_aFontIndexes.push_back(nParam); @@ -5171,7 +5197,7 @@ RTFParserState::RTFParserState(RTFDocumentImpl *pDocumentImpl) aShape(), aDrawingObject(), aFrame(this), - bIsCjk(false), + eRunType(LOCH), nYear(0), nMonth(0), nDay(0), diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.hxx b/writerfilter/source/rtftok/rtfdocumentimpl.hxx index 1d10b6f7..a6e4406 100644 --- a/writerfilter/source/rtftok/rtfdocumentimpl.hxx +++ b/writerfilter/source/rtftok/rtfdocumentimpl.hxx @@ -254,7 +254,7 @@ namespace writerfilter { RTFFrame aFrame; /// CJK or CTL? - bool bIsCjk; + enum { LOCH, HICH, DBCH } eRunType; // Info group. int nYear;
_______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits