basctl/source/basicide/baside2.cxx | 2 - sw/inc/iodetect.hxx | 4 +- sw/source/filter/ascii/parasc.cxx | 21 +++-------- sw/source/filter/basflt/iodetect.cxx | 63 ++++++++++------------------------- tools/source/stream/stream.cxx | 31 ++++++----------- 5 files changed, 40 insertions(+), 81 deletions(-)
New commits: commit 4f8f5e18075284fbd6565914d853bbe0d3c1c7a0 Author: Mike Kaganski <[email protected]> AuthorDate: Thu Feb 26 13:33:55 2026 +0500 Commit: Mike Kaganski <[email protected]> CommitDate: Sat Feb 28 09:50:02 2026 +0100 Simplify SwIoSystem::IsDetectableText to take an SvStream No need to read from a stream into a buffer, then create a stream inside the function. The stream stores the detected encoding and endianness, so no need to use some of the out arguments. Change-Id: If3c6ea3f4eed1b57f6de415f2731278791146be4 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/200669 Tested-by: Jenkins Reviewed-by: Mike Kaganski <[email protected]> diff --git a/sw/inc/iodetect.hxx b/sw/inc/iodetect.hxx index 0770731aaeba..045daeb14eba 100644 --- a/sw/inc/iodetect.hxx +++ b/sw/inc/iodetect.hxx @@ -108,8 +108,8 @@ public: static bool IsValidStgFilter( SotStorage& , const SfxFilter& ); static bool IsValidStgFilter( const css::uno::Reference < css::embed::XStorage >& rStg, const SfxFilter& rFilter); - static bool IsDetectableText( const char* pBuf, sal_uLong &rLen, - rtl_TextEncoding *pCharSet, bool *pSwap, LineEnd *pLineEnd, bool *pBom); + static bool IsDetectableText(SvStream& rStream, sal_uLong nMaxBuf, + LineEnd* pLineEnd, bool* pBom); static OUString GetSubStorageName( const SfxFilter& rFltr ); }; diff --git a/sw/source/filter/ascii/parasc.cxx b/sw/source/filter/ascii/parasc.cxx index d2f42c3eaaad..1be51c40c511 100644 --- a/sw/source/filter/ascii/parasc.cxx +++ b/sw/source/filter/ascii/parasc.cxx @@ -262,7 +262,7 @@ ErrCode SwASCIIParser::ReadChars() sal_Unicode *pStart = nullptr, *pEnd = nullptr, *pLastStt = nullptr; tools::Long nReadCnt = 0, nLineLen = 0; sal_Unicode cLastCR = 0; - bool bSwapUnicode = false; + bool bSwapUnicode; const SwAsciiOptions* pUseMe = &m_rOpt; SwAsciiOptions aEmpty; @@ -271,27 +271,19 @@ ErrCode SwASCIIParser::ReadChars() && aEmpty.GetLanguage() == m_rOpt.GetLanguage() && aEmpty.GetParaFlags() == m_rOpt.GetParaFlags()) { - sal_Size nLen, nOrig; - nOrig = nLen = m_rInput.ReadBytes(m_pArr.get(), ASC_BUFFLEN); - rtl_TextEncoding eCharSet; LineEnd eLineEnd; bool bHasBom; - const bool bRet - = SwIoSystem::IsDetectableText(m_pArr.get(), nLen, &eCharSet, - &bSwapUnicode, &eLineEnd, &bHasBom); + const bool bRet = SwIoSystem::IsDetectableText(m_rInput, ASC_BUFFLEN, &eLineEnd, &bHasBom); if (!bRet) return ERRCODE_IO_BROKENPACKAGE; - OSL_ENSURE(bRet, "Autodetect of text import without nag dialog must have failed"); - if (bRet && eCharSet != RTL_TEXTENCODING_DONTKNOW) + const rtl_TextEncoding eCharSet = m_rInput.GetStreamEncoding(); + if (eCharSet != RTL_TEXTENCODING_DONTKNOW) { aEmpty.SetCharSet(eCharSet); aEmpty.SetParaFlags(eLineEnd); aEmpty.SetIncludeBOM(bHasBom); - m_rInput.SeekRel(-(tools::Long(nLen))); } - else - m_rInput.SeekRel(-(tools::Long(nOrig))); pUseMe=&aEmpty; } m_usedAsciiOptions = *pUseMe; @@ -310,9 +302,10 @@ ErrCode SwASCIIParser::ReadChars() bSwapUnicode = false; hContext = rtl_createTextToUnicodeContext( hConverter ); } - else if (pUseMe != &aEmpty) //Already successfully figured out type + else { - m_rInput.StartReadingUnicodeText(currentCharSet); + if (pUseMe != &aEmpty) // Already successfully figured out type + m_rInput.StartReadingUnicodeText(currentCharSet); bSwapUnicode = m_rInput.IsEndianSwap(); } diff --git a/sw/source/filter/basflt/iodetect.cxx b/sw/source/filter/basflt/iodetect.cxx index d136dd2ea070..73836b429ecf 100644 --- a/sw/source/filter/basflt/iodetect.cxx +++ b/sw/source/filter/basflt/iodetect.cxx @@ -240,56 +240,33 @@ std::shared_ptr<const SfxFilter> SwIoSystem::GetFileFilter(const OUString& rFile return SwIoSystem::GetFilterOfFormat(FILTER_TEXT); } -bool SwIoSystem::IsDetectableText(const char* pBuf, sal_uLong &rLen, - rtl_TextEncoding *pCharSet, bool *pSwap, LineEnd *pLineEnd, bool *pBom) +bool SwIoSystem::IsDetectableText(SvStream& rStream, sal_uLong nMaxBuf, LineEnd* pLineEnd, + bool* pBom) { - SvMemoryStream aStream(const_cast<char*>(pBuf), rLen, StreamMode::READ); - aStream.DetectEncoding(); - rtl_TextEncoding eCharSet = aStream.GetStreamEncoding(); - auto nBomSize = aStream.Tell(); - pBuf += nBomSize; - rLen -= nBomSize; + const auto nOrigPos = rStream.Tell(); + rStream.DetectEncoding(nMaxBuf); + const auto nNewPos = rStream.Tell(); + nMaxBuf -= nNewPos - nOrigPos; + const rtl_TextEncoding eCharSet = rStream.GetStreamEncoding(); bool bCR = false, bLF = false, bIsBareUnicode = false; if (eCharSet != RTL_TEXTENCODING_DONTKNOW) { - std::unique_ptr<sal_Unicode[]> aWork(new sal_Unicode[rLen+1]); - sal_Unicode *pNewBuf = aWork.get(); - std::size_t nNewLen; + OUString aCRLFBuffer; if (eCharSet != RTL_TEXTENCODING_UCS2) { - nNewLen = rLen; - rtl_TextToUnicodeConverter hConverter = - rtl_createTextToUnicodeConverter(eCharSet); - rtl_TextToUnicodeContext hContext = - rtl_createTextToUnicodeContext(hConverter); - - sal_Size nCntBytes; - sal_uInt32 nInfo; - nNewLen = rtl_convertTextToUnicode( hConverter, hContext, pBuf, - rLen, pNewBuf, nNewLen, - (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT | - RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT | - RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT), &nInfo, &nCntBytes); - - rtl_destroyTextToUnicodeContext(hConverter, hContext); - rtl_destroyTextToUnicodeConverter(hConverter); + aCRLFBuffer = read_uInt8s_ToOUString(rStream, nMaxBuf, eCharSet); } else { - nNewLen = rLen/2; - memcpy(pNewBuf, pBuf, rLen); - if (aStream.IsEndianSwap()) - { - for (sal_uLong n = 0; n < nNewLen; ++n) - pNewBuf[n] = OSL_SWAPWORD(pNewBuf[n]); - } + aCRLFBuffer = read_uInt16s_ToOUString(rStream, nMaxBuf / 2); } + rStream.Seek(nNewPos); - for (sal_uLong nCnt = 0; nCnt < nNewLen; ++nCnt, ++pNewBuf) + for (sal_Int32 i = 0; i < aCRLFBuffer.getLength(); ++i) { - switch (*pNewBuf) + switch (aCRLFBuffer[i]) { case 0xA: bLF = true; @@ -304,12 +281,14 @@ bool SwIoSystem::IsDetectableText(const char* pBuf, sal_uLong &rLen, } else { - for( sal_uLong nCnt = 0; nCnt < rLen; ++nCnt, ++pBuf ) + OString aCRLFBuffer = read_uInt8s_ToOString(rStream, nMaxBuf); + rStream.Seek(nNewPos); + for (sal_Int32 i = 0; i < aCRLFBuffer.getLength(); ++i) { - switch (*pBuf) + switch (aCRLFBuffer[i]) { case 0x0: - if( nCnt + 1 < rLen && !*(pBuf+1) ) + if (i + 1 < aCRLFBuffer.getLength() && !aCRLFBuffer[i + 1]) return false; bIsBareUnicode = true; break; @@ -336,14 +315,10 @@ bool SwIoSystem::IsDetectableText(const char* pBuf, sal_uLong &rLen, else eLineEnd = bCR ? ( bLF ? LINEEND_CRLF : LINEEND_CR ) : LINEEND_LF; - if (pCharSet) - *pCharSet = eCharSet; - if (pSwap) - *pSwap = aStream.IsEndianSwap(); if (pLineEnd) *pLineEnd = eLineEnd; if (pBom) - *pBom = nBomSize != 0; + *pBom = nNewPos != nOrigPos; return !bIsBareUnicode; } commit 35871de09d3e6ca556409bfe43c0989ad2c5afd9 Author: Mike Kaganski <[email protected]> AuthorDate: Thu Feb 26 11:51:26 2026 +0500 Commit: Mike Kaganski <[email protected]> CommitDate: Sat Feb 28 09:49:55 2026 +0100 Set encoding in StartReadingUnicodeText Simplifies some code. It doesn't make sense to set endianness, but avoid setting encoding. Change-Id: I3a09fea7381b6c113075b9800b06b307cdaa6d45 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/200668 Tested-by: Jenkins Reviewed-by: Mike Kaganski <[email protected]> diff --git a/basctl/source/basicide/baside2.cxx b/basctl/source/basicide/baside2.cxx index fae3c01bbc29..8699f9e16203 100644 --- a/basctl/source/basicide/baside2.cxx +++ b/basctl/source/basicide/baside2.cxx @@ -443,8 +443,6 @@ void ModulWindow::LoadBasic() GetEditEngine()->SetUpdateMode( false ); // tdf#139196 - import macros using either default or utf-8 text encoding pStream->StartReadingUnicodeText(RTL_TEXTENCODING_UTF8); - if (pStream->Tell() == 3) - pStream->SetStreamEncoding(RTL_TEXTENCODING_UTF8); GetEditView()->Read( *pStream ); GetEditEngine()->SetUpdateMode( true ); GetEditorWindow().PaintImmediately(); diff --git a/tools/source/stream/stream.cxx b/tools/source/stream/stream.cxx index b8a6530a8590..1a65357f8cf7 100644 --- a/tools/source/stream/stream.cxx +++ b/tools/source/stream/stream.cxx @@ -702,7 +702,6 @@ void SvStream::StartReadingUnicodeText(rtl_TextEncoding eReadBomEncoding) return; // nothing to read const sal_uInt64 nOldPos = Tell(); - bool bGetBack = true; unsigned char nFlag(0); ReadUChar( nFlag ); switch ( nFlag ) @@ -714,8 +713,9 @@ void SvStream::StartReadingUnicodeText(rtl_TextEncoding eReadBomEncoding) ReadUChar(nFlag); if (nFlag == 0xff) { + SetStreamEncoding(RTL_TEXTENCODING_UNICODE); SetEndian(SvStreamEndian::BIG); - bGetBack = false; + return; } } break; @@ -726,8 +726,9 @@ void SvStream::StartReadingUnicodeText(rtl_TextEncoding eReadBomEncoding) ReadUChar(nFlag); if (nFlag == 0xfe) { + SetStreamEncoding(RTL_TEXTENCODING_UNICODE); SetEndian(SvStreamEndian::LITTLE); - bGetBack = false; + return; } } break; @@ -740,15 +741,17 @@ void SvStream::StartReadingUnicodeText(rtl_TextEncoding eReadBomEncoding) { ReadUChar(nFlag); if (nFlag == 0xbf) - bGetBack = false; // it is UTF-8 + { + SetStreamEncoding(RTL_TEXTENCODING_UTF8); + return; + } } } break; default: ; // nothing } - if (bGetBack) - Seek(nOldPos); // no BOM, pure data + Seek(nOldPos); // no BOM, pure data } void SvStream::DetectEncoding(size_t maxBytes) @@ -790,20 +793,10 @@ void SvStream::DetectEncoding(size_t maxBytes) StartReadingUnicodeText(RTL_TEXTENCODING_DONTKNOW); if (!good()) return; + if (GetStreamEncoding() != RTL_TEXTENCODING_DONTKNOW) + return; // BOM detected, encoding already set - const sal_uInt64 nBomSize = Tell() - nOrigPos; - if (nBomSize == 2) - { - SetStreamEncoding(RTL_TEXTENCODING_UCS2); - return; - } - if (nBomSize == 3) - { - SetStreamEncoding(RTL_TEXTENCODING_UTF8); - return; - } - - assert(nBomSize == 0); // we are at nOrigPos + assert(Tell() - nOrigPos == 0); // we are at nOrigPos auto bytes = std::make_unique<char[]>(maxBytes); size_t nRead = ReadBytes(bytes.get(), maxBytes); Seek(nOrigPos);
