basctl/source/basicide/baside2.cxx   |    2 -
 sw/inc/iodetect.hxx                  |    4 +-
 sw/source/filter/ascii/parasc.cxx    |   21 +++--------
 sw/source/filter/basflt/iodetect.cxx |   63 ++++++++++-------------------------
 tools/source/stream/stream.cxx       |   31 ++++++-----------
 5 files changed, 40 insertions(+), 81 deletions(-)

New commits:
commit 4f8f5e18075284fbd6565914d853bbe0d3c1c7a0
Author:     Mike Kaganski <[email protected]>
AuthorDate: Thu Feb 26 13:33:55 2026 +0500
Commit:     Mike Kaganski <[email protected]>
CommitDate: Sat Feb 28 09:50:02 2026 +0100

    Simplify SwIoSystem::IsDetectableText to take an SvStream
    
    No need to read from a stream into a buffer, then create a stream
    inside the function. The stream stores the detected encoding and
    endianness, so the pCharSet and pSwap out arguments are not needed.
    
    Change-Id: If3c6ea3f4eed1b57f6de415f2731278791146be4
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/200669
    Tested-by: Jenkins
    Reviewed-by: Mike Kaganski <[email protected]>

diff --git a/sw/inc/iodetect.hxx b/sw/inc/iodetect.hxx
index 0770731aaeba..045daeb14eba 100644
--- a/sw/inc/iodetect.hxx
+++ b/sw/inc/iodetect.hxx
@@ -108,8 +108,8 @@ public:
     static bool IsValidStgFilter( SotStorage& , const SfxFilter& );
     static bool IsValidStgFilter( const css::uno::Reference < 
css::embed::XStorage >& rStg, const SfxFilter& rFilter);
 
-    static bool IsDetectableText( const char* pBuf, sal_uLong &rLen,
-            rtl_TextEncoding *pCharSet, bool *pSwap, LineEnd *pLineEnd, bool 
*pBom);
+    static bool IsDetectableText(SvStream& rStream, sal_uLong nMaxBuf,
+                                 LineEnd* pLineEnd, bool* pBom);
 
     static OUString GetSubStorageName( const SfxFilter& rFltr );
 };
diff --git a/sw/source/filter/ascii/parasc.cxx 
b/sw/source/filter/ascii/parasc.cxx
index d2f42c3eaaad..1be51c40c511 100644
--- a/sw/source/filter/ascii/parasc.cxx
+++ b/sw/source/filter/ascii/parasc.cxx
@@ -262,7 +262,7 @@ ErrCode SwASCIIParser::ReadChars()
     sal_Unicode *pStart = nullptr, *pEnd = nullptr, *pLastStt = nullptr;
     tools::Long nReadCnt = 0, nLineLen = 0;
     sal_Unicode cLastCR = 0;
-    bool bSwapUnicode = false;
+    bool bSwapUnicode;
 
     const SwAsciiOptions* pUseMe = &m_rOpt;
     SwAsciiOptions aEmpty;
@@ -271,27 +271,19 @@ ErrCode SwASCIIParser::ReadChars()
         && aEmpty.GetLanguage() == m_rOpt.GetLanguage()
         && aEmpty.GetParaFlags() == m_rOpt.GetParaFlags())
     {
-        sal_Size nLen, nOrig;
-        nOrig = nLen = m_rInput.ReadBytes(m_pArr.get(), ASC_BUFFLEN);
-        rtl_TextEncoding eCharSet;
         LineEnd eLineEnd;
         bool bHasBom;
-        const bool bRet
-            = SwIoSystem::IsDetectableText(m_pArr.get(), nLen, &eCharSet,
-                                            &bSwapUnicode, &eLineEnd, 
&bHasBom);
+        const bool bRet = SwIoSystem::IsDetectableText(m_rInput, ASC_BUFFLEN, 
&eLineEnd, &bHasBom);
         if (!bRet)
             return ERRCODE_IO_BROKENPACKAGE;
 
-        OSL_ENSURE(bRet, "Autodetect of text import without nag dialog must 
have failed");
-        if (bRet && eCharSet != RTL_TEXTENCODING_DONTKNOW)
+        const rtl_TextEncoding eCharSet = m_rInput.GetStreamEncoding();
+        if (eCharSet != RTL_TEXTENCODING_DONTKNOW)
         {
             aEmpty.SetCharSet(eCharSet);
             aEmpty.SetParaFlags(eLineEnd);
             aEmpty.SetIncludeBOM(bHasBom);
-            m_rInput.SeekRel(-(tools::Long(nLen)));
         }
-        else
-            m_rInput.SeekRel(-(tools::Long(nOrig)));
         pUseMe=&aEmpty;
     }
     m_usedAsciiOptions = *pUseMe;
@@ -310,9 +302,10 @@ ErrCode SwASCIIParser::ReadChars()
         bSwapUnicode = false;
         hContext = rtl_createTextToUnicodeContext( hConverter );
     }
-    else if (pUseMe != &aEmpty)  //Already successfully figured out type
+    else
     {
-        m_rInput.StartReadingUnicodeText(currentCharSet);
+        if (pUseMe != &aEmpty) // Already successfully figured out type
+            m_rInput.StartReadingUnicodeText(currentCharSet);
         bSwapUnicode = m_rInput.IsEndianSwap();
     }
 
diff --git a/sw/source/filter/basflt/iodetect.cxx 
b/sw/source/filter/basflt/iodetect.cxx
index d136dd2ea070..73836b429ecf 100644
--- a/sw/source/filter/basflt/iodetect.cxx
+++ b/sw/source/filter/basflt/iodetect.cxx
@@ -240,56 +240,33 @@ std::shared_ptr<const SfxFilter> 
SwIoSystem::GetFileFilter(const OUString& rFile
     return SwIoSystem::GetFilterOfFormat(FILTER_TEXT);
 }
 
-bool SwIoSystem::IsDetectableText(const char* pBuf, sal_uLong &rLen,
-    rtl_TextEncoding *pCharSet, bool *pSwap, LineEnd *pLineEnd, bool *pBom)
+bool SwIoSystem::IsDetectableText(SvStream& rStream, sal_uLong nMaxBuf, 
LineEnd* pLineEnd,
+                                  bool* pBom)
 {
-    SvMemoryStream aStream(const_cast<char*>(pBuf), rLen, StreamMode::READ);
-    aStream.DetectEncoding();
-    rtl_TextEncoding eCharSet = aStream.GetStreamEncoding();
-    auto nBomSize = aStream.Tell();
-    pBuf += nBomSize;
-    rLen -= nBomSize;
+    const auto nOrigPos = rStream.Tell();
+    rStream.DetectEncoding(nMaxBuf);
+    const auto nNewPos = rStream.Tell();
+    nMaxBuf -= nNewPos - nOrigPos;
 
+    const rtl_TextEncoding eCharSet = rStream.GetStreamEncoding();
     bool bCR = false, bLF = false, bIsBareUnicode = false;
 
     if (eCharSet != RTL_TEXTENCODING_DONTKNOW)
     {
-        std::unique_ptr<sal_Unicode[]> aWork(new sal_Unicode[rLen+1]);
-        sal_Unicode *pNewBuf = aWork.get();
-        std::size_t nNewLen;
+        OUString aCRLFBuffer;
         if (eCharSet != RTL_TEXTENCODING_UCS2)
         {
-            nNewLen = rLen;
-            rtl_TextToUnicodeConverter hConverter =
-                rtl_createTextToUnicodeConverter(eCharSet);
-            rtl_TextToUnicodeContext hContext =
-                rtl_createTextToUnicodeContext(hConverter);
-
-            sal_Size nCntBytes;
-            sal_uInt32 nInfo;
-            nNewLen = rtl_convertTextToUnicode( hConverter, hContext, pBuf,
-                rLen, pNewBuf, nNewLen,
-                (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT |
-                  RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT |
-                  RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT), &nInfo, 
&nCntBytes);
-
-            rtl_destroyTextToUnicodeContext(hConverter, hContext);
-            rtl_destroyTextToUnicodeConverter(hConverter);
+            aCRLFBuffer = read_uInt8s_ToOUString(rStream, nMaxBuf, eCharSet);
         }
         else
         {
-            nNewLen = rLen/2;
-            memcpy(pNewBuf, pBuf, rLen);
-            if (aStream.IsEndianSwap())
-            {
-                for (sal_uLong n = 0; n < nNewLen; ++n)
-                    pNewBuf[n] = OSL_SWAPWORD(pNewBuf[n]);
-            }
+            aCRLFBuffer = read_uInt16s_ToOUString(rStream, nMaxBuf / 2);
         }
+        rStream.Seek(nNewPos);
 
-        for (sal_uLong nCnt = 0; nCnt < nNewLen; ++nCnt, ++pNewBuf)
+        for (sal_Int32 i = 0; i < aCRLFBuffer.getLength(); ++i)
         {
-            switch (*pNewBuf)
+            switch (aCRLFBuffer[i])
             {
                 case 0xA:
                     bLF = true;
@@ -304,12 +281,14 @@ bool SwIoSystem::IsDetectableText(const char* pBuf, 
sal_uLong &rLen,
     }
     else
     {
-        for( sal_uLong nCnt = 0; nCnt < rLen; ++nCnt, ++pBuf )
+        OString aCRLFBuffer = read_uInt8s_ToOString(rStream, nMaxBuf);
+        rStream.Seek(nNewPos);
+        for (sal_Int32 i = 0; i < aCRLFBuffer.getLength(); ++i)
         {
-            switch (*pBuf)
+            switch (aCRLFBuffer[i])
             {
                 case 0x0:
-                    if( nCnt + 1 < rLen && !*(pBuf+1) )
+                    if (i + 1 < aCRLFBuffer.getLength() && !aCRLFBuffer[i + 1])
                         return false;
                     bIsBareUnicode = true;
                     break;
@@ -336,14 +315,10 @@ bool SwIoSystem::IsDetectableText(const char* pBuf, 
sal_uLong &rLen,
     else
         eLineEnd = bCR ? ( bLF ? LINEEND_CRLF : LINEEND_CR ) : LINEEND_LF;
 
-    if (pCharSet)
-        *pCharSet = eCharSet;
-    if (pSwap)
-        *pSwap = aStream.IsEndianSwap();
     if (pLineEnd)
         *pLineEnd = eLineEnd;
     if (pBom)
-        *pBom = nBomSize != 0;
+        *pBom = nNewPos != nOrigPos;
 
     return !bIsBareUnicode;
 }
commit 35871de09d3e6ca556409bfe43c0989ad2c5afd9
Author:     Mike Kaganski <[email protected]>
AuthorDate: Thu Feb 26 11:51:26 2026 +0500
Commit:     Mike Kaganski <[email protected]>
CommitDate: Sat Feb 28 09:49:55 2026 +0100

    Set encoding in StartReadingUnicodeText
    
    Simplifies some code. It doesn't make sense to set the endianness
    but not the encoding.
    
    Change-Id: I3a09fea7381b6c113075b9800b06b307cdaa6d45
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/200668
    Tested-by: Jenkins
    Reviewed-by: Mike Kaganski <[email protected]>

diff --git a/basctl/source/basicide/baside2.cxx 
b/basctl/source/basicide/baside2.cxx
index fae3c01bbc29..8699f9e16203 100644
--- a/basctl/source/basicide/baside2.cxx
+++ b/basctl/source/basicide/baside2.cxx
@@ -443,8 +443,6 @@ void ModulWindow::LoadBasic()
         GetEditEngine()->SetUpdateMode( false );
         // tdf#139196 - import macros using either default or utf-8 text 
encoding
         pStream->StartReadingUnicodeText(RTL_TEXTENCODING_UTF8);
-        if (pStream->Tell() == 3)
-            pStream->SetStreamEncoding(RTL_TEXTENCODING_UTF8);
         GetEditView()->Read( *pStream );
         GetEditEngine()->SetUpdateMode( true );
         GetEditorWindow().PaintImmediately();
diff --git a/tools/source/stream/stream.cxx b/tools/source/stream/stream.cxx
index b8a6530a8590..1a65357f8cf7 100644
--- a/tools/source/stream/stream.cxx
+++ b/tools/source/stream/stream.cxx
@@ -702,7 +702,6 @@ void SvStream::StartReadingUnicodeText(rtl_TextEncoding 
eReadBomEncoding)
         return;    // nothing to read
 
     const sal_uInt64 nOldPos = Tell();
-    bool bGetBack = true;
     unsigned char nFlag(0);
     ReadUChar( nFlag );
     switch ( nFlag )
@@ -714,8 +713,9 @@ void SvStream::StartReadingUnicodeText(rtl_TextEncoding 
eReadBomEncoding)
                 ReadUChar(nFlag);
                 if (nFlag == 0xff)
                 {
+                    SetStreamEncoding(RTL_TEXTENCODING_UNICODE);
                     SetEndian(SvStreamEndian::BIG);
-                    bGetBack = false;
+                    return;
                 }
             }
         break;
@@ -726,8 +726,9 @@ void SvStream::StartReadingUnicodeText(rtl_TextEncoding 
eReadBomEncoding)
                 ReadUChar(nFlag);
                 if (nFlag == 0xfe)
                 {
+                    SetStreamEncoding(RTL_TEXTENCODING_UNICODE);
                     SetEndian(SvStreamEndian::LITTLE);
-                    bGetBack = false;
+                    return;
                 }
             }
         break;
@@ -740,15 +741,17 @@ void SvStream::StartReadingUnicodeText(rtl_TextEncoding 
eReadBomEncoding)
                 {
                     ReadUChar(nFlag);
                     if (nFlag == 0xbf)
-                        bGetBack = false; // it is UTF-8
+                    {
+                        SetStreamEncoding(RTL_TEXTENCODING_UTF8);
+                        return;
+                    }
                 }
             }
         break;
         default:
             ;   // nothing
     }
-    if (bGetBack)
-        Seek(nOldPos);      // no BOM, pure data
+    Seek(nOldPos); // no BOM, pure data
 }
 
 void SvStream::DetectEncoding(size_t maxBytes)
@@ -790,20 +793,10 @@ void SvStream::DetectEncoding(size_t maxBytes)
     StartReadingUnicodeText(RTL_TEXTENCODING_DONTKNOW);
     if (!good())
         return;
+    if (GetStreamEncoding() != RTL_TEXTENCODING_DONTKNOW)
+        return; // BOM detected, encoding already set
 
-    const sal_uInt64 nBomSize = Tell() - nOrigPos;
-    if (nBomSize == 2)
-    {
-        SetStreamEncoding(RTL_TEXTENCODING_UCS2);
-        return;
-    }
-    if (nBomSize == 3)
-    {
-        SetStreamEncoding(RTL_TEXTENCODING_UTF8);
-        return;
-    }
-
-    assert(nBomSize == 0); // we are at nOrigPos
+    assert(Tell() - nOrigPos == 0); // we are at nOrigPos
     auto bytes = std::make_unique<char[]>(maxBytes);
     size_t nRead = ReadBytes(bytes.get(), maxBytes);
     Seek(nOrigPos);

Reply via email to