include/sfx2/objsh.hxx              |    6 
 sc/inc/scabstdlg.hxx                |    3 
 sc/qa/unit/data/csv/tdf48731.csv    |    4 
 sc/source/ui/attrdlg/scdlgfact.cxx  |    5 
 sc/source/ui/attrdlg/scdlgfact.hxx  |    3 
 sc/source/ui/dbgui/scuiasciiopt.cxx |   33 +++
 sc/source/ui/inc/scuiasciiopt.hxx   |    3 
 sc/source/ui/unoobj/filtuno.cxx     |   12 +
 sfx2/source/doc/objstor.cxx         |  327 ++++++++++++++++++++++++++++++++++++
 9 files changed, 383 insertions(+), 13 deletions(-)

New commits:
commit 2feda8ba21acdcf33a9b4ba94742f574c17839bd
Author:     Gabriel Masei <gabriel.ma...@1and1.ro>
AuthorDate: Sun Mar 17 10:13:25 2024 +0200
Commit:     Mike Kaganski <mike.kagan...@collabora.com>
CommitDate: Sat Apr 6 19:39:22 2024 +0200

    tdf#152336 Detect charset and separators for csv files
    
    Change-Id: Ie8451b3d30e390d363d8f9e5ec8bdf47350ca3a2
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/164936
    Reviewed-by: Mike Kaganski <mike.kagan...@collabora.com>
    Tested-by: Jenkins

diff --git a/include/sfx2/objsh.hxx b/include/sfx2/objsh.hxx
index 075b3e57f4b9..e26c242aaaad 100644
--- a/include/sfx2/objsh.hxx
+++ b/include/sfx2/objsh.hxx
@@ -45,6 +45,8 @@
 #include <functional>
 #include <sfx2/AccessibilityIssue.hxx>
 
+#include <unotools/ucbstreamhelper.hxx>
+
 namespace weld {class Button; }
 namespace model {class ColorSet; }
 struct NamedColor;
@@ -443,6 +445,10 @@ public:
     const css::uno::Sequence< css::beans::PropertyValue >& 
GetModifyPasswordInfo() const;
     bool                        SetModifyPasswordInfo( const 
css::uno::Sequence< css::beans::PropertyValue >& aInfo );
 
+    static void                 DetectCharSet(SvStream& stream, 
rtl_TextEncoding& eCharSet, SvStreamEndian& endian);
+    static void                 DetectCsvSeparators(SvStream& stream, 
rtl_TextEncoding& eCharSet, OUString& separators, sal_Unicode cStringDelimiter, 
bool bForceCommonSeps = true,  bool bAllowMultipleSeps = false);
+    static void                 DetectCsvFilterOptions(SvStream& stream, 
OUString& aFilterOptions, bool bForceDetect = false);
+    static void                 DetectFilterOptions(SfxMedium* pMedium, bool 
bForceDetect = false);
     static ErrCode              HandleFilter( SfxMedium* pMedium, 
SfxObjectShell const * pDoc );
 
     virtual bool                PrepareClose(bool bUI = true);
diff --git a/sc/inc/scabstdlg.hxx b/sc/inc/scabstdlg.hxx
index 7a94af5f6fe9..afd9abf4fbfc 100644
--- a/sc/inc/scabstdlg.hxx
+++ b/sc/inc/scabstdlg.hxx
@@ -417,7 +417,8 @@ public:
     virtual     VclPtr<AbstractScImportAsciiDlg> 
CreateScImportAsciiDlg(weld::Window* pParent,
                                                                     const 
OUString& aDatName,
                                                                     SvStream* 
pInStream,
-                                                                    
ScImportAsciiCall eCall) = 0;
+                                                                    
ScImportAsciiCall eCall,
+                                                                    
ScAsciiOptions* aOptions = nullptr) = 0;
 
     virtual     VclPtr<AbstractScTextImportOptionsDlg> 
CreateScTextImportOptionsDlg(weld::Window* pParent) = 0;
 
diff --git a/sc/qa/unit/data/csv/tdf48731.csv b/sc/qa/unit/data/csv/tdf48731.csv
index c0353427ced3..fdbead52112e 100644
--- a/sc/qa/unit/data/csv/tdf48731.csv
+++ b/sc/qa/unit/data/csv/tdf48731.csv
@@ -1,4 +1,4 @@
-WITHOUT QUOTES
+WITHOUT QUOTES,
 1 apostrophe,'
 2 apostrophes,''
 3 apostrophes,'''
@@ -6,7 +6,7 @@ A number,'3
 A word,'word
 A misspelled word,'mword
 
-WITH QUOTES
+WITH QUOTES,
 1 apostrophe,"'"
 2 apostrophes,"''"
 3 apostrophes,"'''"
diff --git a/sc/source/ui/attrdlg/scdlgfact.cxx 
b/sc/source/ui/attrdlg/scdlgfact.cxx
index 4fe36f303c57..03926077d9f1 100644
--- a/sc/source/ui/attrdlg/scdlgfact.cxx
+++ b/sc/source/ui/attrdlg/scdlgfact.cxx
@@ -1058,9 +1058,10 @@ const SfxItemSet* 
ScAsyncTabController_Impl::GetOutputItemSet() const
 // =========================Factories  for createdialog ===================
 VclPtr<AbstractScImportAsciiDlg> 
ScAbstractDialogFactory_Impl::CreateScImportAsciiDlg(weld::Window* pParent,
                                                     const OUString& aDatName,
-                                                    SvStream* pInStream, 
ScImportAsciiCall eCall)
+                                                    SvStream* pInStream, 
ScImportAsciiCall eCall,
+                                                    ScAsciiOptions* aOptions)
 {
-    return 
VclPtr<AbstractScImportAsciiDlg_Impl>::Create(std::make_shared<ScImportAsciiDlg>(pParent,
 aDatName,pInStream, eCall));
+    return 
VclPtr<AbstractScImportAsciiDlg_Impl>::Create(std::make_shared<ScImportAsciiDlg>(pParent,
 aDatName,pInStream, eCall, aOptions));
 }
 
 VclPtr<AbstractScTextImportOptionsDlg> 
ScAbstractDialogFactory_Impl::CreateScTextImportOptionsDlg(weld::Window* 
pParent)
diff --git a/sc/source/ui/attrdlg/scdlgfact.hxx 
b/sc/source/ui/attrdlg/scdlgfact.hxx
index 276bffd63195..4644ff4b35e2 100644
--- a/sc/source/ui/attrdlg/scdlgfact.hxx
+++ b/sc/source/ui/attrdlg/scdlgfact.hxx
@@ -663,7 +663,8 @@ public:
     virtual VclPtr<AbstractScImportAsciiDlg> 
CreateScImportAsciiDlg(weld::Window* pParent,
                                                                     const 
OUString& aDatName,
                                                                     SvStream* 
pInStream,
-                                                                    
ScImportAsciiCall eCall) override;
+                                                                    
ScImportAsciiCall eCall,
+                                                                    
ScAsciiOptions* aOptions = nullptr) override;
 
     virtual VclPtr<AbstractScTextImportOptionsDlg> 
CreateScTextImportOptionsDlg(weld::Window* pParent) override;
 
diff --git a/sc/source/ui/dbgui/scuiasciiopt.cxx 
b/sc/source/ui/dbgui/scuiasciiopt.cxx
index 601323a6586d..6cb73c803c39 100644
--- a/sc/source/ui/dbgui/scuiasciiopt.cxx
+++ b/sc/source/ui/dbgui/scuiasciiopt.cxx
@@ -316,7 +316,8 @@ static void lcl_SaveSeparators(
 }
 
 ScImportAsciiDlg::ScImportAsciiDlg(weld::Window* pParent, std::u16string_view 
aDatName,
-                                   SvStream* pInStream, ScImportAsciiCall 
eCall)
+                                   SvStream* pInStream, ScImportAsciiCall 
eCall,
+                                   const ScAsciiOptions* aOptions)
     : GenericDialogController(pParent, "modules/scalc/ui/textimportcsv.ui", 
"TextImportCsvDialog")
     , mpDatStream(pInStream)
     , mnStreamPos(pInStream ? pInStream->Tell() : 0)
@@ -385,7 +386,27 @@ ScImportAsciiDlg::ScImportAsciiDlg(weld::Window* pParent, 
std::u16string_view aD
     sal_Int32 nFromRow = 1;
     sal_Int32 nCharSet = -1;
     sal_Int32 nLanguage = 0;
-    lcl_LoadSeparators (sFieldSeparators, sTextSeparators, bMergeDelimiters,
+
+    if (aOptions)
+    {
+        if (!aOptions->GetFieldSeps().isEmpty())
+            sFieldSeparators = aOptions->GetFieldSeps();
+        if (aOptions->GetTextSep())
+            sTextSeparators = OUStringChar(aOptions->GetTextSep());
+        bMergeDelimiters = aOptions->IsMergeSeps();
+        bFixedWidth = aOptions->IsFixedLen();
+        bQuotedFieldAsText = aOptions->IsQuotedAsText();
+        bDetectSpecialNum = aOptions->IsDetectSpecialNumber();
+        bDetectScientificNum = aOptions->IsDetectScientificNumber();
+        bEvaluateFormulas = aOptions->IsEvaluateFormulas();
+        bSkipEmptyCells = aOptions->IsSkipEmptyCells();
+        bRemoveSpace = aOptions->IsRemoveSpace();
+        nFromRow = aOptions->GetStartRow();
+        nCharSet = aOptions->GetCharSet();
+        nLanguage = static_cast<sal_uInt16>(aOptions->GetLanguage());
+    }
+    else
+        lcl_LoadSeparators (sFieldSeparators, sTextSeparators, 
bMergeDelimiters,
                          bQuotedFieldAsText, bDetectSpecialNum, 
bDetectScientificNum, bFixedWidth, nFromRow,
                          nCharSet, nLanguage, bSkipEmptyCells, bRemoveSpace, 
bEvaluateFormulas, meCall);
     // load from saved settings
@@ -415,8 +436,8 @@ ScImportAsciiDlg::ScImportAsciiDlg(weld::Window* pParent, 
std::u16string_view aD
         mxNfRow->set_value(nFromRow);
 
     // Clipboard is always Unicode, else detect.
-    rtl_TextEncoding ePreselectUnicode = (meCall == SC_IMPORTFILE ?
-            RTL_TEXTENCODING_DONTKNOW : RTL_TEXTENCODING_UNICODE);
+    rtl_TextEncoding ePreselectUnicode = (aOptions ? aOptions->GetCharSet() : 
(meCall == SC_IMPORTFILE ?
+            RTL_TEXTENCODING_DONTKNOW : RTL_TEXTENCODING_UNICODE));
     // Sniff for Unicode / not
     if( ePreselectUnicode == RTL_TEXTENCODING_DONTKNOW && mpDatStream )
     {
@@ -463,7 +484,9 @@ ScImportAsciiDlg::ScImportAsciiDlg(weld::Window* pParent, 
std::u16string_view aD
         mnStreamPos = mpDatStream->Tell();
     }
 
-    if (bIsTSV)
+    if (aOptions && !maFieldSeparators.isEmpty())
+        SetSeparators(0);
+    else if (bIsTSV)
         SetSeparators('        ');
     else
     {
diff --git a/sc/source/ui/inc/scuiasciiopt.hxx 
b/sc/source/ui/inc/scuiasciiopt.hxx
index 309192477ecf..ee8ca78b221d 100644
--- a/sc/source/ui/inc/scuiasciiopt.hxx
+++ b/sc/source/ui/inc/scuiasciiopt.hxx
@@ -83,7 +83,8 @@ class ScImportAsciiDlg : public weld::GenericDialogController
 public:
                                 ScImportAsciiDlg(
                                     weld::Window* pParent, std::u16string_view 
aDatName,
-                                    SvStream* pInStream, ScImportAsciiCall 
eCall );
+                                    SvStream* pInStream, ScImportAsciiCall 
eCall,
+                                    const ScAsciiOptions* aOptions = nullptr );
                                 virtual ~ScImportAsciiDlg() override;
 
     void                        GetOptions( ScAsciiOptions& rOpt );
diff --git a/sc/source/ui/unoobj/filtuno.cxx b/sc/source/ui/unoobj/filtuno.cxx
index fa520e23aeff..b5e9421b5440 100644
--- a/sc/source/ui/unoobj/filtuno.cxx
+++ b/sc/source/ui/unoobj/filtuno.cxx
@@ -179,15 +179,25 @@ sal_Int16 SAL_CALL ScFilterOptionsObj::execute()
     {
         //  ascii import is special...
 
+        ScAsciiOptions aInOptions, *pInOptions = nullptr;
         INetURLObject aURL( aFileName );
         // tdf#132421 - don't URL encode filename for the import ASCII dialog 
title
         OUString 
aPrivDatName(aURL.GetLastName(INetURLObject::DecodeMechanism::Unambiguous));
         std::unique_ptr<SvStream> pInStream;
         if ( xInputStream.is() )
+        {
             pInStream = utl::UcbStreamHelper::CreateStream( xInputStream );
 
+            if (aFilterOptions.isEmpty())
+                aFilterOptions = "DETECT,34,DETECT,,,,,,,,,,,,";
+            SfxObjectShell::DetectCsvFilterOptions(*pInStream, aFilterOptions);
+
+            aInOptions.ReadFromString(aFilterOptions);
+            pInOptions = &aInOptions;
+        }
+
         ScopedVclPtr<AbstractScImportAsciiDlg> 
pDlg(pFact->CreateScImportAsciiDlg(Application::GetFrameWeld(xDialogParent), 
aPrivDatName,
-                                                                               
   pInStream.get(), SC_IMPORTFILE));
+                                                                               
   pInStream.get(), SC_IMPORTFILE, pInOptions));
         if ( pDlg->Execute() == RET_OK )
         {
             ScAsciiOptions aOptions;
diff --git a/sfx2/source/doc/objstor.cxx b/sfx2/source/doc/objstor.cxx
index 200ae4166ae9..ccfc41f8cd93 100644
--- a/sfx2/source/doc/objstor.cxx
+++ b/sfx2/source/doc/objstor.cxx
@@ -118,6 +118,9 @@
 #include <appbaslib.hxx>
 #include "objstor.hxx"
 #include "exoticfileloadexception.hxx"
+#include <unicode/ucsdet.h>
+#include <unicode/ucnv.h>
+#include <o3tl/string_view.hxx>
 
 using namespace ::com::sun::star;
 using namespace ::com::sun::star::container;
@@ -873,6 +876,322 @@ bool SfxObjectShell::DoLoadExternal( SfxMedium *pMed )
     return LoadExternal(*pMedium);
 }
 
+// Lookup table used by SfxObjectShell::DetectCharSet(): maps the encoding
+// name obtained from ucsdet_getName() to the corresponding rtl_TextEncoding.
+// Both UTF-16 names map to RTL_TEXTENCODING_UCS2 and both UTF-32 names map to
+// RTL_TEXTENCODING_UCS4, so the byte order is not part of the mapped value.
+// Names that are not listed here leave the detected charset unknown.
+const ::std::unordered_map<std::string, rtl_TextEncoding>  mapCharSets =
+                            {{"UTF-8", RTL_TEXTENCODING_UTF8},
+                            {"UTF-16BE", RTL_TEXTENCODING_UCS2},
+                            {"UTF-16LE", RTL_TEXTENCODING_UCS2},
+                            {"UTF-32BE", RTL_TEXTENCODING_UCS4},
+                            {"UTF-32LE", RTL_TEXTENCODING_UCS4},
+                            {"Shift_JIS", RTL_TEXTENCODING_SHIFT_JIS},
+                            {"ISO-2022-JP", RTL_TEXTENCODING_ISO_2022_JP},
+                            {"ISO-2022-CN", RTL_TEXTENCODING_ISO_2022_CN},
+                            {"ISO-2022-KR", RTL_TEXTENCODING_ISO_2022_KR},
+                            {"GB18030", RTL_TEXTENCODING_GB_18030},
+                            {"Big5", RTL_TEXTENCODING_BIG5},
+                            {"EUC-JP", RTL_TEXTENCODING_EUC_JP},
+                            {"EUC-KR", RTL_TEXTENCODING_EUC_KR},
+                            {"ISO-8859-1", RTL_TEXTENCODING_ISO_8859_1},
+                            {"ISO-8859-2", RTL_TEXTENCODING_ISO_8859_2},
+                            {"ISO-8859-5", RTL_TEXTENCODING_ISO_8859_5},
+                            {"ISO-8859-6", RTL_TEXTENCODING_ISO_8859_6},
+                            {"ISO-8859-7", RTL_TEXTENCODING_ISO_8859_7},
+                            {"ISO-8859-8", RTL_TEXTENCODING_ISO_8859_8},
+                            {"ISO-8859-9", RTL_TEXTENCODING_ISO_8859_9},
+                            {"windows-1250", RTL_TEXTENCODING_MS_1250},
+                            {"windows-1251", RTL_TEXTENCODING_MS_1251},
+                            {"windows-1252", RTL_TEXTENCODING_MS_1252},
+                            {"windows-1253", RTL_TEXTENCODING_MS_1253},
+                            {"windows-1254", RTL_TEXTENCODING_MS_1254},
+                            {"windows-1255", RTL_TEXTENCODING_MS_1255},
+                            {"windows-1256", RTL_TEXTENCODING_MS_1256},
+                            {"KOI8-R", RTL_TEXTENCODING_KOI8_R}};
+
+// Guess the text encoding of a stream from a sample of its leading bytes,
+// using the ICU charset detector.
+//
+// stream   - sampled from its current position (at most 4096 bytes); the
+//            position is restored before any detection work is done.
+// eCharSet - out: the detected encoding, or RTL_TEXTENCODING_DONTKNOW when
+//            nothing could be read, an ICU call failed, or the detected name
+//            has no entry in mapCharSets.
+// endian   - out: written only when the detected name is "UTF-16LE" or
+//            "UTF-16BE"; left untouched in every other case.
+void SfxObjectShell::DetectCharSet(SvStream& stream, rtl_TextEncoding& eCharSet, SvStreamEndian &endian)
+{
+    constexpr size_t nSampleSize = 4096;
+    sal_Int8 aSample[nSampleSize] = { 0 };
+
+    // Take the sample, then rewind so the caller sees an untouched stream.
+    const sal_uInt64 nStartPos = stream.Tell();
+    const sal_Int32 nSampleLen = stream.ReadBytes(aSample, nSampleSize);
+    stream.Seek(nStartPos);
+
+    eCharSet = RTL_TEXTENCODING_DONTKNOW;
+    if (nSampleLen == 0)
+        return;
+
+    UErrorCode eStatus = U_ZERO_ERROR;
+    UCharsetDetector* pDetector = ucsdet_open(&eStatus);
+    if (!U_SUCCESS(eStatus))
+        return;
+
+    // Each detection step is attempted only if every previous one succeeded.
+    ucsdet_setText(pDetector, reinterpret_cast<const char*>(aSample), nSampleLen, &eStatus);
+    const UCharsetMatch* pBestMatch
+        = U_SUCCESS(eStatus) ? ucsdet_detect(pDetector, &eStatus) : nullptr;
+    const char* pName
+        = U_SUCCESS(eStatus) ? ucsdet_getName(pBestMatch, &eStatus) : nullptr;
+
+    if (U_SUCCESS(eStatus))
+    {
+        const auto aKnown = mapCharSets.find(pName);
+        if (aKnown != mapCharSets.end())
+            eCharSet = aKnown->second;
+
+        // The mapped value does not carry the byte order, so derive it from the name.
+        if (eCharSet == RTL_TEXTENCODING_UNICODE)
+        {
+            if (!strcmp("UTF-16LE", pName))
+                endian = SvStreamEndian::LITTLE;
+            else if (!strcmp("UTF-16BE", pName))
+                endian = SvStreamEndian::BIG;
+        }
+    }
+
+    ucsdet_close(pDetector);
+}
+
+// Guess the field separator(s) of a CSV stream by sampling its first lines.
+//
+// Non-empty lines are read until 20 of them have been analysed. If the first
+// analysed line has the form sep=X or "sep=X", X is taken as the separator
+// and the analysis stops. Otherwise the characters outside quoted strings are
+// counted per line, and the candidates are the characters that occur on the
+// largest number of lines and, among those, the ones with the fewest distinct
+// per-line occurrence counts.
+//
+// stream             - read from its current position; the position is
+//                      restored before returning.
+// eCharSet           - encoding used to read the lines.
+// separators         - out: the detected separator characters; empty when
+//                      nothing was found.
+// cStringDelimiter   - quote character; 0 selects '"'.
+// bForceCommonSeps   - consider only the characters listed in sCommonSeps.
+// bAllowMultipleSeps - return all matching separators instead of one.
+void SfxObjectShell::DetectCsvSeparators(SvStream& stream, rtl_TextEncoding& eCharSet, OUString& separators, sal_Unicode cStringDelimiter, bool bForceCommonSeps, bool bAllowMultipleSeps)
+{
+    OUString sLine;
+    // Character counts of every analysed line, in reading order.
+    std::vector<std::unordered_map<sal_Unicode, sal_uInt32>> aLinesCharsCount;
+    // Character counts of the line (or multi-line quoted record) being read.
+    std::unordered_map<sal_Unicode, sal_uInt32> aCharsCount;
+    // Per character: (number of lines containing it, number of distinct per-line counts).
+    std::unordered_map<sal_Unicode, std::pair<sal_uInt32, sal_uInt32>> aStats;
+    constexpr sal_uInt32 nMaxLinesToProcess = 20;
+    sal_uInt32 nLinesCount = 0;
+    OUString sInitSeps;
+    // Tab and backslash are spelled as escape sequences.
+    OUString sCommonSeps = ",\t;:| \\/";//Sorted by importance
+    std::unordered_set<sal_Unicode> usetCommonSeps;
+    bool bIsDelimiter = false;
+    // The below two are needed to handle a "not perfect" structure.
+    sal_uInt32 nMaxLinesSameChar = 0;
+    sal_uInt32 nMinDiffs = 0xFFFFFFFF;
+    const sal_uInt64 nInitPos = stream.Tell();
+
+    if (!cStringDelimiter)
+        cStringDelimiter = '\"';
+
+    if (bForceCommonSeps)
+        for (sal_Int32 nComSepIdx = 0; nComSepIdx < sCommonSeps.getLength(); ++nComSepIdx)
+            usetCommonSeps.insert(sCommonSeps[nComSepIdx]);
+    aLinesCharsCount.reserve(nMaxLinesToProcess);
+    separators = "";
+
+    stream.StartReadingUnicodeText(eCharSet);
+    while (stream.ReadUniOrByteStringLine(sLine, eCharSet) && aLinesCharsCount.size() < nMaxLinesToProcess)
+    {
+        if (sLine.isEmpty())
+            continue;
+
+        // An explicit separator declaration on the first line wins over any statistics.
+        if (!nLinesCount)
+        {
+            if (sLine.getLength() == 5 && sLine.startsWithIgnoreAsciiCase("sep="))
+            {
+                separators += OUStringChar(sLine[4]);
+                break;
+            }
+            else if (sLine.getLength() == 7 && sLine[6] == '"' && sLine.startsWithIgnoreAsciiCase("\"sep="))
+            {
+                separators += OUStringChar(sLine[5]);
+                break;
+            }
+        }
+
+        // Count the occurrences of each character within the line.
+        // Skip strings.
+        const sal_Unicode *pEnd = sLine.getStr() + sLine.getLength();
+        for (const sal_Unicode *p = sLine.getStr(); p < pEnd; ++p)
+        {
+            if (*p == cStringDelimiter)
+            {
+                bIsDelimiter = !bIsDelimiter;
+                continue;
+            }
+            if (bIsDelimiter)
+                continue;
+
+            // If restricted only to common separators then skip the rest
+            if (bForceCommonSeps && usetCommonSeps.find(*p) == usetCommonSeps.end())
+                continue;
+
+            ++aCharsCount[*p];
+        }
+
+        // Still inside a quoted string: the record continues on the next line,
+        // so keep accumulating into the same counters.
+        if (bIsDelimiter)
+            continue;
+
+        ++nLinesCount;
+
+        // For each character count the lines that contain it and the number of
+        // different occurrence counts. Also track the global maximum of the
+        // first statistic.
+        for (const auto& rCurLineChar : aCharsCount)
+        {
+            auto aCurStats = aStats.find(rCurLineChar.first);
+            if (aCurStats == aStats.end())
+            {
+                aCurStats = aStats.emplace(rCurLineChar.first,
+                                           std::pair<sal_uInt32, sal_uInt32>(1, 1)).first;
+            }
+            else
+            {
+                ++aCurStats->second.first;// Increment number of lines that contain the current character
+
+                bool bCountSeenBefore = false;
+                for (const auto& rPrevLine : aLinesCharsCount)
+                {
+                    const auto aPrevStats = rPrevLine.find(rCurLineChar.first);
+                    if (aPrevStats != rPrevLine.end() && aPrevStats->second == rCurLineChar.second)
+                    {
+                        bCountSeenBefore = true;
+                        break;
+                    }
+                }
+                if (!bCountSeenBefore)
+                    ++aCurStats->second.second;// Increment number of different occurrence counts.
+            }
+
+            // Update the maximum number of lines that contain the same character.
+            // This must also run for a character seen for the first time, otherwise
+            // input whose candidates never repeat across lines (e.g. a single-line
+            // file) would leave the maximum at 0 and yield no separator at all.
+            if (nMaxLinesSameChar < aCurStats->second.first)
+                nMaxLinesSameChar = aCurStats->second.first;
+        }
+
+        // Archive this line's counters and start the next line with an empty map.
+        aLinesCharsCount.emplace_back();
+        aLinesCharsCount.back().swap(aCharsCount);
+    }
+
+    // Compute the global minimum of different occurrence counts.
+    // But only for characters which occur in a maximum number of lines (previously computed).
+    for (const auto& rStat : aStats)
+        if (rStat.second.first == nMaxLinesSameChar && nMinDiffs > rStat.second.second)
+            nMinDiffs = rStat.second.second;
+
+    // Compute the initial list of separators: those with the maximum lines of occurrence and
+    // the minimum of different occurrence counts.
+    for (const auto& rStat : aStats)
+        if (rStat.second.first == nMaxLinesSameChar && rStat.second.second == nMinDiffs)
+            sInitSeps += OUStringChar(rStat.first);
+
+    // If forced to most common or there are multiple separators then pick up only the most common by importance.
+    if (bForceCommonSeps || sInitSeps.getLength() > 1)
+    {
+        sal_Int32 nInitSepIdx;
+        sal_Int32 nComSepIdx;
+        for (nComSepIdx = 0; nComSepIdx < sCommonSeps.getLength(); nComSepIdx++)
+        {
+            sal_Unicode c = sCommonSeps[nComSepIdx];
+            for (nInitSepIdx = sInitSeps.getLength() - 1; nInitSepIdx >= 0; nInitSepIdx --)
+            {
+                if (c == sInitSeps[nInitSepIdx])
+                {
+                    separators += OUStringChar(c);
+                    break;
+                }
+            }
+
+            // nInitSepIdx >= 0 means the inner loop found a match.
+            if (!bAllowMultipleSeps && nInitSepIdx >= 0)
+                break;
+        }
+    }
+
+    // If there are no most common separators then keep the initial list.
+    if (!bForceCommonSeps && !separators.getLength())
+    {
+        if (bAllowMultipleSeps)
+            separators = sInitSeps;
+        else if (!sInitSeps.isEmpty()) // nothing was counted: leave separators empty
+            separators = OUStringChar(sInitSeps[0]);
+    }
+
+    stream.Seek(nInitPos);
+}
+
+// Resolve "DETECT" placeholders in a CSV filter options string.
+//
+// Only the first three comma separated tokens are interpreted: field
+// separators, string delimiter and character set. A character set token equal
+// to "DETECT" is replaced by the number of the encoding found by
+// DetectCharSet(); a separators token equal to "DETECT" is replaced by the
+// code points found by DetectCsvSeparators(), joined with '/'. Everything
+// after the third token is appended unchanged.
+//
+// stream         - the CSV data to analyse.
+// aFilterOptions - in/out: the options string, rebuilt in place.
+// bForceDetect   - detect charset and separators regardless of the tokens,
+//                  and always emit the first three tokens.
+void SfxObjectShell::DetectCsvFilterOptions(SvStream& stream, OUString& aFilterOptions, bool bForceDetect)
+{
+    // Without options there is nothing to resolve unless detection is enforced.
+    if (!bForceDetect && aFilterOptions.isEmpty())
+        return;
+
+    const std::u16string_view aDetectKeyword = u"DETECT";
+    // The token views below refer to this copy, so aFilterOptions can be rebuilt freely.
+    const OUString aInputOpts = aFilterOptions;
+
+    std::u16string_view aSepsTok;
+    std::u16string_view aDelimTok;
+    std::u16string_view aCharSetTok;
+    std::u16string_view aTail;
+
+    // Get first three tokens as they are the only tokens that affect detection.
+    // A negative index after a token was read is taken as "no further token";
+    // the flags record that a token is present even if it is empty.
+    sal_Int32 nIdx = 0;
+    aSepsTok = o3tl::getToken(aInputOpts, 0, ',', nIdx);
+    const bool bHasDelim = (nIdx >= 0);
+    if (bHasDelim)
+        aDelimTok = o3tl::getToken(aInputOpts, 0, ',', nIdx);
+    const bool bHasCharSet = (nIdx >= 0);
+    if (bHasCharSet)
+        aCharSetTok = o3tl::getToken(aInputOpts, 0, ',', nIdx);
+    const bool bHasTail = (nIdx >= 0);
+    if (bHasTail)
+        aTail = std::basic_string_view<sal_Unicode>(aInputOpts.getStr() + nIdx, aInputOpts.getLength() - nIdx);
+
+    // Character set: detect it, or take the numeric value that was provided.
+    rtl_TextEncoding eCharSet = RTL_TEXTENCODING_DONTKNOW;
+    const bool bDetectCharSet = bForceDetect || aCharSetTok == aDetectKeyword;
+    if (bDetectCharSet)
+    {
+        SvStreamEndian endian;
+        DetectCharSet(stream, eCharSet, endian);
+        if (eCharSet == RTL_TEXTENCODING_UNICODE)
+            stream.SetEndian(endian);
+    }
+    else if (!aCharSetTok.empty())
+        eCharSet = o3tl::toInt32(aCharSetTok);
+
+    // Separators: detect them, or keep the provided token as it is.
+    if (bForceDetect || aSepsTok == aDetectKeyword)
+    {
+        OUString aDetectedSeps;
+        DetectCsvSeparators(stream, eCharSet, aDetectedSeps, static_cast<sal_Unicode>(o3tl::toInt32(aDelimTok)));
+
+        aFilterOptions = "";
+        const sal_Int32 nSepCount = aDetectedSeps.getLength();
+        for (sal_Int32 i = 0; i < nSepCount; ++i)
+        {
+            if (i > 0)
+                aFilterOptions += "/";
+            aFilterOptions += OUString::number(aDetectedSeps[i]);
+        }
+    }
+    else
+    {
+        // For now keep the provided values.
+        aFilterOptions = aSepsTok;
+    }
+
+    // Re-append the remaining tokens in their original order.
+    OUStringChar cComma = u',';
+    if (bHasDelim || bForceDetect)
+        aFilterOptions += cComma + aDelimTok;
+    if (bHasCharSet || bForceDetect)
+        aFilterOptions += cComma + (bDetectCharSet ? OUString::number(eCharSet) : aCharSetTok);
+    if (bHasTail)
+        aFilterOptions += cComma + aTail;
+}
+
+// Run filter option detection for a medium and store the result in its item set.
+//
+// Only the "Text - txt - csv (StarCalc)" filter is handled: the medium's
+// input stream is passed to DetectCsvFilterOptions() and the resulting string
+// is written back as SID_FILE_FILTEROPTIONS.
+//
+// pMedium      - the medium to inspect; must not be null.
+// bForceDetect - run the detection even when the medium carries no filter
+//                options; detection then starts from an empty options string.
+void SfxObjectShell::DetectFilterOptions(SfxMedium* pMedium, bool bForceDetect)
+{
+    std::shared_ptr<const SfxFilter> pFilter = pMedium->GetFilter();
+    SfxItemSet& rSet = pMedium->GetItemSet();
+    const SfxStringItem* pOptions = rSet.GetItem(SID_FILE_FILTEROPTIONS, false);
+
+    // Without a filter there is no way to tell which detection applies; this
+    // must be checked even when detection is enforced, as the filter is
+    // dereferenced below.
+    if (!pFilter)
+        return;
+
+    // Skip if filter options are missing and the detection is not enforced
+    if (!bForceDetect && !pOptions)
+        return;
+
+    if (pFilter->GetName() == "Text - txt - csv (StarCalc)")
+    {
+        css::uno::Reference< css::io::XInputStream > xInputStream = pMedium->GetInputStream();
+        if (!xInputStream.is())
+            return;
+        std::unique_ptr<SvStream> pInStream = utl::UcbStreamHelper::CreateStream(xInputStream);
+        if (!pInStream)
+            return;
+
+        // With enforced detection the options item may be absent.
+        OUString aFilterOptions;
+        if (pOptions)
+            aFilterOptions = pOptions->GetValue();
+        DetectCsvFilterOptions(*pInStream, aFilterOptions, bForceDetect);
+        rSet.Put(SfxStringItem(SID_FILE_FILTEROPTIONS, aFilterOptions));
+    }
+}
+
 ErrCode SfxObjectShell::HandleFilter( SfxMedium* pMedium, SfxObjectShell const 
* pDoc )
 {
     ErrCode nError = ERRCODE_NONE;
@@ -880,6 +1199,14 @@ ErrCode SfxObjectShell::HandleFilter( SfxMedium* pMedium, 
SfxObjectShell const *
     const SfxStringItem* pOptions = rSet.GetItem(SID_FILE_FILTEROPTIONS, 
false);
     const SfxUnoAnyItem* pData = rSet.GetItem(SID_FILTER_DATA, false);
     const bool bTiledRendering = comphelper::LibreOfficeKit::isActive();
+
+    // Process earlier as the input could contain express detection 
instructions.
+    // This is relevant for "automatic" use case. For interactive use case the
+    // FilterOptions should not be detected here (the detection is done before 
entering
+    // interactive state). For now this is focused on CSV files.
+    DetectFilterOptions(pMedium);
+    //::sleep(30);
+
     if ( !pData && (bTiledRendering || !pOptions) )
     {
         css::uno::Reference< XMultiServiceFactory > xServiceManager = 
::comphelper::getProcessServiceFactory();

Reply via email to