This is an automated email from the ASF dual-hosted git repository. damjan pushed a commit to branch trunk in repository https://gitbox.apache.org/repos/asf/openoffice.git
The following commit(s) were added to refs/heads/trunk by this push: new 7b2bc0e6bb Allow reading lines longer than 64 KiB in SvStream, and reading CSV rows and cells longer than 64 KiB in OpenOffice Base. (They are now limited to ~2 GiB). 7b2bc0e6bb is described below commit 7b2bc0e6bba2fbc38d078306fe10d875115d6c86 Author: Damjan Jovanovic <dam...@apache.org> AuthorDate: Mon Jul 22 08:06:52 2024 +0200 Allow reading lines longer than 64 KiB in SvStream, and reading CSV rows and cells longer than 64 KiB in OpenOffice Base. (They are now limited to ~2 GiB). - New member functions were added to the main/tools SvStream class to work with 32-bit ::rtl::OUString and ::rtl::OStringBuffer when reading lines. - The helper class QuotedTokenizedString had to be upgraded from using the 16-bit String to the 32-bit ::rtl::OUString. - The CSV database driver was patched to use ::rtl::OUString and 32-bit indexes in various places. - Luckily, little other work was needed, as the ORowSetValue class already uses 32-bit ::rtl::OUString, and was previously converting 16-bit String to 32-bit ::rtl::OUString internally anyway. Also simplified some of the line parsing logic, and translated some German comments to English. 
Patch by: me --- .../source/drivers/file/quotedstring.cxx | 48 +++--- main/connectivity/source/drivers/flat/ETable.cxx | 192 ++++++++++----------- main/connectivity/source/inc/file/quotedstring.hxx | 18 +- main/connectivity/source/inc/flat/ETable.hxx | 2 +- main/tools/inc/tools/stream.hxx | 4 + main/tools/source/stream/stream.cxx | 83 +++++++++ 6 files changed, 210 insertions(+), 137 deletions(-) diff --git a/main/connectivity/source/drivers/file/quotedstring.cxx b/main/connectivity/source/drivers/file/quotedstring.cxx index 4ea452613b..366036b6f7 100644 --- a/main/connectivity/source/drivers/file/quotedstring.cxx +++ b/main/connectivity/source/drivers/file/quotedstring.cxx @@ -25,6 +25,7 @@ #include "precompiled_connectivity.hxx" #include "file/quotedstring.hxx" #include <rtl/logfile.hxx> +#include <rtl/ustrbuf.hxx> namespace connectivity { @@ -32,21 +33,21 @@ namespace connectivity //= QuotedTokenizedString //================================================================== //------------------------------------------------------------------ - xub_StrLen QuotedTokenizedString::GetTokenCount( sal_Unicode cTok, sal_Unicode cStrDel ) const + sal_Int32 QuotedTokenizedString::GetTokenCount( sal_Unicode cTok, sal_Unicode cStrDel ) const { RTL_LOGFILE_CONTEXT_AUTHOR( aLogger, "file", "ocke.jans...@sun.com", "QuotedTokenizedString::GetTokenCount" ); - const xub_StrLen nLen = m_sString.Len(); + const sal_Int32 nLen = m_sString.getLength(); if ( !nLen ) return 0; - xub_StrLen nTokCount = 1; + sal_Int32 nTokCount = 1; sal_Bool bStart = sal_True; // Stehen wir auf dem ersten Zeichen im Token? sal_Bool bInString = sal_False; // Befinden wir uns INNERHALB eines (cStrDel delimited) String? 
// Suche bis Stringende nach dem ersten nicht uebereinstimmenden Zeichen - for( xub_StrLen i = 0; i < nLen; ++i ) + for( sal_Int32 i = 0; i < nLen; ++i ) { - const sal_Unicode cChar = m_sString.GetChar(i); + const sal_Unicode cChar = m_sString[ i ]; if (bStart) { bStart = sal_False; @@ -63,7 +64,7 @@ namespace connectivity // Wenn jetzt das String-Delimiter-Zeichen auftritt ... if ( cChar == cStrDel ) { - if ((i+1 < nLen) && (m_sString.GetChar(i+1) == cStrDel)) + if ((i+1 < nLen) && (m_sString[ i+1 ] == cStrDel)) { // Verdoppeltes String-Delimiter-Zeichen: ++i; // kein String-Ende, naechstes Zeichen ueberlesen. @@ -91,49 +92,47 @@ namespace connectivity } //------------------------------------------------------------------ - void QuotedTokenizedString::GetTokenSpecial( String& _rStr,xub_StrLen& nStartPos, sal_Unicode cTok, sal_Unicode cStrDel ) const + void QuotedTokenizedString::GetTokenSpecial( ::rtl::OUString* _rStr,sal_Int32& nStartPos, sal_Unicode cTok, sal_Unicode cStrDel ) const { RTL_LOGFILE_CONTEXT_AUTHOR( aLogger, "file", "ocke.jans...@sun.com", "QuotedTokenizedString::GetTokenCount" ); - _rStr.Erase(); - const xub_StrLen nLen = m_sString.Len(); + *_rStr = ::rtl::OUString(); + const sal_Int32 nLen = m_sString.getLength(); if ( nLen ) { - sal_Bool bInString = (nStartPos < nLen) && (m_sString.GetChar(nStartPos) == cStrDel); // Befinden wir uns INNERHALB eines (cStrDel delimited) String? + sal_Bool bInString = (nStartPos < nLen) && (m_sString[ nStartPos ] == cStrDel); // Are we inside a (cStrDel delimited) String? - // Erstes Zeichen ein String-Delimiter? + // Is the first character a string delimiter? if (bInString ) - ++nStartPos; // dieses Zeichen ueberlesen! + ++nStartPos; // ignore this character! 
if ( nStartPos >= nLen ) return; - sal_Unicode* pData = _rStr.AllocBuffer( nLen - nStartPos + 1 ); - const sal_Unicode* pStart = pData; - // Suche bis Stringende nach dem ersten nicht uebereinstimmenden Zeichen - for( xub_StrLen i = nStartPos; i < nLen; ++i ) + ::rtl::OUStringBuffer buffer( nLen - nStartPos); + // Search until the end of string for the first non-matching character + for( sal_Int32 i = nStartPos; i < nLen; ++i ) { - const sal_Unicode cChar = m_sString.GetChar(i); + const sal_Unicode cChar = m_sString[ i ]; if (bInString) { // Wenn jetzt das String-Delimiter-Zeichen auftritt ... if ( cChar == cStrDel ) { - if ((i+1 < nLen) && (m_sString.GetChar(i+1) == cStrDel)) + if ((i+1 < nLen) && (m_sString[ i+1 ] == cStrDel)) { // Verdoppeltes String-Delimiter-Zeichen: // kein String-Ende, naechstes Zeichen ueberlesen. ++i; - *pData++ = m_sString.GetChar(i); // Zeichen gehoert zum Resultat-String + buffer.append( m_sString[ i ] ); // Zeichen gehoert zum Resultat-String } else { // String-Ende bInString = sal_False; - *pData = 0; } } else { - *pData++ = cChar; // Zeichen gehoert zum Resultat-String + buffer.append( cChar ); // Zeichen gehoert zum Resultat-String } } @@ -149,12 +148,11 @@ namespace connectivity } else { - *pData++ = cChar; // Zeichen gehoert zum Resultat-String + buffer.append( cChar ); // Zeichen gehoert zum Resultat-String } } - } // for( xub_StrLen i = nStartPos; i < nLen; ++i ) - *pData = 0; - _rStr.ReleaseBufferAccess(xub_StrLen(pData - pStart)); + } // for( sal_Int32 i = nStartPos; i < nLen; ++i ) + *_rStr = buffer.makeStringAndClear(); } } } diff --git a/main/connectivity/source/drivers/flat/ETable.cxx b/main/connectivity/source/drivers/flat/ETable.cxx index 0f0e576b3d..23dbb3a9fb 100644 --- a/main/connectivity/source/drivers/flat/ETable.cxx +++ b/main/connectivity/source/drivers/flat/ETable.cxx @@ -97,7 +97,7 @@ void OFlatTable::fillColumns(const ::com::sun::star::lang::Locale& _aLocale) aHeaderLine = aFirstLine; } // column count - const 
xub_StrLen nFieldCount = aHeaderLine.GetTokenCount(m_cFieldDelimiter,m_cStringDelimiter); + const sal_Int32 nFieldCount = aHeaderLine.GetTokenCount(m_cFieldDelimiter,m_cStringDelimiter); if(!m_aColumns.isValid()) m_aColumns = new OSQLColumns(); @@ -118,7 +118,7 @@ void OFlatTable::fillColumns(const ::com::sun::star::lang::Locale& _aLocale) // read description const sal_Unicode cDecimalDelimiter = pConnection->getDecimalDelimiter(); const sal_Unicode cThousandDelimiter = pConnection->getThousandDelimiter(); - String aColumnName; + ::rtl::OUString aColumnName; ::rtl::OUString aTypeName; ::comphelper::UStringMixEqual aCase(bCase); ::std::vector<String> aColumnNames,m_aTypeNames; @@ -127,26 +127,26 @@ void OFlatTable::fillColumns(const ::com::sun::star::lang::Locale& _aLocale) sal_Int32 nRowCount = 0; do { - xub_StrLen nStartPosHeaderLine = 0; // use for eficient way to get the tokens - xub_StrLen nStartPosFirstLine = 0; // use for eficient way to get the tokens - xub_StrLen nStartPosFirstLine2 = 0; - for (xub_StrLen i = 0; i < nFieldCount; i++) + sal_Int32 nStartPosHeaderLine = 0; // use for eficient way to get the tokens + sal_Int32 nStartPosFirstLine = 0; // use for eficient way to get the tokens + sal_Int32 nStartPosFirstLine2 = 0; + for (sal_Int32 i = 0; i < nFieldCount; i++) { if ( nRowCount == 0) { if ( bHasHeaderLine ) { - aHeaderLine.GetTokenSpecial(aColumnName,nStartPosHeaderLine,m_cFieldDelimiter,m_cStringDelimiter); - if ( !aColumnName.Len() ) + aHeaderLine.GetTokenSpecial(&aColumnName,nStartPosHeaderLine,m_cFieldDelimiter,m_cStringDelimiter); + if ( aColumnName.isEmpty() ) { - aColumnName = 'C'; + aColumnName = ::rtl::OUString::createFromAscii( "C" ); aColumnName += String::CreateFromInt32(i+1); } } else { // no column name so ... 
- aColumnName = 'C'; + aColumnName = ::rtl::OUString::createFromAscii( "C" ); aColumnName += String::CreateFromInt32(i+1); } aColumnNames.push_back(aColumnName); @@ -157,7 +157,7 @@ void OFlatTable::fillColumns(const ::com::sun::star::lang::Locale& _aLocale) } while(nRowCount < nMaxRowsToScan && readLine(aFirstLine,nCurPos) && !m_pFileStream->IsEof()); - for (xub_StrLen i = 0; i < nFieldCount; i++) + for (sal_Int32 i = 0; i < nFieldCount; i++) { // check if the columname already exists String aAlias(aColumnNames[i]); @@ -183,7 +183,7 @@ void OFlatTable::fillColumns(const ::com::sun::star::lang::Locale& _aLocale) } m_pFileStream->Seek(m_nStartRowFilePos); } -void OFlatTable::impl_fillColumnInfo_nothrow(QuotedTokenizedString& aFirstLine,xub_StrLen& nStartPosFirstLine,xub_StrLen& nStartPosFirstLine2 +void OFlatTable::impl_fillColumnInfo_nothrow(QuotedTokenizedString& aFirstLine,sal_Int32& nStartPosFirstLine,sal_Int32& nStartPosFirstLine2 ,sal_Int32& io_nType,sal_Int32& io_nPrecisions,sal_Int32& io_nScales,String& o_sTypeName ,const sal_Unicode cDecimalDelimiter,const sal_Unicode cThousandDelimiter,const CharClass& aCharClass) { @@ -195,26 +195,26 @@ void OFlatTable::impl_fillColumnInfo_nothrow(QuotedTokenizedString& aFirstLine,x if ( bNumeric ) { // first without fielddelimiter - String aField; - aFirstLine.GetTokenSpecial(aField,nStartPosFirstLine,m_cFieldDelimiter,'\0'); - if (aField.Len() == 0 || - (m_cStringDelimiter && m_cStringDelimiter == aField.GetChar(0))) + ::rtl::OUString aField; + aFirstLine.GetTokenSpecial(&aField,nStartPosFirstLine,m_cFieldDelimiter,'\0'); + if ( aField.isEmpty() || + (m_cStringDelimiter && m_cStringDelimiter == aField[0])) { bNumeric = sal_False; if ( m_cStringDelimiter != '\0' ) - aFirstLine.GetTokenSpecial(aField,nStartPosFirstLine2,m_cFieldDelimiter,m_cStringDelimiter); + aFirstLine.GetTokenSpecial(&aField,nStartPosFirstLine2,m_cFieldDelimiter,m_cStringDelimiter); else nStartPosFirstLine2 = nStartPosFirstLine; } else { - String 
aField2; + ::rtl::OUString aField2; if ( m_cStringDelimiter != '\0' ) - aFirstLine.GetTokenSpecial(aField2,nStartPosFirstLine2,m_cFieldDelimiter,m_cStringDelimiter); + aFirstLine.GetTokenSpecial(&aField2,nStartPosFirstLine2,m_cFieldDelimiter,m_cStringDelimiter); else aField2 = aField; - if (aField2.Len() == 0) + if ( aField2.isEmpty() ) { bNumeric = sal_False; } @@ -224,9 +224,9 @@ void OFlatTable::impl_fillColumnInfo_nothrow(QuotedTokenizedString& aFirstLine,x xub_StrLen nDot = 0; xub_StrLen nDecimalDelCount = 0; xub_StrLen nSpaceCount = 0; - for (xub_StrLen j = 0; j < aField2.Len(); j++) + for (sal_Int32 j = 0; j < aField2.getLength(); j++) { - const sal_Unicode c = aField2.GetChar(j); + const sal_Unicode c = aField2[j]; if ( j == nSpaceCount && m_cFieldDelimiter != 32 && c == 32 ) { ++nSpaceCount; @@ -256,10 +256,11 @@ void OFlatTable::impl_fillColumnInfo_nothrow(QuotedTokenizedString& aFirstLine,x if (bNumeric && cThousandDelimiter) { // Ist der Trenner richtig angegeben? - const String aValue = aField2.GetToken(0,cDecimalDelimiter); - for (sal_Int32 j = aValue.Len() - 4; j >= 0; j -= 4) + sal_Int32 tokenIdx = 0; + const ::rtl::OUString aValue = aField2.getToken(0,cDecimalDelimiter,tokenIdx); + for (sal_Int32 j = aValue.getLength() - 4; j >= 0; j -= 4) { - const sal_Unicode c = aValue.GetChar(static_cast<sal_uInt16>(j)); + const sal_Unicode c = aValue[j]; // nur Ziffern und Dezimalpunkt und Tausender-Trennzeichen? 
if (c == cThousandDelimiter && j) continue; @@ -287,20 +288,20 @@ void OFlatTable::impl_fillColumnInfo_nothrow(QuotedTokenizedString& aFirstLine,x } else if ( io_nType == DataType::DATE || io_nType == DataType::TIMESTAMP || io_nType == DataType::TIME) { - String aField; - aFirstLine.GetTokenSpecial(aField,nStartPosFirstLine,m_cFieldDelimiter,'\0'); - if (aField.Len() == 0 || - (m_cStringDelimiter && m_cStringDelimiter == aField.GetChar(0))) + ::rtl::OUString aField; + aFirstLine.GetTokenSpecial(&aField,nStartPosFirstLine,m_cFieldDelimiter,'\0'); + if ( aField.isEmpty() || + (m_cStringDelimiter && m_cStringDelimiter == aField[0])) { } else { - String aField2; + ::rtl::OUString aField2; if ( m_cStringDelimiter != '\0' ) - aFirstLine.GetTokenSpecial(aField2,nStartPosFirstLine2,m_cFieldDelimiter,m_cStringDelimiter); + aFirstLine.GetTokenSpecial(&aField2,nStartPosFirstLine2,m_cFieldDelimiter,m_cStringDelimiter); else aField2 = aField; - if (aField2.Len() ) + if ( !aField2.isEmpty() ) { try { @@ -378,21 +379,21 @@ void OFlatTable::impl_fillColumnInfo_nothrow(QuotedTokenizedString& aFirstLine,x } else { - String aField; - aFirstLine.GetTokenSpecial(aField,nStartPosFirstLine,m_cFieldDelimiter,'\0'); - if (aField.Len() == 0 || - (m_cStringDelimiter && m_cStringDelimiter == aField.GetChar(0))) + ::rtl::OUString aField; + aFirstLine.GetTokenSpecial(&aField,nStartPosFirstLine,m_cFieldDelimiter,'\0'); + if ( aField.isEmpty() || + (m_cStringDelimiter && m_cStringDelimiter == aField[0])) { if ( m_cStringDelimiter != '\0' ) - aFirstLine.GetTokenSpecial(aField,nStartPosFirstLine2,m_cFieldDelimiter,m_cStringDelimiter); + aFirstLine.GetTokenSpecial(&aField,nStartPosFirstLine2,m_cFieldDelimiter,m_cStringDelimiter); else nStartPosFirstLine2 = nStartPosFirstLine; } else { - String aField2; + ::rtl::OUString aField2; if ( m_cStringDelimiter != '\0' ) - aFirstLine.GetTokenSpecial(aField2,nStartPosFirstLine2,m_cFieldDelimiter,m_cStringDelimiter); + 
aFirstLine.GetTokenSpecial(&aField2,nStartPosFirstLine2,m_cFieldDelimiter,m_cStringDelimiter); } } } @@ -490,7 +491,7 @@ String OFlatTable::getEntry() // name and extension have to coincide if ( m_pConnection->matchesExtension( sExt ) ) { - if ( sExt.getLength() ) + if ( !sExt.isEmpty() ) sName = sName.replaceAt(sName.getLength()-(sExt.getLength()+1),sExt.getLength()+1,::rtl::OUString()); if ( sName == m_Name ) { @@ -616,17 +617,17 @@ sal_Bool OFlatTable::fetchRow(OValueRefRow& _rRow,const OSQLColumns & _rCols,sal const sal_Unicode cDecimalDelimiter = pConnection->getDecimalDelimiter(); const sal_Unicode cThousandDelimiter = pConnection->getThousandDelimiter(); // Felder: - xub_StrLen nStartPos = 0; - String aStr; + sal_Int32 nStartPos = 0; + ::rtl::OUString aStr; OSQLColumns::Vector::const_iterator aIter = _rCols.get().begin(); OSQLColumns::Vector::const_iterator aEnd = _rCols.get().end(); const OValueRefVector::Vector::size_type nCount = _rRow->get().size(); for (OValueRefVector::Vector::size_type i = 1; aIter != aEnd && i < nCount; ++aIter, i++) { - m_aCurrentLine.GetTokenSpecial(aStr,nStartPos,m_cFieldDelimiter,m_cStringDelimiter); + m_aCurrentLine.GetTokenSpecial(&aStr,nStartPos,m_cFieldDelimiter,m_cStringDelimiter); - if (aStr.Len() == 0) + if (aStr.isEmpty()) (_rRow->get())[i]->setNull(); else { @@ -680,7 +681,7 @@ sal_Bool OFlatTable::fetchRow(OValueRefRow& _rRow,const OSQLColumns & _rCols,sal String aStrConverted; if ( DataType::INTEGER != nType ) { - sal_Unicode* pData = aStrConverted.AllocBuffer(aStr.Len()); + sal_Unicode* pData = aStrConverted.AllocBuffer(aStr.getLength()); const sal_Unicode* pStart = pData; OSL_ENSURE(cDecimalDelimiter && nType != DataType::INTEGER || @@ -688,9 +689,9 @@ sal_Bool OFlatTable::fetchRow(OValueRefRow& _rRow,const OSQLColumns & _rCols,sal "FalscherTyp"); // In Standard-Notation (DezimalPUNKT ohne Tausender-Komma) umwandeln: - for (xub_StrLen j = 0; j < aStr.Len(); ++j) + for (sal_Int32 j = 0; j < aStr.getLength(); ++j) { - 
const sal_Unicode cChar = aStr.GetChar(j); + const sal_Unicode cChar = aStr[j]; if (cDecimalDelimiter && cChar == cDecimalDelimiter) *pData++ = '.'; //aStrConverted.Append( '.' ); @@ -703,7 +704,7 @@ sal_Bool OFlatTable::fetchRow(OValueRefRow& _rRow,const OSQLColumns & _rCols,sal else *pData++ = cChar; //aStrConverted.Append(cChar); - } // for (xub_StrLen j = 0; j < aStr.Len(); ++j) + } // for (sal_Int32 j = 0; j < aStr.Len(); ++j) aStrConverted.ReleaseBufferAccess(xub_StrLen(pData - pStart)); } // if ( DataType::INTEGER != nType ) else @@ -898,77 +899,64 @@ sal_Bool OFlatTable::readLine(sal_Int32& _rnCurrentPos) return readLine(m_aCurrentLine, _rnCurrentPos); } // ----------------------------------------------------------------------------- -sal_Bool OFlatTable::readLine(QuotedTokenizedString& line, sal_Int32& _rnCurrentPos) +sal_Bool OFlatTable::readLine(QuotedTokenizedString& lineOut, sal_Int32& _rnCurrentPos) { RTL_LOGFILE_CONTEXT_AUTHOR( aLogger, "flat", "ocke.jans...@sun.com", "OFlatTable::readLine" ); const rtl_TextEncoding nEncoding = m_pConnection->getTextEncoding(); - m_pFileStream->ReadByteStringLine(line,nEncoding); - if (m_pFileStream->IsEof()) - return sal_False; - - QuotedTokenizedString sLine = line; // check if the string continues on next line - xub_StrLen nLastOffset = 0; - bool isQuoted = false; + ::rtl::OUStringBuffer lineBuffer( 4096 ); + ::rtl::OUString separator = ::rtl::OUString(); + static ::rtl::OUString lf = ::rtl::OUString::createFromAscii( "\n" ); bool isFieldStarting = true; - while (sLine.Len() < STRING_MAXLEN) + bool inQuotedField = false; + sal_Bool result = sal_True; + do { - bool wasQuote = false; - const sal_Unicode *p; - p = sLine.GetString().GetBuffer(); - p += nLastOffset; + ::rtl::OUString line; + m_pFileStream->ReadByteStringLine(line,nEncoding); + if (m_pFileStream->IsEof()) + { + result = sal_False; + break; + } - while (*p) + bool wasPreviousQuote = false; + for ( sal_Int32 i = 0; i < line.getLength(); i++ ) { - if 
(isQuoted) + sal_Unicode ch = line[i]; + if ( ch == m_cStringDelimiter ) { - if (*p == m_cStringDelimiter) - wasQuote = !wasQuote; + if ( isFieldStarting ) + { + isFieldStarting = false; + inQuotedField = true; + } else { - if (wasQuote) - { - wasQuote = false; - isQuoted = false; - if (*p == m_cFieldDelimiter) - isFieldStarting = true; - } + wasPreviousQuote = !wasPreviousQuote; } } - else + else if ( ch == m_cFieldDelimiter ) { - if (isFieldStarting) - { - isFieldStarting = false; - if (*p == m_cStringDelimiter) - isQuoted = true; - else if (*p == m_cFieldDelimiter) - isFieldStarting = true; - } - else if (*p == m_cFieldDelimiter) + if ( !inQuotedField ) isFieldStarting = true; } - ++p; - } - - if (wasQuote) - isQuoted = false; - - if (isQuoted) - { - nLastOffset = sLine.Len(); - m_pFileStream->ReadByteStringLine(sLine,nEncoding); - if ( !m_pFileStream->IsEof() ) + else { - line.GetString().Append('\n'); - line.GetString() += sLine.GetString(); - sLine = line; + if ( wasPreviousQuote ) + inQuotedField = false; + wasPreviousQuote = false; } - else - break; } - else - break; - } + if ( wasPreviousQuote ) + inQuotedField = false; + + lineBuffer.append( separator ); + separator = lf; + lineBuffer.append( line ); + + } while ( inQuotedField ); + + lineOut = lineBuffer.makeStringAndClear(); _rnCurrentPos = m_pFileStream->Tell(); - return sal_True; + return result; } diff --git a/main/connectivity/source/inc/file/quotedstring.hxx b/main/connectivity/source/inc/file/quotedstring.hxx index e5e8321d54..1bdd61e318 100644 --- a/main/connectivity/source/inc/file/quotedstring.hxx +++ b/main/connectivity/source/inc/file/quotedstring.hxx @@ -30,21 +30,21 @@ namespace connectivity { //================================================================== - // Ableitung von String mit ueberladenen GetToken/GetTokenCount-Methoden - // Speziell fuer FLAT FILE-Format: Strings koennen gequotet sein + // Derivation of string with overloaded GetToken/GetTokenCount methods + // Especially 
for FLAT FILE format: Strings can be quoted //================================================================== class OOO_DLLPUBLIC_FILE QuotedTokenizedString { - String m_sString; + ::rtl::OUString m_sString; public: QuotedTokenizedString() {} - QuotedTokenizedString(const String& _sString) : m_sString(_sString){} + QuotedTokenizedString(const ::rtl::OUString& _sString) : m_sString(_sString){} - xub_StrLen GetTokenCount( sal_Unicode cTok , sal_Unicode cStrDel ) const; - void GetTokenSpecial( String& _rStr,xub_StrLen& nStartPos, sal_Unicode cTok = ';', sal_Unicode cStrDel = '\0' ) const; - inline String& GetString() { return m_sString; } - inline xub_StrLen Len() const { return m_sString.Len(); } - inline operator String&() { return m_sString; } + sal_Int32 GetTokenCount( sal_Unicode cTok , sal_Unicode cStrDel ) const; + void GetTokenSpecial( ::rtl::OUString* _rStr, sal_Int32& nStartPos, sal_Unicode cTok = ';', sal_Unicode cStrDel = '\0' ) const; + inline ::rtl::OUString& GetString() { return m_sString; } + inline sal_Int32 Len() const { return m_sString.getLength(); } + inline operator ::rtl::OUString&() { return m_sString; } }; } diff --git a/main/connectivity/source/inc/flat/ETable.hxx b/main/connectivity/source/inc/flat/ETable.hxx index c946fb4c83..0e97d7abcf 100644 --- a/main/connectivity/source/inc/flat/ETable.hxx +++ b/main/connectivity/source/inc/flat/ETable.hxx @@ -65,7 +65,7 @@ namespace connectivity sal_Bool CreateFile(const INetURLObject& aFile, sal_Bool& bCreateMemo); sal_Bool readLine(sal_Int32& _rnCurrentPos); sal_Bool readLine(QuotedTokenizedString& line, sal_Int32& _rnCurrentPos); - void impl_fillColumnInfo_nothrow(QuotedTokenizedString& aFirstLine,xub_StrLen& nStartPosFirstLine,xub_StrLen& nStartPosFirstLine2 + void impl_fillColumnInfo_nothrow(QuotedTokenizedString& aFirstLine,sal_Int32& nStartPosFirstLine,sal_Int32& nStartPosFirstLine2 ,sal_Int32& io_nType,sal_Int32& io_nPrecisions,sal_Int32& io_nScales,String& o_sTypeName ,const sal_Unicode 
cDecimalDelimiter,const sal_Unicode cThousandDelimiter,const CharClass& aCharClass); public: diff --git a/main/tools/inc/tools/stream.hxx b/main/tools/inc/tools/stream.hxx index 51a44306d9..6f91567c43 100644 --- a/main/tools/inc/tools/stream.hxx +++ b/main/tools/inc/tools/stream.hxx @@ -31,6 +31,8 @@ #endif #include <tools/ref.hxx> #include <tools/rtti.hxx> +#include <rtl/strbuf.hxx> +#include <rtl/ustring.hxx> class FileCopier; class StreamData; @@ -443,10 +445,12 @@ public: sal_Bool ReadCString( String& rStr, rtl_TextEncoding eToEncode ); sal_Bool ReadCString( String& rStr ) { return ReadCString( rStr, GetStreamCharSet()); } + sal_Bool ReadLine( ::rtl::OStringBuffer& rStr ); sal_Bool ReadLine( ByteString& rStr ); sal_Bool WriteLine( const ByteString& rStr ); sal_Bool WriteLines( const ByteString& rStr ); + sal_Bool ReadByteStringLine( ::rtl::OUString& rStr, rtl_TextEncoding eSrcCharSet ); sal_Bool ReadByteStringLine( String& rStr, rtl_TextEncoding eSrcCharSet ); sal_Bool ReadByteStringLine( String& rStr ) { return ReadByteStringLine( rStr, GetStreamCharSet()); } sal_Bool WriteByteStringLine( const String& rStr, rtl_TextEncoding eDestCharSet ); diff --git a/main/tools/source/stream/stream.cxx b/main/tools/source/stream/stream.cxx index 9805757e36..1b4d919770 100644 --- a/main/tools/source/stream/stream.cxx +++ b/main/tools/source/stream/stream.cxx @@ -676,6 +676,16 @@ void SvStream::ResetError() |* *************************************************************************/ +sal_Bool SvStream::ReadByteStringLine( ::rtl::OUString& rStr, rtl_TextEncoding eSrcCharSet ) +{ + sal_Bool bRet; + ::rtl::OStringBuffer stringBuffer; + + bRet = ReadLine( stringBuffer ); + rStr = ::rtl::OStringToOUString( stringBuffer.makeStringAndClear(), eSrcCharSet ); + return bRet; +} + sal_Bool SvStream::ReadByteStringLine( String& rStr, rtl_TextEncoding eSrcCharSet ) { sal_Bool bRet; @@ -686,6 +696,79 @@ sal_Bool SvStream::ReadByteStringLine( String& rStr, rtl_TextEncoding eSrcCharSe 
return bRet; } +sal_Bool SvStream::ReadLine( ::rtl::OStringBuffer& rStr ) +{ + sal_Char buf[256+1]; + sal_Bool bEnd = sal_False; + sal_Size nOldFilePos = Tell(); + sal_Char c = 0; + sal_Size nTotalLen = 0; + + rStr.setLength( 0 ); + while( !bEnd && !GetError() ) // !!! do not test for EOF, + // !!! because we read in blocks + { + sal_uInt16 nLen = (sal_uInt16)Read( buf, sizeof(buf)-1 ); + if ( !nLen ) + { + if ( rStr.getLength() == 0 ) + { + // the very first block read failed -> abort + bIsEof = sal_True; + return sal_False; + } + else + break; + } + + sal_uInt16 j, n; + for( j = n = 0; j < nLen ; ++j ) + { + c = buf[j]; + if ( c == '\n' || c == '\r' ) + { + bEnd = sal_True; + break; + } + // erAck 26.02.01: Old behavior was no special treatment of '\0' + // character here, but a following rStr+=c did ignore it. Is this + // really intended? Or should a '\0' better terminate a line? + // The nOldFilePos stuff wasn't correct then anyways. + if ( c ) + { + if ( n < j ) + buf[n] = c; + ++n; + } + } + if ( n ) + rStr.append( buf, n ); + nTotalLen += j; + } + + if ( !bEnd && !GetError() && rStr.getLength() ) + bEnd = sal_True; + + nOldFilePos += nTotalLen; + if( Tell() > nOldFilePos ) + nOldFilePos++; + Seek( nOldFilePos ); // seek because of the above BlockRead! + + if ( bEnd && (c=='\r' || c=='\n') ) // Special treatment of DOS files + { + char cTemp; + sal_Size nLen = Read((char*)&cTemp , sizeof(cTemp) ); + if ( nLen ) { + if( cTemp == c || (cTemp != '\n' && cTemp != '\r') ) + Seek( nOldFilePos ); + } + } + + if ( bEnd ) + bIsEof = sal_False; + return bEnd; +} + sal_Bool SvStream::ReadLine( ByteString& rStr ) { sal_Char buf[256+1];