This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 971e10a9db7 [fix](csv-reader) fix column split error when there is escape character (#34364)
971e10a9db7 is described below

commit 971e10a9db782c9986b20e1209468e4d7aeedf71
Author: Xin Liao <[email protected]>
AuthorDate: Thu May 2 09:26:23 2024 +0800

    [fix](csv-reader) fix column split error when there is escape character (#34364)
---
 be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp | 7 +++----
 be/src/vec/exec/format/file_reader/new_plain_text_line_reader.h   | 2 ++
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp b/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp
index c395e52f36b..fefd5ecae67 100644
--- a/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp
+++ b/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp
@@ -143,13 +143,12 @@ void EncloseCsvLineReaderContext::_on_normal(const uint8_t* start, size_t& len)
 }
 
 void EncloseCsvLineReaderContext::_on_pre_match_enclose(const uint8_t* start, 
size_t& len) {
-    bool should_escape = false;
     do {
         do {
             if (start[_idx] == _escape) [[unlikely]] {
-                should_escape = !should_escape;
-            } else if (should_escape) [[unlikely]] {
-                should_escape = false;
+                _should_escape = !_should_escape;
+            } else if (_should_escape) [[unlikely]] {
+                _should_escape = false;
             } else if (start[_idx] == _enclose) [[unlikely]] {
                 _state.forward_to(ReaderState::MATCH_ENCLOSE);
                 ++_idx;
diff --git a/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.h b/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.h
index babfc13641a..0b0d9f133fa 100644
--- a/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.h
+++ b/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.h
@@ -135,6 +135,7 @@ public:
 
     inline void refresh_impl() {
         _idx = 0;
+        _should_escape = false;
         _result = nullptr;
         _column_sep_positions.clear();
         _state.reset();
@@ -168,6 +169,7 @@ private:
     const size_t _column_sep_len;
 
     size_t _idx = 0;
+    bool _should_escape = false;
 
     const std::string _column_sep;
     std::vector<size_t> _column_sep_positions;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to