This is an automated email from the ASF dual-hosted git repository.

liaoxin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 3216257096f [fix](csv reader) fix core dump when parsing csv with enclose (#45485)
3216257096f is described below

commit 3216257096f98b19b07c88d84ef2ab976385b1f3
Author: hui lai <lai...@selectdb.com>
AuthorDate: Wed Dec 25 09:26:44 2024 +0800

    [fix](csv reader) fix core dump when parsing csv with enclose (#45485)
---
 be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp      | 3 ++-
 .../load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.out    | 2 ++
 .../data/load_p0/stream_load/test_csv_with_enclose_and_escape.out      | 2 ++
 .../load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.groovy | 2 +-
 .../suites/load_p0/stream_load/test_csv_with_enclose_and_escape.groovy | 2 +-
 5 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp b/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp
index 9a09a90d1aa..ad86cca212b 100644
--- a/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp
+++ b/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp
@@ -173,8 +173,9 @@ void EncloseCsvLineReaderContext::_on_pre_match_enclose(const uint8_t* start, si
 
 void EncloseCsvLineReaderContext::_on_match_enclose(const uint8_t* start, size_t& len) {
     const uint8_t* curr_start = start + _idx;
+    size_t curr_len = len - _idx;
     const uint8_t* delim_pos =
-            find_col_sep_func(curr_start, _column_sep_len, _column_sep.c_str(), _column_sep_len);
+            find_col_sep_func(curr_start, curr_len, _column_sep.c_str(), _column_sep_len);
 
     if (delim_pos != nullptr) [[likely]] {
         on_col_sep_found(start, delim_pos);
diff --git a/regression-test/data/load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.out b/regression-test/data/load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.out
index 53aea0d8f89..0c1450e35fe 100644
--- a/regression-test/data/load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.out
+++ b/regression-test/data/load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.out
@@ -9,3 +9,5 @@
 3      abc"de,fg"h     2023-07-15      i\nj,k\n"       2023-07-20:05:48:31     ghi
 6      ab"c    2023-07-20      d"ef"   2023-07-20:05:48:31     "g"hi
 7      aaa     2023-07-20              2023-07-20:05:48:31     
+8      aaa"bbb"ccc     2023-07-20      "aa"bb  2023-07-20:05:48:31     aa"bb"
+9      aa,"bbb cc"     2023-07-20      ""aa"bb ,2023-07-20:05:48:31,"aa"bb"
diff --git a/regression-test/data/load_p0/stream_load/test_csv_with_enclose_and_escape.out b/regression-test/data/load_p0/stream_load/test_csv_with_enclose_and_escape.out
index 18b4968b7cd..5646d96230f 100644
--- a/regression-test/data/load_p0/stream_load/test_csv_with_enclose_and_escape.out
+++ b/regression-test/data/load_p0/stream_load/test_csv_with_enclose_and_escape.out
@@ -5,6 +5,8 @@
 3      abc"de,fg"h     2023-07-15      i\nj,k\n"       2023-07-20T05:48:31     ghi
 6      ab"c    2023-07-20      d"ef"   2023-07-20T05:48:31     "g"hi
 7      aaa     2023-07-20              2023-07-20T05:48:31     
+8      aaa"bbb"ccc     2023-07-20      "aa"bb  2023-07-20T05:48:31     aa"bb"
+9      aa,"bbb \N      2023-07-20      \N      ,2023-07-20:05:48:31,"aa"bb"
 10     ab@@cd@@efg     2023-07-20      ab@@cd$$$efg    2023-07-20T05:48:31     @@ab$$$cd$$$
 10     ab@@cd@@efg     2023-07-20      ab@@cd$$$efg    2023-07-20T05:48:31     @@ab$$$cd$$$
 10     abc     \N      "def"   \N      "ghi"
diff --git a/regression-test/suites/load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.groovy b/regression-test/suites/load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.groovy
index 5991a62bb79..06f48eac353 100644
--- a/regression-test/suites/load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.groovy
+++ b/regression-test/suites/load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.groovy
@@ -18,7 +18,7 @@
 
 suite("test_csv_with_enclose_and_escapeS3_load", "load_p0") {
 
-    def tableName = "test_csv_with_enclose_and_escape"
+    def tableName = "test_csv_with_enclose_and_escapeS3_load"
     def s3BucketName = getS3BucketName()
 
     sql """ DROP TABLE IF EXISTS ${tableName} """
diff --git a/regression-test/suites/load_p0/stream_load/test_csv_with_enclose_and_escape.groovy b/regression-test/suites/load_p0/stream_load/test_csv_with_enclose_and_escape.groovy
index 7e24b4158c8..1562fa35cfd 100644
--- a/regression-test/suites/load_p0/stream_load/test_csv_with_enclose_and_escape.groovy
+++ b/regression-test/suites/load_p0/stream_load/test_csv_with_enclose_and_escape.groovy
@@ -85,7 +85,7 @@ suite("test_csv_with_enclose_and_escape", "p0") {
             result, exception, startTime, endTime ->
                 assertTrue(exception == null)
                 def json = parseJson(result)
-                assertEquals("Fail", json.Status)
+                assertEquals("Success", json.Status)
         }
     }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to