This is an automated email from the ASF dual-hosted git repository. liaoxin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
     new 3216257096f [fix](csv reader) fix core dump when parsing csv with enclose (#45485)
3216257096f is described below

commit 3216257096f98b19b07c88d84ef2ab976385b1f3
Author: hui lai <lai...@selectdb.com>
AuthorDate: Wed Dec 25 09:26:44 2024 +0800

    [fix](csv reader) fix core dump when parsing csv with enclose (#45485)
---
 be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp      | 3 ++-
 .../load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.out    | 2 ++
 .../data/load_p0/stream_load/test_csv_with_enclose_and_escape.out      | 2 ++
 .../load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.groovy | 2 +-
 .../suites/load_p0/stream_load/test_csv_with_enclose_and_escape.groovy | 2 +-
 5 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp b/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp
index 9a09a90d1aa..ad86cca212b 100644
--- a/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp
+++ b/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp
@@ -173,8 +173,9 @@ void EncloseCsvLineReaderContext::_on_pre_match_enclose(const uint8_t* start, si
 
 void EncloseCsvLineReaderContext::_on_match_enclose(const uint8_t* start, size_t& len) {
     const uint8_t* curr_start = start + _idx;
+    size_t curr_len = len - _idx;
     const uint8_t* delim_pos =
-            find_col_sep_func(curr_start, _column_sep_len, _column_sep.c_str(), _column_sep_len);
+            find_col_sep_func(curr_start, curr_len, _column_sep.c_str(), _column_sep_len);
 
     if (delim_pos != nullptr) [[likely]] {
         on_col_sep_found(start, delim_pos);
diff --git a/regression-test/data/load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.out b/regression-test/data/load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.out
index 53aea0d8f89..0c1450e35fe 100644
--- a/regression-test/data/load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.out
+++ b/regression-test/data/load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.out @@ -9,3 +9,5 @@ 3 abc"de,fg"h 2023-07-15 i\nj,k\n" 2023-07-20:05:48:31 ghi 6 ab"c 2023-07-20 d"ef" 2023-07-20:05:48:31 "g"hi 7 aaa 2023-07-20 2023-07-20:05:48:31 +8 aaa"bbb"ccc 2023-07-20 "aa"bb 2023-07-20:05:48:31 aa"bb" +9 aa,"bbb cc" 2023-07-20 ""aa"bb ,2023-07-20:05:48:31,"aa"bb" diff --git a/regression-test/data/load_p0/stream_load/test_csv_with_enclose_and_escape.out b/regression-test/data/load_p0/stream_load/test_csv_with_enclose_and_escape.out index 18b4968b7cd..5646d96230f 100644 --- a/regression-test/data/load_p0/stream_load/test_csv_with_enclose_and_escape.out +++ b/regression-test/data/load_p0/stream_load/test_csv_with_enclose_and_escape.out @@ -5,6 +5,8 @@ 3 abc"de,fg"h 2023-07-15 i\nj,k\n" 2023-07-20T05:48:31 ghi 6 ab"c 2023-07-20 d"ef" 2023-07-20T05:48:31 "g"hi 7 aaa 2023-07-20 2023-07-20T05:48:31 +8 aaa"bbb"ccc 2023-07-20 "aa"bb 2023-07-20T05:48:31 aa"bb" +9 aa,"bbb \N 2023-07-20 \N ,2023-07-20:05:48:31,"aa"bb" 10 ab@@cd@@efg 2023-07-20 ab@@cd$$$efg 2023-07-20T05:48:31 @@ab$$$cd$$$ 10 ab@@cd@@efg 2023-07-20 ab@@cd$$$efg 2023-07-20T05:48:31 @@ab$$$cd$$$ 10 abc \N "def" \N "ghi" diff --git a/regression-test/suites/load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.groovy b/regression-test/suites/load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.groovy index 5991a62bb79..06f48eac353 100644 --- a/regression-test/suites/load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.groovy +++ b/regression-test/suites/load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.groovy @@ -18,7 +18,7 @@ suite("test_csv_with_enclose_and_escapeS3_load", "load_p0") { - def tableName = "test_csv_with_enclose_and_escape" + def tableName = "test_csv_with_enclose_and_escapeS3_load" def s3BucketName = getS3BucketName() sql """ DROP TABLE IF EXISTS ${tableName} """ diff --git a/regression-test/suites/load_p0/stream_load/test_csv_with_enclose_and_escape.groovy 
b/regression-test/suites/load_p0/stream_load/test_csv_with_enclose_and_escape.groovy
index 7e24b4158c8..1562fa35cfd 100644
--- a/regression-test/suites/load_p0/stream_load/test_csv_with_enclose_and_escape.groovy
+++ b/regression-test/suites/load_p0/stream_load/test_csv_with_enclose_and_escape.groovy
@@ -85,7 +85,7 @@ suite("test_csv_with_enclose_and_escape", "p0") {
             result, exception, startTime, endTime ->
                 assertTrue(exception == null)
                 def json = parseJson(result)
-                assertEquals("Fail", json.Status)
+                assertEquals("Success", json.Status)
         }
     }
 

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org