This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch dev-1.1.1 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/dev-1.1.1 by this push: new 2b5c3905f2 [cherry-pick][fix](load) skip empty orc file #10593 (#11039) 2b5c3905f2 is described below commit 2b5c3905f2a30db5d67fbdb7014035bea847b517 Author: Mingyu Chen <morningman....@gmail.com> AuthorDate: Wed Jul 20 17:10:19 2022 +0800 [cherry-pick][fix](load) skip empty orc file #10593 (#11039) Co-authored-by: morningman <morning...@apache.org> --- be/src/exec/orc_scanner.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/be/src/exec/orc_scanner.cpp b/be/src/exec/orc_scanner.cpp index 8537189348..6b3384f042 100644 --- a/be/src/exec/orc_scanner.cpp +++ b/be/src/exec/orc_scanner.cpp @@ -438,6 +438,15 @@ Status ORCScanner::open_next_reader() { new ORCFileStream(file_reader.release(), range.path)); _reader = orc::createReader(std::move(inStream), _options); + // Sometimes the upstream system (e.g., Hive) may create an empty ORC file + // which only has a header and footer, without a schema. + // And if we call `_reader->createRowReader()` with selected columns, + // it will throw ParserError: Invalid column selected xx. + // So here we first check its number of rows and skip these kinds of files. + if (_reader->getNumberOfRows() == 0) { + continue; + } + _total_groups = _reader->getNumberOfStripes(); _current_group = 0; _rows_of_group = 0; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org