This is an automated email from the ASF dual-hosted git repository. airborne pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new 132df2d5876 [Improvement](inverted index) Remove the check for inverted index file exists (#36929) 132df2d5876 is described below commit 132df2d587692e2b4ff58a8837e57b84ddc9deec Author: Sun Chenyang <csun5...@gmail.com> AuthorDate: Fri Jun 28 12:10:48 2024 +0800 [Improvement](inverted index) Remove the check for inverted index file exists (#36929) ## Proposed changes backport #36945 --- be/src/clucene | 2 +- be/src/olap/olap_common.h | 1 - .../rowset/segment_v2/inverted_index_cache.cpp | 19 ++++---- .../inverted_index_compound_directory.cpp | 45 +++++++------------ .../segment_v2/inverted_index_compound_reader.h | 2 +- .../rowset/segment_v2/inverted_index_reader.cpp | 51 +++++++++------------- be/src/vec/exec/scan/new_olap_scan_node.cpp | 2 - be/src/vec/exec/scan/new_olap_scan_node.h | 1 - be/src/vec/exec/scan/new_olap_scanner.cpp | 2 - 9 files changed, 48 insertions(+), 77 deletions(-) diff --git a/be/src/clucene b/be/src/clucene index a28adab869f..51f15724f5b 160000 --- a/be/src/clucene +++ b/be/src/clucene @@ -1 +1 @@ -Subproject commit a28adab869f1397aefd7c3636d977c406613617d +Subproject commit 51f15724f5bdb7ae5f6f5e5d7072d43a5bda63f8 diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h index e5f029d2a29..811e77590f9 100644 --- a/be/src/olap/olap_common.h +++ b/be/src/olap/olap_common.h @@ -357,7 +357,6 @@ struct OlapReaderStatistics { int64_t inverted_index_query_timer = 0; int64_t inverted_index_query_cache_hit = 0; int64_t inverted_index_query_cache_miss = 0; - int64_t inverted_index_query_file_exists_timer = 0; int64_t inverted_index_query_null_bitmap_timer = 0; int64_t inverted_index_query_bitmap_copy_timer = 0; int64_t inverted_index_query_bitmap_op_timer = 0; diff --git a/be/src/olap/rowset/segment_v2/inverted_index_cache.cpp b/be/src/olap/rowset/segment_v2/inverted_index_cache.cpp index 035e28efabd..41e3b134a32 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_cache.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_cache.cpp @@ -136,18 +136,17 @@ Status InvertedIndexSearcherCache::get_index_searcher(const io::FileSystemSPtr& std::unique_ptr<MemTracker>(new MemTracker("InvertedIndexSearcherCacheWithRead")); #ifndef BE_TEST { - bool exists = false; - { - SCOPED_RAW_TIMER(&stats->inverted_index_query_file_exists_timer); - RETURN_IF_ERROR(fs->exists(file_path, &exists)); - } - if (!exists) { - return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>( - "inverted index path: {} not exist.", file_path); - } SCOPED_RAW_TIMER(&stats->inverted_index_searcher_open_timer); SCOPED_CONSUME_MEM_TRACKER(mem_tracker.get()); - index_searcher = build_index_searcher(fs, index_dir, file_name); + try { + index_searcher = build_index_searcher(fs, index_dir, file_name); + } catch (CLuceneError& err) { + if (err.number() == CL_ERR_FileNotFound) { + return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>( + "inverted index path: {} not exist.", file_path); + } + throw err; + } } #endif diff --git a/be/src/olap/rowset/segment_v2/inverted_index_compound_directory.cpp b/be/src/olap/rowset/segment_v2/inverted_index_compound_directory.cpp index 0eb7e31a027..1c1857f2ac2 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_compound_directory.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_compound_directory.cpp @@ -266,30 +266,26 @@ bool DorisCompoundDirectory::FSIndexInput::open(const io::FileSystemSPtr& fs, co io::FileBlockCachePathPolicy cache_policy; auto type = config::enable_file_cache ? config::file_cache_type : ""; io::FileReaderOptions reader_options(io::cache_type_from_string(type), cache_policy); - if (!fs->open_file(fd, reader_options, &h->_reader).ok()) { - error.set(CL_ERR_IO, "open file error"); + auto st = fs->open_file(fd, reader_options, &h->_reader); + if (st.is_not_found()) { + error.set(CL_ERR_FileNotFound, "File does not exist"); + } else if (st.is_io_error()) { + error.set(CL_ERR_IO, "File open io error"); + } else if (st.code() == ErrorCode::PERMISSION_DENIED) { + error.set(CL_ERR_IO, "File Access denied"); + } else { + error.set(CL_ERR_IO, "Could not open file"); } //Check if a valid handle was retrieved - if (h->_reader) { + if (st.ok() && h->_reader) { //Store the file length h->_length = h->_reader->size(); h->_fpos = 0; ret = _CLNEW FSIndexInput(h, buffer_size); return true; - - } else { - int err = errno; - if (err == ENOENT) { - error.set(CL_ERR_IO, "File does not exist"); - } else if (err == EACCES) { - error.set(CL_ERR_IO, "File Access denied"); - } else if (err == EMFILE) { - error.set(CL_ERR_IO, "Too many open files"); - } else { - error.set(CL_ERR_IO, "Could not open file"); - } } + delete h->_shared_lock; _CLDECDELETE(h) return false; @@ -532,19 +528,6 @@ void DorisCompoundDirectory::init(const io::FileSystemSPtr& _fs, const char* _pa } lucene::store::Directory::setLockFactory(lock_factory); - - // It's fail checking directory existence in S3. - if (fs->type() == io::FileSystemType::S3) { - return; - } - bool exists = false; - LOG_AND_THROW_IF_ERROR(fs->exists(directory, &exists), - "Doris compound directory init IO error"); - if (!exists) { - auto e = "Doris compound directory init error: " + directory + " is not a directory"; - LOG(WARNING) << e; - _CLTHROWA(CL_ERR_IO, e.c_str()); - } } void DorisCompoundDirectory::priv_getFN(char* buffer, const char* name) const { @@ -616,7 +599,11 @@ int64_t DorisCompoundDirectory::fileLength(const char* name) const { char buffer[CL_MAX_DIR]; priv_getFN(buffer, name); int64_t size = -1; - LOG_AND_THROW_IF_ERROR(fs->file_size(buffer, &size), "Get file size IO error"); + auto st = fs->file_size(buffer, &size); + if (st.is_not_found()) { + _CLTHROWA(CL_ERR_FileNotFound, "File does not exist"); + } + LOG_AND_THROW_IF_ERROR(st, "Get file size IO error"); return size; } diff --git a/be/src/olap/rowset/segment_v2/inverted_index_compound_reader.h b/be/src/olap/rowset/segment_v2/inverted_index_compound_reader.h index c084141c656..d9daadb2b04 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_compound_reader.h +++ b/be/src/olap/rowset/segment_v2/inverted_index_compound_reader.h @@ -54,7 +54,7 @@ private: lucene::store::RAMDirectory* ram_dir; std::string directory; std::string file_name; - CL_NS(store)::IndexInput* stream; + CL_NS(store)::IndexInput* stream = nullptr; using EntriesType = lucene::util::CLHashMap<char*, ReaderFileEntry*, lucene::util::Compare::Char, diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp index 7db21a96c65..b6b8f9c0441 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp @@ -99,13 +99,6 @@ bool InvertedIndexReader::_is_match_query(InvertedIndexQueryType query_type) { query_type == InvertedIndexQueryType::MATCH_REGEXP_QUERY); } -bool InvertedIndexReader::indexExists(io::Path& index_file_path) { - // SCOPED_RAW_TIMER(&stats->inverted_index_query_file_exists_timer); - bool exists = false; - RETURN_IF_ERROR(_fs->exists(index_file_path, &exists)); - return exists; -} - std::unique_ptr<lucene::analysis::Analyzer> InvertedIndexReader::create_analyzer( InvertedIndexCtx* inverted_index_ctx) { std::unique_ptr<lucene::analysis::Analyzer> analyzer; @@ -210,21 +203,18 @@ Status InvertedIndexReader::read_null_bitmap(OlapReaderStatistics* stats, return Status::OK(); } - bool exists = false; - { - SCOPED_RAW_TIMER(&stats->inverted_index_query_file_exists_timer); - RETURN_IF_ERROR(_fs->exists(index_file_path, &exists)); - } - if (!exists) { - LOG(WARNING) << "inverted index: " << index_file_path.native() << " not exist."; - return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>( - "inverted index path: {} not exist.", index_file_path.native()); - } - if (!dir) { - dir = new DorisCompoundReader( - DorisCompoundDirectoryFactory::getDirectory(_fs, index_dir.c_str()), - index_file_name.c_str(), config::inverted_index_read_buffer_size); + try { + dir = new DorisCompoundReader( + DorisCompoundDirectoryFactory::getDirectory(_fs, index_dir.c_str()), + index_file_name.c_str(), config::inverted_index_read_buffer_size); + } catch (CLuceneError& err) { + if (err.number() == CL_ERR_FileNotFound) { + return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>( + "inverted index path: {} not exist.", index_file_path.native()); + } + throw err; + } owned_dir = true; } @@ -763,17 +753,18 @@ BkdIndexReader::BkdIndexReader(io::FileSystemSPtr fs, const std::string& path, auto index_dir = io_path.parent_path(); auto index_file_name = InvertedIndexDescriptor::get_index_file_name(io_path.filename(), index_meta->index_id()); - - // check index file existence auto index_file = index_dir / index_file_name; - if (!indexExists(index_file)) { - LOG(WARNING) << "bkd index: " << index_file.string() << " not exist."; - return; - } _file_full_path = index_file; - _compoundReader = std::make_unique<DorisCompoundReader>( - DorisCompoundDirectoryFactory::getDirectory(fs, index_dir.c_str()), - index_file_name.c_str(), config::inverted_index_read_buffer_size); + try { + _compoundReader = std::make_unique<DorisCompoundReader>( + DorisCompoundDirectoryFactory::getDirectory(fs, index_dir.c_str()), + index_file_name.c_str(), config::inverted_index_read_buffer_size); + } catch (CLuceneError& err) { + if (err.number() == CL_ERR_FileNotFound) { + LOG(WARNING) << "bkd index: " << index_file.string() << " not exist."; + return; + } + } } Status BkdIndexReader::new_iterator(OlapReaderStatistics* stats, RuntimeState* runtime_state, diff --git a/be/src/vec/exec/scan/new_olap_scan_node.cpp b/be/src/vec/exec/scan/new_olap_scan_node.cpp index 61147cc77a8..98a2371fa68 100644 --- a/be/src/vec/exec/scan/new_olap_scan_node.cpp +++ b/be/src/vec/exec/scan/new_olap_scan_node.cpp @@ -173,8 +173,6 @@ Status NewOlapScanNode::_init_profile() { _inverted_index_query_timer = ADD_TIMER(_segment_profile, "InvertedIndexQueryTime"); _inverted_index_query_null_bitmap_timer = ADD_TIMER(_segment_profile, "InvertedIndexQueryNullBitmapTime"); - _inverted_index_query_file_exists_timer = - ADD_TIMER(_segment_profile, "InvertedIndexQueryFileExistsTime"); _inverted_index_query_bitmap_copy_timer = ADD_TIMER(_segment_profile, "InvertedIndexQueryBitmapCopyTime"); _inverted_index_query_bitmap_op_timer = diff --git a/be/src/vec/exec/scan/new_olap_scan_node.h b/be/src/vec/exec/scan/new_olap_scan_node.h index 35b4290ecfa..a8e0f6dde7e 100644 --- a/be/src/vec/exec/scan/new_olap_scan_node.h +++ b/be/src/vec/exec/scan/new_olap_scan_node.h @@ -191,7 +191,6 @@ private: RuntimeProfile::Counter* _inverted_index_query_cache_hit_counter = nullptr; RuntimeProfile::Counter* _inverted_index_query_cache_miss_counter = nullptr; RuntimeProfile::Counter* _inverted_index_query_timer = nullptr; - RuntimeProfile::Counter* _inverted_index_query_file_exists_timer = nullptr; RuntimeProfile::Counter* _inverted_index_query_null_bitmap_timer = nullptr; RuntimeProfile::Counter* _inverted_index_query_bitmap_copy_timer = nullptr; RuntimeProfile::Counter* _inverted_index_query_bitmap_op_timer = nullptr; diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp b/be/src/vec/exec/scan/new_olap_scanner.cpp index f80802cc2f7..ddecaccd195 100644 --- a/be/src/vec/exec/scan/new_olap_scanner.cpp +++ b/be/src/vec/exec/scan/new_olap_scanner.cpp @@ -587,8 +587,6 @@ void NewOlapScanner::_update_counters_before_close() { COUNTER_UPDATE(olap_parent->_inverted_index_query_cache_miss_counter, stats.inverted_index_query_cache_miss); COUNTER_UPDATE(olap_parent->_inverted_index_query_timer, stats.inverted_index_query_timer); - COUNTER_UPDATE(olap_parent->_inverted_index_query_file_exists_timer, - stats.inverted_index_query_file_exists_timer); COUNTER_UPDATE(olap_parent->_inverted_index_query_null_bitmap_timer, stats.inverted_index_query_null_bitmap_timer); COUNTER_UPDATE(olap_parent->_inverted_index_query_bitmap_copy_timer, --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org