This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 132df2d5876 [Improvement](inverted index) Remove the check for 
inverted index file exists (#36929)
132df2d5876 is described below

commit 132df2d587692e2b4ff58a8837e57b84ddc9deec
Author: Sun Chenyang <csun5...@gmail.com>
AuthorDate: Fri Jun 28 12:10:48 2024 +0800

    [Improvement](inverted index) Remove the check for inverted index file 
exists (#36929)
    
    ## Proposed changes
    
    backport #36945
---
 be/src/clucene                                     |  2 +-
 be/src/olap/olap_common.h                          |  1 -
 .../rowset/segment_v2/inverted_index_cache.cpp     | 19 ++++----
 .../inverted_index_compound_directory.cpp          | 45 +++++++------------
 .../segment_v2/inverted_index_compound_reader.h    |  2 +-
 .../rowset/segment_v2/inverted_index_reader.cpp    | 51 +++++++++-------------
 be/src/vec/exec/scan/new_olap_scan_node.cpp        |  2 -
 be/src/vec/exec/scan/new_olap_scan_node.h          |  1 -
 be/src/vec/exec/scan/new_olap_scanner.cpp          |  2 -
 9 files changed, 48 insertions(+), 77 deletions(-)

diff --git a/be/src/clucene b/be/src/clucene
index a28adab869f..51f15724f5b 160000
--- a/be/src/clucene
+++ b/be/src/clucene
@@ -1 +1 @@
-Subproject commit a28adab869f1397aefd7c3636d977c406613617d
+Subproject commit 51f15724f5bdb7ae5f6f5e5d7072d43a5bda63f8
diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h
index e5f029d2a29..811e77590f9 100644
--- a/be/src/olap/olap_common.h
+++ b/be/src/olap/olap_common.h
@@ -357,7 +357,6 @@ struct OlapReaderStatistics {
     int64_t inverted_index_query_timer = 0;
     int64_t inverted_index_query_cache_hit = 0;
     int64_t inverted_index_query_cache_miss = 0;
-    int64_t inverted_index_query_file_exists_timer = 0;
     int64_t inverted_index_query_null_bitmap_timer = 0;
     int64_t inverted_index_query_bitmap_copy_timer = 0;
     int64_t inverted_index_query_bitmap_op_timer = 0;
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_cache.cpp 
b/be/src/olap/rowset/segment_v2/inverted_index_cache.cpp
index 035e28efabd..41e3b134a32 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_cache.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_cache.cpp
@@ -136,18 +136,17 @@ Status 
InvertedIndexSearcherCache::get_index_searcher(const io::FileSystemSPtr&
             std::unique_ptr<MemTracker>(new 
MemTracker("InvertedIndexSearcherCacheWithRead"));
 #ifndef BE_TEST
     {
-        bool exists = false;
-        {
-            SCOPED_RAW_TIMER(&stats->inverted_index_query_file_exists_timer);
-            RETURN_IF_ERROR(fs->exists(file_path, &exists));
-        }
-        if (!exists) {
-            return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>(
-                    "inverted index path: {} not exist.", file_path);
-        }
         SCOPED_RAW_TIMER(&stats->inverted_index_searcher_open_timer);
         SCOPED_CONSUME_MEM_TRACKER(mem_tracker.get());
-        index_searcher = build_index_searcher(fs, index_dir, file_name);
+        try {
+            index_searcher = build_index_searcher(fs, index_dir, file_name);
+        } catch (CLuceneError& err) {
+            if (err.number() == CL_ERR_FileNotFound) {
+                return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>(
+                        "inverted index path: {} not exist.", file_path);
+            }
+            throw err;
+        }
     }
 #endif
 
diff --git 
a/be/src/olap/rowset/segment_v2/inverted_index_compound_directory.cpp 
b/be/src/olap/rowset/segment_v2/inverted_index_compound_directory.cpp
index 0eb7e31a027..1c1857f2ac2 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_compound_directory.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_compound_directory.cpp
@@ -266,30 +266,26 @@ bool DorisCompoundDirectory::FSIndexInput::open(const 
io::FileSystemSPtr& fs, co
     io::FileBlockCachePathPolicy cache_policy;
     auto type = config::enable_file_cache ? config::file_cache_type : "";
     io::FileReaderOptions reader_options(io::cache_type_from_string(type), 
cache_policy);
-    if (!fs->open_file(fd, reader_options, &h->_reader).ok()) {
-        error.set(CL_ERR_IO, "open file error");
+    auto st = fs->open_file(fd, reader_options, &h->_reader);
+    if (st.is_not_found()) {
+        error.set(CL_ERR_FileNotFound, "File does not exist");
+    } else if (st.is_io_error()) {
+        error.set(CL_ERR_IO, "File open io error");
+    } else if (st.code() == ErrorCode::PERMISSION_DENIED) {
+        error.set(CL_ERR_IO, "File Access denied");
+    } else {
+        error.set(CL_ERR_IO, "Could not open file");
     }
 
     //Check if a valid handle was retrieved
-    if (h->_reader) {
+    if (st.ok() && h->_reader) {
         //Store the file length
         h->_length = h->_reader->size();
         h->_fpos = 0;
         ret = _CLNEW FSIndexInput(h, buffer_size);
         return true;
-
-    } else {
-        int err = errno;
-        if (err == ENOENT) {
-            error.set(CL_ERR_IO, "File does not exist");
-        } else if (err == EACCES) {
-            error.set(CL_ERR_IO, "File Access denied");
-        } else if (err == EMFILE) {
-            error.set(CL_ERR_IO, "Too many open files");
-        } else {
-            error.set(CL_ERR_IO, "Could not open file");
-        }
     }
+
     delete h->_shared_lock;
     _CLDECDELETE(h)
     return false;
@@ -532,19 +528,6 @@ void DorisCompoundDirectory::init(const 
io::FileSystemSPtr& _fs, const char* _pa
     }
 
     lucene::store::Directory::setLockFactory(lock_factory);
-
-    // It's fail checking directory existence in S3.
-    if (fs->type() == io::FileSystemType::S3) {
-        return;
-    }
-    bool exists = false;
-    LOG_AND_THROW_IF_ERROR(fs->exists(directory, &exists),
-                           "Doris compound directory init IO error");
-    if (!exists) {
-        auto e = "Doris compound directory init error: " + directory + " is 
not a directory";
-        LOG(WARNING) << e;
-        _CLTHROWA(CL_ERR_IO, e.c_str());
-    }
 }
 
 void DorisCompoundDirectory::priv_getFN(char* buffer, const char* name) const {
@@ -616,7 +599,11 @@ int64_t DorisCompoundDirectory::fileLength(const char* 
name) const {
     char buffer[CL_MAX_DIR];
     priv_getFN(buffer, name);
     int64_t size = -1;
-    LOG_AND_THROW_IF_ERROR(fs->file_size(buffer, &size), "Get file size IO 
error");
+    auto st = fs->file_size(buffer, &size);
+    if (st.is_not_found()) {
+        _CLTHROWA(CL_ERR_FileNotFound, "File does not exist");
+    }
+    LOG_AND_THROW_IF_ERROR(st, "Get file size IO error");
     return size;
 }
 
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_compound_reader.h 
b/be/src/olap/rowset/segment_v2/inverted_index_compound_reader.h
index c084141c656..d9daadb2b04 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_compound_reader.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index_compound_reader.h
@@ -54,7 +54,7 @@ private:
     lucene::store::RAMDirectory* ram_dir;
     std::string directory;
     std::string file_name;
-    CL_NS(store)::IndexInput* stream;
+    CL_NS(store)::IndexInput* stream = nullptr;
 
     using EntriesType =
             lucene::util::CLHashMap<char*, ReaderFileEntry*, 
lucene::util::Compare::Char,
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp 
b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
index 7db21a96c65..b6b8f9c0441 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
@@ -99,13 +99,6 @@ bool 
InvertedIndexReader::_is_match_query(InvertedIndexQueryType query_type) {
             query_type == InvertedIndexQueryType::MATCH_REGEXP_QUERY);
 }
 
-bool InvertedIndexReader::indexExists(io::Path& index_file_path) {
-    // SCOPED_RAW_TIMER(&stats->inverted_index_query_file_exists_timer);
-    bool exists = false;
-    RETURN_IF_ERROR(_fs->exists(index_file_path, &exists));
-    return exists;
-}
-
 std::unique_ptr<lucene::analysis::Analyzer> 
InvertedIndexReader::create_analyzer(
         InvertedIndexCtx* inverted_index_ctx) {
     std::unique_ptr<lucene::analysis::Analyzer> analyzer;
@@ -210,21 +203,18 @@ Status 
InvertedIndexReader::read_null_bitmap(OlapReaderStatistics* stats,
             return Status::OK();
         }
 
-        bool exists = false;
-        {
-            SCOPED_RAW_TIMER(&stats->inverted_index_query_file_exists_timer);
-            RETURN_IF_ERROR(_fs->exists(index_file_path, &exists));
-        }
-        if (!exists) {
-            LOG(WARNING) << "inverted index: " << index_file_path.native() << 
" not exist.";
-            return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>(
-                    "inverted index path: {} not exist.", 
index_file_path.native());
-        }
-
         if (!dir) {
-            dir = new DorisCompoundReader(
-                    DorisCompoundDirectoryFactory::getDirectory(_fs, 
index_dir.c_str()),
-                    index_file_name.c_str(), 
config::inverted_index_read_buffer_size);
+            try {
+                dir = new DorisCompoundReader(
+                        DorisCompoundDirectoryFactory::getDirectory(_fs, 
index_dir.c_str()),
+                        index_file_name.c_str(), 
config::inverted_index_read_buffer_size);
+            } catch (CLuceneError& err) {
+                if (err.number() == CL_ERR_FileNotFound) {
+                    return 
Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>(
+                            "inverted index path: {} not exist.", 
index_file_path.native());
+                }
+                throw err;
+            }
             owned_dir = true;
         }
 
@@ -763,17 +753,18 @@ BkdIndexReader::BkdIndexReader(io::FileSystemSPtr fs, 
const std::string& path,
     auto index_dir = io_path.parent_path();
     auto index_file_name = 
InvertedIndexDescriptor::get_index_file_name(io_path.filename(),
                                                                         
index_meta->index_id());
-
-    // check index file existence
     auto index_file = index_dir / index_file_name;
-    if (!indexExists(index_file)) {
-        LOG(WARNING) << "bkd index: " << index_file.string() << " not exist.";
-        return;
-    }
     _file_full_path = index_file;
-    _compoundReader = std::make_unique<DorisCompoundReader>(
-            DorisCompoundDirectoryFactory::getDirectory(fs, index_dir.c_str()),
-            index_file_name.c_str(), config::inverted_index_read_buffer_size);
+    try {
+        _compoundReader = std::make_unique<DorisCompoundReader>(
+                DorisCompoundDirectoryFactory::getDirectory(fs, 
index_dir.c_str()),
+                index_file_name.c_str(), 
config::inverted_index_read_buffer_size);
+    } catch (CLuceneError& err) {
+        if (err.number() == CL_ERR_FileNotFound) {
+            LOG(WARNING) << "bkd index: " << index_file.string() << " not 
exist.";
+            return;
+        }
+    }
 }
 
 Status BkdIndexReader::new_iterator(OlapReaderStatistics* stats, RuntimeState* 
runtime_state,
diff --git a/be/src/vec/exec/scan/new_olap_scan_node.cpp 
b/be/src/vec/exec/scan/new_olap_scan_node.cpp
index 61147cc77a8..98a2371fa68 100644
--- a/be/src/vec/exec/scan/new_olap_scan_node.cpp
+++ b/be/src/vec/exec/scan/new_olap_scan_node.cpp
@@ -173,8 +173,6 @@ Status NewOlapScanNode::_init_profile() {
     _inverted_index_query_timer = ADD_TIMER(_segment_profile, 
"InvertedIndexQueryTime");
     _inverted_index_query_null_bitmap_timer =
             ADD_TIMER(_segment_profile, "InvertedIndexQueryNullBitmapTime");
-    _inverted_index_query_file_exists_timer =
-            ADD_TIMER(_segment_profile, "InvertedIndexQueryFileExistsTime");
     _inverted_index_query_bitmap_copy_timer =
             ADD_TIMER(_segment_profile, "InvertedIndexQueryBitmapCopyTime");
     _inverted_index_query_bitmap_op_timer =
diff --git a/be/src/vec/exec/scan/new_olap_scan_node.h 
b/be/src/vec/exec/scan/new_olap_scan_node.h
index 35b4290ecfa..a8e0f6dde7e 100644
--- a/be/src/vec/exec/scan/new_olap_scan_node.h
+++ b/be/src/vec/exec/scan/new_olap_scan_node.h
@@ -191,7 +191,6 @@ private:
     RuntimeProfile::Counter* _inverted_index_query_cache_hit_counter = nullptr;
     RuntimeProfile::Counter* _inverted_index_query_cache_miss_counter = 
nullptr;
     RuntimeProfile::Counter* _inverted_index_query_timer = nullptr;
-    RuntimeProfile::Counter* _inverted_index_query_file_exists_timer = nullptr;
     RuntimeProfile::Counter* _inverted_index_query_null_bitmap_timer = nullptr;
     RuntimeProfile::Counter* _inverted_index_query_bitmap_copy_timer = nullptr;
     RuntimeProfile::Counter* _inverted_index_query_bitmap_op_timer = nullptr;
diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp 
b/be/src/vec/exec/scan/new_olap_scanner.cpp
index f80802cc2f7..ddecaccd195 100644
--- a/be/src/vec/exec/scan/new_olap_scanner.cpp
+++ b/be/src/vec/exec/scan/new_olap_scanner.cpp
@@ -587,8 +587,6 @@ void NewOlapScanner::_update_counters_before_close() {
     COUNTER_UPDATE(olap_parent->_inverted_index_query_cache_miss_counter,
                    stats.inverted_index_query_cache_miss);
     COUNTER_UPDATE(olap_parent->_inverted_index_query_timer, 
stats.inverted_index_query_timer);
-    COUNTER_UPDATE(olap_parent->_inverted_index_query_file_exists_timer,
-                   stats.inverted_index_query_file_exists_timer);
     COUNTER_UPDATE(olap_parent->_inverted_index_query_null_bitmap_timer,
                    stats.inverted_index_query_null_bitmap_timer);
     COUNTER_UPDATE(olap_parent->_inverted_index_query_bitmap_copy_timer,


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to