This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new ca9ebecd0d4 branch-3.0: [fix](inverted index) Clear inverted index
cache from file cache #49685 (#49738)
ca9ebecd0d4 is described below
commit ca9ebecd0d4a707b9eb01883801339836a73103b
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Wed Apr 2 10:47:00 2025 +0800
branch-3.0: [fix](inverted index) Clear inverted index cache from file
cache #49685 (#49738)
Cherry-picked from #49685
Co-authored-by: zzzxl <[email protected]>
---
be/src/olap/rowset/rowset.cpp | 29 ++++++
be/src/olap/rowset/rowset.h | 2 +
.../olap/rowset/segment_v2/inverted_index_desc.cpp | 13 +++
.../olap/rowset/segment_v2/inverted_index_desc.h | 5 +
be/test/olap/rowset/beta_rowset_test.cpp | 107 +++++++++++++++++++++
.../org/apache/doris/regression/suite/Suite.groovy | 8 +-
.../tablets/test_clean_stale_rs_file_cache.groovy | 2 +-
...=> test_clean_stale_rs_index_file_cache.groovy} | 14 +--
8 files changed, 169 insertions(+), 11 deletions(-)
diff --git a/be/src/olap/rowset/rowset.cpp b/be/src/olap/rowset/rowset.cpp
index 3b86504090d..737872575ae 100644
--- a/be/src/olap/rowset/rowset.cpp
+++ b/be/src/olap/rowset/rowset.cpp
@@ -22,6 +22,7 @@
#include "common/config.h"
#include "io/cache/block_file_cache_factory.h"
#include "olap/olap_define.h"
+#include "olap/rowset/segment_v2/inverted_index_desc.h"
#include "olap/segment_loader.h"
#include "olap/tablet_schema.h"
#include "util/time.h"
@@ -128,6 +129,14 @@ void Rowset::clear_cache() {
auto* file_cache =
io::FileCacheFactory::instance()->get_by_path(file_key);
file_cache->remove_if_cached_async(file_key);
}
+
+ // inverted index
+ auto file_names = get_index_file_names();
+ for (const auto& file_name : file_names) {
+ auto file_key = io::BlockFileCache::hash(file_name);
+ auto* file_cache =
io::FileCacheFactory::instance()->get_by_path(file_key);
+ file_cache->remove_if_cached_async(file_key);
+ }
}
}
@@ -165,4 +174,24 @@ void Rowset::merge_rowset_meta(const RowsetMeta& other) {
_schema = _rowset_meta->tablet_schema();
}
+// Returns the inverted index file names of every segment in this rowset.
+//
+// For storage format V1 there is one file per (segment, inverted index) pair, listed
+// segment by segment in the order reported by the schema. For any other format there is
+// a single index file per segment. Names are produced by
+// InvertedIndexDescriptor::get_index_file_name_v1 / get_index_file_name_v2.
+std::vector<std::string> Rowset::get_index_file_names() {
+    std::vector<std::string> file_names;
+    const auto segment_count = num_segments();
+    if (segment_count <= 0) {
+        return file_names;
+    }
+
+    // Loop-invariant values are evaluated once instead of once per generated name.
+    const std::string rowset_id_str = rowset_id().to_string();
+
+    if (_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) {
+        const auto& indexes = _schema->inverted_indexes();
+        file_names.reserve(static_cast<size_t>(segment_count) * indexes.size());
+        for (int64_t seg_id = 0; seg_id < segment_count; ++seg_id) {
+            for (const auto& index : indexes) {
+                file_names.emplace_back(segment_v2::InvertedIndexDescriptor::get_index_file_name_v1(
+                        rowset_id_str, seg_id, index->index_id(), index->get_index_suffix()));
+            }
+        }
+    } else {
+        file_names.reserve(static_cast<size_t>(segment_count));
+        for (int64_t seg_id = 0; seg_id < segment_count; ++seg_id) {
+            file_names.emplace_back(segment_v2::InvertedIndexDescriptor::get_index_file_name_v2(
+                    rowset_id_str, seg_id));
+        }
+    }
+    return file_names;
+}
+
} // namespace doris
diff --git a/be/src/olap/rowset/rowset.h b/be/src/olap/rowset/rowset.h
index be21f29888e..db6872875a5 100644
--- a/be/src/olap/rowset/rowset.h
+++ b/be/src/olap/rowset/rowset.h
@@ -312,6 +312,8 @@ public:
Result<std::string> segment_path(int64_t seg_id);
+ std::vector<std::string> get_index_file_names();
+
protected:
friend class RowsetFactory;
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_desc.cpp
b/be/src/olap/rowset/segment_v2/inverted_index_desc.cpp
index e909bc1e0a9..faa8dff7a81 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_desc.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_desc.cpp
@@ -76,4 +76,17 @@ std::string
InvertedIndexDescriptor::get_index_file_cache_key(std::string_view i
return fmt::format("{}_{}{}", index_path_prefix, index_id, suffix);
}
+// Builds the file name of a V1-format inverted index file:
+//   "{rowset_id}_{seg_id}_{index_id}[@{index_path_suffix}]{index_suffix}"
+// The "@{index_path_suffix}" part is emitted only when index_path_suffix is non-empty.
+std::string InvertedIndexDescriptor::get_index_file_name_v1(const std::string& rowset_id,
+                                                            int64_t seg_id, int64_t index_id,
+                                                            std::string_view index_path_suffix) {
+    // A std::string_view is not guaranteed to be NUL-terminated, so the suffix is appended
+    // with its explicit length rather than through data() interpreted as a C string.
+    std::string suffix;
+    if (!index_path_suffix.empty()) {
+        suffix.reserve(index_path_suffix.size() + 1);
+        suffix.push_back('@');
+        suffix.append(index_path_suffix.data(), index_path_suffix.size());
+    }
+    return fmt::format("{}_{}_{}{}{}", rowset_id, seg_id, index_id, suffix, index_suffix);
+}
+
+// Builds the file name of a V2-format inverted index file, which is shared by all
+// indexes of one segment: "{rowset_id}_{seg_id}{index_suffix}".
+std::string InvertedIndexDescriptor::get_index_file_name_v2(const std::string& rowset_id,
+                                                            int64_t seg_id) {
+    std::string file_name = fmt::format("{}_{}", rowset_id, seg_id);
+    file_name += index_suffix;
+    return file_name;
+}
+
} // namespace doris::segment_v2
\ No newline at end of file
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_desc.h
b/be/src/olap/rowset/segment_v2/inverted_index_desc.h
index f421c7f3790..23c85bcf697 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_desc.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index_desc.h
@@ -50,6 +50,11 @@ public:
int64_t index_id,
std::string_view
index_path_suffix);
+ static std::string get_index_file_name_v1(const std::string& rowset_id,
int64_t seg_id,
+ int64_t index_id,
std::string_view index_path_suffix);
+
+ static std::string get_index_file_name_v2(const std::string& rowset_id,
int64_t seg_id);
+
static const char* get_temporary_null_bitmap_file_name() { return
"null_bitmap"; }
static const char* get_temporary_bkd_index_data_file_name() { return
"bkd"; }
static const char* get_temporary_bkd_index_meta_file_name() { return
"bkd_meta"; }
diff --git a/be/test/olap/rowset/beta_rowset_test.cpp
b/be/test/olap/rowset/beta_rowset_test.cpp
index 2e13436b3d3..a1e6a0ec633 100644
--- a/be/test/olap/rowset/beta_rowset_test.cpp
+++ b/be/test/olap/rowset/beta_rowset_test.cpp
@@ -27,6 +27,7 @@
#include <gen_cpp/olap_common.pb.h>
#include <gtest/gtest-message.h>
#include <gtest/gtest-test-part.h>
+#include <gtest/gtest.h>
#include <stdint.h>
#include <unistd.h>
@@ -44,6 +45,7 @@
#include "io/fs/local_file_system.h"
#include "io/fs/s3_file_system.h"
#include "io/fs/s3_obj_storage_client.h"
+#include "json2pb/json_to_pb.h"
#include "olap/data_dir.h"
#include "olap/olap_common.h"
#include "olap/options.h"
@@ -170,6 +172,61 @@ protected:
EXPECT_EQ(Status::OK(), s);
}
+    // Initializes `pb1` from a canned RowsetMetaPB JSON fixture (rowset_id 540085,
+    // BETA_ROWSET, VISIBLE). The fixture's version range is then overridden with
+    // [start, end] and its creation time with 10000.
+    void init_rs_meta(RowsetMetaSharedPtr& pb1, int64_t start, int64_t end) {
+        std::string json_rowset_meta = R"({
+            "rowset_id": 540085,
+            "tablet_id": 15674,
+            "partition_id": 10000,
+            "txn_id": 4045,
+            "tablet_schema_hash": 567997588,
+            "rowset_type": "BETA_ROWSET",
+            "rowset_state": "VISIBLE",
+            "start_version": 2,
+            "end_version": 2,
+            "num_rows": 3929,
+            "total_disk_size": 84699,
+            "data_disk_size": 84464,
+            "index_disk_size": 235,
+            "empty": false,
+            "load_id": {
+                "hi": -5350970832824939812,
+                "lo": -6717994719194512122
+            },
+            "creation_time": 1553765670
+        })";
+
+        RowsetMetaPB rowset_meta_pb;
+        // Report a fixture that no longer parses right here, instead of letting a
+        // partially filled message surface later as an unrelated-looking failure.
+        EXPECT_TRUE(json2pb::JsonToProtoMessage(json_rowset_meta, &rowset_meta_pb))
+                << "failed to parse rowset meta fixture JSON";
+        rowset_meta_pb.set_start_version(start);
+        rowset_meta_pb.set_end_version(end);
+        rowset_meta_pb.set_creation_time(10000);
+
+        pb1->init_from_pb(rowset_meta_pb);
+    }
+
+    // Fills `column_pb` with a nullable column (unique id, name, type, key flag) and
+    // `tablet_index` with an INVERTED index on that column (index id, index name and,
+    // when given, its properties).
+    void construct_column(ColumnPB* column_pb, TabletIndexPB* tablet_index, int64_t index_id,
+                          const std::string& index_name, int32_t col_unique_id,
+                          const std::string& column_type, const std::string& column_name,
+                          const std::map<std::string, std::string>& properties =
+                                  std::map<std::string, std::string>(),
+                          bool is_key = false) {
+        // Column description.
+        column_pb->set_unique_id(col_unique_id);
+        column_pb->set_name(column_name);
+        column_pb->set_type(column_type);
+        column_pb->set_is_key(is_key);
+        column_pb->set_is_nullable(true);
+
+        // Index description, bound to the column through its unique id.
+        tablet_index->set_index_id(index_id);
+        tablet_index->set_index_name(index_name);
+        tablet_index->set_index_type(IndexType::INVERTED);
+        tablet_index->add_col_unique_id(col_unique_id);
+
+        // The properties map is only touched when there is something to copy.
+        if (properties.empty()) {
+            return;
+        }
+        auto& index_properties = *tablet_index->mutable_properties();
+        for (const auto& entry : properties) {
+            index_properties[entry.first] = entry.second;
+        }
+    }
+
private:
std::unique_ptr<DataDir> _data_dir;
};
@@ -304,4 +361,54 @@ TEST_F(BetaRowsetTest, AddToBinlogTest) {
ASSERT_TRUE(s.ok()) << "second add_to_binlog(): " << s;
}
+// Verifies Rowset::get_index_file_names() for both inverted index storage formats, on a
+// two-segment rowset whose schema declares two inverted indexes (ids 10000 and 10001).
+TEST_F(BetaRowsetTest, GetIndexFileNames) {
+    // Builds the rowset described above with the given storage format and returns the
+    // index file names it reports.
+    const auto index_file_names_for = [this](InvertedIndexStorageFormatPB format) {
+        TabletSchemaPB schema_pb;
+        schema_pb.set_keys_type(KeysType::DUP_KEYS);
+        schema_pb.set_inverted_index_storage_format(format);
+        construct_column(schema_pb.add_column(), schema_pb.add_index(), 10000, "key_index", 0,
+                         "INT", "key");
+        construct_column(schema_pb.add_column(), schema_pb.add_index(), 10001, "v1_index", 1,
+                         "STRING", "v1");
+        auto tablet_schema = std::make_shared<TabletSchema>();
+        tablet_schema->init_from_pb(schema_pb);
+
+        auto rowset_meta = std::make_shared<RowsetMeta>();
+        init_rs_meta(rowset_meta, 1, 1);
+        rowset_meta->set_num_segments(2);
+
+        BetaRowset rowset(tablet_schema, rowset_meta, "");
+        return rowset.get_index_file_names();
+    };
+
+    // v1: one file per (segment, index) pair, grouped by segment.
+    {
+        const auto file_names = index_file_names_for(InvertedIndexStorageFormatPB::V1);
+        // Check the count first so a wrong count fails the test instead of reading
+        // out of bounds below, and so unexpected extra names are caught as well.
+        ASSERT_EQ(file_names.size(), 4U);
+        ASSERT_EQ(file_names[0], "540085_0_10000.idx");
+        ASSERT_EQ(file_names[1], "540085_0_10001.idx");
+        ASSERT_EQ(file_names[2], "540085_1_10000.idx");
+        ASSERT_EQ(file_names[3], "540085_1_10001.idx");
+    }
+
+    // v2: a single index file per segment.
+    {
+        const auto file_names = index_file_names_for(InvertedIndexStorageFormatPB::V2);
+        ASSERT_EQ(file_names.size(), 2U);
+        ASSERT_EQ(file_names[0], "540085_0.idx");
+        ASSERT_EQ(file_names[1], "540085_1.idx");
+    }
+}
+
} // namespace doris
diff --git
a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy
b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy
index 2062f9ad7ef..32f75d2e760 100644
---
a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy
+++
b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy
@@ -3008,7 +3008,7 @@ class Suite implements GroovyInterceptable {
}
}
- def getRowsetFileCacheDirFromBe = { beHttpPort, msHttpPort, tabletId,
version ->
+ def getRowsetFileCacheDirFromBe = { beHttpPort, msHttpPort, tabletId,
version, fileSuffix = "dat" ->
def hashValues = []
def segmentFiles = []
getSegmentFilesFromMs(msHttpPort, tabletId, version) {
@@ -3018,7 +3018,7 @@ class Suite implements GroovyInterceptable {
//
{"rowset_id":"0","partition_id":"27695","tablet_id":"27700","txn_id":"7057526525952","tablet_schema_hash":0,"rowset_type":"BETA_ROWSET","rowset_state":"COMMITTED","start_version":"3","end_version":"3","version_hash":"0","num_rows":"1","total_disk_size":"895","data_disk_size":"895","index_disk_size":"0","empty":false,"load_id":{"hi":"-1646598626735601581","lo":"-6677682539881484579"},"delete_flag":false,"creation_time":"1736153402","num_segments":"1","rowset_id_v2":"020
[...]
def segmentNum = json.num_segments as int
def rowsetId = json.rowset_id_v2 as String
- segmentFiles = (0..<segmentNum).collect { i ->
"${rowsetId}_${i}.dat" }
+ segmentFiles = (0..<segmentNum).collect { i ->
"${rowsetId}_${i}.${fileSuffix}" }
}
segmentFiles.each {
@@ -3032,7 +3032,7 @@ class Suite implements GroovyInterceptable {
}
// get table's tablet file cache
- def getTabletFileCacheDirFromBe = { msHttpPort, table, version ->
+ def getTabletFileCacheDirFromBe = { msHttpPort, table, version, fileSuffix
= "dat" ->
// beHost HashFile
def beHostToHashFile = [:]
@@ -3040,7 +3040,7 @@ class Suite implements GroovyInterceptable {
getTabletsAndHostFromFe.each {
def beHost = it.Value[1]
def tabletId = it.Key
- def hashRet = getRowsetFileCacheDirFromBe(beHost + ":8040",
msHttpPort, tabletId, version)
+ def hashRet = getRowsetFileCacheDirFromBe(beHost + ":8040",
msHttpPort, tabletId, version, fileSuffix)
hashRet.each {
def hashFile = it
if (beHostToHashFile.containsKey(beHost)) {
diff --git
a/regression-test/suites/cloud_p0/tablets/test_clean_stale_rs_file_cache.groovy
b/regression-test/suites/cloud_p0/tablets/test_clean_stale_rs_file_cache.groovy
index 8d41939981a..807e51ae95f 100644
---
a/regression-test/suites/cloud_p0/tablets/test_clean_stale_rs_file_cache.groovy
+++
b/regression-test/suites/cloud_p0/tablets/test_clean_stale_rs_file_cache.groovy
@@ -66,7 +66,7 @@ suite('test_clean_stale_rs_file_cache', 'docker') {
sql """
insert into $table values (10, 1, 'v1'), (20, 2, 'v2'), (30, 3,
'v3')
"""
- def cacheDirVersion3 = getTabletFileCacheDirFromBe(msHttpPort, table,
2)
+ def cacheDirVersion3 = getTabletFileCacheDirFromBe(msHttpPort, table,
3)
// version 4
sql """
insert into $table values (100, 1, 'v1'), (200, 2, 'v2'), (300, 3,
'v3')
diff --git
a/regression-test/suites/cloud_p0/tablets/test_clean_stale_rs_file_cache.groovy
b/regression-test/suites/cloud_p0/tablets/test_clean_stale_rs_index_file_cache.groovy
similarity index 92%
copy from
regression-test/suites/cloud_p0/tablets/test_clean_stale_rs_file_cache.groovy
copy to
regression-test/suites/cloud_p0/tablets/test_clean_stale_rs_index_file_cache.groovy
index 8d41939981a..9077364d577 100644
---
a/regression-test/suites/cloud_p0/tablets/test_clean_stale_rs_file_cache.groovy
+++
b/regression-test/suites/cloud_p0/tablets/test_clean_stale_rs_index_file_cache.groovy
@@ -18,7 +18,7 @@
import org.apache.doris.regression.suite.ClusterOptions
import org.apache.doris.regression.util.Http
-suite('test_clean_stale_rs_file_cache', 'docker') {
+suite('test_clean_stale_rs_index_file_cache', 'docker') {
if (!isCloudMode()) {
return;
}
@@ -40,7 +40,9 @@ suite('test_clean_stale_rs_file_cache', 'docker') {
options.setBeNum(1)
options.cloudMode = true
- def table = "test_clean_stale_rs_file_cache"
+
+ def table = "test_clean_stale_rs_index_file_cache"
+ sql """ drop table if exists $table; """
docker(options) {
def ms = cluster.getAllMetaservices().get(0)
@@ -48,7 +50,8 @@ suite('test_clean_stale_rs_file_cache', 'docker') {
sql """CREATE TABLE $table (
`k1` int(11) NULL,
`k2` int(11) NULL,
- `v1` varchar(2048)
+ `v1` varchar(2048),
+ INDEX v1_idx (`v1`) USING INVERTED PROPERTIES("parser" =
"english", "support_phrase" = "true") COMMENT ''
)
DUPLICATE KEY(`k1`, `k2`)
COMMENT 'OLAP'
@@ -61,12 +64,12 @@ suite('test_clean_stale_rs_file_cache', 'docker') {
sql """
insert into $table values (1, 1, 'v1'), (2, 2, 'v2'), (3, 3, 'v3')
"""
- def cacheDirVersion2 = getTabletFileCacheDirFromBe(msHttpPort, table,
2)
+ def cacheDirVersion2 = getTabletFileCacheDirFromBe(msHttpPort, table,
2, "idx")
// version 3
sql """
insert into $table values (10, 1, 'v1'), (20, 2, 'v2'), (30, 3,
'v3')
"""
- def cacheDirVersion3 = getTabletFileCacheDirFromBe(msHttpPort, table,
2)
+ def cacheDirVersion3 = getTabletFileCacheDirFromBe(msHttpPort, table,
3, "idx")
// version 4
sql """
insert into $table values (100, 1, 'v1'), (200, 2, 'v2'), (300, 3,
'v3')
@@ -124,6 +127,5 @@ suite('test_clean_stale_rs_file_cache', 'docker') {
"Matching subdir found in: ${subDirs}")
}
}
-
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]