This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 8c467ff58f [core] File paths in SimpleLsmKvDb should contain UUID
(#7536)
8c467ff58f is described below
commit 8c467ff58fae248a38164ce0bfb40cb8c75eca1c
Author: Jingsong Lee <[email protected]>
AuthorDate: Thu Mar 26 17:41:09 2026 +0800
[core] File paths in SimpleLsmKvDb should contain UUID (#7536)
During construction, the ClusteringCompactManager obtains a temporary
directory via ioManager.pickTempDir() and creates a SimpleLsmKvDb within
that directory. When bucket > 1, multiple ClusteringCompactManager
instances across different buckets may end up using the same temporary
directory. The original SimpleLsmKvDb.newSstFile() method used
sequentially numbered names like sst-000000.db, causing SST files from
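different instances to overwrite each other. This leads to corrupted key
indexes and failed deduplication of primary keys (e.g., duplicate values
30 and 33 for key=3). To fix this, each SimpleLsmKvDb instance now
generates a random UUID at construction and embeds it in its SST file
names (sst-<uuid>-<sequence>.db), so instances that share a directory no
longer collide.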
---
.../paimon/lookup/sort/db/SimpleLsmKvDb.java | 5 ++-
.../paimon/lookup/sort/db/SimpleLsmKvDbTest.java | 51 ++++++++++++++++++++++
.../paimon/crosspartition/GlobalIndexAssigner.java | 2 +-
.../java/org/apache/paimon/disk/IOManager.java | 2 +-
.../java/org/apache/paimon/disk/IOManagerImpl.java | 2 +-
.../clustering/ClusteringCompactManager.java | 2 +-
.../paimon/separated/ClusteringTableTest.java | 2 +-
7 files changed, 60 insertions(+), 6 deletions(-)
diff --git
a/paimon-common/src/main/java/org/apache/paimon/lookup/sort/db/SimpleLsmKvDb.java
b/paimon-common/src/main/java/org/apache/paimon/lookup/sort/db/SimpleLsmKvDb.java
index 8871fcab48..c5b8c53fd9 100644
---
a/paimon-common/src/main/java/org/apache/paimon/lookup/sort/db/SimpleLsmKvDb.java
+++
b/paimon-common/src/main/java/org/apache/paimon/lookup/sort/db/SimpleLsmKvDb.java
@@ -42,6 +42,7 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
+import java.util.UUID;
/**
* A simple LSM-Tree based KV database built on top of {@link
SortLookupStoreFactory}.
@@ -92,6 +93,7 @@ public class SimpleLsmKvDb implements Closeable {
static final long PER_ENTRY_OVERHEAD = 160;
private final File dataDirectory;
+ private final String uuid;
private final SortLookupStoreFactory storeFactory;
private final Comparator<MemorySlice> keyComparator;
private final long memTableFlushThreshold;
@@ -126,6 +128,7 @@ public class SimpleLsmKvDb implements Closeable {
int level0FileNumCompactTrigger,
int sizeRatio) {
this.dataDirectory = dataDirectory;
+ this.uuid = UUID.randomUUID().toString();
this.storeFactory = storeFactory;
this.keyComparator = keyComparator;
this.memTableFlushThreshold = memTableFlushThreshold;
@@ -540,7 +543,7 @@ public class SimpleLsmKvDb implements Closeable {
private File newSstFile() {
long sequence = fileSequence++;
- return new File(dataDirectory, String.format("sst-%06d.db", sequence));
+ return new File(dataDirectory, String.format("sst-%s-%06d.db", uuid,
sequence));
}
private void ensureOpen() {
diff --git
a/paimon-common/src/test/java/org/apache/paimon/lookup/sort/db/SimpleLsmKvDbTest.java
b/paimon-common/src/test/java/org/apache/paimon/lookup/sort/db/SimpleLsmKvDbTest.java
index 44d7d46ff5..92a5488591 100644
---
a/paimon-common/src/test/java/org/apache/paimon/lookup/sort/db/SimpleLsmKvDbTest.java
+++
b/paimon-common/src/test/java/org/apache/paimon/lookup/sort/db/SimpleLsmKvDbTest.java
@@ -1430,6 +1430,57 @@ public class SimpleLsmKvDbTest {
}
}
+ @Test
+ public void testTwoInstancesSameDirectoryNoFileCollision() throws
IOException {
+ // Two SimpleLsmKvDb instances sharing the same dataDirectory should
not interfere
+ // with each other because SST file names contain a unique UUID.
+ File sharedDir = new File(tempDir.toFile(), "shared-dir-db");
+
+ try (SimpleLsmKvDb db1 =
+ SimpleLsmKvDb.builder(sharedDir)
+ .memTableFlushThreshold(1024)
+ .blockSize(256)
+ .level0FileNumCompactTrigger(4)
+ .compressOptions(new CompressOptions("none",
1))
+ .build();
+ SimpleLsmKvDb db2 =
+ SimpleLsmKvDb.builder(sharedDir)
+ .memTableFlushThreshold(1024)
+ .blockSize(256)
+ .level0FileNumCompactTrigger(4)
+ .compressOptions(new CompressOptions("none",
1))
+ .build()) {
+
+ // Write different data to each instance
+ putString(db1, "key-a", "from-db1");
+ putString(db1, "key-b", "from-db1");
+ db1.flush();
+
+ putString(db2, "key-a", "from-db2");
+ putString(db2, "key-c", "from-db2");
+ db2.flush();
+
+ // Each instance should see only its own data
+ Assertions.assertEquals("from-db1", getString(db1, "key-a"));
+ Assertions.assertEquals("from-db1", getString(db1, "key-b"));
+ Assertions.assertNull(getString(db1, "key-c"));
+
+ Assertions.assertEquals("from-db2", getString(db2, "key-a"));
+ Assertions.assertNull(getString(db2, "key-b"));
+ Assertions.assertEquals("from-db2", getString(db2, "key-c"));
+
+ // Write more data and flush again to ensure no cross-contamination
+ putString(db1, "key-a", "updated-db1");
+ db1.flush();
+
+ putString(db2, "key-a", "updated-db2");
+ db2.flush();
+
+ Assertions.assertEquals("updated-db1", getString(db1, "key-a"));
+ Assertions.assertEquals("updated-db2", getString(db2, "key-a"));
+ }
+ }
+
@Test
public void testBulkLoadFailsOnNonEmptyDb() throws IOException {
try (SimpleLsmKvDb db = createDb()) {
diff --git
a/paimon-core/src/main/java/org/apache/paimon/crosspartition/GlobalIndexAssigner.java
b/paimon-core/src/main/java/org/apache/paimon/crosspartition/GlobalIndexAssigner.java
index c387f9abdf..9097c5c482 100644
---
a/paimon-core/src/main/java/org/apache/paimon/crosspartition/GlobalIndexAssigner.java
+++
b/paimon-core/src/main/java/org/apache/paimon/crosspartition/GlobalIndexAssigner.java
@@ -132,7 +132,7 @@ public class GlobalIndexAssigner implements Serializable,
Closeable {
this.extractor = new
RowPartitionAllPrimaryKeyExtractor(table.schema());
this.keyPartExtractor = new
KeyPartPartitionKeyExtractor(table.schema());
- String tmpDir = ioManager.pickRandomTempDir();
+ String tmpDir = ioManager.pickTempDir();
this.path = new File(tmpDir, "rocksdb-" + UUID.randomUUID());
if (!this.path.mkdirs()) {
throw new RuntimeException(
diff --git a/paimon-core/src/main/java/org/apache/paimon/disk/IOManager.java
b/paimon-core/src/main/java/org/apache/paimon/disk/IOManager.java
index 4a35167e1e..352f8c4a73 100644
--- a/paimon-core/src/main/java/org/apache/paimon/disk/IOManager.java
+++ b/paimon-core/src/main/java/org/apache/paimon/disk/IOManager.java
@@ -38,7 +38,7 @@ public interface IOManager extends AutoCloseable {
String[] tempDirs();
- String pickRandomTempDir();
+ String pickTempDir();
Enumerator createChannelEnumerator();
diff --git
a/paimon-core/src/main/java/org/apache/paimon/disk/IOManagerImpl.java
b/paimon-core/src/main/java/org/apache/paimon/disk/IOManagerImpl.java
index 57926d0154..c0028c6bc6 100644
--- a/paimon-core/src/main/java/org/apache/paimon/disk/IOManagerImpl.java
+++ b/paimon-core/src/main/java/org/apache/paimon/disk/IOManagerImpl.java
@@ -95,7 +95,7 @@ public class IOManagerImpl implements IOManager {
}
@Override
- public String pickRandomTempDir() {
+ public String pickTempDir() {
return pickRandomly(Arrays.asList(tempDirs()));
}
diff --git
a/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/clustering/ClusteringCompactManager.java
b/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/clustering/ClusteringCompactManager.java
index 9e925d0764..12acea3be6 100644
---
a/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/clustering/ClusteringCompactManager.java
+++
b/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/clustering/ClusteringCompactManager.java
@@ -119,7 +119,7 @@ public class ClusteringCompactManager extends
CompactFutureManager {
valueType.getFieldTypes(), clusteringColumnIndexes,
true);
SimpleLsmKvDb kvDb =
- SimpleLsmKvDb.builder(new File(ioManager.pickRandomTempDir()))
+ SimpleLsmKvDb.builder(new File(ioManager.pickTempDir()))
.cacheManager(cacheManager)
.keyComparator(new
RowCompactedSerializer(keyType).createSliceComparator())
.build();
diff --git
a/paimon-core/src/test/java/org/apache/paimon/separated/ClusteringTableTest.java
b/paimon-core/src/test/java/org/apache/paimon/separated/ClusteringTableTest.java
index aaf661be2f..353dd6d704 100644
---
a/paimon-core/src/test/java/org/apache/paimon/separated/ClusteringTableTest.java
+++
b/paimon-core/src/test/java/org/apache/paimon/separated/ClusteringTableTest.java
@@ -74,7 +74,7 @@ class ClusteringTableTest {
.column("b", DataTypes.INT())
.primaryKey("a")
.option(DELETION_VECTORS_ENABLED.key(), "true")
- .option(BUCKET.key(), "1")
+ .option(BUCKET.key(), "2")
.option(CLUSTERING_COLUMNS.key(), "b")
.option(PK_CLUSTERING_OVERRIDE.key(), "true")
.build();