This is an automated email from the ASF dual-hosted git repository.

hope pushed a commit to branch release-1.4
in repository https://gitbox.apache.org/repos/asf/paimon.git

commit 89ab4f47627eda5caf46071a310c6e4a4ac9f188
Author: Jingsong Lee <[email protected]>
AuthorDate: Thu Mar 26 17:41:09 2026 +0800

    [core] File paths in SimpleLsmKvDb should contain UUID (#7536)
    
    During construction, the ClusteringCompactManager obtains a temporary
    directory via ioManager.pickTempDir() and creates a SimpleLsmKvDb within
    that directory. When bucket > 1, multiple ClusteringCompactManager
    instances across different buckets may end up using the same temporary
    directory. The original SimpleLsmKvDb.newSstFile() method used
    sequentially numbered names like sst-000000.db, causing SST files from
    different instances to overwrite each other. This leads to corrupted key
    indexes and failed deduplication of primary keys (e.g., duplicate values
    30 and 33 for key=3).
---
 .../paimon/lookup/sort/db/SimpleLsmKvDb.java       |  5 ++-
 .../paimon/lookup/sort/db/SimpleLsmKvDbTest.java   | 51 ++++++++++++++++++++++
 .../paimon/crosspartition/GlobalIndexAssigner.java |  2 +-
 .../java/org/apache/paimon/disk/IOManager.java     |  2 +-
 .../java/org/apache/paimon/disk/IOManagerImpl.java |  2 +-
 .../clustering/ClusteringCompactManager.java       |  2 +-
 .../paimon/separated/ClusteringTableTest.java      |  2 +-
 7 files changed, 60 insertions(+), 6 deletions(-)

diff --git a/paimon-common/src/main/java/org/apache/paimon/lookup/sort/db/SimpleLsmKvDb.java b/paimon-common/src/main/java/org/apache/paimon/lookup/sort/db/SimpleLsmKvDb.java
index 8871fcab48..c5b8c53fd9 100644
--- a/paimon-common/src/main/java/org/apache/paimon/lookup/sort/db/SimpleLsmKvDb.java
+++ b/paimon-common/src/main/java/org/apache/paimon/lookup/sort/db/SimpleLsmKvDb.java
@@ -42,6 +42,7 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.TreeMap;
+import java.util.UUID;
 
 /**
  * A simple LSM-Tree based KV database built on top of {@link SortLookupStoreFactory}.
@@ -92,6 +93,7 @@ public class SimpleLsmKvDb implements Closeable {
     static final long PER_ENTRY_OVERHEAD = 160;
 
     private final File dataDirectory;
+    private final String uuid;
     private final SortLookupStoreFactory storeFactory;
     private final Comparator<MemorySlice> keyComparator;
     private final long memTableFlushThreshold;
@@ -126,6 +128,7 @@ public class SimpleLsmKvDb implements Closeable {
             int level0FileNumCompactTrigger,
             int sizeRatio) {
         this.dataDirectory = dataDirectory;
+        this.uuid = UUID.randomUUID().toString();
         this.storeFactory = storeFactory;
         this.keyComparator = keyComparator;
         this.memTableFlushThreshold = memTableFlushThreshold;
@@ -540,7 +543,7 @@ public class SimpleLsmKvDb implements Closeable {
 
     private File newSstFile() {
         long sequence = fileSequence++;
-        return new File(dataDirectory, String.format("sst-%06d.db", sequence));
+        return new File(dataDirectory, String.format("sst-%s-%06d.db", uuid, sequence));
     }
 
     private void ensureOpen() {
diff --git a/paimon-common/src/test/java/org/apache/paimon/lookup/sort/db/SimpleLsmKvDbTest.java b/paimon-common/src/test/java/org/apache/paimon/lookup/sort/db/SimpleLsmKvDbTest.java
index 44d7d46ff5..92a5488591 100644
--- a/paimon-common/src/test/java/org/apache/paimon/lookup/sort/db/SimpleLsmKvDbTest.java
+++ b/paimon-common/src/test/java/org/apache/paimon/lookup/sort/db/SimpleLsmKvDbTest.java
@@ -1430,6 +1430,57 @@ public class SimpleLsmKvDbTest {
         }
     }
 
+    @Test
+    public void testTwoInstancesSameDirectoryNoFileCollision() throws IOException {
+        // Two SimpleLsmKvDb instances sharing the same dataDirectory should not interfere
+        // with each other because SST file names contain a unique UUID.
+        File sharedDir = new File(tempDir.toFile(), "shared-dir-db");
+
+        try (SimpleLsmKvDb db1 =
+                        SimpleLsmKvDb.builder(sharedDir)
+                                .memTableFlushThreshold(1024)
+                                .blockSize(256)
+                                .level0FileNumCompactTrigger(4)
+                                .compressOptions(new CompressOptions("none", 1))
+                                .build();
+                SimpleLsmKvDb db2 =
+                        SimpleLsmKvDb.builder(sharedDir)
+                                .memTableFlushThreshold(1024)
+                                .blockSize(256)
+                                .level0FileNumCompactTrigger(4)
+                                .compressOptions(new CompressOptions("none", 1))
+                                .build()) {
+
+            // Write different data to each instance
+            putString(db1, "key-a", "from-db1");
+            putString(db1, "key-b", "from-db1");
+            db1.flush();
+
+            putString(db2, "key-a", "from-db2");
+            putString(db2, "key-c", "from-db2");
+            db2.flush();
+
+            // Each instance should see only its own data
+            Assertions.assertEquals("from-db1", getString(db1, "key-a"));
+            Assertions.assertEquals("from-db1", getString(db1, "key-b"));
+            Assertions.assertNull(getString(db1, "key-c"));
+
+            Assertions.assertEquals("from-db2", getString(db2, "key-a"));
+            Assertions.assertNull(getString(db2, "key-b"));
+            Assertions.assertEquals("from-db2", getString(db2, "key-c"));
+
+            // Write more data and flush again to ensure no cross-contamination
+            putString(db1, "key-a", "updated-db1");
+            db1.flush();
+
+            putString(db2, "key-a", "updated-db2");
+            db2.flush();
+
+            Assertions.assertEquals("updated-db1", getString(db1, "key-a"));
+            Assertions.assertEquals("updated-db2", getString(db2, "key-a"));
+        }
+    }
+
     @Test
     public void testBulkLoadFailsOnNonEmptyDb() throws IOException {
         try (SimpleLsmKvDb db = createDb()) {
diff --git a/paimon-core/src/main/java/org/apache/paimon/crosspartition/GlobalIndexAssigner.java b/paimon-core/src/main/java/org/apache/paimon/crosspartition/GlobalIndexAssigner.java
index c387f9abdf..9097c5c482 100644
--- a/paimon-core/src/main/java/org/apache/paimon/crosspartition/GlobalIndexAssigner.java
+++ b/paimon-core/src/main/java/org/apache/paimon/crosspartition/GlobalIndexAssigner.java
@@ -132,7 +132,7 @@ public class GlobalIndexAssigner implements Serializable, Closeable {
         this.extractor = new RowPartitionAllPrimaryKeyExtractor(table.schema());
         this.keyPartExtractor = new KeyPartPartitionKeyExtractor(table.schema());
 
-        String tmpDir = ioManager.pickRandomTempDir();
+        String tmpDir = ioManager.pickTempDir();
         this.path = new File(tmpDir, "rocksdb-" + UUID.randomUUID());
         if (!this.path.mkdirs()) {
             throw new RuntimeException(
diff --git a/paimon-core/src/main/java/org/apache/paimon/disk/IOManager.java b/paimon-core/src/main/java/org/apache/paimon/disk/IOManager.java
index 4a35167e1e..352f8c4a73 100644
--- a/paimon-core/src/main/java/org/apache/paimon/disk/IOManager.java
+++ b/paimon-core/src/main/java/org/apache/paimon/disk/IOManager.java
@@ -38,7 +38,7 @@ public interface IOManager extends AutoCloseable {
 
     String[] tempDirs();
 
-    String pickRandomTempDir();
+    String pickTempDir();
 
     Enumerator createChannelEnumerator();
 
diff --git a/paimon-core/src/main/java/org/apache/paimon/disk/IOManagerImpl.java b/paimon-core/src/main/java/org/apache/paimon/disk/IOManagerImpl.java
index 57926d0154..c0028c6bc6 100644
--- a/paimon-core/src/main/java/org/apache/paimon/disk/IOManagerImpl.java
+++ b/paimon-core/src/main/java/org/apache/paimon/disk/IOManagerImpl.java
@@ -95,7 +95,7 @@ public class IOManagerImpl implements IOManager {
     }
 
     @Override
-    public String pickRandomTempDir() {
+    public String pickTempDir() {
         return pickRandomly(Arrays.asList(tempDirs()));
     }
 
diff --git a/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/clustering/ClusteringCompactManager.java b/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/clustering/ClusteringCompactManager.java
index 9e925d0764..12acea3be6 100644
--- a/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/clustering/ClusteringCompactManager.java
+++ b/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/clustering/ClusteringCompactManager.java
@@ -119,7 +119,7 @@ public class ClusteringCompactManager extends CompactFutureManager {
                         valueType.getFieldTypes(), clusteringColumnIndexes, true);
 
         SimpleLsmKvDb kvDb =
-                SimpleLsmKvDb.builder(new File(ioManager.pickRandomTempDir()))
+                SimpleLsmKvDb.builder(new File(ioManager.pickTempDir()))
                         .cacheManager(cacheManager)
                         .keyComparator(new RowCompactedSerializer(keyType).createSliceComparator())
                         .build();
diff --git a/paimon-core/src/test/java/org/apache/paimon/separated/ClusteringTableTest.java b/paimon-core/src/test/java/org/apache/paimon/separated/ClusteringTableTest.java
index aaf661be2f..353dd6d704 100644
--- a/paimon-core/src/test/java/org/apache/paimon/separated/ClusteringTableTest.java
+++ b/paimon-core/src/test/java/org/apache/paimon/separated/ClusteringTableTest.java
@@ -74,7 +74,7 @@ class ClusteringTableTest {
                         .column("b", DataTypes.INT())
                         .primaryKey("a")
                         .option(DELETION_VECTORS_ENABLED.key(), "true")
-                        .option(BUCKET.key(), "1")
+                        .option(BUCKET.key(), "2")
                         .option(CLUSTERING_COLUMNS.key(), "b")
                         .option(PK_CLUSTERING_OVERRIDE.key(), "true")
                         .build();

Reply via email to