the-other-tim-brown commented on code in PR #10344:
URL: https://github.com/apache/hudi/pull/10344#discussion_r1435173523


##########
hudi-common/src/main/java/org/apache/hudi/common/util/collection/ExternalSpillableMap.java:
##########
@@ -78,41 +78,49 @@ public class ExternalSpillableMap<T extends Serializable, R 
extends Serializable
   // Enables compression of values stored in disc
   private final boolean isCompressionEnabled;
   // current space occupied by this map in-memory
-  private Long currentInMemoryMapSize;
+  private long currentInMemoryMapSize;
   // An estimate of the size of each payload written to this map
   private volatile long estimatedPayloadSize = 0;
   // Base File Path
   private final String baseFilePath;
 
-  public ExternalSpillableMap(Long maxInMemorySizeInBytes, String 
baseFilePath, SizeEstimator<T> keySizeEstimator,
+  public ExternalSpillableMap(long maxInMemorySizeInBytes, String 
baseFilePath, SizeEstimator<T> keySizeEstimator,
                               SizeEstimator<R> valueSizeEstimator) throws 
IOException {
     this(maxInMemorySizeInBytes, baseFilePath, keySizeEstimator, 
valueSizeEstimator, DiskMapType.BITCASK);
   }
 
-  public ExternalSpillableMap(Long maxInMemorySizeInBytes, String 
baseFilePath, SizeEstimator<T> keySizeEstimator,
+  public ExternalSpillableMap(long maxInMemorySizeInBytes, String 
baseFilePath, SizeEstimator<T> keySizeEstimator,
                               SizeEstimator<R> valueSizeEstimator, DiskMapType 
diskMapType) throws IOException {
     this(maxInMemorySizeInBytes, baseFilePath, keySizeEstimator, 
valueSizeEstimator, diskMapType, false);
   }
 
-  public ExternalSpillableMap(Long maxInMemorySizeInBytes, String 
baseFilePath, SizeEstimator<T> keySizeEstimator,
+  public ExternalSpillableMap(long maxInMemorySizeInBytes, String 
baseFilePath, SizeEstimator<T> keySizeEstimator,
                               SizeEstimator<R> valueSizeEstimator, DiskMapType 
diskMapType, boolean isCompressionEnabled) throws IOException {
     this.inMemoryMap = new HashMap<>();
     this.baseFilePath = baseFilePath;
-    this.maxInMemorySizeInBytes = (long) Math.floor(maxInMemorySizeInBytes * 
sizingFactorForInMemoryMap);
+    this.maxInMemorySizeInBytes = (long) Math.floor(maxInMemorySizeInBytes * 
SIZING_FACTOR_FOR_IN_MEMORY_MAP);
     this.currentInMemoryMapSize = 0L;
     this.keySizeEstimator = keySizeEstimator;
     this.valueSizeEstimator = valueSizeEstimator;
     this.diskMapType = diskMapType;
     this.isCompressionEnabled = isCompressionEnabled;
   }
 
+  private DiskMap<T, R> getDiskBasedMap() {
+    return getDiskBasedMap(false);
+  }
+
+  private DiskMap<T, R> getOrCreateDiskBasedMap() {
+    return getDiskBasedMap(true);
+  }
+
   private DiskMap<T, R> getDiskBasedMap(boolean forceInitialization) {
     if (null == diskBasedMap) {
-      if (!forceInitialization) {
-        return DiskMap.empty();
-      }
       synchronized (this) {
         if (null == diskBasedMap) {
+          if (!forceInitialization) {
+            return DiskMap.empty();

Review Comment:
   > My biggest concern is for write method (like `#put`) and read method (like 
`#contains`) we need to take care the disk map type to ensure the correctness.
   > 
   Yes, this is what unit tests are for. Once we agree on a path, I will make 
sure they are in place and handle the various code paths in the `put` and 
`contains` logic.
   
   > Another choice is we make the instantiation of `RocksDbDiskMap` and 
`BitCaskDiskMap` as lazy and light-weight, so that the wrapper 
`ExternalSpillableMap` can simplify a lot.
   Wouldn't this just duplicate the logic?  If someone adds a third 
implementation, then would it be triple work? How do you propose doing this in 
a way that has less complexity? This complexity bit seems to be the major 
blocker so I am looking for some clear ways on how to actually reduce it. Links 
to where code should be added would be helpful.
   
   



##########
hudi-common/src/main/java/org/apache/hudi/common/util/collection/ExternalSpillableMap.java:
##########
@@ -78,41 +78,49 @@ public class ExternalSpillableMap<T extends Serializable, R 
extends Serializable
   // Enables compression of values stored in disc
   private final boolean isCompressionEnabled;
   // current space occupied by this map in-memory
-  private Long currentInMemoryMapSize;
+  private long currentInMemoryMapSize;
   // An estimate of the size of each payload written to this map
   private volatile long estimatedPayloadSize = 0;
   // Base File Path
   private final String baseFilePath;
 
-  public ExternalSpillableMap(Long maxInMemorySizeInBytes, String 
baseFilePath, SizeEstimator<T> keySizeEstimator,
+  public ExternalSpillableMap(long maxInMemorySizeInBytes, String 
baseFilePath, SizeEstimator<T> keySizeEstimator,
                               SizeEstimator<R> valueSizeEstimator) throws 
IOException {
     this(maxInMemorySizeInBytes, baseFilePath, keySizeEstimator, 
valueSizeEstimator, DiskMapType.BITCASK);
   }
 
-  public ExternalSpillableMap(Long maxInMemorySizeInBytes, String 
baseFilePath, SizeEstimator<T> keySizeEstimator,
+  public ExternalSpillableMap(long maxInMemorySizeInBytes, String 
baseFilePath, SizeEstimator<T> keySizeEstimator,
                               SizeEstimator<R> valueSizeEstimator, DiskMapType 
diskMapType) throws IOException {
     this(maxInMemorySizeInBytes, baseFilePath, keySizeEstimator, 
valueSizeEstimator, diskMapType, false);
   }
 
-  public ExternalSpillableMap(Long maxInMemorySizeInBytes, String 
baseFilePath, SizeEstimator<T> keySizeEstimator,
+  public ExternalSpillableMap(long maxInMemorySizeInBytes, String 
baseFilePath, SizeEstimator<T> keySizeEstimator,
                               SizeEstimator<R> valueSizeEstimator, DiskMapType 
diskMapType, boolean isCompressionEnabled) throws IOException {
     this.inMemoryMap = new HashMap<>();
     this.baseFilePath = baseFilePath;
-    this.maxInMemorySizeInBytes = (long) Math.floor(maxInMemorySizeInBytes * 
sizingFactorForInMemoryMap);
+    this.maxInMemorySizeInBytes = (long) Math.floor(maxInMemorySizeInBytes * 
SIZING_FACTOR_FOR_IN_MEMORY_MAP);
     this.currentInMemoryMapSize = 0L;
     this.keySizeEstimator = keySizeEstimator;
     this.valueSizeEstimator = valueSizeEstimator;
     this.diskMapType = diskMapType;
     this.isCompressionEnabled = isCompressionEnabled;
   }
 
+  private DiskMap<T, R> getDiskBasedMap() {
+    return getDiskBasedMap(false);
+  }
+
+  private DiskMap<T, R> getOrCreateDiskBasedMap() {
+    return getDiskBasedMap(true);
+  }
+
   private DiskMap<T, R> getDiskBasedMap(boolean forceInitialization) {
     if (null == diskBasedMap) {
-      if (!forceInitialization) {
-        return DiskMap.empty();
-      }
       synchronized (this) {
         if (null == diskBasedMap) {
+          if (!forceInitialization) {
+            return DiskMap.empty();

Review Comment:
   > My biggest concern is for write method (like `#put`) and read method (like 
`#contains`) we need to take care the disk map type to ensure the correctness.
   > 
   Yes, this is what unit tests are for. Once we agree on a path, I will make 
sure they are in place and handle the various code paths in the `put` and 
`contains` logic.
   
   > Another choice is we make the instantiation of `RocksDbDiskMap` and 
`BitCaskDiskMap` as lazy and light-weight, so that the wrapper 
`ExternalSpillableMap` can simplify a lot.
   
   Wouldn't this just duplicate the logic?  If someone adds a third 
implementation, then would it be triple work? How do you propose doing this in 
a way that has less complexity? This complexity bit seems to be the major 
blocker so I am looking for some clear ways on how to actually reduce it. Links 
to where code should be added would be helpful.
   
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to