nfsantos commented on code in PR #1595:
URL: https://github.com/apache/jackrabbit-oak/pull/1595#discussion_r1694874094


##########
oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileStoreIterator.java:
##########
@@ -19,34 +19,44 @@
 
 package org.apache.jackrabbit.oak.index.indexer.document.flatfile;
 
-import static org.apache.jackrabbit.guava.common.collect.Iterators.concat;
-import static 
org.apache.jackrabbit.guava.common.collect.Iterators.singletonIterator;
-
-import java.io.Closeable;
-import java.util.Iterator;
-import java.util.Set;
-
+import org.apache.jackrabbit.guava.common.collect.AbstractIterator;
+import org.apache.jackrabbit.oak.commons.IOUtils;
 import org.apache.jackrabbit.oak.index.indexer.document.NodeStateEntry;
 import 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.linkedList.FlatFileBufferLinkedList;
 import 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.linkedList.NodeStateEntryList;
 import 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.linkedList.PersistedLinkedList;
+import 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.linkedList.PersistedLinkedListV2;
 import 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.pipelined.ConfigHelper;
 import org.apache.jackrabbit.oak.spi.blob.BlobStore;
 import org.apache.jackrabbit.oak.spi.state.NodeState;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import org.apache.jackrabbit.guava.common.collect.AbstractIterator;
+import java.io.Closeable;
+import java.util.Iterator;
+import java.util.Set;
+
+import static org.apache.jackrabbit.guava.common.collect.Iterators.concat;
+import static 
org.apache.jackrabbit.guava.common.collect.Iterators.singletonIterator;
 
 class FlatFileStoreIterator extends AbstractIterator<NodeStateEntry> 
implements Iterator<NodeStateEntry>, Closeable {
-    private static final Logger log = 
LoggerFactory.getLogger(FlatFileStoreIterator.class);
+    private static final Logger LOG = 
LoggerFactory.getLogger(FlatFileStoreIterator.class);
 
     static final String BUFFER_MEM_LIMIT_CONFIG_NAME = 
"oak.indexer.memLimitInMB";
     // by default, use the PersistedLinkedList
     private static final int DEFAULT_BUFFER_MEM_LIMIT_IN_MB = 0;
-    static final String PERSISTED_LINKED_LIST_CACHE_SIZE = 
"oak.indexer.persistedLinkedList.cacheSize";
-    static final int DEFAULT_PERSISTED_LINKED_LIST_CACHE_SIZE = 1000;
 
+    public static final String PERSISTED_LINKED_LIST_CACHE_SIZE = 
"oak.indexer.persistedLinkedList.cacheSize";
+    public static final int DEFAULT_PERSISTED_LINKED_LIST_CACHE_SIZE = 1000;
+
+    public static final String PERSISTED_LINKED_LIST_V2_CACHE_SIZE = 
"oak.indexer.persistedLinkedListV2.cacheSize";
+    public static final int DEFAULT_PERSISTED_LINKED_LIST_V2_CACHE_SIZE = 
10000;
+
+    public static final String PERSISTED_LINKED_LIST_V2_MEMORY_CACHE_SIZE_MB = 
"oak.indexer.persistedLinkedListV2.cacheMaxSizeMB";
+    public static final int 
DEFAULT_PERSISTED_LINKED_LIST_V2_MEMORY_CACHE_SIZE_MB = 8;
+
+    public static final String PERSISTED_LINKED_LIST_USE_V2 = 
"oak.indexer.persistedLinkedList.useV2";

Review Comment:
   The memory estimation is conservative because it assumes that all strings 
will be encoded with 2 bytes per char. In practice, in most cases the majority 
of strings will be encoded with 1 byte per char, so the memory estimation will 
be almost double the memory effectively used. And if using string interning 
or string deduplication, the difference might be even bigger. But of course we 
have to do the calculation for the worst case scenario. And as you say, it does 
not need to be precise. The goal here is just to prevent OOME while ensuring 
that the cache is big enough to give us the majority of the performance 
benefits. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to