This is an automated email from the ASF dual-hosted git repository. spmallette pushed a commit to branch TINKERPOP-3158 in repository https://gitbox.apache.org/repos/asf/tinkerpop.git
commit d78ef8416b5ef761a6f49d016aa0591216119b56 Author: Stephen Mallette <[email protected]> AuthorDate: Wed May 7 19:11:04 2025 -0400 tx working with vector on commit --- .../structure/TinkerTransactionGraph.java | 2 - .../structure/TinkerTransactionVectorIndex.java | 79 ++++++++++++++++++---- .../tinkergraph/structure/TinkerVectorIndex.java | 3 +- .../structure/TinkerGraphVectorIndexTest.java | 10 --- 4 files changed, 66 insertions(+), 28 deletions(-) diff --git a/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerTransactionGraph.java b/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerTransactionGraph.java index 6b53a43f96..39ad815099 100644 --- a/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerTransactionGraph.java +++ b/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerTransactionGraph.java @@ -35,10 +35,8 @@ import org.apache.tinkerpop.gremlin.tinkergraph.process.traversal.strategy.optim import org.apache.tinkerpop.gremlin.tinkergraph.services.TinkerServiceRegistry; import org.apache.tinkerpop.gremlin.util.iterator.IteratorUtils; -import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; -import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; diff --git a/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerTransactionVectorIndex.java b/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerTransactionVectorIndex.java index 864a8242ba..8b39bad366 100644 --- a/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerTransactionVectorIndex.java +++ b/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerTransactionVectorIndex.java @@ -22,6 +22,7 @@ import com.github.jelmerk.hnswlib.core.Item; import com.github.jelmerk.hnswlib.core.SearchResult; import com.github.jelmerk.hnswlib.core.hnsw.HnswIndex; import com.github.jelmerk.hnswlib.core.Index; +import com.github.jelmerk.hnswlib.core.hnsw.SizeLimitExceededException; import org.apache.tinkerpop.gremlin.structure.Graph; import org.apache.tinkerpop.gremlin.structure.Property; import org.apache.tinkerpop.gremlin.structure.Vertex; @@ -46,6 +47,11 @@ final class TinkerTransactionVectorIndex<T extends TinkerElement> extends Abstra */ protected Map<String, Index<Object, float[], ElementItem, Float>> vectorIndices = new ConcurrentHashMap<>(); + /** + * Map of property key to growth rate + */ + private final Map<String, Double> growthRates = new ConcurrentHashMap<>(); + /** * Default number of nearest neighbors to return */ @@ -69,7 +75,12 @@ final class TinkerTransactionVectorIndex<T extends TinkerElement> extends Abstra /** * Default maximum number of items in the index */ - private static final int DEFAULT_MAX_ITEMS = 100; + private static final int DEFAULT_MAX_ITEMS = 10000; + + /** + * Default growth rate for the index when it reaches capacity (10%) + */ + private static final double DEFAULT_GROWTH_RATE = 0.1; /** * Configuration key for the dimension of the vector @@ -101,6 +112,11 @@ final class TinkerTransactionVectorIndex<T extends TinkerElement> extends Abstra */ public static final String CONFIG_DISTANCE_FUNCTION = "distanceFunction"; + /** + * Configuration key for the growth rate of the index when it reaches capacity + */ + public static final String CONFIG_GROWTH_RATE = "growthRate"; + /** * Creates a new vector index for the specified graph and element class. * @@ -183,6 +199,15 @@ final class TinkerTransactionVectorIndex<T extends TinkerElement> extends Abstra } } + double growthRate = DEFAULT_GROWTH_RATE; + if (configuration.containsKey(CONFIG_GROWTH_RATE)) { + final Object growthObj = configuration.get(CONFIG_GROWTH_RATE); + if (growthObj instanceof Number) { + growthRate = ((Number) growthObj).doubleValue(); + } + } + this.growthRates.put(key, growthRate); + // Create a new HNSW index for this property key final Index<Object, float[], ElementItem, Float> index = HnswIndex .newBuilder(dimension, vector.getDistanceFunction(), Float::compare, maxItems) @@ -303,7 +328,7 @@ final class TinkerTransactionVectorIndex<T extends TinkerElement> extends Abstra // If the element is not in the index, just ignore the exception } final ElementItem item = new ElementItem(element.id(), newValue, element); - index.add(item); + addWithResize(key, index, item); } /** @@ -366,26 +391,17 @@ final class TinkerTransactionVectorIndex<T extends TinkerElement> extends Abstra @Override public void remove(final String key, final Object value, final T element) { - // For vector indices, we use removeFromIndex - if (value instanceof float[]) { - removeFromIndex(key, element); - } + // only make changes to index tx close } @Override public void removeElement(final T element) { - if (this.indexClass.isAssignableFrom(element.getClass())) { - for (String key : this.indexedKeys) { - removeFromIndex(key, element); - } - } + // only make changes to index tx close } @Override public void autoUpdate(final String key, final Object newValue, final Object oldValue, final T element) { - if (this.indexedKeys.contains(key) && newValue instanceof float[]) { - updateIndex(key, (float[]) newValue, element); - } + // only make changes to index tx close } /** @@ -422,4 +438,39 @@ final class TinkerTransactionVectorIndex<T extends TinkerElement> extends Abstra public void rollback() { // No specific action needed for rollback in the current implementation } + + /** + * Helper method to add an item to the index with automatic resizing if needed. + * + * @param key the property key + * @param index the vector index + * @param item the item to add + */ + private void addWithResize(final String key, final Index<Object, float[], ElementItem, Float> index, + final ElementItem item) { + try { + index.add(item); + } catch (SizeLimitExceededException e) { + // Get the growth rate for this index + final Double growthRate = this.growthRates.getOrDefault(key, 0.0d); + + // If growth rate is 0 or not set, rethrow the exception + if (growthRate <= 0) { + throw e; + } + + // Calculate new size based on growth rate + final int currentSize = ((HnswIndex<Object, float[], ElementItem, Float>) index).getMaxItemCount(); + final int newSize = currentSize + (int) Math.ceil(currentSize * growthRate); + + // Resize the index + ((HnswIndex<Object, float[], ElementItem, Float>) index).resize(newSize); + + // Try adding the item again + index.add(item); + } catch (Exception e) { + // If it's not a size limit exception, rethrow it + throw e; + } + } } diff --git a/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerVectorIndex.java b/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerVectorIndex.java index becbaad03f..0a4c17e5b1 100644 --- a/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerVectorIndex.java +++ b/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerVectorIndex.java @@ -43,7 +43,7 @@ import java.util.stream.Collectors; final class TinkerVectorIndex<T extends Element> extends AbstractTinkerVectorIndex<T> { /** - * Map of property key to vector index + * Map of the property key to vector index */ private final Map<String, Index<Object, float[], ElementItem, Float>> vectorIndices = new ConcurrentHashMap<>(); @@ -412,7 +412,6 @@ final class TinkerVectorIndex<T extends Element> extends AbstractTinkerVectorInd } } - /** * A class that wraps an element with its vector for use in the HNSW index. */ diff --git a/tinkergraph-gremlin/src/test/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerGraphVectorIndexTest.java b/tinkergraph-gremlin/src/test/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerGraphVectorIndexTest.java index f84806283b..b3fbfe8e80 100644 --- a/tinkergraph-gremlin/src/test/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerGraphVectorIndexTest.java +++ b/tinkergraph-gremlin/src/test/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerGraphVectorIndexTest.java @@ -508,11 +508,6 @@ public class TinkerGraphVectorIndexTest { @Test public void shouldGrowIndexWhenCapacityReached() { - // Skip this test for TinkerTransactionGraph as it handles transactions differently - if (graph instanceof TinkerTransactionGraph) { - return; - } - final GraphTraversalSource g = traversal().with(graph); // Create a small index with only 5 items capacity and 50% growth rate @@ -551,11 +546,6 @@ public class TinkerGraphVectorIndexTest { @Test public void shouldThrowExceptionWhenGrowthRateIsZero() { - // Skip this test for TinkerTransactionGraph as it handles transactions differently - if (graph instanceof TinkerTransactionGraph) { - return; - } - final GraphTraversalSource g = traversal().with(graph); // Create a small index with only 5 items capacity and 0 growth rate
