benwtrent commented on code in PR #12910:
URL: https://github.com/apache/lucene/pull/12910#discussion_r1422799676
##########
lucene/core/src/java/org/apache/lucene/util/hnsw/NeighborArray.java:
##########
@@ -201,9 +225,69 @@ private int descSortFindRightMostInsertionPoint(float
newScore, int bound) {
int end = bound - 1;
while (start <= end) {
int mid = (start + end) / 2;
- if (score[mid] < newScore) end = mid - 1;
+ if (scores[mid] < newScore) end = mid - 1;
else start = mid + 1;
}
return start;
}
+
+ /**
+ * Find first non-diverse neighbour among the list of neighbors starting
from the most distant
+ * neighbours
+ */
+ private int findWorstNonDiverse(int nodeOrd, RandomVectorScorerSupplier
scorerSupplier)
+ throws IOException {
+ RandomVectorScorer scorer = scorerSupplier.scorer(nodeOrd);
+ int[] uncheckedIndexes = sort(scorer);
+ if (uncheckedIndexes == null) {
+ // all nodes are checked, we will directly return the most distant one
+ return size - 1;
+ }
Review Comment:
This still doesn't make intuitive sense to me, though it's just copying what
was done already.
"checked" here indicates whether the nodes are sorted or not. But this implies
that sorting only ever occurs when we check for diversity, and that the
least-diverse neighbor is also the furthest one. This doesn't seem correct to
me. But that is a discussion for a separate time.
##########
lucene/core/src/java/org/apache/lucene/util/hnsw/NeighborArray.java:
##########
@@ -34,14 +34,14 @@
public class NeighborArray {
private final boolean scoresDescOrder;
private int size;
- float[] score;
- int[] node;
+ private float[] scores;
+ private int[] nodes;
Review Comment:
If we are disallowing array size growth, we need to make these `final`.
##########
lucene/core/src/java/org/apache/lucene/util/hnsw/NeighborArray.java:
##########
@@ -51,45 +51,61 @@ public NeighborArray(int maxSize, boolean descOrder) {
*/
public void addInOrder(int newNode, float newScore) {
assert size == sortedNodeSize : "cannot call addInOrder after
addOutOfOrder";
- if (size == node.length) {
- node = ArrayUtil.grow(node);
- score = ArrayUtil.growExact(score, node.length);
+ if (size == nodes.length) {
+ throw new IllegalStateException("No growth is allowed");
}
if (size > 0) {
- float previousScore = score[size - 1];
+ float previousScore = scores[size - 1];
assert ((scoresDescOrder && (previousScore >= newScore))
|| (scoresDescOrder == false && (previousScore <= newScore)))
: "Nodes are added in the incorrect order! Comparing "
+ newScore
+ " to "
- + Arrays.toString(ArrayUtil.copyOfSubArray(score, 0, size));
+ + Arrays.toString(ArrayUtil.copyOfSubArray(scores, 0, size));
}
- node[size] = newNode;
- score[size] = newScore;
+ nodes[size] = newNode;
+ scores[size] = newScore;
++size;
++sortedNodeSize;
}
/** Add node and newScore but do not insert as sorted */
public void addOutOfOrder(int newNode, float newScore) {
- if (size == node.length) {
- node = ArrayUtil.grow(node);
- score = ArrayUtil.growExact(score, node.length);
+ if (size == nodes.length) {
+ throw new IllegalStateException("No growth is allowed");
}
- score[size] = newScore;
- node[size] = newNode;
+ scores[size] = newScore;
+ nodes[size] = newNode;
size++;
}
+ /**
+ * In addition to {@link #addOutOfOrder(int, float)}, this function will
also remove the
+ * least-diverse node if the node array is full after insertion
+ *
+ * @param nodeId node Id of the owner of this NeighbourArray
Review Comment:
Maybe add something about how this isn't thread-safe?
##########
lucene/core/src/java/org/apache/lucene/util/hnsw/NeighborArray.java:
##########
@@ -51,45 +51,61 @@ public NeighborArray(int maxSize, boolean descOrder) {
*/
public void addInOrder(int newNode, float newScore) {
assert size == sortedNodeSize : "cannot call addInOrder after
addOutOfOrder";
- if (size == node.length) {
- node = ArrayUtil.grow(node);
- score = ArrayUtil.growExact(score, node.length);
+ if (size == nodes.length) {
+ throw new IllegalStateException("No growth is allowed");
}
if (size > 0) {
- float previousScore = score[size - 1];
+ float previousScore = scores[size - 1];
assert ((scoresDescOrder && (previousScore >= newScore))
|| (scoresDescOrder == false && (previousScore <= newScore)))
: "Nodes are added in the incorrect order! Comparing "
+ newScore
+ " to "
- + Arrays.toString(ArrayUtil.copyOfSubArray(score, 0, size));
+ + Arrays.toString(ArrayUtil.copyOfSubArray(scores, 0, size));
}
- node[size] = newNode;
- score[size] = newScore;
+ nodes[size] = newNode;
+ scores[size] = newScore;
++size;
++sortedNodeSize;
}
/** Add node and newScore but do not insert as sorted */
public void addOutOfOrder(int newNode, float newScore) {
- if (size == node.length) {
- node = ArrayUtil.grow(node);
- score = ArrayUtil.growExact(score, node.length);
+ if (size == nodes.length) {
+ throw new IllegalStateException("No growth is allowed");
}
- score[size] = newScore;
- node[size] = newNode;
+ scores[size] = newScore;
+ nodes[size] = newNode;
size++;
}
+ /**
+ * In addition to {@link #addOutOfOrder(int, float)}, this function will
also remove the
+ * least-diverse node if the node array is full after insertion
+ *
+ * @param nodeId node Id of the owner of this NeighbourArray
+ */
+ public void addAndEnsureDiversity(
Review Comment:
Could this be package-private?
##########
lucene/core/src/java/org/apache/lucene/util/hnsw/NeighborArray.java:
##########
@@ -51,45 +51,61 @@ public NeighborArray(int maxSize, boolean descOrder) {
*/
public void addInOrder(int newNode, float newScore) {
assert size == sortedNodeSize : "cannot call addInOrder after
addOutOfOrder";
- if (size == node.length) {
- node = ArrayUtil.grow(node);
- score = ArrayUtil.growExact(score, node.length);
+ if (size == nodes.length) {
+ throw new IllegalStateException("No growth is allowed");
}
if (size > 0) {
- float previousScore = score[size - 1];
+ float previousScore = scores[size - 1];
assert ((scoresDescOrder && (previousScore >= newScore))
|| (scoresDescOrder == false && (previousScore <= newScore)))
: "Nodes are added in the incorrect order! Comparing "
+ newScore
+ " to "
- + Arrays.toString(ArrayUtil.copyOfSubArray(score, 0, size));
+ + Arrays.toString(ArrayUtil.copyOfSubArray(scores, 0, size));
}
- node[size] = newNode;
- score[size] = newScore;
+ nodes[size] = newNode;
+ scores[size] = newScore;
++size;
++sortedNodeSize;
}
/** Add node and newScore but do not insert as sorted */
public void addOutOfOrder(int newNode, float newScore) {
- if (size == node.length) {
Review Comment:
Could `addOutOfOrder` be package-private at least?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]