ilariapet commented on code in PR #3644:
URL: https://github.com/apache/solr/pull/3644#discussion_r2335845239
##########
solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java:
##########
@@ -40,7 +53,90 @@ public Query parse() throws SyntaxError {
final String vectorToSearch = getVectorToSearch();
final int topK = localParams.getInt(TOP_K, DEFAULT_TOP_K);
- return denseVectorType.getKnnVectorQuery(
- schemaField.getName(), vectorToSearch, topK, getFilterQuery());
+ return wrapWithPatienceIfEarlyTerminationEnabled(
+ denseVectorType.getKnnVectorQuery(
+ schemaField.getName(), vectorToSearch, topK, getFilterQuery()));
+ }
+
+ protected Query wrapWithPatienceIfEarlyTerminationEnabled(Query knnQuery) {
+ final Double saturationThreshold =
+ Optional.ofNullable(localParams.get(SATURATION_THRESHOLD))
+ .map(KnnQParser::validateSaturationThreshold)
+ .orElse(null);
+
+ final Integer patience =
+ Optional.ofNullable(localParams.get(PATIENCE))
+ .map(KnnQParser::validatePatience)
+ .orElse(null);
+
+ final boolean useCustomParams = (saturationThreshold != null && patience
!= null);
+ if ((saturationThreshold == null) != (patience == null)) {
+ throw new SolrException(
+ SolrException.ErrorCode.BAD_REQUEST,
+ "Parameters 'saturationThreshold' and 'patience' must both be
provided, or neither.");
+ }
+
+ final boolean earlyTerminationEnabled =
+ localParams.getBool(EARLY_TERMINATION, DEFAULT_EARLY_TERMINATION) ||
useCustomParams;
+
+ if (!earlyTerminationEnabled) {
+ return knnQuery;
+ }
+
+ return switch (knnQuery) {
+ case KnnFloatVectorQuery knnFloatQuery -> useCustomParams
+ ? PatienceKnnVectorQuery.fromFloatQuery(knnFloatQuery,
saturationThreshold, patience)
+ : PatienceKnnVectorQuery.fromFloatQuery(knnFloatQuery);
+ case KnnByteVectorQuery knnByteQuery -> useCustomParams
+ ? PatienceKnnVectorQuery.fromByteQuery(knnByteQuery,
saturationThreshold, patience)
+ : PatienceKnnVectorQuery.fromByteQuery(knnByteQuery);
+ default -> throw new SolrException(
+ SolrException.ErrorCode.SERVER_ERROR,
+ "earlyTermination enabled but this is not a Knn*VectorQuery: " +
knnQuery.getClass());
+ };
+ }
+
+ private static Double validateSaturationThreshold(String value) {
+ if (value == null) {
+ return null;
+ }
+ try {
+ double parsedValue = Double.parseDouble(value);
+ if (Double.isNaN(parsedValue)
+ || Double.isInfinite(parsedValue)
+ || parsedValue <= 0.0
+ || parsedValue >= 1.0) {
+ throw new SolrException(
+ SolrException.ErrorCode.BAD_REQUEST,
+ "Invalid saturationThreshold value: must be a double between 0.0
and 1.0 (exclusive), got "
+ + parsedValue);
+ }
+ return parsedValue;
+ } catch (NumberFormatException e) {
+ throw new SolrException(
+ SolrException.ErrorCode.BAD_REQUEST,
+ "Invalid saturationThreshold value: not a valid double, got " +
value,
+ e);
+ }
+ }
+
+ private static Integer validatePatience(String value) {
+ if (value == null) {
+ return null;
+ }
+ try {
+ int parsedValue = Integer.parseInt(value);
+ if (parsedValue < 7) {
+ throw new SolrException(
+ SolrException.ErrorCode.BAD_REQUEST,
+ "Invalid patience value: must be an integer >= 7, got " +
parsedValue);
+ }
+ return parsedValue;
+ } catch (NumberFormatException e) {
+ throw new SolrException(
+ SolrException.ErrorCode.BAD_REQUEST,
+ "Invalid patience value: not a valid integer, got " + value,
+ e);
+ }
Review Comment:
Please remove this validation: Solr is only a consumer of this Lucene API, so
parameter validation should take place on the Lucene side. Otherwise, if the
same checks are later added to Lucene, Solr would end up with a duplicate copy
that can drift out of sync, leading to inconsistencies.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]