garydgregory commented on code in PR #737: URL: https://github.com/apache/commons-text/pull/737#discussion_r2936652131
########## src/test/java/org/apache/commons/text/similarity/LevenshteinDistanceTest.java: ########## @@ -6,7 +6,7 @@ * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * - * https://www.apache.org/licenses/LICENSE-2.0 Review Comment: Do NOT edit license headers. ########## src/test/java/org/apache/commons/text/similarity/LevenshteinDistanceTest.java: ########## @@ -181,4 +181,58 @@ void testGetThresholdDirectlyAfterObjectInstantiation() { assertNull(LevenshteinDistance.getDefaultInstance().getThreshold()); } -} + // ------------------------------------------------------------------------- + // New Weighted Levenshtein Distance Tests + // ------------------------------------------------------------------------- + + @Test + void testConstructorWithNegativeCosts() { + assertThrows(IllegalArgumentException.class, () -> new LevenshteinDistance(null, -1, 1, 1)); + assertThrows(IllegalArgumentException.class, () -> new LevenshteinDistance(null, 1, -1, 1)); + assertThrows(IllegalArgumentException.class, () -> new LevenshteinDistance(null, 1, 1, -1)); + } + + @Test + void testGetLevenshteinDistance_WeightedUnlimited() { + // Substitution is very expensive (10) vs Insert/Delete (1 each) + final LevenshteinDistance dist = new LevenshteinDistance(null, 1, 1, 10); + // 'a' -> 'b' should choose delete 'a' (1) and insert 'b' (1) = distance 2, + // instead of replace (10). + assertEquals(2, dist.apply("a", "b")); + + // All operations are free (0) + final LevenshteinDistance freeDist = new LevenshteinDistance(null, 0, 0, 0); + assertEquals(0, freeDist.apply("abc", "def")); + + // Asymmetric costs: Insert=10, Delete=1, Replace=100 + final LevenshteinDistance asymmetric = new LevenshteinDistance(null, 10, 1, 100); + assertEquals(1, asymmetric.apply("a", "")); // Delete 'a' = 1 + assertEquals(10, asymmetric.apply("", "a")); // Insert 'a' = 10 + } + + @Test + void testGetLevenshteinDistance_WeightedThreshold() { + // Distance is 2 (via delete/insert), threshold is 5 -> result 2 + final LevenshteinDistance weighted = new LevenshteinDistance(5, 1, 1, 10); + assertEquals(2, weighted.apply("a", "b")); + + // Distance is 2, threshold is 1 -> result -1 + final LevenshteinDistance strict = new LevenshteinDistance(1, 1, 1, 10); + assertEquals(-1, strict.apply("a", "b")); + + // Empty strings with weighted threshold + assertEquals(0, new LevenshteinDistance(5, 2, 2, 2).apply("", "")); + assertEquals(4, new LevenshteinDistance(5, 2, 2, 2).apply("aa", "")); + assertEquals(-1, new LevenshteinDistance(1, 2, 2, 2).apply("aa", "")); + } + + @Test + void testWeightedAccessors() { + final LevenshteinDistance dist = new LevenshteinDistance(10, 2, 3, 4); + assertEquals(10, dist.getThreshold()); + assertEquals(2, dist.getInsertCost()); + assertEquals(3, dist.getDeleteCost()); + assertEquals(4, dist.getReplaceCost()); + } + +} Review Comment: Don't delete the empty line at the end of files. ########## src/test/java/org/apache/commons/text/similarity/LevenshteinDistanceTest.java: ########## @@ -181,4 +181,58 @@ void testGetThresholdDirectlyAfterObjectInstantiation() { assertNull(LevenshteinDistance.getDefaultInstance().getThreshold()); } -} + // ------------------------------------------------------------------------- Review Comment: Remove these // comments please, they don't help maintain the code. Do use Javadoc if you want to call something out. ########## src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java: ########## @@ -294,48 +397,90 @@ public LevenshteinDistance() { } /** - * Constructs a new instance. If the threshold is not null, distance calculations will be limited to a maximum length. If the threshold is null, the - * unlimited version of the algorithm will be used. + * Constructs a new instance with the given threshold and all operation costs set to 1. * - * @param threshold If this is null then distances calculations will not be limited. This may not be negative. + * <p> + * If the threshold is not null, distance calculations will be limited to a maximum length. If + * the threshold is null, the unlimited version of the algorithm will be used. + * </p> + * + * @param threshold If this is null then distances calculations will not be limited. + * This may not be negative. */ public LevenshteinDistance(final Integer threshold) { + this(threshold, DEFAULT_INSERT_COST, DEFAULT_DELETE_COST, DEFAULT_REPLACE_COST); + } + + /** + * Constructs a new instance with the given threshold and custom operation costs. + * + * <p> + * If the threshold is not null, distance calculations will be limited to a maximum value. + * If the threshold is null, the unlimited version of the algorithm will be used. + * </p> + * + * <p> + * All cost parameters must be non-negative integers. Passing 0 for a cost makes that + * operation free; passing values greater than 1 makes it more expensive relative to + * the other operations. + * </p> + * + * @param threshold If this is null then distance calculations will not be limited. + * This may not be negative. + * @param insertCost the cost of inserting a character, must not be negative. + * @param deleteCost the cost of deleting a character, must not be negative. + * @param replaceCost the cost of replacing (substituting) a character, must not be negative. + * @throws IllegalArgumentException if threshold is negative, or any cost is negative. + * @since 1.13.0 + */ + public LevenshteinDistance(final Integer threshold, final int insertCost, final int deleteCost, + final int replaceCost) { if (threshold != null && threshold < 0) { throw new IllegalArgumentException("Threshold must not be negative"); } - this.threshold = threshold; + if (insertCost < 0) { + throw new IllegalArgumentException("Insert cost must not be negative"); + } + if (deleteCost < 0) { + throw new IllegalArgumentException("Delete cost must not be negative"); + } + if (replaceCost < 0) { + throw new IllegalArgumentException("Replace cost must not be negative"); + } + this.threshold = threshold; + this.insertCost = insertCost; + this.deleteCost = deleteCost; + this.replaceCost = replaceCost; } + // ------------------------------------------------------------------------- Review Comment: Remove these // comments please, they don't help maintain the code. ########## src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java: ########## @@ -349,41 +494,60 @@ public Integer apply(final CharSequence left, final CharSequence right) { * A higher score indicates a greater distance. * </p> * - * <pre> - * distance.apply(null, *) = Throws {@link IllegalArgumentException} - * distance.apply(*, null) = Throws {@link IllegalArgumentException} - * distance.apply("","") = 0 - * distance.apply("","a") = 1 - * distance.apply("aaapppp", "") = 7 - * distance.apply("frog", "fog") = 1 - * distance.apply("fly", "ant") = 3 - * distance.apply("elephant", "hippo") = 7 - * distance.apply("hippo", "elephant") = 7 - * distance.apply("hippo", "zzzzzzzz") = 8 - * distance.apply("hello", "hallo") = 1 - * </pre> - * * @param <E> The type of similarity score unit. * @param left the first input, must not be null. * @param right the second input, must not be null. - * @return result distance, or -1. - * @throws IllegalArgumentException if either String input {@code null}. + * @return result distance, or -1 if a threshold is set and the distance exceeds it. + * @throws IllegalArgumentException if either input is {@code null}. * @since 1.13.0 */ public <E> Integer apply(final SimilarityInput<E> left, final SimilarityInput<E> right) { if (threshold != null) { - return limitedCompare(left, right, threshold); + return limitedCompare(left, right, threshold, insertCost, deleteCost, replaceCost); } - return unlimitedCompare(left, right); + return unlimitedCompare(left, right, insertCost, deleteCost, replaceCost); } + // ------------------------------------------------------------------------- Review Comment: Remove these // comments please, they don't help maintain the code. ########## src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java: ########## @@ -51,239 +72,321 @@ public static LevenshteinDistance getDefaultInstance() { } /** - * Finds the Levenshtein distance between two CharSequences if it's less than or equal to a given threshold. + * Finds the Levenshtein distance between two CharSequences if it's less than or equal to a given + * threshold, using configurable costs for insert, delete, and replace operations. + * + * <p> + * This implementation follows from Algorithms on Strings, Trees and Sequences by Dan Gusfield and + * Chas Emerick's implementation of the Levenshtein distance algorithm. + * </p> * * <p> - * This implementation follows from Algorithms on Strings, Trees and Sequences by Dan Gusfield and Chas Emerick's implementation of the Levenshtein distance - * algorithm. + * Note: The stripe-width optimisation used in the default (all-costs-1) case relies on the + * assumption that each operation costs exactly 1. When custom costs are supplied the stripe + * cannot be reliably bounded to {@code 2*threshold+1}, so the full O(nm) DP table is used + * instead, returning -1 only when the final distance exceeds the threshold. * </p> * * <pre> - * limitedCompare(null, *, *) = Throws {@link IllegalArgumentException} - * limitedCompare(*, null, *) = Throws {@link IllegalArgumentException} - * limitedCompare(*, *, -1) = Throws {@link IllegalArgumentException} - * limitedCompare("","", 0) = 0 - * limitedCompare("aaapppp", "", 8) = 7 - * limitedCompare("aaapppp", "", 7) = 7 - * limitedCompare("aaapppp", "", 6)) = -1 - * limitedCompare("elephant", "hippo", 7) = 7 - * limitedCompare("elephant", "hippo", 6) = -1 - * limitedCompare("hippo", "elephant", 7) = 7 - * limitedCompare("hippo", "elephant", 6) = -1 + * limitedCompare(null, *, *, *, *, *) = Throws {@link IllegalArgumentException} + * limitedCompare(*, null, *, *, *, *) = Throws {@link IllegalArgumentException} + * limitedCompare(*, *, -1, *, *, *) = Throws {@link IllegalArgumentException} + * limitedCompare("","", 0, 1, 1, 1) = 0 + * limitedCompare("aaapppp", "", 8, 1, 1, 1) = 7 + * limitedCompare("aaapppp", "", 7, 1, 1, 1) = 7 + * limitedCompare("aaapppp", "", 6, 1, 1, 1)) = -1 + * limitedCompare("elephant", "hippo", 7, 1, 1, 1) = 7 + * limitedCompare("elephant", "hippo", 6, 1, 1, 1) = -1 + * limitedCompare("hippo", "elephant", 7, 1, 1, 1) = 7 + * limitedCompare("hippo", "elephant", 6, 1, 1, 1) = -1 * </pre> * - * @param left the first SimilarityInput, must not be null. - * @param right the second SimilarityInput, must not be null. - * @param threshold the target threshold, must not be negative. - * @return result distance, or -1 + * @param left the first SimilarityInput, must not be null. + * @param right the second SimilarityInput, must not be null. + * @param threshold the target threshold, must not be negative. + * @param insertCost the cost of an insertion operation, must not be negative. + * @param deleteCost the cost of a deletion operation, must not be negative. + * @param replaceCost the cost of a substitution operation, must not be negative. + * @return result distance, or -1 if the distance exceeds the threshold. */ - private static <E> int limitedCompare(SimilarityInput<E> left, SimilarityInput<E> right, final int threshold) { // NOPMD + private static <E> int limitedCompare(SimilarityInput<E> left, SimilarityInput<E> right, // NOPMD + final int threshold, final int insertCost, final int deleteCost, final int replaceCost) { if (left == null || right == null) { throw new IllegalArgumentException("CharSequences must not be null"); } - - /* - * This implementation only computes the distance if it's less than or equal to the threshold value, returning -1 if it's greater. The advantage is - * performance: unbounded distance is O(nm), but a bound of k allows us to reduce it to O(km) time by only computing a diagonal stripe of width 2k + 1 - * of the cost table. It is also possible to use this to compute the unbounded Levenshtein distance by starting the threshold at 1 and doubling each - * time until the distance is found; this is O(dm), where d is the distance. - * - * One subtlety comes from needing to ignore entries on the border of our stripe, for example, - * p[] = |#|#|#|* d[] = *|#|#|#| We must ignore the entry to the left - * of the leftmost member We must ignore the entry above the rightmost member - * - * Another subtlety comes from our stripe running off the matrix if the strings aren't of the same size. Since string s is always swapped to be the - * shorter of the two, the stripe will always run off to the upper right instead of the lower left of the matrix. - * - * As a concrete example, suppose s is of length 5, t is of length 7, and our threshold is 1. In this case we're going to walk a stripe of length 3. The - * matrix would look like so: - * - * <pre> 1 2 3 4 5 1 |#|#| | | | 2 |#|#|#| | | 3 | |#|#|#| | 4 | | |#|#|#| 5 | | | |#|#| 6 | | | | |#| 7 | | | | | | </pre> - * - * Note how the stripe leads off the table as there is no possible way to turn a string of length 5 into one of length 7 in edit distance of 1. - * - * Additionally, this implementation decreases memory usage by using two single-dimensional arrays and swapping them back and forth instead of - * allocating an entire n by m matrix. This requires a few minor changes, such as immediately returning when it's detected that the stripe has run off - * the matrix and initially filling the arrays with large values so that entries we don't compute are ignored. - * - * See Algorithms on Strings, Trees and Sequences by Dan Gusfield for some discussion. - */ + if (threshold < 0) { + throw new IllegalArgumentException("Threshold must not be negative"); + } int n = left.length(); // length of left int m = right.length(); // length of right - // if one string is empty, the edit distance is necessarily the length - // of the other + // If one string is empty, the edit distance is the cost of inserting/deleting + // all characters of the other string. if (n == 0) { - return m <= threshold ? m : -1; + final int dist = m * insertCost; + return dist <= threshold ? dist : -1; } if (m == 0) { - return n <= threshold ? n : -1; + final int dist = n * deleteCost; + return dist <= threshold ? dist : -1; + } + + // When all costs equal 1, use the classic diagonal-stripe optimisation. + // For asymmetric costs the stripe width is not reliably bounded, so fall + // back to the full O(nm) table and threshold-check only at the end. + if (insertCost == 1 && deleteCost == 1 && replaceCost == 1) { + return limitedCompareUniformCost(left, right, threshold, n, m); } + return limitedCompareCustomCost(left, right, threshold, insertCost, deleteCost, replaceCost, n, m); + } + + /** + * Classic stripe-optimised O(km) limited compare for uniform unit costs. + * This preserves the original algorithm exactly. + */ + private static <E> int limitedCompareUniformCost(SimilarityInput<E> left, SimilarityInput<E> right, + final int threshold, int n, int m) { if (n > m) { - // swap the two strings to consume less memory final SimilarityInput<E> tmp = left; left = right; right = tmp; n = m; m = right.length(); } - // the edit distance cannot be less than the length difference if (m - n > threshold) { return -1; } - int[] p = new int[n + 1]; // 'previous' cost array, horizontally - int[] d = new int[n + 1]; // cost array, horizontally - int[] tempD; // placeholder to assist in swapping p and d + int[] p = new int[n + 1]; + int[] d = new int[n + 1]; + int[] tempD; - // fill in starting table values final int boundary = Math.min(n, threshold) + 1; for (int i = 0; i < boundary; i++) { p[i] = i; } - // these fills ensure that the value above the rightmost entry of our - // stripe will be ignored in following loop iterations Arrays.fill(p, boundary, p.length, Integer.MAX_VALUE); Arrays.fill(d, Integer.MAX_VALUE); - // iterates through t for (int j = 1; j <= m; j++) { - final E rightJ = right.at(j - 1); // jth character of right + final E rightJ = right.at(j - 1); d[0] = j; - // compute stripe indices, constrain to array size final int min = Math.max(1, j - threshold); final int max = j > Integer.MAX_VALUE - threshold ? n : Math.min(n, j + threshold); - // ignore entry left of leftmost if (min > 1) { d[min - 1] = Integer.MAX_VALUE; } int lowerBound = Integer.MAX_VALUE; - // iterates through [min, max] in s for (int i = min; i <= max; i++) { if (left.at(i - 1).equals(rightJ)) { - // diagonally left and up d[i] = p[i - 1]; } else { - // 1 + minimum of cell to the left, to the top, diagonally - // left and up d[i] = 1 + Math.min(Math.min(d[i - 1], p[i]), p[i - 1]); } lowerBound = Math.min(lowerBound, d[i]); } - // if the lower bound is greater than the threshold, then exit early if (lowerBound > threshold) { return -1; } - // copy current distance counts to 'previous row' distance counts tempD = p; p = d; d = tempD; } - // if p[n] is greater than the threshold, there's no guarantee on it - // being the correct - // distance if (p[n] <= threshold) { return p[n]; } return -1; } /** - * Finds the Levenshtein distance between two Strings. + * Full O(nm) limited compare for custom (non-uniform) operation costs. + * Uses two rolling arrays to keep memory at O(min(n,m)). * * <p> - * A higher score indicates a greater distance. + * When {@code deleteCost != insertCost} swapping the strings would change the + * semantics (delete on the original becomes insert on the swapped copy), so + * we always keep left as-is and pay the correct directional cost. * </p> + */ + private static <E> int limitedCompareCustomCost(final SimilarityInput<E> left, final SimilarityInput<E> right, + final int threshold, final int insertCost, final int deleteCost, final int replaceCost, + final int n, final int m) { + + // p[i] = cost to convert left[0..i-1] to right[0..j-1] (previous row) + int[] p = new int[n + 1]; + int[] d = new int[n + 1]; + + // Base case: convert left[0..i-1] to empty string via i deletions. + for (int i = 0; i <= n; i++) { + p[i] = i * deleteCost; + } + + for (int j = 1; j <= m; j++) { + final E rightJ = right.at(j - 1); + // Base case: convert empty string to right[0..j-1] via j insertions. + d[0] = j * insertCost; + + for (int i = 1; i <= n; i++) { + if (left.at(i - 1).equals(rightJ)) { + // Characters match ? no operation needed (cost 0). + d[i] = p[i - 1]; + } else { + // Minimum of: delete left[i-1], insert right[j-1], or replace. + d[i] = Math.min( + Math.min(d[i - 1] + insertCost, // insert right[j-1] + p[i] + deleteCost), // delete left[i-1] + p[i - 1] + replaceCost // replace + ); + } + } + + // Swap rows. + final int[] tempD = p; + p = d; + d = tempD; + } + + if (p[n] <= threshold) { + return p[n]; + } + return -1; + } + + /** + * Finds the Levenshtein distance between two Strings using configurable + * insert, delete, and replace costs. * * <p> - * This implementation only need one single-dimensional arrays of length s.length() + 1 + * A higher score indicates a greater distance. * </p> * * <pre> - * unlimitedCompare(null, *) = Throws {@link IllegalArgumentException} - * unlimitedCompare(*, null) = Throws {@link IllegalArgumentException} - * unlimitedCompare("","") = 0 - * unlimitedCompare("","a") = 1 - * unlimitedCompare("aaapppp", "") = 7 - * unlimitedCompare("frog", "fog") = 1 - * unlimitedCompare("fly", "ant") = 3 - * unlimitedCompare("elephant", "hippo") = 7 - * unlimitedCompare("hippo", "elephant") = 7 - * unlimitedCompare("hippo", "zzzzzzzz") = 8 - * unlimitedCompare("hello", "hallo") = 1 + * unlimitedCompare(null, *, *, *, *) = Throws {@link IllegalArgumentException} + * unlimitedCompare(*, null, *, *, *) = Throws {@link IllegalArgumentException} + * unlimitedCompare("","", 1, 1, 1) = 0 + * unlimitedCompare("","a", 1, 1, 1) = 1 + * unlimitedCompare("aaapppp", "", 1, 1, 1) = 7 + * unlimitedCompare("frog", "fog", 1, 1, 1) = 1 + * unlimitedCompare("fly", "ant", 1, 1, 1) = 3 + * unlimitedCompare("elephant", "hippo", 1, 1, 1) = 7 + * unlimitedCompare("hippo", "elephant", 1, 1, 1) = 7 + * unlimitedCompare("hippo", "zzzzzzzz", 1, 1, 1) = 8 + * unlimitedCompare("hello", "hallo", 1, 1, 1) = 1 * </pre> * - * @param left the first CharSequence, must not be null. - * @param right the second CharSequence, must not be null. - * @return result distance, or -1. + * @param left the first CharSequence, must not be null. + * @param right the second CharSequence, must not be null. + * @param insertCost the cost of an insertion operation, must not be negative. + * @param deleteCost the cost of a deletion operation, must not be negative. + * @param replaceCost the cost of a substitution operation, must not be negative. + * @return result distance. * @throws IllegalArgumentException if either CharSequence input is {@code null}. */ - private static <E> int unlimitedCompare(SimilarityInput<E> left, SimilarityInput<E> right) { + private static <E> int unlimitedCompare(SimilarityInput<E> left, SimilarityInput<E> right, + final int insertCost, final int deleteCost, final int replaceCost) { if (left == null || right == null) { throw new IllegalArgumentException("CharSequences must not be null"); } - /* - * This implementation use two variable to record the previous cost counts, So this implementation use less memory than previous impl. - */ + int n = left.length(); // length of left int m = right.length(); // length of right if (n == 0) { - return m; + return m * insertCost; } if (m == 0) { - return n; + return n * deleteCost; } - if (n > m) { - // swap the input strings to consume less memory + + // When costs are symmetric (insert == delete) we can safely swap the + // shorter string into 'left' to minimise the working array size. + // When insert != delete, swapping reverses the semantics of those two + // operations, so we must keep the original orientation. + final boolean canSwap = insertCost == deleteCost; + if (canSwap && n > m) { final SimilarityInput<E> tmp = left; left = right; right = tmp; n = m; m = right.length(); } + + // Single rolling array of length n+1. final int[] p = new int[n + 1]; - // indexes into strings left and right - int i; // iterates through left - int j; // iterates through right + + // Base case: converting left[0..i-1] ? "" costs i deletions. + for (int i = 0; i <= n; i++) { + p[i] = i * deleteCost; + } + int upperLeft; int upper; - E rightJ; // jth character of right - int cost; // cost - for (i = 0; i <= n; i++) { - p[i] = i; - } - for (j = 1; j <= m; j++) { + + for (int j = 1; j <= m; j++) { upperLeft = p[0]; - rightJ = right.at(j - 1); - p[0] = j; + final E rightJ = right.at(j - 1); + // Base case: converting "" ? right[0..j-1] costs j insertions. + p[0] = j * insertCost; - for (i = 1; i <= n; i++) { + for (int i = 1; i <= n; i++) { upper = p[i]; - cost = left.at(i - 1).equals(rightJ) ? 0 : 1; - // minimum of cell to the left+1, to the top+1, diagonally left and up +cost - p[i] = Math.min(Math.min(p[i - 1] + 1, p[i] + 1), upperLeft + cost); + if (left.at(i - 1).equals(rightJ)) { + // Characters match ? carry diagonal (no cost). + p[i] = upperLeft; + } else { + // Minimum of insert, delete, or replace. + p[i] = Math.min( + Math.min(p[i - 1] + insertCost, // insert right[j-1] + p[i] + deleteCost), // delete left[i-1] + upperLeft + replaceCost // replace + ); + } upperLeft = upper; } } return p[n]; } + // ------------------------------------------------------------------------- + // Instance state + // ------------------------------------------------------------------------- + /** - * Threshold. + * Threshold (nullable). When non-null, {@link #limitedCompare} is used + * instead of {@link #unlimitedCompare}. */ private final Integer threshold; /** - * Constructs a default instance that uses a version of the algorithm that does not use a threshold parameter. + * Cost of inserting a character into the left sequence. + */ + private final int insertCost; + + /** + * Cost of deleting a character from the left sequence. + */ + private final int deleteCost; + + /** + * Cost of substituting one character for another. + */ + private final int replaceCost; + + // ------------------------------------------------------------------------- Review Comment: Remove these // comments please, they don't help maintain the code. ########## src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java: ########## @@ -294,48 +397,90 @@ public LevenshteinDistance() { } /** - * Constructs a new instance. If the threshold is not null, distance calculations will be limited to a maximum length. If the threshold is null, the - * unlimited version of the algorithm will be used. + * Constructs a new instance with the given threshold and all operation costs set to 1. * - * @param threshold If this is null then distances calculations will not be limited. This may not be negative. + * <p> + * If the threshold is not null, distance calculations will be limited to a maximum length. If + * the threshold is null, the unlimited version of the algorithm will be used. + * </p> + * + * @param threshold If this is null then distances calculations will not be limited. + * This may not be negative. */ public LevenshteinDistance(final Integer threshold) { + this(threshold, DEFAULT_INSERT_COST, DEFAULT_DELETE_COST, DEFAULT_REPLACE_COST); + } + + /** + * Constructs a new instance with the given threshold and custom operation costs. + * + * <p> + * If the threshold is not null, distance calculations will be limited to a maximum value. + * If the threshold is null, the unlimited version of the algorithm will be used. + * </p> + * + * <p> + * All cost parameters must be non-negative integers. Passing 0 for a cost makes that + * operation free; passing values greater than 1 makes it more expensive relative to + * the other operations. + * </p> + * + * @param threshold If this is null then distance calculations will not be limited. + * This may not be negative. + * @param insertCost the cost of inserting a character, must not be negative. + * @param deleteCost the cost of deleting a character, must not be negative. + * @param replaceCost the cost of replacing (substituting) a character, must not be negative. + * @throws IllegalArgumentException if threshold is negative, or any cost is negative. + * @since 1.13.0 + */ + public LevenshteinDistance(final Integer threshold, final int insertCost, final int deleteCost, Review Comment: Use a builder instead, this will avoid constructor inflation and depreciations in the future. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
