2014-12-13 4:21 GMT+01:00 <ki...@apache.org>: > > Repository: commons-text > Updated Branches: > refs/heads/master 87b789fbe -> 7570eb016 > > > SANDBOX-483 Add changes and fix old Javadocs from [lang] that remained > after the code porting > > > Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo > Commit: > http://git-wip-us.apache.org/repos/asf/commons-text/commit/7570eb01 > Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/7570eb01 > Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/7570eb01 > > Branch: refs/heads/master > Commit: 7570eb0163cab027b444ca55e6d4c9768fcd0d34 > Parents: 87b789f > Author: Bruno P. Kinoshita <ki...@apache.org> > Authored: Sat Dec 13 01:21:11 2014 -0200 > Committer: Bruno P. Kinoshita <ki...@apache.org> > Committed: Sat Dec 13 01:21:11 2014 -0200 > > ---------------------------------------------------------------------- > src/changes/changes.xml | 1 + > .../commons/text/similarity/FuzzyDistance.java | 20 ++-- > .../text/similarity/JaroWrinklerDistance.java | 103 +++++++++---------- > .../text/similarity/LevenshteinDistance.java | 44 ++++---- > 4 files changed, 83 insertions(+), 85 deletions(-) > ---------------------------------------------------------------------- > > > > http://git-wip-us.apache.org/repos/asf/commons-text/blob/7570eb01/src/changes/changes.xml > ---------------------------------------------------------------------- > diff --git a/src/changes/changes.xml b/src/changes/changes.xml > index d8c3fdf..f890519 100644 > --- a/src/changes/changes.xml > +++ b/src/changes/changes.xml > @@ -23,6 +23,7 @@ > > <release version="1.0" date="tba" description="tba"> > <action issue="SANDBOX-485" type="add" dev="kinow">Add Hamming > distance</action> > + <action issue="SANDBOX-483" type="add" dev="kinow" > due-to="britter">Incorporate String algorithms from Commons Lang</action> >
Thanks for the kudos, but I didn't do much to resolve this issue :-) > </release> > > </body> > > > http://git-wip-us.apache.org/repos/asf/commons-text/blob/7570eb01/src/main/java/org/apache/commons/text/similarity/FuzzyDistance.java > ---------------------------------------------------------------------- > diff --git > a/src/main/java/org/apache/commons/text/similarity/FuzzyDistance.java > b/src/main/java/org/apache/commons/text/similarity/FuzzyDistance.java > index 8e9228a..f4299ea 100644 > --- a/src/main/java/org/apache/commons/text/similarity/FuzzyDistance.java > +++ b/src/main/java/org/apache/commons/text/similarity/FuzzyDistance.java > @@ -26,6 +26,10 @@ import java.util.Locale; > * indicates a higher similarity. > * </p> > * > + * <p> > + * This code has been adapted from Apache Commons Lang 3.3. > + * </p> > + * > * @since 1.0 > */ > public class FuzzyDistance implements StringMetric<Integer> { > @@ -54,14 +58,14 @@ public class FuzzyDistance implements > StringMetric<Integer> { > * </p> > * > * <pre> > - * StringUtils.getFuzzyDistance(null, null, null) > = IllegalArgumentException > - * StringUtils.getFuzzyDistance("", "", Locale.ENGLISH) > = 0 > - * StringUtils.getFuzzyDistance("Workshop", "b", Locale.ENGLISH) > = 0 > - * StringUtils.getFuzzyDistance("Room", "o", Locale.ENGLISH) > = 1 > - * StringUtils.getFuzzyDistance("Workshop", "w", Locale.ENGLISH) > = 1 > - * StringUtils.getFuzzyDistance("Workshop", "ws", Locale.ENGLISH) > = 2 > - * StringUtils.getFuzzyDistance("Workshop", "wo", Locale.ENGLISH) > = 4 > - * StringUtils.getFuzzyDistance("Apache Software Foundation", "asf", > Locale.ENGLISH) = 3 > + * distance.getFuzzyDistance(null, null, null) > = IllegalArgumentException > + * distance.getFuzzyDistance("", "", Locale.ENGLISH) > = 0 > + * distance.getFuzzyDistance("Workshop", "b", Locale.ENGLISH) > = 0 > + * distance.getFuzzyDistance("Room", "o", Locale.ENGLISH) > = 1 > + * distance.getFuzzyDistance("Workshop", "w", Locale.ENGLISH) > = 1 > + * distance.getFuzzyDistance("Workshop", "ws", Locale.ENGLISH) > = 2 > + * distance.getFuzzyDistance("Workshop", "wo", Locale.ENGLISH) > = 4 > + * distance.getFuzzyDistance("Apache Software Foundation", "asf", > Locale.ENGLISH) = 3 > * </pre> > * > * @param term a full term that should be matched against, must not > be null > > > http://git-wip-us.apache.org/repos/asf/commons-text/blob/7570eb01/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java > ---------------------------------------------------------------------- > diff --git > a/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java > b/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java > index 3a94969..67aa2b8 100644 > --- > a/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java > +++ > b/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java > @@ -49,20 +49,20 @@ public class JaroWrinklerDistance implements > StringMetric<Double> { > * </p> > * > * <pre> > - * StringUtils.getJaroWinklerDistance(null, null) = > IllegalArgumentException > - * StringUtils.getJaroWinklerDistance("","") = 0.0 > - * StringUtils.getJaroWinklerDistance("","a") = 0.0 > - * StringUtils.getJaroWinklerDistance("aaapppp", "") = 0.0 > - * StringUtils.getJaroWinklerDistance("frog", "fog") = 0.93 > - * StringUtils.getJaroWinklerDistance("fly", "ant") = 0.0 > - * StringUtils.getJaroWinklerDistance("elephant", "hippo") = 0.44 > - * StringUtils.getJaroWinklerDistance("hippo", "elephant") = 0.44 > - * StringUtils.getJaroWinklerDistance("hippo", "zzzzzzzz") = 0.0 > - * StringUtils.getJaroWinklerDistance("hello", "hallo") = 0.88 > - * StringUtils.getJaroWinklerDistance("ABC Corporation", "ABC Corp") > = 0.91 > - * StringUtils.getJaroWinklerDistance("D N H Enterprises Inc", "D > & H Enterprises, Inc.") = 0.93 > - * StringUtils.getJaroWinklerDistance("My Gym Children's Fitness > Center", "My Gym. Childrens Fitness") = 0.94 > - * StringUtils.getJaroWinklerDistance("PENNSYLVANIA", > "PENNCISYLVNIA") = 0.9 > + * distance.getJaroWinklerDistance(null, null) = > IllegalArgumentException > + * distance.getJaroWinklerDistance("","") = 0.0 > + * distance.getJaroWinklerDistance("","a") = 0.0 > + * distance.getJaroWinklerDistance("aaapppp", "") = 0.0 > + * distance.getJaroWinklerDistance("frog", "fog") = 0.93 > + * distance.getJaroWinklerDistance("fly", "ant") = 0.0 > + * distance.getJaroWinklerDistance("elephant", "hippo") = 0.44 > + * distance.getJaroWinklerDistance("hippo", "elephant") = 0.44 > + * distance.getJaroWinklerDistance("hippo", "zzzzzzzz") = 0.0 > + * distance.getJaroWinklerDistance("hello", "hallo") = 0.88 > + * distance.getJaroWinklerDistance("ABC Corporation", "ABC Corp") = > 0.91 > + * distance.getJaroWinklerDistance("D N H Enterprises Inc", "D & > H Enterprises, Inc.") = 0.93 > + * distance.getJaroWinklerDistance("My Gym Children's Fitness > Center", "My Gym. Childrens Fitness") = 0.94 > + * distance.getJaroWinklerDistance("PENNSYLVANIA", "PENNCISYLVNIA") > = 0.9 > * </pre> > * > * @param left the first String, must not be null > @@ -86,9 +86,6 @@ public class JaroWrinklerDistance implements > StringMetric<Double> { > return matchScore; > } > > - // TODO: we can move these methods to a Util class, keep them here, > - // create a common abstract class, shade lang-3.3... > - > /** > * Calculates the number of characters from the beginning of the > strings > * that match exactly one-to-one, up to a maximum of four (4) > characters. > @@ -118,30 +115,29 @@ public class JaroWrinklerDistance implements > StringMetric<Double> { > * </p> > * > * <pre> > - * StringUtils.getCommonPrefix(null) = "" > - * StringUtils.getCommonPrefix(new String[] {}) = "" > - * StringUtils.getCommonPrefix(new String[] {"abc"}) = "abc" > - * StringUtils.getCommonPrefix(new String[] {null, null}) = "" > - * StringUtils.getCommonPrefix(new String[] {"", ""}) = "" > - * StringUtils.getCommonPrefix(new String[] {"", null}) = "" > - * StringUtils.getCommonPrefix(new String[] {"abc", null, null}) = "" > - * StringUtils.getCommonPrefix(new String[] {null, null, "abc"}) = "" > - * StringUtils.getCommonPrefix(new String[] {"", "abc"}) = "" > - * StringUtils.getCommonPrefix(new String[] {"abc", ""}) = "" > - * StringUtils.getCommonPrefix(new String[] {"abc", "abc"}) = "abc" > - * StringUtils.getCommonPrefix(new String[] {"abc", "a"}) = "a" > - * StringUtils.getCommonPrefix(new String[] {"ab", "abxyz"}) = "ab" > - * StringUtils.getCommonPrefix(new String[] {"abcde", "abxyz"}) = "ab" > - * StringUtils.getCommonPrefix(new String[] {"abcde", "xyz"}) = "" > - * StringUtils.getCommonPrefix(new String[] {"xyz", "abcde"}) = "" > - * StringUtils.getCommonPrefix(new String[] {"i am a machine", "i am > a robot"}) = "i am a " > + * getCommonPrefix(null) = "" > + * getCommonPrefix(new String[] {}) = "" > + * getCommonPrefix(new String[] {"abc"}) = "abc" > + * getCommonPrefix(new String[] {null, null}) = "" > + * getCommonPrefix(new String[] {"", ""}) = "" > + * getCommonPrefix(new String[] {"", null}) = "" > + * getCommonPrefix(new String[] {"abc", null, null}) = "" > + * getCommonPrefix(new String[] {null, null, "abc"}) = "" > + * getCommonPrefix(new String[] {"", "abc"}) = "" > + * getCommonPrefix(new String[] {"abc", ""}) = "" > + * getCommonPrefix(new String[] {"abc", "abc"}) = "abc" > + * getCommonPrefix(new String[] {"abc", "a"}) = "a" > + * getCommonPrefix(new String[] {"ab", "abxyz"}) = "ab" > + * getCommonPrefix(new String[] {"abcde", "abxyz"}) = "ab" > + * getCommonPrefix(new String[] {"abcde", "xyz"}) = "" > + * getCommonPrefix(new String[] {"xyz", "abcde"}) = "" > + * getCommonPrefix(new String[] {"i am a machine", "i am a robot"}) = > "i am a " > * </pre> > * > * @param strs array of String objects, entries may be null > * @return the initial sequence of characters that are common to all > Strings > * in the array; empty String if the array is null, the > elements are > * all null or if there is no common prefix. > - * @since 2.4 > */ > public static String getCommonPrefix(final String... strs) { > if (strs == null || strs.length == 0) { > @@ -249,31 +245,28 @@ public class JaroWrinklerDistance implements > StringMetric<Double> { > * </p> > * > * <pre> > - * StringUtils.indexOfDifference(null) = -1 > - * StringUtils.indexOfDifference(new String[] {}) = -1 > - * StringUtils.indexOfDifference(new String[] {"abc"}) = -1 > - * StringUtils.indexOfDifference(new String[] {null, null}) = -1 > - * StringUtils.indexOfDifference(new String[] {"", ""}) = -1 > - * StringUtils.indexOfDifference(new String[] {"", null}) = 0 > - * StringUtils.indexOfDifference(new String[] {"abc", null, null}) = 0 > - * StringUtils.indexOfDifference(new String[] {null, null, "abc"}) = 0 > - * StringUtils.indexOfDifference(new String[] {"", "abc"}) = 0 > - * StringUtils.indexOfDifference(new String[] {"abc", ""}) = 0 > - * StringUtils.indexOfDifference(new String[] {"abc", "abc"}) = -1 > - * StringUtils.indexOfDifference(new String[] {"abc", "a"}) = 1 > - * StringUtils.indexOfDifference(new String[] {"ab", "abxyz"}) = 2 > - * StringUtils.indexOfDifference(new String[] {"abcde", "abxyz"}) = 2 > - * StringUtils.indexOfDifference(new String[] {"abcde", "xyz"}) = 0 > - * StringUtils.indexOfDifference(new String[] {"xyz", "abcde"}) = 0 > - * StringUtils.indexOfDifference(new String[] {"i am a machine", "i > am a robot"}) = 7 > + * distance.indexOfDifference(null) = -1 > + * distance.indexOfDifference(new String[] {}) = -1 > + * distance.indexOfDifference(new String[] {"abc"}) = -1 > + * distance.indexOfDifference(new String[] {null, null}) = -1 > + * distance.indexOfDifference(new String[] {"", ""}) = -1 > + * distance.indexOfDifference(new String[] {"", null}) = 0 > + * distance.indexOfDifference(new String[] {"abc", null, null}) = 0 > + * distance.indexOfDifference(new String[] {null, null, "abc"}) = 0 > + * distance.indexOfDifference(new String[] {"", "abc"}) = 0 > + * distance.indexOfDifference(new String[] {"abc", ""}) = 0 > + * distance.indexOfDifference(new String[] {"abc", "abc"}) = -1 > + * distance.indexOfDifference(new String[] {"abc", "a"}) = 1 > + * distance.indexOfDifference(new String[] {"ab", "abxyz"}) = 2 > + * distance.indexOfDifference(new String[] {"abcde", "abxyz"}) = 2 > + * distance.indexOfDifference(new String[] {"abcde", "xyz"}) = 0 > + * distance.indexOfDifference(new String[] {"xyz", "abcde"}) = 0 > + * distance.indexOfDifference(new String[] {"i am a machine", "i am a > robot"}) = 7 > * </pre> > * > * @param css array of CharSequences, entries may be null > * @return the index where the strings begin to differ; -1 if they > are all > * equal > - * @since 2.4 > - * @since 3.0 Changed signature from indexOfDifference(String...) to > - * indexOfDifference(CharSequence...) > */ > protected static int indexOfDifference(final CharSequence... css) { > if (css == null || css.length <= 1) { > > > http://git-wip-us.apache.org/repos/asf/commons-text/blob/7570eb01/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java > ---------------------------------------------------------------------- > diff --git > a/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java > b/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java > index 1793f1e..cca3dc1 100644 > --- > a/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java > +++ > b/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java > @@ -59,17 +59,17 @@ public class LevenshteinDistance implements > StringMetric<Integer> { > * </p> > * > * <pre> > - * StringUtils.getLevenshteinDistance(null, *) = > IllegalArgumentException > - * StringUtils.getLevenshteinDistance(*, null) = > IllegalArgumentException > - * StringUtils.getLevenshteinDistance("","") = 0 > - * StringUtils.getLevenshteinDistance("","a") = 1 > - * StringUtils.getLevenshteinDistance("aaapppp", "") = 7 > - * StringUtils.getLevenshteinDistance("frog", "fog") = 1 > - * StringUtils.getLevenshteinDistance("fly", "ant") = 3 > - * StringUtils.getLevenshteinDistance("elephant", "hippo") = 7 > - * StringUtils.getLevenshteinDistance("hippo", "elephant") = 7 > - * StringUtils.getLevenshteinDistance("hippo", "zzzzzzzz") = 8 > - * StringUtils.getLevenshteinDistance("hello", "hallo") = 1 > + * distance.getLevenshteinDistance(null, *) = > IllegalArgumentException > + * distance.getLevenshteinDistance(*, null) = > IllegalArgumentException > + * distance.getLevenshteinDistance("","") = 0 > + * distance.getLevenshteinDistance("","a") = 1 > + * distance.getLevenshteinDistance("aaapppp", "") = 7 > + * distance.getLevenshteinDistance("frog", "fog") = 1 > + * distance.getLevenshteinDistance("fly", "ant") = 3 > + * distance.getLevenshteinDistance("elephant", "hippo") = 7 > + * distance.getLevenshteinDistance("hippo", "elephant") = 7 > + * distance.getLevenshteinDistance("hippo", "zzzzzzzz") = 8 > + * distance.getLevenshteinDistance("hello", "hallo") = 1 > * </pre> > * > * @param left the first string, must not be null > @@ -103,17 +103,17 @@ public class LevenshteinDistance implements > StringMetric<Integer> { > * </p> > * > * <pre> > - * StringUtils.getLevenshteinDistance(null, *, *) = > IllegalArgumentException > - * StringUtils.getLevenshteinDistance(*, null, *) = > IllegalArgumentException > - * StringUtils.getLevenshteinDistance(*, *, -1) = > IllegalArgumentException > - * StringUtils.getLevenshteinDistance("","", 0) = 0 > - * StringUtils.getLevenshteinDistance("aaapppp", "", 8) = 7 > - * StringUtils.getLevenshteinDistance("aaapppp", "", 7) = 7 > - * StringUtils.getLevenshteinDistance("aaapppp", "", 6)) = -1 > - * StringUtils.getLevenshteinDistance("elephant", "hippo", 7) = 7 > - * StringUtils.getLevenshteinDistance("elephant", "hippo", 6) = -1 > - * StringUtils.getLevenshteinDistance("hippo", "elephant", 7) = 7 > - * StringUtils.getLevenshteinDistance("hippo", "elephant", 6) = -1 > + * distance.getLevenshteinDistance(null, *, *) = > IllegalArgumentException > + * distance.getLevenshteinDistance(*, null, *) = > IllegalArgumentException > + * distance.getLevenshteinDistance(*, *, -1) = > IllegalArgumentException > + * distance.getLevenshteinDistance("","", 0) = 0 > + * distance.getLevenshteinDistance("aaapppp", "", 8) = 7 > + * distance.getLevenshteinDistance("aaapppp", "", 7) = 7 > + * distance.getLevenshteinDistance("aaapppp", "", 6)) = -1 > + * distance.getLevenshteinDistance("elephant", "hippo", 7) = 7 > + * distance.getLevenshteinDistance("elephant", "hippo", 6) = -1 > + * distance.getLevenshteinDistance("hippo", "elephant", 7) = 7 > + * distance.getLevenshteinDistance("hippo", "elephant", 6) = -1 > * </pre> > * > * @param left the first string, must not be null > > -- http://people.apache.org/~britter/ http://www.systemoutprintln.de/ http://twitter.com/BenediktRitter http://github.com/britter