Any thoughts of the effect of this on the Commons Collections Bloom filter proposal?
Gary On Fri, Jan 17, 2020 at 9:00 PM <aherb...@apache.org> wrote: > This is an automated email from the ASF dual-hosted git repository. > > aherbert pushed a commit to branch master > in repository https://gitbox.apache.org/repos/asf/commons-codec.git > > > The following commit(s) were added to refs/heads/master by this push: > new f5a61f0 [CODEC-264]: Ensure hash128 maintains the sign extension > bug. > f5a61f0 is described below > > commit f5a61f0cd029f18666163f414f848ba0e1b39976 > Author: Alex Herbert <aherb...@apache.org> > AuthorDate: Sat Jan 18 01:59:55 2020 +0000 > > [CODEC-264]: Ensure hash128 maintains the sign extension bug. > > The hash128(...) method was calling the public hash128x86(...) method > with an int seed when it should have called the private hash128x86(...) > with a long seed. Thus it did not have the sign extension error. The > internal method has been renamed to avoid a name clash with the public > API. The old hash128() behaviour is now restored as the seed is passed > to the hash method with implicit long conversion. > --- > src/changes/changes.xml | 4 +++ > .../apache/commons/codec/digest/MurmurHash3.java | 11 +++--- > .../commons/codec/digest/MurmurHash3Test.java | 39 > +++++++++++++++++++++- > 3 files changed, 49 insertions(+), 5 deletions(-) > > diff --git a/src/changes/changes.xml b/src/changes/changes.xml > index 7c95625..c907953 100644 > --- a/src/changes/changes.xml > +++ b/src/changes/changes.xml > @@ -42,6 +42,10 @@ The <action> type attribute can be > add,update,fix,remove. > </properties> > <body> > > + <release version="1.15" date="YYYY-MM-DD" description="Feature and > fix release."> > + <action issue="CODEC-264" dev="aherbert" due-to="Andy Seaborne" > type="fix">MurmurHash3: Ensure hash128 maintains the sign extension > bug.</action> > + </release> > + > <release version="1.14" date="2019-12-30" description="Feature and > fix release."> > <action issue="CODEC-261" dev="aherbert" type="fix">Hex: Allow > encoding read-only ByteBuffer.</action> > <action issue="CODEC-259" dev="aherbert" type="fix">Hex: Only use > an available ByteBuffer backing array if the length equals the remaining > byte count.</action> > diff --git > a/src/main/java/org/apache/commons/codec/digest/MurmurHash3.java > b/src/main/java/org/apache/commons/codec/digest/MurmurHash3.java > index d6f45c1..ba73ef6 100644 > --- a/src/main/java/org/apache/commons/codec/digest/MurmurHash3.java > +++ b/src/main/java/org/apache/commons/codec/digest/MurmurHash3.java > @@ -800,9 +800,12 @@ public final class MurmurHash3 { > @Deprecated > public static long[] hash128(final byte[] data, final int offset, > final int length, final int seed) { > // ************ > - // Note: This fails to apply masking using 0xffffffffL to the > seed. > + // Note: This deliberately fails to apply masking using > 0xffffffffL to the seed > + // to maintain behavioural compatibility with the original > version. > + // The implicit conversion to a long will extend a negative sign > + // bit through the upper 32-bits of the long seed. These should > be zero. > // ************ > - return hash128x64(data, offset, length, seed); > + return hash128x64Internal(data, offset, length, seed); > } > > /** > @@ -820,7 +823,7 @@ public final class MurmurHash3 { > */ > public static long[] hash128x64(final byte[] data, final int offset, > final int length, final int seed) { > // Use an unsigned 32-bit integer as the seed > - return hash128x64(data, offset, length, seed & 0xffffffffL); > + return hash128x64Internal(data, offset, length, seed & > 0xffffffffL); > } > > /** > @@ -835,7 +838,7 @@ public final class MurmurHash3 { > * @param seed The initial seed value > * @return The 128-bit hash (2 longs) > */ > - private static long[] hash128x64(final byte[] data, final int offset, > final int length, final long seed) { > + private static long[] hash128x64Internal(final byte[] data, final int > offset, final int length, final long seed) { > long h1 = seed; > long h2 = seed; > final int nblocks = length >> 4; > diff --git > a/src/test/java/org/apache/commons/codec/digest/MurmurHash3Test.java > b/src/test/java/org/apache/commons/codec/digest/MurmurHash3Test.java > index 05f78f7..6f31cf7 100644 > --- a/src/test/java/org/apache/commons/codec/digest/MurmurHash3Test.java > +++ b/src/test/java/org/apache/commons/codec/digest/MurmurHash3Test.java > @@ -559,7 +559,7 @@ public class MurmurHash3Test { > {1237530251176898868L, 6144786892208594932L}, > {2347717913548230384L, -7461066668225718223L}, > {-7963311463560798404L, 8435801462986138227L}, > {-7493166089060196513L, 8163503673197886404L}, > {6807249306539951962L, -1438886581269648819L}, > {6752656991043418179L, 6334147827922066123L}, > - {-4534351735605790331L, -4530801663887858236L}, > {-7886946241830957955L, -6261339648449285315L}, }; > + {-4534351735605790331L, -4530801663887858236L}, > {-7886946241830957955L, -6261339648449285315L},}; > for (int i = 0; i < answers.length; i++) { > final byte[] bytes = Arrays.copyOf(RANDOM_BYTES, i); > Assert.assertArrayEquals(answers[i], > MurmurHash3.hash128(bytes)); > @@ -606,6 +606,43 @@ public class MurmurHash3Test { > } > > /** > + * Test the {@link MurmurHash3#hash128(byte[], int, int, int)} > algorithm. > + * > + * <p>Explicit test for a negative seed. The original implementation > has a sign extension error > + * for negative seeds. This test is here to maintain behavioural > compatibility of the > + * broken deprecated method. > + */ > + @Test > + public void testHash128WithOffsetLengthAndNegativeSeed() { > + // Seed can be negative > + final int seed = -42; > + final int offset = 13; > + > + // Test with all sizes up to 31 bytes. This ensures a full round > of 16-bytes plus up to > + // 15 bytes remaining. > + final long[][] answers = {{5954234972212089025L, > 3342108296337967352L}, > + {8501094764898402923L, 7873951092908129427L}, > {-3334322685492296196L, -2842715922956549478L}, > + {-2177918982459519644L, -1612349961980368636L}, > {4172870320608886992L, -4177375712254136503L}, > + {7546965006884307324L, -5222114032564054641L}, > {-2885083166621537267L, -2069868899915344482L}, > + {-2397098497873118851L, 4990578036999888806L}, > {-706479374719025018L, 7531201699171849870L}, > + {6487943141157228609L, 3576221902299447884L}, > {6671331646806999453L, -3428049860825046360L}, > + {-8700221138601237020L, -2748450904559980545L}, > {-9028762509863648063L, 6130259301750313402L}, > + {729958512305702590L, -36367317333638988L}, > {-3803232241584178983L, -4257744207892929651L}, > + {5734013720237474696L, -760784490666078990L}, > {-6097477411153026656L, 625288777006549065L}, > + {1320365359996757504L, -2251971390373072541L}, > {5551441703887653022L, -3516892619809375941L}, > + {698875391638415033L, 3456972931370496131L}, > {5874956830271303805L, -6074126509360777023L}, > + {-7030758398537734781L, -3174643657101295554L}, > {6835789852786226556L, 7245353136839389595L}, > + {-7755767580598793204L, -6680491060945077989L}, > {-3099789923710523185L, -2751080516924952518L}, > + {-7772046549951435453L, 5263206145535830491L}, > {7458715941971015543L, 5470582752171544854L}, > + {-7753394773760064468L, -2330157750295630617L}, > {-5899278942232791979L, 6235686401271389982L}, > + {4881732293467626532L, 2617335658565007304L}, > {-5722863941703478257L, -5424475653939430258L}, > + {-3703319768293496315L, -2124426428486426443L},}; > + for (int i = 0; i < answers.length; i++) { > + Assert.assertArrayEquals("Length: " + i, answers[i], > MurmurHash3.hash128(RANDOM_BYTES, offset, i, seed)); > + } > + } > + > + /** > * Test the {@link MurmurHash3#hash128(String)} algorithm. This only > tests it can return > * the same value as {@link MurmurHash3#hash128(byte[], int, int, > int)} if the string > * is converted to bytes using the method {@link String#getBytes()}. > >