Once [compress] has its next release, what about copying XXHash32 to [codec]? That seems to me like the proper home for such things.
Gary ---------- Forwarded message ---------- From: <bode...@apache.org> Date: Tue, Jan 24, 2017 at 11:53 AM Subject: commons-compress git commit: COMPRESS-271 xxhash32 checksum To: comm...@commons.apache.org Repository: commons-compress Updated Branches: refs/heads/master c0932797e -> b5d6f1f62 COMPRESS-271 xxhash32 checksum Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/ commit/b5d6f1f6 Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/b5d6f1f6 Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/b5d6f1f6 Branch: refs/heads/master Commit: b5d6f1f622c7770d0473f380aa3156ea79dae77a Parents: c093279 Author: Stefan Bodewig <bode...@apache.org> Authored: Tue Jan 24 20:52:48 2017 +0100 Committer: Stefan Bodewig <bode...@apache.org> Committed: Tue Jan 24 20:52:48 2017 +0100 ---------------------------------------------------------------------- .../compress/compressors/lz4/XXHash32.java | 180 +++++++++++++++++++ .../compress/compressors/lz4/XXHash32Test.java | 66 +++++++ 2 files changed, 246 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-compress/ blob/b5d6f1f6/src/main/java/org/apache/commons/compress/ compressors/lz4/XXHash32.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/compress/compressors/lz4/XXHash32.java b/src/main/java/org/apache/commons/compress/compressors/lz4/XXHash32.java new file mode 100644 index 0000000..8c17dc7 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/ lz4/XXHash32.java @@ -0,0 +1,180 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.lz4; + +import static java.lang.Integer.rotateLeft; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.zip.Checksum; + +/** + * Implementation of the xxhash32 hash algorithm. + * + * @see <a href="http://cyan4973.github.io/xxHash/">xxHash</a> + * @NotThreadSafe + * @since 1.14 + */ +public class XXHash32 implements Checksum { + + private static final int BUF_SIZE = 16; + private static final int ROTATE_BITS = 13; + + private static final int PRIME1 = (int) 2654435761l; + private static final int PRIME2 = (int) 2246822519l; + private static final int PRIME3 = (int) 3266489917l; + private static final int PRIME4 = 668265263; + private static final int PRIME5 = 374761393; + + private final byte[] oneByte = new byte[1]; + private final int[] state = new int[4]; + private final ByteBuffer buffer = ByteBuffer.allocate(BUF_SIZE). order(ByteOrder.LITTLE_ENDIAN); + private final int seed; + + private int totalLen; + private int pos; + + /** + * Creates an XXHash32 instance with a seed of 0. + */ + public XXHash32() { + this(0); + } + + /** + * Creates an XXHash32 instance. 
+ */ + public XXHash32(int seed) { + this.seed = seed; + initializeState(); + } + + @Override + public void reset() { + initializeState(); + buffer.clear(); + totalLen = 0; + pos = 0; + } + + @Override + public void update(int b) { + oneByte[0] = (byte) (b & 0xff); + update(oneByte, 0, 1); + } + + @Override + public void update(byte[] b, int off, final int len) { + if (len <= 0) { + return; + } + totalLen += len; + + final int end = off + len; + + if (pos + len < BUF_SIZE) { + buffer.put(b, off, len); + pos += len; + return; + } + + if (pos > 0) { + final int size = BUF_SIZE - pos; + buffer.put(b, off, size); + process(); + off += size; + } + + final int limit = end - BUF_SIZE; + while (off <= limit) { + buffer.put(b, off, BUF_SIZE); + process(); + off += BUF_SIZE; + } + + if (off < end) { + pos = end - off; + buffer.put(b, off, pos); + } + } + + @Override + public long getValue() { + int hash; + if (totalLen > BUF_SIZE) { + hash = + rotateLeft(state[0], 1) + + rotateLeft(state[1], 7) + + rotateLeft(state[2], 12) + + rotateLeft(state[3], 18); + } else { + hash = state[2] + PRIME5; + } + hash += totalLen; + + buffer.flip(); + + int idx = 0; + final int limit = pos - 4; + for (; idx <= limit; idx += 4) { + hash = rotateLeft(hash + buffer.getInt() * PRIME3, 17) * PRIME4; + } + while (idx < pos) { + hash = rotateLeft(hash + (buffer.get() & 0xff) * PRIME5, 11) * PRIME1; + idx++; + } + + hash ^= hash >>> 15; + hash *= PRIME2; + hash ^= hash >>> 13; + hash *= PRIME3; + hash ^= hash >>> 16; + return hash & 0xffffffffl; + } + + private void initializeState() { + state[0] = seed + PRIME1 + PRIME2; + state[1] = seed + PRIME2; + state[2] = seed; + state[3] = seed - PRIME1; + } + + private void process() { + buffer.flip(); + + // local shadows for performance + int s0 = state[0]; + int s1 = state[1]; + int s2 = state[2]; + int s3 = state[3]; + + s0 = rotateLeft(s0 + buffer.getInt() * PRIME2, ROTATE_BITS) * PRIME1; + s1 = rotateLeft(s1 + buffer.getInt() * PRIME2, ROTATE_BITS) * 
PRIME1; + s2 = rotateLeft(s2 + buffer.getInt() * PRIME2, ROTATE_BITS) * PRIME1; + s3 = rotateLeft(s3 + buffer.getInt() * PRIME2, ROTATE_BITS) * PRIME1; + + state[0] = s0; + state[1] = s1; + state[2] = s2; + state[3] = s3; + + buffer.clear(); + pos = 0; + } +} http://git-wip-us.apache.org/repos/asf/commons-compress/ blob/b5d6f1f6/src/test/java/org/apache/commons/compress/ compressors/lz4/XXHash32Test.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/compress/compressors/lz4/XXHash32Test.java b/src/test/java/org/apache/commons/compress/compressors/ lz4/XXHash32Test.java new file mode 100644 index 0000000..0c7e462 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/ lz4/XXHash32Test.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.compressors.lz4; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.utils.IOUtils; + +import org.junit.Assert; +import org.junit.Test; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; +import org.junit.runner.RunWith; + +@RunWith(Parameterized.class) +public class XXHash32Test { + + private final File file; + private final String expectedChecksum; + + public XXHash32Test(String fileName, String c) throws IOException { + file = AbstractTestCase.getFile(fileName); + expectedChecksum = c; + } + + @Parameters + public static Collection<Object[]> factory() { + return Arrays.asList(new Object[][] { + // reference checksums created with xxh32sum + { "bla.tar", "fbb5c8d1" }, + { "bla.tar.xz", "4106a208" }, + { "8.posix.tar.gz", "9fce116a" }, + }); + } + + @Test + public void verifyChecksum() throws IOException { + XXHash32 h = new XXHash32(); + try (FileInputStream s = new FileInputStream(file)) { + byte[] b = IOUtils.toByteArray(s); + h.update(b, 0, b.length); + } + Assert.assertEquals("checksum for " + file.getName(), expectedChecksum, Long.toHexString(h.getValue())); + } +} -- E-Mail: garydgreg...@gmail.com | ggreg...@apache.org Java Persistence with Hibernate, Second Edition <https://www.amazon.com/gp/product/1617290459/ref=as_li_tl?ie=UTF8&camp=1789&creative=9325&creativeASIN=1617290459&linkCode=as2&tag=garygregory-20&linkId=cadb800f39946ec62ea2b1af9fe6a2b8> <http:////ir-na.amazon-adsystem.com/e/ir?t=garygregory-20&l=am2&o=1&a=1617290459> JUnit in Action, Second Edition <https://www.amazon.com/gp/product/1935182021/ref=as_li_tl?ie=UTF8&camp=1789&creative=9325&creativeASIN=1935182021&linkCode=as2&tag=garygregory-20&linkId=31ecd1f6b6d1eaf8886ac902a24de418%22> 
<http://ir-na.amazon-adsystem.com/e/ir?t=garygregory-20&l=am2&o=1&a=1935182021> Spring Batch in Action <https://www.amazon.com/gp/product/1935182951/ref=as_li_tl?ie=UTF8&camp=1789&creative=9325&creativeASIN=1935182951&linkCode=%7B%7BlinkCode%7D%7D&tag=garygregory-20&linkId=%7B%7Blink_id%7D%7D%22%3ESpring+Batch+in+Action> <http://ir-na.amazon-adsystem.com/e/ir?t=garygregory-20&l=am2&o=1&a=1935182951> Blog: http://garygregory.wordpress.com Home: http://garygregory.com/ Tweet! http://twitter.com/GaryGregory