This is an automated email from the ASF dual-hosted git repository. leerho pushed a commit to branch name_change_and_release_prep in repository https://gitbox.apache.org/repos/asf/datasketches-java.git
commit 0f592d69f841f60094833f036c1d5199b9061721 Author: Lee Rhodes <[email protected]> AuthorDate: Sun Nov 16 12:39:34 2025 -0800 Fixed Hll Union to HllUnion. Updated README, POM. --- README.md | 39 ++++++-- .../org/apache/datasketches/hll/BaseHllSketch.java | 24 ++--- .../apache/datasketches/hll/DirectHll4Array.java | 2 +- .../apache/datasketches/hll/DirectHll6Array.java | 2 +- .../apache/datasketches/hll/DirectHll8Array.java | 2 +- .../org/apache/datasketches/hll/Hll4Array.java | 2 +- .../org/apache/datasketches/hll/Hll6Array.java | 2 +- .../org/apache/datasketches/hll/Hll8Array.java | 2 +- .../org/apache/datasketches/hll/HllSketch.java | 14 +-- .../datasketches/hll/{Union.java => HllUnion.java} | 106 ++++++++++----------- .../org/apache/datasketches/hll/PreambleUtil.java | 2 +- .../org/apache/datasketches/hll/package-info.java | 22 ++--- .../apache/datasketches/hll/BaseHllSketchTest.java | 4 +- .../apache/datasketches/hll/DirectUnionTest.java | 42 ++++---- .../org/apache/datasketches/hll/HllArrayTest.java | 4 +- .../datasketches/hll/HllSketchMergeOrderTest.java | 2 +- .../org/apache/datasketches/hll/HllSketchTest.java | 8 +- .../apache/datasketches/hll/IsomorphicTest.java | 43 ++++----- .../org/apache/datasketches/hll/UnionCaseTest.java | 44 ++++----- .../org/apache/datasketches/hll/UnionTest.java | 56 +++++------ 20 files changed, 212 insertions(+), 210 deletions(-) diff --git a/README.md b/README.md index 0358c6865..3a7cec913 100644 --- a/README.md +++ b/README.md @@ -27,34 +27,53 @@ This is the core Java component of the DataSketches library. It contains all of This component is also a dependency of other components of the library that create adaptors for target systems, such as the [Apache Pig adaptor](https://github.com/apache/datasketches-pig), the [Apache Hive adaptor](https://github.com/apache/datasketches-hive), and others. 
-Note that we have a parallel core component for C++, Python and GO implementations of many of the same sketch algorithms, -[datasketches-cpp](https://github.com/apache/datasketches-cpp), [datasketches-python](https://github.com/apache/datasketches-python), and -[datasketches-go](https://github.com/apache/datasketches-go). +Note that we have parallel core components for C++, Python and GO implementations of many of the same sketch algorithms: + +- [datasketches-cpp](https://github.com/apache/datasketches-cpp), +- [datasketches-python](https://github.com/apache/datasketches-python), +- [datasketches-go](https://github.com/apache/datasketches-go). Please visit the main [DataSketches website](https://datasketches.apache.org) for more information. If you are interested in making contributions to this site please see our [Community](https://datasketches.apache.org/docs/Community/) page for how to contact us. --- +## Major Changes with this Release +This release is a major release where we took the opportunity to do some significant refactoring that will constitute incompatible changes from previous releases. Any incompatibility with prior releases is always an inconvenience to users who wish to just upgrade to the latest release and run. However, some of the code in this library was written in 2013 and meanwhile the Java language has evolved enormously since then. We chose to use this major release as the opportunity to moderniz [...] + +### Eliminate the dependency on the DataSketches-Memory component. +The DataSketches-Memory component was originally developed in 2014 to address the need for fast access to off-heap memory data structures and used Unsafe and other JVM internals as there were no satisfactory Java language features to do this at the time. + +The FFM capabilities introduced into the language in Java 22, are now part of the Java 25 LTS release, which we support. 
Since the capabilities of FFM are a superset of the original DataSketches-Memory component, it made sense to rewrite the code to eliminate the dependency on DataSketches-Memory and use FFM instead. This impacted code across the entire library. + +This provided several advantages to the code base. By removing this dependency on DataSketches-Memory, there are now no runtime dependencies! This should make integrating this library into other Java systems much simpler. Since FFM is tightly integrated into the Java language, it has improved performance, especially with bulk operations. + +- As an added note: There are numerous other improvements to the Java language that we could perhaps take advantage of in a rewrite, e.g., Records, text blocks, switch expressions, sealed, var, modules, patterns, etc. However, faced with the risk of accidentally creating bugs due to too many changes at one time, we focused on FFM, which actually improves performance as opposed to just syntactic sugar. + +### Align public sketch class names so that the sketch family name is part of the class name. +For example, the Theta sketch was the first sketch written for the library and its base class was called *Sketch*. Obviously, because it was the only sketch! The Tuple sketch evolved soon after and its base class was also called *Sketch*. Oops, bad idea. If a user wanted to use both the Theta and Tuple sketches in the same class one of them had to be fully qualified every time it was referenced. Ugh! + +Unfortunately, this habit propagated to some of the other early sketches, where we ended up with two different sketches with an *ItemsSketch*, for example. For the more recent additions to the library we started including the sketch family name in all the relevant sketch-like public classes of a sketch family. + +In this release we have refactored these older sketches with new names that now include the sketch family name. 
Yes, this is an incompatible change for user code moving from earlier releases, but this can usually be fixed with search-and-replace tools. This release is not perfect, but it is hopefully more consistent across all the different sketch families. + ## Build & Runtime Dependencies ### Installation Directory Path **NOTE:** This component accesses resource files for testing. As a result, the directory elements of the full absolute path of the target installation directory must qualify as Java identifiers. In other words, the directory elements must not have any space characters (or non-Java identifier characters) in any of the path elements. This is required by the Oracle Java Specification in order to ensure location-independent access to resources: [See Oracle Location-Independent Access to Reso [...] -### OpenJDK Version 24 -An OpenJDK-compatible build of Java 24, provided by one of the Open-Source JVM providers, such as Azul Systems, Red Hat, SAP, Eclipse Temurin, etc, is required. -All of the testing of this release has been performed with an Eclipse Temurin build. - -This release uses the new Java Foreign Function & Memory (FFM) features that were made part of the Java Language in in Java 22. +### OpenJDK Version 25 +At minimum, an OpenJDK-compatible build of Java 25, provided by one of the Open-Source JVM providers, such as *Azul Systems*, *Red Hat*, *SAP*, *Eclipse Temurin*, etc, is required. +All of the testing of this release has been performed with the *Eclipse Temurin* build. ## Compilation and Test using Maven This DataSketches component is structured as a Maven project and Maven is the recommended tool for compile and test. #### A Toolchain is required -* You must have a JDK type toolchain defined in location *~/.m2/toolchains.xml* that specifies where to find a locally installed OpenJDK-compatible version 24. -* Your default \$JAVA\_HOME compiler must be OpenJDK compatible, specified in the toolchain, and may be a version greater than 24. 
Note that if your \$JAVA\_HOME is set to a Java version greater than 24, Maven will automatically use the Java 24 version specified in the toolchain instead. The included pom.xml specifies the necessary JVM flags, so no further action should be required. +* You must have a JDK type toolchain defined in location *~/.m2/toolchains.xml* that specifies where to find a locally installed OpenJDK-compatible version 25. +* Your default \$JAVA\_HOME compiler must be OpenJDK compatible, specified in the toolchain, and may be a version greater than 25. Note that if your \$JAVA\_HOME is set to a Java version greater than 25, Maven will automatically use the Java 25 version specified in the toolchain instead. The included pom.xml specifies the necessary JVM flags, if required, so no further action is needed. * Note that the paths specified in the toolchain must be fully qualified direct paths to the OpenJDK version locations. Using environment variables will not work. #### To run normal unit tests: diff --git a/src/main/java/org/apache/datasketches/hll/BaseHllSketch.java b/src/main/java/org/apache/datasketches/hll/BaseHllSketch.java index 082318ee2..99107220e 100644 --- a/src/main/java/org/apache/datasketches/hll/BaseHllSketch.java +++ b/src/main/java/org/apache/datasketches/hll/BaseHllSketch.java @@ -35,7 +35,7 @@ import org.apache.datasketches.common.Util; /** * Although this class is package-private, it provides a single place to define and document - * the common public API for both HllSketch and Union. + * the common public API for both HllSketch and HllUnion. * @author Lee Rhodes * @author Kevin Lang */ @@ -115,7 +115,7 @@ abstract class BaseHllSketch implements MemorySegmentStatus { * Gets the current (approximate) Relative Error (RE) asymptotic values given several * parameters. This is used primarily for testing. * @param upperBound return the RE for the Upper Bound, otherwise for the Lower Bound. 
- * @param oooFlag set true if the sketch is the result of a non qualifying union operation. + * @param oooFlag set true if the sketch is the result of a non qualifying HllUnion operation. * @param lgConfigK the configured value for the sketch. * @param numStdDev the given number of Standard Deviations. This must be an integer between * 1 and 3, inclusive. @@ -206,8 +206,8 @@ abstract class BaseHllSketch implements MemorySegmentStatus { * inquire of the sketch if it has, in fact, moved itself. * * @param seg the given MemorySegment - * @return true if the given MemorySegment refers to the same underlying resource as this sketch or - * union. + * @return true if the given MemorySegment refers to the same underlying resource as this HllSketch or + * HllUnion. */ @Override public abstract boolean isSameResource(MemorySegment seg); @@ -219,17 +219,17 @@ abstract class BaseHllSketch implements MemorySegmentStatus { /** * Serializes this sketch as a byte array in compact form. The compact form is smaller in size - * than the updatable form and read-only. It can be used in union operations as follows: + * than the updatable form and read-only. It can be used in HllUnion operations as follows: * <pre>{@code - * Union union; HllSketch sk, sk2; + * HllUnion union; HllSketch sk, sk2; * int lgK = 12; * sk = new HllSketch(lgK, TgtHllType.HLL_4); //can be 4, 6, or 8 * for (int i = 0; i < (2 << lgK); i++) { sk.update(i); } * byte[] arr = HllSketch.toCompactByteArray(); * //... - * union = Union.heapify(arr); //initializes the union using data from the array. + * union = HllUnion.heapify(arr); //initializes the HllUnion using data from the array. * //OR, if used in an off-heap environment: - * union = Union.heapify(MemorySegment.ofArray(arr)); //same as above, except from MemorySegment object. + * union = HllUnion.heapify(MemorySegment.ofArray(arr)); //same as above, except from MemorySegment object. 
* * //To recover an updatable heap sketch: * sk2 = HllSketch.heapify(arr); @@ -250,17 +250,17 @@ abstract class BaseHllSketch implements MemorySegmentStatus { /** * Serializes this sketch as a byte array in an updatable form. The updatable form is larger than * the compact form. The use of this form is primarily in environments that support updating - * sketches in off-heap MemorySegment. If the sketch is constructed using HLL_8, sketch updating and - * union updating operations can actually occur in MemorySegment, which can be off-heap: + * sketches in off-heap MemorySegment. If the sketch is constructed using HLL_8, HllSketch updating and + * HllUnion updating operations can actually occur in MemorySegment, which can be off-heap: * <pre>{@code - * Union union; HllSketch sk; + * HllUnion union; HllSketch sk; * int lgK = 12; * sk = new HllSketch(lgK, TgtHllType.HLL_8) //must be 8 * for (int i = 0; i < (2 << lgK); i++) { sk.update(i); } * byte[] arr = sk.toUpdatableByteArray(); * MemorySegment wseg = MemorySegment.wrap(arr); * //... - * union = Union.writableWrap(wseg); //no deserialization! + * union = HllUnion.writableWrap(wseg); //no deserialization! * }</pre> * @return this sketch as an updatable byte array. 
*/ diff --git a/src/main/java/org/apache/datasketches/hll/DirectHll4Array.java b/src/main/java/org/apache/datasketches/hll/DirectHll4Array.java index 99e9450bb..03eefdc01 100644 --- a/src/main/java/org/apache/datasketches/hll/DirectHll4Array.java +++ b/src/main/java/org/apache/datasketches/hll/DirectHll4Array.java @@ -136,7 +136,7 @@ final class DirectHll4Array extends DirectHllArray { } @Override - //Would be used by Union, but not used because the gadget is always HLL8 type + //Would be used by HllUnion, but not used because the gadget is always HLL8 type void updateSlotNoKxQ(final int slotNo, final int newValue) { throw new SketchesStateException("Improper access."); } diff --git a/src/main/java/org/apache/datasketches/hll/DirectHll6Array.java b/src/main/java/org/apache/datasketches/hll/DirectHll6Array.java index c9a8eb7c7..4d35be674 100644 --- a/src/main/java/org/apache/datasketches/hll/DirectHll6Array.java +++ b/src/main/java/org/apache/datasketches/hll/DirectHll6Array.java @@ -83,7 +83,7 @@ final class DirectHll6Array extends DirectHllArray { } @Override - //Would be used by Union, but not used because the gadget is always HLL8 type + //Would be used by HllUnion, but not used because the gadget is always HLL8 type void updateSlotNoKxQ(final int slotNo, final int newValue) { throw new SketchesStateException("Improper access."); } diff --git a/src/main/java/org/apache/datasketches/hll/DirectHll8Array.java b/src/main/java/org/apache/datasketches/hll/DirectHll8Array.java index 7267d2f57..34714fb75 100644 --- a/src/main/java/org/apache/datasketches/hll/DirectHll8Array.java +++ b/src/main/java/org/apache/datasketches/hll/DirectHll8Array.java @@ -86,7 +86,7 @@ final class DirectHll8Array extends DirectHllArray { } @Override - //Used by Union when source is not HLL8 + //Used by HllUnion when source is not HLL8 void updateSlotNoKxQ(final int slotNo, final int newValue) { final int oldValue = getSlotValue(slotNo); if (newValue > oldValue) { diff --git 
a/src/main/java/org/apache/datasketches/hll/Hll4Array.java b/src/main/java/org/apache/datasketches/hll/Hll4Array.java index 759174bea..f6295fa78 100644 --- a/src/main/java/org/apache/datasketches/hll/Hll4Array.java +++ b/src/main/java/org/apache/datasketches/hll/Hll4Array.java @@ -136,7 +136,7 @@ final class Hll4Array extends HllArray { } @Override - //Would be used by Union, but not used because the gadget is always HLL8 type + //Would be used by HllUnion, but not used because the gadget is always HLL8 type void updateSlotNoKxQ(final int slotNo, final int newValue) { throw new SketchesStateException("Improper access."); } diff --git a/src/main/java/org/apache/datasketches/hll/Hll6Array.java b/src/main/java/org/apache/datasketches/hll/Hll6Array.java index 70a41090e..a0ddcbf59 100644 --- a/src/main/java/org/apache/datasketches/hll/Hll6Array.java +++ b/src/main/java/org/apache/datasketches/hll/Hll6Array.java @@ -93,7 +93,7 @@ final class Hll6Array extends HllArray { } @Override - //Would be used by Union, but not used because the gadget is always HLL8 type + //Would be used by HllUnion, but not used because the gadget is always HLL8 type void updateSlotNoKxQ(final int slotNo, final int newValue) { throw new SketchesStateException("Improper access."); } diff --git a/src/main/java/org/apache/datasketches/hll/Hll8Array.java b/src/main/java/org/apache/datasketches/hll/Hll8Array.java index 97ebac9dc..423cebfee 100644 --- a/src/main/java/org/apache/datasketches/hll/Hll8Array.java +++ b/src/main/java/org/apache/datasketches/hll/Hll8Array.java @@ -92,7 +92,7 @@ final class Hll8Array extends HllArray { } @Override - //Used by Union when source is not HLL8 + //Used by HllUnion when source is not HLL8 void updateSlotNoKxQ(final int slotNo, final int newValue) { final int oldValue = getSlotValue(slotNo); hllByteArr[slotNo] = (byte) Math.max(newValue, oldValue); diff --git a/src/main/java/org/apache/datasketches/hll/HllSketch.java 
b/src/main/java/org/apache/datasketches/hll/HllSketch.java index 35d782a27..0ff0c1e97 100644 --- a/src/main/java/org/apache/datasketches/hll/HllSketch.java +++ b/src/main/java/org/apache/datasketches/hll/HllSketch.java @@ -203,7 +203,7 @@ public class HllSketch extends BaseHllSketch { return heapify(srcSeg, true); } - //used by union and above + //used by HllUnion and above static final HllSketch heapify(final MemorySegment srcSeg, final boolean checkRebuild) { Objects.requireNonNull(srcSeg, "Source MemorySegment must not be null"); checkBounds(0, 8, srcSeg.byteSize()); //need min 8 bytes @@ -218,7 +218,7 @@ public class HllSketch extends BaseHllSketch { } else { //Hll_8 heapSketch = new HllSketch(Hll8Array.heapify(srcSeg)); if (checkRebuild) { - Union.checkRebuildCurMinNumKxQ(heapSketch); + HllUnion.checkRebuildCurMinNumKxQ(heapSketch); } } } else if (curMode == CurMode.LIST) { @@ -245,7 +245,7 @@ public class HllSketch extends BaseHllSketch { return writableWrap(srcWseg, true); } - //used by union and above + //used by HllUnion and above static final HllSketch writableWrap( final MemorySegment srcWseg, final boolean checkRebuild) { Objects.requireNonNull(srcWseg, "Source MemorySegment must not be null"); checkBounds(0, 8, srcWseg.byteSize()); //need min 8 bytes @@ -268,8 +268,8 @@ public class HllSketch extends BaseHllSketch { directSketch = new HllSketch(new DirectHll6Array(lgConfigK, srcWseg)); } else { //Hll_8 directSketch = new HllSketch(new DirectHll8Array(lgConfigK, srcWseg)); - if (checkRebuild) { //union only uses HLL_8, we allow non-finalized from a union call. - Union.checkRebuildCurMinNumKxQ(directSketch); + if (checkRebuild) { //HllUnion only uses HLL_8, we allow non-finalized from a HllUnion call. 
+ HllUnion.checkRebuildCurMinNumKxQ(directSketch); } } } else if (curMode == CurMode.LIST) { @@ -305,8 +305,8 @@ public class HllSketch extends BaseHllSketch { directSketch = new HllSketch(new DirectHll6Array(lgConfigK, srcSeg, true)); } else { //Hll_8 directSketch = new HllSketch(new DirectHll8Array(lgConfigK, srcSeg, true)); - //rebuild if srcSeg came from a union and was not finalized, rather than throw exception. - Union.checkRebuildCurMinNumKxQ(directSketch); + //rebuild if srcSeg came from a HllUnion and was not finalized, rather than throw exception. + HllUnion.checkRebuildCurMinNumKxQ(directSketch); } } else if (curMode == CurMode.LIST) { directSketch = diff --git a/src/main/java/org/apache/datasketches/hll/Union.java b/src/main/java/org/apache/datasketches/hll/HllUnion.java similarity index 88% rename from src/main/java/org/apache/datasketches/hll/Union.java rename to src/main/java/org/apache/datasketches/hll/HllUnion.java index ead23b9ff..dca2cd7da 100644 --- a/src/main/java/org/apache/datasketches/hll/Union.java +++ b/src/main/java/org/apache/datasketches/hll/HllUnion.java @@ -34,64 +34,64 @@ import java.lang.foreign.MemorySegment; import org.apache.datasketches.common.SketchesArgumentException; /** - * This performs union operations for all HllSketches. This union operator can be configured to be - * on or off heap. The source sketch given to this union using the {@link #update(HllSketch)} can + * This performs union operations for all HllSketches. This HllUnion operator can be configured to be + * on or off heap. The source sketch given to this HllUnion using the {@link #update(HllSketch)} can * be configured with any precision value <i>lgConfigK</i> (from 4 to 21), any <i>TgtHllType</i> * (HLL_4, HLL_6, HLL_8), and either on or off-heap; and it can be in either of the sparse modes * (<i>LIST</i> or <i>SET</i>), or the dense mode (<i>HLL</i>). 
* - * <p>Although the API for this union operator parallels many of the methods of the - * <i>HllSketch</i>, the behavior of the union operator has some fundamental differences.</p> + * <p>Although the API for this HllUnion operator parallels many of the methods of the + * <i>HllSketch</i>, the behavior of the HllUnion operator has some fundamental differences.</p> * - * <p>First, this union operator is configured with a <i>lgMaxK</i> instead of the normal - * <i>lgConfigK</i>. Generally, this union operator will inherit the lowest <i>lgConfigK</i> + * <p>First, this HllUnion operator is configured with a <i>lgMaxK</i> instead of the normal + * <i>lgConfigK</i>. Generally, this HllUnion operator will inherit the lowest <i>lgConfigK</i> * less than <i>lgMaxK</i> that it has seen. However, the <i>lgConfigK</i> of incoming sketches that * are still in sparse are ignored. The <i>lgMaxK</i> provides the user the ability to specify the - * largest maximum size for the union operation. + * largest maximum size for the HllUnion operation. * - * <p>Second, the user cannot specify the {@link TgtHllType} as an input parameter to the union. + * <p>Second, the user cannot specify the {@link TgtHllType} as an input parameter to the HllUnion. * Instead, it is specified for the sketch returned with {@link #getResult(TgtHllType)}. * * <p>The following graph illustrates the HLL Merge speed.</p> * - * <p><img src="doc-files/HLL_UnionTime4_6_8_Java_CPP.png" width="500" alt="HLL LgK12 Union Speed"></p> + * <p><img src="doc-files/HLL_UnionTime4_6_8_Java_CPP.png" width="500" alt="HLL_UnionTime4_6_8_Java_CPP.png"></p> * This graph illustrates the relative merging speed of the HLL 4,6,8 Java HLL sketches compared to * the DataSketches C++ implementations of the same sketches. With this particular test (merging 32 relative large * sketches together), the Java HLL 8 is the fastest and the Java HLL 4 the slowest, with a mixed cluster in the middle. 
- * Union / Merging speed is somewhat difficult to measure as the performance is very dependent on the mix of sketch + * HllUnion / Merging speed is somewhat difficult to measure as the performance is very dependent on the mix of sketch * sizes (and types) you are merging. So your mileage will vary! * - * <p>For a complete example of using the Union operator - * see <a href="https://datasketches.apache.org/docs/HLL/HllJavaExample.html">Union Example</a>.</p> + * <p>For a complete example of using the HllUnion operator + * see <a href="https://datasketches.apache.org/docs/HLL/HllJavaExample.html">HllUnion Example</a>.</p> * * @author Lee Rhodes * @author Kevin Lang */ -public class Union extends BaseHllSketch { +public class HllUnion extends BaseHllSketch { final int lgMaxK; private final HllSketch gadget; /** - * Construct this Union operator with the default maximum log-base-2 of <i>K</i>. + * Construct this HllUnion operator with the default maximum log-base-2 of <i>K</i>. */ - public Union() { + public HllUnion() { lgMaxK = HllSketch.DEFAULT_LG_K; gadget = new HllSketch(lgMaxK, HLL_8); } /** - * Construct this Union operator with a given maximum log-base-2 of <i>K</i>. + * Construct this HllUnion operator with a given maximum log-base-2 of <i>K</i>. * @param lgMaxK the desired maximum log-base-2 of <i>K</i>. This value must be * between 4 and 21 inclusively. */ - public Union(final int lgMaxK) { + public HllUnion(final int lgMaxK) { this.lgMaxK = HllUtil.checkLgK(lgMaxK); gadget = new HllSketch(lgMaxK, HLL_8); } /** - * Construct this Union operator with a given maximum log-base-2 of <i>K</i> and the given - * MemorySegment as the destination for this Union. This MemorySegment is usually configured + * Construct this HllUnion operator with a given maximum log-base-2 of <i>K</i> and the given + * MemorySegment as the destination for this HllUnion. This MemorySegment is usually configured * for off-heap MemorySegment. 
What remains on the java heap is a thin wrapper object that reads and * writes to the given MemorySegment. * @@ -101,35 +101,35 @@ public class Union extends BaseHllSketch { * between 4 and 21 inclusively. * @param dstWseg the destination writable MemorySegment for the sketch. */ - public Union(final int lgMaxK, final MemorySegment dstWseg) { + public HllUnion(final int lgMaxK, final MemorySegment dstWseg) { this.lgMaxK = HllUtil.checkLgK(lgMaxK); gadget = new HllSketch(lgMaxK, HLL_8, dstWseg); } //used only by writableWrap - private Union(final HllSketch sketch) { + private HllUnion(final HllSketch sketch) { lgMaxK = sketch.getLgConfigK(); gadget = sketch; } /** - * Construct a union operator populated with the given byte array image of an HllSketch. + * Construct a HllUnion operator populated with the given byte array image of an HllSketch. * @param byteArray the given byte array - * @return a union operator populated with the given byte array image of an HllSketch. + * @return a HllUnion operator populated with the given byte array image of an HllSketch. */ - public static final Union heapify(final byte[] byteArray) { + public static final HllUnion heapify(final byte[] byteArray) { return heapify(MemorySegment.ofArray(byteArray)); } /** - * Construct a union operator populated with the given MemorySegment image of an HllSketch. + * Construct a HllUnion operator populated with the given MemorySegment image of an HllSketch. * @param seg the given MemorySegment - * @return a union operator populated with the given MemorySegment image of an HllSketch. + * @return a HllUnion operator populated with the given MemorySegment image of an HllSketch. 
*/ - public static final Union heapify(final MemorySegment seg) { + public static final HllUnion heapify(final MemorySegment seg) { final int lgK = HllUtil.checkLgK(seg.get(JAVA_BYTE, PreambleUtil.LG_K_BYTE)); final HllSketch sk = HllSketch.heapify(seg, false); //allows non-finalized image - final Union union = new Union(lgK); + final HllUnion union = new HllUnion(lgK); union.update(sk); return union; } @@ -143,16 +143,16 @@ public class Union extends BaseHllSketch { * <p>The given <i>dstSeg</i> is checked for the required capacity as determined by * {@link HllSketch#getMaxUpdatableSerializationBytes(int, TgtHllType)}, and for the correct type. * @param srcWseg an writable image of a valid sketch with data. - * @return a Union operator where the sketch data is in the given dstSeg. + * @return a HllUnion operator where the sketch data is in the given dstSeg. */ - public static final Union writableWrap(final MemorySegment srcWseg) { + public static final HllUnion writableWrap(final MemorySegment srcWseg) { final TgtHllType tgtHllType = extractTgtHllType(srcWseg); if (tgtHllType != TgtHllType.HLL_8) { throw new SketchesArgumentException( - "Union can only wrap writable HLL_8 sketches that were the Gadget of a Union."); + "HllUnion can only wrap writable HLL_8 sketches that were the Gadget of a HllUnion."); } //allows writableWrap of non-finalized image - return new Union(HllSketch.writableWrap(srcWseg, false)); + return new HllUnion(HllSketch.writableWrap(srcWseg, false)); } @Override @@ -178,7 +178,7 @@ public class Union extends BaseHllSketch { } /** - * Gets the effective <i>lgConfigK</i> for the union operator, which may be less than + * Gets the effective <i>lgConfigK</i> for the HllUnion operator, which may be less than * <i>lgMaxK</i>. * @return the <i>lgConfigK</i>. */ @@ -194,28 +194,28 @@ public class Union extends BaseHllSketch { } /** - * Returns the maximum size in bytes that this union operator can grow to given a lgK. 
+ * Returns the maximum size in bytes that this HllUnion operator can grow to given a lgK. * - * @param lgK The maximum Log2 of K for this union operator. This value must be + * @param lgK The maximum Log2 of K for this HllUnion operator. This value must be * between 4 and 21 inclusively. - * @return the maximum size in bytes that this union operator can grow to. + * @return the maximum size in bytes that this HllUnion operator can grow to. */ public static int getMaxSerializationBytes(final int lgK) { return HllSketch.getMaxUpdatableSerializationBytes(lgK, TgtHllType.HLL_8); } /** - * Return the result of this union operator as an HLL_4 sketch. - * @return the result of this union operator as an HLL_4 sketch. + * Return the result of this HllUnion operator as an HLL_4 sketch. + * @return the result of this HllUnion operator as an HLL_4 sketch. */ public HllSketch getResult() { return getResult(HllSketch.DEFAULT_HLL_TYPE); } /** - * Return the result of this union operator with the specified {@link TgtHllType} + * Return the result of this HllUnion operator with the specified {@link TgtHllType} * @param tgtHllType the TgtHllType enum - * @return the result of this union operator with the specified TgtHllType + * @return the result of this HllUnion operator with the specified TgtHllType */ public HllSketch getResult(final TgtHllType tgtHllType) { checkRebuildCurMinNumKxQ(gadget); @@ -286,11 +286,11 @@ public class Union extends BaseHllSketch { } /** - * Gets the serialization of this union operator as a byte array in compact form, which is + * Gets the serialization of this HllUnion operator as a byte array in compact form, which is * designed to be heapified only. It is not directly updatable. - * For the Union operator, this is the serialization of the internal state of - * the union operator as a sketch. - * @return the serialization of this union operator as a byte array. 
+ * For the HllUnion operator, this is the serialization of the internal state of + * the HllUnion operator as a sketch. + * @return the serialization of this HllUnion operator as a byte array. */ @Override public byte[] toCompactByteArray() { @@ -313,7 +313,7 @@ public class Union extends BaseHllSketch { } /** - * Update this union operator with the given sketch. + * Update this HllUnion operator with the given sketch. * @param sketch the given sketch. */ public void update(final HllSketch sketch) { @@ -326,28 +326,28 @@ public class Union extends BaseHllSketch { gadget.hllSketchImpl = gadget.hllSketchImpl.couponUpdate(coupon); } - // Union operator logic + // HllUnion operator logic /** * Union the given source and destination sketches. This static method examines the state of * the current internal gadget and the incoming sketch and determines the optimum way to * perform the union. This may involve swapping the merge order, downsampling, transforming, - * and / or copying one of the arguments and may completely replace the internals of the union. + * and / or copying one of the arguments and may completely replace the internals of the HllUnion. * - * <p>If the union gadget is empty, the source sketch is effectively copied to the union gadget + * <p>If the HllUnion gadget is empty, the source sketch is effectively copied to the HllUnion gadget * after any required transformations. * - * <p>The direction of the merge is reversed if the union gadget is in LIST or SET mode, and the + * <p>The direction of the merge is reversed if the HllUnion gadget is in LIST or SET mode, and the * source sketch is in HLL mode. This is done to maintain maximum accuracy of the union process. * * <p>The source sketch is downsampled if the source LgK is larger than maxLgK and in HLL mode. * - * <p>The union gadget is downsampled if both source and union gadget are in HLL mode - * and the source LgK <b>less than</b> the union gadget LgK. 
+ * <p>The HllUnion gadget is downsampled if both source and HllUnion gadget are in HLL mode + * and the source LgK <b>less than</b> the HllUnion gadget LgK. * * @param source the given incoming sketch, which cannot be modified. * @param gadget the given gadget sketch, which has a target of HLL_8 and holds the result. - * @param lgMaxK the maximum value of log2 K for this union. + * @param lgMaxK the maximum value of log2 K for this union operation. * @return the union of the two sketches in the form of the internal HllSketchImpl, which is * always in HLL_8 form. */ @@ -765,7 +765,7 @@ public class Union extends BaseHllSketch { tgt.hllSketchImpl.putRebuildCurMinNumKxQFlag(true); } - //Used by union operator. Always copies or downsamples to Heap HLL_8. + //Used by HllUnion operator. Always copies or downsamples to Heap HLL_8. //Caller must ultimately manage oooFlag, as caller has more context. /** * Copies or downsamples the given candidate HLLmode sketch to tgtLgK, HLL_8, on the heap. diff --git a/src/main/java/org/apache/datasketches/hll/PreambleUtil.java b/src/main/java/org/apache/datasketches/hll/PreambleUtil.java index a43a6f121..b86b65fc6 100644 --- a/src/main/java/org/apache/datasketches/hll/PreambleUtil.java +++ b/src/main/java/org/apache/datasketches/hll/PreambleUtil.java @@ -137,7 +137,7 @@ final class PreambleUtil { static final int EMPTY_FLAG_MASK = 4; static final int COMPACT_FLAG_MASK = 8; static final int OUT_OF_ORDER_FLAG_MASK = 16; - static final int REBUILD_CURMIN_NUM_KXQ_MASK = 32; //used only by Union + static final int REBUILD_CURMIN_NUM_KXQ_MASK = 32; //used only by HllUnion //Mode byte masks static final int CUR_MODE_MASK = 3; diff --git a/src/main/java/org/apache/datasketches/hll/package-info.java b/src/main/java/org/apache/datasketches/hll/package-info.java index 114d4da96..ad2f22fa9 100644 --- a/src/main/java/org/apache/datasketches/hll/package-info.java +++ b/src/main/java/org/apache/datasketches/hll/package-info.java @@ -18,18 +18,18 @@ 
*/ /** - * <h2>The DataSketches™ HLL sketch family package</h2> - * {@link org.apache.datasketches.hll.HllSketch HllSketch} and {@link org.apache.datasketches.hll.Union Union} + * <h2>The DataSketches™ HllSketch family package</h2> + * {@link org.apache.datasketches.hll.HllSketch HllSketch} and {@link org.apache.datasketches.hll.HllUnion HllUnion} * are the public facing classes of this high performance implementation of Philippe Flajolet's * HyperLogLog algorithm[1] but with significantly improved error behavior and important features that can be * essential for large production systems that must handle massive data. * - * <h2>Key Features of the DataSketches™ HLL Sketch and its companion Union</h2> + * <h2>Key Features of the DataSketches™ HllSketch and its companion HllUnion</h2> * * <h3>Advanced Estimation Algorithms for Optimum Accuracy</h3> * * <h4>Zero error at low cardinalities</h4> - * The HLL sketch leverages highly compact arrays and hash tables to keep exact counts until the transition to + * The HllSketch leverages highly compact arrays and hash tables to keep exact counts until the transition to * dense mode is required for space reasons. The result is perfect accuracy for very low cardinalities. * * <p>Accuracy for very small streams can be important because Big Data is often fragmented into millions of smaller @@ -55,7 +55,7 @@ * sketch once the statistical randomness is removed through multiple trials. This can be observed in the * following graph.</p> * - * <p><img src="doc-files/HLL_HIP_K12T20U20.png" width="500" alt="HLL Accuracy">[6]</p> + * <p><img src="doc-files/HLL_HIP_K12T20U20.png" width="500" alt="HLL_HIP_K12T20U20.png">[6]</p> * * <p>The above graph has 7 curves. At y = 0, is the median line that hugs the x-axis so closely that it can't be seen. * The two curves, just above and just below the x-axis, correspond to +/- 1 standard deviation (SD) of error. @@ -68,8 +68,8 @@ * Below the cardinality of about 512 there is no error at all. 
This is the point where this particular * sketch transitions from sparse to dense (or estimation) mode.</p> * - * <h3>Three HLL Types</h3> - * This HLL implementation offers three different types of HLL sketch, each with different + * <h3>Three HllSketch Types</h3> + * This HLL implementation offers three different types of HllSketch, each with different * trade-offs with accuracy, space and performance. These types are selected with the * {@link org.apache.datasketches.hll.TgtHllType TgtHllType} parameter. * @@ -96,7 +96,7 @@ * terms of update time, but has the smallest storage footprint of about <i>K/2 * 1.03</i> bytes. * * <h3>Off-Heap Operation</h3> - * This HLL sketch also offers the capability of operating off-heap. Given a <i>MemorySegment[5]</i> object + * This HllSketch also offers the capability of operating off-heap. Given a <i>MemorySegment[5]</i> object * created by the user, the sketch will perform all of its updates and internal phase transitions * in that object, which can actually reside either on-heap or off-heap based on how it was * configured. In large systems that must update and union many millions of sketches, having the @@ -104,8 +104,8 @@ * off-heap and back, and reduces the need for garbage collection. * * <h3>Merging sketches with different configured <i>lgConfigK</i></h3> - * This enables a user to union a HLL sketch that was configured with, say, <i>lgConfigK = 12</i> - * with another loaded HLL sketch that was configured with, say, <i>lgConfigK = 14</i>. + * This enables a user to union an HllSketch that was configured with, say, <i>lgConfigK = 12</i> + * with another loaded HllSketch that was configured with, say, <i>lgConfigK = 14</i>. * * <p>Why is this important? Suppose you have been building a history of sketches of your customer's * data that go back a full year (or 5 or 10!) that were all configured with <i>lgConfigK = 12</i>. 
Because sketches @@ -125,7 +125,7 @@ * * <h3>Multi-language, multi-platform.</h3> * The binary structures for our sketch serializations are language and platform independent. - * This means it is possible to generate an HLL sketch on a C++ Windows platform and it can be used on a + * This means it is possible to generate an HllSketch on a C++ Windows platform and it can be used on a * Java or Python Unix platform. * * <p>[1] Philippe Flajolet, et al, <a href="https://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf"> diff --git a/src/test/java/org/apache/datasketches/hll/BaseHllSketchTest.java b/src/test/java/org/apache/datasketches/hll/BaseHllSketchTest.java index 4afb282b6..0ba429b0e 100644 --- a/src/test/java/org/apache/datasketches/hll/BaseHllSketchTest.java +++ b/src/test/java/org/apache/datasketches/hll/BaseHllSketchTest.java @@ -26,7 +26,7 @@ import org.apache.datasketches.hll.BaseHllSketch; import org.apache.datasketches.hll.HllSketch; import org.apache.datasketches.hll.PreambleUtil; import org.apache.datasketches.hll.TgtHllType; -import org.apache.datasketches.hll.Union; +import org.apache.datasketches.hll.HllUnion; import org.testng.annotations.Test; import java.lang.foreign.MemorySegment; @@ -71,7 +71,7 @@ public class BaseHllSketchTest { sk.update(s); sk.update("1234"); - final Union u = new Union(10); + final HllUnion u = new HllUnion(10); final byte[] byteArr1 = null; u.update(byteArr1); u.update(new byte[] {}); diff --git a/src/test/java/org/apache/datasketches/hll/DirectUnionTest.java b/src/test/java/org/apache/datasketches/hll/DirectUnionTest.java index 9dca85d98..114530a86 100644 --- a/src/test/java/org/apache/datasketches/hll/DirectUnionTest.java +++ b/src/test/java/org/apache/datasketches/hll/DirectUnionTest.java @@ -36,7 +36,7 @@ import org.apache.datasketches.hll.HllSketch; import org.apache.datasketches.hll.HllUtil; import org.apache.datasketches.hll.RelativeErrorTables; import org.apache.datasketches.hll.TgtHllType; -import 
org.apache.datasketches.hll.Union; +import org.apache.datasketches.hll.HllUnion; /** * @author Lee Rhodes @@ -198,7 +198,7 @@ public class DirectUnionTest { final String h1SketchStr = ("H1 SKETCH: \n" + h1.toString()); final String h2SketchStr = ("H2 SKETCH: \n" + h2.toString()); - final Union union = newUnion(lgMaxK); + final HllUnion union = newUnion(lgMaxK); union.update(h1); final String uH1SketchStr = ("Union after H1: \n" + union.getResult(resultType).toString()); @@ -260,7 +260,7 @@ public class DirectUnionTest { } private static void toFrom1(final int lgK, final TgtHllType tgtHllType, final int n) { - final Union srcU = newUnion(lgK); + final HllUnion srcU = newUnion(lgK); final HllSketch srcSk = new HllSketch(lgK, tgtHllType); for (int i = 0; i < n; i++) { srcSk.update(i); @@ -271,7 +271,7 @@ public class DirectUnionTest { final byte[] byteArr = srcU.toCompactByteArray(); final MemorySegment seg = MemorySegment.ofArray(byteArr); - final Union dstU = Union.heapify(seg); + final HllUnion dstU = HllUnion.heapify(seg); assertEquals(dstU.getEstimate(), srcU.getEstimate(), 0.0); } @@ -290,7 +290,7 @@ public class DirectUnionTest { } private static void toFrom2(final int lgK, final TgtHllType tgtHllType, final int n) { - final Union srcU = newUnion(lgK); + final HllUnion srcU = newUnion(lgK); final HllSketch srcSk = new HllSketch(lgK, tgtHllType); for (int i = 0; i < n; i++) { srcSk.update(i); @@ -300,14 +300,14 @@ public class DirectUnionTest { srcU.update(srcSk); final byte[] byteArr = srcU.toCompactByteArray(); - final Union dstU = Union.heapify(byteArr); + final HllUnion dstU = HllUnion.heapify(byteArr); assertEquals(dstU.getEstimate(), srcU.getEstimate(), 0.0); } @Test public void checkCompositeEst() { - final Union u = newUnion(12); + final HllUnion u = newUnion(12); assertEquals(u.getCompositeEstimate(), 0, .03); for (int i = 1; i <= 15; i++) { u.update(i); } assertEquals(u.getCompositeEstimate(), 15, 15 *.03); @@ -319,31 +319,31 @@ public class 
DirectUnionTest { @Test public void checkMisc() { try { - final Union u = newUnion(HllUtil.MIN_LOG_K - 1); + final HllUnion u = newUnion(HllUtil.MIN_LOG_K - 1); fail(); } catch (final SketchesArgumentException e) { //expected } try { - final Union u = newUnion(HllUtil.MAX_LOG_K + 1); + final HllUnion u = newUnion(HllUtil.MAX_LOG_K + 1); fail(); } catch (final SketchesArgumentException e) { //expected } - final Union u = newUnion(7); + final HllUnion u = newUnion(7); final HllSketch sk = u.getResult(); assertTrue(sk.isEmpty()); } @Test public void checkHeapify() { - final Union u = newUnion(16); + final HllUnion u = newUnion(16); for (int i = 0; i < (1 << 20); i++) { u.update(i); } final double est1 = u.getEstimate(); final byte[] byteArray = u.toUpdatableByteArray(); - final Union u2 = Union.heapify(byteArray); + final HllUnion u2 = HllUnion.heapify(byteArray); assertEquals(u2.getEstimate(), est1, 0.0); } @@ -363,7 +363,7 @@ public class DirectUnionTest { @Test public void checkEmptyCouponMisc() { final int lgK = 8; - final Union union = newUnion(lgK); + final HllUnion union = newUnion(lgK); for (int i = 0; i < 20; i++) { union.update(i); } //SET mode union.couponUpdate(0); assertEquals(union.getEstimate(), 20.0, 0.001); @@ -371,7 +371,7 @@ public class DirectUnionTest { assertTrue(union.hasMemorySegment()); assertFalse(union.isOffHeap()); final int bytes = union.getUpdatableSerializationBytes(); - assertTrue(bytes <= Union.getMaxSerializationBytes(lgK)); + assertTrue(bytes <= HllUnion.getMaxSerializationBytes(lgK)); assertFalse(union.isCompact()); } @@ -388,7 +388,7 @@ public class DirectUnionTest { final HllSketch sk2 = HllSketch.wrap(MemorySegment.ofArray(skByteArr)); assertEquals(sk2.getEstimate(), est, 0.0); - final Union union = newUnion(lgConfigK); + final HllUnion union = newUnion(lgConfigK); union.update(HllSketch.wrap(MemorySegment.ofArray(skByteArr))); assertEquals(union.getEstimate(), est, 0.0); } @@ -402,7 +402,7 @@ public class DirectUnionTest { final 
double est1 = sk1.getEstimate(); final byte[] byteArr1 = sk1.toCompactByteArray(); - final Union union = newUnion(lgConfigK); + final HllUnion union = newUnion(lgConfigK); union.update(HllSketch.wrap(MemorySegment.ofArray(byteArr1))); final double est2 = union.getEstimate(); assertEquals(est2, est1); @@ -412,10 +412,10 @@ public class DirectUnionTest { public void checkWritableWrap() { final int lgConfigK = 10; final int n = 128; - final Union union = newUnion(lgConfigK); + final HllUnion union = newUnion(lgConfigK); for (int i = 0; i < n; i++) { union.update(i); } final double est = union.getEstimate(); - final Union union2 = Union.writableWrap(MemorySegment.ofArray(union.toUpdatableByteArray())); + final HllUnion union2 = HllUnion.writableWrap(MemorySegment.ofArray(union.toUpdatableByteArray())); final double est2 = union2.getEstimate(); assertEquals(est2, est, 0.0); } @@ -426,13 +426,13 @@ public class DirectUnionTest { final int n = 128; final HllSketch sk = new HllSketch(lgConfigK, HLL_6); for (int i = 0; i < n; i++) {sk.update(i); } - Union.writableWrap(MemorySegment.ofArray(sk.toUpdatableByteArray())); + HllUnion.writableWrap(MemorySegment.ofArray(sk.toUpdatableByteArray())); } - private static Union newUnion(final int lgK) { + private static HllUnion newUnion(final int lgK) { final int bytes = HllSketch.getMaxUpdatableSerializationBytes(lgK, TgtHllType.HLL_8); final MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]); - return new Union(lgK, wseg); + return new HllUnion(lgK, wseg); } private static double getBound(final int lgK, final boolean ub, final boolean oooFlag, final int numStdDev, final double est) { diff --git a/src/test/java/org/apache/datasketches/hll/HllArrayTest.java b/src/test/java/org/apache/datasketches/hll/HllArrayTest.java index 27793679b..3bbb01db0 100644 --- a/src/test/java/org/apache/datasketches/hll/HllArrayTest.java +++ b/src/test/java/org/apache/datasketches/hll/HllArrayTest.java @@ -34,7 +34,7 @@ import 
org.apache.datasketches.hll.AbstractHllArray; import org.apache.datasketches.hll.HllArray; import org.apache.datasketches.hll.HllSketch; import org.apache.datasketches.hll.TgtHllType; -import org.apache.datasketches.hll.Union; +import org.apache.datasketches.hll.HllUnion; import org.testng.annotations.Test; /** @@ -59,7 +59,7 @@ public class HllArrayTest { } private static void testComposite(final int lgK, final TgtHllType tgtHllType, final int n) { - final Union u = new Union(lgK); + final HllUnion u = new HllUnion(lgK); final HllSketch sk = new HllSketch(lgK, tgtHllType); for (int i = 0; i < n; i++) { u.update(i); diff --git a/src/test/java/org/apache/datasketches/hll/HllSketchMergeOrderTest.java b/src/test/java/org/apache/datasketches/hll/HllSketchMergeOrderTest.java index f2656f15f..1618f81e7 100644 --- a/src/test/java/org/apache/datasketches/hll/HllSketchMergeOrderTest.java +++ b/src/test/java/org/apache/datasketches/hll/HllSketchMergeOrderTest.java @@ -98,7 +98,7 @@ public class HllSketchMergeOrderTest { * Merges three sketches in the specified order and returns the composite estimate */ private double mergeThreeSketches(final HllSketch s1, final HllSketch s2, final HllSketch s3) { - final Union union = new Union(LgK); + final HllUnion union = new HllUnion(LgK); union.update(s1); union.update(s2); diff --git a/src/test/java/org/apache/datasketches/hll/HllSketchTest.java b/src/test/java/org/apache/datasketches/hll/HllSketchTest.java index d0744f857..b9cc6f298 100644 --- a/src/test/java/org/apache/datasketches/hll/HllSketchTest.java +++ b/src/test/java/org/apache/datasketches/hll/HllSketchTest.java @@ -47,7 +47,7 @@ import org.apache.datasketches.hll.HllSketchImpl; import org.apache.datasketches.hll.HllUtil; import org.apache.datasketches.hll.PreambleUtil; import org.apache.datasketches.hll.TgtHllType; -import org.apache.datasketches.hll.Union; +import org.apache.datasketches.hll.HllUnion; import org.testng.annotations.Test; /** @@ -431,15 +431,15 @@ public 
class HllSketchTest { @SuppressWarnings("unused") @Test public void checkJavadocExample() { - Union union; HllSketch sk, sk2; + HllUnion union; HllSketch sk, sk2; final int lgK = 12; sk = new HllSketch(lgK, TgtHllType.HLL_4); //can be 4, 6, or 8 for (int i = 0; i < (2 << lgK); i++) { sk.update(i); } final byte[] arr = sk.toCompactByteArray(); // ... - union = Union.heapify(arr); //initializes the union using data from the array. + union = HllUnion.heapify(arr); //initializes the union using data from the array. //OR, if used in an off-heap environment: - union = Union.heapify(MemorySegment.ofArray(arr)); + union = HllUnion.heapify(MemorySegment.ofArray(arr)); //To recover an updatable Heap sketch: sk2 = HllSketch.heapify(arr); diff --git a/src/test/java/org/apache/datasketches/hll/IsomorphicTest.java b/src/test/java/org/apache/datasketches/hll/IsomorphicTest.java index 90db8088b..d895246a8 100644 --- a/src/test/java/org/apache/datasketches/hll/IsomorphicTest.java +++ b/src/test/java/org/apache/datasketches/hll/IsomorphicTest.java @@ -30,11 +30,6 @@ import static org.testng.Assert.fail; import java.lang.foreign.MemorySegment; -import org.apache.datasketches.hll.AbstractHllArray; -import org.apache.datasketches.hll.CurMode; -import org.apache.datasketches.hll.HllSketch; -import org.apache.datasketches.hll.TgtHllType; -import org.apache.datasketches.hll.Union; import org.testng.annotations.Test; /** @@ -55,7 +50,7 @@ public class IsomorphicTest { final TgtHllType tgtHllType1 = TgtHllType.fromOrdinal(t); final HllSketch sk1 = buildHeapSketch(lgK, tgtHllType1, curMode); final byte[] sk1bytes = sk1.toUpdatableByteArray(); //UPDATABLE - final Union union = buildHeapUnion(lgK, null); //UNION + final HllUnion union = buildHeapUnion(lgK, null); //UNION union.update(sk1); final HllSketch sk2 = union.getResult(tgtHllType1); final byte[] sk2bytes = sk2.toUpdatableByteArray(); //UPDATABLE @@ -77,7 +72,7 @@ public class IsomorphicTest { final TgtHllType tgtHllType1 = 
TgtHllType.fromOrdinal(t); final HllSketch sk1 = buildHeapSketch(lgK, tgtHllType1, curMode); final byte[] sk1bytes = sk1.toCompactByteArray(); //COMPACT - final Union union = buildHeapUnion(lgK, null); //UNION + final HllUnion union = buildHeapUnion(lgK, null); //UNION union.update(sk1); final HllSketch sk2 = union.getResult(tgtHllType1); final byte[] sk2bytes = sk2.toCompactByteArray(); //COMPACT @@ -161,17 +156,17 @@ public class IsomorphicTest { } private static void innerLoop(final int uLgK, final int skLgK, final TgtHllType tgtHllType) { - Union u; + HllUnion u; HllSketch sk; final HllSketch skOut; - //CASE 1 Heap Union, Heap sketch + //CASE 1 Heap HllUnion, Heap HllSketch u = buildHeapUnionHllMode(uLgK, 0); sk = buildHeapSketchHllMode(skLgK, tgtHllType, 1 << uLgK); u.update(sk); final byte[] bytesOut1 = u.getResult(HLL_8).toUpdatableByteArray(); - //CASE 2 Heap Union, MemorySegment sketch + //CASE 2 Heap HllUnion, MemorySegment HllSketch u = buildHeapUnionHllMode(uLgK, 0); sk = buildMemorySegmentSketchHllMode(skLgK, tgtHllType, 1 << uLgK); u.update(sk); @@ -181,10 +176,10 @@ public class IsomorphicTest { //println("Uheap/SkSegment HIP: " + bytesToDouble(bytesOut2, 8)); //HipAccum String comb = "uLgK: " + uLgK + ", skLgK: " + skLgK + ", SkType: " + tgtHllType.toString() - + ", Case1: Heap Union, Heap sketch; Case2: /Heap Union, MemorySegment sketch"; + + ", Case1: Heap HllUnion, Heap HllSketch; Case2: /Heap HllUnion, MemorySegment HllSketch"; checkArrays(bytesOut1, bytesOut2, comb, false); - //CASE 3 Offheap Union, Heap sketch + //CASE 3 Offheap HllUnion, Heap HllSketch u = buildMemorySegmentUnionHllMode(uLgK, 0); sk = buildHeapSketchHllMode(skLgK, tgtHllType, 1 << uLgK); u.update(sk); @@ -194,10 +189,10 @@ public class IsomorphicTest { //println("Usegment/SkHeap HIP: " + bytesToDouble(bytesOut3, 8)); //HipAccum comb = "LgK: " + uLgK + ", skLgK: " + skLgK + ", SkType: " + tgtHllType.toString() - + ", Case2: Heap Union, MemorySegment sketch; Case3: 
/MemorySegment Union, Heap sketch"; + + ", Case2: Heap HllUnion, MemorySegment HllSketch; Case3: /MemorySegment HllUnion, Heap HllSketch"; checkArrays(bytesOut2, bytesOut3, comb, false); - //Case 4 MemorySegment Union, MemorySegment sketch + //Case 4 MemorySegment HllUnion, MemorySegment HllSketch u = buildMemorySegmentUnionHllMode(uLgK, 0); sk = buildMemorySegmentSketchHllMode(skLgK, tgtHllType, 1 << uLgK); u.update(sk); @@ -205,7 +200,7 @@ public class IsomorphicTest { comb = "LgK: " + uLgK + ", skLgK: " + skLgK + ", SkType: " + tgtHllType.toString() - + ", Case2: Heap Union, MemorySegment sketch; Case4: /MemorySegment Union, MemorySegment sketch"; + + ", Case2: Heap HllUnion, MemorySegment HllSketch; Case4: /MemorySegment HllUnion, MemorySegment HllSketch"; checkArrays(bytesOut2, bytesOut4, comb, false); } @@ -218,7 +213,7 @@ public class IsomorphicTest { public void isomorphicHllMerges2() { byte[] bytesOut8, bytesOut6, bytesOut4; String comb; - Union u; + HllUnion u; HllSketch sk; for (int lgK = 4; lgK <= 4; lgK++) { //All LgK u = buildHeapUnionHllMode(lgK, 0); @@ -260,18 +255,18 @@ public class IsomorphicTest { } //BUILDERS - private Union buildHeapUnion(final int lgMaxK, final CurMode curMode) { - final Union u = new Union(lgMaxK); + private HllUnion buildHeapUnion(final int lgMaxK, final CurMode curMode) { + final HllUnion u = new HllUnion(lgMaxK); final int n = (curMode == null) ? 0 : getN(lgMaxK, curMode); for (int i = 0; i < n; i++) { u.update(i + v); } v += n; return u; } - private Union buildMemorySegmentUnion(final int lgMaxK, final CurMode curMode) { + private HllUnion buildMemorySegmentUnion(final int lgMaxK, final CurMode curMode) { final int bytes = HllSketch.getMaxUpdatableSerializationBytes(lgMaxK, TgtHllType.HLL_8); final MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]); - final Union u = new Union(lgMaxK, wseg); + final HllUnion u = new HllUnion(lgMaxK, wseg); final int n = (curMode == null) ? 
0 : getN(lgMaxK, curMode); for (int i = 0; i < n; i++) { u.update(i + v); } v += n; @@ -296,17 +291,17 @@ public class IsomorphicTest { return sk; } - private static Union buildHeapUnionHllMode(final int lgMaxK, final int startN) { - final Union u = new Union(lgMaxK); + private static HllUnion buildHeapUnionHllMode(final int lgMaxK, final int startN) { + final HllUnion u = new HllUnion(lgMaxK); final int n = getN(lgMaxK, HLL); for (int i = 0; i < n; i++) { u.update(i + startN); } return u; } - private static Union buildMemorySegmentUnionHllMode(final int lgMaxK, final int startN) { + private static HllUnion buildMemorySegmentUnionHllMode(final int lgMaxK, final int startN) { final int bytes = HllSketch.getMaxUpdatableSerializationBytes(lgMaxK, TgtHllType.HLL_8); final MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]); - final Union u = new Union(lgMaxK, wseg); + final HllUnion u = new HllUnion(lgMaxK, wseg); final int n = getN(lgMaxK, HLL); for (int i = 0; i < n; i++) { u.update(i + startN); } return u; diff --git a/src/test/java/org/apache/datasketches/hll/UnionCaseTest.java b/src/test/java/org/apache/datasketches/hll/UnionCaseTest.java index fc6a7a40d..84ec7ce9a 100644 --- a/src/test/java/org/apache/datasketches/hll/UnionCaseTest.java +++ b/src/test/java/org/apache/datasketches/hll/UnionCaseTest.java @@ -33,12 +33,6 @@ import static org.testng.Assert.assertTrue; import java.lang.foreign.MemorySegment; import org.apache.datasketches.common.SketchesStateException; -import org.apache.datasketches.hll.AbstractHllArray; -import org.apache.datasketches.hll.DirectHllArray; -import org.apache.datasketches.hll.HllArray; -import org.apache.datasketches.hll.HllSketch; -import org.apache.datasketches.hll.TgtHllType; -import org.apache.datasketches.hll.Union; import org.testng.annotations.Test; /** @@ -49,7 +43,7 @@ public class UnionCaseTest { long v = 0; final static int maxLgK = 12; HllSketch source; - //Union union; + //HllUnion union; String hfmt = 
"%10s%10s%10s%10s%10s%10s%10s%10s%10s%10s%10s" + LS; String hdr = String.format(hfmt, "caseNum","srcLgKStr","gdtLgKStr","srcType","gdtType", "srcSeg","gdtSeg","srcMode","gdtMode","srcOoof","gdtOoof"); @@ -96,13 +90,13 @@ public class UnionCaseTest { private void checkCase(final int caseNum, final TgtHllType srcType, final boolean srcSeg) { source = getSource(caseNum, srcType, srcSeg); final boolean gdtSeg = (caseNum & 1) > 0; - final Union union = getUnion(caseNum, gdtSeg); + final HllUnion union = getUnion(caseNum, gdtSeg); union.update(source); final int totalU = getSrcCount(caseNum, maxLgK) + getUnionCount(caseNum); output(caseNum, source, union, totalU); } - private void output(final int caseNum, final HllSketch source, final Union union, final int totalU) { + private void output(final int caseNum, final HllSketch source, final HllUnion union, final int totalU) { final double estU = union.getEstimate(); final double err = Math.abs((estU / totalU) - 1.0); final int gdtLgK = union.getLgConfigK(); @@ -137,7 +131,7 @@ public class UnionCaseTest { } } - private Union getUnion(final int caseNum, final boolean useMemorySegment) { + private HllUnion getUnion(final int caseNum, final boolean useMemorySegment) { final int unionU = getUnionCount(caseNum); return (useMemorySegment) ? 
buildMemorSegmentUnion(maxLgK, unionU) : buildHeapUnion(maxLgK, unionU); } @@ -162,10 +156,10 @@ public class UnionCaseTest { @Test public void checkMisc() { - final Union u = buildHeapUnion(12, 0); + final HllUnion u = buildHeapUnion(12, 0); int bytes = u.getCompactSerializationBytes(); assertEquals(bytes, 8); - bytes = Union.getMaxSerializationBytes(7); + bytes = HllUnion.getMaxSerializationBytes(7); assertEquals(bytes, 40 + 128); double v = u.getEstimate(); assertEquals(v, 0.0, 0.0); @@ -187,7 +181,7 @@ public class UnionCaseTest { final int n2 = 3; final int n3 = 2; final int sum = n1 + n2 + n3; - final Union u = buildHeapUnion(12, n1); //gdt = list + final HllUnion u = buildHeapUnion(12, n1); //gdt = list final HllSketch h2 = buildHeapSketch(11, HLL_6, n2); //src = list final HllSketch h3 = buildHeapSketch(10, HLL_8, n3); //src = list u.update(h2); @@ -209,7 +203,7 @@ public class UnionCaseTest { final int n2 = 2; final int n3 = 16; final int sum = n1 + n2 + n3; - final Union u = buildHeapUnion(12, n1); //LIST, 5 + final HllUnion u = buildHeapUnion(12, n1); //LIST, 5 final HllSketch h2 = buildHeapSketch(11, HLL_6, n2); //LIST, 2 final HllSketch h3 = buildHeapSketch(10, HLL_8, n3); //SET, 16 u.update(h2); @@ -231,7 +225,7 @@ public class UnionCaseTest { final int n2 = 10; final int n3 = 6; final int sum = n1 + n2 + n3; - final Union u = buildHeapUnion(12, n1); + final HllUnion u = buildHeapUnion(12, n1); final HllSketch h2 = buildHeapSketch(11, HLL_6, n2); //SET final HllSketch h3 = buildHeapSketch(10, HLL_8, n3); //LIST u.update(h2); @@ -253,7 +247,7 @@ public class UnionCaseTest { final int n2 = 10; final int n3 = 16; final int sum = n1 + n2 + n3; - final Union u = buildHeapUnion(12, n1); + final HllUnion u = buildHeapUnion(12, n1); final HllSketch h2 = buildHeapSketch(11, HLL_6, n2); //src: SET final HllSketch h3 = buildHeapSketch(10, HLL_8, n3); //src: SET u.update(h2); @@ -275,7 +269,7 @@ public class UnionCaseTest { final int n2 = 0; final int n3 = 7; 
final int sum = n1 + n2 + n3; - final Union u = buildHeapUnion(12, n1); //LIST empty + final HllUnion u = buildHeapUnion(12, n1); //LIST empty final HllSketch h2 = buildHeapSketch(11, HLL_6, n2); //src: LIST empty, ignored final HllSketch h3 = buildHeapSketch(10, HLL_8, n3); //src: LIST u.update(h2); @@ -297,7 +291,7 @@ public class UnionCaseTest { final int n2 = 0; final int n3 = 16; final int sum = n1 + n2 + n3; - final Union u = buildHeapUnion(12, n1); //LIST empty + final HllUnion u = buildHeapUnion(12, n1); //LIST empty final HllSketch h2 = buildHeapSketch(11, HLL_6, n2); //LIST empty, ignored final HllSketch h3 = buildHeapSketch(10, HLL_8, n3); // Src Set u.update(h2); @@ -316,7 +310,7 @@ public class UnionCaseTest { @SuppressWarnings("unused") @Test public void checkSpecialMergeCase4() { - final Union u = buildHeapUnion(12, 1 << 9); + final HllUnion u = buildHeapUnion(12, 1 << 9); final HllSketch sk = buildHeapSketch(12, HLL_8, 1 << 9); u.update(sk); @@ -360,7 +354,7 @@ public class UnionCaseTest { final HllSketch sk = buildHeapSketch(4, HLL_8, 16); final HllArray hllArr = (HllArray)(sk.hllSketchImpl); hllArr.putRebuildCurMinNumKxQFlag(true); //corrupt the flag - final Union union = buildHeapUnion(4, 0); + final HllUnion union = buildHeapUnion(4, 0); union.update(sk); } @@ -370,7 +364,7 @@ public class UnionCaseTest { final DirectHllArray hllArr = (DirectHllArray)(sk.hllSketchImpl); hllArr.putRebuildCurMinNumKxQFlag(true); //corrupt the flag final MemorySegment wseg = sk.getMemorySegment(); - Union.writableWrap(wseg); + HllUnion.writableWrap(wseg); } @Test(expectedExceptions = SketchesStateException.class) @@ -393,17 +387,17 @@ public class UnionCaseTest { } //BUILDERS - private Union buildHeapUnion(final int lgMaxK, final int n) { - final Union u = new Union(lgMaxK); + private HllUnion buildHeapUnion(final int lgMaxK, final int n) { + final HllUnion u = new HllUnion(lgMaxK); for (int i = 0; i < n; i++) { u.update(i + v); } v += n; return u; } - private 
Union buildMemorSegmentUnion(final int lgMaxK, final int n) { + private HllUnion buildMemorSegmentUnion(final int lgMaxK, final int n) { final int bytes = HllSketch.getMaxUpdatableSerializationBytes(lgMaxK, TgtHllType.HLL_8); final MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]); - final Union u = new Union(lgMaxK, wseg); + final HllUnion u = new HllUnion(lgMaxK, wseg); for (int i = 0; i < n; i++) { u.update(i + v); } v += n; return u; diff --git a/src/test/java/org/apache/datasketches/hll/UnionTest.java b/src/test/java/org/apache/datasketches/hll/UnionTest.java index 0e1bc5b46..23c373207 100644 --- a/src/test/java/org/apache/datasketches/hll/UnionTest.java +++ b/src/test/java/org/apache/datasketches/hll/UnionTest.java @@ -31,12 +31,6 @@ import static org.testng.Assert.fail; import java.lang.foreign.MemorySegment; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.hll.HllSketch; -import org.apache.datasketches.hll.HllUtil; -import org.apache.datasketches.hll.PreambleUtil; -import org.apache.datasketches.hll.RelativeErrorTables; -import org.apache.datasketches.hll.TgtHllType; -import org.apache.datasketches.hll.Union; import org.testng.annotations.Test; /** @@ -199,7 +193,7 @@ public class UnionTest { final String h1SketchStr = ("H1 SKETCH: \n" + h1.toString()); final String h2SketchStr = ("H2 SKETCH: \n" + h2.toString()); - final Union union = newUnion(lgMaxK); + final HllUnion union = newUnion(lgMaxK); union.update(h1); final String uH1SketchStr = ("Union after H1: \n" + union.getResult(resultType).toString()); @@ -261,7 +255,7 @@ public class UnionTest { } private static void toFrom1(final int lgK, final TgtHllType tgtHllType, final int n) { - final Union srcU = newUnion(lgK); + final HllUnion srcU = newUnion(lgK); final HllSketch srcSk = new HllSketch(lgK, tgtHllType); for (int i = 0; i < n; i++) { srcSk.update(i); @@ -272,7 +266,7 @@ public class UnionTest { final byte[] byteArr = 
srcU.toCompactByteArray(); final MemorySegment seg = MemorySegment.ofArray(byteArr); - final Union dstU = Union.heapify(seg); + final HllUnion dstU = HllUnion.heapify(seg); assertFalse(dstU.isSameResource(seg)); assertEquals(dstU.getEstimate(), srcU.getEstimate(), 0.0); @@ -292,7 +286,7 @@ public class UnionTest { } private static void toFrom2(final int lgK, final TgtHllType tgtHllType, final int n) { - final Union srcU = newUnion(lgK); + final HllUnion srcU = newUnion(lgK); final HllSketch srcSk = new HllSketch(lgK, tgtHllType); for (int i = 0; i < n; i++) { srcSk.update(i); @@ -302,14 +296,14 @@ public class UnionTest { srcU.update(srcSk); final byte[] byteArr = srcU.toCompactByteArray(); - final Union dstU = Union.heapify(byteArr); + final HllUnion dstU = HllUnion.heapify(byteArr); assertEquals(dstU.getEstimate(), srcU.getEstimate(), 0.0); } @Test public void checkCompositeEst() { - final Union u = new Union(); + final HllUnion u = new HllUnion(); assertEquals(u.getCompositeEstimate(), 0, .03); for (int i = 1; i <= 15; i++) { u.update(i); } assertEquals(u.getCompositeEstimate(), 15, 15 *.03); @@ -321,31 +315,31 @@ public class UnionTest { @Test public void checkMisc() { try { - final Union u = newUnion(HllUtil.MIN_LOG_K - 1); + final HllUnion u = newUnion(HllUtil.MIN_LOG_K - 1); fail(); } catch (final SketchesArgumentException e) { //expected } try { - final Union u = newUnion(HllUtil.MAX_LOG_K + 1); + final HllUnion u = newUnion(HllUtil.MAX_LOG_K + 1); fail(); } catch (final SketchesArgumentException e) { //expected } - final Union u = newUnion(7); + final HllUnion u = newUnion(7); final HllSketch sk = u.getResult(); assertTrue(sk.isEmpty()); } @Test public void checkHeapify() { - final Union u = newUnion(16); + final HllUnion u = newUnion(16); for (int i = 0; i < (1 << 20); i++) { u.update(i); } final double est1 = u.getEstimate(); final byte[] byteArray = u.toUpdatableByteArray(); - final Union u2 = Union.heapify(byteArray); + final HllUnion u2 = 
HllUnion.heapify(byteArray); assertEquals(u2.getEstimate(), est1, 0.0); } @@ -365,7 +359,7 @@ public class UnionTest { @Test public void checkEmptyCouponMisc() { final int lgK = 8; - final Union union = newUnion(lgK); + final HllUnion union = newUnion(lgK); for (int i = 0; i < 20; i++) { union.update(i); } //SET mode union.couponUpdate(0); assertEquals(union.getEstimate(), 20.0, 0.001); @@ -373,7 +367,7 @@ public class UnionTest { assertFalse(union.hasMemorySegment()); assertFalse(union.isOffHeap()); final int bytes = union.getUpdatableSerializationBytes(); - assertTrue(bytes <= Union.getMaxSerializationBytes(lgK)); + assertTrue(bytes <= HllUnion.getMaxSerializationBytes(lgK)); assertFalse(union.isCompact()); } @@ -390,7 +384,7 @@ public class UnionTest { final HllSketch sk2 = HllSketch.wrap(MemorySegment.ofArray(skByteArr)); assertEquals(sk2.getEstimate(), est, 0.0); - final Union union = newUnion(lgConfigK); + final HllUnion union = newUnion(lgConfigK); union.update(HllSketch.wrap(MemorySegment.ofArray(skByteArr))); assertEquals(union.getEstimate(), est, 0.0); } @@ -404,7 +398,7 @@ public class UnionTest { final double est1 = sk1.getEstimate(); final byte[] byteArr1 = sk1.toCompactByteArray(); - final Union union = newUnion(lgConfigK); + final HllUnion union = newUnion(lgConfigK); union.update(HllSketch.wrap(MemorySegment.ofArray(byteArr1))); final double est2 = union.getEstimate(); assertEquals(est2, est1); @@ -420,7 +414,7 @@ public class UnionTest { sk1.update(i); sk2.update(i + u); } - final Union union = new Union(lgK); + final HllUnion union = new HllUnion(lgK); union.update(sk1); union.update(sk2); final HllSketch rsk1 = union.getResult(TgtHllType.HLL_8); @@ -450,9 +444,9 @@ public class UnionTest { sk1.update(i); sk2.update(i + u); } - final int bytes = Union.getMaxSerializationBytes(lgK); + final int bytes = HllUnion.getMaxSerializationBytes(lgK); final MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]); - final Union union1 = new Union(lgK, 
wseg); //Create original union off-heap + final HllUnion union1 = new HllUnion(lgK, wseg); //Create original union off-heap union1.update(sk1); union1.update(sk2); //oooFlag = Rebuild_KxQ = TRUE assertTrue(!union1.toString().isEmpty()); @@ -466,23 +460,23 @@ public class UnionTest { assertFalse(rebuild); } - @Test //similar to above except uses Union.writableWrap instead of heapify + @Test //similar to above except uses HllUnion.writableWrap instead of heapify public void druidUseCase() { final int lgK = 12; - final int bytes = Union.getMaxSerializationBytes(lgK); + final int bytes = HllUnion.getMaxSerializationBytes(lgK); final MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]); - new Union(lgK, wseg); // result is unused, relying on side effect + new HllUnion(lgK, wseg); // result is unused, relying on side effect int trueCount = 0; final int delta = 1 << (lgK - 3); //(lgK < 8) ? 16 : 1 << (lgK - 3) //allows changing lgK above for (int i = 0; i < 3; i++) { - Union.writableWrap(wseg).update(buildSketch(trueCount, delta)); + HllUnion.writableWrap(wseg).update(buildSketch(trueCount, delta)); trueCount += delta; } boolean rebuild = PreambleUtil.extractRebuildCurMinNumKxQFlag(wseg); final double hipAccum = PreambleUtil.extractHipAccum(wseg); assertTrue(rebuild); assertTrue(hipAccum == 0.0); - final HllSketch result = Union.writableWrap(wseg).getResult(); //rebuilds result + final HllSketch result = HllUnion.writableWrap(wseg).getResult(); //rebuilds result rebuild = result.hllSketchImpl.isRebuildCurMinNumKxQFlag(); assertFalse(rebuild); final double est = result.getEstimate(); @@ -500,8 +494,8 @@ public class UnionTest { return sketch; } - private static Union newUnion(final int lgK) { - return new Union(lgK); + private static HllUnion newUnion(final int lgK) { + return new HllUnion(lgK); } private static double getBound(final int lgK, final boolean ub, final boolean oooFlag, final int numStdDev, final double est) { 
--------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
