This is an automated email from the ASF dual-hosted git repository. leerho pushed a commit to branch 6.1.X_align_with_master in repository https://gitbox.apache.org/repos/asf/datasketches-java.git
commit 9fa87995f20435f065847b31eab8ce429786570f Author: Lee Rhodes <[email protected]> AuthorDate: Wed Dec 11 17:07:16 2024 -0800 This aligns 6.1.X with master --- .github/workflows/check_cpp_files.yml | 9 ++++- pom.xml | 2 +- .../filters/bloomfilter/BloomFilter.java | 4 +- .../filters/bloomfilter/BloomFilterBuilder.java | 4 +- .../datasketches/frequencies/ItemsSketch.java | 4 +- .../datasketches/frequencies/LongsSketch.java | 4 +- .../datasketches/frequencies/PreambleUtil.java | 7 ++-- .../org/apache/datasketches/hash/MurmurHash3.java | 2 - .../org/apache/datasketches/hash/package-info.java | 3 +- .../apache/datasketches/kll/KllDoublesHelper.java | 1 + .../apache/datasketches/kll/KllDoublesSketch.java | 1 + .../apache/datasketches/kll/KllFloatsHelper.java | 1 + .../apache/datasketches/kll/KllFloatsSketch.java | 1 + .../apache/datasketches/kll/KllItemsHelper.java | 1 + .../apache/datasketches/kll/KllLongsHelper.java | 1 + .../apache/datasketches/kll/KllLongsSketch.java | 1 + .../datasketches/quantiles/DoublesSketch.java | 1 + .../datasketches/quantiles/package-info.java | 3 +- .../quantilescommon/DoublesSortedView.java | 8 ++-- .../quantilescommon/FloatsSortedView.java | 8 ++-- .../quantilescommon/GenericSortedView.java | 8 ++-- .../quantilescommon/LongsSortedView.java | 8 ++-- .../datasketches/quantilescommon/QuantilesAPI.java | 4 +- .../quantilescommon/QuantilesDoublesAPI.java | 42 ++++++++++--------- .../quantilescommon/QuantilesFloatsAPI.java | 8 ++-- .../quantilescommon/QuantilesGenericAPI.java | 8 ++-- .../quantilescommon/QuantilesLongsAPI.java | 8 ++-- .../org/apache/datasketches/req/BaseReqSketch.java | 1 + .../java/org/apache/datasketches/req/ReqSerDe.java | 3 +- .../datasketches/sampling/ReservoirLongsUnion.java | 7 ++-- .../datasketches/sampling/ReservoirSize.java | 1 + .../apache/datasketches/sampling/package-info.java | 4 +- .../apache/datasketches/tdigest/TDigestDouble.java | 47 ++++++++++++++++++++++ .../apache/datasketches/theta/PreambleUtil.java | 
3 +- .../java/org/apache/datasketches/theta/Sketch.java | 2 +- .../org/apache/datasketches/theta/Sketches.java | 10 ++--- .../org/apache/datasketches/theta/UnionImpl.java | 2 +- .../apache/datasketches/theta/UpdateSketch.java | 1 + .../java/org/apache/datasketches/tuple/Union.java | 3 +- .../datasketches/tuple/adouble/DoubleSummary.java | 4 ++ .../tuple/aninteger/IntegerSummary.java | 4 ++ .../DirectArrayOfDoublesCompactSketch.java | 1 + .../DirectArrayOfDoublesIntersection.java | 3 +- .../DirectArrayOfDoublesQuickSelectSketch.java | 1 + .../DirectArrayOfDoublesSketchIterator.java | 1 + .../arrayofdoubles/DirectArrayOfDoublesUnion.java | 1 + .../datasketches/tdigest/TDigestDoubleTest.java | 13 ++++-- tools/SketchesCheckstyle.xml | 2 +- 48 files changed, 173 insertions(+), 93 deletions(-) diff --git a/.github/workflows/check_cpp_files.yml b/.github/workflows/check_cpp_files.yml index 59ae5824..b96ed607 100644 --- a/.github/workflows/check_cpp_files.yml +++ b/.github/workflows/check_cpp_files.yml @@ -12,12 +12,17 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Checkout C++ - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: repository: apache/datasketches-cpp path: cpp + - name: Setup Java + uses: actions/setup-java@v2 + with: + java-version: '11' + distribution: 'temurin' - name: Configure C++ build run: cd cpp/build && cmake .. -DGENERATE=true - name: Build C++ unit tests diff --git a/pom.xml b/pom.xml index 51fe1d41..cadcad01 100644 --- a/pom.xml +++ b/pom.xml @@ -33,7 +33,7 @@ under the License. 
<groupId>org.apache.datasketches</groupId> <artifactId>datasketches-java</artifactId> - <version>6.1.1</version> + <version>6.2.0-SNAPSHOT</version> <packaging>jar</packaging> <name>${project.artifactId}</name> diff --git a/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java b/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java index eafe8834..171fc2cf 100644 --- a/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java +++ b/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java @@ -33,8 +33,8 @@ import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.memory.XxHash; /** - * <p>A Bloom filter is a data structure that can be used for probabilistic - * set membership.</p> + * A Bloom filter is a data structure that can be used for probabilistic + * set membership. * * <p>When querying a Bloom filter, there are no false positives. Specifically: * When querying an item that has already been inserted to the filter, the filter will diff --git a/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilterBuilder.java b/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilterBuilder.java index f865a335..ee17a991 100644 --- a/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilterBuilder.java +++ b/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilterBuilder.java @@ -25,8 +25,8 @@ import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.memory.WritableMemory; /** - * <p>This class provides methods to help estimate the correct parameters when - * creating a Bloom filter, and methods to create the filter using those values.</p> + * This class provides methods to help estimate the correct parameters when + * creating a Bloom filter, and methods to create the filter using those values. 
* * <p>The underlying math is described in the * <a href='https://en.wikipedia.org/wiki/Bloom_filter#Optimal_number_of_hash_functions'> diff --git a/src/main/java/org/apache/datasketches/frequencies/ItemsSketch.java b/src/main/java/org/apache/datasketches/frequencies/ItemsSketch.java index 7c5bc770..b2648b94 100644 --- a/src/main/java/org/apache/datasketches/frequencies/ItemsSketch.java +++ b/src/main/java/org/apache/datasketches/frequencies/ItemsSketch.java @@ -55,10 +55,10 @@ import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; /** - * <p>This sketch is useful for tracking approximate frequencies of items of type <i>&lt;T&gt;</i> + * This sketch is useful for tracking approximate frequencies of items of type <i>&lt;T&gt;</i> * with optional associated counts (<i>&lt;T&gt;</i> item, <i>long</i> count) that are members of a * multiset of such items. The true frequency of an item is defined to be the sum of associated - * counts.</p> + * counts. * * <p>This implementation provides the following capabilities:</p> * <ul> diff --git a/src/main/java/org/apache/datasketches/frequencies/LongsSketch.java b/src/main/java/org/apache/datasketches/frequencies/LongsSketch.java index 30452a1b..7d1990b6 100644 --- a/src/main/java/org/apache/datasketches/frequencies/LongsSketch.java +++ b/src/main/java/org/apache/datasketches/frequencies/LongsSketch.java @@ -54,9 +54,9 @@ import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; /** - * <p>This sketch is useful for tracking approximate frequencies of <i>long</i> items with optional + * This sketch is useful for tracking approximate frequencies of <i>long</i> items with optional * associated counts (<i>long</i> item, <i>long</i> count) that are members of a multiset of - * such items. The true frequency of an item is defined to be the sum of associated counts.</p> + * such items. The true frequency of an item is defined to be the sum of associated counts. 
* * <p>This implementation provides the following capabilities:</p> * <ul> diff --git a/src/main/java/org/apache/datasketches/frequencies/PreambleUtil.java b/src/main/java/org/apache/datasketches/frequencies/PreambleUtil.java index 785c211f..90de1c83 100644 --- a/src/main/java/org/apache/datasketches/frequencies/PreambleUtil.java +++ b/src/main/java/org/apache/datasketches/frequencies/PreambleUtil.java @@ -31,12 +31,11 @@ import org.apache.datasketches.memory.Memory; /** * This class defines the preamble data structure and provides basic utilities for some of the key * fields. - * <p> - * The intent of the design of this class was to isolate the detailed knowledge of the bit and byte + * + * <p>The intent of the design of this class was to isolate the detailed knowledge of the bit and byte * layout of the serialized form of the sketches derived from the Sketch class into one place. This * allows the possibility of the introduction of different serialization schemes with minimal impact - * on the rest of the library. - * </p> + * on the rest of the library.</p> * * <p> * MAP: Low significance bytes of this <i>long</i> data structure are on the right. However, the diff --git a/src/main/java/org/apache/datasketches/hash/MurmurHash3.java b/src/main/java/org/apache/datasketches/hash/MurmurHash3.java index 9fb2ab94..a708e007 100644 --- a/src/main/java/org/apache/datasketches/hash/MurmurHash3.java +++ b/src/main/java/org/apache/datasketches/hash/MurmurHash3.java @@ -29,10 +29,8 @@ import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.Memory; /** - * <p> * The MurmurHash3 is a fast, non-cryptographic, 128-bit hash function that has * excellent avalanche and 2-way bit independence properties. 
- * </p> * * <p> * Austin Appleby's C++ diff --git a/src/main/java/org/apache/datasketches/hash/package-info.java b/src/main/java/org/apache/datasketches/hash/package-info.java index 2d97afeb..5744b277 100644 --- a/src/main/java/org/apache/datasketches/hash/package-info.java +++ b/src/main/java/org/apache/datasketches/hash/package-info.java @@ -18,12 +18,11 @@ */ /** - * <p>The hash package contains a high-performing and extended Java implementations + * The hash package contains a high-performing and extended Java implementations * of Austin Appleby's 128-bit MurmurHash3 hash function originally coded in C. * This core MurmurHash3.java class is used throughout many of the sketch classes for consistency * and as long as the user specifies the same seed will result in coordinated hash operations. * This package also contains an adaptor class that extends the basic class with more functions * commonly associated with hashing. - * </p> */ package org.apache.datasketches.hash; diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java b/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java index 67035b45..acbecdf0 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java @@ -312,6 +312,7 @@ final class KllDoublesHelper { /** * Compression algorithm used to merge higher levels. + * * <p>Here is what we do for each level:</p> * <ul><li>If it does not need to be compacted, then simply copy it over.</li> * <li>Otherwise, it does need to be compacted, so... 
diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java index 59cda435..fbe9dbb3 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java @@ -278,6 +278,7 @@ public abstract class KllDoublesSketch extends KllSketch implements QuantilesDou /** * {@inheritDoc} + * * <p>The parameter <i>k</i> will not change.</p> */ @Override diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java b/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java index 50cadeb3..69045f78 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java @@ -312,6 +312,7 @@ final class KllFloatsHelper { /** * Compression algorithm used to merge higher levels. + * * <p>Here is what we do for each level:</p> * <ul><li>If it does not need to be compacted, then simply copy it over.</li> * <li>Otherwise, it does need to be compacted, so... 
diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java index 48781631..b993f999 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java @@ -278,6 +278,7 @@ public abstract class KllFloatsSketch extends KllSketch implements QuantilesFloa /** * {@inheritDoc} + * * <p>The parameter <i>k</i> will not change.</p> */ @Override diff --git a/src/main/java/org/apache/datasketches/kll/KllItemsHelper.java b/src/main/java/org/apache/datasketches/kll/KllItemsHelper.java index 00b8c7d3..dd309ba3 100644 --- a/src/main/java/org/apache/datasketches/kll/KllItemsHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllItemsHelper.java @@ -346,6 +346,7 @@ final class KllItemsHelper { /** * Compression algorithm used to merge higher levels. + * * <p>Here is what we do for each level:</p> * <ul><li>If it does not need to be compacted, then simply copy it over.</li> * <li>Otherwise, it does need to be compacted, so... diff --git a/src/main/java/org/apache/datasketches/kll/KllLongsHelper.java b/src/main/java/org/apache/datasketches/kll/KllLongsHelper.java index ec67b55d..04fe2cc0 100644 --- a/src/main/java/org/apache/datasketches/kll/KllLongsHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllLongsHelper.java @@ -312,6 +312,7 @@ final class KllLongsHelper { /** * Compression algorithm used to merge higher levels. + * * <p>Here is what we do for each level:</p> * <ul><li>If it does not need to be compacted, then simply copy it over.</li> * <li>Otherwise, it does need to be compacted, so... 
diff --git a/src/main/java/org/apache/datasketches/kll/KllLongsSketch.java b/src/main/java/org/apache/datasketches/kll/KllLongsSketch.java index 0f6fa7d8..f5688ad7 100644 --- a/src/main/java/org/apache/datasketches/kll/KllLongsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllLongsSketch.java @@ -278,6 +278,7 @@ public abstract class KllLongsSketch extends KllSketch implements QuantilesLongs /** * {@inheritDoc} + * * <p>The parameter <i>k</i> will not change.</p> */ @Override diff --git a/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java b/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java index 03f84116..f75d3dfe 100644 --- a/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java +++ b/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java @@ -506,6 +506,7 @@ public abstract class DoublesSketch implements QuantilesDoublesAPI { /** * {@inheritDoc} + * * <p>The parameter <i>k</i> will not change.</p> */ @Override diff --git a/src/main/java/org/apache/datasketches/quantiles/package-info.java b/src/main/java/org/apache/datasketches/quantiles/package-info.java index c44e2e31..8767757b 100644 --- a/src/main/java/org/apache/datasketches/quantiles/package-info.java +++ b/src/main/java/org/apache/datasketches/quantiles/package-info.java @@ -18,9 +18,8 @@ */ /** - * <p>The quantiles package contains stochastic streaming algorithms that enable single-pass + * The quantiles package contains stochastic streaming algorithms that enable single-pass * analysis of the distribution of a stream of quantiles. 
- * </p> * * @see org.apache.datasketches.quantiles.DoublesSketch * @see org.apache.datasketches.quantiles.ItemsSketch diff --git a/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedView.java index 98616661..1427f627 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedView.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedView.java @@ -38,7 +38,7 @@ public interface DoublesSortedView extends SortedView { * @param splitPoints an array of <i>m</i> unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into <i>m+1</i> overlapping intervals. - * + * <blockquote> * <p>The start of each interval is below the lowest item retained by the sketch * corresponding to a zero rank or zero probability, and the end of the interval * is the rank or cumulative probability corresponding to the split point.</p> @@ -55,7 +55,7 @@ public interface DoublesSortedView extends SortedView { * </ul> * * <p>It is not recommended to include either the minimum or maximum items of the input stream.</p> - * + * </blockquote> * @param searchCrit the desired search criteria. * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. @@ -100,7 +100,7 @@ public interface DoublesSortedView extends SortedView { * @param splitPoints an array of <i>m</i> unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into <i>m+1</i> consecutive, non-overlapping intervals. 
- * + * <blockquote> * <p>Each interval except for the end intervals starts with a split point and ends with the next split * point in sequence.</p> * @@ -124,7 +124,7 @@ public interface DoublesSortedView extends SortedView { * </ul> * * <p>It is not recommended to include either the minimum or maximum items of the input stream.</p> - * + * </blockquote> * @param searchCrit the desired search criteria. * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. diff --git a/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedView.java index 0a0c54b5..eec699d9 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedView.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedView.java @@ -38,7 +38,7 @@ public interface FloatsSortedView extends SortedView { * @param splitPoints an array of <i>m</i> unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into <i>m+1</i> overlapping intervals. - * + * <blockquote> * <p>The start of each interval is below the lowest item retained by the sketch * corresponding to a zero rank or zero probability, and the end of the interval * is the rank or cumulative probability corresponding to the split point.</p> @@ -55,7 +55,7 @@ public interface FloatsSortedView extends SortedView { * </ul> * * <p>It is not recommended to include either the minimum or maximum items of the input stream.</p> - * + * </blockquote> * @param searchCrit the desired search criteria. * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. 
@@ -100,7 +100,7 @@ public interface FloatsSortedView extends SortedView { * @param splitPoints an array of <i>m</i> unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into <i>m+1</i> consecutive, non-overlapping intervals. - * + * <blockquote> * <p>Each interval except for the end intervals starts with a split point and ends with the next split * point in sequence.</p> * @@ -124,7 +124,7 @@ public interface FloatsSortedView extends SortedView { * </ul> * * <p>It is not recommended to include either the minimum or maximum items of the input stream.</p> - * + * </blockquote> * @param searchCrit the desired search criteria. * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. diff --git a/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedView.java index 0d27ce78..f0dc8115 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedView.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedView.java @@ -47,7 +47,7 @@ public interface GenericSortedView<T> extends PartitioningFeature<T>, SketchPar * @param splitPoints an array of <i>m</i> unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into <i>m+1</i> overlapping intervals. 
- * + * <blockquote> * <p>The start of each interval is below the lowest item retained by the sketch * corresponding to a zero rank or zero probability, and the end of the interval * is the rank or cumulative probability corresponding to the split point.</p> @@ -64,7 +64,7 @@ public interface GenericSortedView<T> extends PartitioningFeature<T>, SketchPar * </ul> * * <p>It is not recommended to include either the minimum or maximum items of the input stream.</p> - * + * </blockquote> * @param searchCrit the desired search criteria. * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. @@ -116,7 +116,7 @@ public interface GenericSortedView<T> extends PartitioningFeature<T>, SketchPar * @param splitPoints an array of <i>m</i> unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into <i>m+1</i> consecutive, non-overlapping intervals. - * + * <blockquote> * <p>Each interval except for the end intervals starts with a split point and ends with the next split * point in sequence.</p> * @@ -140,7 +140,7 @@ public interface GenericSortedView<T> extends PartitioningFeature<T>, SketchPar * </ul> * * <p>It is not recommended to include either the minimum or maximum items of the input stream.</p> - * + * </blockquote> * @param searchCrit the desired search criteria. * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. 
diff --git a/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedView.java index 4823edd1..e7e3521c 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedView.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedView.java @@ -38,7 +38,7 @@ public interface LongsSortedView extends SortedView { * @param splitPoints an array of <i>m</i> unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into <i>m+1</i> overlapping intervals. - * + * <blockquote> * <p>The start of each interval is below the lowest item retained by the sketch * corresponding to a zero rank or zero probability, and the end of the interval * is the rank or cumulative probability corresponding to the split point.</p> @@ -55,7 +55,7 @@ public interface LongsSortedView extends SortedView { * </ul> * * <p>It is not recommended to include either the minimum or maximum items of the input stream.</p> - * + * </blockquote> * @param searchCrit the desired search criteria. * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. @@ -100,7 +100,7 @@ public interface LongsSortedView extends SortedView { * @param splitPoints an array of <i>m</i> unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into <i>m+1</i> consecutive, non-overlapping intervals. - * + * <blockquote> * <p>Each interval except for the end intervals starts with a split point and ends with the next split * point in sequence.</p> * @@ -124,7 +124,7 @@ public interface LongsSortedView extends SortedView { * </ul> * * <p>It is not recommended to include either the minimum or maximum items of the input stream.</p> - * + * </blockquote> * @param searchCrit the desired search criteria. 
* @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesAPI.java index 9943fefe..a7af4cf7 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesAPI.java @@ -20,12 +20,12 @@ package org.apache.datasketches.quantilescommon; /** - * <p>This is a stochastic streaming sketch that enables near-real time analysis of the + * This is a stochastic streaming sketch that enables near-real time analysis of the * approximate distribution of items from a very large stream in a single pass, requiring only * that the items are comparable. * The analysis is obtained using the <i>getQuantile()</i> function or the * inverse functions getRank(), getPMF() (the Probability Mass Function), and getCDF() - * (the Cumulative Distribution Function).</p> + * (the Cumulative Distribution Function). * * <p>Given an input stream of <i>N</i> items, the <i>natural rank</i> of any specific * item is defined as its index <i>(1 to N)</i> in the hypothetical sorted stream of all diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java index e8e5310f..65952819 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java @@ -21,6 +21,8 @@ package org.apache.datasketches.quantilescommon; import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; +import org.apache.datasketches.common.SketchesArgumentException; + /** * The Quantiles API for item type <i>double</i>. 
* @see QuantilesAPI @@ -33,7 +35,7 @@ public interface QuantilesDoublesAPI extends QuantilesAPI { * This is equivalent to {@link #getCDF(double[], QuantileSearchCriteria) getCDF(splitPoints, INCLUSIVE)} * @param splitPoints an array of <i>m</i> unique, monotonically increasing items. * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. - * @throws IllegalArgumentException if sketch is empty. + * @throws SketchesArgumentException if sketch is empty. */ default double[] getCDF(double[] splitPoints) { return getCDF(splitPoints, INCLUSIVE); @@ -50,7 +52,7 @@ public interface QuantilesDoublesAPI extends QuantilesAPI { * @param splitPoints an array of <i>m</i> unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into <i>m+1</i> overlapping intervals. - * + * <blockquote> * <p>The start of each interval is below the lowest item retained by the sketch * corresponding to a zero rank or zero probability, and the end of the interval * is the rank or cumulative probability corresponding to the split point.</p> @@ -67,10 +69,10 @@ public interface QuantilesDoublesAPI extends QuantilesAPI { * </ul> * * <p>It is not recommended to include either the minimum or maximum items of the input stream.</p> - * + * </blockquote> * @param searchCrit the desired search criteria. * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. - * @throws IllegalArgumentException if sketch is empty. + * @throws SketchesArgumentException if sketch is empty. */ double[] getCDF(double[] splitPoints, QuantileSearchCriteria searchCrit); @@ -79,7 +81,7 @@ public interface QuantilesDoublesAPI extends QuantilesAPI { * item returned by <i>getQuantile(1.0)</i>. * * @return the maximum item of the stream - * @throws IllegalArgumentException if sketch is empty. + * @throws SketchesArgumentException if sketch is empty. 
*/ double getMaxItem(); @@ -88,7 +90,7 @@ public interface QuantilesDoublesAPI extends QuantilesAPI { * item returned by <i>getQuantile(0.0)</i>. * * @return the minimum item of the stream - * @throws IllegalArgumentException if sketch is empty. + * @throws SketchesArgumentException if sketch is empty. */ double getMinItem(); @@ -96,7 +98,7 @@ public interface QuantilesDoublesAPI extends QuantilesAPI { * This is equivalent to {@link #getPMF(double[], QuantileSearchCriteria) getPMF(splitPoints, INCLUSIVE)} * @param splitPoints an array of <i>m</i> unique, monotonically increasing items. * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. - * @throws IllegalArgumentException if sketch is empty. + * @throws SketchesArgumentException if sketch is empty. */ default double[] getPMF(double[] splitPoints) { return getPMF(splitPoints, INCLUSIVE); @@ -113,7 +115,7 @@ public interface QuantilesDoublesAPI extends QuantilesAPI { * @param splitPoints an array of <i>m</i> unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into <i>m+1</i> consecutive, non-overlapping intervals. - * + * <blockquote> * <p>Each interval except for the end intervals starts with a split point and ends with the next split * point in sequence.</p> * @@ -137,10 +139,10 @@ public interface QuantilesDoublesAPI extends QuantilesAPI { * </ul> * * <p>It is not recommended to include either the minimum or maximum items of the input stream.</p> - * + * </blockquote> * @param searchCrit the desired search criteria. * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. - * @throws IllegalArgumentException if sketch is empty. + * @throws SketchesArgumentException if sketch is empty. 
*/ double[] getPMF(double[] splitPoints, QuantileSearchCriteria searchCrit); @@ -148,7 +150,7 @@ public interface QuantilesDoublesAPI extends QuantilesAPI { * This is equivalent to {@link #getQuantile(double, QuantileSearchCriteria) getQuantile(rank, INCLUSIVE)} * @param rank the given normalized rank, a double in the range [0.0, 1.0]. * @return the approximate quantile given the normalized rank. - * @throws IllegalArgumentException if sketch is empty. + * @throws SketchesArgumentException if sketch is empty. */ default double getQuantile(double rank) { return getQuantile(rank, INCLUSIVE); @@ -163,7 +165,7 @@ public interface QuantilesDoublesAPI extends QuantilesAPI { * If EXCLUSIVE, he given rank includes all quantiles &lt; * the quantile directly corresponding to the given rank. * @return the approximate quantile given the normalized rank. - * @throws IllegalArgumentException if sketch is empty. + * @throws SketchesArgumentException if sketch is empty. * @see org.apache.datasketches.quantilescommon.QuantileSearchCriteria */ double getQuantile(double rank, QuantileSearchCriteria searchCrit); @@ -180,7 +182,7 @@ public interface QuantilesDoublesAPI extends QuantilesAPI { * @param rank the given normalized rank * @return the lower bound of the quantile confidence interval in which the quantile of the * given rank exists. - * @throws IllegalArgumentException if sketch is empty. + * @throws SketchesArgumentException if sketch is empty. 
*/ double getQuantileUpperBound(double rank); @@ -205,7 +207,7 @@ public interface QuantilesDoublesAPI extends QuantilesAPI { * @param ranks the given array of normalized ranks, each of which must be * in the interval [0.0,1.0]. * @return an array of quantiles corresponding to the given array of normalized ranks. - * @throws IllegalArgumentException if sketch is empty. + * @throws SketchesArgumentException if sketch is empty. */ default double[] getQuantiles(double[] ranks) { return getQuantiles(ranks, INCLUSIVE); @@ -219,7 +221,7 @@ public interface QuantilesDoublesAPI extends QuantilesAPI { * @param searchCrit if INCLUSIVE, the given ranks include all quantiles ≤ * the quantile directly corresponding to each rank. * @return an array of quantiles corresponding to the given array of normalized ranks. - * @throws IllegalArgumentException if sketch is empty. + * @throws SketchesArgumentException if sketch is empty. * @see org.apache.datasketches.quantilescommon.QuantileSearchCriteria */ double[] getQuantiles(double[] ranks, QuantileSearchCriteria searchCrit); @@ -228,7 +230,7 @@ public interface QuantilesDoublesAPI extends QuantilesAPI { * This is equivalent to {@link #getRank(double, QuantileSearchCriteria) getRank(quantile, INCLUSIVE)} * @param quantile the given quantile * @return the normalized rank corresponding to the given quantile - * @throws IllegalArgumentException if sketch is empty. + * @throws SketchesArgumentException if sketch is empty. */ default double getRank(double quantile) { return getRank(quantile, INCLUSIVE); @@ -240,7 +242,7 @@ public interface QuantilesDoublesAPI extends QuantilesAPI { * @param quantile the given quantile * @param searchCrit if INCLUSIVE the given quantile is included into the rank. * @return the normalized rank corresponding to the given quantile - * @throws IllegalArgumentException if sketch is empty. + * @throws SketchesArgumentException if sketch is empty. 
* @see org.apache.datasketches.quantilescommon.QuantileSearchCriteria */ double getRank(double quantile, QuantileSearchCriteria searchCrit); @@ -249,7 +251,7 @@ public interface QuantilesDoublesAPI extends QuantilesAPI { * This is equivalent to {@link #getRanks(double[], QuantileSearchCriteria) getRanks(quantiles, INCLUSIVE)} * @param quantiles the given array of quantiles * @return an array of normalized ranks corresponding to the given array of quantiles. - * @throws IllegalArgumentException if sketch is empty. + * @throws SketchesArgumentException if sketch is empty. */ default double[] getRanks(double[] quantiles) { return getRanks(quantiles, INCLUSIVE); @@ -262,7 +264,7 @@ public interface QuantilesDoublesAPI extends QuantilesAPI { * @param quantiles the given array of quantiles * @param searchCrit if INCLUSIVE, the given quantiles include the rank directly corresponding to each quantile. * @return an array of normalized ranks corresponding to the given array of quantiles. - * @throws IllegalArgumentException if sketch is empty. + * @throws SketchesArgumentException if sketch is empty. * @see org.apache.datasketches.quantilescommon.QuantileSearchCriteria */ double[] getRanks(double[] quantiles, QuantileSearchCriteria searchCrit); diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java index 98678044..8b8a91bd 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java @@ -49,7 +49,7 @@ public interface QuantilesFloatsAPI extends QuantilesAPI { * @param splitPoints an array of <i>m</i> unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into <i>m+1</i> overlapping intervals. 
- * + * <blockquote> * <p>The start of each interval is below the lowest item retained by the sketch * corresponding to a zero rank or zero probability, and the end of the interval * is the rank or cumulative probability corresponding to the split point.</p> @@ -66,7 +66,7 @@ public interface QuantilesFloatsAPI extends QuantilesAPI { * </ul> * * <p>It is not recommended to include either the minimum or maximum items of the input stream.</p> - * + * </blockquote> * @param searchCrit the desired search criteria. * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. @@ -112,7 +112,7 @@ public interface QuantilesFloatsAPI extends QuantilesAPI { * @param splitPoints an array of <i>m</i> unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into <i>m+1</i> consecutive, non-overlapping intervals. - * + * <blockquote> * <p>Each interval except for the end intervals starts with a split point and ends with the next split * point in sequence.</p> * @@ -136,7 +136,7 @@ public interface QuantilesFloatsAPI extends QuantilesAPI { * </ul> * * <p>It is not recommended to include either the minimum or maximum items of the input stream.</p> - * + * </blockquote> * @param searchCrit the desired search criteria. * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. 
diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java index 459e58cd..bc088128 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java @@ -53,7 +53,7 @@ public interface QuantilesGenericAPI<T> extends QuantilesAPI, PartitioningFeatur * @param splitPoints an array of <i>m</i> unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into <i>m+1</i> overlapping intervals. - * + * <blockquote> * <p>The start of each interval is below the lowest item retained by the sketch * corresponding to a zero rank or zero probability, and the end of the interval * is the rank or cumulative probability corresponding to the split point.</p> @@ -70,7 +70,7 @@ public interface QuantilesGenericAPI<T> extends QuantilesAPI, PartitioningFeatur * </ul> * * <p>It is not recommended to include either the minimum or maximum items of the input stream.</p> - * + * </blockquote> * @param searchCrit the desired search criteria. * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. @@ -132,7 +132,7 @@ public interface QuantilesGenericAPI<T> extends QuantilesAPI, PartitioningFeatur * @param splitPoints an array of <i>m</i> unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into <i>m+1</i> consecutive, non-overlapping intervals. 
- * + * <blockquote> * <p>Each interval except for the end intervals starts with a split point and ends with the next split * point in sequence.</p> * @@ -156,7 +156,7 @@ public interface QuantilesGenericAPI<T> extends QuantilesAPI, PartitioningFeatur * </ul> * * <p>It is not recommended to include either the minimum or maximum items of the input stream.</p> - * + * </blockquote> * @param searchCrit the desired search criteria. * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsAPI.java index 2b542a68..fb1ca581 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsAPI.java @@ -50,7 +50,7 @@ public interface QuantilesLongsAPI extends QuantilesAPI { * @param splitPoints an array of <i>m</i> unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into <i>m+1</i> overlapping intervals. - * + * <blockquote> * <p>The start of each interval is below the lowest item retained by the sketch * corresponding to a zero rank or zero probability, and the end of the interval * is the rank or cumulative probability corresponding to the split point.</p> @@ -67,7 +67,7 @@ public interface QuantilesLongsAPI extends QuantilesAPI { * </ul> * * <p>It is not recommended to include either the minimum or maximum items of the input stream.</p> - * + * </blockquote> * @param searchCrit the desired search criteria. * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. 
@@ -113,7 +113,7 @@ public interface QuantilesLongsAPI extends QuantilesAPI { * @param splitPoints an array of <i>m</i> unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into <i>m+1</i> consecutive, non-overlapping intervals. - * + * <blockquote> * <p>Each interval except for the end intervals starts with a split point and ends with the next split * point in sequence.</p> * @@ -137,7 +137,7 @@ public interface QuantilesLongsAPI extends QuantilesAPI { * </ul> * * <p>It is not recommended to include either the minimum or maximum items of the input stream.</p> - * + * </blockquote> * @param searchCrit the desired search criteria. * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. diff --git a/src/main/java/org/apache/datasketches/req/BaseReqSketch.java b/src/main/java/org/apache/datasketches/req/BaseReqSketch.java index 2460308f..99de621d 100644 --- a/src/main/java/org/apache/datasketches/req/BaseReqSketch.java +++ b/src/main/java/org/apache/datasketches/req/BaseReqSketch.java @@ -160,6 +160,7 @@ abstract class BaseReqSketch implements QuantilesFloatsAPI { /** * {@inheritDoc} + * * <p>The parameters k, highRankAccuracy, and reqDebug will not change.</p> */ @Override diff --git a/src/main/java/org/apache/datasketches/req/ReqSerDe.java b/src/main/java/org/apache/datasketches/req/ReqSerDe.java index 52b1371a..952749de 100644 --- a/src/main/java/org/apache/datasketches/req/ReqSerDe.java +++ b/src/main/java/org/apache/datasketches/req/ReqSerDe.java @@ -110,7 +110,8 @@ import org.apache.datasketches.memory.WritableMemory; * 0 || (empty)| 0 | K | Flags |FamID=17| SerVer | PreInts = 2 | * </pre> * <pre> - * <p>Flags:</p> + * + * <b>Flags:</b> * Bit 0 : Endianness, reserved * Bit 1 : ReadOnly, reserved * Bit 2 : Empty diff --git a/src/main/java/org/apache/datasketches/sampling/ReservoirLongsUnion.java 
b/src/main/java/org/apache/datasketches/sampling/ReservoirLongsUnion.java index c3ef3395..b58317a9 100644 --- a/src/main/java/org/apache/datasketches/sampling/ReservoirLongsUnion.java +++ b/src/main/java/org/apache/datasketches/sampling/ReservoirLongsUnion.java @@ -132,10 +132,9 @@ public final class ReservoirLongsUnion { /** * Union the given sketch. - * <p> - * This method can be repeatedly called. If the given sketch is null it is interpreted as an empty - * sketch. - * </p> + * + * <p>This method can be repeatedly called. If the given sketch is null it is interpreted as an empty + * sketch.</p> * * @param sketchIn The incoming sketch. */ diff --git a/src/main/java/org/apache/datasketches/sampling/ReservoirSize.java b/src/main/java/org/apache/datasketches/sampling/ReservoirSize.java index e6f36195..e12d31aa 100644 --- a/src/main/java/org/apache/datasketches/sampling/ReservoirSize.java +++ b/src/main/java/org/apache/datasketches/sampling/ReservoirSize.java @@ -25,6 +25,7 @@ import org.apache.datasketches.common.Util; /** * This class provides a compact representation of reservoir size by encoding it into a * fixed-point 16-bit value. + * * <p>The value itself is a fractional power of 2, with 5 bits of exponent and 11 bits of * mantissa. The exponent allows a choice of anywhere from 0-30, and there are 2048 possible * reservoir size values within each octave. 
Because reservoir size must be an integer, this diff --git a/src/main/java/org/apache/datasketches/sampling/package-info.java b/src/main/java/org/apache/datasketches/sampling/package-info.java index edfaa20a..bbe44691 100644 --- a/src/main/java/org/apache/datasketches/sampling/package-info.java +++ b/src/main/java/org/apache/datasketches/sampling/package-info.java @@ -18,8 +18,8 @@ */ /** - * <p>This package is dedicated to streaming algorithms that enable fixed size, uniform sampling of - * weighted and unweighted items from a stream.</p> + * This package is dedicated to streaming algorithms that enable fixed size, uniform sampling of + * weighted and unweighted items from a stream. * * <p>These sketches are mergeable and can be serialized and deserialized to/from a compact * form.</p> diff --git a/src/main/java/org/apache/datasketches/tdigest/TDigestDouble.java b/src/main/java/org/apache/datasketches/tdigest/TDigestDouble.java index 1e340851..478b8012 100644 --- a/src/main/java/org/apache/datasketches/tdigest/TDigestDouble.java +++ b/src/main/java/org/apache/datasketches/tdigest/TDigestDouble.java @@ -32,6 +32,7 @@ import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableBuffer; import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.quantilescommon.QuantilesAPI; +import org.apache.datasketches.quantilescommon.QuantilesUtil; /** * t-Digest for estimating quantiles and ranks. 
@@ -125,6 +126,7 @@ public final class TDigestDouble { /** * Process buffered values and merge centroids if needed */ + // this method will become private in the next major version public void compress() { if (numBuffered_ == 0) { return; } final int num = numBuffered_ + numCentroids_; @@ -277,6 +279,51 @@ public final class TDigestDouble { return weightedAverage(centroidWeights_[numCentroids_ - 1], w1, maxValue_, w2); } + /** + * Returns an approximation to the Probability Mass Function (PMF) of the input stream + * given a set of split points. + * + * @param splitPoints an array of <i>m</i> unique, monotonically increasing values + * that divide the input domain into <i>m+1</i> consecutive disjoint intervals (bins). + * + * @return an array of m+1 doubles each of which is an approximation + * to the fraction of the input stream values (the mass) that fall into one of those intervals. + * @throws SketchesStateException if sketch is empty. + */ + public double[] getPMF(final double[] splitPoints) { + final double[] buckets = getCDF(splitPoints); + for (int i = buckets.length; i-- > 1; ) { + buckets[i] -= buckets[i - 1]; + } + return buckets; + } + + /** + * Returns an approximation to the Cumulative Distribution Function (CDF), which is the + * cumulative analog of the PMF, of the input stream given a set of split points. + * + * @param splitPoints an array of <i>m</i> unique, monotonically increasing values + * that divide the input domain into <i>m+1</i> consecutive disjoint intervals. + * + * @return an array of m+1 doubles, which are a consecutive approximation to the CDF + * of the input stream given the splitPoints. The value at array position j of the returned + * CDF array is the sum of the returned values in positions 0 through j of the returned PMF + * array. This can be viewed as array of ranks of the given split points plus one more value + * that is always 1. + * @throws SketchesStateException if sketch is empty. 
+ */ + public double[] getCDF(final double[] splitPoints) { + if (isEmpty()) { throw new SketchesStateException(QuantilesAPI.EMPTY_MSG); } + QuantilesUtil.checkDoublesSplitPointsOrder(splitPoints); + final int len = splitPoints.length + 1; + final double[] ranks = new double[len]; + for (int i = 0; i < len - 1; i++) { + ranks[i] = getRank(splitPoints[i]); + } + ranks[len - 1] = 1.0; + return ranks; + } + /** * Computes size needed to serialize the current state. * @return size in bytes needed to serialize this tdigest diff --git a/src/main/java/org/apache/datasketches/theta/PreambleUtil.java b/src/main/java/org/apache/datasketches/theta/PreambleUtil.java index 562be982..e1d9262e 100644 --- a/src/main/java/org/apache/datasketches/theta/PreambleUtil.java +++ b/src/main/java/org/apache/datasketches/theta/PreambleUtil.java @@ -37,6 +37,7 @@ import org.apache.datasketches.thetacommon.ThetaUtil; /** * This class defines the preamble data structure and provides basic utilities for some of the key * fields. + * * <p>The intent of the design of this class was to isolate the detailed knowledge of the bit and * byte layout of the serialized form of the sketches derived from the Sketch class into one place. 
* This allows the possibility of the introduction of different serialization @@ -126,7 +127,7 @@ import org.apache.datasketches.thetacommon.ThetaUtil; * 3 ||----------------------Start of Hash Table of longs---------------------------------| * </pre> * - * <p> Union objects require 32 bytes of preamble plus a non-compact array of longs representing a + * <p>Union objects require 32 bytes of preamble plus a non-compact array of longs representing a * hash table.</p> * * <pre> diff --git a/src/main/java/org/apache/datasketches/theta/Sketch.java b/src/main/java/org/apache/datasketches/theta/Sketch.java index 05d6bbb2..89618bc2 100644 --- a/src/main/java/org/apache/datasketches/theta/Sketch.java +++ b/src/main/java/org/apache/datasketches/theta/Sketch.java @@ -309,7 +309,7 @@ public abstract class Sketch implements MemoryStatus { * log_base2 of the number of nominal entries, which is a power of 2. * @param lgNomEntries <a href="{@docRoot}/resources/dictionary.html#nomEntries">Nominal Entries</a> * @return the maximum number of storage bytes required for a CompactSketch with the given - * nomEntries. + * lgNomEntries. */ public static int getCompactSketchMaxBytes(final int lgNomEntries) { return (int)((2 << lgNomEntries) * ThetaUtil.REBUILD_THRESHOLD diff --git a/src/main/java/org/apache/datasketches/theta/Sketches.java b/src/main/java/org/apache/datasketches/theta/Sketches.java index dc20072d..2e7fa091 100644 --- a/src/main/java/org/apache/datasketches/theta/Sketches.java +++ b/src/main/java/org/apache/datasketches/theta/Sketches.java @@ -91,14 +91,14 @@ public final class Sketches { /** * Returns the maximum number of storage bytes required for a CompactSketch given the configured - * number of nominal entries (power of 2). - * @param nomEntries <a href="{@docRoot}/resources/dictionary.html#nomEntries">Nominal Entries</a> + * log_base2 of the number of nominal entries, which is a power of 2. 
+ * @param lgNomEntries <a href="{@docRoot}/resources/dictionary.html#nomEntries">Nominal Entries</a> * @return the maximum number of storage bytes required for a CompactSketch with the given - * nomEntries. + * lgNomEntries. * @see Sketch#getCompactSketchMaxBytes(int) */ - public static int getCompactSketchMaxBytes(final int nomEntries) { - return Sketch.getCompactSketchMaxBytes(nomEntries); + public static int getCompactSketchMaxBytes(final int lgNomEntries) { + return Sketch.getCompactSketchMaxBytes(lgNomEntries); } /** diff --git a/src/main/java/org/apache/datasketches/theta/UnionImpl.java b/src/main/java/org/apache/datasketches/theta/UnionImpl.java index be298080..bac05de7 100644 --- a/src/main/java/org/apache/datasketches/theta/UnionImpl.java +++ b/src/main/java/org/apache/datasketches/theta/UnionImpl.java @@ -333,7 +333,7 @@ final class UnionImpl extends Union { if (sketchIn.isOrdered() && (sketchIn instanceof CompactSketch)) { //Use early stop //Ordered, thus compact if (sketchIn.hasMemory()) { - final Memory skMem = ((CompactSketch) sketchIn).getMemory(); + final Memory skMem = sketchIn.getMemory(); final int preambleLongs = skMem.getByte(PREAMBLE_LONGS_BYTE) & 0X3F; for (int i = 0; i < curCountIn; i++ ) { final int offsetBytes = preambleLongs + i << 3; diff --git a/src/main/java/org/apache/datasketches/theta/UpdateSketch.java b/src/main/java/org/apache/datasketches/theta/UpdateSketch.java index 882c5e2e..cb6854b0 100644 --- a/src/main/java/org/apache/datasketches/theta/UpdateSketch.java +++ b/src/main/java/org/apache/datasketches/theta/UpdateSketch.java @@ -343,6 +343,7 @@ public abstract class UpdateSketch extends Sketch { /** * All potential updates converge here. + * * <p>Don't ever call this unless you really know what you are doing!</p> * * @param hash the given input hash value. A hash of zero or Long.MAX_VALUE is ignored. 
diff --git a/src/main/java/org/apache/datasketches/tuple/Union.java b/src/main/java/org/apache/datasketches/tuple/Union.java index 653312fa..acefa2ab 100644 --- a/src/main/java/org/apache/datasketches/tuple/Union.java +++ b/src/main/java/org/apache/datasketches/tuple/Union.java @@ -100,8 +100,7 @@ public class Union<S extends Summary> { /** * Performs a stateful union of the internal set with the given tupleSketch. * @param tupleSketch input tuple sketch to merge with the internal set. - * - * <p>Nulls and empty sketches are ignored.</p> + * Nulls and empty sketches are ignored. */ public void union(final Sketch<S> tupleSketch) { if (tupleSketch == null || tupleSketch.isEmpty()) { return; } diff --git a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java b/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java index e7abae0d..a54c11af 100644 --- a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java +++ b/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java @@ -41,24 +41,28 @@ public final class DoubleSummary implements UpdatableSummary<Double> { /** * The aggregation mode is the summation function. + * * <p>New retained value = previous retained value + incoming value</p> */ Sum, /** * The aggregation mode is the minimum function. + * * <p>New retained value = min(previous retained value, incoming value)</p> */ Min, /** * The aggregation mode is the maximum function. + * * <p>New retained value = max(previous retained value, incoming value)</p> */ Max, /** * The aggregation mode is always one. 
+ * * <p>New retained value = 1.0</p> */ AlwaysOne diff --git a/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummary.java b/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummary.java index 4c04fa2c..72695355 100644 --- a/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummary.java +++ b/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummary.java @@ -41,24 +41,28 @@ public class IntegerSummary implements UpdatableSummary<Integer> { /** * The aggregation mode is the summation function. + * * <p>New retained value = previous retained value + incoming value</p> */ Sum, /** * The aggregation mode is the minimum function. + * * <p>New retained value = min(previous retained value, incoming value)</p> */ Min, /** * The aggregation mode is the maximum function. + * * <p>New retained value = max(previous retained value, incoming value)</p> */ Max, /** * The aggregation mode is always one. + * * <p>New retained value = 1</p> */ AlwaysOne diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java index 91d4eade..52f82714 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java @@ -30,6 +30,7 @@ import org.apache.datasketches.tuple.Util; /** * Direct Compact Sketch of type ArrayOfDoubles. + * * <p>This implementation uses data in a given Memory that is owned and managed by the caller. 
* This Memory can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.</p> diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesIntersection.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesIntersection.java index 3dd019d7..7c1b1bf0 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesIntersection.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesIntersection.java @@ -23,6 +23,7 @@ import org.apache.datasketches.memory.WritableMemory; /** * Direct Intersection operation for tuple sketches of type ArrayOfDoubles. + * * <p>This implementation uses data in a given Memory that is owned and managed by the caller. * This Memory can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.</p> @@ -43,7 +44,7 @@ final class DirectArrayOfDoublesIntersection extends ArrayOfDoublesIntersection } @Override - protected ArrayOfDoublesQuickSelectSketch createSketch(final int nomEntries, final int numValues, + protected ArrayOfDoublesQuickSelectSketch createSketch(final int nomEntries, final int numValues, final long seed) { return new DirectArrayOfDoublesQuickSelectSketch(nomEntries, 0, 1f, numValues, seed, mem_); } diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java index 1b4e8690..ae1aa3dc 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java @@ -33,6 +33,7 @@ import org.apache.datasketches.tuple.Util; /** * Direct QuickSelect tuple sketch of type ArrayOfDoubles. 
+ * * <p>This implementation uses data in a given Memory that is owned and managed by the caller. * This Memory can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.</p> diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java index 15503fc0..dcdab131 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java @@ -23,6 +23,7 @@ import org.apache.datasketches.memory.Memory; /** * Iterator over the off-heap, Direct tuple sketch of type ArrayOfDoubles (compact or hash table). + * * <p>This implementation uses data in a given Memory that is owned and managed by the caller. * This Memory can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.</p> diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesUnion.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesUnion.java index 00310f53..73401963 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesUnion.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesUnion.java @@ -27,6 +27,7 @@ import org.apache.datasketches.tuple.SerializerDeserializer; /** * Direct Union operation for tuple sketches of type ArrayOfDoubles. + * * <p>This implementation uses data in a given Memory that is owned and managed by the caller. 
* This Memory can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.</p> diff --git a/src/test/java/org/apache/datasketches/tdigest/TDigestDoubleTest.java b/src/test/java/org/apache/datasketches/tdigest/TDigestDoubleTest.java index db043cff..55baa83e 100644 --- a/src/test/java/org/apache/datasketches/tdigest/TDigestDoubleTest.java +++ b/src/test/java/org/apache/datasketches/tdigest/TDigestDoubleTest.java @@ -41,6 +41,8 @@ public class TDigestDoubleTest { assertThrows(SketchesStateException.class, () -> td.getMaxValue()); assertThrows(SketchesStateException.class, () -> td.getRank(0)); assertThrows(SketchesStateException.class, () -> td.getQuantile(0.5)); + assertThrows(SketchesStateException.class, () -> td.getPMF(new double[]{0})); + assertThrows(SketchesStateException.class, () -> td.getCDF(new double[]{0})); } @Test @@ -65,9 +67,6 @@ public class TDigestDoubleTest { final TDigestDouble td = new TDigestDouble(); final int n = 10000; for (int i = 0; i < n; i++) td.update(i); -// System.out.println(td.toString(true)); -// td.compress(); -// System.out.println(td.toString(true)); assertFalse(td.isEmpty()); assertEquals(td.getTotalWeight(), n); assertEquals(td.getMinValue(), 0); @@ -82,6 +81,14 @@ public class TDigestDoubleTest { assertEquals(td.getQuantile(0.9), n * 0.9, n * 0.9 * 0.01); assertEquals(td.getQuantile(0.95), n * 0.95, n * 0.95 * 0.01); assertEquals(td.getQuantile(1), n - 1); + final double[] pmf = td.getPMF(new double[] {n / 2}); + assertEquals(pmf.length, 2); + assertEquals(pmf[0], 0.5, 0.0001); + assertEquals(pmf[1], 0.5, 0.0001); + final double[] cdf = td.getCDF(new double[] {n / 2}); + assertEquals(cdf.length, 2); + assertEquals(cdf[0], 0.5, 0.0001); + assertEquals(cdf[1], 1.0); } @Test diff --git a/tools/SketchesCheckstyle.xml b/tools/SketchesCheckstyle.xml index 873a878a..4e2471d9 100644 --- a/tools/SketchesCheckstyle.xml +++ b/tools/SketchesCheckstyle.xml @@ -230,7 +230,7 @@ under the 
License. <property name="allowedAnnotations" value="Override, Test"/> </module> - <module name="JavadocParagraph"/> + <!-- <module name="JavadocParagraph"/> --> <module name="JavadocTagContinuationIndentation"> <property name="severity" value="ignore"/> --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org For additional commands, e-mail: commits-help@datasketches.apache.org
