This is an automated email from the ASF dual-hosted git repository. leerho pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/datasketches-characterization.git
commit 729a693cfdc6d8abe66c20afbbc1e703bdba297d Author: Lee Rhodes <[email protected]> AuthorDate: Mon Jan 27 15:51:57 2025 -0800 Refactoring & clean up. --- .../characterization/DoubleFlipFlopStream.java | 79 +++ .../StreamMaker.java => DoubleStreamMaker.java} | 69 +-- .../characterization/{req => }/FlipFlopStream.java | 16 +- .../characterization/{req => }/StreamMaker.java | 17 +- .../TrueDoubleRanks.java => TrueRanks.java} | 83 ++- .../filters/BaseFilterAccuracyProfile.java | 56 +- .../filters/BaseFilterUpdateSpeedProfile.java | 29 +- .../characterization/filters/BaseSpaceProfile.java | 56 +- .../filters/BloomFilterAccuracyProfile.java | 36 +- .../filters/BloomFilterSpaceProfile.java | 42 +- .../filters/BloomFilterUpdateSpeedProfile.java | 31 +- .../filters/QuotientFilterAccuracyProfile.java | 145 ++--- .../filters/QuotientFilterExpansionProfile.java | 240 +++++---- .../filters/QuotientFilterSpaceProfile.java | 112 ++-- .../filters/QuotientFilterUpdateSpeedProfile.java | 99 ++-- .../characterization/filters/package-info.java | 20 + .../req/ReqSketchAccuracyProfile.java | 12 +- .../req/ReqSketchAccuracyProfile2.java | 16 +- .../characterization/req/TrueFloatRanks.java | 172 ------ .../tdigest/TDigestAccuracyProfile.java | 292 +++++------ .../tdigest/TDigestErrorVsRankProfile.java | 584 ++++++++++----------- .../tdigest/TDigestMergeAccuracyProfile.java | 308 +++++------ .../tdigest/TDigestSpeedProfile.java | 262 ++++----- .../characterization/tdigest/package-info.java | 20 + tools/SketchesCheckstyle.xml | 2 +- 25 files changed, 1481 insertions(+), 1317 deletions(-) diff --git a/src/main/java/org/apache/datasketches/characterization/DoubleFlipFlopStream.java b/src/main/java/org/apache/datasketches/characterization/DoubleFlipFlopStream.java new file mode 100644 index 0000000..9019b5d --- /dev/null +++ b/src/main/java/org/apache/datasketches/characterization/DoubleFlipFlopStream.java @@ -0,0 +1,79 @@ +package org.apache.datasketches.characterization; + +import org.testng.annotations.Test; + +public class DoubleFlipFlopStream { + private double[] arr; + private int v; + private int idx; + private int low; + private int high; + private int lo; + private int hi; + + /** + * Constructor used by TestNG + */ + public DoubleFlipFlopStream() {} + + /** + * Construct an overall sequence of size N + * @param N the length of the sequence and size of the returned array. + * @param offset The lowest value in the sequence. Usually either 0 or 1. + */ + public DoubleFlipFlopStream(final int N, final int offset) { + arr = new double[N]; + idx = 0; + v = offset; + low = 0; + high = N - 1; + lo = low; + hi = high; + } + + /** + * Generates a flip-flop sequence + * @param loReps : low range repeated steps before flip + * @param hiReps : hi range repeated steps before flip + * @param steps maximum number of steps for this sequence + */ + public void flipFlop(final int loReps, final int hiReps, int steps) { + int n = hi - lo + 1; + while (n > 0 && steps > 0) { + int i = loReps; + while (n > 0 && steps > 0 && i > 0) { + arr[idx++] = lo++ + v; + n--; + steps--; + i--; + } + int j = hiReps; + while (n > 0 && steps > 0 && j > 0) { + arr[idx++] = hi-- + v; + n--; + steps--; + j--; + } + } + } + + /** + * @return the populated array + */ + public double[] getArray() { + return arr; + } + + @Test + public void checkFlipFlop() { + final int N = 50; + final DoubleFlipFlopStream ffs = new DoubleFlipFlopStream(N, 1); + ffs.flipFlop(1, 1, 20); //flip-flop + ffs.flipFlop(10, 1, 10);//forward + ffs.flipFlop(0, 10, 10);//reverse + ffs.flipFlop(1, 1, 10); //flip-flop + for (int i = 0; i < N; i++) { println(ffs.arr[i]); } + } + + static void println(final Object o) { System.out.println(o.toString()); } +} diff --git a/src/main/java/org/apache/datasketches/characterization/req/StreamMaker.java b/src/main/java/org/apache/datasketches/characterization/DoubleStreamMaker.java similarity index 63% copy from src/main/java/org/apache/datasketches/characterization/req/StreamMaker.java copy to src/main/java/org/apache/datasketches/characterization/DoubleStreamMaker.java index 01c4990..53892f4 100644 --- a/src/main/java/org/apache/datasketches/characterization/req/StreamMaker.java +++ b/src/main/java/org/apache/datasketches/characterization/DoubleStreamMaker.java @@ -17,34 +17,41 @@ * under the License. */ -package org.apache.datasketches.characterization.req; +package org.apache.datasketches.characterization; -import static org.apache.datasketches.characterization.req.StreamMaker.Pattern.FLIP_FLOP; -import static org.apache.datasketches.characterization.req.StreamMaker.Pattern.RANDOM; -import static org.apache.datasketches.characterization.req.StreamMaker.Pattern.REVERSED; -import static org.apache.datasketches.characterization.req.StreamMaker.Pattern.SORTED; -import static org.apache.datasketches.characterization.req.StreamMaker.Pattern.SQRT; -import static org.apache.datasketches.characterization.req.StreamMaker.Pattern.ZOOM_IN; -import static org.apache.datasketches.characterization.req.StreamMaker.Pattern.ZOOM_OUT; +import static org.apache.datasketches.characterization.DoubleStreamMaker.Pattern.FLIP_FLOP; +import static org.apache.datasketches.characterization.DoubleStreamMaker.Pattern.RANDOM; +import static org.apache.datasketches.characterization.DoubleStreamMaker.Pattern.REVERSED; +import static org.apache.datasketches.characterization.DoubleStreamMaker.Pattern.SORTED; +import static org.apache.datasketches.characterization.DoubleStreamMaker.Pattern.SQRT; +import static org.apache.datasketches.characterization.DoubleStreamMaker.Pattern.ZOOM_IN; +import static org.apache.datasketches.characterization.DoubleStreamMaker.Pattern.ZOOM_OUT; +import static org.apache.datasketches.characterization.StreamMaker.Pattern.FLIP_FLOP; +import static org.apache.datasketches.characterization.StreamMaker.Pattern.RANDOM; +import static org.apache.datasketches.characterization.StreamMaker.Pattern.REVERSED; +import static org.apache.datasketches.characterization.StreamMaker.Pattern.SORTED; +import static org.apache.datasketches.characterization.StreamMaker.Pattern.SQRT; +import static org.apache.datasketches.characterization.StreamMaker.Pattern.ZOOM_IN; +import static org.apache.datasketches.characterization.StreamMaker.Pattern.ZOOM_OUT; -import org.apache.datasketches.characterization.Shuffle; +import org.apache.datasketches.characterization.StreamMaker.Pattern; import org.testng.annotations.Test; /** * @author Lee Rhodes */ -public class StreamMaker { +public class DoubleStreamMaker { static final String LS = System.getProperty("line.separator"); static String TAB = "\t"; public enum Pattern { SORTED, REVERSED, ZOOM_IN, ZOOM_OUT, RANDOM, SQRT, FLIP_FLOP, CLUSTERED, CLUSTERED_ZOOM_IN, ZOOM_IN_SQRT } - - public float min = 0; - public float max = 0; - - public float[] makeStream(final int n, final Pattern pattern, final int offset) { - float[] arr = new float[n]; + + public double min = 0; + public double max = 0; + + public double[] makeStream(final int n, final Pattern pattern, final int offset) { + double[] arr = new double[n]; min = offset; max = n - 1 + offset; switch (pattern) { @@ -109,13 +116,13 @@ public class StreamMaker { break; } case FLIP_FLOP: { - final FlipFlopStream ffs = new FlipFlopStream(n, offset); - ffs.flipFlop(1, 1, n * 2 / 5); + final DoubleFlipFlopStream dffs = new DoubleFlipFlopStream(n, offset); + dffs.flipFlop(1, 1, n * 2 / 5); final int m = n / 5; - ffs.flipFlop(m, 1, m); - ffs.flipFlop(1, m, m); - ffs.flipFlop(1, 1, n); - arr = ffs.getArray(); + dffs.flipFlop(m, 1, m); + dffs.flipFlop(1, m, m); + dffs.flipFlop(1, 1, n); + arr = dffs.getArray(); break; } case CLUSTERED: { @@ -127,9 +134,9 @@ public class StreamMaker { } return arr; } - + public void printStream(final int n, final Pattern order, final int offset) { - final float[] stream = makeStream(n, order, offset); + final double[] stream = makeStream(n, order, offset); println(order + " n:" + n + " offset: " + offset); for (int i = 0; i < stream.length; i++) { //if (i != 0 && i % 21 == 0) { println(""); } @@ -140,18 +147,18 @@ public class StreamMaker { @Test public void checkStreamMaker() { - printStream(100, SORTED, 1); - printStream(100, REVERSED, 1); + printStream(100, Pattern.SORTED, 1); + printStream(100, Pattern.REVERSED, 1); //printStream(100, ZOOM_IN, 0); - printStream(100, ZOOM_IN, 1); + printStream(100, Pattern.ZOOM_IN, 1); //printStream(100, ZOOM_OUT, 0); - printStream(100, ZOOM_OUT, 1); + printStream(100, Pattern.ZOOM_OUT, 1); //printStream(100, RANDOM, 0); - printStream(100, RANDOM, 1); + printStream(100, Pattern.RANDOM, 1); //printStream(100, SQRT, 0); - printStream(100, SQRT, 1); + printStream(100, Pattern.SQRT, 1); //printStream(100, FLIP_FLOP, 0); - printStream(100, FLIP_FLOP, 1); + printStream(100, Pattern.FLIP_FLOP, 1); } static void print(final Object o) { System.out.print(o.toString()); } diff --git a/src/main/java/org/apache/datasketches/characterization/req/FlipFlopStream.java b/src/main/java/org/apache/datasketches/characterization/FlipFlopStream.java similarity index 88% rename from src/main/java/org/apache/datasketches/characterization/req/FlipFlopStream.java rename to src/main/java/org/apache/datasketches/characterization/FlipFlopStream.java index 48de600..ccaa462 100644 --- a/src/main/java/org/apache/datasketches/characterization/req/FlipFlopStream.java +++ b/src/main/java/org/apache/datasketches/characterization/FlipFlopStream.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.datasketches.characterization.req; +package org.apache.datasketches.characterization; import org.testng.annotations.Test; @@ -56,8 +56,8 @@ public class FlipFlopStream { /** * Generates a flip-flop sequence - * @param loReps repetitions for the low range - * @param hiReps repetitions for the high range + * @param loReps : low range repeated steps before flip + * @param hiReps : hi range repeated steps before flip * @param steps maximum number of steps for this sequence */ public void flipFlop(final int loReps, final int hiReps, int steps) { @@ -91,12 +91,12 @@ public class FlipFlopStream { public void checkFlipFlop() { final int N = 50; final FlipFlopStream ffs = new FlipFlopStream(N, 1); - ffs.flipFlop(1, 1, 20); - ffs.flipFlop(10, 1, 10); - ffs.flipFlop(1, 10, 10); - ffs.flipFlop(1, 1, 10); + ffs.flipFlop(1, 1, 20); //flip-flop + ffs.flipFlop(10, 1, 10);//forward + ffs.flipFlop(0, 10, 10);//reverse + ffs.flipFlop(1, 1, 10); //flip-flop for (int i = 0; i < N; i++) { println(ffs.arr[i]); } } - + static void println(final Object o) { System.out.println(o.toString()); } } diff --git a/src/main/java/org/apache/datasketches/characterization/req/StreamMaker.java b/src/main/java/org/apache/datasketches/characterization/StreamMaker.java similarity index 86% rename from src/main/java/org/apache/datasketches/characterization/req/StreamMaker.java rename to src/main/java/org/apache/datasketches/characterization/StreamMaker.java index 01c4990..8638231 100644 --- a/src/main/java/org/apache/datasketches/characterization/req/StreamMaker.java +++ b/src/main/java/org/apache/datasketches/characterization/StreamMaker.java @@ -17,17 +17,16 @@ * under the License. */ -package org.apache.datasketches.characterization.req; +package org.apache.datasketches.characterization; -import static org.apache.datasketches.characterization.req.StreamMaker.Pattern.FLIP_FLOP; -import static org.apache.datasketches.characterization.req.StreamMaker.Pattern.RANDOM; -import static org.apache.datasketches.characterization.req.StreamMaker.Pattern.REVERSED; -import static org.apache.datasketches.characterization.req.StreamMaker.Pattern.SORTED; -import static org.apache.datasketches.characterization.req.StreamMaker.Pattern.SQRT; -import static org.apache.datasketches.characterization.req.StreamMaker.Pattern.ZOOM_IN; -import static org.apache.datasketches.characterization.req.StreamMaker.Pattern.ZOOM_OUT; +import static org.apache.datasketches.characterization.StreamMaker.Pattern.FLIP_FLOP; +import static org.apache.datasketches.characterization.StreamMaker.Pattern.RANDOM; +import static org.apache.datasketches.characterization.StreamMaker.Pattern.REVERSED; +import static org.apache.datasketches.characterization.StreamMaker.Pattern.SORTED; +import static org.apache.datasketches.characterization.StreamMaker.Pattern.SQRT; +import static org.apache.datasketches.characterization.StreamMaker.Pattern.ZOOM_IN; +import static org.apache.datasketches.characterization.StreamMaker.Pattern.ZOOM_OUT; -import org.apache.datasketches.characterization.Shuffle; import org.testng.annotations.Test; /** diff --git a/src/main/java/org/apache/datasketches/characterization/tdigest/TrueDoubleRanks.java b/src/main/java/org/apache/datasketches/characterization/TrueRanks.java similarity index 67% rename from src/main/java/org/apache/datasketches/characterization/tdigest/TrueDoubleRanks.java rename to src/main/java/org/apache/datasketches/characterization/TrueRanks.java index 092c2e7..acb9923 100644 --- a/src/main/java/org/apache/datasketches/characterization/tdigest/TrueDoubleRanks.java +++ b/src/main/java/org/apache/datasketches/characterization/TrueRanks.java @@ -17,11 +17,10 @@ * under the License. */ -package org.apache.datasketches.characterization.tdigest; +package org.apache.datasketches.characterization; import java.util.Arrays; -import org.apache.datasketches.characterization.Shuffle; import org.apache.datasketches.quantilescommon.BinarySearch; import org.testng.annotations.Test; @@ -33,37 +32,53 @@ import org.testng.annotations.Test; * * @author Lee Rhodes */ -public class TrueDoubleRanks { +public class TrueRanks { private static final String LS = System.getProperty("line.separator"); private int length; - private double[] stream; - private double[] sortedStream; + private double[] dStream; + private float[] fStream; + private double[] sortedDStream; private int[] sortedAbsRanks; private int[] streamAbsRanks; + + TrueRanks() { } //for TestNG - TrueDoubleRanks() { } //for TestNG - - public TrueDoubleRanks(final double[] stream) { - this.stream = stream; - compute(); + public TrueRanks(final double[] stream, final boolean LTEQ) { + this.dStream = stream; + this.fStream = toFloatArr(stream); + compute(LTEQ); } - public double getMinValue() { return sortedStream[0]; } + public TrueRanks(final float[] stream, final boolean LTEQ) { + this.fStream = stream; + this.dStream = toDoubleArr(stream); + compute(LTEQ); + } + + public double getMinDoubleValue() { return sortedDStream[0]; } + + public float getMinFloatValue() { return (float) sortedDStream[0]; } - public double getMaxValue() { return sortedStream[length - 1]; } + public double getMaxDoubleValue() { return sortedDStream[length - 1]; } + + public float getMaxFloatValue() { return (float) sortedDStream[length - 1]; } public int getMinAbsRank() { return sortedAbsRanks[0]; } public int getMaxAbsRank() { return sortedAbsRanks[length - 1]; } - public double[] getStream() { return stream; } + public double[] getDoubleStream() { return dStream; } + + public float[] getFloatStream() { return fStream; } - public double[] getSortedStream() { return sortedStream; } + public double[] getSortedDoubleStream() { return sortedDStream; } + + public float[] getSortedFloatStream() { return toFloatArr(sortedDStream); } public int[] getSortedAbsRanks() { return sortedAbsRanks; } public int[] getStreamAbsRanks() { return streamAbsRanks; } - + public double[] getSortedRelRanks() { return relativeRank(sortedAbsRanks); } @@ -73,23 +88,37 @@ public class TrueDoubleRanks { } public int getAbsRank(final double v) { - final int idx = BinarySearch.find(sortedStream, 0, length - 1, v); + final int idx = BinarySearch.find(sortedDStream, 0, length - 1, v); return sortedAbsRanks[idx]; } + private static float[] toFloatArr(final double[] dArr) { + final int len = dArr.length; + final float[] fArr = new float[len]; + for (int i = 0; i < len; i++) { fArr[i] = (float) dArr[i]; } + return fArr; + } + + private static double[] toDoubleArr(final float[] fArr) { + final int len = fArr.length; + final double[] dArr = new double[len]; + for (int i = 0; i < len; i++) { dArr[i] = fArr[i]; } + return dArr; + } + /** * Sorts the stream, then computes the sortedAbsRanks based on the comparison criterion. */ - private void compute() { - length = stream.length; - sortedStream = stream.clone(); - Arrays.sort(sortedStream); + private void compute(final boolean LTEQ) { + length = dStream.length; + sortedDStream = dStream.clone(); + Arrays.sort(sortedDStream); sortedAbsRanks = new int[length]; - if (ltEq) { //LE + if (LTEQ) { //if LTEQ == true, criteria is <= sortedAbsRanks[length - 1] = length; int i = length - 2; while (i >= 0) { //goes backwards - if (sortedStream[i] == sortedStream[i + 1]) { sortedAbsRanks[i] = sortedAbsRanks[i + 1]; } + if (sortedDStream[i] == sortedDStream[i + 1]) { sortedAbsRanks[i] = sortedAbsRanks[i + 1]; } else { sortedAbsRanks[i] = i + 1; } i--; } @@ -97,14 +126,14 @@ public class TrueDoubleRanks { sortedAbsRanks[0] = 0; int i = 1; while (i < length) { //forwards - if (sortedStream[i - 1] == sortedStream[i]) { sortedAbsRanks[i] = sortedAbsRanks[i - 1]; } + if (sortedDStream[i - 1] == sortedDStream[i]) { sortedAbsRanks[i] = sortedAbsRanks[i - 1]; } else { sortedAbsRanks[i] = i; } i++; } } streamAbsRanks = new int[length]; //put the ranks in original stream order for (int j = 0; j < length; j++) { - final int idx = BinarySearch.find(sortedStream, 0, length - 1, stream[j]); + final int idx = BinarySearch.find(sortedDStream, 0, length - 1, dStream[j]); streamAbsRanks[j] = sortedAbsRanks[idx]; } } @@ -134,14 +163,14 @@ public class TrueDoubleRanks { final StringBuilder sb = new StringBuilder(); final String ffmt = "%5.1f "; final String dfmt = "%5d "; - TrueDoubleRanks trueRanks; + TrueRanks trueRanks; final int N = vArr.length; sb.append("Values:").append(LS); for (int i = 0; i < N; i++) { sb.append(String.format(ffmt, vArr[i])); } sb.append(LS); - trueRanks = new TrueDoubleRanks(vArr); + trueRanks = new TrueRanks(vArr, false); sb.append("LT Abs Ranks:").append(LS); int[] absArr = trueRanks.getStreamAbsRanks(); for (int i = 0; i < N; i++) { sb.append(String.format(dfmt, absArr[i])); } @@ -151,7 +180,7 @@ public class TrueDoubleRanks { for (int i = 0; i < N; i++) { sb.append(String.format(ffmt, relArr[i])); } sb.append(LS); - trueRanks = new TrueDoubleRanks(vArr); + trueRanks = new TrueRanks(vArr, true); sb.append("LE Abs Ranks:").append(LS); absArr = trueRanks.getStreamAbsRanks(); for (int i = 0; i < N; i++) { sb.append(String.format(dfmt, absArr[i])); } diff --git a/src/main/java/org/apache/datasketches/characterization/filters/BaseFilterAccuracyProfile.java b/src/main/java/org/apache/datasketches/characterization/filters/BaseFilterAccuracyProfile.java index 77931e6..4c3a3e8 100644 --- a/src/main/java/org/apache/datasketches/characterization/filters/BaseFilterAccuracyProfile.java +++ b/src/main/java/org/apache/datasketches/characterization/filters/BaseFilterAccuracyProfile.java @@ -1,16 +1,35 @@ -package org.apache.datasketches.characterization.filters; +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ -import org.apache.datasketches.Job; -import org.apache.datasketches.JobProfile; -import org.apache.datasketches.Properties; - -import java.util.ArrayList; +package org.apache.datasketches.characterization.filters; import static java.lang.Math.log; import static java.lang.Math.pow; import static org.apache.datasketches.common.Util.pwr2SeriesNext; -public abstract class BaseFilterAccuracyProfile implements JobProfile{ +import java.util.ArrayList; + +import org.apache.datasketches.Job; +import org.apache.datasketches.JobProfile; +import org.apache.datasketches.Properties; + +public abstract class BaseFilterAccuracyProfile implements JobProfile { Job job; public Properties prop; @@ -50,9 +69,9 @@ public abstract class BaseFilterAccuracyProfile implements JobProfile{ lgMaxBpU = Integer.parseInt(prop.mustGet("Trials_lgMaxBpU")); slope = (double) (lgMaxT - lgMinT) / (lgMinBpU - lgMaxBpU); tPPO = Integer.parseInt(prop.mustGet("Trials_TPPO")); - numQueries = 1<<minNumHashes+1; // starting value for the number of query points. - numTrials = 1<<lgMinT; - numItemsInserted = (long) Math.round(capacity * (1L << lgU)); + numQueries = 1 << minNumHashes + 1; // starting value for the number of query points. + numTrials = 1 << lgMinT; + numItemsInserted = Math.round(capacity * (1L << lgU)); configure(); doTrials(); @@ -74,11 +93,12 @@ public abstract class BaseFilterAccuracyProfile implements JobProfile{ /** * Used to get the size of the filter in bits for the current trial. + * @return the size of the filter in bits for the current trial. */ public abstract long getFilterLengthBits(); - /** + * @param numHashes number of hashes * @return the number of bits per entry for the filter. */ public abstract int getBitsperEntry(final int numHashes); @@ -100,20 +120,20 @@ public abstract class BaseFilterAccuracyProfile implements JobProfile{ * and processes the results. The results of each trial are appended to a StringBuilder * in a tab-separated format and printed. * - * The number of hashes ranges from the minimum to the maximum number of hashes specified + * <p>The number of hashes ranges from the minimum to the maximum number of hashes specified * in the class. The number of trials for each number of hashes is determined by the * getNumTrials method. The false positive rate and filter size are calculated by - * averaging the results of the trials for each number of hashes. + * averaging the results of the trials for each number of hashes.</p> * - * After the results are processed, they are printed and the number of query points is + * <p>After the results are processed, they are printed and the number of query points is * updated for the next number of hashes. * We need to increase the power of 2 for each trial set because the failure probability decays - * with 2^(-x) when x is related to the number of bits per entry. + * with 2^(-x) when x is related to the number of bits per entry.</p> */ private void doTrials() { final StringBuilder dataStr = new StringBuilder(); job.println(getHeader()); - for (int nh= minNumHashes; nh <= maxNumHashes; nh++) { + for (int nh = minNumHashes; nh <= maxNumHashes; nh++) { fpr = 0; filterNumBits = 0; final int numTrials = getNumTrials(nh); @@ -126,7 +146,7 @@ public abstract class BaseFilterAccuracyProfile implements JobProfile{ //bitsPerEntry = getBitsperEntry(nh); process(nh, fpr, filterNumBits, numQueries, numTrials, dataStr); job.println(dataStr.toString()); - numQueries = (int)pwr2SeriesNext(1, 1L<<(nh+1)); + numQueries = (int)pwr2SeriesNext(1, 1L << (nh + 1)); } } @@ -181,7 +201,7 @@ public abstract class BaseFilterAccuracyProfile implements JobProfile{ * Returns a column header row * @return a column header row */ - private String getHeader() { + private static String getHeader() { final StringBuilder sb = new StringBuilder(); sb.append("numHashes").append(TAB); sb.append("FPR").append(TAB); diff --git a/src/main/java/org/apache/datasketches/characterization/filters/BaseFilterUpdateSpeedProfile.java b/src/main/java/org/apache/datasketches/characterization/filters/BaseFilterUpdateSpeedProfile.java index c017385..922c134 100644 --- a/src/main/java/org/apache/datasketches/characterization/filters/BaseFilterUpdateSpeedProfile.java +++ b/src/main/java/org/apache/datasketches/characterization/filters/BaseFilterUpdateSpeedProfile.java @@ -1,13 +1,32 @@ -package org.apache.datasketches.characterization.filters; +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ -import org.apache.datasketches.Job; -import org.apache.datasketches.JobProfile; -import org.apache.datasketches.Properties; +package org.apache.datasketches.characterization.filters; import static java.lang.Math.log; import static java.lang.Math.pow; import static org.apache.datasketches.common.Util.pwr2SeriesNext; +import org.apache.datasketches.Job; +import org.apache.datasketches.JobProfile; +import org.apache.datasketches.Properties; + public abstract class BaseFilterUpdateSpeedProfile implements JobProfile { Job job; public Properties prop; @@ -72,7 +91,7 @@ public abstract class BaseFilterUpdateSpeedProfile implements JobProfile { int lastU = 0; final StringBuilder dataStr = new StringBuilder(); job.println(getHeader()); - while (lastU < 0.9*maxU) { //Trials for each U point on X-axis, and one row on output + while (lastU < 0.9 * maxU) { //Trials for each U point on X-axis, and one row on output final int nextU = lastU == 0 ? minU : (int)pwr2SeriesNext(uPPO, lastU); lastU = nextU; final int trials = getNumTrials(nextU); diff --git a/src/main/java/org/apache/datasketches/characterization/filters/BaseSpaceProfile.java b/src/main/java/org/apache/datasketches/characterization/filters/BaseSpaceProfile.java index 41be958..c0a51c8 100644 --- a/src/main/java/org/apache/datasketches/characterization/filters/BaseSpaceProfile.java +++ b/src/main/java/org/apache/datasketches/characterization/filters/BaseSpaceProfile.java @@ -1,22 +1,35 @@ -package org.apache.datasketches.characterization.filters; -import org.apache.datasketches.Job; -import org.apache.datasketches.JobProfile; -import org.apache.datasketches.Properties; +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ -import java.util.ArrayList; +package org.apache.datasketches.characterization.filters; import static java.lang.Math.log; import static java.lang.Math.pow; import static org.apache.datasketches.common.Util.pwr2SeriesNext; -class trialResults{ - long filterSizeBits; - double measuredFalsePositiveRate; +import java.util.ArrayList; - int numHashbits; -} +import org.apache.datasketches.Job; +import org.apache.datasketches.JobProfile; +import org.apache.datasketches.Properties; -public abstract class BaseSpaceProfile implements JobProfile{ +public abstract class BaseSpaceProfile implements JobProfile { Job job; public Properties prop; double targetFpp; @@ -35,7 +48,13 @@ public abstract class BaseSpaceProfile implements JobProfile{ int lgMaxBpU; double slope; + class TrialResults { + long filterSizeBits; + double measuredFalsePositiveRate; + int numHashbits; + } + @Override public void start(final Job job) { @@ -46,7 +65,7 @@ public abstract class BaseSpaceProfile implements JobProfile{ lgMinU = Integer.parseInt(prop.mustGet("Trials_lgMinU")); lgMaxU = Integer.parseInt(prop.mustGet("Trials_lgMaxU")); uPPO = Integer.parseInt(prop.mustGet("Trials_UPPO")); - inputCardinality = (int)pwr2SeriesNext(uPPO, 1L<<lgMinU); + inputCardinality = (int)pwr2SeriesNext(uPPO, 1L << lgMinU); //Trials Profile lgMinT = Integer.parseInt(prop.mustGet("Trials_lgMinT")); @@ -74,7 +93,7 @@ public abstract class BaseSpaceProfile implements JobProfile{ // In here should have the logic to initialize a new sketch for a different number of input items for each trial. //public abstract long doTrial(long inputCardinality, double targetFpp); - public abstract trialResults doTrial(long inputCardinality, double targetFpp); + public abstract TrialResults doTrial(long inputCardinality, double targetFpp); /* This experiment varies the cardinality of the input and measures the filter size required @@ -84,24 +103,23 @@ public abstract class BaseSpaceProfile implements JobProfile{ */ private void doTrials() { final StringBuilder dataStr = new StringBuilder(); - final int minT = 1 << lgMinT; - final int maxT = 1 << lgMaxT; + //final int minT = 1 << lgMinT; + //final int maxT = 1 << lgMaxT; final long maxU = 1L << lgMaxU; job.println(getHeader()); - while(inputCardinality < maxU) { + while (inputCardinality < maxU) { numTrials = getNumTrials(inputCardinality); //doTrial(inputCardinality, targetFpp, final long numQueries); inputCardinality = (int)pwr2SeriesNext(uPPO, inputCardinality); //long filterNumBits = doTrial(inputCardinality, targetFpp) ; - trialResults results = doTrial(inputCardinality, targetFpp) ; + final TrialResults results = doTrial(inputCardinality, targetFpp); process(inputCardinality, results.filterSizeBits, numTrials, results.measuredFalsePositiveRate, results.numHashbits, dataStr); job.println(dataStr.toString()) ; } } - /** * Computes the number of trials for a given current number of uniques for a * trial set. This is used to decrease the number of trials @@ -144,7 +162,7 @@ public abstract class BaseSpaceProfile implements JobProfile{ sb.append(numHashBits); } - private String getHeader() { + private static String getHeader() { final StringBuilder sb = new StringBuilder(); sb.append("TrueU").append(TAB); sb.append("Size").append(TAB); diff --git a/src/main/java/org/apache/datasketches/characterization/filters/BloomFilterAccuracyProfile.java b/src/main/java/org/apache/datasketches/characterization/filters/BloomFilterAccuracyProfile.java index 0a9fc93..f6d95f1 100644 --- a/src/main/java/org/apache/datasketches/characterization/filters/BloomFilterAccuracyProfile.java +++ b/src/main/java/org/apache/datasketches/characterization/filters/BloomFilterAccuracyProfile.java @@ -1,11 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.datasketches.characterization.filters; import org.apache.datasketches.filters.bloomfilter.BloomFilter; import org.apache.datasketches.filters.bloomfilter.BloomFilterBuilder; -import java.util.ArrayList; - -public class BloomFilterAccuracyProfile extends BaseFilterAccuracyProfile{ +public class BloomFilterAccuracyProfile extends BaseFilterAccuracyProfile { protected BloomFilter sketch; @@ -19,12 +36,12 @@ public class BloomFilterAccuracyProfile extends BaseFilterAccuracyProfile{ } /* - * The experiment hasa fixed number of bits per entry and + * The experiment has a fixed number of bits per entry and */ @Override public double doTrial(final int numHashes, final long numQueries) { - // Initialise and populate the sketch + // and populate the sketch filterLengthBits = (long) ((numHashes * numItemsInserted) / LN2); sketch = BloomFilterBuilder.createBySize(filterLengthBits, numHashes); @@ -32,21 +49,21 @@ public class BloomFilterAccuracyProfile extends BaseFilterAccuracyProfile{ inputItems.clear(); negativeItems.clear(); long item; - for (long i = 0; i < numItemsInserted; i++){ + for (long i = 0; i < numItemsInserted; i++) { item = ++vIn; inputItems.add(item) ; sketch.update(item); } - for (long i = numItemsInserted; i < numItemsInserted+numQueries; i++ ) { + for (long i = numItemsInserted; i < numItemsInserted + numQueries; i++ ) { item = ++vIn; negativeItems.add(item) ; } // Check the number of false positives long numFalsePositive = 0 ; - for (int i = 0; i < negativeItems.size(); i++){ - if (sketch.query(negativeItems.get(i))) ++numFalsePositive ; + for (int i = 0; i < negativeItems.size(); i++) { + if (sketch.query(negativeItems.get(i))) { ++numFalsePositive; } } return (double)numFalsePositive / numQueries; } @@ -61,5 +78,4 @@ public class BloomFilterAccuracyProfile extends BaseFilterAccuracyProfile{ return sketch.getCapacity(); } - } diff --git a/src/main/java/org/apache/datasketches/characterization/filters/BloomFilterSpaceProfile.java b/src/main/java/org/apache/datasketches/characterization/filters/BloomFilterSpaceProfile.java index ace7882..8125d73 100644 --- a/src/main/java/org/apache/datasketches/characterization/filters/BloomFilterSpaceProfile.java +++ b/src/main/java/org/apache/datasketches/characterization/filters/BloomFilterSpaceProfile.java @@ -1,44 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.datasketches.characterization.filters; +import static org.apache.datasketches.common.Util.pwr2SeriesNext; + import org.apache.datasketches.filters.bloomfilter.BloomFilter; import org.apache.datasketches.filters.bloomfilter.BloomFilterBuilder; -import static org.apache.datasketches.common.Util.pwr2SeriesNext; - public class BloomFilterSpaceProfile extends BaseSpaceProfile { protected BloomFilter sketch; + @Override public void configure() {} @Override //public long doTrial(long inputCardinality, double targetFpp) { - public trialResults doTrial(long inputCardinality, double targetFpp){ - trialResults result = new trialResults(); + public TrialResults doTrial(final long inputCardinality, final double targetFpp) { + final TrialResults result = new TrialResults(); final long numBits = BloomFilterBuilder.suggestNumFilterBits(inputCardinality, targetFpp); final short numHashes = BloomFilterBuilder.suggestNumHashes(inputCardinality, numBits); - sketch = BloomFilterBuilder.createBySize(numBits, (int)numHashes-4, 348675132L); + sketch = BloomFilterBuilder.createBySize(numBits, numHashes - 4, 348675132L); //sketch = BloomFilterBuilder.createByAccuracy(inputCardinality, targetFpp); - long numQueries = pwr2SeriesNext(1, 1L<<(sketch.getNumHashes()+4)); + final long numQueries = pwr2SeriesNext(1, 1L << (sketch.getNumHashes() + 4)); // Build the test sets but clear them first so that they have the correct cardinality and no surplus is added. inputItems.clear(); negativeItems.clear(); long item; - for (long i = 0; i < inputCardinality; i++){ + for (long i = 0; i < inputCardinality; i++) { item = ++vIn; inputItems.add(item) ; sketch.update(item); } - for (long i = inputCardinality; i < inputCardinality+numQueries; i++ ) { + for (long i = inputCardinality; i < inputCardinality + numQueries; i++ ) { item = ++vIn; negativeItems.add(item) ; } // Check the number of false positives long numFalsePositive = 0 ; - for (int i = 0; i < negativeItems.size(); i++){ - if (sketch.query(negativeItems.get(i))) ++numFalsePositive ; + for (int i = 0; i < negativeItems.size(); i++) { + if (sketch.query(negativeItems.get(i))) { ++numFalsePositive; } } result.filterSizeBits = sketch.getCapacity(); result.measuredFalsePositiveRate = (double)numFalsePositive / numQueries; @@ -46,4 +66,4 @@ public class BloomFilterSpaceProfile extends BaseSpaceProfile { return result; } -} \ No newline at end of file +} diff --git a/src/main/java/org/apache/datasketches/characterization/filters/BloomFilterUpdateSpeedProfile.java b/src/main/java/org/apache/datasketches/characterization/filters/BloomFilterUpdateSpeedProfile.java index b968cfd..58a82d8 100644 --- a/src/main/java/org/apache/datasketches/characterization/filters/BloomFilterUpdateSpeedProfile.java +++ b/src/main/java/org/apache/datasketches/characterization/filters/BloomFilterUpdateSpeedProfile.java @@ -1,14 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.datasketches.characterization.filters; import org.apache.datasketches.filters.bloomfilter.BloomFilter; -import org.apache.datasketches.memory.WritableHandle; -import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.filters.bloomfilter.BloomFilterBuilder; -public class BloomFilterUpdateSpeedProfile extends BaseFilterUpdateSpeedProfile{ +public class BloomFilterUpdateSpeedProfile extends BaseFilterUpdateSpeedProfile { protected BloomFilter sketch; - private WritableHandle handle; - private WritableMemory wmem; @Override public void configure() { @@ -19,11 +34,7 @@ public class BloomFilterUpdateSpeedProfile extends BaseFilterUpdateSpeedProfile{ } @Override - public void cleanup() { - try { - if (handle != null) { handle.close(); } - } catch (final Exception e) {} - } + public void cleanup() { } @Override public double doTrial(final int uPerTrial) { diff --git a/src/main/java/org/apache/datasketches/characterization/filters/QuotientFilterAccuracyProfile.java b/src/main/java/org/apache/datasketches/characterization/filters/QuotientFilterAccuracyProfile.java index 3448817..3034f26 100644 --- a/src/main/java/org/apache/datasketches/characterization/filters/QuotientFilterAccuracyProfile.java +++ b/src/main/java/org/apache/datasketches/characterization/filters/QuotientFilterAccuracyProfile.java @@ -1,65 +1,80 @@ -package org.apache.datasketches.characterization.filters; - -import org.apache.datasketches.filters.quotientfilter.QuotientFilter; -import org.apache.datasketches.filters.quotientfilter.QuotientFilterBuilder; - -import java.util.ArrayList; - - -public class QuotientFilterAccuracyProfile extends BaseFilterAccuracyProfile{ - - protected QuotientFilter sketch; - - - @Override - public void configure() {} - - /* - We use the numHashBits as being equivalent to the number of bits per entry in the Quotient filter. - This value is 3 metadata bits plus the remaining number of bits for the fingerprint length. - */ - @Override - public double doTrial(final int numHashBits, final long numQueries) { - // Initialise and populate the sketch - sketch = new QuotientFilter(lgU, numHashBits); - - // Build the test sets but clear them first so that they have the correct cardinality and no surplus is added. - inputItems.clear(); - negativeItems.clear(); - long item; - for (long i = 0; i < numItemsInserted; i++){ - item = ++vIn; - inputItems.add(item) ; - sketch.insert(item); - } - - for (long i = numItemsInserted; i < numItemsInserted+numQueries; i++ ) { - item = ++vIn; - negativeItems.add(item) ; - } - - // Check the number of false positives - long numFalsePositive = 0 ; - for (int i = 0; i < negativeItems.size(); i++){ - if (sketch.search(negativeItems.get(i))) ++numFalsePositive ; - } - return (double)numFalsePositive / numQueries; - } - - /* - The total number of bits per entry is the three metadata plus the fingerprint length bits. - This is exactly the number of hash bits that is passed as a parameter. - */ - @Override - public int getBitsperEntry(final int numHashBits) { - return numHashBits; - } - - @Override - public long getFilterLengthBits() { - return sketch.getSpaceUse(); - } - - -} - +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +//package org.apache.datasketches.characterization.filters; + +//import java.util.ArrayList; +// +//import org.apache.datasketches.filters.quotientfilter.QuotientFilter; +//import org.apache.datasketches.filters.quotientfilter.QuotientFilterBuilder; + +//public class QuotientFilterAccuracyProfile extends BaseFilterAccuracyProfile { + +// protected QuotientFilter sketch; +// +// @Override +// public void configure() {} +// +// /* +// We use the numHashBits as being equivalent to the number of bits per entry in the Quotient filter. +// This value is 3 metadata bits plus the remaining number of bits for the fingerprint length. +// */ +// @Override +// public double doTrial(final int numHashBits, final long numQueries) { +// // Initialize and populate the sketch +// sketch = new QuotientFilter(lgU, numHashBits); +// +// // Build the test sets but clear them first so that they have the correct cardinality and no surplus is added. +// inputItems.clear(); +// negativeItems.clear(); +// long item; +// for (long i = 0; i < numItemsInserted; i++){ +// item = ++vIn; +// inputItems.add(item) ; +// sketch.insert(item); +// } +// +// for (long i = numItemsInserted; i < numItemsInserted+numQueries; i++ ) { +// item = ++vIn; +// negativeItems.add(item) ; +// } +// +// // Check the number of false positives +// long numFalsePositive = 0 ; +// for (int i = 0; i < negativeItems.size(); i++){ +// if (sketch.search(negativeItems.get(i))) ++numFalsePositive ; +// } +// return (double)numFalsePositive / numQueries; +// } +// +// /* +// The total number of bits per entry is the three metadata plus the fingerprint length bits. +// This is exactly the number of hash bits that is passed as a parameter. +// */ +// @Override +// public int getBitsperEntry(final int numHashBits) { +// return numHashBits; +// } +// +// @Override +// public long getFilterLengthBits() { +// return sketch.getSpaceUse(); +// } +// +//} diff --git a/src/main/java/org/apache/datasketches/characterization/filters/QuotientFilterExpansionProfile.java b/src/main/java/org/apache/datasketches/characterization/filters/QuotientFilterExpansionProfile.java index eed1804..23c7a72 100644 --- a/src/main/java/org/apache/datasketches/characterization/filters/QuotientFilterExpansionProfile.java +++ b/src/main/java/org/apache/datasketches/characterization/filters/QuotientFilterExpansionProfile.java @@ -1,110 +1,130 @@ -package org.apache.datasketches.characterization.filters; - -import org.apache.datasketches.Job; -import org.apache.datasketches.JobProfile; -import org.apache.datasketches.filters.quotientfilter.QuotientFilter; - -import java.util.ArrayList; - -import static org.apache.datasketches.common.Util.pwr2SeriesNext; - -public class QuotientFilterExpansionProfile implements JobProfile{ - - private Job job; - long vIn = 0L; - int startLgU; - int endLgU; - int uPPO; - int lgK; - int startLenFprint; - public ArrayList<Long> negativeItems = new ArrayList<>(); - int lgTrials; - int numTrials; - - @Override - public void start(final Job job) { - this.job = job; - runExpansionTrials(); - } - - @Override - public void shutdown() { } - - @Override - public void cleanup() { } - - private void runExpansionTrials(){ - //final ArrayList<Long> negativeItems = new ArrayList<>(); - startLgU = Integer.parseInt(job.getProperties().mustGet("startLgU")); - endLgU = Integer.parseInt(job.getProperties().mustGet("endLgU")); - uPPO = Integer.parseInt(job.getProperties().mustGet("Universe_uPPO")); - lgK = Integer.parseInt(job.getProperties().mustGet("startLgNumSlots")); - startLenFprint = Integer.parseInt(job.getProperties().mustGet("startLenFprint")); - final int lgNumQueries = Integer.parseInt(job.getProperties().mustGet("lgNumQueries")); - lgTrials = Integer.parseInt(job.getProperties().mustGet("lgTrials")); - final long numQueries = 1L << lgNumQueries; - final int numTrials = 1 << lgTrials; - - job.println(getHeader()); - for (int t = 0; t < numTrials; t++) doTrial(numQueries, startLgU, endLgU); - } - - /* - The number of collisions in N keys inserted to a length M hash table is approximately N^2/2M. - This can be seen through the recursive relationship - */ - private void doTrial(long numQueries, long startLgU, long endLgU){ - QuotientFilter qf = new QuotientFilter(lgK, startLenFprint); - final StringBuilder dataStr = new StringBuilder(); - - // Populate the negative items. Do this first to easily keep it separate from input item set. - negativeItems.clear(); - for (long i = 0; i < numQueries; i++ ) {negativeItems.add(++vIn) ;} - - // vIn is now the starting point; add a specified number of items from this location to the end of the range. - long startPoint = 0L; - long inputCardinality = (1L << startLgU) ; - long numInsertions; - long maxCardinality = (1L << endLgU) ; // end point - - while(inputCardinality < maxCardinality) { - numInsertions = inputCardinality - startPoint; - for (long i = 0; i < numInsertions; i++) { qf.insert(++vIn);} - startPoint = inputCardinality; - - // test the false positive rate - long numFalsePositive = 0; - for (long negItem : negativeItems) { - if (qf.search(negItem)) ++numFalsePositive; - } - //double fpr = (double) numFalsePositive / numQueries; - //System.out.println("Expansions " + qf.getNumExpansions() + " " + qf.getNumSlots()) ; - process(inputCardinality, qf.getNumEntries(), qf.getNumSlots(), qf.getFingerprintLength(), numFalsePositive, dataStr); - job.println(dataStr.toString()); - inputCardinality = pwr2SeriesNext(uPPO, inputCardinality); - } - } - - private static void process(final long numInput, final long numEntries, final long numSlots, final int fPrintLen, - final long falsePositiveRate, final StringBuilder sb){ - //final double falsePositiveRate, final StringBuilder sb){ - // OUTPUT - sb.setLength(0); - sb.append(numSlots).append(TAB); - sb.append(numInput).append(TAB); - sb.append(fPrintLen).append(TAB); - sb.append(numEntries).append(TAB); - sb.append(falsePositiveRate); - } - - private String getHeader() { - final StringBuilder sb = new StringBuilder(); - sb.append("NumSlots").append(TAB); - sb.append("NumInput").append(TAB); - sb.append("FPrintLen").append(TAB); - sb.append("NumEntries").append(TAB); - sb.append("NumFalsePositives"); - //sb.append("FalsePositiveRate"); - return sb.toString(); - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +//package org.apache.datasketches.characterization.filters; +// +//import org.apache.datasketches.Job; +//import org.apache.datasketches.JobProfile; +//import org.apache.datasketches.filters.quotientfilter.QuotientFilter; +// +//import java.util.ArrayList; +// +//import static org.apache.datasketches.common.Util.pwr2SeriesNext; +// +//public class QuotientFilterExpansionProfile implements JobProfile{ +// +// private Job job; +// long vIn = 0L; +// int startLgU; +// int endLgU; +// int uPPO; +// int lgK; +// int startLenFprint; +// public ArrayList<Long> negativeItems = new ArrayList<>(); +// int lgTrials; +// int numTrials; +// +// @Override +// public void start(final Job job) { +// this.job = job; +// runExpansionTrials(); +// } +// +// @Override +// public void shutdown() { } +// +// @Override +// public void cleanup() { } +// +// private void runExpansionTrials(){ +// //final ArrayList<Long> negativeItems = new ArrayList<>(); +// startLgU = Integer.parseInt(job.getProperties().mustGet("startLgU")); +// endLgU = Integer.parseInt(job.getProperties().mustGet("endLgU")); +// uPPO = Integer.parseInt(job.getProperties().mustGet("Universe_uPPO")); +// lgK = Integer.parseInt(job.getProperties().mustGet("startLgNumSlots")); +// startLenFprint = Integer.parseInt(job.getProperties().mustGet("startLenFprint")); +// final int lgNumQueries = Integer.parseInt(job.getProperties().mustGet("lgNumQueries")); +// lgTrials = Integer.parseInt(job.getProperties().mustGet("lgTrials")); +// final long numQueries = 1L << lgNumQueries; +// final int numTrials = 1 << lgTrials; +// +// job.println(getHeader()); +// for (int t = 0; t < numTrials; t++) doTrial(numQueries, startLgU, endLgU); +// } +// +// /* +// The number of collisions in N keys inserted to a length M hash table is approximately N^2/2M. +// This can be seen through the recursive relationship +// */ +// private void doTrial(long numQueries, long startLgU, long endLgU){ +// QuotientFilter qf = new QuotientFilter(lgK, startLenFprint); +// final StringBuilder dataStr = new StringBuilder(); +// +// // Populate the negative items. Do this first to easily keep it separate from input item set. +// negativeItems.clear(); +// for (long i = 0; i < numQueries; i++ ) {negativeItems.add(++vIn) ;} +// +// // vIn is now the starting point; add a specified number of items from this location to the end of the range. +// long startPoint = 0L; +// long inputCardinality = (1L << startLgU) ; +// long numInsertions; +// long maxCardinality = (1L << endLgU) ; // end point +// +// while(inputCardinality < maxCardinality) { +// numInsertions = inputCardinality - startPoint; +// for (long i = 0; i < numInsertions; i++) { qf.insert(++vIn);} +// startPoint = inputCardinality; +// +// // test the false positive rate +// long numFalsePositive = 0; +// for (long negItem : negativeItems) { +// if (qf.search(negItem)) ++numFalsePositive; +// } +// //double fpr = (double) numFalsePositive / numQueries; +// //System.out.println("Expansions " + qf.getNumExpansions() + " " + qf.getNumSlots()) ; +// process(inputCardinality, qf.getNumEntries(), qf.getNumSlots(), qf.getFingerprintLength(), +// numFalsePositive, dataStr); +// job.println(dataStr.toString()); +// inputCardinality = pwr2SeriesNext(uPPO, inputCardinality); +// } +// } +// +// private static void process(final long numInput, final long numEntries, final long numSlots, final int fPrintLen, +// final long falsePositiveRate, final StringBuilder sb){ +// //final double falsePositiveRate, final StringBuilder sb){ +// // OUTPUT +// sb.setLength(0); +// sb.append(numSlots).append(TAB); +// sb.append(numInput).append(TAB); +// sb.append(fPrintLen).append(TAB); +// sb.append(numEntries).append(TAB); +// sb.append(falsePositiveRate); +// } +// +// private String getHeader() { +// final StringBuilder sb = new StringBuilder(); +// sb.append("NumSlots").append(TAB); +// sb.append("NumInput").append(TAB); +// sb.append("FPrintLen").append(TAB); +// sb.append("NumEntries").append(TAB); +// sb.append("NumFalsePositives"); +// //sb.append("FalsePositiveRate"); +// return sb.toString(); +// } +//} diff --git a/src/main/java/org/apache/datasketches/characterization/filters/QuotientFilterSpaceProfile.java b/src/main/java/org/apache/datasketches/characterization/filters/QuotientFilterSpaceProfile.java index b237918..c86eb40 100644 --- a/src/main/java/org/apache/datasketches/characterization/filters/QuotientFilterSpaceProfile.java +++ b/src/main/java/org/apache/datasketches/characterization/filters/QuotientFilterSpaceProfile.java @@ -1,47 +1,67 @@ -package org.apache.datasketches.characterization.filters; +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ -import org.apache.datasketches.filters.quotientfilter.QuotientFilter; -import org.apache.datasketches.filters.quotientfilter.QuotientFilterBuilder; - -import static org.apache.datasketches.common.Util.pwr2SeriesNext; - -public class QuotientFilterSpaceProfile extends BaseSpaceProfile { - protected QuotientFilter sketch; - - public void configure() {} - - @Override - //public long doTrial(long inputCardinality, double targetFpp) { - public trialResults doTrial(long inputCardinality, double targetFpp){ - trialResults result = new trialResults(); - int fingerprintLength = QuotientFilterBuilder.suggestFingerprintLength(targetFpp); - int lgNumSlots = QuotientFilterBuilder.suggestLgNumSlots(inputCardinality); // Make sure to check this line if you want -1 or not - sketch = new QuotientFilter(lgNumSlots, fingerprintLength+3); - - long numQueries = pwr2SeriesNext(1, 1L<<(fingerprintLength+1)); - // Build the test sets but clear them first so that they have the correct cardinality and no surplus is added. - inputItems.clear(); - negativeItems.clear(); - long item; - for (long i = 0; i < inputCardinality; i++){ - item = ++vIn; - inputItems.add(item) ; - sketch.insert(item); - } - - for (long i = inputCardinality; i < inputCardinality+numQueries; i++ ) { - item = ++vIn; - negativeItems.add(item) ; - } - - // Check the number of false positives - long numFalsePositive = 0 ; - for (int i = 0; i < negativeItems.size(); i++){ - if (sketch.search(negativeItems.get(i))) ++numFalsePositive ; - } - result.filterSizeBits = sketch.getSpaceUse(); - result.measuredFalsePositiveRate = (double)numFalsePositive / numQueries; - result.numHashbits = fingerprintLength; - return result; - } -} +//package org.apache.datasketches.characterization.filters; +// +//import org.apache.datasketches.filters.quotientfilter.QuotientFilter; +//import org.apache.datasketches.filters.quotientfilter.QuotientFilterBuilder; +// +//import static org.apache.datasketches.common.Util.pwr2SeriesNext; +// +//public class QuotientFilterSpaceProfile extends BaseSpaceProfile { +// protected QuotientFilter sketch; +// +// public void configure() {} +// +// @Override +// //public long doTrial(long inputCardinality, double targetFpp) { +// public trialResults doTrial(long inputCardinality, double targetFpp){ +// trialResults result = new trialResults(); +// int fingerprintLength = QuotientFilterBuilder.suggestFingerprintLength(targetFpp); +// //Make sure to check this line if you want -1 or not +// int lgNumSlots = QuotientFilterBuilder.suggestLgNumSlots(inputCardinality); +// sketch = new QuotientFilter(lgNumSlots, fingerprintLength+3); +// +// long numQueries = pwr2SeriesNext(1, 1L<<(fingerprintLength+1)); +// // Build the test sets but clear them first so that they have the correct cardinality and no surplus is added. +// inputItems.clear(); +// negativeItems.clear(); +// long item; +// for (long i = 0; i < inputCardinality; i++){ +// item = ++vIn; +// inputItems.add(item) ; +// sketch.insert(item); +// } +// +// for (long i = inputCardinality; i < inputCardinality+numQueries; i++ ) { +// item = ++vIn; +// negativeItems.add(item) ; +// } +// +// // Check the number of false positives +// long numFalsePositive = 0 ; +// for (int i = 0; i < negativeItems.size(); i++){ +// if (sketch.search(negativeItems.get(i))) ++numFalsePositive ; +// } +// result.filterSizeBits = sketch.getSpaceUse(); +// result.measuredFalsePositiveRate = (double)numFalsePositive / numQueries; +// result.numHashbits = fingerprintLength; +// return result; +// } +//} diff --git a/src/main/java/org/apache/datasketches/characterization/filters/QuotientFilterUpdateSpeedProfile.java b/src/main/java/org/apache/datasketches/characterization/filters/QuotientFilterUpdateSpeedProfile.java index e73e6d0..2ff53ea 100644 --- a/src/main/java/org/apache/datasketches/characterization/filters/QuotientFilterUpdateSpeedProfile.java +++ b/src/main/java/org/apache/datasketches/characterization/filters/QuotientFilterUpdateSpeedProfile.java @@ -1,41 +1,60 @@ -package org.apache.datasketches.characterization.filters; - -import org.apache.datasketches.memory.WritableHandle; -import org.apache.datasketches.memory.WritableMemory; - - -import org.apache.datasketches.filters.quotientfilter.QuotientFilter; - -public class QuotientFilterUpdateSpeedProfile extends BaseFilterUpdateSpeedProfile{ - protected QuotientFilter sketch; - protected int lgNumSlots ; - protected int numBitsPerSlot; - private WritableHandle handle; - private WritableMemory wmem; - - @Override - public void configure() { - lgNumSlots = Integer.parseInt(prop.mustGet("lgNumSlots")); - numBitsPerSlot = Integer.parseInt(prop.mustGet("numBitsPerSlot")); - } - - @Override - public void cleanup() { - try { - if (handle != null) { handle.close(); } - } catch (final Exception e) {} - } - - @Override - public double doTrial(final int uPerTrial) { - //sketch.reset(); //is not implemented - sketch = new QuotientFilter(lgNumSlots, numBitsPerSlot); - final long startUpdateTime_nS = System.nanoTime(); - for (int u = uPerTrial; u-- > 0;) { - sketch.insert(++vIn); - } - final long updateTime_nS = System.nanoTime() - startUpdateTime_nS; - return (double) updateTime_nS / uPerTrial; - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +//package org.apache.datasketches.characterization.filters; +// +//import org.apache.datasketches.memory.WritableHandle; +//import org.apache.datasketches.memory.WritableMemory; +// +// +//import org.apache.datasketches.filters.quotientfilter.QuotientFilter; +// +//public class QuotientFilterUpdateSpeedProfile extends BaseFilterUpdateSpeedProfile{ +// protected QuotientFilter sketch; +// protected int lgNumSlots ; +// protected int numBitsPerSlot; +// private WritableHandle handle; +// private WritableMemory wmem; +// +// @Override +// public void configure() { +// lgNumSlots = Integer.parseInt(prop.mustGet("lgNumSlots")); +// numBitsPerSlot = Integer.parseInt(prop.mustGet("numBitsPerSlot")); +// } +// +// @Override +// public void cleanup() { +// try { +// if (handle != null) { handle.close(); } +// } catch (final Exception e) {} +// } +// +// @Override +// public double doTrial(final int uPerTrial) { +// //sketch.reset(); //is not implemented +// sketch = new QuotientFilter(lgNumSlots, numBitsPerSlot); +// final long startUpdateTime_nS = System.nanoTime(); +// for (int u = uPerTrial; u-- > 0;) { +// sketch.insert(++vIn); +// } +// final long updateTime_nS = System.nanoTime() - startUpdateTime_nS; +// return (double) updateTime_nS / uPerTrial; +// } +//} diff --git a/src/main/java/org/apache/datasketches/characterization/filters/package-info.java b/src/main/java/org/apache/datasketches/characterization/filters/package-info.java new file mode 100644 index 0000000..70a5b15 --- /dev/null +++ b/src/main/java/org/apache/datasketches/characterization/filters/package-info.java @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.characterization.filters; diff --git a/src/main/java/org/apache/datasketches/characterization/req/ReqSketchAccuracyProfile.java b/src/main/java/org/apache/datasketches/characterization/req/ReqSketchAccuracyProfile.java index 5e1bba4..39425e0 100644 --- a/src/main/java/org/apache/datasketches/characterization/req/ReqSketchAccuracyProfile.java +++ b/src/main/java/org/apache/datasketches/characterization/req/ReqSketchAccuracyProfile.java @@ -30,7 +30,9 @@ import org.apache.datasketches.JobProfile; import org.apache.datasketches.MonotonicPoints; import org.apache.datasketches.Properties; import org.apache.datasketches.characterization.Shuffle; -import org.apache.datasketches.characterization.req.StreamMaker.Pattern; +import org.apache.datasketches.characterization.StreamMaker; +import org.apache.datasketches.characterization.StreamMaker.Pattern; +import org.apache.datasketches.characterization.TrueRanks; import org.apache.datasketches.hll.HllSketch; import org.apache.datasketches.quantiles.DoublesSketch; import org.apache.datasketches.quantiles.DoublesSketchBuilder; @@ -93,7 +95,7 @@ public class ReqSketchAccuracyProfile implements JobProfile { private HllSketch[] errHllSkArr; //Specific to a streamLength - private TrueFloatRanks trueRanks; + private TrueRanks trueRanks; //The entire stream private float[] stream; //a shuffled array of values from 1...N private float[] sortedStream; @@ -237,11 +239,11 @@ public class ReqSketchAccuracyProfile implements JobProfile { stream = streamMaker.makeStream(streamLength, pattern, offset); //compute true ranks if (ltEq) { - trueRanks = new TrueFloatRanks(stream, true); + trueRanks = new TrueRanks(stream, true); } else { - trueRanks = new TrueFloatRanks(stream, false); + trueRanks = new TrueRanks(stream, false); } - sortedStream = trueRanks.getSortedStream(); + sortedStream = trueRanks.getSortedFloatStream(); sortedAbsRanks = trueRanks.getSortedAbsRanks(); //compute the true values used at the plot points diff --git a/src/main/java/org/apache/datasketches/characterization/req/ReqSketchAccuracyProfile2.java b/src/main/java/org/apache/datasketches/characterization/req/ReqSketchAccuracyProfile2.java index bb9e982..b9237e2 100644 --- a/src/main/java/org/apache/datasketches/characterization/req/ReqSketchAccuracyProfile2.java +++ b/src/main/java/org/apache/datasketches/characterization/req/ReqSketchAccuracyProfile2.java @@ -25,7 +25,9 @@ import static org.apache.datasketches.quantilescommon.QuantilesUtil.evenlySpaced import org.apache.datasketches.Job; import org.apache.datasketches.JobProfile; import org.apache.datasketches.Properties; -import org.apache.datasketches.characterization.req.StreamMaker.Pattern; +import org.apache.datasketches.characterization.StreamMaker; +import org.apache.datasketches.characterization.StreamMaker.Pattern; +import org.apache.datasketches.characterization.TrueRanks; import org.apache.datasketches.quantiles.DoublesSketch; import org.apache.datasketches.quantiles.DoublesSketchBuilder; import org.apache.datasketches.quantiles.UpdateDoublesSketch; @@ -71,7 +73,7 @@ public class ReqSketchAccuracyProfile2 implements JobProfile { //Specific to the stream private StreamMaker streamMaker; - private TrueFloatRanks trueRanks; + private TrueRanks trueRanks; private float[] sortedPPValues; private int[] sortedPPIndices; private int[] sortedPPAbsRanks; @@ -152,9 +154,9 @@ public class ReqSketchAccuracyProfile2 implements JobProfile { streamMaker = new StreamMaker(); final float[] stream = streamMaker.makeStream(N, pattern, offset); if (ltEq) { - trueRanks = new TrueFloatRanks(stream, true); + trueRanks = new TrueRanks(stream, true); } else { - trueRanks = new TrueFloatRanks(stream, false); + trueRanks = new TrueRanks(stream, false); } } @@ -163,7 +165,7 @@ public class ReqSketchAccuracyProfile2 implements JobProfile { sortedPPAbsRanks = new int[numPlotPoints]; sortedPPValues = new float[numPlotPoints]; final int[] sortedAbsRanks = trueRanks.getSortedAbsRanks(); - final float[] sortedStream = trueRanks.getSortedStream(); + final float[] sortedStream = trueRanks.getSortedFloatStream(); final int minIdx = (int)Math.round((double)(N - 1) / numPlotPoints); final float[] temp = evenlySpacedFloats(minIdx, N - 1, numPlotPoints); //indices @@ -215,10 +217,10 @@ public class ReqSketchAccuracyProfile2 implements JobProfile { void doTrial() { //for all plot points sk.reset(); - final float[] stream = trueRanks.getStream(); + final float[] stream = trueRanks.getFloatStream(); for (int i = 0; i < N; i++) { sk.update(stream[i]); } - final float[] sortedStream = trueRanks.getSortedStream(); + final float[] sortedStream = trueRanks.getSortedFloatStream(); final int[] sortedAbsRanks = trueRanks.getSortedAbsRanks(); int pp = 0; diff --git a/src/main/java/org/apache/datasketches/characterization/req/TrueFloatRanks.java b/src/main/java/org/apache/datasketches/characterization/req/TrueFloatRanks.java deleted file mode 100644 index c45feb8..0000000 --- a/src/main/java/org/apache/datasketches/characterization/req/TrueFloatRanks.java +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.characterization.req; - -import java.util.Arrays; - -import org.apache.datasketches.characterization.Shuffle; -import org.apache.datasketches.quantilescommon.BinarySearch; -import org.testng.annotations.Test; - -/** - * Given an array of values, these methods compute the true rank (mass) of - * each value of the array based on the comparison criterion. - * The mass or rank of each value is the fractional number of elements of the array that satisfy - * the criterion. - * - * @author Lee Rhodes - */ -public class TrueFloatRanks { - private static final String LS = System.getProperty("line.separator"); - private boolean ltEq; - private int length; - private float[] stream; - private float[] sortedStream; - private int[] sortedAbsRanks; - private int[] streamAbsRanks; - - TrueFloatRanks() { } //for TestNG - - public TrueFloatRanks(final float[] stream, final boolean ltEq) { - this.stream = stream; - this.ltEq = ltEq; - compute(); - } - - public float getMinValue() { return sortedStream[0]; } - - public float getMaxValue() { return sortedStream[length - 1]; } - - public int getMinAbsRank() { return sortedAbsRanks[0]; } - - public int getMaxAbsRank() { return sortedAbsRanks[length - 1]; } - - public float[] getStream() { return stream; } - - public float[] getSortedStream() { return sortedStream; } - - public int[] getSortedAbsRanks() { return sortedAbsRanks; } - - public int[] getStreamAbsRanks() { return streamAbsRanks; } - - public double[] getSortedRelRanks() { - return relativeRank(sortedAbsRanks); - } - - public double[] getStreamRelRanks() { - return relativeRank(streamAbsRanks); - } - - public int getAbsRank(final float v) { - final int idx = BinarySearch.find(sortedStream, 0, length - 1, v); - return sortedAbsRanks[idx]; - } - - /** - * Sorts the stream, then computes the sortedAbsRanks based on the comparison criterion. - */ - private void compute() { - length = stream.length; - sortedStream = stream.clone(); - Arrays.sort(sortedStream); - sortedAbsRanks = new int[length]; - if (ltEq) { //LE - sortedAbsRanks[length - 1] = length; - int i = length - 2; - while (i >= 0) { //goes backwards - if (sortedStream[i] == sortedStream[i + 1]) { sortedAbsRanks[i] = sortedAbsRanks[i + 1]; } - else { sortedAbsRanks[i] = i + 1; } - i--; - } - } else { // LT - sortedAbsRanks[0] = 0; - int i = 1; - while (i < length) { //forwards - if (sortedStream[i - 1] == sortedStream[i]) { sortedAbsRanks[i] = sortedAbsRanks[i - 1]; } - else { sortedAbsRanks[i] = i; } - i++; - } - } - streamAbsRanks = new int[length]; //put the ranks in original stream order - for (int j = 0; j < length; j++) { - final int idx = BinarySearch.find(sortedStream, 0, length - 1, stream[j]); - streamAbsRanks[j] = sortedAbsRanks[idx]; - } - } - - /** - * Converts an absolute rank array to a relative rank array. - * @param absRankArr the absolute rank array to be converted. - * @return the relative rank array. - */ - public static double[] relativeRank(final int[] absRankArr) { - final int length = absRankArr.length; - final double[] relRank = new double[length]; - for (int i = 0; i < length; i++) { relRank[i] = (double)absRankArr[i] / length; } - return relRank; - } - - @Test - public void checkRanks() { - final float[] vArr = { 5, 5, 5, 6, 6, 6, 7, 8, 8, 8 }; - checkRanksImpl(vArr); - println(LS + "SHUFFLED:"); - Shuffle.shuffle(vArr); - checkRanksImpl(vArr); - } - - private static void checkRanksImpl(final float[] vArr) { - final StringBuilder sb = new StringBuilder(); - final String ffmt = "%5.1f "; - final String dfmt = "%5d "; - TrueFloatRanks trueRanks; - - final int N = vArr.length; - sb.append("Values:").append(LS); - for (int i = 0; i < N; i++) { sb.append(String.format(ffmt, vArr[i])); } - sb.append(LS); - - trueRanks = new TrueFloatRanks(vArr, false); - sb.append("LT Abs Ranks:").append(LS); - int[] absArr = trueRanks.getStreamAbsRanks(); - for (int i = 0; i < N; i++) { sb.append(String.format(dfmt, absArr[i])); } - sb.append(LS); - sb.append("LT Rel Ranks:").append(LS); - double[] relArr = relativeRank(absArr); - for (int i = 0; i < N; i++) { sb.append(String.format(ffmt, relArr[i])); } - sb.append(LS); - - trueRanks = new TrueFloatRanks(vArr, true); - sb.append("LE Abs Ranks:").append(LS); - absArr = trueRanks.getStreamAbsRanks(); - for (int i = 0; i < N; i++) { sb.append(String.format(dfmt, absArr[i])); } - sb.append(LS); - sb.append("LE Rel Ranks:").append(LS); - relArr = relativeRank(absArr); - for (int i = 0; i < N; i++) { sb.append(String.format(ffmt, relArr[i])); } - sb.append(LS); - println(sb.toString()); - } - - private static void println(final Object o) { - System.out.println(o.toString()); - } - -} diff --git a/src/main/java/org/apache/datasketches/characterization/tdigest/TDigestAccuracyProfile.java b/src/main/java/org/apache/datasketches/characterization/tdigest/TDigestAccuracyProfile.java index 0598ca4..ed30c94 100644 --- a/src/main/java/org/apache/datasketches/characterization/tdigest/TDigestAccuracyProfile.java +++ b/src/main/java/org/apache/datasketches/characterization/tdigest/TDigestAccuracyProfile.java @@ -17,149 +17,149 @@ * under the License. */ -package org.apache.datasketches.characterization.tdigest; - -import static org.apache.datasketches.common.Util.pwr2SeriesNext; - -import java.util.Arrays; -import java.util.Random; - -import org.apache.datasketches.Job; -import org.apache.datasketches.JobProfile; -import org.apache.datasketches.MonotonicPoints; -import org.apache.datasketches.Properties; - -import com.tdunning.math.stats.TDigest; - -public class TDigestAccuracyProfile implements JobProfile { - private Job job; - private Properties prop; - - private Random rand; - private double[] values; - - @Override - public void start(final Job job) { - this.job = job; - prop = job.getProperties(); - final int lgMin = Integer.parseInt(prop.mustGet("LgMin")); - final int lgMax = Integer.parseInt(prop.mustGet("LgMax")); - final int lgDelta = Integer.parseInt(prop.mustGet("LgDelta")); - final int ppo = Integer.parseInt(prop.mustGet("PPO")); - - final int compression = Integer.parseInt(prop.mustGet("Compression")); - final double[] ranks = {0.01, 0.05, 0.5, 0.95, 0.99}; - final double errorPercentile = Double.parseDouble(prop.mustGet("ErrorPercentile")); - - rand = new Random(); - values = new double[1 << lgMax]; - - final int numSteps; - final boolean useppo; - if (lgDelta < 1) { - numSteps = MonotonicPoints.countPoints(lgMin, lgMax, ppo); - useppo = true; - } else { - numSteps = (lgMax - lgMin) / lgDelta + 1; - useppo = false; - } - - int streamLength = 1 << lgMin; - int lgCurSL = lgMin; - for (int step = 0; step < numSteps; step++) { - - doStreamLength(streamLength, compression, ranks, errorPercentile); - - if (useppo) { - streamLength = (int)pwr2SeriesNext(ppo, streamLength); - } else { - lgCurSL += lgDelta; - streamLength = 1 << lgCurSL; - } - } - } - - @Override - public void shutdown() {} - - @Override - public void cleanup() {} - - void doStreamLength(final int streamLength, final int compression, final double[] ranks, final double errorPercentile) { - final int numTrials = 1 << Integer.parseInt(prop.mustGet("LgTrials")); - - double[][] rankErrors = new double[ranks.length][]; - for (int i = 0; i < ranks.length; i++) rankErrors[i] = new double[numTrials]; - - for (int t = 0; t < numTrials; t++) { - runTrial(t, streamLength, compression, ranks, rankErrors); - } - - job.print(streamLength); - for (int i = 0; i < ranks.length; i++) { - Arrays.sort(rankErrors[i]); - final int errPctIndex = (int)(numTrials * errorPercentile / 100); - final double rankErrorAtPct = rankErrors[i][errPctIndex]; - job.print("\t"); - job.print(rankErrorAtPct * 100); - } - job.print("\n"); - } - - void runTrial(final int trial, final int streamLength, final int compression, final double[] ranks, final double[][] rankErrors) { - TDigest td = TDigest.createDigest(compression); - for (int i = 0; i < streamLength; i++) { - values[i] = rand.nextDouble(); - td.add(values[i]); - } - Arrays.sort(values, 0, streamLength); - for (int i = 0; i < ranks.length; i++) { - final double quantile = values[(int)((streamLength - 1) * ranks[i])]; - final double trueRank = computeTrueRank(values, streamLength, quantile); - rankErrors[i][trial] = Math.abs(trueRank - td.cdf(quantile)); - } - } - - static double computeTrueRank(final double[] values, final int streamLength, final double value) { - final int lower = lowerBound(values, 0, streamLength, value); - final int upper = upperBound(values, lower, streamLength, value); - return (lower + upper) / 2.0 / streamLength; - } - - static int lowerBound(final double[] values, int first, final int last, final double value) { - int current; - int step; - int count = last - first; - while (count > 0) { - current = first; - step = count / 2; - current += step; - if (values[current] < value) { - first = ++current; - count -= step + 1; - } else { - count = step; - } - } - return first; - } - - static int upperBound(final double[] values, int first, final int last, final double value) { - int current; - int step; - int count = last - first; - while (count > 0) { - current = first; - step = count / 2; - current += step; - if (!(value < values[current])) { - first = ++current; - count -= step + 1; - } else { - count = step; - } - } - return first; - } - -} +//package org.apache.datasketches.characterization.tdigest; +// +//import static org.apache.datasketches.common.Util.pwr2SeriesNext; +// +//import java.util.Arrays; +//import java.util.Random; +// +//import org.apache.datasketches.Job; +//import org.apache.datasketches.JobProfile; +//import org.apache.datasketches.MonotonicPoints; +//import org.apache.datasketches.Properties; +// +//import com.tdunning.math.stats.TDigest; +// +//public class TDigestAccuracyProfile implements JobProfile { +// private Job job; +// private Properties prop; +// +// private Random rand; +// private double[] values; +// +// @Override +// public void start(final Job job) { +// this.job = job; +// prop = job.getProperties(); +// final int lgMin = Integer.parseInt(prop.mustGet("LgMin")); +// final int lgMax = Integer.parseInt(prop.mustGet("LgMax")); +// final int lgDelta = Integer.parseInt(prop.mustGet("LgDelta")); +// final int ppo = Integer.parseInt(prop.mustGet("PPO")); +// +// final int compression = Integer.parseInt(prop.mustGet("Compression")); +// final double[] ranks = {0.01, 0.05, 0.5, 0.95, 0.99}; +// final double errorPercentile = Double.parseDouble(prop.mustGet("ErrorPercentile")); +// +// rand = new Random(); +// values = new double[1 << lgMax]; +// +// final int numSteps; +// final boolean useppo; +// if (lgDelta < 1) { +// numSteps = MonotonicPoints.countPoints(lgMin, lgMax, ppo); +// useppo = true; +// } else { +// numSteps = (lgMax - lgMin) / lgDelta + 1; +// useppo = false; +// } +// +// int streamLength = 1 << lgMin; +// int lgCurSL = lgMin; +// for (int step = 0; step < numSteps; step++) { +// +// doStreamLength(streamLength, compression, ranks, errorPercentile); +// +// if (useppo) { +// streamLength = (int)pwr2SeriesNext(ppo, streamLength); +// } else { +// lgCurSL += lgDelta; +// streamLength = 1 << lgCurSL; +// } +// } +// } +// +// @Override +// public void shutdown() {} +// +// @Override +// public void cleanup() {} +// +// void doStreamLength(final int streamLength, final int compression, final double[] ranks, final double errorPercentile) { +// final int numTrials = 1 << Integer.parseInt(prop.mustGet("LgTrials")); +// +// double[][] rankErrors = new double[ranks.length][]; +// for (int i = 0; i < ranks.length; i++) rankErrors[i] = new double[numTrials]; +// +// for (int t = 0; t < numTrials; t++) { +// runTrial(t, streamLength, compression, ranks, rankErrors); +// } +// +// job.print(streamLength); +// for (int i = 0; i < ranks.length; i++) { +// Arrays.sort(rankErrors[i]); +// final int errPctIndex = (int)(numTrials * errorPercentile / 100); +// final double rankErrorAtPct = rankErrors[i][errPctIndex]; +// job.print("\t"); +// job.print(rankErrorAtPct * 100); +// } +// job.print("\n"); +// } +// +// void runTrial(final int trial, final int streamLength, final int compression, final double[] ranks, final double[][] rankErrors) { +// TDigest td = TDigest.createDigest(compression); +// for (int i = 0; i < streamLength; i++) { +// values[i] = rand.nextDouble(); +// td.add(values[i]); +// } +// Arrays.sort(values, 0, streamLength); +// for (int i = 0; i < ranks.length; i++) { +// final double quantile = values[(int)((streamLength - 1) * ranks[i])]; +// final double trueRank = computeTrueRank(values, streamLength, quantile); +// rankErrors[i][trial] = Math.abs(trueRank - td.cdf(quantile)); +// } +// } +// +// static double computeTrueRank(final double[] values, final int streamLength, final double value) { +// final int lower = lowerBound(values, 0, streamLength, value); +// final int upper = upperBound(values, lower, streamLength, value); +// return (lower + upper) / 2.0 / streamLength; +// } +// +// static int lowerBound(final double[] values, int first, final int last, final double value) { +// int current; +// int step; +// int count = last - first; +// while (count > 0) { +// current = first; +// step = count / 2; +// current += step; +// if (values[current] < value) { +// first = ++current; +// count -= step + 1; +// } else { +// count = step; +// } +// } +// return first; +// } +// +// static int upperBound(final double[] values, int first, final int last, final double value) { +// int current; +// int step; +// int count = last - first; +// while (count > 0) { +// current = first; +// step = count / 2; +// current += step; +// if (!(value < values[current])) { +// first = ++current; +// count -= step + 1; +// } else { +// count = step; +// } +// } +// return first; +// } +// +//} diff --git a/src/main/java/org/apache/datasketches/characterization/tdigest/TDigestErrorVsRankProfile.java b/src/main/java/org/apache/datasketches/characterization/tdigest/TDigestErrorVsRankProfile.java index 966b6a8..4551caa 100644 --- a/src/main/java/org/apache/datasketches/characterization/tdigest/TDigestErrorVsRankProfile.java +++ b/src/main/java/org/apache/datasketches/characterization/tdigest/TDigestErrorVsRankProfile.java @@ -17,295 +17,295 @@ * under the License. */ -package org.apache.datasketches.characterization.tdigest; - -import static java.lang.Math.round; -import static org.apache.datasketches.GaussianRanks.GAUSSIANS_3SD; -import static org.apache.datasketches.SpacedPoints.expSpaced; -import static org.apache.datasketches.common.Util.pwr2SeriesNext; -import static org.apache.datasketches.quantilescommon.QuantilesUtil.evenlySpacedDoubles; - -import org.apache.datasketches.Job; -import org.apache.datasketches.JobProfile; -import org.apache.datasketches.MonotonicPoints; -import org.apache.datasketches.Properties; -import org.apache.datasketches.characterization.Shuffle; -import org.apache.datasketches.characterization.req.StreamMaker; -import org.apache.datasketches.characterization.req.StreamMaker.Pattern; -import org.apache.datasketches.hll.HllSketch; -import org.apache.datasketches.quantiles.DoublesSketch; -import org.apache.datasketches.quantiles.DoublesSketchBuilder; -import org.apache.datasketches.quantiles.UpdateDoublesSketch; - -import com.tdunning.math.stats.TDigest; - -/** - * @author Lee Rhodes - */ -public class TDigestErrorVsRankProfile implements JobProfile { - private Job job; - private Properties prop; - - //FROM PROPERTIES - //Stream pattern config - StreamMaker streamMaker = new StreamMaker(); - private Pattern pattern; - private int offset; - - //For computing the different stream lengths - private int lgMin; - private int lgMax; - private int lgDelta; - private int ppo; //not currently used - - private int numTrials; //num of Trials per plotPoint - private int errQSkLgK; //size of the error quantiles sketches - private int errHllSkLgK; //size of the error HLL sketch - private boolean shuffle; //if true, shuffle for each trial - - //plotting & x-axis configuration - private int numPlotPoints; - private boolean evenlySpaced; - private double exponent; - private double rankRange; - private double metricsRankRange; - - //Target sketch configuration & error analysis - private int K; - private TDigest sk; - - //The array of Gaussian quantiles for +/- StdDev error analysis - private double[] gRanks; - private UpdateDoublesSketch[] errQSkArr; - private HllSketch[] errHllSkArr; - - //Specific to a streamLength - private TrueDoubleRanks trueRanks; - //The entire stream - private float[] stream; //a shuffled array of values from 1...N - private float[] sortedStream; - private int[] sortedAbsRanks; - //private int[] streamAbsRanks ?? do we need? - //The PP points - private float[] sortedPPValues; - private int[] sortedPPIndices; - private int[] sortedPPAbsRanks; - int sumAllocCounts = 0; - - private final String[] columnLabels = - {"nPP", "Value", "Rank", - "-3SD","-2SD", "-1SD", "Med", "+1SD", "+2SD", "+3SD", - "1LB", "1UB", "UErrCnt"}; - private final String sFmt = - "%3s\t%5s\t%4s\t" - + "%4s\t%4s\t%4s\t%5s\t%4s\t%4s\t%4s\t" - + "%3s\t%3s\t%7s\n"; - private final String fFmt = - "%f\t%f\t%f\t" // rPP, Value, Rank - + "%f\t%f\t%f\t%f\t%f\t%f\t%f\t" //-3sd to +3sd - + "\t%d\n"; // UErrCnt - - //JobProfile interface - @Override - public void start(final Job job) { - this.job = job; - prop = job.getProperties(); - extractProperties(); - configureCommon(); - doStreamLengths(); - } - - @Override - public void shutdown() {} - - @Override - public void cleanup() {} - //end JobProfile - - private void extractProperties() { - //Stream Pattern - pattern = Pattern.valueOf(prop.mustGet("Pattern")); - offset = Integer.parseInt(prop.mustGet("Offset")); - //Stream lengths - lgMin = Integer.parseInt(prop.mustGet("LgMin")); - lgMax = Integer.parseInt(prop.mustGet("LgMax")); - lgDelta = Integer.parseInt(prop.mustGet("LgDelta")); - ppo = Integer.parseInt(prop.mustGet("PPO")); - // Trials config (independent of sketch) - numTrials = 1 << Integer.parseInt(prop.mustGet("LgTrials")); - errQSkLgK = Integer.parseInt(prop.mustGet("ErrQSkLgK")); - errHllSkLgK = Integer.parseInt(prop.mustGet("ErrHllSkLgK")); - shuffle = Boolean.valueOf(prop.mustGet("Shuffle")); - //plotting - numPlotPoints = Integer.parseInt(prop.mustGet("NumPlotPoints")); - evenlySpaced = Boolean.valueOf(prop.mustGet("EvenlySpaced")); - exponent = Double.parseDouble(prop.mustGet("Exponent")); - rankRange = Double.parseDouble(prop.mustGet("RankRange")); - //Target sketch config - K = Integer.parseInt(prop.mustGet("K")); - - metricsRankRange = Double.parseDouble(prop.mustGet("MetricsRankRange")); - } - - void configureCommon() { - configureSketch(); - errQSkArr = new UpdateDoublesSketch[numPlotPoints]; - errHllSkArr = new HllSketch[numPlotPoints]; - //configure the error quantiles array & HLL sketch arr - final DoublesSketchBuilder builder = DoublesSketch.builder().setK(1 << errQSkLgK); - for (int i = 0; i < numPlotPoints; i++) { - errQSkArr[i] = builder.build(); - errHllSkArr[i] = new HllSketch(errHllSkLgK); - } - gRanks = new double[GAUSSIANS_3SD.length - 2]; //omit 0.0 and 1.0 - for (int i = 1; i < GAUSSIANS_3SD.length - 1; i++) { - gRanks[i - 1] = GAUSSIANS_3SD[i]; - } - } - - void configureSketch() { - } - - private void doStreamLengths() { - //compute the number of stream lengths for the whole job - final int numSteps; - final boolean useppo; - if (lgDelta < 1) { - numSteps = MonotonicPoints.countPoints(lgMin, lgMax, ppo); - useppo = true; - } else { - numSteps = (lgMax - lgMin) / lgDelta + 1; - useppo = false; - } - - int streamLength = 1 << lgMin; //initial streamLength - int lgCurSL = lgMin; - - // Step through the different stream lengths - for (int step = 0; step < numSteps; step++) { - - doStreamLength(streamLength); - - //go to next stream length - if (useppo) { - streamLength = (int)pwr2SeriesNext(ppo, streamLength); - } else { - lgCurSL += lgDelta; - streamLength = 1 << lgCurSL; - } - } - } - - void doStreamLength(final int streamLength) { - job.println(LS + "Stream Length: " + streamLength ); - job.println(LS + "param k: " + K ); - job.printfData(sFmt, (Object[])columnLabels); - //build the stream - stream = streamMaker.makeStream(streamLength, pattern, offset); - //compute true ranks - trueRanks = new TrueDoubleRanks(stream); - sortedStream = trueRanks.getSortedStream(); - sortedAbsRanks = trueRanks.getSortedAbsRanks(); - - //compute the true values used at the plot points - int startIdx = 0; - int endIdx = streamLength - 1; - final boolean hra = true; - if (rankRange < 1.0) { //A substream of points focuses on a sub-range at one end. - final int subStreamLen = (int)Math.round(rankRange * streamLength); - startIdx = hra ? streamLength - subStreamLen : 0; - endIdx = hra ? streamLength - 1 : subStreamLen - 1; - } - - //generates PP indices in [startIdx, endIdx] inclusive, inclusive - // PV 2020-01-07: using double so that there's enough precision even for large stream lengths - final double[] temp = evenlySpaced - ? evenlySpacedDoubles(startIdx, endIdx, numPlotPoints) - : expSpaced(startIdx, endIdx, numPlotPoints, exponent, hra); - - sortedPPIndices = new int[numPlotPoints]; - sortedPPAbsRanks = new int[numPlotPoints]; - sortedPPValues = new float[numPlotPoints]; - - for (int pp = 0; pp < numPlotPoints; pp++) { - final int idx = (int)Math.round(temp[pp]); - sortedPPIndices[pp] = idx; - sortedPPAbsRanks[pp] = sortedAbsRanks[idx]; - sortedPPValues[pp] = sortedStream[idx]; - } - - //Do numTrials for all plotpoints - for (int t = 0; t < numTrials; t++) { - doTrial(); - - //sumAllocCounts = sk. - } - - // for special metrics for capturing accuracy per byte - double sumRelStdDev = 0; - int numRelStdDev = 0; - double sumAddStdDev = 0; - int numAddStdDev = 0; - - //at this point each of the errQSkArr sketches has a distribution of error from numTrials - for (int pp = 0 ; pp < numPlotPoints; pp++) { - final double v = sortedPPValues[pp]; - final double tr = v / streamLength; //the true rank - - //for each of the numErrDistRanks distributions extract the sd Gaussian quantiles - final double[] errQ = errQSkArr[pp].getQuantiles(gRanks); - final int uErrCnt = (int)round(errHllSkArr[pp].getEstimate()); - - //Plot the row. - final double relPP = (double)(pp + 1) / numPlotPoints; - job.printfData(fFmt, relPP, v, tr, - errQ[0], errQ[1], errQ[2], errQ[3], errQ[4], errQ[5], errQ[6], - uErrCnt); - - if (relPP > 0 && relPP < 1 - && (hra && relPP < metricsRankRange || !hra && relPP >= 1 - metricsRankRange)) { - sumAddStdDev += errQ[4]; - numAddStdDev++; - } - if (relPP > 0 && relPP < 1 - && (!hra && relPP < metricsRankRange || hra && relPP >= 1 - metricsRankRange)) { - sumRelStdDev += errQ[4] / (hra ? 1 - relPP : relPP); - numRelStdDev++; - } - errQSkArr[pp].reset(); //reset the errQSkArr for next streamLength - errHllSkArr[pp].reset(); //reset the errHllSkArr for next streamLength - } - final int serBytes = sk.smallByteSize(); - - // special metrics for capturing accuracy per byte - final double avgRelStdDevTimesSize = serBytes * sumRelStdDev / numRelStdDev; - final double avgAddStdDevTimesSize = serBytes * sumAddStdDev / numAddStdDev; - job.println(LS + "Avg. relative std. dev. times size: " + avgRelStdDevTimesSize); - job.println( "Avg. additive std. dev. times size: " + avgAddStdDevTimesSize); - - job.println(LS + "Serialization Bytes: " + serBytes); -// job.println(sk.viewCompactorDetail("%5.0f", false)); - } - - /** - * A trial consists of updating a virgin sketch with a stream of values. - * Capture the estimated ranks for all plotPoints and then update the errQSkArr with those - * error values. - */ - void doTrial() { - sk = TDigest.createDigest(K); - if (shuffle) { Shuffle.shuffle(stream); } - final int sl = stream.length; - for (int i = 0; i < sl; i++) { sk.add(stream[i]); } - //get estimated ranks from sketch for all plot points - final double[] estRanks = new double[sortedPPValues.length]; - for (int i = 0; i < sortedPPValues.length; i++) estRanks[i] = sk.cdf(sortedPPValues[i]); - //compute errors and update HLL for each plotPoint - for (int pp = 0; pp < numPlotPoints; pp++) { - final double errorAtPlotPoint = estRanks[pp] - (double)sortedPPAbsRanks[pp] / sl; - errQSkArr[pp].update(errorAtPlotPoint); //update each of the errQArr sketches - errHllSkArr[pp].update(errorAtPlotPoint); //unique count of error values - } - } - -} +//package org.apache.datasketches.characterization.tdigest; +// +//import static java.lang.Math.round; +//import static org.apache.datasketches.GaussianRanks.GAUSSIANS_3SD; +//import static org.apache.datasketches.SpacedPoints.expSpaced; +//import static org.apache.datasketches.common.Util.pwr2SeriesNext; +//import static org.apache.datasketches.quantilescommon.QuantilesUtil.evenlySpacedDoubles; +// +//import org.apache.datasketches.Job; +//import org.apache.datasketches.JobProfile; +//import org.apache.datasketches.MonotonicPoints; +//import org.apache.datasketches.Properties; +//import org.apache.datasketches.characterization.Shuffle; +//import org.apache.datasketches.characterization.req.StreamMaker; +//import org.apache.datasketches.characterization.req.StreamMaker.Pattern; +//import org.apache.datasketches.hll.HllSketch; +//import org.apache.datasketches.quantiles.DoublesSketch; +//import org.apache.datasketches.quantiles.DoublesSketchBuilder; +//import org.apache.datasketches.quantiles.UpdateDoublesSketch; +// +//import com.tdunning.math.stats.TDigest; +// +///** +// * @author Lee Rhodes +// */ +//public class TDigestErrorVsRankProfile implements JobProfile { +// private Job job; +// private Properties prop; +// +// //FROM PROPERTIES +// //Stream pattern config +// StreamMaker streamMaker = new StreamMaker(); +// private Pattern pattern; +// private int offset; +// +// //For computing the different stream lengths +// private int lgMin; +// private int lgMax; +// private int lgDelta; +// private int ppo; //not currently used +// +// private int numTrials; //num of Trials per plotPoint +// private int errQSkLgK; //size of the error quantiles sketches +// private int errHllSkLgK; //size of the error HLL sketch +// private boolean shuffle; //if true, shuffle for each trial +// +// //plotting & x-axis configuration +// private int numPlotPoints; +// private boolean evenlySpaced; +// private double exponent; +// private double rankRange; +// private double metricsRankRange; +// +// //Target sketch configuration & error analysis +// private int K; +// private TDigest sk; +// +// //The array of Gaussian quantiles for +/- StdDev error analysis +// private double[] gRanks; +// private UpdateDoublesSketch[] errQSkArr; +// private HllSketch[] errHllSkArr; +// +// //Specific to a streamLength +// private TrueDoubleRanks trueRanks; +// //The entire stream +// private float[] stream; //a shuffled array of values from 1...N +// private float[] sortedStream; +// private int[] sortedAbsRanks; +// //private int[] streamAbsRanks ?? do we need? +// //The PP points +// private float[] sortedPPValues; +// private int[] sortedPPIndices; +// private int[] sortedPPAbsRanks; +// int sumAllocCounts = 0; +// +// private final String[] columnLabels = +// {"nPP", "Value", "Rank", +// "-3SD","-2SD", "-1SD", "Med", "+1SD", "+2SD", "+3SD", +// "1LB", "1UB", "UErrCnt"}; +// private final String sFmt = +// "%3s\t%5s\t%4s\t" +// + "%4s\t%4s\t%4s\t%5s\t%4s\t%4s\t%4s\t" +// + "%3s\t%3s\t%7s\n"; +// private final String fFmt = +// "%f\t%f\t%f\t" // rPP, Value, Rank +// + "%f\t%f\t%f\t%f\t%f\t%f\t%f\t" //-3sd to +3sd +// + "\t%d\n"; // UErrCnt +// +// //JobProfile interface +// @Override +// public void start(final Job job) { +// this.job = job; +// prop = job.getProperties(); +// extractProperties(); +// configureCommon(); +// doStreamLengths(); +// } +// +// @Override +// public void shutdown() {} +// +// @Override +// public void cleanup() {} +// //end JobProfile +// +// private void extractProperties() { +// //Stream Pattern +// pattern = Pattern.valueOf(prop.mustGet("Pattern")); +// offset = Integer.parseInt(prop.mustGet("Offset")); +// //Stream lengths +// lgMin = Integer.parseInt(prop.mustGet("LgMin")); +// lgMax = Integer.parseInt(prop.mustGet("LgMax")); +// lgDelta = Integer.parseInt(prop.mustGet("LgDelta")); +// ppo = Integer.parseInt(prop.mustGet("PPO")); +// // Trials config (independent of sketch) +// numTrials = 1 << Integer.parseInt(prop.mustGet("LgTrials")); +// errQSkLgK = Integer.parseInt(prop.mustGet("ErrQSkLgK")); +// errHllSkLgK = Integer.parseInt(prop.mustGet("ErrHllSkLgK")); +// shuffle = Boolean.valueOf(prop.mustGet("Shuffle")); +// //plotting +// numPlotPoints = Integer.parseInt(prop.mustGet("NumPlotPoints")); +// evenlySpaced = Boolean.valueOf(prop.mustGet("EvenlySpaced")); +// exponent = Double.parseDouble(prop.mustGet("Exponent")); +// rankRange = Double.parseDouble(prop.mustGet("RankRange")); +// //Target sketch config +// K = Integer.parseInt(prop.mustGet("K")); +// +// metricsRankRange = Double.parseDouble(prop.mustGet("MetricsRankRange")); +// } +// +// void configureCommon() { +// configureSketch(); +// errQSkArr = new UpdateDoublesSketch[numPlotPoints]; +// errHllSkArr = new HllSketch[numPlotPoints]; +// //configure the error quantiles array & HLL sketch arr +// final DoublesSketchBuilder builder = DoublesSketch.builder().setK(1 << errQSkLgK); +// for (int i = 0; i < numPlotPoints; i++) { +// errQSkArr[i] = builder.build(); +// errHllSkArr[i] = new HllSketch(errHllSkLgK); +// } +// gRanks = new double[GAUSSIANS_3SD.length - 2]; //omit 0.0 and 1.0 +// for (int i = 1; i < GAUSSIANS_3SD.length - 1; i++) { +// gRanks[i - 1] = GAUSSIANS_3SD[i]; +// } +// } +// +// void configureSketch() { +// } +// +// private void doStreamLengths() { +// //compute the number of stream lengths for the whole job +// final int numSteps; +// final boolean useppo; +// if (lgDelta < 1) { +// numSteps = MonotonicPoints.countPoints(lgMin, lgMax, ppo); +// useppo = true; +// } else { +// numSteps = (lgMax - lgMin) / lgDelta + 1; +// useppo = false; +// } +// +// int streamLength = 1 << lgMin; //initial streamLength +// int lgCurSL = lgMin; +// +// // Step through the different stream lengths +// for (int step = 0; step < numSteps; step++) { +// +// doStreamLength(streamLength); +// +// //go to next stream length +// if (useppo) { +// streamLength = (int)pwr2SeriesNext(ppo, streamLength); +// } else { +// lgCurSL += lgDelta; +// streamLength = 1 << lgCurSL; +// } +// } +// } +// +// void doStreamLength(final int streamLength) { +// job.println(LS + "Stream Length: " + streamLength ); +// job.println(LS + "param k: " + K ); +// job.printfData(sFmt, (Object[])columnLabels); +// //build the stream +// stream = streamMaker.makeStream(streamLength, pattern, offset); +// //compute true ranks +// trueRanks = new TrueDoubleRanks(stream); +// sortedStream = trueRanks.getSortedStream(); +// sortedAbsRanks = trueRanks.getSortedAbsRanks(); +// +// //compute the true values used at the plot points +// int startIdx = 0; +// int endIdx = streamLength - 1; +// final boolean hra = true; +// if (rankRange < 1.0) { //A substream of points focuses on a sub-range at one end. +// final int subStreamLen = (int)Math.round(rankRange * streamLength); +// startIdx = hra ? streamLength - subStreamLen : 0; +// endIdx = hra ? streamLength - 1 : subStreamLen - 1; +// } +// +// //generates PP indices in [startIdx, endIdx] inclusive, inclusive +// // PV 2020-01-07: using double so that there's enough precision even for large stream lengths +// final double[] temp = evenlySpaced +// ? evenlySpacedDoubles(startIdx, endIdx, numPlotPoints) +// : expSpaced(startIdx, endIdx, numPlotPoints, exponent, hra); +// +// sortedPPIndices = new int[numPlotPoints]; +// sortedPPAbsRanks = new int[numPlotPoints]; +// sortedPPValues = new float[numPlotPoints]; +// +// for (int pp = 0; pp < numPlotPoints; pp++) { +// final int idx = (int)Math.round(temp[pp]); +// sortedPPIndices[pp] = idx; +// sortedPPAbsRanks[pp] = sortedAbsRanks[idx]; +// sortedPPValues[pp] = sortedStream[idx]; +// } +// +// //Do numTrials for all plotpoints +// for (int t = 0; t < numTrials; t++) { +// doTrial(); +// +// //sumAllocCounts = sk. +// } +// +// // for special metrics for capturing accuracy per byte +// double sumRelStdDev = 0; +// int numRelStdDev = 0; +// double sumAddStdDev = 0; +// int numAddStdDev = 0; +// +// //at this point each of the errQSkArr sketches has a distribution of error from numTrials +// for (int pp = 0 ; pp < numPlotPoints; pp++) { +// final double v = sortedPPValues[pp]; +// final double tr = v / streamLength; //the true rank +// +// //for each of the numErrDistRanks distributions extract the sd Gaussian quantiles +// final double[] errQ = errQSkArr[pp].getQuantiles(gRanks); +// final int uErrCnt = (int)round(errHllSkArr[pp].getEstimate()); +// +// //Plot the row. +// final double relPP = (double)(pp + 1) / numPlotPoints; +// job.printfData(fFmt, relPP, v, tr, +// errQ[0], errQ[1], errQ[2], errQ[3], errQ[4], errQ[5], errQ[6], +// uErrCnt); +// +// if (relPP > 0 && relPP < 1 +// && (hra && relPP < metricsRankRange || !hra && relPP >= 1 - metricsRankRange)) { +// sumAddStdDev += errQ[4]; +// numAddStdDev++; +// } +// if (relPP > 0 && relPP < 1 +// && (!hra && relPP < metricsRankRange || hra && relPP >= 1 - metricsRankRange)) { +// sumRelStdDev += errQ[4] / (hra ? 1 - relPP : relPP); +// numRelStdDev++; +// } +// errQSkArr[pp].reset(); //reset the errQSkArr for next streamLength +// errHllSkArr[pp].reset(); //reset the errHllSkArr for next streamLength +// } +// final int serBytes = sk.smallByteSize(); +// +// // special metrics for capturing accuracy per byte +// final double avgRelStdDevTimesSize = serBytes * sumRelStdDev / numRelStdDev; +// final double avgAddStdDevTimesSize = serBytes * sumAddStdDev / numAddStdDev; +// job.println(LS + "Avg. relative std. dev. times size: " + avgRelStdDevTimesSize); +// job.println( "Avg. additive std. dev. times size: " + avgAddStdDevTimesSize); +// +// job.println(LS + "Serialization Bytes: " + serBytes); +//// job.println(sk.viewCompactorDetail("%5.0f", false)); +// } +// +// /** +// * A trial consists of updating a virgin sketch with a stream of values. +// * Capture the estimated ranks for all plotPoints and then update the errQSkArr with those +// * error values. +// */ +// void doTrial() { +// sk = TDigest.createDigest(K); +// if (shuffle) { Shuffle.shuffle(stream); } +// final int sl = stream.length; +// for (int i = 0; i < sl; i++) { sk.add(stream[i]); } +// //get estimated ranks from sketch for all plot points +// final double[] estRanks = new double[sortedPPValues.length]; +// for (int i = 0; i < sortedPPValues.length; i++) estRanks[i] = sk.cdf(sortedPPValues[i]); +// //compute errors and update HLL for each plotPoint +// for (int pp = 0; pp < numPlotPoints; pp++) { +// final double errorAtPlotPoint = estRanks[pp] - (double)sortedPPAbsRanks[pp] / sl; +// errQSkArr[pp].update(errorAtPlotPoint); //update each of the errQArr sketches +// errHllSkArr[pp].update(errorAtPlotPoint); //unique count of error values +// } +// } +// +//} diff --git a/src/main/java/org/apache/datasketches/characterization/tdigest/TDigestMergeAccuracyProfile.java b/src/main/java/org/apache/datasketches/characterization/tdigest/TDigestMergeAccuracyProfile.java index 15fdd3d..999fd1b 100644 --- a/src/main/java/org/apache/datasketches/characterization/tdigest/TDigestMergeAccuracyProfile.java +++ b/src/main/java/org/apache/datasketches/characterization/tdigest/TDigestMergeAccuracyProfile.java @@ -17,157 +17,157 @@ * under the License. */ -package org.apache.datasketches.characterization.tdigest; - -import static org.apache.datasketches.common.Util.pwr2SeriesNext; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Random; - -import org.apache.datasketches.Job; -import org.apache.datasketches.JobProfile; -import org.apache.datasketches.MonotonicPoints; -import org.apache.datasketches.Properties; - -import com.tdunning.math.stats.TDigest; - -public class TDigestMergeAccuracyProfile implements JobProfile { - private Job job; - private Properties prop; - - private Random rand; - private double[] values; - - @Override - public void start(final Job job) { - this.job = job; - prop = job.getProperties(); - final int lgMin = Integer.parseInt(prop.mustGet("LgMin")); - final int lgMax = Integer.parseInt(prop.mustGet("LgMax")); - final int lgDelta = Integer.parseInt(prop.mustGet("LgDelta")); - final int ppo = Integer.parseInt(prop.mustGet("PPO")); - - final int compression = Integer.parseInt(prop.mustGet("Compression")); - final double[] ranks = {0.01, 0.05, 0.5, 0.95, 0.99}; - final double errorPercentile = Double.parseDouble(prop.mustGet("ErrorPercentile")); - - rand = new Random(); - values = new double[1 << lgMax]; - - final int numSteps; - final boolean useppo; - if (lgDelta < 1) { - numSteps = MonotonicPoints.countPoints(lgMin, lgMax, ppo); - useppo = true; - } else { - numSteps = (lgMax - lgMin) / lgDelta + 1; - useppo = false; - } - - int streamLength = 1 << lgMin; - int lgCurSL = lgMin; - for (int step = 0; step < numSteps; step++) { - - doStreamLength(streamLength, compression, ranks, errorPercentile); - - if (useppo) { - streamLength = (int)pwr2SeriesNext(ppo, streamLength); - } else { - lgCurSL += lgDelta; - streamLength = 1 << lgCurSL; - } - } - } - - @Override - public void shutdown() {} - - @Override - public void cleanup() {} - - void doStreamLength(final int streamLength, final int compression, final double[] ranks, final double errorPercentile) { - final int numTrials = 1 << Integer.parseInt(prop.mustGet("LgTrials")); - - double[][] rankErrors = new double[ranks.length][]; - for (int i = 0; i < ranks.length; i++) rankErrors[i] = new double[numTrials]; - - for (int t = 0; t < numTrials; t++) { - runTrial(t, streamLength, compression, ranks, rankErrors); - } - - job.print(streamLength); - for (int i = 0; i < ranks.length; i++) { - Arrays.sort(rankErrors[i]); - final int errPctIndex = (int)(numTrials * errorPercentile / 100); - final double rankErrorAtPct = rankErrors[i][errPctIndex]; - job.print("\t"); - job.print(rankErrorAtPct * 100); - } - job.print("\n"); - } - - void runTrial(final int trial, final int streamLength, final int compression, final double[] ranks, final double[][] rankErrors) { - final int numSketches = Integer.parseInt(prop.mustGet("NumSketches")); - final ArrayList<TDigest> tds = new ArrayList<>(); - for (int s = 0; s < numSketches; s++) tds.add(TDigest.createDigest(compression)); - int s = 0; - for (int i = 0; i < streamLength; i++) { - values[i] = rand.nextDouble(); - tds.get(s).add(values[i]); - s++; - if (s == numSketches) s = 0; - } - TDigest tdMerge = TDigest.createDigest(compression); - tdMerge.add(tds); - Arrays.sort(values, 0, streamLength); - for (int i = 0; i < ranks.length; i++) { - final double quantile = values[(int)((streamLength - 1) * ranks[i])]; - final double trueRank = computeTrueRank(values, streamLength, quantile); - rankErrors[i][trial] = Math.abs(trueRank - tdMerge.cdf(quantile)); - } - } - - static double computeTrueRank(final double[] values, final int streamLength, final double value) { - final int lower = lowerBound(values, 0, streamLength, value); - final int upper = upperBound(values, lower, streamLength, value); - return (lower + upper) / 2.0 / streamLength; - } - - static int lowerBound(final double[] values, int first, final int last, final double value) { - int current; - int step; - int count = last - first; - while (count > 0) { - current = first; - step = count / 2; - current += step; - if (values[current] < value) { - first = ++current; - count -= step + 1; - } else { - count = step; - } - } - return first; - } - - static int upperBound(final double[] values, int first, final int last, final double value) { - int current; - int step; - int count = last - first; - while (count > 0) { - current = first; - step = count / 2; - current += step; - if (!(value < values[current])) { - first = ++current; - count -= step + 1; - } else { - count = step; - } - } - return first; - } - -} +//package org.apache.datasketches.characterization.tdigest; +// +//import static org.apache.datasketches.common.Util.pwr2SeriesNext; +// +//import java.util.ArrayList; +//import java.util.Arrays; +//import java.util.Random; +// +//import org.apache.datasketches.Job; +//import org.apache.datasketches.JobProfile; +//import org.apache.datasketches.MonotonicPoints; +//import org.apache.datasketches.Properties; +// +//import com.tdunning.math.stats.TDigest; +// +//public class TDigestMergeAccuracyProfile implements JobProfile { +// private Job job; +// private Properties prop; +// +// private Random rand; +// private double[] values; +// +// @Override +// public void start(final Job job) { +// this.job = job; +// prop = job.getProperties(); +// final int lgMin = Integer.parseInt(prop.mustGet("LgMin")); +// final int lgMax = Integer.parseInt(prop.mustGet("LgMax")); +// final int lgDelta = Integer.parseInt(prop.mustGet("LgDelta")); +// final int ppo = Integer.parseInt(prop.mustGet("PPO")); +// +// final int compression = Integer.parseInt(prop.mustGet("Compression")); +// final double[] ranks = {0.01, 0.05, 0.5, 0.95, 0.99}; +// final double errorPercentile = Double.parseDouble(prop.mustGet("ErrorPercentile")); +// +// rand = new Random(); +// values = new double[1 << lgMax]; +// +// final int numSteps; +// final boolean useppo; +// if (lgDelta < 1) { +// numSteps = MonotonicPoints.countPoints(lgMin, lgMax, ppo); +// useppo = true; +// } else { +// numSteps = (lgMax - lgMin) / lgDelta + 1; +// useppo = false; +// } +// +// int streamLength = 1 << lgMin; +// int lgCurSL = lgMin; +// for (int step = 0; step < numSteps; step++) { +// +// doStreamLength(streamLength, compression, ranks, errorPercentile); +// +// if (useppo) { +// streamLength = (int)pwr2SeriesNext(ppo, streamLength); +// } else { +// lgCurSL += lgDelta; +// streamLength = 1 << lgCurSL; +// } +// } +// } +// +// @Override +// public void shutdown() {} +// +// @Override +// public void cleanup() {} +// +// void doStreamLength(final int streamLength, final int compression, final double[] ranks, final double errorPercentile) { +// final int numTrials = 1 << Integer.parseInt(prop.mustGet("LgTrials")); +// +// double[][] rankErrors = new double[ranks.length][]; +// for (int i = 0; i < ranks.length; i++) rankErrors[i] = new double[numTrials]; +// +// for (int t = 0; t < numTrials; t++) { +// runTrial(t, streamLength, compression, ranks, rankErrors); +// } +// +// job.print(streamLength); +// for (int i = 0; i < ranks.length; i++) { +// Arrays.sort(rankErrors[i]); +// final int errPctIndex = (int)(numTrials * errorPercentile / 100); +// final double rankErrorAtPct = rankErrors[i][errPctIndex]; +// job.print("\t"); +// job.print(rankErrorAtPct * 100); +// } +// job.print("\n"); +// } +// +// void runTrial(final int trial, final int streamLength, final int compression, final double[] ranks, final double[][] rankErrors) { +// final int numSketches = Integer.parseInt(prop.mustGet("NumSketches")); +// final ArrayList<TDigest> tds = new ArrayList<>(); +// for (int s = 0; s < numSketches; s++) tds.add(TDigest.createDigest(compression)); +// int s = 0; +// for (int i = 0; i < streamLength; i++) { +// values[i] = rand.nextDouble(); +// tds.get(s).add(values[i]); +// s++; +// if (s == numSketches) s = 0; +// } +// TDigest tdMerge = TDigest.createDigest(compression); +// tdMerge.add(tds); +// Arrays.sort(values, 0, streamLength); +// for (int i = 0; i < ranks.length; i++) { +// final double quantile = values[(int)((streamLength - 1) * ranks[i])]; +// final double trueRank = computeTrueRank(values, streamLength, quantile); +// rankErrors[i][trial] = Math.abs(trueRank - tdMerge.cdf(quantile)); +// } +// } +// +// static double computeTrueRank(final double[] values, final int streamLength, final double value) { +// final int lower = lowerBound(values, 0, streamLength, value); +// final int upper = upperBound(values, lower, streamLength, value); +// return (lower + upper) / 2.0 / streamLength; +// } +// +// static int lowerBound(final double[] values, int first, final int last, final double value) { +// int current; +// int step; +// int count = last - first; +// while (count > 0) { +// current = first; +// step = count / 2; +// current += step; +// if (values[current] < value) { +// first = ++current; +// count -= step + 1; +// } else { +// count = step; +// } +// } +// return first; +// } +// +// static int upperBound(final double[] values, int first, final int last, final double value) { +// int current; +// int step; +// int count = last - first; +// while (count > 0) { +// current = first; +// step = count / 2; +// current += step; +// if (!(value < values[current])) { +// first = ++current; +// count -= step + 1; +// } else { +// count = step; +// } +// } +// return first; +// } +// +//} diff --git a/src/main/java/org/apache/datasketches/characterization/tdigest/TDigestSpeedProfile.java b/src/main/java/org/apache/datasketches/characterization/tdigest/TDigestSpeedProfile.java index b4e2efc..8109ad1 100644 --- a/src/main/java/org/apache/datasketches/characterization/tdigest/TDigestSpeedProfile.java +++ b/src/main/java/org/apache/datasketches/characterization/tdigest/TDigestSpeedProfile.java @@ -17,134 +17,134 @@ * under the License. */ -package org.apache.datasketches.characterization.tdigest; - -import java.util.Random; - -import org.apache.datasketches.Properties; -import org.apache.datasketches.characterization.quantiles.BaseQuantilesSpeedProfile; - -import com.tdunning.math.stats.TDigest; - -public class TDigestSpeedProfile extends BaseQuantilesSpeedProfile { - - private static final Random rnd = new Random(); - private int k; - private double[] inputValues; - private int numQueryValues; - private double[] rankQueryValues; - private double[] quantileQueryValues; - - long buildTimeNs; - long updateTimeNs; - long getRankTimeNs; - long getQuantileTimeNs; - long serializeTimeNs; - long deserializeTimeNs; - long fullSerializedSizeBytes; - long smallSerializedSizeBytes; - int numCentroids; - - @Override - public void configure(final int k, final int numQueryValues, final Properties properties) { - this.k = k; - this.numQueryValues = numQueryValues; - } - - @Override - public void prepareTrial(final int streamLength) { - // prepare input data - inputValues = new double[streamLength]; - for (int i = 0; i < streamLength; i++) { - inputValues[i] = rnd.nextDouble(); - } - // prepare query data - quantileQueryValues = new double[numQueryValues]; - for (int i = 0; i < numQueryValues; i++) { - quantileQueryValues[i] = rnd.nextDouble(); - } - rankQueryValues = new double[numQueryValues]; - for (int i = 0; i < numQueryValues; i++) { - rankQueryValues[i] = rnd.nextDouble(); - } - resetStats(); - } - - @Override - public void doTrial() { - final long startBuild = System.nanoTime(); - final TDigest sketch = TDigest.createDigest(k); - final long stopBuild = System.nanoTime(); - buildTimeNs += stopBuild - startBuild; - - final long startUpdate = System.nanoTime(); - for (int i = 0; i < inputValues.length; i++) { - sketch.add(inputValues[i]); - } - final long stopUpdate = System.nanoTime(); - updateTimeNs += stopUpdate - startUpdate; - - final long startGetRank = System.nanoTime(); - for (final double value: rankQueryValues) { - sketch.cdf(value); - } - final long stopGetRank = System.nanoTime(); - getRankTimeNs += stopGetRank - startGetRank; - - final long startGetQuantile = System.nanoTime(); - for (final double value: quantileQueryValues) { - sketch.quantile(value); - } - final long stopGetQuantile = System.nanoTime(); - getQuantileTimeNs += stopGetQuantile - startGetQuantile; - -// final long startSerialize = System.nanoTime(); -// final byte[] bytes = sketch.asBytes(null); -// final long stopSerialize = System.nanoTime(); -// serializeTimeNs += stopSerialize - startSerialize; - -// final long startDeserialize = System.nanoTime(); -// final deserializedSketch = MergingDigest.fromBytes(bytes); -// final long stopDeserialize = System.nanoTime(); -// deserializeTimeNs += stopDeserialize - startDeserialize; - - numCentroids += sketch.centroidCount(); - fullSerializedSizeBytes += sketch.byteSize(); - smallSerializedSizeBytes += sketch.smallByteSize(); - } - - @Override - public String getHeader() { - return "Stream\tTrials\tBuild\tUpdate\tRank\tQuant\tSer\tDeser\tCentroids\tFullSize\tSmallSize"; - } - - @Override - public String getStats(final int streamLength, final int numTrials, final int numQueryValues) { - return String.format("%d\t%d\t%.1f\t%.1f\t%.1f\t%.1f\t%.1f\t%.1f\t%d\t%d\t%d", - streamLength, - numTrials, - (double) buildTimeNs / numTrials, - (double) updateTimeNs / numTrials / streamLength, - (double) getRankTimeNs / numTrials / numQueryValues, - (double) getQuantileTimeNs / numTrials / numQueryValues, - (double) serializeTimeNs / numTrials, - (double) deserializeTimeNs / numTrials, - numCentroids / numTrials, - fullSerializedSizeBytes / numTrials, - smallSerializedSizeBytes / numTrials - ); - } - - private void resetStats() { - buildTimeNs = 0; - updateTimeNs = 0; - getRankTimeNs = 0; - getQuantileTimeNs = 0; - serializeTimeNs = 0; - deserializeTimeNs = 0; - fullSerializedSizeBytes = 0; - smallSerializedSizeBytes = 0; - numCentroids = 0; - } - -} +//package org.apache.datasketches.characterization.tdigest; +// +//import java.util.Random; +// +//import org.apache.datasketches.Properties; +//import org.apache.datasketches.characterization.quantiles.BaseQuantilesSpeedProfile; +// +//import com.tdunning.math.stats.TDigest; +// +//public class TDigestSpeedProfile extends BaseQuantilesSpeedProfile { +// +// private static final Random rnd = new Random(); +// private int k; +// private double[] inputValues; +// private int numQueryValues; +// private double[] rankQueryValues; +// private double[] quantileQueryValues; +// +// long buildTimeNs; +// long updateTimeNs; +// long getRankTimeNs; +// long getQuantileTimeNs; +// long serializeTimeNs; +// long deserializeTimeNs; +// long fullSerializedSizeBytes; +// long smallSerializedSizeBytes; +// int numCentroids; +// +// @Override +// public void configure(final int k, final int numQueryValues, final Properties properties) { +// this.k = k; +// this.numQueryValues = numQueryValues; +// } +// +// @Override +// public void prepareTrial(final int streamLength) { +// // prepare input data +// inputValues = new double[streamLength]; +// for (int i = 0; i < streamLength; i++) { +// inputValues[i] = rnd.nextDouble(); +// } +// // prepare query data +// quantileQueryValues = new double[numQueryValues]; +// for (int i = 0; i < numQueryValues; i++) { +// quantileQueryValues[i] = rnd.nextDouble(); +// } +// rankQueryValues = new double[numQueryValues]; +// for (int i = 0; i < numQueryValues; i++) { +// rankQueryValues[i] = rnd.nextDouble(); +// } +// resetStats(); +// } +// +// @Override +// public void doTrial() { +// final long startBuild = System.nanoTime(); +// final TDigest sketch = TDigest.createDigest(k); +// final long stopBuild = System.nanoTime(); +// buildTimeNs += stopBuild - startBuild; +// +// final long startUpdate = System.nanoTime(); +// for (int i = 0; i < inputValues.length; i++) { +// sketch.add(inputValues[i]); +// } +// final long stopUpdate = System.nanoTime(); +// updateTimeNs += stopUpdate - startUpdate; +// +// final long startGetRank = System.nanoTime(); +// for (final double value: rankQueryValues) { +// sketch.cdf(value); +// } +// final long stopGetRank = System.nanoTime(); +// getRankTimeNs += stopGetRank - startGetRank; +// +// final long startGetQuantile = System.nanoTime(); +// for (final double value: quantileQueryValues) { +// sketch.quantile(value); +// } +// final long stopGetQuantile = System.nanoTime(); +// getQuantileTimeNs += stopGetQuantile - startGetQuantile; +// +//// final long startSerialize = System.nanoTime(); +//// final byte[] bytes = sketch.asBytes(null); +//// final long stopSerialize = System.nanoTime(); +//// serializeTimeNs += stopSerialize - startSerialize; +// +//// final long startDeserialize = System.nanoTime(); +//// final deserializedSketch = MergingDigest.fromBytes(bytes); +//// final long stopDeserialize = System.nanoTime(); +//// deserializeTimeNs += stopDeserialize - startDeserialize; +// +// numCentroids += sketch.centroidCount(); +// fullSerializedSizeBytes += sketch.byteSize(); +// smallSerializedSizeBytes += sketch.smallByteSize(); +// } +// +// @Override +// public String getHeader() { +// return "Stream\tTrials\tBuild\tUpdate\tRank\tQuant\tSer\tDeser\tCentroids\tFullSize\tSmallSize"; +// } +// +// @Override +// public String getStats(final int streamLength, final int numTrials, final int numQueryValues) { +// return String.format("%d\t%d\t%.1f\t%.1f\t%.1f\t%.1f\t%.1f\t%.1f\t%d\t%d\t%d", +// streamLength, +// numTrials, +// (double) buildTimeNs / numTrials, +// (double) updateTimeNs / numTrials / streamLength, +// (double) getRankTimeNs / numTrials / numQueryValues, +// (double) getQuantileTimeNs / numTrials / numQueryValues, +// (double) serializeTimeNs / numTrials, +// (double) deserializeTimeNs / numTrials, +// numCentroids / numTrials, +// fullSerializedSizeBytes / numTrials, +// smallSerializedSizeBytes / numTrials +// ); +// } +// +// private void resetStats() { +// buildTimeNs = 0; +// updateTimeNs = 0; +// getRankTimeNs = 0; +// getQuantileTimeNs = 0; +// serializeTimeNs = 0; +// deserializeTimeNs = 0; +// fullSerializedSizeBytes = 0; +// smallSerializedSizeBytes = 0; +// numCentroids = 0; +// } +// +//} diff --git a/src/main/java/org/apache/datasketches/characterization/tdigest/package-info.java b/src/main/java/org/apache/datasketches/characterization/tdigest/package-info.java new file mode 100644 index 0000000..e496ef9 --- /dev/null +++ b/src/main/java/org/apache/datasketches/characterization/tdigest/package-info.java @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.characterization.tdigest; diff --git a/tools/SketchesCheckstyle.xml b/tools/SketchesCheckstyle.xml index a83a12a..76dc167 100644 --- a/tools/SketchesCheckstyle.xml +++ b/tools/SketchesCheckstyle.xml @@ -54,7 +54,7 @@ under the License. <!-- Size Violations --> <module name="LineLength"> <property name="severity" value="warning"/> - <property name="max" value="120"/> + <property name="max" value="140"/> <property name="ignorePattern" value="^package.*|^import.*|a href|href|http://|https://|ftp://"/> <!-- <metadata name="net.sf.eclipsecs.core.lastEnabledSeverity" value="inherit"/> --> </module> --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
