This is an automated email from the ASF dual-hosted git repository. leerho pushed a commit to branch fix_array_of_strings_summary in repository https://gitbox.apache.org/repos/asf/datasketches-java.git
commit 6bda431723cf40b87ad640e40579f50f54a5a2d8 Author: Lee Rhodes <[email protected]> AuthorDate: Mon Dec 22 14:11:01 2025 -0800 Fix ArrayOfStringsSummary and add .mvn to root. --- .gitignore | 1 + .../tuple/strings/ArrayOfStringsSummary.java | 35 ++++++----- .../tuple/strings/ArrayOfStringsSketchTest.java | 25 ++++++-- .../strings/ArrayOfStringsSummary_Issue699.java | 69 ++++++++++++++++++++++ 4 files changed, 106 insertions(+), 24 deletions(-) diff --git a/.gitignore b/.gitignore index f03c5078f..f0a5c0998 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,7 @@ # Additional tools .clover/ +.mvn/ # OSX files **/.DS_Store diff --git a/src/main/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummary.java b/src/main/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummary.java index 4197cd285..5008fecd5 100644 --- a/src/main/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummary.java +++ b/src/main/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummary.java @@ -36,19 +36,16 @@ import org.apache.datasketches.tuple.UpdatableSummary; */ public final class ArrayOfStringsSummary implements UpdatableSummary<String[]> { - private String[] stringArr = null; + private String[] stringArr = new String[] {}; //empty string array; /** * No argument constructor. */ - ArrayOfStringsSummary() { //required for ArrayOfStringsSummaryFactory - stringArr = null; - } + ArrayOfStringsSummary() {} //required for ArrayOfStringsSummaryFactory //Used by copy() and in test ArrayOfStringsSummary(final String[] stringArr) { - this.stringArr = stringArr.clone(); - checkNumNodes(stringArr.length); + update(stringArr); } //used by fromMemorySegment and in test @@ -87,10 +84,20 @@ public final class ArrayOfStringsSummary implements UpdatableSummary<String[]> { this.stringArr = stringArr; } + //From UpdatableSummary + + @Override + public final ArrayOfStringsSummary update(final String[] value) { + if (value == null) { stringArr = new String[] {}; } + else { stringArr = value.clone(); } + return this; + } + + //From Summary + @Override public ArrayOfStringsSummary copy() { - final ArrayOfStringsSummary nodes = new ArrayOfStringsSummary(stringArr); - return nodes; + return new ArrayOfStringsSummary(stringArr); } @Override @@ -112,16 +119,6 @@ public final class ArrayOfStringsSummary implements UpdatableSummary<String[]> { return out; } - //From UpdatableSummary - - @Override - public ArrayOfStringsSummary update(final String[] value) { - if (stringArr == null) { - stringArr = value.clone(); - } - return this; - } - //From Object @Override @@ -139,6 +136,8 @@ public final class ArrayOfStringsSummary implements UpdatableSummary<String[]> { return thisStr.equals(thatStr); } + //Local + /** * Returns the nodes array for this summary. * @return the nodes array for this summary. diff --git a/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSketchTest.java b/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSketchTest.java index ef39e6b90..b5cf4a39a 100644 --- a/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSketchTest.java +++ b/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSketchTest.java @@ -49,22 +49,30 @@ public class ArrayOfStringsSketchTest { for (int i = 0; i < len; i++) { sketch1.update(strArrArr[i], strArrArr[i]); } + println("Sketch1"); + printSummaries(sketch1.iterator()); + sketch1.update(strArrArr[0], strArrArr[0]); //insert duplicate + println("Sketch1 updated with a duplicate"); printSummaries(sketch1.iterator()); - byte[] array = sketch1.toByteArray(); - MemorySegment wseg = MemorySegment.ofArray(array); + + MemorySegment wseg = MemorySegment.ofArray(sketch1.toByteArray()); ArrayOfStringsTupleSketch sketch2 = new ArrayOfStringsTupleSketch(wseg); + println("Sketch2 = Sketch1 via SerDe"); printSummaries(sketch2.iterator()); - checkSummaries(sketch2, sketch2); + checkSummariesEqual(sketch2, sketch2); String[] strArr3 = {"g", "h" }; sketch2.update(strArr3, strArr3); - + println("Sketch2 with a new row"); + printSummaries(sketch2.iterator()); + TupleUnion<ArrayOfStringsSummary> union = new TupleUnion<>(new ArrayOfStringsSummarySetOperations()); union.union(sketch1); union.union(sketch2); CompactTupleSketch<ArrayOfStringsSummary> csk = union.getResult(); - //printSummaries(csk.iterator()); + println("Result of union of Sketch1, Sketch2"); + printSummaries(csk.iterator()); assertEquals(csk.getRetainedEntries(), 4); TupleIntersection<ArrayOfStringsSummary> inter = @@ -72,17 +80,21 @@ public class ArrayOfStringsSketchTest { inter.intersect(sketch1); inter.intersect(sketch2); csk = inter.getResult(); + println("Intersect Sketch1, Sketch2"); + printSummaries(csk.iterator()); assertEquals(csk.getRetainedEntries(), 3); TupleAnotB<ArrayOfStringsSummary> aNotB = new TupleAnotB<>(); aNotB.setA(sketch2); aNotB.notB(sketch1); csk = aNotB.getResult(true); + println("AnotB(Sketch2, Sketch1)"); + printSummaries(csk.iterator()); assertEquals(csk.getRetainedEntries(), 1); } - private static void checkSummaries(ArrayOfStringsTupleSketch sk1, ArrayOfStringsTupleSketch sk2) { + private static void checkSummariesEqual(ArrayOfStringsTupleSketch sk1, ArrayOfStringsTupleSketch sk2) { TupleSketchIterator<ArrayOfStringsSummary> it1 = sk1.iterator(); TupleSketchIterator<ArrayOfStringsSummary> it2 = sk2.iterator(); while(it1.next() && it2.next()) { @@ -100,6 +112,7 @@ public class ArrayOfStringsSketchTest { } println(""); } + println(""); } @Test diff --git a/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummary_Issue699.java b/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummary_Issue699.java new file mode 100644 index 000000000..febe924f9 --- /dev/null +++ b/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummary_Issue699.java @@ -0,0 +1,69 @@ +package org.apache.datasketches.tuple.strings; + +import static org.apache.datasketches.common.Util.LS; + +import org.apache.datasketches.theta.UpdatableThetaSketch; +import org.apache.datasketches.tuple.TupleSketchIterator; +import org.apache.datasketches.tuple.TupleUnion; +import org.testng.annotations.Test; + +public class ArrayOfStringsSummary_Issue699 { + UpdatableThetaSketch thetaSk = UpdatableThetaSketch.builder().build(); + ArrayOfStringsTupleSketch tupleSk = new ArrayOfStringsTupleSketch(); + TupleUnion<ArrayOfStringsSummary> union = new TupleUnion<>(new ArrayOfStringsSummarySetOperations()); + + @Test + void go() { + thetaSk.update("a"); + thetaSk.update("b"); + thetaSk.update("c"); + + tupleSk.update("a", new String[] {"x", "y"}); + tupleSk.update("b", new String[] {"z"}); + tupleSk.update("e", new String[] {"x", "z"}); + + println("Print Tuple Summary before union"); + printSummaries(tupleSk.iterator()); + + union.union(tupleSk); + union.union(thetaSk, new ArrayOfStringsSummary()); //enable this or the next + //union.union(thetaSk, new ArrayOfStringsSummary(new String[] {"u", "v"})); //optional association + + println("Print Tuple Summary after union"); + printSummaries(union.getResult().iterator()); + } + + @Test + void checkCopy() { + ArrayOfStringsSummary aoss = new ArrayOfStringsSummary(); + ArrayOfStringsSummary aoss2 = aoss.copy(); + } + + @Test + void checkToByteArray() { + ArrayOfStringsSummary aoss = new ArrayOfStringsSummary(); + byte[] bytes = aoss.toByteArray(); + println("byte[].length = " + bytes.length); + } + + + static void printSummaries(TupleSketchIterator<ArrayOfStringsSummary> it) { + while (it.next()) { + String[] strArr = it.getSummary().getValue(); + if (strArr.length == 0) { print("-"); } //illustrates an empty string array + for (String s : strArr) { + print(s + ", "); + } + println(""); + } + println(""); + } + + private static void println(Object o) { + print(o + LS); + } + + private static void print(Object o) { + //System.out.print(o.toString()); + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
