This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch tuple_no_base64
in repository https://gitbox.apache.org/repos/asf/datasketches-bigquery.git

commit d21837b38a49c97aa08a1e13af93387d4768f96f
Author: AlexanderSaydakov <[email protected]>
AuthorDate: Tue Sep 24 22:37:09 2024 -0700

    removed base64 encoding-decoding
---
 tuple/sqlx/tuple_sketch_int64_a_not_b.sqlx         |   2 +-
 tuple/sqlx/tuple_sketch_int64_a_not_b_seed.sqlx    |  11 +-
 ...qlx => tuple_sketch_int64_filter_low_high.sqlx} |  18 +--
 .../tuple_sketch_int64_filter_low_high_seed.sqlx   |   9 +-
 ...x => tuple_sketch_int64_from_theta_sketch.sqlx} |  18 +--
 .../tuple_sketch_int64_from_theta_sketch_seed.sqlx |  10 +-
 .../sqlx/tuple_sketch_int64_get_estimate_seed.sqlx |  10 +-
 .../tuple_sketch_int64_intersection_seed_mode.sqlx |  12 +-
 ...tuple_sketch_int64_jaccard_similarity_seed.sqlx |   2 +
 tuple/sqlx/tuple_sketch_int64_to_string_seed.sqlx  |  10 +-
 .../tuple_sketch_int64_union_lgk_seed_mode.sqlx    |  12 +-
 tuple/test/tuple_sketch_int_test.sql               |  72 ++++++++++--
 tuple/tuple_sketch_int64.cpp                       | 123 +++++++--------------
 13 files changed, 143 insertions(+), 166 deletions(-)

diff --git a/tuple/sqlx/tuple_sketch_int64_a_not_b.sqlx 
b/tuple/sqlx/tuple_sketch_int64_a_not_b.sqlx
index e8afd6d..e964604 100644
--- a/tuple/sqlx/tuple_sketch_int64_a_not_b.sqlx
+++ b/tuple/sqlx/tuple_sketch_int64_a_not_b.sqlx
@@ -34,6 +34,6 @@ Returns: a Compact Tuple Sketch as BYTES.
 For more information:
  - https://datasketches.apache.org/docs/Tuple/TupleSketches.html
 '''
-AS (
+) AS (
   $BQ_DATASET.tuple_sketch_int64_a_not_b_seed(sketchA, sketchB, NULL)
 );
diff --git a/tuple/sqlx/tuple_sketch_int64_a_not_b_seed.sqlx 
b/tuple/sqlx/tuple_sketch_int64_a_not_b_seed.sqlx
index e1efede..b069b20 100644
--- a/tuple/sqlx/tuple_sketch_int64_a_not_b_seed.sqlx
+++ b/tuple/sqlx/tuple_sketch_int64_a_not_b_seed.sqlx
@@ -24,6 +24,7 @@ RETURNS BYTES
 LANGUAGE js
 OPTIONS (
   library=["gs://$GCS_BUCKET/tuple_sketch_int64.js"],
+  js_parameter_encoding_mode='STANDARD',
   description = '''Computes a sketch that represents the scalar set difference 
of sketchA and not sketchB.
 Note that cardinality estimation accuracy, plots, and error tables are the 
same as the Theta Sketch.
 This function only applies to Tuple Sketches with an INT64 summary column.
@@ -37,16 +38,10 @@ For more information:
  - https://datasketches.apache.org/docs/Tuple/TupleSketches.html
 '''
 ) AS R"""
-const default_seed = BigInt(Module.DEFAULT_SEED);
 try {
-  var a_not_b = null;
-  try {
-    a_not_b = new Module.tuple_a_not_b_int64(seed ? BigInt(seed) : 
default_seed);
-    return a_not_b.computeWithB64ReturnB64(sketchA, sketchB, seed ? 
BigInt(seed) : default_seed);
-  } finally {
-    if (a_not_b != null) a_not_b.delete();
-  }
+  return Module.tupleAnotBInt64(sketchA, sketchB, seed ? BigInt(seed) : 
BigInt(Module.DEFAULT_SEED));
 } catch (e) {
+  if (e.message != null) throw e;
   throw new Error(Module.getExceptionMessage(e));
 }
 """;
diff --git a/tuple/sqlx/tuple_sketch_int64_filter_low_high_seed.sqlx 
b/tuple/sqlx/tuple_sketch_int64_filter_low_high.sqlx
similarity index 76%
copy from tuple/sqlx/tuple_sketch_int64_filter_low_high_seed.sqlx
copy to tuple/sqlx/tuple_sketch_int64_filter_low_high.sqlx
index 3edf784..7f660fb 100644
--- a/tuple/sqlx/tuple_sketch_int64_filter_low_high_seed.sqlx
+++ b/tuple/sqlx/tuple_sketch_int64_filter_low_high.sqlx
@@ -19,11 +19,9 @@
 
 config { hasOutput: true }
 
-CREATE OR REPLACE FUNCTION ${self()}(sketch BYTES, low INT64, high INT64, seed 
INT64)
+CREATE OR REPLACE FUNCTION ${self()}(sketch BYTES, low INT64, high INT64)
 RETURNS BYTES
-LANGUAGE js
 OPTIONS (
-  library=["gs://$GCS_BUCKET/tuple_sketch_int64.js"],
   description = '''Returns a Tuple Sketch computed from the given sketch 
filtered by the given low and high values. 
 This returns a compact tuple sketch that contains the subset of rows of the 
give sketch where the
 summary column is greater-than or equal to the given low and less-than or 
equal to the given high.
@@ -33,18 +31,12 @@ This function only applies to Tuple Sketches with an INT64 
summary column.
 Param sketch: the given Tuple Sketch. This may not be NULL.
 Param low: the given low INT64. This may not be NULL.
 Param high: the given high INT64. This may not be NULL.
-Param seed: This is used to confirm that the given sketches were configured 
with the correct seed. A NULL specifies the default seed = 9001.
+Assumed Default Param seed: 9001.
 Returns: a Compact Tuple Sketch as BYTES.
 
 For more information:
  - https://datasketches.apache.org/docs/Tuple/TupleSketches.html
 '''
-) AS R"""
-var sketchObject = null;
-try {
-  sketchObject = Module.compact_tuple_sketch_int64.deserializeFromB64(sketch, 
seed ? BigInt(seed) : BigInt(Module.DEFAULT_SEED));
-  return sketchObject.filterB64(low, high);
-} finally {
-  if (sketchObject != null) sketchObject.delete();
-}
-""";
+) AS (
+  $BQ_DATASET.tuple_sketch_int64_filter_low_high_seed(sketch, low, high, NULL)
+);
diff --git a/tuple/sqlx/tuple_sketch_int64_filter_low_high_seed.sqlx 
b/tuple/sqlx/tuple_sketch_int64_filter_low_high_seed.sqlx
index 3edf784..f0f6edd 100644
--- a/tuple/sqlx/tuple_sketch_int64_filter_low_high_seed.sqlx
+++ b/tuple/sqlx/tuple_sketch_int64_filter_low_high_seed.sqlx
@@ -24,6 +24,7 @@ RETURNS BYTES
 LANGUAGE js
 OPTIONS (
   library=["gs://$GCS_BUCKET/tuple_sketch_int64.js"],
+  js_parameter_encoding_mode='STANDARD',
   description = '''Returns a Tuple Sketch computed from the given sketch 
filtered by the given low and high values. 
 This returns a compact tuple sketch that contains the subset of rows of the 
give sketch where the
 summary column is greater-than or equal to the given low and less-than or 
equal to the given high.
@@ -42,9 +43,9 @@ For more information:
 ) AS R"""
 var sketchObject = null;
 try {
-  sketchObject = Module.compact_tuple_sketch_int64.deserializeFromB64(sketch, 
seed ? BigInt(seed) : BigInt(Module.DEFAULT_SEED));
-  return sketchObject.filterB64(low, high);
-} finally {
-  if (sketchObject != null) sketchObject.delete();
+  return Module.compact_tuple_sketch_int64.filterLowHigh(sketch, Number(low), 
Number(high), seed ? BigInt(seed) : BigInt(Module.DEFAULT_SEED));
+} catch (e) {
+  if (e.message != null) throw e;
+  throw new Error(Module.getExceptionMessage(e));
 }
 """;
diff --git a/tuple/sqlx/tuple_sketch_int64_from_theta_sketch_seed.sqlx 
b/tuple/sqlx/tuple_sketch_int64_from_theta_sketch.sqlx
similarity index 70%
copy from tuple/sqlx/tuple_sketch_int64_from_theta_sketch_seed.sqlx
copy to tuple/sqlx/tuple_sketch_int64_from_theta_sketch.sqlx
index 8a99ec3..11cc7e8 100644
--- a/tuple/sqlx/tuple_sketch_int64_from_theta_sketch_seed.sqlx
+++ b/tuple/sqlx/tuple_sketch_int64_from_theta_sketch.sqlx
@@ -19,28 +19,20 @@
 
 config { hasOutput: true }
 
-CREATE OR REPLACE FUNCTION ${self()}(sketch BYTES, value INT64, seed INT64)
+CREATE OR REPLACE FUNCTION ${self()}(sketch BYTES, value INT64)
 RETURNS BYTES
-LANGUAGE js
 OPTIONS (
-  library=["gs://$GCS_BUCKET/tuple_sketch_int64.js"],
   description = '''Converts the given Theta Sketch into a Tuple Sketch with a 
INT64 summary column set to the given INT64 value.
 Note that cardinality estimation accuracy, plots, and error tables are the 
same as the Theta Sketch.
 
 Param sketch: the given Theta Sketch. This may not be NULL.
 Param value: the given INT64 value. This may not be NULL.
-Param seed: This is used to confirm that the given sketches were configured 
with the correct seed. A NULL specifies the default seed = 9001.
+Assumed Default Param seed: 9001.
 Returns: a Tuple Sketch with an INT64 summary column as BYTES.
 
 For more information:
  - https://datasketches.apache.org/docs/Tuple/TupleSketches.html
 '''
-) AS R"""
-var sketchObject = null;
-try {
-  sketchObject = Module.compact_tuple_sketch_int64.convertThetaFromB64(sketch, 
BigInt(value), seed ? BigInt(seed) : BigInt(Module.DEFAULT_SEED));
-  return sketchObject.serializeAsB64();
-} finally {
-  if (sketchObject != null) sketchObject.delete();
-}
-""";
+) AS (
+  $BQ_DATASET.tuple_sketch_int64_from_theta_sketch_seed(sketch, value, NULL)
+);
diff --git a/tuple/sqlx/tuple_sketch_int64_from_theta_sketch_seed.sqlx 
b/tuple/sqlx/tuple_sketch_int64_from_theta_sketch_seed.sqlx
index 8a99ec3..6878e6f 100644
--- a/tuple/sqlx/tuple_sketch_int64_from_theta_sketch_seed.sqlx
+++ b/tuple/sqlx/tuple_sketch_int64_from_theta_sketch_seed.sqlx
@@ -24,6 +24,7 @@ RETURNS BYTES
 LANGUAGE js
 OPTIONS (
   library=["gs://$GCS_BUCKET/tuple_sketch_int64.js"],
+  js_parameter_encoding_mode='STANDARD',
   description = '''Converts the given Theta Sketch into a Tuple Sketch with a 
INT64 summary column set to the given INT64 value.
 Note that cardinality estimation accuracy, plots, and error tables are the 
same as the Theta Sketch.
 
@@ -36,11 +37,10 @@ For more information:
  - https://datasketches.apache.org/docs/Tuple/TupleSketches.html
 '''
 ) AS R"""
-var sketchObject = null;
 try {
-  sketchObject = Module.compact_tuple_sketch_int64.convertThetaFromB64(sketch, 
BigInt(value), seed ? BigInt(seed) : BigInt(Module.DEFAULT_SEED));
-  return sketchObject.serializeAsB64();
-} finally {
-  if (sketchObject != null) sketchObject.delete();
+  return Module.compact_tuple_sketch_int64.convertTheta(sketch, BigInt(value), 
seed ? BigInt(seed) : BigInt(Module.DEFAULT_SEED));
+} catch (e) {
+  if (e.message != null) throw e;
+  throw new Error(Module.getExceptionMessage(e));
 }
 """;
diff --git a/tuple/sqlx/tuple_sketch_int64_get_estimate_seed.sqlx 
b/tuple/sqlx/tuple_sketch_int64_get_estimate_seed.sqlx
index a04dd9f..2228e37 100644
--- a/tuple/sqlx/tuple_sketch_int64_get_estimate_seed.sqlx
+++ b/tuple/sqlx/tuple_sketch_int64_get_estimate_seed.sqlx
@@ -24,6 +24,7 @@ RETURNS FLOAT64
 LANGUAGE js
 OPTIONS (
   library=["gs://$GCS_BUCKET/tuple_sketch_int64.js"],
+  js_parameter_encoding_mode='STANDARD',
   description = '''Returns the cardinality estimate of the given Tuple Sketch.
 Note that cardinality estimation accuracy, plots, and error tables are the 
same as the Theta Sketch.
 This function only applies to Tuple Sketches with an INT64 summary column.
@@ -37,14 +38,9 @@ For more information:
 '''
 ) AS R"""
 try {
-  var sketchObject = null;
-  try {
-    sketchObject = 
Module.compact_tuple_sketch_int64.deserializeFromB64(sketch, seed ? 
BigInt(seed) : BigInt(Module.DEFAULT_SEED));
-    return sketchObject.getEstimate();
-  } finally {
-    if (sketchObject != null) sketchObject.delete();
-  }
+  return Module.compact_tuple_sketch_int64.getEstimate(sketch, seed ? 
BigInt(seed) : BigInt(Module.DEFAULT_SEED));
 } catch (e) {
+  if (e.message != null) throw e;
   throw new Error(Module.getExceptionMessage(e));
 }
 """;
diff --git a/tuple/sqlx/tuple_sketch_int64_intersection_seed_mode.sqlx 
b/tuple/sqlx/tuple_sketch_int64_intersection_seed_mode.sqlx
index dc9c2be..3e1c42f 100644
--- a/tuple/sqlx/tuple_sketch_int64_intersection_seed_mode.sqlx
+++ b/tuple/sqlx/tuple_sketch_int64_intersection_seed_mode.sqlx
@@ -24,6 +24,7 @@ RETURNS BYTES
 LANGUAGE js
 OPTIONS (
   library=["gs://$GCS_BUCKET/tuple_sketch_int64.js"],
+  js_parameter_encoding_mode='STANDARD',
   description = '''Computes a sketch that represents the scalar intersection 
of sketchA and sketchB.
 Note that cardinality estimation accuracy, plots, and error tables are the 
same as the Theta Sketch.
 This function only applies to Tuple Sketches with an INT64 summary column.
@@ -39,16 +40,9 @@ For more information:
 ) AS R"""
 const default_seed = BigInt(Module.DEFAULT_SEED);
 try {
-  var intersection = null;
-  try {
-    intersection = new Module.tuple_intersection_int64(seed ? BigInt(seed) : 
default_seed, mode ? mode : "");
-    intersection.updateWithB64(sketchA, seed ? BigInt(seed) : default_seed);
-    intersection.updateWithB64(sketchB, seed ? BigInt(seed) : default_seed);
-    return intersection.getResultB64();
-  } finally {
-    if (intersection != null) intersection.delete();
-  }
+  return Module.tupleIntersectionInt64(sketchA, sketchB, seed ? BigInt(seed) : 
default_seed, mode ? mode : "");
 } catch (e) {
+  if (e.message != null) throw e;
   throw new Error(Module.getExceptionMessage(e));
 }
 """;
diff --git a/tuple/sqlx/tuple_sketch_int64_jaccard_similarity_seed.sqlx 
b/tuple/sqlx/tuple_sketch_int64_jaccard_similarity_seed.sqlx
index ef74c43..06412a2 100644
--- a/tuple/sqlx/tuple_sketch_int64_jaccard_similarity_seed.sqlx
+++ b/tuple/sqlx/tuple_sketch_int64_jaccard_similarity_seed.sqlx
@@ -24,6 +24,7 @@ RETURNS STRUCT<lower_bound FLOAT64, estimate FLOAT64, 
upper_bound FLOAT64>
 LANGUAGE js
 OPTIONS (
   library=["gs://$GCS_BUCKET/tuple_sketch_int64.js"],
+  js_parameter_encoding_mode='STANDARD',
   description = '''Computes the Jaccard similarity index with upper and lower 
bounds.
 The Jaccard similarity index J(A,B) = (A ^ B)/(A U B) is used to measure how 
similar the two sketches are to each other.
 If J = 1.0, the sketches are considered equal. If J = 0, the two sketches are 
disjoint.
@@ -48,6 +49,7 @@ try {
     upper_bound: jaccard.get(2)
   };
 } catch (e) {
+  if (e.message != null) throw e;
   throw new Error(Module.getExceptionMessage(e));
 }
 """;
diff --git a/tuple/sqlx/tuple_sketch_int64_to_string_seed.sqlx 
b/tuple/sqlx/tuple_sketch_int64_to_string_seed.sqlx
index 7432242..9ee21bc 100644
--- a/tuple/sqlx/tuple_sketch_int64_to_string_seed.sqlx
+++ b/tuple/sqlx/tuple_sketch_int64_to_string_seed.sqlx
@@ -24,6 +24,7 @@ RETURNS STRING
 LANGUAGE js
 OPTIONS (
   library=["gs://$GCS_BUCKET/tuple_sketch_int64.js"],
+  js_parameter_encoding_mode='STANDARD',
   description = '''Returns a human readable STRING that is a short summary of 
the state of this sketch.
   Note that cardinality estimation accuracy, plots, and error tables are the 
same as the Theta Sketch.
   This function only applies to Tuple Sketches with an INT64 summary column.
@@ -37,14 +38,9 @@ For more information:
 '''
 ) AS R"""
 try {
-  var sketchObject = null;
-  try {
-    sketchObject = 
Module.compact_tuple_sketch_int64.deserializeFromB64(sketch, seed ? 
BigInt(seed) : BigInt(Module.DEFAULT_SEED));
-    return sketchObject.toString();
-  } finally {
-    if (sketchObject != null) sketchObject.delete();
-  }
+  return  Module.compact_tuple_sketch_int64.toString(sketch, seed ? 
BigInt(seed) : BigInt(Module.DEFAULT_SEED));
 } catch (e) {
+  if (e.message != null) throw e;
   throw new Error(Module.getExceptionMessage(e));
 }
 """;
diff --git a/tuple/sqlx/tuple_sketch_int64_union_lgk_seed_mode.sqlx 
b/tuple/sqlx/tuple_sketch_int64_union_lgk_seed_mode.sqlx
index 87156e0..52c3944 100644
--- a/tuple/sqlx/tuple_sketch_int64_union_lgk_seed_mode.sqlx
+++ b/tuple/sqlx/tuple_sketch_int64_union_lgk_seed_mode.sqlx
@@ -24,6 +24,7 @@ RETURNS BYTES
 LANGUAGE js
 OPTIONS (
   library=["gs://$GCS_BUCKET/tuple_sketch_int64.js"],
+  js_parameter_encoding_mode='STANDARD',
   description = '''Computes a Tuple Sketch that represents the UNION of 
sketchA and sketchB.
 Note that cardinality estimation accuracy, plots, and error tables are the 
same as the Theta Sketch.
 This function only applies to Tuple Sketches with an INT64 summary column.
@@ -40,16 +41,9 @@ For more information:
 const default_lg_k = Number(Module.DEFAULT_LG_K);
 const default_seed = BigInt(Module.DEFAULT_SEED);
 try {
-  var union = null;
-  try {
-    union = new Module.tuple_union_int64(lg_k ? lg_k : default_lg_k, seed ? 
BigInt(seed) : default_seed, mode ? mode : "");
-    union.updateWithB64(sketchA, seed ? BigInt(seed) : default_seed)
-    union.updateWithB64(sketchB, seed ? BigInt(seed) : default_seed)
-    return union.getResultB64();
-  } finally {
-    if (union != null) union.delete();
-  }
+  return Module.tupleUnionInt64(sketchA, sketchB, lg_k ? Number(lg_k) : 
default_lg_k, seed ? BigInt(seed) : default_seed, mode ? mode : "");
 } catch (e) {
+  if (e.message != null) throw e;
   throw new Error(Module.getExceptionMessage(e));
 }
 """;
diff --git a/tuple/test/tuple_sketch_int_test.sql 
b/tuple/test/tuple_sketch_int_test.sql
index 8712943..01917dd 100644
--- a/tuple/test/tuple_sketch_int_test.sql
+++ b/tuple/test/tuple_sketch_int_test.sql
@@ -17,13 +17,13 @@
  * under the License.
  */
 
+# using defaults
 create or replace table $BQ_DATASET.tuple_sketch(sketch bytes);
 
 insert into $BQ_DATASET.tuple_sketch
-(select 
$BQ_DATASET.tuple_sketch_int64_from_theta_sketch_seed($BQ_DATASET.theta_sketch_agg_string(cast(value
 as string)), 1, null) from unnest(GENERATE_ARRAY(1, 10000, 1)) as value);
+(select 
$BQ_DATASET.tuple_sketch_int64_from_theta_sketch($BQ_DATASET.theta_sketch_agg_string(cast(value
 as string)), 1) from unnest(GENERATE_ARRAY(1, 10000, 1)) as value);
 insert into $BQ_DATASET.tuple_sketch
-(select 
$BQ_DATASET.tuple_sketch_int64_from_theta_sketch_seed($BQ_DATASET.theta_sketch_agg_string(cast(value
 as string)), 1, null) from unnest(GENERATE_ARRAY(100000, 110000, 1)) as value);
-
+(select 
$BQ_DATASET.tuple_sketch_int64_from_theta_sketch($BQ_DATASET.theta_sketch_agg_string(cast(value
 as string)), 1) from unnest(GENERATE_ARRAY(100000, 110000, 1)) as value);
 
 # expected about 20000
 select $BQ_DATASET.tuple_sketch_int64_get_estimate(
@@ -37,6 +37,38 @@ select $BQ_DATASET.tuple_sketch_int64_to_string(
 
 drop table $BQ_DATASET.tuple_sketch;
 
+# using full signatures
+create or replace table $BQ_DATASET.tuple_sketch(sketch bytes);
+
+insert into $BQ_DATASET.tuple_sketch
+(select $BQ_DATASET.tuple_sketch_int64_from_theta_sketch_seed(
+  $BQ_DATASET.theta_sketch_agg_string_lgk_seed_p(cast(value as string), 
STRUCT<BYTEINT, INT64, FLOAT64>(10, 111, 0.999)),
+  1,
+  111
+) from unnest(GENERATE_ARRAY(1, 10000, 1)) as value);
+insert into $BQ_DATASET.tuple_sketch
+(select $BQ_DATASET.tuple_sketch_int64_from_theta_sketch_seed(
+  $BQ_DATASET.theta_sketch_agg_string_lgk_seed_p(cast(value as string), 
STRUCT<BYTEINT, INT64, FLOAT64>(10, 111, 0.999)),
+  1,
+  111
+) from unnest(GENERATE_ARRAY(100000, 110000, 1)) as value);
+
+# expected about 20000
+select $BQ_DATASET.tuple_sketch_int64_get_estimate_seed(
+  $BQ_DATASET.tuple_sketch_int64_agg_union_lgk_seed_mode(sketch, 
STRUCT<BYTEINT, INT64, STRING>(10, 111, "NOP")),
+  111
+) from $BQ_DATASET.tuple_sketch;
+
+# expected estimate about 20000
+select $BQ_DATASET.tuple_sketch_int64_to_string_seed(
+  $BQ_DATASET.tuple_sketch_int64_agg_union_lgk_seed_mode(sketch, 
STRUCT<BYTEINT, INT64, STRING>(10, 111, "NOP")),
+  111
+) from $BQ_DATASET.tuple_sketch;
+
+drop table $BQ_DATASET.tuple_sketch;
+
+
+# using defaults
 # expected 5
 select $BQ_DATASET.tuple_sketch_int64_get_estimate(
   $BQ_DATASET.tuple_sketch_int64_union(
@@ -45,7 +77,7 @@ select $BQ_DATASET.tuple_sketch_int64_get_estimate(
   )
 );
 
-# full signatures
+# using full signatures
 # expected 5
 select $BQ_DATASET.tuple_sketch_int64_get_estimate_seed(
   $BQ_DATASET.tuple_sketch_int64_union_lgk_seed_mode(
@@ -58,6 +90,7 @@ select $BQ_DATASET.tuple_sketch_int64_get_estimate_seed(
   111
 );
 
+# using defaults
 # expected 1
 select $BQ_DATASET.tuple_sketch_int64_get_estimate(
   $BQ_DATASET.tuple_sketch_int64_intersection(
@@ -66,7 +99,7 @@ select $BQ_DATASET.tuple_sketch_int64_get_estimate(
   )
 );
 
-# full signatures
+# using full signatures
 # expected 1
 select $BQ_DATASET.tuple_sketch_int64_get_estimate_seed(
   $BQ_DATASET.tuple_sketch_int64_intersection_seed_mode(
@@ -78,6 +111,7 @@ select $BQ_DATASET.tuple_sketch_int64_get_estimate_seed(
   111
 );
 
+# using defaults
 # expected 2
 select $BQ_DATASET.tuple_sketch_int64_get_estimate(
   $BQ_DATASET.tuple_sketch_int64_a_not_b(
@@ -86,7 +120,7 @@ select $BQ_DATASET.tuple_sketch_int64_get_estimate(
   )
 );
 
-# full signatures
+# using full signatures
 # expected 2
 select $BQ_DATASET.tuple_sketch_int64_get_estimate_seed(
   $BQ_DATASET.tuple_sketch_int64_a_not_b_seed(
@@ -97,17 +131,39 @@ select $BQ_DATASET.tuple_sketch_int64_get_estimate_seed(
   111
 );
 
-
+# using defaults
 # expected 0.2
 select $BQ_DATASET.tuple_sketch_int64_jaccard_similarity(
   (select $BQ_DATASET.tuple_sketch_int64_agg_string(str, 1) from unnest(["a", 
"b", "c"]) as str),
   (select $BQ_DATASET.tuple_sketch_int64_agg_string(str, 1) from unnest(["c", 
"d", "e"]) as str)
 );
 
-#full signatures
+# using full signatures
 # expected 0.2
 select $BQ_DATASET.tuple_sketch_int64_jaccard_similarity_seed(
   (select $BQ_DATASET.tuple_sketch_int64_agg_string_lgk_seed_p_mode(str, 1, 
STRUCT<BYTEINT, INT64, FLOAT64, STRING>(10, 111, 0.999, "NOP")) from 
unnest(["a", "b", "c"]) as str),
   (select $BQ_DATASET.tuple_sketch_int64_agg_string_lgk_seed_p_mode(str, 1, 
STRUCT<BYTEINT, INT64, FLOAT64, STRING>(10, 111, 0.999, "NOP")) from 
unnest(["c", "d", "e"]) as str),
   111
 );
+
+# using defaults
+# expected 1 entry
+select $BQ_DATASET.tuple_sketch_int64_to_string(
+  $BQ_DATASET.tuple_sketch_int64_filter_low_high(
+    $BQ_DATASET.tuple_sketch_int64_agg_string(key, 1),
+    2,
+    2
+  )
+) from unnest(["a", "b", "c", "c"]) as key;
+
+# using full signatures
+# expected 1 entry
+select $BQ_DATASET.tuple_sketch_int64_to_string_seed(
+  $BQ_DATASET.tuple_sketch_int64_filter_low_high_seed(
+    $BQ_DATASET.tuple_sketch_int64_agg_string_lgk_seed_p_mode(key, 1, 
STRUCT<BYTEINT, INT64, FLOAT64, STRING>(10, 111, 0.999, "SUM")),
+    2,
+    2,
+    111
+  ),
+  111
+) from unnest(["a", "b", "c", "c"]) as key;
diff --git a/tuple/tuple_sketch_int64.cpp b/tuple/tuple_sketch_int64.cpp
index 576f4f7..18ae010 100644
--- a/tuple/tuple_sketch_int64.cpp
+++ b/tuple/tuple_sketch_int64.cpp
@@ -26,8 +26,6 @@
 #include <tuple_jaccard_similarity.hpp>
 #include <theta_sketch.hpp>
 
-#include "../base64.hpp"
-
 using Summary = uint64_t;
 using Update = uint64_t;
 
@@ -118,39 +116,23 @@ EMSCRIPTEN_BINDINGS(tuple_sketch_int64) {
     ;
 
   emscripten::class_<compact_tuple_sketch_int64>("compact_tuple_sketch_int64")
-    .class_function("deserializeFromB64", 
emscripten::optional_override([](const std::string& b64, uint64_t seed) {
-      std::vector<char> bytes(b64_dec_len(b64.data(), b64.size()));
-      b64_decode(b64.data(), b64.size(), bytes.data());
-      return new 
compact_tuple_sketch_int64(compact_tuple_sketch_int64::deserialize(bytes.data(),
 bytes.size(), seed));
-    }), emscripten::allow_raw_pointers())
-    .class_function("deserializeFromBinary", 
emscripten::optional_override([](const std::string& bytes, uint64_t seed) {
-      return new 
compact_tuple_sketch_int64(compact_tuple_sketch_int64::deserialize(bytes.data(),
 bytes.size(), seed));
-    }), emscripten::allow_raw_pointers())
-    .class_function("convertThetaFromB64", 
emscripten::optional_override([](const std::string& b64, uint64_t value, 
uint64_t seed) {
-      std::vector<char> bytes(b64_dec_len(b64.data(), b64.size()));
-      b64_decode(b64.data(), b64.size(), bytes.data());
+    .class_function("convertTheta", emscripten::optional_override([](const 
std::string& theta_sketch_bytes, uint64_t value, uint64_t seed) {
       // converting constructor does not currently take wrapped compact theta 
sketch
-      const auto sketch = 
datasketches::compact_theta_sketch::deserialize(bytes.data(), bytes.size(), 
seed);
-      return new compact_tuple_sketch_int64(sketch, value);
-    }), emscripten::allow_raw_pointers())
-    .function("getEstimate", emscripten::optional_override([](const 
compact_tuple_sketch_int64& self) {
-      return self.get_estimate();
+      const auto sketch = 
datasketches::compact_theta_sketch::deserialize(theta_sketch_bytes.data(), 
theta_sketch_bytes.size(), seed);
+      auto bytes = compact_tuple_sketch_int64(sketch, value).serialize();
+      return Uint8Array.new_(emscripten::typed_memory_view(bytes.size(), 
bytes.data()));
     }))
-    .function("toString", emscripten::optional_override([](const 
compact_tuple_sketch_int64& self) {
-      return std::string(self.to_string());
+    .class_function("getEstimate", emscripten::optional_override([](const 
std::string& sketch_bytes, uint64_t seed) {
+      return compact_tuple_sketch_int64::deserialize(sketch_bytes.data(), 
sketch_bytes.size(), seed).get_estimate();
     }))
-    .function("serializeAsB64", emscripten::optional_override([](const 
compact_tuple_sketch_int64& self) {
-      auto bytes = self.serialize();
-      std::vector<char> b64(b64_enc_len(bytes.size()));
-      b64_encode((const char*) bytes.data(), bytes.size(), b64.data());
-      return std::string(b64.data(), b64.size());
+    .class_function("toString", emscripten::optional_override([](const 
std::string& sketch_bytes, uint64_t seed) {
+      return 
std::string(compact_tuple_sketch_int64::deserialize(sketch_bytes.data(), 
sketch_bytes.size(), seed).to_string());
     }))
-    .function("filterB64", emscripten::optional_override([](const 
compact_tuple_sketch_int64& self, int low, int high) {
-      auto sketch = self.filter([low, high](int v){return v >= low && v <= 
high;});
-      auto bytes = sketch.serialize();
-      std::vector<char> b64(b64_enc_len(bytes.size()));
-      b64_encode((const char*) bytes.data(), bytes.size(), b64.data());
-      return std::string(b64.data(), b64.size());
+    .class_function("filterLowHigh", emscripten::optional_override([](const 
std::string& sketch_bytes, int low, int high, uint64_t seed) {
+      auto bytes = compact_tuple_sketch_int64::deserialize(
+        sketch_bytes.data(), sketch_bytes.size(), seed
+      ).filter([low, high](int v){return v >= low && v <= high;}).serialize();
+      return Uint8Array.new_(emscripten::typed_memory_view(bytes.size(), 
bytes.data()));
     }))
     ;
 
@@ -165,65 +147,42 @@ EMSCRIPTEN_BINDINGS(tuple_sketch_int64) {
     .function("updateWithBytes", 
emscripten::optional_override([](tuple_union_int64& self, const std::string& 
bytes, uint64_t seed) {
       self.update(compact_tuple_sketch_int64::deserialize(bytes.data(), 
bytes.size(), seed));
     }), emscripten::allow_raw_pointers())
-    .function("updateWithB64", 
emscripten::optional_override([](tuple_union_int64& self, const std::string& 
b64, uint64_t seed) {
-      std::vector<char> bytes(b64_dec_len(b64.data(), b64.size()));
-      b64_decode(b64.data(), b64.size(), bytes.data());
-      self.update(compact_tuple_sketch_int64::deserialize(bytes.data(), 
bytes.size(), seed));
-    }), emscripten::allow_raw_pointers())
     .function("getResultAsUint8Array", 
emscripten::optional_override([](tuple_union_int64& self) {
       auto bytes = self.get_result().serialize();
       return Uint8Array.new_(emscripten::typed_memory_view(bytes.size(), 
bytes.data()));
     }))
-    .function("getResultB64", 
emscripten::optional_override([](tuple_union_int64& self) {
-      auto bytes = self.get_result().serialize();
-      std::vector<char> b64(b64_enc_len(bytes.size()));
-      b64_encode((const char*) bytes.data(), bytes.size(), b64.data());
-      return std::string(b64.data(), b64.size());
-    }))
     ;
 
-  emscripten::class_<tuple_intersection_int64>("tuple_intersection_int64")
-    .constructor(emscripten::optional_override([](uint64_t seed, const 
std::string& mode_str) {
-      return new tuple_intersection_int64(seed, 
tuple_intersection_policy<Summary>(convert_mode(mode_str)));
-    }))
-    .function("updateWithB64", 
emscripten::optional_override([](tuple_intersection_int64& self, const 
std::string& b64, uint64_t seed) {
-      std::vector<char> bytes(b64_dec_len(b64.data(), b64.size()));
-      b64_decode(b64.data(), b64.size(), bytes.data());
-      self.update(compact_tuple_sketch_int64::deserialize(bytes.data(), 
bytes.size(), seed));
-    }), emscripten::allow_raw_pointers())
-    .function("getResultB64", 
emscripten::optional_override([](tuple_intersection_int64& self) {
-      auto bytes = self.get_result().serialize();
-      std::vector<char> b64(b64_enc_len(bytes.size()));
-      b64_encode((const char*) bytes.data(), bytes.size(), b64.data());
-      return std::string(b64.data(), b64.size());
-    }))
-    ;
+  emscripten::function("tupleUnionInt64", emscripten::optional_override([](
+    const std::string& bytes1, const std::string& bytes2, uint8_t lg_k, 
uint64_t seed, const std::string& mode_str
+  ) {
+    const auto policy = tuple_union_policy<Summary>(convert_mode(mode_str));
+    auto u = 
tuple_union_int64(tuple_union_int64::builder(policy).set_lg_k(lg_k).set_seed(seed).build());
+    u.update(compact_tuple_sketch_int64::deserialize(bytes1.data(), 
bytes1.size(), seed));
+    u.update(compact_tuple_sketch_int64::deserialize(bytes2.data(), 
bytes2.size(), seed));
+    const auto bytes = u.get_result().serialize();
+    return Uint8Array.new_(emscripten::typed_memory_view(bytes.size(), 
bytes.data()));
+  }));
 
-  emscripten::class_<tuple_a_not_b_int64>("tuple_a_not_b_int64")
-    .constructor(emscripten::optional_override([](uint64_t seed) {
-      return new tuple_a_not_b_int64(seed);
-    }))
-    .function("computeWithB64ReturnB64", 
emscripten::optional_override([](tuple_a_not_b_int64& self,
-      const std::string& b64_1, const std::string& b64_2, uint64_t seed) {
-      std::vector<char> bytes1(b64_dec_len(b64_1.data(), b64_1.size()));
-      b64_decode(b64_1.data(), b64_1.size(), bytes1.data());
-      std::vector<char> bytes2(b64_dec_len(b64_2.data(), b64_2.size()));
-      b64_decode(b64_2.data(), b64_2.size(), bytes2.data());
-      auto bytes = self.compute(
-        compact_tuple_sketch_int64::deserialize(bytes1.data(), bytes1.size(), 
seed),
-        compact_tuple_sketch_int64::deserialize(bytes2.data(), bytes2.size(), 
seed)
-      ).serialize();
-      std::vector<char> b64(b64_enc_len(bytes.size()));
-      b64_encode((const char*) bytes.data(), bytes.size(), b64.data());
-      return std::string(b64.data(), b64.size());
-    }))
-    ;
+  emscripten::function("tupleIntersectionInt64", 
emscripten::optional_override([](
+    const std::string& bytes1, const std::string& bytes2, uint64_t seed, const 
std::string& mode_str
+  ) {
+    tuple_intersection_int64 intersection(seed, 
tuple_intersection_policy<Summary>(convert_mode(mode_str)));
+    intersection.update(compact_tuple_sketch_int64::deserialize(bytes1.data(), 
bytes1.size(), seed));
+    intersection.update(compact_tuple_sketch_int64::deserialize(bytes2.data(), 
bytes2.size(), seed));
+    const auto bytes = intersection.get_result().serialize();
+    return Uint8Array.new_(emscripten::typed_memory_view(bytes.size(), 
bytes.data()));
+  }));
+
+  emscripten::function("tupleAnotBInt64", 
emscripten::optional_override([](const std::string& bytes1, const std::string& 
bytes2, uint64_t seed) {
+    auto bytes = tuple_a_not_b_int64(seed).compute(
+      compact_tuple_sketch_int64::deserialize(bytes1.data(), bytes1.size(), 
seed),
+      compact_tuple_sketch_int64::deserialize(bytes2.data(), bytes2.size(), 
seed)
+    ).serialize();
+    return Uint8Array.new_(emscripten::typed_memory_view(bytes.size(), 
bytes.data()));
+  }));
 
-  emscripten::function("tupleInt64JaccardSimilarity", 
emscripten::optional_override([](const std::string& sketch1_b64, const 
std::string& sketch2_b64, uint64_t seed) {
-    std::vector<char> bytes1(b64_dec_len(sketch1_b64.data(), 
sketch1_b64.size()));
-    b64_decode(sketch1_b64.data(), sketch1_b64.size(), bytes1.data());
-    std::vector<char> bytes2(b64_dec_len(sketch2_b64.data(), 
sketch2_b64.size()));
-    b64_decode(sketch2_b64.data(), sketch2_b64.size(), bytes2.data());
+  emscripten::function("tupleInt64JaccardSimilarity", 
emscripten::optional_override([](const std::string& bytes1, const std::string& 
bytes2, uint64_t seed) {
     const auto arr = tuple_jaccard_similarity_int64::jaccard(
       compact_tuple_sketch_int64::deserialize(bytes1.data(), bytes1.size(), 
seed),
       compact_tuple_sketch_int64::deserialize(bytes2.data(), bytes2.size(), 
seed),


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to