This is an automated email from the ASF dual-hosted git repository. alsay pushed a commit to branch null_handling in repository https://gitbox.apache.org/repos/asf/datasketches-bigquery.git
commit 7d3273d6282048ed4c6c9dcf6ea7782fbebb30fb Author: AlexanderSaydakov <[email protected]> AuthorDate: Fri Mar 21 18:29:30 2025 -0700 tolerate nulls in scalar functions --- .../cpc_sketch_get_estimate_and_bounds_seed.sqlx | 1 + cpc/sqlx/cpc_sketch_get_estimate_seed.sqlx | 1 + cpc/sqlx/cpc_sketch_to_string_seed.sqlx | 1 + cpc/sqlx/cpc_sketch_union_lgk_seed.sqlx | 1 + cpc/test/cpc_sketch_test.js | 25 +++++++ fi/sqlx/frequent_strings_sketch_get_result.sqlx | 1 + fi/sqlx/frequent_strings_sketch_to_string.sqlx | 1 + fi/test/frequent_strings_sketch_test.js | 12 +++- hll/sqlx/hll_sketch_get_estimate.sqlx | 1 + hll/sqlx/hll_sketch_get_estimate_and_bounds.sqlx | 1 + hll/sqlx/hll_sketch_to_string.sqlx | 1 + hll/sqlx/hll_sketch_union_lgk_type.sqlx | 1 + hll/test/hll_sketch_test.js | 25 +++++++ kll/sqlx/kll_sketch_float_get_cdf.sqlx | 1 + kll/sqlx/kll_sketch_float_get_max_value.sqlx | 1 + kll/sqlx/kll_sketch_float_get_min_value.sqlx | 1 + kll/sqlx/kll_sketch_float_get_n.sqlx | 1 + ...kll_sketch_float_get_normalized_rank_error.sqlx | 1 + kll/sqlx/kll_sketch_float_get_num_retained.sqlx | 1 + kll/sqlx/kll_sketch_float_get_pmf.sqlx | 1 + kll/sqlx/kll_sketch_float_get_quantile.sqlx | 1 + kll/sqlx/kll_sketch_float_get_rank.sqlx | 1 + kll/sqlx/kll_sketch_float_kolmogorov_smirnov.sqlx | 1 + kll/sqlx/kll_sketch_float_to_string.sqlx | 1 + kll/test/kll_sketch_float_test.js | 60 ++++++++++++++++ req/sqlx/req_sketch_float_get_cdf.sqlx | 1 + req/sqlx/req_sketch_float_get_max_value.sqlx | 1 + req/sqlx/req_sketch_float_get_min_value.sqlx | 1 + req/sqlx/req_sketch_float_get_n.sqlx | 1 + req/sqlx/req_sketch_float_get_num_retained.sqlx | 1 + req/sqlx/req_sketch_float_get_pmf.sqlx | 1 + req/sqlx/req_sketch_float_get_quantile.sqlx | 1 + req/sqlx/req_sketch_float_get_rank.sqlx | 1 + .../req_sketch_float_get_rank_lower_bound.sqlx | 1 + .../req_sketch_float_get_rank_upper_bound.sqlx | 1 + req/sqlx/req_sketch_float_to_string.sqlx | 1 + req/test/req_sketch_float_test.js | 55 +++++++++++++++ tdigest/sqlx/tdigest_double_get_max_value.sqlx | 1 + tdigest/sqlx/tdigest_double_get_min_value.sqlx | 1 + tdigest/sqlx/tdigest_double_get_quantile.sqlx | 1 + tdigest/sqlx/tdigest_double_get_rank.sqlx | 1 + tdigest/sqlx/tdigest_double_get_total_weight.sqlx | 1 + tdigest/sqlx/tdigest_double_to_string.sqlx | 1 + tdigest/test/tdigest_test.js | 30 ++++++++ theta/sqlx/theta_sketch_a_not_b_seed.sqlx | 1 + .../theta_sketch_get_estimate_and_bounds_seed.sqlx | 1 + theta/sqlx/theta_sketch_get_estimate_seed.sqlx | 1 + theta/sqlx/theta_sketch_get_num_retained_seed.sqlx | 1 + theta/sqlx/theta_sketch_get_theta_seed.sqlx | 1 + theta/sqlx/theta_sketch_intersection_seed.sqlx | 1 + .../sqlx/theta_sketch_jaccard_similarity_seed.sqlx | 1 + theta/sqlx/theta_sketch_to_string_seed.sqlx | 1 + theta/sqlx/theta_sketch_union_lgk_seed.sqlx | 1 + theta/test/theta_sketch_test.js | 66 +++++++++++++++++- tuple/sqlx/tuple_sketch_int64_a_not_b_seed.sqlx | 1 + .../tuple_sketch_int64_filter_low_high_seed.sqlx | 2 +- .../tuple_sketch_int64_from_theta_sketch_seed.sqlx | 1 + ..._sketch_int64_get_estimate_and_bounds_seed.sqlx | 1 + .../sqlx/tuple_sketch_int64_get_estimate_seed.sqlx | 1 + .../tuple_sketch_int64_get_num_retained_seed.sqlx | 1 + ...tch_int64_get_sum_estimate_and_bounds_seed.sqlx | 1 + tuple/sqlx/tuple_sketch_int64_get_theta_seed.sqlx | 1 + .../tuple_sketch_int64_intersection_seed_mode.sqlx | 1 + ...tuple_sketch_int64_jaccard_similarity_seed.sqlx | 1 + tuple/sqlx/tuple_sketch_int64_to_string_seed.sqlx | 1 + .../tuple_sketch_int64_union_lgk_seed_mode.sqlx | 1 + tuple/test/tuple_sketch_int64_test.js | 80 ++++++++++++++++++++++ 67 files changed, 410 insertions(+), 3 deletions(-) diff --git a/cpc/sqlx/cpc_sketch_get_estimate_and_bounds_seed.sqlx b/cpc/sqlx/cpc_sketch_get_estimate_and_bounds_seed.sqlx index 5aac29b..699839d 100644 --- a/cpc/sqlx/cpc_sketch_get_estimate_and_bounds_seed.sqlx +++ b/cpc/sqlx/cpc_sketch_get_estimate_and_bounds_seed.sqlx @@ -38,6 +38,7 @@ For more information: - https://datasketches.apache.org/docs/CPC/CpcSketches.html ''' ) AS R""" +if (sketch == null) return null try { const result = Module.cpc_sketch.getEstimateAndBounds(sketch, Number(num_std_devs), seed ? BigInt(seed) : BigInt(Module.DEFAULT_SEED)); return { diff --git a/cpc/sqlx/cpc_sketch_get_estimate_seed.sqlx b/cpc/sqlx/cpc_sketch_get_estimate_seed.sqlx index fbefeb4..19e6e4e 100644 --- a/cpc/sqlx/cpc_sketch_get_estimate_seed.sqlx +++ b/cpc/sqlx/cpc_sketch_get_estimate_seed.sqlx @@ -35,6 +35,7 @@ For more information: - https://datasketches.apache.org/docs/CPC/CpcSketches.html ''' ) AS R""" +if (sketch == null) return null try { return Module.cpc_sketch.getEstimate(sketch, seed ? BigInt(seed) : BigInt(Module.DEFAULT_SEED)); } catch (e) { diff --git a/cpc/sqlx/cpc_sketch_to_string_seed.sqlx b/cpc/sqlx/cpc_sketch_to_string_seed.sqlx index 9befdb8..b229952 100644 --- a/cpc/sqlx/cpc_sketch_to_string_seed.sqlx +++ b/cpc/sqlx/cpc_sketch_to_string_seed.sqlx @@ -35,6 +35,7 @@ For more information: - https://datasketches.apache.org/docs/CPC/CpcSketches.html ''' ) AS R""" +if (sketch == null) return null const default_seed = BigInt(Module.DEFAULT_SEED); try { return Module.cpc_sketch.toString(sketch, seed ? BigInt(seed) : default_seed); diff --git a/cpc/sqlx/cpc_sketch_union_lgk_seed.sqlx b/cpc/sqlx/cpc_sketch_union_lgk_seed.sqlx index d933e19..af3cf62 100644 --- a/cpc/sqlx/cpc_sketch_union_lgk_seed.sqlx +++ b/cpc/sqlx/cpc_sketch_union_lgk_seed.sqlx @@ -37,6 +37,7 @@ For more information: - https://datasketches.apache.org/docs/CPC/CpcSketches.html ''' ) AS R""" +if (sketchA == null || sketchB == null) return null const default_lg_k = Number(12); const default_seed = BigInt(Module.DEFAULT_SEED); try { diff --git a/cpc/test/cpc_sketch_test.js b/cpc/test/cpc_sketch_test.js index dc642dc..f89b1c8 100644 --- a/cpc/test/cpc_sketch_test.js +++ b/cpc/test/cpc_sketch_test.js @@ -56,6 +56,16 @@ generate_udaf_test("cpc_sketch_agg_string", { expected_output: cpc_2 }); +generate_udf_test("cpc_sketch_union", [{ + inputs: [ cpc_1, `CAST(NULL AS BYTES)` ], + expected_output: null +}]); + +generate_udf_test("cpc_sketch_union", [{ + inputs: [ `CAST(NULL AS BYTES)`, cpc_2 ], + expected_output: null +}]); + const cpc_union_1 = `FROM_BASE64('BAEQCwAKzJMFAAAAAgAAAHwTuG5g27UF')`; generate_udf_test("cpc_sketch_union", [{ @@ -63,11 +73,21 @@ generate_udf_test("cpc_sketch_union", [{ expected_output: cpc_union_1 }]); +generate_udf_test("cpc_sketch_get_estimate", [{ + inputs: [ `CAST(NULL AS BYTES)` ], + expected_output: null +}]); + generate_udf_test("cpc_sketch_get_estimate", [{ inputs: [ cpc_union_1 ], expected_output: 5.00162840932184 }]); +generate_udf_test("cpc_sketch_to_string", [{ + inputs: [ `CAST(NULL AS BYTES)` ], + expected_output: null +}]); + generate_udf_test("cpc_sketch_to_string", [{ inputs: [ cpc_union_1 ], expected_output: `'''### CPC sketch summary: @@ -107,6 +127,11 @@ generate_udaf_test("cpc_sketch_agg_union", { expected_output: cpc_union_2 }); +generate_udf_test("cpc_sketch_get_estimate_and_bounds", [{ + inputs: [ `CAST(NULL AS BYTES)`, 3 ], + expected_output: null +}]); + generate_udf_test("cpc_sketch_get_estimate_and_bounds", [{ inputs: [ cpc_union_2, 3 ], expected_output: `STRUCT(20000.731632174215 AS estimate, 19103.49112120969 AS lower_bound, 20932 AS upper_bound)` diff --git a/fi/sqlx/frequent_strings_sketch_get_result.sqlx b/fi/sqlx/frequent_strings_sketch_get_result.sqlx index 7327091..813af87 100644 --- a/fi/sqlx/frequent_strings_sketch_get_result.sqlx +++ b/fi/sqlx/frequent_strings_sketch_get_result.sqlx @@ -38,6 +38,7 @@ For more information: - https://datasketches.apache.org/docs/Frequency/FrequencySketches.html ''' ) AS R""" +if (sketch == null) return null try { return Module.frequent_strings_sketch.getResult(sketch, error_type, threshold ? threshold : 0); } catch (e) { diff --git a/fi/sqlx/frequent_strings_sketch_to_string.sqlx b/fi/sqlx/frequent_strings_sketch_to_string.sqlx index bbf1a04..96d778c 100644 --- a/fi/sqlx/frequent_strings_sketch_to_string.sqlx +++ b/fi/sqlx/frequent_strings_sketch_to_string.sqlx @@ -34,6 +34,7 @@ For more information: - https://datasketches.apache.org/docs/Frequency/FrequencySketches.html ''' ) AS R""" +if (sketch == null) return null try { return Module.frequent_strings_sketch.toString(sketch); } catch (e) { diff --git a/fi/test/frequent_strings_sketch_test.js b/fi/test/frequent_strings_sketch_test.js index 1ab533b..288d4b1 100644 --- a/fi/test/frequent_strings_sketch_test.js +++ b/fi/test/frequent_strings_sketch_test.js @@ -57,6 +57,11 @@ generate_udaf_test("frequent_strings_sketch_merge", { expected_output: fi_3 }); +generate_udf_test("frequent_strings_sketch_to_string", [{ + inputs: [ `CAST(NULL AS BYTES)` ], + expected_output: null +}]); + generate_udf_test("frequent_strings_sketch_to_string", [{ inputs: [ fi_3 ], expected_output: `'''### Frequent items sketch summary: @@ -70,7 +75,12 @@ generate_udf_test("frequent_strings_sketch_to_string", [{ }]); generate_udf_test("frequent_strings_sketch_get_result", [{ - inputs: [ fi_3, `"NO_FALSE_POSITIVES"`, `null` ], + inputs: [ `CAST(NULL AS BYTES)`, `"NO_FALSE_POSITIVES"`, `NULL` ], + expected_output: `[]` +}]); + +generate_udf_test("frequent_strings_sketch_get_result", [{ + inputs: [ fi_3, `"NO_FALSE_POSITIVES"`, `NULL` ], expected_output: `[STRUCT('a' AS item, 3 AS estimate, 3 AS lower_bound, 3 AS upper_bound), STRUCT('b' AS item, 2 AS estimate, 2 AS lower_bound, 2 AS upper_bound), STRUCT('c' AS item, 1 AS estimate, 1 AS lower_bound, 1 AS upper_bound)]` }]); diff --git a/hll/sqlx/hll_sketch_get_estimate.sqlx b/hll/sqlx/hll_sketch_get_estimate.sqlx index 14a399f..fbe7cd3 100644 --- a/hll/sqlx/hll_sketch_get_estimate.sqlx +++ b/hll/sqlx/hll_sketch_get_estimate.sqlx @@ -34,6 +34,7 @@ For more information: - https://datasketches.apache.org/docs/HLL/HllSketches.html ''' ) AS R""" +if (sketch == null) return null try { return Module.hll_sketch.getEstimate(sketch); } catch (e) { diff --git a/hll/sqlx/hll_sketch_get_estimate_and_bounds.sqlx b/hll/sqlx/hll_sketch_get_estimate_and_bounds.sqlx index 665d2ea..a22a998 100644 --- a/hll/sqlx/hll_sketch_get_estimate_and_bounds.sqlx +++ b/hll/sqlx/hll_sketch_get_estimate_and_bounds.sqlx @@ -37,6 +37,7 @@ For more information: - https://datasketches.apache.org/docs/HLL/HllSketches.html ''' ) AS R""" +if (sketch == null) return null try { const result = Module.hll_sketch.getEstimateAndBounds(sketch, Number(num_std_devs)); return { diff --git a/hll/sqlx/hll_sketch_to_string.sqlx b/hll/sqlx/hll_sketch_to_string.sqlx index e032ceb..bb5604b 100644 --- a/hll/sqlx/hll_sketch_to_string.sqlx +++ b/hll/sqlx/hll_sketch_to_string.sqlx @@ -34,6 +34,7 @@ For more information: - https://datasketches.apache.org/docs/HLL/HllSketches.html ''' ) AS R""" +if (sketch == null) return null try { return Module.hll_sketch.toString(sketch); } catch (e) { diff --git a/hll/sqlx/hll_sketch_union_lgk_type.sqlx b/hll/sqlx/hll_sketch_union_lgk_type.sqlx index d924969..b5f14b2 100644 --- a/hll/sqlx/hll_sketch_union_lgk_type.sqlx +++ b/hll/sqlx/hll_sketch_union_lgk_type.sqlx @@ -37,6 +37,7 @@ For more information: - https://datasketches.apache.org/docs/HLL/HllSketches.html ''' ) AS R""" +if (sketchA == null || sketchB == null) return null const default_lg_k = Number(12); try { return Module.hllUnion(sketchA, sketchB, lg_k ? Number(lg_k) : default_lg_k, tgt_type ? tgt_type : ""); diff --git a/hll/test/hll_sketch_test.js b/hll/test/hll_sketch_test.js index b120639..878e19f 100644 --- a/hll/test/hll_sketch_test.js +++ b/hll/test/hll_sketch_test.js @@ -56,6 +56,16 @@ generate_udaf_test("hll_sketch_agg_string", { expected_output: hll_2 }); +generate_udf_test("hll_sketch_union", [{ + inputs: [ hll_1, `CAST(NULL AS BYTES)` ], + expected_output: null +}]); + +generate_udf_test("hll_sketch_union", [{ + inputs: [ `CAST(NULL AS BYTES)`, hll_2 ], + expected_output: null +}]); + const hll_union_1 = `FROM_BASE64('AgEHDAMIBQAvgjsECv+ABG8Z3AbAv2oSnXrQCw==')`; generate_udf_test("hll_sketch_union", [{ @@ -63,11 +73,21 @@ generate_udf_test("hll_sketch_union", [{ expected_output: hll_union_1 }]); +generate_udf_test("hll_sketch_get_estimate", [{ + inputs: [ `CAST(NULL AS BYTES)` ], + expected_output: null +}]); + generate_udf_test("hll_sketch_get_estimate", [{ inputs: [ hll_union_1 ], expected_output: 5.000000049670538 }]); +generate_udf_test("hll_sketch_to_string", [{ + inputs: [ `CAST(NULL AS BYTES)` ], + expected_output: null +}]); + generate_udf_test("hll_sketch_to_string", [{ inputs: [ hll_union_1 ], expected_output: `'''### HLL sketch summary: @@ -107,6 +127,11 @@ generate_udaf_test("hll_sketch_agg_union", { expected_output: hll_union_2 }); +generate_udf_test("hll_sketch_get_estimate_and_bounds", [{ + inputs: [ `CAST(NULL AS BYTES)`, 3 ], + expected_output: null +}]); + generate_udf_test("hll_sketch_get_estimate_and_bounds", [{ inputs: [ hll_union_2, 3 ], expected_output: `STRUCT(20250.985334743167 AS estimate, 19292.57752380849 AS lower_bound, 21249.1304948276 AS upper_bound)` diff --git a/kll/sqlx/kll_sketch_float_get_cdf.sqlx b/kll/sqlx/kll_sketch_float_get_cdf.sqlx index 4b54d66..d854b96 100644 --- a/kll/sqlx/kll_sketch_float_get_cdf.sqlx +++ b/kll/sqlx/kll_sketch_float_get_cdf.sqlx @@ -53,6 +53,7 @@ For more information: - https://datasketches.apache.org/docs/KLL/KLLSketch.html ''' ) AS R""" +if (sketch == null) return null try { var sketchObject = null; try { diff --git a/kll/sqlx/kll_sketch_float_get_max_value.sqlx b/kll/sqlx/kll_sketch_float_get_max_value.sqlx index 33815cf..402bbf3 100644 --- a/kll/sqlx/kll_sketch_float_get_max_value.sqlx +++ b/kll/sqlx/kll_sketch_float_get_max_value.sqlx @@ -34,6 +34,7 @@ For more information: - https://datasketches.apache.org/docs/KLL/KLLSketch.html ''' ) AS R""" +if (sketch == null) return null try { var sketchObject = null; try { diff --git a/kll/sqlx/kll_sketch_float_get_min_value.sqlx b/kll/sqlx/kll_sketch_float_get_min_value.sqlx index 5c0c9f1..3b1bede 100644 --- a/kll/sqlx/kll_sketch_float_get_min_value.sqlx +++ b/kll/sqlx/kll_sketch_float_get_min_value.sqlx @@ -34,6 +34,7 @@ For more information: - https://datasketches.apache.org/docs/KLL/KLLSketch.html ''' ) AS R""" +if (sketch == null) return null try { var sketchObject = null; try { diff --git a/kll/sqlx/kll_sketch_float_get_n.sqlx b/kll/sqlx/kll_sketch_float_get_n.sqlx index b2c7529..b51d805 100644 --- a/kll/sqlx/kll_sketch_float_get_n.sqlx +++ b/kll/sqlx/kll_sketch_float_get_n.sqlx @@ -34,6 +34,7 @@ For more information: - https://datasketches.apache.org/docs/KLL/KLLSketch.html ''' ) AS R""" +if (sketch == null) return null try { var sketchObject = null; try { diff --git a/kll/sqlx/kll_sketch_float_get_normalized_rank_error.sqlx b/kll/sqlx/kll_sketch_float_get_normalized_rank_error.sqlx index b911bcd..8d48cbb 100644 --- a/kll/sqlx/kll_sketch_float_get_normalized_rank_error.sqlx +++ b/kll/sqlx/kll_sketch_float_get_normalized_rank_error.sqlx @@ -35,6 +35,7 @@ For more information: - https://datasketches.apache.org/docs/KLL/KLLSketch.html ''' ) AS R""" +if (sketch == null) return null try { var sketchObject = null; try { diff --git a/kll/sqlx/kll_sketch_float_get_num_retained.sqlx b/kll/sqlx/kll_sketch_float_get_num_retained.sqlx index e7f8d23..a589bdd 100644 --- a/kll/sqlx/kll_sketch_float_get_num_retained.sqlx +++ b/kll/sqlx/kll_sketch_float_get_num_retained.sqlx @@ -34,6 +34,7 @@ For more information: - https://datasketches.apache.org/docs/KLL/KLLSketch.html ''' ) AS R""" +if (sketch == null) return null try { var sketchObject = null; try { diff --git a/kll/sqlx/kll_sketch_float_get_pmf.sqlx b/kll/sqlx/kll_sketch_float_get_pmf.sqlx index d163b6c..2d2f089 100644 --- a/kll/sqlx/kll_sketch_float_get_pmf.sqlx +++ b/kll/sqlx/kll_sketch_float_get_pmf.sqlx @@ -55,6 +55,7 @@ For more information: - https://datasketches.apache.org/docs/KLL/KLLSketch.html ''' ) AS R""" +if (sketch == null) return null try { var sketchObject = null; try { diff --git a/kll/sqlx/kll_sketch_float_get_quantile.sqlx b/kll/sqlx/kll_sketch_float_get_quantile.sqlx index 3d6eef2..5c09b94 100644 --- a/kll/sqlx/kll_sketch_float_get_quantile.sqlx +++ b/kll/sqlx/kll_sketch_float_get_quantile.sqlx @@ -36,6 +36,7 @@ For more information: - https://datasketches.apache.org/docs/KLL/KLLSketch.html ''' ) AS R""" +if (sketch == null) return null try { var sketchObject = null; try { diff --git a/kll/sqlx/kll_sketch_float_get_rank.sqlx b/kll/sqlx/kll_sketch_float_get_rank.sqlx index fac16d3..61ae346 100644 --- a/kll/sqlx/kll_sketch_float_get_rank.sqlx +++ b/kll/sqlx/kll_sketch_float_get_rank.sqlx @@ -36,6 +36,7 @@ For more information: - https://datasketches.apache.org/docs/KLL/KLLSketch.html ''' ) AS R""" +if (sketch == null) return null try { var sketchObject = null; try { diff --git a/kll/sqlx/kll_sketch_float_kolmogorov_smirnov.sqlx b/kll/sqlx/kll_sketch_float_kolmogorov_smirnov.sqlx index 1f8fae7..85d9e91 100644 --- a/kll/sqlx/kll_sketch_float_kolmogorov_smirnov.sqlx +++ b/kll/sqlx/kll_sketch_float_kolmogorov_smirnov.sqlx @@ -38,6 +38,7 @@ For more information: - https://datasketches.apache.org/docs/KLL/KLLSketch.html ''' ) AS R""" +if (sketchA == null || sketchB == null) return null try { return Module.kolmogorovSmirnovTest(sketchA, sketchB, pvalue); } catch (e) { diff --git a/kll/sqlx/kll_sketch_float_to_string.sqlx b/kll/sqlx/kll_sketch_float_to_string.sqlx index adba9d3..556eecb 100644 --- a/kll/sqlx/kll_sketch_float_to_string.sqlx +++ b/kll/sqlx/kll_sketch_float_to_string.sqlx @@ -34,6 +34,7 @@ For more information: - https://datasketches.apache.org/docs/KLL/KLLSketch.html ''' ) AS R""" +if (sketch == null) return null try { var sketchObject = null; try { diff --git a/kll/test/kll_sketch_float_test.js b/kll/test/kll_sketch_float_test.js index 58858e7..3eda19e 100644 --- a/kll/test/kll_sketch_float_test.js +++ b/kll/test/kll_sketch_float_test.js @@ -56,6 +56,11 @@ generate_udaf_test("kll_sketch_float_merge", { expected_output: kll_3 }); +generate_udf_test("kll_sketch_float_to_string", [{ + inputs: [ `CAST(NULL AS BYTES)` ], + expected_output: null +}]); + generate_udf_test("kll_sketch_float_to_string", [{ inputs: [ kll_3 ], expected_output: `'''### KLL sketch summary: @@ -77,51 +82,106 @@ generate_udf_test("kll_sketch_float_to_string", [{ '''` }]); +generate_udf_test("kll_sketch_float_get_rank", [{ + inputs: [ `CAST(NULL AS BYTES)`, 10, true ], + expected_output: null +}]); + generate_udf_test("kll_sketch_float_get_rank", [{ inputs: [ kll_3, 10, true ], expected_output: 0.5 }]); +generate_udf_test("kll_sketch_float_get_quantile", [{ + inputs: [ `CAST(NULL AS BYTES)`, 0.5, true ], + expected_output: null +}]); + generate_udf_test("kll_sketch_float_get_quantile", [{ inputs: [ kll_3, 0.5, true ], expected_output: 10 }]); +generate_udf_test("kll_sketch_float_get_min_value", [{ + inputs: [ `CAST(NULL AS BYTES)` ], + expected_output: null +}]); + generate_udf_test("kll_sketch_float_get_min_value", [{ inputs: [ kll_3 ], expected_output: 1 }]); +generate_udf_test("kll_sketch_float_get_max_value", [{ + inputs: [ `CAST(NULL AS BYTES)` ], + expected_output: null +}]); + generate_udf_test("kll_sketch_float_get_max_value", [{ inputs: [ kll_3 ], expected_output: 20 }]); +generate_udf_test("kll_sketch_float_get_n", [{ + inputs: [ `CAST(NULL AS BYTES)` ], + expected_output: null +}]); + generate_udf_test("kll_sketch_float_get_n", [{ inputs: [ kll_3 ], expected_output: 20 }]); +generate_udf_test("kll_sketch_float_get_num_retained", [{ + inputs: [ `CAST(NULL AS BYTES)` ], + expected_output: null +}]); + generate_udf_test("kll_sketch_float_get_num_retained", [{ inputs: [ kll_3 ], expected_output: 20 }]); +generate_udf_test("kll_sketch_float_get_normalized_rank_error", [{ + inputs: [ `CAST(NULL AS BYTES)`, true ], + expected_output: null +}]); + generate_udf_test("kll_sketch_float_get_normalized_rank_error", [{ inputs: [ kll_3, true ], expected_output: 0.01651561908528982 }]); +generate_udf_test("kll_sketch_float_get_pmf", [{ + inputs: [ `CAST(NULL AS BYTES)`, `[10.0]`, true ], + expected_output: `[]` +}]); + generate_udf_test("kll_sketch_float_get_pmf", [{ inputs: [ kll_3, `[10.0]`, true ], expected_output: `[0.5, 0.5]` }]); +generate_udf_test("kll_sketch_float_get_cdf", [{ + inputs: [ `CAST(NULL AS BYTES)`, `[10.0]`, true ], + expected_output: `[]` +}]); + generate_udf_test("kll_sketch_float_get_cdf", [{ inputs: [ kll_3, `[10.0]`, true ], expected_output: `[0.5, 1.0]` }]); +generate_udf_test("kll_sketch_float_kolmogorov_smirnov", [{ + inputs: [ kll_1, `CAST(NULL AS BYTES)`, 0.05 ], + expected_output: null +}]); + +generate_udf_test("kll_sketch_float_kolmogorov_smirnov", [{ + inputs: [ `CAST(NULL AS BYTES)`, kll_2, 0.05 ], + expected_output: null +}]); + generate_udf_test("kll_sketch_float_kolmogorov_smirnov", [{ inputs: [ kll_1, kll_1, 0.05 ], expected_output: false diff --git a/req/sqlx/req_sketch_float_get_cdf.sqlx b/req/sqlx/req_sketch_float_get_cdf.sqlx index 60734eb..32cc11a 100644 --- a/req/sqlx/req_sketch_float_get_cdf.sqlx +++ b/req/sqlx/req_sketch_float_get_cdf.sqlx @@ -53,6 +53,7 @@ For more information: - https://datasketches.apache.org/docs/REQ/ReqSketch.html ''' ) AS R""" +if (sketch == null) return null try { var sketchObject = null; try { diff --git a/req/sqlx/req_sketch_float_get_max_value.sqlx b/req/sqlx/req_sketch_float_get_max_value.sqlx index 806c069..9ca1a53 100644 --- a/req/sqlx/req_sketch_float_get_max_value.sqlx +++ b/req/sqlx/req_sketch_float_get_max_value.sqlx @@ -34,6 +34,7 @@ For more information: - https://datasketches.apache.org/docs/REQ/ReqSketch.html ''' ) AS R""" +if (sketch == null) return null try { var sketchObject = null; try { diff --git a/req/sqlx/req_sketch_float_get_min_value.sqlx b/req/sqlx/req_sketch_float_get_min_value.sqlx index debc648..da4d97a 100644 --- a/req/sqlx/req_sketch_float_get_min_value.sqlx +++ b/req/sqlx/req_sketch_float_get_min_value.sqlx @@ -34,6 +34,7 @@ For more information: - https://datasketches.apache.org/docs/REQ/ReqSketch.html ''' ) AS R""" +if (sketch == null) return null try { var sketchObject = null; try { diff --git a/req/sqlx/req_sketch_float_get_n.sqlx b/req/sqlx/req_sketch_float_get_n.sqlx index bf2068b..328521f 100644 --- a/req/sqlx/req_sketch_float_get_n.sqlx +++ b/req/sqlx/req_sketch_float_get_n.sqlx @@ -34,6 +34,7 @@ For more information: - https://datasketches.apache.org/docs/REQ/ReqSketch.html ''' ) AS R""" +if (sketch == null) return null try { var sketchObject = null; try { diff --git a/req/sqlx/req_sketch_float_get_num_retained.sqlx b/req/sqlx/req_sketch_float_get_num_retained.sqlx index ecc6f3c..9757fe1 100644 --- a/req/sqlx/req_sketch_float_get_num_retained.sqlx +++ b/req/sqlx/req_sketch_float_get_num_retained.sqlx @@ -34,6 +34,7 @@ For more information: - https://datasketches.apache.org/docs/REQ/ReqSketch.html ''' ) AS R""" +if (sketch == null) return null try { var sketchObject = null; try { diff --git a/req/sqlx/req_sketch_float_get_pmf.sqlx b/req/sqlx/req_sketch_float_get_pmf.sqlx index a91d673..a4860ff 100644 --- a/req/sqlx/req_sketch_float_get_pmf.sqlx +++ b/req/sqlx/req_sketch_float_get_pmf.sqlx @@ -55,6 +55,7 @@ For more information: - https://datasketches.apache.org/docs/REQ/ReqSketch.html ''' ) AS R""" +if (sketch == null) return null try { var sketchObject = null; try { diff --git a/req/sqlx/req_sketch_float_get_quantile.sqlx b/req/sqlx/req_sketch_float_get_quantile.sqlx index 0590ece..2b33e01 100644 --- a/req/sqlx/req_sketch_float_get_quantile.sqlx +++ b/req/sqlx/req_sketch_float_get_quantile.sqlx @@ -36,6 +36,7 @@ For more information: - https://datasketches.apache.org/docs/REQ/ReqSketch.html ''' ) AS R""" +if (sketch == null) return null try { var sketchObject = null; try { diff --git a/req/sqlx/req_sketch_float_get_rank.sqlx b/req/sqlx/req_sketch_float_get_rank.sqlx index 32d96fe..41574b9 100644 --- a/req/sqlx/req_sketch_float_get_rank.sqlx +++ b/req/sqlx/req_sketch_float_get_rank.sqlx @@ -36,6 +36,7 @@ For more information: - https://datasketches.apache.org/docs/REQ/ReqSketch.html ''' ) AS R""" +if (sketch == null) return null try { var sketchObject = null; try { diff --git a/req/sqlx/req_sketch_float_get_rank_lower_bound.sqlx b/req/sqlx/req_sketch_float_get_rank_lower_bound.sqlx index e03a551..2f93c3f 100644 --- a/req/sqlx/req_sketch_float_get_rank_lower_bound.sqlx +++ b/req/sqlx/req_sketch_float_get_rank_lower_bound.sqlx @@ -36,6 +36,7 @@ For more information: - https://datasketches.apache.org/docs/REQ/ReqSketch.html ''' ) AS R""" +if (sketch == null) return null try { var sketchObject = null; try { diff --git a/req/sqlx/req_sketch_float_get_rank_upper_bound.sqlx b/req/sqlx/req_sketch_float_get_rank_upper_bound.sqlx index ab37081..59ccce2 100644 --- a/req/sqlx/req_sketch_float_get_rank_upper_bound.sqlx +++ b/req/sqlx/req_sketch_float_get_rank_upper_bound.sqlx @@ -36,6 +36,7 @@ For more information: - https://datasketches.apache.org/docs/REQ/ReqSketch.html ''' ) AS R""" +if (sketch == null) return null try { var sketchObject = null; try { diff --git a/req/sqlx/req_sketch_float_to_string.sqlx b/req/sqlx/req_sketch_float_to_string.sqlx index 7fd7a57..943e1a3 100644 --- a/req/sqlx/req_sketch_float_to_string.sqlx +++ b/req/sqlx/req_sketch_float_to_string.sqlx @@ -34,6 +34,7 @@ For more information: - https://datasketches.apache.org/docs/REQ/ReqSketch.html ''' ) AS R""" +if (sketch == null) return null try { var sketchObject = null; try { diff --git a/req/test/req_sketch_float_test.js b/req/test/req_sketch_float_test.js index 7a3f9d3..0880e35 100644 --- a/req/test/req_sketch_float_test.js +++ b/req/test/req_sketch_float_test.js @@ -56,6 +56,11 @@ generate_udaf_test("req_sketch_float_merge", { expected_output: req_3 }); +generate_udf_test("req_sketch_float_to_string", [{ + inputs: [ `CAST(NULL AS BYTES)` ], + expected_output: null +}]); + generate_udf_test("req_sketch_float_to_string", [{ inputs: [ req_3 ], expected_output: `'''### REQ sketch summary: @@ -74,41 +79,81 @@ generate_udf_test("req_sketch_float_to_string", [{ '''` }]); +generate_udf_test("req_sketch_float_get_rank", [{ + inputs: [ `CAST(NULL AS BYTES)`, 10, true ], + expected_output: null +}]); + generate_udf_test("req_sketch_float_get_rank", [{ inputs: [ req_3, 10, true ], expected_output: 0.5 }]); +generate_udf_test("req_sketch_float_get_quantile", [{ + inputs: [ `CAST(NULL AS BYTES)`, 0.5, true ], + expected_output: null +}]); + generate_udf_test("req_sketch_float_get_quantile", [{ inputs: [ req_3, 0.5, true ], expected_output: 10 }]); +generate_udf_test("req_sketch_float_get_min_value", [{ + inputs: [ `CAST(NULL AS BYTES)` ], + expected_output: null +}]); + generate_udf_test("req_sketch_float_get_min_value", [{ inputs: [ req_3 ], expected_output: 1 }]); +generate_udf_test("req_sketch_float_get_max_value", [{ + inputs: [ `CAST(NULL AS BYTES)` ], + expected_output: null +}]); + generate_udf_test("req_sketch_float_get_max_value", [{ inputs: [ req_3 ], expected_output: 20 }]); +generate_udf_test("req_sketch_float_get_n", [{ + inputs: [ `CAST(NULL AS BYTES)` ], + expected_output: null +}]); + generate_udf_test("req_sketch_float_get_n", [{ inputs: [ req_3 ], expected_output: 20 }]); +generate_udf_test("req_sketch_float_get_num_retained", [{ + inputs: [ `CAST(NULL AS BYTES)` ], + expected_output: null +}]); + generate_udf_test("req_sketch_float_get_num_retained", [{ inputs: [ req_3 ], expected_output: 20 }]); +generate_udf_test("req_sketch_float_get_pmf", [{ + inputs: [ `CAST(NULL AS BYTES)`, `[10.0]`, true ], + expected_output: `[]` +}]); + generate_udf_test("req_sketch_float_get_pmf", [{ inputs: [ req_3, `[10.0]`, true ], expected_output: `[0.5, 0.5]` }]); +generate_udf_test("req_sketch_float_get_cdf", [{ + inputs: [ `CAST(NULL AS BYTES)`, `[10.0]`, true ], + expected_output: `[]` +}]); + generate_udf_test("req_sketch_float_get_cdf", [{ inputs: [ req_3, `[10.0]`, true ], expected_output: `[0.5, 1.0]` @@ -119,6 +164,16 @@ generate_udf_test("req_sketch_float_get_rank_lower_bound", [{ expected_output: 0.95 }]); +generate_udf_test("req_sketch_float_get_rank_lower_bound", [{ + inputs: [ `CAST(NULL AS BYTES)`, 0.95, 2 ], + expected_output: null +}]); + +generate_udf_test("req_sketch_float_get_rank_upper_bound", [{ + inputs: [ `CAST(NULL AS BYTES)`, 0.95, 2 ], + expected_output: null +}]); + generate_udf_test("req_sketch_float_get_rank_upper_bound", [{ inputs: [ req_3, 0.95, 2 ], expected_output: 0.95 diff --git a/tdigest/sqlx/tdigest_double_get_max_value.sqlx b/tdigest/sqlx/tdigest_double_get_max_value.sqlx index 496bb6d..c40c3f1 100644 --- a/tdigest/sqlx/tdigest_double_get_max_value.sqlx +++ b/tdigest/sqlx/tdigest_double_get_max_value.sqlx @@ -34,6 +34,7 @@ For more information: - https://datasketches.apache.org/docs/tdigest/tdigest.html ''' ) AS R""" +if (sketch == null) return null try { var sketchObject = null; try { diff --git a/tdigest/sqlx/tdigest_double_get_min_value.sqlx b/tdigest/sqlx/tdigest_double_get_min_value.sqlx index 33a37d9..728eaa3 100644 --- a/tdigest/sqlx/tdigest_double_get_min_value.sqlx +++ b/tdigest/sqlx/tdigest_double_get_min_value.sqlx @@ -34,6 +34,7 @@ For more information: - https://datasketches.apache.org/docs/tdigest/tdigest.html ''' ) AS R""" +if (sketch == null) return null try { var sketchObject = null; try { diff --git a/tdigest/sqlx/tdigest_double_get_quantile.sqlx b/tdigest/sqlx/tdigest_double_get_quantile.sqlx index 6d6704b..81fa073 100644 --- a/tdigest/sqlx/tdigest_double_get_quantile.sqlx +++ b/tdigest/sqlx/tdigest_double_get_quantile.sqlx @@ -35,6 +35,7 @@ For more information: - https://datasketches.apache.org/docs/tdigest/tdigest.html ''' ) AS R""" +if (sketch == null) return null try { var sketchObject = null; try { diff --git a/tdigest/sqlx/tdigest_double_get_rank.sqlx b/tdigest/sqlx/tdigest_double_get_rank.sqlx index 2578517..e0b2ac8 100644 --- a/tdigest/sqlx/tdigest_double_get_rank.sqlx +++ b/tdigest/sqlx/tdigest_double_get_rank.sqlx @@ -35,6 +35,7 @@ For more information: - https://datasketches.apache.org/docs/tdigest/tdigest.html ''' ) AS R""" +if (sketch == null) return null try { var sketchObject = null; try { diff --git a/tdigest/sqlx/tdigest_double_get_total_weight.sqlx b/tdigest/sqlx/tdigest_double_get_total_weight.sqlx index 08b2efa..49d5ca4 100644 --- a/tdigest/sqlx/tdigest_double_get_total_weight.sqlx +++ b/tdigest/sqlx/tdigest_double_get_total_weight.sqlx @@ -34,6 +34,7 @@ For more information: - https://datasketches.apache.org/docs/tdigest/tdigest.html ''' ) AS R""" +if (sketch == null) return null try { var sketchObject = null; try { diff --git a/tdigest/sqlx/tdigest_double_to_string.sqlx b/tdigest/sqlx/tdigest_double_to_string.sqlx index da9b727..1d8aa80 100644 --- a/tdigest/sqlx/tdigest_double_to_string.sqlx +++ b/tdigest/sqlx/tdigest_double_to_string.sqlx @@ -34,6 +34,7 @@ For more information: - https://datasketches.apache.org/docs/tdigest/tdigest.html ''' ) AS R""" +if (sketch == null) return null try { var sketchObject = null; try { diff --git a/tdigest/test/tdigest_test.js b/tdigest/test/tdigest_test.js index 7651bc1..6855084 100644 --- a/tdigest/test/tdigest_test.js +++ b/tdigest/test/tdigest_test.js @@ -56,6 +56,11 @@ generate_udaf_test("tdigest_double_merge", { expected_output: td_3 }); +generate_udf_test("tdigest_double_to_string", [{ + inputs: [ `CAST(NULL AS BYTES)` ], + expected_output: null +}]); + generate_udf_test("tdigest_double_to_string", [{ inputs: [ td_3 ], expected_output: `'''### t-Digest summary: @@ -73,26 +78,51 @@ generate_udf_test("tdigest_double_to_string", [{ '''` }]); +generate_udf_test("tdigest_double_get_rank", [{ + inputs: [ `CAST(NULL AS BYTES)`, 10 ], + expected_output: null +}]); + generate_udf_test("tdigest_double_get_rank", [{ inputs: [ td_3, 10 ], expected_output: 0.475 }]); +generate_udf_test("tdigest_double_get_quantile", [{ + inputs: [ `CAST(NULL AS BYTES)`, 0.5 ], + expected_output: null +}]); + generate_udf_test("tdigest_double_get_quantile", [{ inputs: [ td_3, 0.5 ], expected_output: 11 }]); +generate_udf_test("tdigest_double_get_min_value", [{ + inputs: [ `CAST(NULL AS BYTES)` ], + expected_output: null +}]); + generate_udf_test("tdigest_double_get_min_value", [{ inputs: [ td_3 ], expected_output: 1 }]); +generate_udf_test("tdigest_double_get_max_value", [{ + inputs: [ `CAST(NULL AS BYTES)` ], + expected_output: null +}]); + generate_udf_test("tdigest_double_get_max_value", [{ inputs: [ td_3 ], expected_output: 20 }]); +generate_udf_test("tdigest_double_get_total_weight", [{ + inputs: [ `CAST(NULL AS BYTES)` ], + expected_output: null +}]); + generate_udf_test("tdigest_double_get_total_weight", [{ inputs: [ td_3 ], expected_output: 20 diff --git a/theta/sqlx/theta_sketch_a_not_b_seed.sqlx b/theta/sqlx/theta_sketch_a_not_b_seed.sqlx index e7fb8f3..eaffdb6 100644 --- a/theta/sqlx/theta_sketch_a_not_b_seed.sqlx +++ b/theta/sqlx/theta_sketch_a_not_b_seed.sqlx @@ -36,6 +36,7 @@ For more information: - https://datasketches.apache.org/docs/Theta/ThetaSketches.html ''' ) AS R""" +if (sketchA == null || sketchB == null) return null const default_seed = BigInt(Module.DEFAULT_SEED); try { return Module.thetaAnotBCompressed(sketchA, sketchB, seed ? BigInt(seed) : default_seed); diff --git a/theta/sqlx/theta_sketch_get_estimate_and_bounds_seed.sqlx b/theta/sqlx/theta_sketch_get_estimate_and_bounds_seed.sqlx index 0adac36..9985474 100644 --- a/theta/sqlx/theta_sketch_get_estimate_and_bounds_seed.sqlx +++ b/theta/sqlx/theta_sketch_get_estimate_and_bounds_seed.sqlx @@ -41,6 +41,7 @@ For more information: - https://datasketches.apache.org/docs/Theta/ThetaSketches.html ''' ) AS R""" +if (sketch == null) return null const default_seed = BigInt(Module.DEFAULT_SEED); try { const result = Module.compact_theta_sketch.getEstimateAndBoundsFromBytes(sketch, Number(num_std_devs), seed ? BigInt(seed) : default_seed); diff --git a/theta/sqlx/theta_sketch_get_estimate_seed.sqlx b/theta/sqlx/theta_sketch_get_estimate_seed.sqlx index d60f082..539cd30 100644 --- a/theta/sqlx/theta_sketch_get_estimate_seed.sqlx +++ b/theta/sqlx/theta_sketch_get_estimate_seed.sqlx @@ -35,6 +35,7 @@ For more information: - https://datasketches.apache.org/docs/Theta/ThetaSketches.html ''' ) AS R""" +if (sketch == null) return null const default_seed = BigInt(Module.DEFAULT_SEED); try { return Module.compact_theta_sketch.getEstimateFromBytes(sketch, seed ? BigInt(seed) : default_seed); diff --git a/theta/sqlx/theta_sketch_get_num_retained_seed.sqlx b/theta/sqlx/theta_sketch_get_num_retained_seed.sqlx index 890174a..cc03749 100644 --- a/theta/sqlx/theta_sketch_get_num_retained_seed.sqlx +++ b/theta/sqlx/theta_sketch_get_num_retained_seed.sqlx @@ -35,6 +35,7 @@ For more information: - https://datasketches.apache.org/docs/Theta/ThetaSketches.html ''' ) AS R""" +if (sketch == null) return null const default_seed = BigInt(Module.DEFAULT_SEED); try { return Module.compact_theta_sketch.getNumRetainedFromBytes(sketch, seed ? BigInt(seed) : default_seed); diff --git a/theta/sqlx/theta_sketch_get_theta_seed.sqlx b/theta/sqlx/theta_sketch_get_theta_seed.sqlx index 1c505b8..f34d372 100644 --- a/theta/sqlx/theta_sketch_get_theta_seed.sqlx +++ b/theta/sqlx/theta_sketch_get_theta_seed.sqlx @@ -35,6 +35,7 @@ For more information: - https://datasketches.apache.org/docs/Theta/ThetaSketches.html ''' ) AS R""" +if (sketch == null) return null const default_seed = BigInt(Module.DEFAULT_SEED); try { return Module.compact_theta_sketch.getThetaFromBytes(sketch, seed ? BigInt(seed) : default_seed); diff --git a/theta/sqlx/theta_sketch_intersection_seed.sqlx b/theta/sqlx/theta_sketch_intersection_seed.sqlx index 75ffe9a..5e16046 100644 --- a/theta/sqlx/theta_sketch_intersection_seed.sqlx +++ b/theta/sqlx/theta_sketch_intersection_seed.sqlx @@ -36,6 +36,7 @@ For more information: - https://datasketches.apache.org/docs/Theta/ThetaSketches.html ''' ) AS R""" +if (sketchA == null || sketchB == null) return null const default_seed = BigInt(Module.DEFAULT_SEED); try { return Module.thetaIntersectionCompressed(sketchA, sketchB, seed ? BigInt(seed) : default_seed); diff --git a/theta/sqlx/theta_sketch_jaccard_similarity_seed.sqlx b/theta/sqlx/theta_sketch_jaccard_similarity_seed.sqlx index e2ce69c..6544e8d 100644 --- a/theta/sqlx/theta_sketch_jaccard_similarity_seed.sqlx +++ b/theta/sqlx/theta_sketch_jaccard_similarity_seed.sqlx @@ -39,6 +39,7 @@ For more information: - https://datasketches.apache.org/docs/Theta/ThetaSketches.html ''' ) AS R""" +if (sketchA == null || sketchB == null) return null const default_seed = BigInt(Module.DEFAULT_SEED); try { const jaccard = Module.thetaJaccardSimilarity(sketchA, sketchB, seed == null ? default_seed : BigInt(seed)); diff --git a/theta/sqlx/theta_sketch_to_string_seed.sqlx b/theta/sqlx/theta_sketch_to_string_seed.sqlx index dde5f60..727021c 100644 --- a/theta/sqlx/theta_sketch_to_string_seed.sqlx +++ b/theta/sqlx/theta_sketch_to_string_seed.sqlx @@ -35,6 +35,7 @@ For more information: - https://datasketches.apache.org/docs/Theta/ThetaSketches.html ''' ) AS R""" +if (sketch == null) return null const default_seed = BigInt(Module.DEFAULT_SEED); try { return Module.compact_theta_sketch.toStringFromBytes(sketch, seed ? BigInt(seed) : default_seed); diff --git a/theta/sqlx/theta_sketch_union_lgk_seed.sqlx b/theta/sqlx/theta_sketch_union_lgk_seed.sqlx index 90cbed2..979c2aa 100644 --- a/theta/sqlx/theta_sketch_union_lgk_seed.sqlx +++ b/theta/sqlx/theta_sketch_union_lgk_seed.sqlx @@ -37,6 +37,7 @@ For more information: - https://datasketches.apache.org/docs/Theta/ThetaSketches.html ''' ) AS R""" +if (sketchA == null || sketchB == null) return null const default_lg_k = Number(Module.DEFAULT_LG_K); const default_seed = BigInt(Module.DEFAULT_SEED); try { diff --git a/theta/test/theta_sketch_test.js b/theta/test/theta_sketch_test.js index c157ce8..9af0754 100644 --- a/theta/test/theta_sketch_test.js +++ b/theta/test/theta_sketch_test.js @@ -40,7 +40,6 @@ generate_udaf_test("theta_sketch_agg_union", { expected_output: theta_empty }); - const theta_1 = `FROM_BASE64('AQQDPwEazJMDEIFfUcrcGW6ylF+DQ0nLOjDZ/9ze6gyQ')`; generate_udaf_test("theta_sketch_agg_string", { @@ -57,6 +56,16 @@ generate_udaf_test("theta_sketch_agg_string", { expected_output: theta_2 }); +generate_udf_test("theta_sketch_union", [{ + inputs: [ theta_1, `CAST(NULL AS BYTES)`], + expected_output: null +}]); + +generate_udf_test("theta_sketch_union", [{ + inputs: [ `CAST(NULL AS BYTES)`, theta_2 ], + expected_output: null +}]); + const theta_union_1 = `FROM_BASE64('AQQDPgEazJMFIQK+o5W4Mt5oB7X3Z6MJcYknIFaWEI3+GlXsNvTgWyADqD2ToYTc')`; generate_udf_test("theta_sketch_union", [{ @@ -64,11 +73,21 @@ generate_udf_test("theta_sketch_union", [{ expected_output: theta_union_1 }]); +generate_udf_test("theta_sketch_get_estimate", [{ + inputs: [ `CAST(NULL AS BYTES)` ], + expected_output: null +}]); + generate_udf_test("theta_sketch_get_estimate", [{ inputs: [ theta_union_1 ], expected_output: 5 }]); +generate_udf_test("theta_sketch_to_string", [{ + inputs: [ `CAST(NULL AS BYTES)` ], + expected_output: null +}]); + generate_udf_test("theta_sketch_to_string", [{ inputs: [ theta_union_1 ], expected_output: `'''### Theta sketch summary: @@ -86,6 +105,16 @@ generate_udf_test("theta_sketch_to_string", [{ '''` }]); +generate_udf_test("theta_sketch_intersection", [{ + inputs: [ theta_1, `CAST(NULL AS BYTES)` ], + expected_output: null +}]); + +generate_udf_test("theta_sketch_intersection", [{ + inputs: [ `CAST(NULL AS BYTES)`, theta_2 ], + expected_output: null +}]); + const theta_intersection = `FROM_BASE64('AQMDAAAazJO3DG7lqK9ACA==')`; generate_udf_test("theta_sketch_intersection", [{ @@ -98,6 +127,16 @@ generate_udf_test("theta_sketch_get_estimate", [{ expected_output: 1 }]); +generate_udf_test("theta_sketch_a_not_b", [{ + inputs: [ theta_1, `CAST(NULL AS BYTES)` ], + expected_output: null +}]); + +generate_udf_test("theta_sketch_a_not_b", [{ + inputs: [ `CAST(NULL AS BYTES)`, theta_2 ], + expected_output: null +}]); + const theta_a_not_b = `FROM_BASE64('AQQDPwEazJMCacuPE2yA/wsYbP/ub3UGSA==')`; generate_udf_test("theta_sketch_a_not_b", [{ @@ -110,6 +149,16 @@ generate_udf_test("theta_sketch_get_estimate", [{ expected_output: 2 }]); +generate_udf_test("theta_sketch_jaccard_similarity", [{ + inputs: [ theta_1, `CAST(NULL AS BYTES)` ], + expected_output: null +}]); + +generate_udf_test("theta_sketch_jaccard_similarity", [{ + inputs: [ `CAST(NULL AS BYTES)`, theta_2 ], + expected_output: null +}]); + generate_udf_test("theta_sketch_jaccard_similarity", [{ inputs: [ theta_1, theta_2 ], expected_output: `STRUCT(0.2 AS lower_bound, 0.2 AS estimate, 0.2 AS upper_bound)` @@ -139,16 +188,31 @@ generate_udaf_test("theta_sketch_agg_union", { expected_output: theta_union_2 }); +generate_udf_test("theta_sketch_get_estimate_and_bounds", [{ + inputs: [ `CAST(NULL AS BYTES)`, 1 ], + expected_output: null +}]); + generate_udf_test("theta_sketch_get_estimate_and_bounds", [{ inputs: [ theta_union_2, 3 ], expected_output: `STRUCT(19736.541348415347 AS estimate, 18927.112205958525 AS lower_bound, 20580.437426810073 AS upper_bound)` }]); +generate_udf_test("theta_sketch_get_theta", [{ + inputs: [ `CAST(NULL AS BYTES)` ], + expected_output: null +}]); + generate_udf_test("theta_sketch_get_theta", [{ inputs: [ theta_union_2 ], expected_output: 0.20753382913916013 }]); +generate_udf_test("theta_sketch_get_num_retained", [{ + inputs: [ `CAST(NULL AS BYTES)` ], + expected_output: null +}]); + generate_udf_test("theta_sketch_get_num_retained", [{ inputs: [ theta_union_2 ], expected_output: 4096 diff --git a/tuple/sqlx/tuple_sketch_int64_a_not_b_seed.sqlx b/tuple/sqlx/tuple_sketch_int64_a_not_b_seed.sqlx index 6c640bb..43c3314 100644 --- a/tuple/sqlx/tuple_sketch_int64_a_not_b_seed.sqlx +++ b/tuple/sqlx/tuple_sketch_int64_a_not_b_seed.sqlx @@ -38,6 +38,7 @@ For more information: - https://datasketches.apache.org/docs/Tuple/TupleSketches.html ''' ) AS R""" +if (sketchA == null || sketchB == null) return null try { return Module.tupleAnotBInt64(sketchA, sketchB, seed ? BigInt(seed) : BigInt(Module.DEFAULT_SEED)); } catch (e) { diff --git a/tuple/sqlx/tuple_sketch_int64_filter_low_high_seed.sqlx b/tuple/sqlx/tuple_sketch_int64_filter_low_high_seed.sqlx index 1346be0..40629e5 100644 --- a/tuple/sqlx/tuple_sketch_int64_filter_low_high_seed.sqlx +++ b/tuple/sqlx/tuple_sketch_int64_filter_low_high_seed.sqlx @@ -41,7 +41,7 @@ For more information: - https://datasketches.apache.org/docs/Tuple/TupleSketches.html ''' ) AS R""" -var sketchObject = null; +if (sketch == null) return null try { return Module.compact_tuple_sketch_int64.filterLowHigh(sketch, Number(low), Number(high), seed ? BigInt(seed) : BigInt(Module.DEFAULT_SEED)); } catch (e) { diff --git a/tuple/sqlx/tuple_sketch_int64_from_theta_sketch_seed.sqlx b/tuple/sqlx/tuple_sketch_int64_from_theta_sketch_seed.sqlx index a93fd8e..2a92975 100644 --- a/tuple/sqlx/tuple_sketch_int64_from_theta_sketch_seed.sqlx +++ b/tuple/sqlx/tuple_sketch_int64_from_theta_sketch_seed.sqlx @@ -37,6 +37,7 @@ For more information: - https://datasketches.apache.org/docs/Tuple/TupleSketches.html ''' ) AS R""" +if (sketch == null) return null try { return Module.compact_tuple_sketch_int64.convertTheta(sketch, BigInt(value), seed ? BigInt(seed) : BigInt(Module.DEFAULT_SEED)); } catch (e) { diff --git a/tuple/sqlx/tuple_sketch_int64_get_estimate_and_bounds_seed.sqlx b/tuple/sqlx/tuple_sketch_int64_get_estimate_and_bounds_seed.sqlx index a11b66b..257c152 100644 --- a/tuple/sqlx/tuple_sketch_int64_get_estimate_and_bounds_seed.sqlx +++ b/tuple/sqlx/tuple_sketch_int64_get_estimate_and_bounds_seed.sqlx @@ -43,6 +43,7 @@ For more information: - https://datasketches.apache.org/docs/Tuple/TupleSketches.html ''' ) AS R""" +if (sketch == null) return null try { return Module.compact_tuple_sketch_int64.getEstimateAndBounds(sketch, Number(num_std_devs), seed ? BigInt(seed) : BigInt(Module.DEFAULT_SEED)); } catch (e) { diff --git a/tuple/sqlx/tuple_sketch_int64_get_estimate_seed.sqlx b/tuple/sqlx/tuple_sketch_int64_get_estimate_seed.sqlx index 71b1161..8085da0 100644 --- a/tuple/sqlx/tuple_sketch_int64_get_estimate_seed.sqlx +++ b/tuple/sqlx/tuple_sketch_int64_get_estimate_seed.sqlx @@ -37,6 +37,7 @@ For more information: - https://datasketches.apache.org/docs/Tuple/TupleSketches.html ''' ) AS R""" +if (sketch == null) return null try { return Module.compact_tuple_sketch_int64.getEstimate(sketch, seed ? BigInt(seed) : BigInt(Module.DEFAULT_SEED)); } catch (e) { diff --git a/tuple/sqlx/tuple_sketch_int64_get_num_retained_seed.sqlx b/tuple/sqlx/tuple_sketch_int64_get_num_retained_seed.sqlx index 81b6e63..115962a 100644 --- a/tuple/sqlx/tuple_sketch_int64_get_num_retained_seed.sqlx +++ b/tuple/sqlx/tuple_sketch_int64_get_num_retained_seed.sqlx @@ -36,6 +36,7 @@ For more information: - https://datasketches.apache.org/docs/Tuple/TupleSketches.html ''' ) AS R""" +if (sketch == null) return null try { return Module.compact_tuple_sketch_int64.getNumRetained(sketch, seed ? BigInt(seed) : BigInt(Module.DEFAULT_SEED)); } catch (e) { diff --git a/tuple/sqlx/tuple_sketch_int64_get_sum_estimate_and_bounds_seed.sqlx b/tuple/sqlx/tuple_sketch_int64_get_sum_estimate_and_bounds_seed.sqlx index 8ca15e2..1741607 100644 --- a/tuple/sqlx/tuple_sketch_int64_get_sum_estimate_and_bounds_seed.sqlx +++ b/tuple/sqlx/tuple_sketch_int64_get_sum_estimate_and_bounds_seed.sqlx @@ -44,6 +44,7 @@ For more information: - https://datasketches.apache.org/docs/Tuple/TupleSketches.html ''' ) AS R""" +if (sketch == null) return null try { return Module.compact_tuple_sketch_int64.getSumEstimateAndBounds(sketch, Number(num_std_devs), seed ? BigInt(seed) : BigInt(Module.DEFAULT_SEED)); } catch (e) { diff --git a/tuple/sqlx/tuple_sketch_int64_get_theta_seed.sqlx b/tuple/sqlx/tuple_sketch_int64_get_theta_seed.sqlx index 7a15493..a42e4f8 100644 --- a/tuple/sqlx/tuple_sketch_int64_get_theta_seed.sqlx +++ b/tuple/sqlx/tuple_sketch_int64_get_theta_seed.sqlx @@ -37,6 +37,7 @@ For more information: - https://datasketches.apache.org/docs/Tuple/TupleSketches.html ''' ) AS R""" +if (sketch == null) return null try { return Module.compact_tuple_sketch_int64.getTheta(sketch, seed ? BigInt(seed) : BigInt(Module.DEFAULT_SEED)); } catch (e) { diff --git a/tuple/sqlx/tuple_sketch_int64_intersection_seed_mode.sqlx b/tuple/sqlx/tuple_sketch_int64_intersection_seed_mode.sqlx index fb48087..742e000 100644 --- a/tuple/sqlx/tuple_sketch_int64_intersection_seed_mode.sqlx +++ b/tuple/sqlx/tuple_sketch_int64_intersection_seed_mode.sqlx @@ -38,6 +38,7 @@ For more information: - https://datasketches.apache.org/docs/Tuple/TupleSketches.html ''' ) AS R""" +if (sketchA == null || sketchB == null) return null const default_seed = BigInt(Module.DEFAULT_SEED); try { return Module.tupleIntersectionInt64(sketchA, sketchB, seed ? BigInt(seed) : default_seed, mode ? mode : ""); diff --git a/tuple/sqlx/tuple_sketch_int64_jaccard_similarity_seed.sqlx b/tuple/sqlx/tuple_sketch_int64_jaccard_similarity_seed.sqlx index 6c786f3..9fed91f 100644 --- a/tuple/sqlx/tuple_sketch_int64_jaccard_similarity_seed.sqlx +++ b/tuple/sqlx/tuple_sketch_int64_jaccard_similarity_seed.sqlx @@ -40,6 +40,7 @@ For more information: - https://datasketches.apache.org/docs/Tuple/TupleSketches.html ''' ) AS R""" +if (sketchA == null || sketchB == null) return null const default_seed = BigInt(Module.DEFAULT_SEED); try { const jaccard = Module.tupleInt64JaccardSimilarity(sketchA, sketchB, seed == null ? default_seed : BigInt(seed)); diff --git a/tuple/sqlx/tuple_sketch_int64_to_string_seed.sqlx b/tuple/sqlx/tuple_sketch_int64_to_string_seed.sqlx index 7cba0b3..5afc776 100644 --- a/tuple/sqlx/tuple_sketch_int64_to_string_seed.sqlx +++ b/tuple/sqlx/tuple_sketch_int64_to_string_seed.sqlx @@ -37,6 +37,7 @@ For more information: - https://datasketches.apache.org/docs/Tuple/TupleSketches.html ''' ) AS R""" +if (sketch == null) return null try { return Module.compact_tuple_sketch_int64.toString(sketch, seed ? BigInt(seed) : BigInt(Module.DEFAULT_SEED)); } catch (e) { diff --git a/tuple/sqlx/tuple_sketch_int64_union_lgk_seed_mode.sqlx b/tuple/sqlx/tuple_sketch_int64_union_lgk_seed_mode.sqlx index 06ffe40..f3df795 100644 --- a/tuple/sqlx/tuple_sketch_int64_union_lgk_seed_mode.sqlx +++ b/tuple/sqlx/tuple_sketch_int64_union_lgk_seed_mode.sqlx @@ -38,6 +38,7 @@ For more information: - https://datasketches.apache.org/docs/Tuple/TupleSketches.html ''' ) AS R""" +if (sketchA == null || sketchB == null) return null const default_lg_k = Number(Module.DEFAULT_LG_K); const default_seed = BigInt(Module.DEFAULT_SEED); try { diff --git a/tuple/test/tuple_sketch_int64_test.js b/tuple/test/tuple_sketch_int64_test.js index b30adfa..a7db5e3 100644 --- a/tuple/test/tuple_sketch_int64_test.js +++ b/tuple/test/tuple_sketch_int64_test.js @@ -56,6 +56,16 @@ generate_udaf_test("tuple_sketch_int64_agg_string", { expected_output: tuple_2 }); +generate_udf_test("tuple_sketch_int64_union", [{ + inputs: [ tuple_1, `CAST(NULL AS BYTES)` ], + expected_output: null +}]); + +generate_udf_test("tuple_sketch_int64_union", [{ + inputs: [ `CAST(NULL AS BYTES)`, tuple_2 ], + expected_output: null +}]); + const tuple_union_1 = `FROM_BASE64('AgMJAQAazJMFAAAAAAAAALcMbuWor0AIAgAAAAAAAABOPehbCCvBLgEAAAAAAAAAhX9AtonH5TQBAAAAAAAAAOBfNe11HQBzAQAAAAAAAAAXwR1ShQcBewEAAAAAAAAA')`; generate_udf_test("tuple_sketch_int64_union", [{ @@ -63,11 +73,21 @@ generate_udf_test("tuple_sketch_int64_union", [{ expected_output: tuple_union_1 }]); +generate_udf_test("tuple_sketch_int64_get_estimate", [{ + inputs: [ `CAST(NULL AS BYTES)` ], + expected_output: null +}]); + generate_udf_test("tuple_sketch_int64_get_estimate", [{ inputs: [ tuple_union_1 ], expected_output: 5 }]); +generate_udf_test("tuple_sketch_int64_to_string", [{ + inputs: [ `CAST(NULL AS BYTES)` ], + expected_output: null +}]); + generate_udf_test("tuple_sketch_int64_to_string", [{ inputs: [ tuple_union_1 ], expected_output: `'''### Tuple sketch summary: @@ -85,6 +105,16 @@ generate_udf_test("tuple_sketch_int64_to_string", [{ '''` }]); +generate_udf_test("tuple_sketch_int64_intersection", [{ + inputs: [ tuple_1, `CAST(NULL AS BYTES)` ], + expected_output: null +}]); + +generate_udf_test("tuple_sketch_int64_intersection", [{ + inputs: [ `CAST(NULL AS BYTES)`, tuple_2 ], + expected_output: null +}]); + const tuple_intersection = `FROM_BASE64('AQMJAQAazJO3DG7lqK9ACAIAAAAAAAAA')`; generate_udf_test("tuple_sketch_int64_intersection", [{ @@ -97,6 +127,16 @@ generate_udf_test("tuple_sketch_int64_get_estimate", [{ expected_output: 1 }]); +generate_udf_test("tuple_sketch_int64_a_not_b", [{ + inputs: [ tuple_1, `CAST(NULL AS BYTES)` ], + expected_output: null +}]); + +generate_udf_test("tuple_sketch_int64_a_not_b", [{ + inputs: [ `CAST(NULL AS BYTES)`, tuple_2 ], + expected_output: null +}]); + const tuple_a_not_b = `FROM_BASE64('AgMJAQAazJMCAAAAAAAAAIV/QLaJx+U0AQAAAAAAAAAXwR1ShQcBewEAAAAAAAAA')`; generate_udf_test("tuple_sketch_int64_a_not_b", [{ @@ -109,6 +149,16 @@ generate_udf_test("tuple_sketch_int64_get_estimate", [{ expected_output: 2 }]); +generate_udf_test("tuple_sketch_int64_jaccard_similarity", [{ + inputs: [ tuple_1, `CAST(NULL AS BYTES)` ], + expected_output: null +}]); + +generate_udf_test("tuple_sketch_int64_jaccard_similarity", [{ + inputs: [ `CAST(NULL AS BYTES)`, tuple_2 ], + expected_output: null +}]); + generate_udf_test("tuple_sketch_int64_jaccard_similarity", [{ inputs: [ tuple_1, tuple_2 ], expected_output: `STRUCT(0.2 AS lower_bound, 0.2 AS estimate, 0.2 AS upper_bound)` @@ -138,31 +188,61 @@ generate_udaf_test("tuple_sketch_int64_agg_union", { expected_output: tuple_union_2 }); +generate_udf_test("tuple_sketch_int64_get_estimate_and_bounds", [{ + inputs: [ `CAST(NULL AS BYTES)`, 3 ], + expected_output: null +}]); + generate_udf_test("tuple_sketch_int64_get_estimate_and_bounds", [{ inputs: [ tuple_union_2, 3 ], expected_output: `STRUCT(5 AS estimate, 5 AS lower_bound, 5 AS upper_bound)` }]); +generate_udf_test("tuple_sketch_int64_get_theta", [{ + inputs: [ `CAST(NULL AS BYTES)` ], + expected_output: null +}]); + generate_udf_test("tuple_sketch_int64_get_theta", [{ inputs: [ tuple_union_2 ], expected_output: 1 }]); +generate_udf_test("tuple_sketch_int64_get_num_retained", [{ + inputs: [ `CAST(NULL AS BYTES)` ], + expected_output: null +}]); + generate_udf_test("tuple_sketch_int64_get_num_retained", [{ inputs: [ tuple_union_2 ], expected_output: 5 }]); +generate_udf_test("tuple_sketch_int64_from_theta_sketch", [{ + inputs: [ `CAST(NULL AS BYTES)`, 1 ], + expected_output: null +}]); + generate_udf_test("tuple_sketch_int64_from_theta_sketch", [{ inputs: [ `FROM_BASE64('AQQDPwEazJMDEIFfUcrcGW6ylF+DQ0nLOjDZ/9ze6gyQ')`, 1 ], expected_output: tuple_1 }]); +generate_udf_test("tuple_sketch_int64_get_sum_estimate_and_bounds", [{ + inputs: [ `CAST(NULL AS BYTES)`, 2 ], + expected_output: null +}]); + generate_udf_test("tuple_sketch_int64_get_sum_estimate_and_bounds", [{ inputs: [ tuple_union_2, 2 ], expected_output: `STRUCT(6 AS sum_estimate, 6 AS sum_lower_bound, 6 AS sum_upper_bound)` }]); +generate_udf_test("tuple_sketch_int64_filter_low_high", [{ + inputs: [ `CAST(NULL AS BYTES)`, 1, 1 ], + expected_output: null +}]); + generate_udf_test("tuple_sketch_int64_filter_low_high", [{ inputs: [ tuple_union_2, 1, 1 ], expected_output: `FROM_BASE64('AgMJAQAazJMEAAAAAAAAABX5fcu9hqEFAQAAAAAAAABA3i7hyds9CAEAAAAAAAAAvTJzckaRzBQBAAAAAAAAAMOX/BKBcJ0eAQAAAAAAAAA=')` --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
