This is an automated email from the ASF dual-hosted git repository. alsay pushed a commit to branch null_handling in repository https://gitbox.apache.org/repos/asf/datasketches-bigquery.git
commit a23786dd46d002124291cd73da50ec3d2e07e698 Author: AlexanderSaydakov <[email protected]> AuthorDate: Sun Apr 13 20:23:22 2025 -0700 aggs produce null with null input --- cpc/sqlx/cpc_sketch_agg_int64_lgk_seed.sqlx | 7 ++--- cpc/sqlx/cpc_sketch_agg_string_lgk_seed.sqlx | 7 ++--- cpc/sqlx/cpc_sketch_agg_union_lgk_seed.sqlx | 19 +++++++++--- cpc/test/cpc_sketch_test.js | 8 ++--- fi/sqlx/frequent_strings_sketch_build.sqlx | 20 +++++++------ fi/sqlx/frequent_strings_sketch_merge.sqlx | 20 +++++++------ fi/test/frequent_strings_sketch_test.js | 6 ++-- hll/sqlx/hll_sketch_agg_int64_lgk_type.sqlx | 7 ++--- hll/sqlx/hll_sketch_agg_string_lgk_type.sqlx | 7 ++--- hll/sqlx/hll_sketch_agg_union_lgk_type.sqlx | 17 ++++++++--- hll/test/hll_sketch_test.js | 8 ++--- kll/sqlx/kll_sketch_float_build_k.sqlx | 35 +++++++++++----------- kll/sqlx/kll_sketch_float_merge_k.sqlx | 35 +++++++++++----------- kll/test/kll_sketch_float_test.js | 6 ++-- req/sqlx/req_sketch_float_build_k_hra.sqlx | 33 ++++++++++---------- req/sqlx/req_sketch_float_merge_k_hra.sqlx | 33 ++++++++++---------- req/test/req_sketch_float_test.js | 6 ++-- tdigest/sqlx/tdigest_double_build_k.sqlx | 20 +++++++------ tdigest/sqlx/tdigest_double_merge_k.sqlx | 20 +++++++------ tdigest/test/tdigest_test.js | 6 ++-- theta/sqlx/theta_sketch_agg_int64_lgk_seed_p.sqlx | 7 ++--- theta/sqlx/theta_sketch_agg_string_lgk_seed_p.sqlx | 7 ++--- theta/sqlx/theta_sketch_agg_union_lgk_seed.sqlx | 20 ++++++++++--- theta/test/theta_sketch_test.js | 8 ++--- ...ple_sketch_int64_agg_int64_lgk_seed_p_mode.sqlx | 7 ++--- ...le_sketch_int64_agg_string_lgk_seed_p_mode.sqlx | 7 ++--- ...tuple_sketch_int64_agg_union_lgk_seed_mode.sqlx | 14 ++++++--- tuple/test/tuple_sketch_int64_test.js | 8 ++--- 28 files changed, 206 insertions(+), 192 deletions(-) diff --git a/cpc/sqlx/cpc_sketch_agg_int64_lgk_seed.sqlx b/cpc/sqlx/cpc_sketch_agg_int64_lgk_seed.sqlx index d664c7b..c20a7f6 100644 --- a/cpc/sqlx/cpc_sketch_agg_int64_lgk_seed.sqlx +++ b/cpc/sqlx/cpc_sketch_agg_int64_lgk_seed.sqlx @@ -89,11 +89,10 @@ export function serialize(state) { } } else if (state.union != null) { state.serialized = state.union.getResultAsUint8Array(); - } else { - if (state.sketch == null) { - state.sketch = new Module.cpc_sketch(state.lg_k, state.seed); - } + } else if (state.sketch != null) { state.serialized = state.sketch.serializeAsUint8Array(); + } else { + state.serialized = null; } return { lg_k: state.lg_k, diff --git a/cpc/sqlx/cpc_sketch_agg_string_lgk_seed.sqlx b/cpc/sqlx/cpc_sketch_agg_string_lgk_seed.sqlx index 1aca29f..f1cd22d 100644 --- a/cpc/sqlx/cpc_sketch_agg_string_lgk_seed.sqlx +++ b/cpc/sqlx/cpc_sketch_agg_string_lgk_seed.sqlx @@ -89,11 +89,10 @@ export function serialize(state) { } } else if (state.union != null) { state.serialized = state.union.getResultAsUint8Array(); - } else { - if (state.sketch == null) { - state.sketch = new Module.cpc_sketch(state.lg_k, state.seed); - } + } else if (state.sketch != null) { state.serialized = state.sketch.serializeAsUint8Array(); + } else { + state.serialized = null; } return { lg_k: state.lg_k, diff --git a/cpc/sqlx/cpc_sketch_agg_union_lgk_seed.sqlx b/cpc/sqlx/cpc_sketch_agg_union_lgk_seed.sqlx index 9225ebb..5409e2d 100644 --- a/cpc/sqlx/cpc_sketch_agg_union_lgk_seed.sqlx +++ b/cpc/sqlx/cpc_sketch_agg_union_lgk_seed.sqlx @@ -78,19 +78,30 @@ export function aggregate(state, sketch) { export function serialize(state) { if (state.union == null && state.serialized != null) return state; // for transition deserialize-serialize - ensureUnion(state); + if (state.union != null) { + // for prior transition deserialize-aggregate + // merge aggregated and serialized state + if (state.serialized != null) { + state.union.updateWithBytes(state.serialized, state.seed); + } + state.serialized = state.union.getResultAsUint8Array(); + } else { + state.serialized = null; + } try { return { lg_k: state.lg_k, seed: state.seed, - serialized: state.union.getResultAsUint8Array() + serialized: state.serialized }; } catch (e) { if (e.message != null) throw e; throw new Error(Module.getExceptionMessage(e)); } finally { - state.union.delete(); - state.union = null; + if (state.union != null) { + state.union.delete(); + delete state.union; + } } } diff --git a/cpc/test/cpc_sketch_test.js b/cpc/test/cpc_sketch_test.js index d999a4c..8553a9e 100644 --- a/cpc/test/cpc_sketch_test.js +++ b/cpc/test/cpc_sketch_test.js @@ -20,24 +20,22 @@ const { generate_udf_test, generate_udaf_test } = unit_test_utils; // using defaults -const cpc_empty = `FROM_BASE64('AgEQCwAGzJM=')`; - generate_udaf_test("cpc_sketch_agg_string", { input_columns: [`str`], input_rows: `SELECT * FROM UNNEST([CAST(NULL AS STRING), CAST(NULL AS STRING), CAST(NULL AS STRING)]) AS str`, - expected_output: cpc_empty + expected_output: null }); generate_udaf_test("cpc_sketch_agg_int64", { input_columns: [`value`], input_rows: `SELECT * FROM UNNEST([NULL, NULL, NULL]) AS value`, - expected_output: cpc_empty + expected_output: null }); generate_udaf_test("cpc_sketch_agg_union", { input_columns: [`sketch`], input_rows: `SELECT * FROM UNNEST([CAST(NULL AS BYTES), CAST(NULL AS BYTES), CAST(NULL AS BYTES)]) AS sketch`, - expected_output: cpc_empty + expected_output: null }); const cpc_1 = `FROM_BASE64('CAEQCwAOzJMDAAAAAgAAAAAAAAAA+p9AiIAEKIABCEC+FRhuAwAAAA==')`; diff --git a/fi/sqlx/frequent_strings_sketch_build.sqlx b/fi/sqlx/frequent_strings_sketch_build.sqlx index ecaedcb..952ae9f 100644 --- a/fi/sqlx/frequent_strings_sketch_build.sqlx +++ b/fi/sqlx/frequent_strings_sketch_build.sqlx @@ -58,24 +58,26 @@ export function aggregate(state, item, weight) { export function serialize(state) { if (state.sketch == null && state.serialized != null) return state; // for transition deserialize-serialize try { - // for prior transition deserialize-aggregate - // merge aggregated and serialized state - if (state.sketch != null && state.serialized != null) { - state.sketch.merge(state.serialized); + if (state.sketch != null) { + // for prior transition deserialize-aggregate + // merge aggregated and serialized state + if (state.serialized != null) state.sketch.merge(state.serialized); + state.serialized = state.sketch.serializeAsUint8Array(); } else { - if (state.sketch == null) { - state.sketch = new Module.frequent_strings_sketch(state.lg_max_map_size); - } + state.serialized = null; } return { lg_max_map_size: state.lg_max_map_size, - serialized: state.sketch.serializeAsUint8Array() + serialized: state.serialized }; } catch (e) { if (e.message != null) throw e; throw new Error(Module.getExceptionMessage(e)); } finally { - state.sketch.delete(); + if (state.sketch != null) { + state.sketch.delete(); + delete state.sketch; + } } } diff --git a/fi/sqlx/frequent_strings_sketch_merge.sqlx b/fi/sqlx/frequent_strings_sketch_merge.sqlx index ab50354..daaaf68 100644 --- a/fi/sqlx/frequent_strings_sketch_merge.sqlx +++ b/fi/sqlx/frequent_strings_sketch_merge.sqlx @@ -57,24 +57,26 @@ export function aggregate(state, sketch) { export function serialize(state) { if (state.sketch == null && state.serialized != null) return state; // for transition deserialize-serialize try { - // for prior transition deserialize-aggregate - // merge aggregated and serialized state - if (state.sketch != null && state.serialized != null) { - state.sketch.merge(state.serialized); + if (state.sketch != null) { + // for prior transition deserialize-aggregate + // merge aggregated and serialized state + if (state.serialized != null) state.sketch.merge(state.serialized); + state.serialized = state.sketch.serializeAsUint8Array(); } else { - if (state.sketch == null) { - state.sketch = new Module.frequent_strings_sketch(state.lg_max_map_size); - } + state.serialized = null; } return { lg_max_map_size: state.lg_max_map_size, - serialized: state.sketch.serializeAsUint8Array() + serialized: state.serialized }; } catch (e) { if (e.message != null) throw e; throw new Error(Module.getExceptionMessage(e)); } finally { - state.sketch.delete(); + if (state.sketch != null) { + state.sketch.delete(); + delete state.sketch; + } } } diff --git a/fi/test/frequent_strings_sketch_test.js b/fi/test/frequent_strings_sketch_test.js index 288d4b1..01e8b0a 100644 --- a/fi/test/frequent_strings_sketch_test.js +++ b/fi/test/frequent_strings_sketch_test.js @@ -18,18 +18,16 @@ const { generate_udf_test, generate_udaf_test } = unit_test_utils; -const fi_empty = `FROM_BASE64('AQEKBQMFAAA=')`; - generate_udaf_test("frequent_strings_sketch_build", { input_columns: [`str`, `1`, `5 NOT AGGREGATE`], input_rows: `SELECT * FROM UNNEST([CAST(NULL AS STRING), CAST(NULL AS STRING), CAST(NULL AS STRING)]) AS str`, - expected_output: fi_empty + expected_output: null }); generate_udaf_test("frequent_strings_sketch_merge", { input_columns: [`sketch`, `5 NOT AGGREGATE`], input_rows: `SELECT * FROM UNNEST([CAST(NULL AS BYTES), CAST(NULL AS BYTES), CAST(NULL AS BYTES)]) AS sketch`, - expected_output: fi_empty + expected_output: null }); const fi_1 = `FROM_BASE64('BAEKBQMAAAADAAAAAAAAAAMAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAYQEAAABiAQAAAGM=')`; diff --git a/hll/sqlx/hll_sketch_agg_int64_lgk_type.sqlx b/hll/sqlx/hll_sketch_agg_int64_lgk_type.sqlx index 9f76665..45ca7a8 100644 --- a/hll/sqlx/hll_sketch_agg_int64_lgk_type.sqlx +++ b/hll/sqlx/hll_sketch_agg_int64_lgk_type.sqlx @@ -88,11 +88,10 @@ export function serialize(state) { } } else if (state.union != null) { state.serialized = state.union.getResultAsUint8Array(state.tgt_type); - } else { - if (state.sketch == null) { - state.sketch = new Module.hll_sketch(state.lg_k, state.tgt_type); - } + } else if (state.sketch != null) { state.serialized = state.sketch.serializeAsUint8Array(); + } else { + state.serialized = null; } return { lg_k: state.lg_k, diff --git a/hll/sqlx/hll_sketch_agg_string_lgk_type.sqlx b/hll/sqlx/hll_sketch_agg_string_lgk_type.sqlx index 51fe524..4b949dd 100644 --- a/hll/sqlx/hll_sketch_agg_string_lgk_type.sqlx +++ b/hll/sqlx/hll_sketch_agg_string_lgk_type.sqlx @@ -88,11 +88,10 @@ export function serialize(state) { } } else if (state.union != null) { state.serialized = state.union.getResultAsUint8Array(state.tgt_type); - } else { - if (state.sketch == null) { - state.sketch = new Module.hll_sketch(state.lg_k, state.tgt_type); - } + } else if (state.sketch != null) { state.serialized = state.sketch.serializeAsUint8Array(); + } else { + state.serialized = null; } return { lg_k: state.lg_k, diff --git a/hll/sqlx/hll_sketch_agg_union_lgk_type.sqlx b/hll/sqlx/hll_sketch_agg_union_lgk_type.sqlx index 85595af..e6349d3 100644 --- a/hll/sqlx/hll_sketch_agg_union_lgk_type.sqlx +++ b/hll/sqlx/hll_sketch_agg_union_lgk_type.sqlx @@ -77,19 +77,28 @@ export function aggregate(state, sketch) { export function serialize(state) { if (state.union == null && state.serialized != null) return state; // for transition deserialize-serialize - ensureUnion(state); + if (state.union != null) { + if (state.serialized != null) { + state.union.updateWithBytes(state.serialized); + } + state.serialized = state.union.getResultAsUint8Array(state.tgt_type); + } else { + state.serialized = null; + } try { return { lg_k: state.lg_k, tgt_type: state.tgt_type, - serialized: state.union.getResultAsUint8Array(state.tgt_type) + serialized: state.serialized }; } catch (e) { if (e.message != null) throw e; throw new Error(Module.getExceptionMessage(e)); } finally { - state.union.delete(); - state.union = null; + if (state.union != null) { + state.union.delete(); + delete state.union; + } } } diff --git a/hll/test/hll_sketch_test.js b/hll/test/hll_sketch_test.js index f7045da..3d1c8bf 100644 --- a/hll/test/hll_sketch_test.js +++ b/hll/test/hll_sketch_test.js @@ -20,24 +20,22 @@ const { generate_udf_test, generate_udaf_test } = unit_test_utils; // using defaults -const hll_empty = `FROM_BASE64('AgEHDAMMAAA=')`; - generate_udaf_test("hll_sketch_agg_string", { input_columns: [`str`], input_rows: `SELECT * FROM UNNEST([CAST(NULL AS STRING), CAST(NULL AS STRING), CAST(NULL AS STRING)]) AS str`, - expected_output: hll_empty + expected_output: null }); generate_udaf_test("hll_sketch_agg_int64", { input_columns: [`value`], input_rows: `SELECT * FROM UNNEST([NULL, NULL, NULL]) AS value`, - expected_output: hll_empty + expected_output: null }); generate_udaf_test("hll_sketch_agg_union", { input_columns: [`sketch`], input_rows: `SELECT * FROM UNNEST([CAST(NULL AS BYTES), CAST(NULL AS BYTES), CAST(NULL AS BYTES)]) AS sketch`, - expected_output: hll_empty + expected_output: null }); const hll_1 = `FROM_BASE64('AgEHDAMIAwAvgjsECv+ABG8Z3AY=')`; diff --git a/kll/sqlx/kll_sketch_float_build_k.sqlx b/kll/sqlx/kll_sketch_float_build_k.sqlx index d4f6d36..c688c3e 100644 --- a/kll/sqlx/kll_sketch_float_build_k.sqlx +++ b/kll/sqlx/kll_sketch_float_build_k.sqlx @@ -40,21 +40,14 @@ const default_k = Number(Module.DEFAULT_K); // UDAF interface export function initialState(k) { - try { - var state = { - k: k == null ? default_k : Number(k), - }; - state.sketch = new Module.kll_sketch_float(state.k); - return state; - } catch (e) { - if (e.message != null) throw e; - throw new Error(Module.getExceptionMessage(e)); - } + return { + k: k == null ? default_k : Number(k), + }; } export function aggregate(state, value) { try { - if (state.sketch == null) { // for transition deserialize-aggregate + if (state.sketch == null) { state.sketch = new Module.kll_sketch_float(state.k); } state.sketch.update(value); @@ -65,22 +58,28 @@ export function aggregate(state, value) { } export function serialize(state) { - if (state.sketch == null) return state; // for transition deserialize-serialize + if (state.sketch == null && state.serialized != null) return state; // for transition deserialize-serialize try { - // for prior transition deserialize-aggregate - // merge aggregated and serialized state - if (state.sketch != null && state.serialized != null) { - state.sketch.merge(state.serialized); + if (state.sketch != null) { + // for prior transition deserialize-aggregate + // merge aggregated and serialized state + if (state.serialized != null) state.sketch.merge(state.serialized); + state.serialized = state.sketch.serializeAsUint8Array(); + } else { + state.serialized = null; } return { k: state.k, - serialized: state.sketch.serializeAsUint8Array() + serialized: state.serialized }; } catch (e) { if (e.message != null) throw e; throw new Error(Module.getExceptionMessage(e)); } finally { - state.sketch.delete(); + if (state.sketch != null) { + state.sketch.delete(); + delete state.sketch; + } } } diff --git a/kll/sqlx/kll_sketch_float_merge_k.sqlx b/kll/sqlx/kll_sketch_float_merge_k.sqlx index a00e5ea..3ebba5e 100644 --- a/kll/sqlx/kll_sketch_float_merge_k.sqlx +++ b/kll/sqlx/kll_sketch_float_merge_k.sqlx @@ -40,21 +40,14 @@ const default_k = Number(Module.DEFAULT_K); // UDAF interface export function initialState(k) { - try { - var state = { - k: k == null ? default_k : Number(k), - }; - state.sketch = new Module.kll_sketch_float(state.k); - return state; - } catch (e) { - if (e.message != null) throw e; - throw new Error(Module.getExceptionMessage(e)); - } + return { + k: k == null ? default_k : Number(k) + }; } export function aggregate(state, sketch) { try { - if (state.sketch == null) { // for transition deserialize-aggregate + if (state.sketch == null) { state.sketch = new Module.kll_sketch_float(state.k); } state.sketch.merge(sketch); @@ -65,22 +58,28 @@ export function aggregate(state, sketch) { } export function serialize(state) { - if (state.sketch == null) return state; // for transition deserialize-serialize + if (state.sketch == null && state.serialized != null) return state; // for transition deserialize-serialize try { - // for prior transition deserialize-aggregate - // merge aggregated and serialized state - if (state.sketch != null && state.serialized != null) { - state.sketch.merge(state.serialized); + if (state.sketch != null) { + // for prior transition deserialize-aggregate + // merge aggregated and serialized state + if (state.serialized != null) state.sketch.merge(state.serialized); + state.serialized = state.sketch.serializeAsUint8Array(); + } else { + state.serialized = null; } return { k: state.k, - serialized: state.sketch.serializeAsUint8Array() + serialized: state.serialized }; } catch (e) { if (e.message != null) throw e; throw new Error(Module.getExceptionMessage(e)); } finally { - state.sketch.delete(); + if (state.sketch != null) { + state.sketch.delete(); + delete state.sketch; + } } } diff --git a/kll/test/kll_sketch_float_test.js b/kll/test/kll_sketch_float_test.js index 3eda19e..96b7d54 100644 --- a/kll/test/kll_sketch_float_test.js +++ b/kll/test/kll_sketch_float_test.js @@ -18,18 +18,16 @@ const { generate_udf_test, generate_udaf_test } = unit_test_utils; -const kll_empty = `FROM_BASE64('AgEPAcgACAA=')`; - generate_udaf_test("kll_sketch_float_build", { input_columns: [`value`], input_rows: `SELECT * FROM UNNEST([NULL, NULL, NULL, NULL, NULL]) AS value`, - expected_output: kll_empty + expected_output: null }); generate_udaf_test("kll_sketch_float_merge", { input_columns: [`sketch`], input_rows: `SELECT * FROM UNNEST([CAST(NULL AS BYTES), CAST(NULL AS BYTES), CAST(NULL AS BYTES)]) AS sketch`, - expected_output: kll_empty + expected_output: null }); const kll_1 = `FROM_BASE64('BQEPAMgACAAKAAAAAAAAAMgAAQC+AAAAAACAPwAAIEEAACBBAAAQQQAAAEEAAOBAAADAQAAAoEAAAIBAAABAQAAAAEAAAIA/')`; diff --git a/req/sqlx/req_sketch_float_build_k_hra.sqlx b/req/sqlx/req_sketch_float_build_k_hra.sqlx index 63b9a91..eb2c092 100644 --- a/req/sqlx/req_sketch_float_build_k_hra.sqlx +++ b/req/sqlx/req_sketch_float_build_k_hra.sqlx @@ -41,15 +41,10 @@ const default_k = Number(Module.DEFAULT_K); // UDAF interface export function initialState(params) { - try { - return { - k: params.k == null ? default_k : Number(params.k), - hra: params.hra == null ? true : params.hra - }; - } catch (e) { - if (e.message != null) throw e; - throw new Error(Module.getExceptionMessage(e)); - } + return { + k: params.k == null ? default_k : Number(params.k), + hra: params.hra == null ? true : params.hra + }; } export function aggregate(state, value) { @@ -67,25 +62,27 @@ export function aggregate(state, value) { export function serialize(state) { if (state.sketch == null && state.serialized != null) return state; // for transition deserialize-serialize try { - // for prior transition deserialize-aggregate - // merge aggregated and serialized state - if (state.sketch != null && state.serialized != null) { - state.sketch.merge(state.serialized); + if (state.sketch != null) { + // for prior transition deserialize-aggregate + // merge aggregated and serialized state + if (state.serialized != null) state.sketch.merge(state.serialized); + state.serialized = state.sketch.serializeAsUint8Array(); } else { - if (state.sketch == null) { - state.sketch = new Module.req_sketch_float(state.k, state.hra); - } + state.serialized = null; } return { k: state.k, hra: state.hra, - serialized: state.sketch.serializeAsUint8Array() + serialized: state.serialized }; } catch (e) { if (e.message != null) throw e; throw new Error(Module.getExceptionMessage(e)); } finally { - state.sketch.delete(); + if (state.sketch != null) { + state.sketch.delete(); + delete state.sketch; + } } } diff --git a/req/sqlx/req_sketch_float_merge_k_hra.sqlx b/req/sqlx/req_sketch_float_merge_k_hra.sqlx index 10b4daf..a11f5e3 100644 --- a/req/sqlx/req_sketch_float_merge_k_hra.sqlx +++ b/req/sqlx/req_sketch_float_merge_k_hra.sqlx @@ -41,15 +41,10 @@ const default_k = Number(Module.DEFAULT_K); // UDAF interface export function initialState(params) { - try { - return { - k: params.k == null ? default_k : Number(params.k), - hra: params.hra == null ? true : params.hra - }; - } catch (e) { - if (e.message != null) throw e; - throw new Error(Module.getExceptionMessage(e)); - } + return { + k: params.k == null ? default_k : Number(params.k), + hra: params.hra == null ? true : params.hra + }; } export function aggregate(state, sketch) { @@ -67,25 +62,27 @@ export function aggregate(state, sketch) { export function serialize(state) { if (state.sketch == null && state.serialized != null) return state; // for transition deserialize-serialize try { - // for prior transition deserialize-aggregate - // merge aggregated and serialized state - if (state.sketch != null && state.serialized != null) { - state.sketch.merge(state.serialized); + if (state.sketch != null) { + // for prior transition deserialize-aggregate + // merge aggregated and serialized state + if (state.serialized != null) state.sketch.merge(state.serialized); + state.serialized = state.sketch.serializeAsUint8Array(); } else { - if (state.sketch == null) { - state.sketch = new Module.req_sketch_float(state.k, state.hra); - } + state.serialized = null; } return { k: state.k, hra: state.hra, - serialized: state.sketch.serializeAsUint8Array() + serialized: state.serialized }; } catch (e) { if (e.message != null) throw e; throw new Error(Module.getExceptionMessage(e)); } finally { - state.sketch.delete(); + if (state.sketch != null) { + state.sketch.delete(); + delete state.sketch; + } } } diff --git a/req/test/req_sketch_float_test.js b/req/test/req_sketch_float_test.js index 0880e35..692d17a 100644 --- a/req/test/req_sketch_float_test.js +++ b/req/test/req_sketch_float_test.js @@ -18,18 +18,16 @@ const { generate_udf_test, generate_udaf_test } = unit_test_utils; -const req_empty = `FROM_BASE64('AgERPAwAAAA=')`; - generate_udaf_test("req_sketch_float_build", { input_columns: [`value`], input_rows: `SELECT * FROM UNNEST([NULL, NULL, NULL, NULL, NULL]) AS value`, - expected_output: req_empty + expected_output: null }); generate_udaf_test("req_sketch_float_merge", { input_columns: [`sketch`], input_rows: `SELECT * FROM UNNEST([CAST(NULL AS BYTES), CAST(NULL AS BYTES), CAST(NULL AS BYTES)]) AS sketch`, - expected_output: req_empty + expected_output: null }); const req_1 = `FROM_BASE64('AgERCAwAAQAAAAAAAAAAAAAAQEEAAwAACgAAAAAAIEEAABBBAAAAQQAA4EAAAMBAAACgQAAAgEAAAEBAAAAAQAAAgD8=')`; diff --git a/tdigest/sqlx/tdigest_double_build_k.sqlx b/tdigest/sqlx/tdigest_double_build_k.sqlx index 339c8a5..2de426a 100644 --- a/tdigest/sqlx/tdigest_double_build_k.sqlx +++ b/tdigest/sqlx/tdigest_double_build_k.sqlx @@ -60,24 +60,26 @@ export function aggregate(state, value) { export function serialize(state) { if (state.sketch == null && state.serialized != null) return state; // for transition deserialize-serialize try { - // for prior transition deserialize-aggregate - // merge aggregated and serialized state - if (state.sketch != null && state.serialized != null) { - state.sketch.merge(state.serialized); + if (state.sketch != null) { + // for prior transition deserialize-aggregate + // merge aggregated and serialized state + if (state.serialized != null) state.sketch.merge(state.serialized); + state.serialized = state.sketch.serializeAsUint8Array(); } else { - if (state.sketch == null) { - state.sketch = new Module.tdigest_double(state.k); - } + state.serialized = null; } return { k: state.k, - serialized: state.sketch.serializeAsUint8Array() + serialized: state.serialized }; } catch (e) { if (e.message != null) throw e; throw new Error(Module.getExceptionMessage(e)); } finally { - state.sketch.delete(); + if (state.sketch != null) { + state.sketch.delete(); + delete state.sketch; + } } } diff --git a/tdigest/sqlx/tdigest_double_merge_k.sqlx b/tdigest/sqlx/tdigest_double_merge_k.sqlx index 686693c..9a1ee40 100644 --- a/tdigest/sqlx/tdigest_double_merge_k.sqlx +++ b/tdigest/sqlx/tdigest_double_merge_k.sqlx @@ -60,24 +60,26 @@ export function aggregate(state, sketch) { export function serialize(state) { if (state.sketch == null && state.serialized != null) return state; // for transition deserialize-serialize try { - // for prior transition deserialize-aggregate - // merge aggregated and serialized state - if (state.sketch != null && state.serialized != null) { - state.sketch.merge(state.serialized); + if (state.sketch != null) { + // for prior transition deserialize-aggregate + // merge aggregated and serialized state + if (state.serialized != null) state.sketch.merge(state.serialized); + state.serialized = state.sketch.serializeAsUint8Array(); } else { - if (state.sketch == null) { - state.sketch = new Module.tdigest_double(state.k); - } + state.serialized = null; } return { k: state.k, - serialized: state.sketch.serializeAsUint8Array() + serialized: state.serialized }; } catch (e) { if (e.message != null) throw e; throw new Error(Module.getExceptionMessage(e)); } finally { - state.sketch.delete(); + if (state.sketch != null) { + state.sketch.delete(); + delete state.sketch; + } } } diff --git a/tdigest/test/tdigest_test.js b/tdigest/test/tdigest_test.js index 6855084..e4b2944 100644 --- a/tdigest/test/tdigest_test.js +++ b/tdigest/test/tdigest_test.js @@ -18,18 +18,16 @@ const { generate_udf_test, generate_udaf_test } = unit_test_utils; -const td_empty = `FROM_BASE64('AQEUyAABAAA=')`; - generate_udaf_test("tdigest_double_build", { input_columns: [`value`], input_rows: `SELECT * FROM UNNEST([NULL, NULL, NULL, NULL, NULL]) AS value`, - expected_output: td_empty + expected_output: null }); generate_udaf_test("tdigest_double_merge", { input_columns: [`sketch`], input_rows: `SELECT * FROM UNNEST([CAST(NULL AS BYTES), CAST(NULL AS BYTES), CAST(NULL AS BYTES)]) AS sketch`, - expected_output: td_empty + expected_output: null }); const td_1 = `FROM_BASE64('AgEUyAAEAAAKAAAAAAAAAAAAAAAAAPA/AAAAAAAAJEAAAAAAAADwPwEAAAAAAAAAAAAAAAAAAEABAAAAAAAAAAAAAAAAAAhAAQAAAAAAAAAAAAAAAAAQQAEAAAAAAAAAAAAAAAAAFEABAAAAAAAAAAAAAAAAABhAAQAAAAAAAAAAAAAAAAAcQAEAAAAAAAAAAAAAAAAAIEABAAAAAAAAAAAAAAAAACJAAQAAAAAAAAAAAAAAAAAkQAEAAAAAAAAA')`; diff --git a/theta/sqlx/theta_sketch_agg_int64_lgk_seed_p.sqlx b/theta/sqlx/theta_sketch_agg_int64_lgk_seed_p.sqlx index 26cc399..c127d67 100644 --- a/theta/sqlx/theta_sketch_agg_int64_lgk_seed_p.sqlx +++ b/theta/sqlx/theta_sketch_agg_int64_lgk_seed_p.sqlx @@ -85,13 +85,12 @@ export function serialize(state) { state.serialized = state.union.getResultAsUint8ArrayCompressed(); state.union.delete(); delete state.union; - } else { - if (state.sketch == null) { - state.sketch = new Module.update_theta_sketch(state.lg_k, state.seed, state.p); - } + } else if (state.sketch != null) { state.serialized = state.sketch.serializeAsUint8ArrayCompressed(); state.sketch.delete(); delete state.sketch; + } else { + state.serialized = null; } return state; } catch (e) { diff --git a/theta/sqlx/theta_sketch_agg_string_lgk_seed_p.sqlx b/theta/sqlx/theta_sketch_agg_string_lgk_seed_p.sqlx index 06ed69b..dd13053 100644 --- a/theta/sqlx/theta_sketch_agg_string_lgk_seed_p.sqlx +++ b/theta/sqlx/theta_sketch_agg_string_lgk_seed_p.sqlx @@ -85,13 +85,12 @@ export function serialize(state) { state.serialized = state.union.getResultAsUint8ArrayCompressed(); state.union.delete(); delete state.union; - } else { - if (state.sketch == null) { - state.sketch = new Module.update_theta_sketch(state.lg_k, state.seed, state.p); - } + } else if (state.sketch != null) { state.serialized = state.sketch.serializeAsUint8ArrayCompressed(); state.sketch.delete(); delete state.sketch; + } else { + state.serialized = null; } return state; } catch (e) { diff --git a/theta/sqlx/theta_sketch_agg_union_lgk_seed.sqlx b/theta/sqlx/theta_sketch_agg_union_lgk_seed.sqlx index a5c087c..07fe80c 100644 --- a/theta/sqlx/theta_sketch_agg_union_lgk_seed.sqlx +++ b/theta/sqlx/theta_sketch_agg_union_lgk_seed.sqlx @@ -96,14 +96,26 @@ export function aggregate(state, sketch) { export function serialize(state) { if (state.union == null && state.serialized != null) return state; // for transition deserialize-serialize - ensureUnion(state); try { - reserveBuffer(Module.compact_theta_sketch.getMaxSerializedSizeBytes(state.lg_k)); - var size = state.union.getResultStreamCompressed(buffer.ptr, buffer.size); + if (state.union != null) { + // for prior transition deserialize-aggregate + // merge aggregated and serialized state + if (state.serialized != null) { + reserveBuffer(state.serialized.length); + Module.HEAPU8.subarray(buffer.ptr, buffer.ptr + state.serialized.length).set(state.serialized); + state.union.updateWithBuffer(buffer.ptr, state.serialized.length, state.seed); + delete state.serialized; + } + reserveBuffer(Module.compact_theta_sketch.getMaxSerializedSizeBytes(state.lg_k)); + var size = state.union.getResultStreamCompressed(buffer.ptr, buffer.size); + state.serialized = Module.HEAPU8.slice(buffer.ptr, buffer.ptr + size); + } else { + state.serialized = null; + } return { lg_k: state.lg_k, seed: state.seed, - serialized: Module.HEAPU8.slice(buffer.ptr, buffer.ptr + size) + serialized: state.serialized }; } catch (e) { if (e.message != null) throw e; diff --git a/theta/test/theta_sketch_test.js b/theta/test/theta_sketch_test.js index 093b890..0c9159b 100644 --- a/theta/test/theta_sketch_test.js +++ b/theta/test/theta_sketch_test.js @@ -20,24 +20,22 @@ const { generate_udf_test, generate_udaf_test } = unit_test_utils; // using defaults -const theta_empty = `FROM_BASE64('AQMDAAAezJM=')`; - generate_udaf_test("theta_sketch_agg_string", { input_columns: [`str`], input_rows: `SELECT * FROM UNNEST([CAST(NULL AS STRING), CAST(NULL AS STRING), CAST(NULL AS STRING)]) AS str`, - expected_output: theta_empty + expected_output: null }); generate_udaf_test("theta_sketch_agg_int64", { input_columns: [`value`], input_rows: `SELECT * FROM UNNEST([NULL, NULL, NULL]) AS value`, - expected_output: theta_empty + expected_output: null }); generate_udaf_test("theta_sketch_agg_union", { input_columns: [`sketch`], input_rows: `SELECT * FROM UNNEST([CAST(NULL AS BYTES), CAST(NULL AS BYTES), CAST(NULL AS BYTES)]) AS sketch`, - expected_output: theta_empty + expected_output: null }); const theta_1 = `FROM_BASE64('AQQDPwEazJMDEIFfUcrcGW6ylF+DQ0nLOjDZ/9ze6gyQ')`; diff --git a/tuple/sqlx/tuple_sketch_int64_agg_int64_lgk_seed_p_mode.sqlx b/tuple/sqlx/tuple_sketch_int64_agg_int64_lgk_seed_p_mode.sqlx index b4404bc..6fe2aa8 100644 --- a/tuple/sqlx/tuple_sketch_int64_agg_int64_lgk_seed_p_mode.sqlx +++ b/tuple/sqlx/tuple_sketch_int64_agg_int64_lgk_seed_p_mode.sqlx @@ -91,13 +91,12 @@ export function serialize(state) { state.serialized = state.union.getResultAsUint8Array(); state.union.delete(); delete state.union; - } else { - if (state.sketch == null) { - state.sketch = new Module.update_tuple_sketch_int64(state.lg_k, state.seed, state.p, state.mode); - } + } else if (state.sketch != null) { state.serialized = state.sketch.serializeAsUint8Array(); state.sketch.delete(); delete state.sketch; + } else { + state.serialized = null; } return state; } catch (e) { diff --git a/tuple/sqlx/tuple_sketch_int64_agg_string_lgk_seed_p_mode.sqlx b/tuple/sqlx/tuple_sketch_int64_agg_string_lgk_seed_p_mode.sqlx index ca99e91..d7dbb23 100644 --- a/tuple/sqlx/tuple_sketch_int64_agg_string_lgk_seed_p_mode.sqlx +++ b/tuple/sqlx/tuple_sketch_int64_agg_string_lgk_seed_p_mode.sqlx @@ -91,13 +91,12 @@ export function serialize(state) { state.serialized = state.union.getResultAsUint8Array(); state.union.delete(); delete state.union; - } else { - if (state.sketch == null) { - state.sketch = new Module.update_tuple_sketch_int64(state.lg_k, state.seed, state.p, state.mode); - } + } else if (state.sketch != null) { state.serialized = state.sketch.serializeAsUint8Array(); state.sketch.delete(); delete state.sketch; + } else { + state.serialized = null; } return state; } catch (e) { diff --git a/tuple/sqlx/tuple_sketch_int64_agg_union_lgk_seed_mode.sqlx b/tuple/sqlx/tuple_sketch_int64_agg_union_lgk_seed_mode.sqlx index d7e18b9..6c76010 100644 --- a/tuple/sqlx/tuple_sketch_int64_agg_union_lgk_seed_mode.sqlx +++ b/tuple/sqlx/tuple_sketch_int64_agg_union_lgk_seed_mode.sqlx @@ -77,11 +77,17 @@ export function aggregate(state, sketch) { export function serialize(state) { if (state.union == null && state.serialized != null) return state; // for transition deserialize-serialize - ensureUnion(state); try { - state.serialized = state.union.getResultAsUint8Array(); - state.union.delete(); - delete state.union; + if (state.union != null) { + if (state.serialized != null) { + state.union.updateWithBytes(state.serialized, state.seed); + } + state.serialized = state.union.getResultAsUint8Array(); + state.union.delete(); + delete state.union; + } else { + state.serialized = null; + } return state; } catch (e) { if (e.message != null) throw e; diff --git a/tuple/test/tuple_sketch_int64_test.js b/tuple/test/tuple_sketch_int64_test.js index 8045af2..73eb7b5 100644 --- a/tuple/test/tuple_sketch_int64_test.js +++ b/tuple/test/tuple_sketch_int64_test.js @@ -20,24 +20,22 @@ const { generate_udf_test, generate_udaf_test } = unit_test_utils; // using defaults -const tuple_empty = `FROM_BASE64('AQMJAQAezJM=')`; - generate_udaf_test("tuple_sketch_int64_agg_string", { input_columns: [`str`, `1`], input_rows: `SELECT * FROM UNNEST([CAST(NULL AS STRING), CAST(NULL AS STRING), CAST(NULL AS STRING)]) AS str`, - expected_output: tuple_empty + expected_output: null }); generate_udaf_test("tuple_sketch_int64_agg_int64", { input_columns: [`value`, `1`], input_rows: `SELECT * FROM UNNEST([NULL, NULL, NULL]) AS value`, - expected_output: tuple_empty + expected_output: null }); generate_udaf_test("tuple_sketch_int64_agg_union", { input_columns: [`sketch`], input_rows: `SELECT * FROM UNNEST([CAST(NULL AS BYTES), CAST(NULL AS BYTES), CAST(NULL AS BYTES)]) AS sketch`, - expected_output: tuple_empty + expected_output: null }); const tuple_1 = `FROM_BASE64('AgMJAQAazJMDAAAAAAAAALcMbuWor0AIAQAAAAAAAACFf0C2icflNAEAAAAAAAAAF8EdUoUHAXsBAAAAAAAAAA==')`; --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
