This is an automated email from the ASF dual-hosted git repository. alsay pushed a commit to branch req_sketch_float in repository https://gitbox.apache.org/repos/asf/datasketches-bigquery.git
commit c6b5f47993ebcb6cf393a1fad817c45f72a3e651 Author: AlexanderSaydakov <[email protected]> AuthorDate: Thu Nov 21 15:39:16 2024 -0800 req_sketch_float --- definitions/req/req_sketch_float_build.sqlx | 36 ++++++ definitions/req/req_sketch_float_build_k_hra.sqlx | 114 +++++++++++++++++ definitions/req/req_sketch_float_get_cdf.sqlx | 71 +++++++++++ .../req/req_sketch_float_get_max_value.sqlx | 49 ++++++++ .../req/req_sketch_float_get_min_value.sqlx | 49 ++++++++ definitions/req/req_sketch_float_get_n.sqlx | 49 ++++++++ .../req/req_sketch_float_get_num_retained.sqlx | 49 ++++++++ definitions/req/req_sketch_float_get_pmf.sqlx | 73 +++++++++++ definitions/req/req_sketch_float_get_quantile.sqlx | 52 ++++++++ definitions/req/req_sketch_float_get_rank.sqlx | 52 ++++++++ .../req/req_sketch_float_get_rank_lower_bound.sqlx | 51 ++++++++ .../req/req_sketch_float_get_rank_upper_bound.sqlx | 51 ++++++++ definitions/req/req_sketch_float_merge.sqlx | 36 ++++++ definitions/req/req_sketch_float_merge_k_hra.sqlx | 114 +++++++++++++++++ definitions/req/req_sketch_float_test.js | 137 +++++++++++++++++++++ definitions/req/req_sketch_float_to_string.sqlx | 49 ++++++++ req/Makefile | 62 ++++++++++ req/crypto.js | 20 +++ req/req_sketch_float.cpp | 79 ++++++++++++ req/test/req_sketch_float_test.sql | 70 +++++++++++ 20 files changed, 1263 insertions(+) diff --git a/definitions/req/req_sketch_float_build.sqlx b/definitions/req/req_sketch_float_build.sqlx new file mode 100644 index 0000000..d825f54 --- /dev/null +++ b/definitions/req/req_sketch_float_build.sqlx @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +config { hasOutput: true, tags: ["req", "udfs"] } + +CREATE OR REPLACE AGGREGATE FUNCTION ${self()}(value FLOAT64) +RETURNS BYTES +OPTIONS ( + description = '''Creates a sketch that represents the distribution of the given column. + +Param value: the column of FLOAT64 values. +Defaults: k = 12, hra = true. +Returns: a serialized REQ Sketch as BYTES. + +For more information: + - https://datasketches.apache.org/docs/REQ/ReqSketch.html +''' +) AS ( + ${ref("req_sketch_float_build_k_hra")}(value, STRUCT<INT, BOOL>(NULL, NULL)) +); diff --git a/definitions/req/req_sketch_float_build_k_hra.sqlx b/definitions/req/req_sketch_float_build_k_hra.sqlx new file mode 100644 index 0000000..95716b0 --- /dev/null +++ b/definitions/req/req_sketch_float_build_k_hra.sqlx @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +config { hasOutput: true, tags: ["req", "udfs"] } + +CREATE OR REPLACE AGGREGATE FUNCTION ${self()}(value FLOAT64, params STRUCT<k INT, hra BOOL> NOT AGGREGATE) +RETURNS BYTES +LANGUAGE js +OPTIONS ( + library=["${dataform.projectConfig.vars.jsBucket}/req_sketch_float.mjs"], + description = '''Creates a sketch that represents the distribution of the given column. + +Param value: the column of FLOAT64 values. +Param k: the sketch accuracy/size parameter as an even INT in the range [4, 65534]. +Param hra: if true, the high ranks are prioritized for better accuracy. Otherwise the low ranks are prioritized for better accuracy. +Returns: a serialized REQ Sketch as BYTES. + +For more information: + - https://datasketches.apache.org/docs/REQ/ReqSketch.html +''' +) AS R""" +import ModuleFactory from "${dataform.projectConfig.vars.jsBucket}/req_sketch_float.mjs"; +var Module = await ModuleFactory(); +const default_k = Number(Module.DEFAULT_K); + +// UDAF interface +export function initialState(params) { + try { + return { + k: params.k == null ? default_k : Number(params.k), + hra: params.hra == null ? 
true : params.hra + }; + } catch (e) { + if (e.message != null) throw e; + throw new Error(Module.getExceptionMessage(e)); + } +} + +export function aggregate(state, value) { + try { + if (state.sketch == null) { // for transition deserialize-aggregate + state.sketch = new Module.req_sketch_float(state.k, state.hra); + } + state.sketch.update(value); + } catch (e) { + if (e.message != null) throw e; + throw new Error(Module.getExceptionMessage(e)); + } +} + +export function serialize(state) { + if (state.sketch == null) return state; // for transition deserialize-serialize + try { + // for prior transition deserialize-aggregate + // merge aggregated and serialized state + if (state.sketch != null && state.serialized != null) { + state.sketch.merge(state.serialized); + } + return { + k: state.k, + hra: state.hra, + serialized: state.sketch.serializeAsUint8Array() + }; + } catch (e) { + if (e.message != null) throw e; + throw new Error(Module.getExceptionMessage(e)); + } finally { + state.sketch.delete(); + } +} + +export function deserialize(serialized) { + return serialized; +} + +export function merge(state, other_state) { + try { + if (state.sketch == null) { + state.sketch = new Module.req_sketch_float(state.k, state.hra); + } + if (state.serialized != null) { + state.sketch.merge(state.serialized); + state.serialized = null; + } + if (other_state.serialized != null) { + state.sketch.merge(other_state.serialized); + other_state.serialized = null; + } + } catch (e) { + if (e.message != null) throw e; + throw new Error(Module.getExceptionMessage(e)); + } +} + +export function finalize(state) { + return serialize(state).serialized; +} +"""; diff --git a/definitions/req/req_sketch_float_get_cdf.sqlx b/definitions/req/req_sketch_float_get_cdf.sqlx new file mode 100644 index 0000000..60734eb --- /dev/null +++ b/definitions/req/req_sketch_float_get_cdf.sqlx @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license 
agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +config { hasOutput: true, tags: ["req", "udfs"] } + +CREATE OR REPLACE FUNCTION ${self()}(sketch BYTES, split_points ARRAY<FLOAT64>, inclusive BOOL) +RETURNS ARRAY<FLOAT64> +LANGUAGE js +OPTIONS ( + library=["${dataform.projectConfig.vars.jsBucket}/req_sketch_float.js"], + js_parameter_encoding_mode='STANDARD', + description = '''Returns an approximation to the Cumulative Distribution Function (CDF) +of the input stream as an array of cumulative probabilities defined by the given split_points. + +Param sketch: the given sketch as BYTES. + +Param split_points: an array of M unique, monotonically increasing values + (of the same type as the input values to the sketch) + that divide the input value domain into M+1 overlapping intervals. + + The start of each interval is below the lowest input value retained by the sketch + (corresponding to a zero rank or zero probability). + + The end of each interval is the associated split-point except for the top interval + where the end is the maximum input value of the stream. + +Param inclusive: if true and the upper boundary of an interval equals a value retained by the sketch, the interval will include that value. 
+ If the lower boundary of an interval equals a value retained by the sketch, the interval will exclude that value. + + If false and the upper boundary of an interval equals a value retained by the sketch, the interval will exclude that value. + If the lower boundary of an interval equals a value retained by the sketch, the interval will include that value. + +Returns: the CDF as a monotonically increasing FLOAT64 array of M+1 cumulative probabilities on the interval [0.0, 1.0]. + The top-most probability of the returned array is always 1.0. + +For more information: + - https://datasketches.apache.org/docs/REQ/ReqSketch.html +''' +) AS R""" +try { + var sketchObject = null; + try { + sketchObject = Module.req_sketch_float.deserialize(sketch); + if (sketchObject.isEmpty()) return null; + var vectorFloat = new Module.VectorFloat(); + split_points.forEach(value => vectorFloat.push_back(value)); + return sketchObject.getCDF(vectorFloat, inclusive); + } finally { + if (sketchObject != null) sketchObject.delete(); + } +} catch (e) { + if (e.message != null) throw e; + throw new Error(Module.getExceptionMessage(e)); +} +"""; diff --git a/definitions/req/req_sketch_float_get_max_value.sqlx b/definitions/req/req_sketch_float_get_max_value.sqlx new file mode 100644 index 0000000..806c069 --- /dev/null +++ b/definitions/req/req_sketch_float_get_max_value.sqlx @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +config { hasOutput: true, tags: ["req", "udfs"] } + +CREATE OR REPLACE FUNCTION ${self()}(sketch BYTES) +RETURNS FLOAT64 +LANGUAGE js +OPTIONS ( + library=["${dataform.projectConfig.vars.jsBucket}/req_sketch_float.js"], + js_parameter_encoding_mode='STANDARD', + description = '''Returns the maximum value of the input stream. + +Param sketch: the given sketch as BYTES. +Returns: max value as FLOAT64 + +For more information: + - https://datasketches.apache.org/docs/REQ/ReqSketch.html +''' +) AS R""" +try { + var sketchObject = null; + try { + sketchObject = Module.req_sketch_float.deserialize(sketch); + return sketchObject.getMaxValue(); + } finally { + if (sketchObject != null) sketchObject.delete(); + } +} catch (e) { + if (e.message != null) throw e; + throw new Error(Module.getExceptionMessage(e)); +} +"""; diff --git a/definitions/req/req_sketch_float_get_min_value.sqlx b/definitions/req/req_sketch_float_get_min_value.sqlx new file mode 100644 index 0000000..debc648 --- /dev/null +++ b/definitions/req/req_sketch_float_get_min_value.sqlx @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +config { hasOutput: true, tags: ["req", "udfs"] } + +CREATE OR REPLACE FUNCTION ${self()}(sketch BYTES) +RETURNS FLOAT64 +LANGUAGE js +OPTIONS ( + library=["${dataform.projectConfig.vars.jsBucket}/req_sketch_float.js"], + js_parameter_encoding_mode='STANDARD', + description = '''Returns the minimum value of the input stream. + +Param sketch: the given sketch as BYTES. +Returns: min value as FLOAT64 + +For more information: + - https://datasketches.apache.org/docs/REQ/ReqSketch.html +''' +) AS R""" +try { + var sketchObject = null; + try { + sketchObject = Module.req_sketch_float.deserialize(sketch); + return sketchObject.getMinValue(); + } finally { + if (sketchObject != null) sketchObject.delete(); + } +} catch (e) { + if (e.message != null) throw e; + throw new Error(Module.getExceptionMessage(e)); +} +"""; diff --git a/definitions/req/req_sketch_float_get_n.sqlx b/definitions/req/req_sketch_float_get_n.sqlx new file mode 100644 index 0000000..bf2068b --- /dev/null +++ b/definitions/req/req_sketch_float_get_n.sqlx @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +config { hasOutput: true, tags: ["req", "udfs"] } + +CREATE OR REPLACE FUNCTION ${self()}(sketch BYTES) +RETURNS INT64 +LANGUAGE js +OPTIONS ( + library=["${dataform.projectConfig.vars.jsBucket}/req_sketch_float.js"], + js_parameter_encoding_mode='STANDARD', + description = '''Returns the length of the input stream. + +Param sketch: the given sketch as BYTES. +Returns: stream length as INT64 + +For more information: + - https://datasketches.apache.org/docs/REQ/ReqSketch.html +''' +) AS R""" +try { + var sketchObject = null; + try { + sketchObject = Module.req_sketch_float.deserialize(sketch); + return sketchObject.getN(); + } finally { + if (sketchObject != null) sketchObject.delete(); + } +} catch (e) { + if (e.message != null) throw e; + throw new Error(Module.getExceptionMessage(e)); +} +"""; diff --git a/definitions/req/req_sketch_float_get_num_retained.sqlx b/definitions/req/req_sketch_float_get_num_retained.sqlx new file mode 100644 index 0000000..ecc6f3c --- /dev/null +++ b/definitions/req/req_sketch_float_get_num_retained.sqlx @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +config { hasOutput: true, tags: ["req", "udfs"] } + +CREATE OR REPLACE FUNCTION ${self()}(sketch BYTES) +RETURNS INT64 +LANGUAGE js +OPTIONS ( + library=["${dataform.projectConfig.vars.jsBucket}/req_sketch_float.js"], + js_parameter_encoding_mode='STANDARD', + description = '''Returns the number of retained items (samples) in the sketch. + +Param sketch: the given sketch as BYTES. +Returns: number of retained items as INT64 + +For more information: + - https://datasketches.apache.org/docs/REQ/ReqSketch.html +''' +) AS R""" +try { + var sketchObject = null; + try { + sketchObject = Module.req_sketch_float.deserialize(sketch); + return sketchObject.getNumRetained(); + } finally { + if (sketchObject != null) sketchObject.delete(); + } +} catch (e) { + if (e.message != null) throw e; + throw new Error(Module.getExceptionMessage(e)); +} +"""; diff --git a/definitions/req/req_sketch_float_get_pmf.sqlx b/definitions/req/req_sketch_float_get_pmf.sqlx new file mode 100644 index 0000000..a91d673 --- /dev/null +++ b/definitions/req/req_sketch_float_get_pmf.sqlx @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +config { hasOutput: true, tags: ["req", "udfs"] } + +CREATE OR REPLACE FUNCTION ${self()}(sketch BYTES, split_points ARRAY<FLOAT64>, inclusive BOOL) +RETURNS ARRAY<FLOAT64> +LANGUAGE js +OPTIONS ( + library=["${dataform.projectConfig.vars.jsBucket}/req_sketch_float.js"], + js_parameter_encoding_mode='STANDARD', + description = '''Returns an approximation to the Probability Mass Function (PMF) +of the input stream as an array of probability masses defined by the given split_points. + +Param sketch: the given sketch as BYTES. + +Param split_points: an array of M unique, monotonically increasing values + (of the same type as the input values) + that divide the input value domain into M+1 non-overlapping intervals. + + Each interval except for the end intervals starts with a split-point and ends with the next split-point in sequence. + + The first interval starts below the minimum value of the stream (corresponding to a zero rank or zero probability), + and ends with the first split-point + + The last (m+1)th interval starts with the last split-point + and ends above the maximum value of the stream (corresponding to a rank or probability of 1.0). + +Param inclusive: if true and the upper boundary of an interval equals a value retained by the sketch, the interval will include that value. + If the lower boundary of an interval equals a value retained by the sketch, the interval will exclude that value. + + If false and the upper boundary of an interval equals a value retained by the sketch, the interval will exclude that value. 
+ If the lower boundary of an interval equals a value retained by the sketch, the interval will include that value. + +Returns: the PMF as a FLOAT64 array of M+1 probability masses on the interval [0.0, 1.0]. + The sum of the probability masses of all (m+1) intervals is 1.0. + +For more information: + - https://datasketches.apache.org/docs/REQ/ReqSketch.html +''' +) AS R""" +try { + var sketchObject = null; + try { + sketchObject = Module.req_sketch_float.deserialize(sketch); + if (sketchObject.isEmpty()) return null; + var vectorFloat = new Module.VectorFloat(); + split_points.forEach(value => vectorFloat.push_back(value)); + return sketchObject.getPMF(vectorFloat, inclusive); + } finally { + if (sketchObject != null) sketchObject.delete(); + } +} catch (e) { + if (e.message != null) throw e; + throw new Error(Module.getExceptionMessage(e)); +} +"""; diff --git a/definitions/req/req_sketch_float_get_quantile.sqlx b/definitions/req/req_sketch_float_get_quantile.sqlx new file mode 100644 index 0000000..0590ece --- /dev/null +++ b/definitions/req/req_sketch_float_get_quantile.sqlx @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +config { hasOutput: true, tags: ["req", "udfs"] } + +CREATE OR REPLACE FUNCTION ${self()}(sketch BYTES, rank FLOAT64, inclusive BOOL) +RETURNS FLOAT64 +LANGUAGE js +OPTIONS ( + library=["${dataform.projectConfig.vars.jsBucket}/req_sketch_float.js"], + js_parameter_encoding_mode='STANDARD', + description = '''Returns a value from the sketch that is the best approximation to a value from the original stream with the given rank. + +Param sketch: the given sketch in serialized form. +Param rank: rank of a value in the hypothetical sorted stream. +Param inclusive: if true, the given rank is considered inclusive (includes weight of a value) +Returns: an approximate quantile associated with the given rank. + +For more information: + - https://datasketches.apache.org/docs/REQ/ReqSketch.html +''' +) AS R""" +try { + var sketchObject = null; + try { + sketchObject = Module.req_sketch_float.deserialize(sketch); + if (sketchObject.isEmpty()) return null; + return sketchObject.getQuantile(rank, inclusive); + } finally { + if (sketchObject != null) sketchObject.delete(); + } +} catch (e) { + if (e.message != null) throw e; + throw new Error(Module.getExceptionMessage(e)); +} +"""; diff --git a/definitions/req/req_sketch_float_get_rank.sqlx b/definitions/req/req_sketch_float_get_rank.sqlx new file mode 100644 index 0000000..32d96fe --- /dev/null +++ b/definitions/req/req_sketch_float_get_rank.sqlx @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +config { hasOutput: true, tags: ["req", "udfs"] } + +CREATE OR REPLACE FUNCTION ${self()}(sketch BYTES, value FLOAT64, inclusive BOOL) +RETURNS FLOAT64 +LANGUAGE js +OPTIONS ( + library=["${dataform.projectConfig.vars.jsBucket}/req_sketch_float.js"], + js_parameter_encoding_mode='STANDARD', + description = '''Returns an approximation to the normalized rank, on the interval [0.0, 1.0], of the given value. + +Param sketch: the given sketch in serialized form. +Param value: value to be ranked. +Param inclusive: if true the weight of the given value is included into the rank. +Returns: an approximate rank of the given value. + +For more information: + - https://datasketches.apache.org/docs/REQ/ReqSketch.html +''' +) AS R""" +try { + var sketchObject = null; + try { + sketchObject = Module.req_sketch_float.deserialize(sketch); + if (sketchObject.isEmpty()) return null; + return sketchObject.getRank(value, inclusive); + } finally { + if (sketchObject != null) sketchObject.delete(); + } +} catch (e) { + if (e.message != null) throw e; + throw new Error(Module.getExceptionMessage(e)); +} +"""; diff --git a/definitions/req/req_sketch_float_get_rank_lower_bound.sqlx b/definitions/req/req_sketch_float_get_rank_lower_bound.sqlx new file mode 100644 index 0000000..e03a551 --- /dev/null +++ b/definitions/req/req_sketch_float_get_rank_lower_bound.sqlx @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +config { hasOutput: true, tags: ["req", "udfs"] } + +CREATE OR REPLACE FUNCTION ${self()}(sketch BYTES, rank FLOAT64, num_std_dev BYTEINT) +RETURNS FLOAT64 +LANGUAGE js +OPTIONS ( + library=["${dataform.projectConfig.vars.jsBucket}/req_sketch_float.js"], + js_parameter_encoding_mode='STANDARD', + description = '''Returns an approximate lower bound of the given normalized rank. +Param sketch: the given sketch as BYTES. +Param rank: the given rank, a value between 0 and 1.0. +Param num_std_dev: The returned bounds will be based on the statistical confidence interval determined by the given number of standard deviations + from the returned estimate. This number may be one of {1,2,3}, where 1 represents 68% confidence, 2 represents 95% confidence and 3 represents 99.7% confidence. +Returns: an approximate lower bound rank. 
+ +For more information: + - https://datasketches.apache.org/docs/REQ/ReqSketch.html +''' +) AS R""" +try { + var sketchObject = null; + try { + sketchObject = Module.req_sketch_float.deserialize(sketch); + return sketchObject.getRankLowerBound(rank, Number(num_std_dev)); + } finally { + if (sketchObject != null) sketchObject.delete(); + } +} catch (e) { + if (e.message != null) throw e; + throw new Error(Module.getExceptionMessage(e)); +} +"""; diff --git a/definitions/req/req_sketch_float_get_rank_upper_bound.sqlx b/definitions/req/req_sketch_float_get_rank_upper_bound.sqlx new file mode 100644 index 0000000..ab37081 --- /dev/null +++ b/definitions/req/req_sketch_float_get_rank_upper_bound.sqlx @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +config { hasOutput: true, tags: ["req", "udfs"] } + +CREATE OR REPLACE FUNCTION ${self()}(sketch BYTES, rank FLOAT64, num_std_dev BYTEINT) +RETURNS FLOAT64 +LANGUAGE js +OPTIONS ( + library=["${dataform.projectConfig.vars.jsBucket}/req_sketch_float.js"], + js_parameter_encoding_mode='STANDARD', + description = '''Returns an approximate upper bound of the given normalized rank. +Param sketch: the given sketch as BYTES. 
+Param rank: the given rank, a value between 0 and 1.0. +Param num_std_dev: The returned bounds will be based on the statistical confidence interval determined by the given number of standard deviations + from the returned estimate. This number may be one of {1,2,3}, where 1 represents 68% confidence, 2 represents 95% confidence and 3 represents 99.7% confidence. +Returns: an approximate upper bound rank. + +For more information: + - https://datasketches.apache.org/docs/REQ/ReqSketch.html +''' +) AS R""" +try { + var sketchObject = null; + try { + sketchObject = Module.req_sketch_float.deserialize(sketch); + return sketchObject.getRankUpperBound(rank, Number(num_std_dev)); + } finally { + if (sketchObject != null) sketchObject.delete(); + } +} catch (e) { + if (e.message != null) throw e; + throw new Error(Module.getExceptionMessage(e)); +} +"""; diff --git a/definitions/req/req_sketch_float_merge.sqlx b/definitions/req/req_sketch_float_merge.sqlx new file mode 100644 index 0000000..1dc6ad1 --- /dev/null +++ b/definitions/req/req_sketch_float_merge.sqlx @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +config { hasOutput: true, tags: ["req", "udfs"] } + +CREATE OR REPLACE AGGREGATE FUNCTION ${self()}(sketch BYTES) +RETURNS BYTES +OPTIONS ( + description = '''Merges sketches from the given column. + +Param sketch: the column of sketches. +Defaults: k = 12, hra = true. +Returns: a serialized REQ sketch as BYTES. + +For more information: + - https://datasketches.apache.org/docs/REQ/ReqSketch.html +''' +) AS ( + ${ref("req_sketch_float_merge_k_hra")}(sketch, STRUCT<INT, BOOL>(NULL, NULL)) +); diff --git a/definitions/req/req_sketch_float_merge_k_hra.sqlx b/definitions/req/req_sketch_float_merge_k_hra.sqlx new file mode 100644 index 0000000..a4a767f --- /dev/null +++ b/definitions/req/req_sketch_float_merge_k_hra.sqlx @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +config { hasOutput: true, tags: ["req", "udfs"] } + +CREATE OR REPLACE AGGREGATE FUNCTION ${self()}(sketch BYTES, params STRUCT<k INT, hra BOOL> NOT AGGREGATE) +RETURNS BYTES +LANGUAGE js +OPTIONS ( + library=["${dataform.projectConfig.vars.jsBucket}/req_sketch_float.mjs"], + description = '''Merges sketches from the given column. + +Param sketch: the column of sketches. 
+Param k: the sketch accuracy/size parameter as an even INT in the range [4, 65534]. +Param hra: if true, the high ranks are prioritized for better accuracy. Otherwise the low ranks are prioritized for better accuracy. +Returns: a serialized REQ sketch as BYTES. + +For more information: + - https://datasketches.apache.org/docs/REQ/ReqSketch.html +''' +) AS R""" +import ModuleFactory from "${dataform.projectConfig.vars.jsBucket}/req_sketch_float.mjs"; +var Module = await ModuleFactory(); +const default_k = Number(Module.DEFAULT_K); + +// UDAF interface +export function initialState(params) { + try { + return { + k: params.k == null ? default_k : Number(params.k), + hra: params.hra == null ? true : params.hra + }; + } catch (e) { + if (e.message != null) throw e; + throw new Error(Module.getExceptionMessage(e)); + } +} + +export function aggregate(state, sketch) { + try { + if (state.sketch == null) { // for transition deserialize-aggregate + state.sketch = new Module.req_sketch_float(state.k, state.hra); + } + state.sketch.merge(sketch); + } catch (e) { + if (e.message != null) throw e; + throw new Error(Module.getExceptionMessage(e)); + } +} + +export function serialize(state) { + if (state.sketch == null) return state; // for transition deserialize-serialize + try { + // for prior transition deserialize-aggregate + // merge aggregated and serialized state + if (state.sketch != null && state.serialized != null) { + sketch.merge(state.serialized); + } + return { + k: state.k, + hra: state.hra, + serialized: state.sketch.serializeAsUint8Array() + }; + } catch (e) { + if (e.message != null) throw e; + throw new Error(Module.getExceptionMessage(e)); + } finally { + state.sketch.delete(); + } +} + +export function deserialize(serialized) { + return serialized; +} + +export function merge(state, other_state) { + try { + if (state.sketch == null) { + state.sketch = new Module.req_sketch_float(state.k, state.hra); + } + if (state.serialized != null) { + 
state.sketch.merge(state.serialized); + delete state.serialized; + } + if (other_state.serialized != null) { + state.sketch.merge(other_state.serialized); + delete other_state.serialized; + } + } catch (e) { + if (e.message != null) throw e; + throw new Error(Module.getExceptionMessage(e)); + } +} + +export function finalize(state) { + return serialize(state).serialized; +} +"""; diff --git a/definitions/req/req_sketch_float_test.js b/definitions/req/req_sketch_float_test.js new file mode 100644 index 0000000..59a4932 --- /dev/null +++ b/definitions/req/req_sketch_float_test.js @@ -0,0 +1,137 @@ + +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +const { generate_udf_test, generate_udaf_test } = require('../unit_test_utils'); + +const req_1 = `FROM_BASE64('AgERCAwAAQAAAAAAAAAAAAAAQEEAAwAACgAAAAAAIEEAABBBAAAAQQAA4EAAAMBAAACgQAAAgEAAAEBAAAAAQAAAgD8=')`; + +generate_udaf_test("req_sketch_float_build", { + input_columns: [`value`], + input_rows: `SELECT * FROM UNNEST([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) AS value`, + expected_output: req_1 +}); + +const req_2 = `FROM_BASE64('AgERCAwAAQAAAAAAAAAAAAAAQEEAAwAACgAAAAAAoEEAAJhBAACQQQAAiEEAAIBBAABwQQAAYEEAAFBBAABAQQAAMEE=')`; + +generate_udaf_test("req_sketch_float_build", { + input_columns: [`value`], + input_rows: `SELECT * FROM UNNEST([11, 12, 13, 14, 15, 16, 17, 18, 19, 20]) AS value`, + expected_output: req_2 +}); + +const req_3 = `FROM_BASE64('AgERKAwAAQAAAAAAAAAAAAAAQEEAAwAAFAAAAAAAgD8AAABAAABAQAAAgEAAAKBAAADAQAAA4EAAAABBAAAQQQAAIEEAADBBAABAQQAAUEEAAGBBAABwQQAAgEEAAIhBAACQQQAAmEEAAKBB')`; + +generate_udaf_test("req_sketch_float_merge", { + input_columns: [`sketch`], + input_rows: `SELECT * FROM UNNEST([${req_1}, ${req_2}]) AS sketch`, + expected_output: req_3 +}); + +generate_udf_test("req_sketch_float_to_string", [{ + inputs: [ req_3 ], + expected_output: `'''### REQ sketch summary: + K : 12 + High Rank Acc : true + Empty : false + Estimation mode: false + Sorted : true + N : 20 + Levels : 1 + Retained items : 20 + Capacity items : 72 + Min item : 1 + Max item : 20 +### End sketch summary +'''` +}]); + +generate_udf_test("req_sketch_float_get_rank", [{ + inputs: [ req_3, 10, true ], + expected_output: 0.5 +}]); + +generate_udf_test("req_sketch_float_get_quantile", [{ + inputs: [ req_3, 0.5, true ], + expected_output: 10 +}]); + +generate_udf_test("req_sketch_float_get_min_value", [{ + inputs: [ req_3 ], + expected_output: 1 +}]); + +generate_udf_test("req_sketch_float_get_max_value", [{ + inputs: [ req_3 ], + expected_output: 20 +}]); + +generate_udf_test("req_sketch_float_get_n", [{ + inputs: [ req_3 ], + expected_output: 20 +}]); + 
+generate_udf_test("req_sketch_float_get_num_retained", [{ + inputs: [ req_3 ], + expected_output: 20 +}]); + +generate_udf_test("req_sketch_float_get_pmf", [{ + inputs: [ req_3, `[10.0]`, true ], + expected_output: `[0.5, 0.5]` +}]); + +generate_udf_test("req_sketch_float_get_cdf", [{ + inputs: [ req_3, `[10.0]`, true ], + expected_output: `[0.5, 1.0]` +}]); + +generate_udf_test("req_sketch_float_get_rank_lower_bound", [{ + inputs: [ req_3, 0.95, 2 ], + expected_output: 0.95 +}]); + +generate_udf_test("req_sketch_float_get_rank_upper_bound", [{ + inputs: [ req_3, 0.95, 2 ], + expected_output: 0.95 +}]); + +// using full signatures + +const req_4 = `FROM_BASE64('AgERAAoAAQAAAAAAAAAAAAAAIEEAAwAACgAAAAAAgD8AAABAAABAQAAAgEAAAKBAAADAQAAA4EAAAABBAAAQQQAAIEE=')`; + +generate_udaf_test("req_sketch_float_build_k_hra", { + input_columns: [`value`, `STRUCT(10 AS k, false AS hra) NOT AGGREGATE`], + input_rows: `SELECT * FROM UNNEST([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) AS value`, + expected_output: req_4 +}); + +const req_5 = `FROM_BASE64('AgERAAoAAQAAAAAAAAAAAAAAIEEAAwAACgAAAAAAMEEAAEBBAABQQQAAYEEAAHBBAACAQQAAiEEAAJBBAACYQQAAoEE=')`; + +generate_udaf_test("req_sketch_float_build_k_hra", { + input_columns: [`value`, `STRUCT(10 AS k, false AS hra) NOT AGGREGATE`], + input_rows: `SELECT * FROM UNNEST([11, 12, 13, 14, 15, 16, 17, 18, 19, 20]) AS value`, + expected_output: req_5 +}); + +const req_6 = `FROM_BASE64('AgERIAoAAQAAAAAAAAAAAAAAIEEAAwAAFAAAAAAAgD8AAABAAABAQAAAgEAAAKBAAADAQAAA4EAAAABBAAAQQQAAIEEAADBBAABAQQAAUEEAAGBBAABwQQAAgEEAAIhBAACQQQAAmEEAAKBB')`; + +generate_udaf_test("req_sketch_float_merge_k_hra", { + input_columns: [`sketch`, `STRUCT(10 AS k, false AS hra) NOT AGGREGATE`], + input_rows: `SELECT * FROM UNNEST([${req_4}, ${req_5}]) AS sketch`, + expected_output: req_6 +}); diff --git a/definitions/req/req_sketch_float_to_string.sqlx b/definitions/req/req_sketch_float_to_string.sqlx new file mode 100644 index 0000000..7fd7a57 --- /dev/null +++ 
b/definitions/req/req_sketch_float_to_string.sqlx @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +config { hasOutput: true, tags: ["req", "udfs"] } + +CREATE OR REPLACE FUNCTION ${self()}(sketch BYTES) +RETURNS STRING +LANGUAGE js +OPTIONS ( + library=["${dataform.projectConfig.vars.jsBucket}/req_sketch_float.js"], + js_parameter_encoding_mode='STANDARD', + description = '''Returns a summary string that represents the state of the given sketch. + +Param sketch: the given sketch as BYTES. +Returns: a string that represents the state of the given sketch. 
+ +For more information: + - https://datasketches.apache.org/docs/REQ/ReqSketch.html +''' +) AS R""" +try { + var sketchObject = null; + try { + sketchObject = Module.req_sketch_float.deserialize(sketch); + return sketchObject.toString(); + } finally { + if (sketchObject != null) sketchObject.delete(); + } +} catch (e) { + if (e.message != null) throw e; + throw new Error(Module.getExceptionMessage(e)); +} +"""; diff --git a/req/Makefile b/req/Makefile new file mode 100644 index 0000000..813826a --- /dev/null +++ b/req/Makefile @@ -0,0 +1,62 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +EMCC=emcc +EMCFLAGS=-I../datasketches-cpp/common/include \ + -I../datasketches-cpp/req/include \ + --no-entry \ + -sWASM_BIGINT=1 \ + -sEXPORTED_FUNCTIONS=[_malloc,_free] \ + -sENVIRONMENT=shell \ + -sTOTAL_MEMORY=1024MB \ + -O3 \ + --bind \ + --pre-js crypto.js + +ARTIFACTS=req_sketch_float.mjs req_sketch_float.js req_sketch_float.wasm + +all: $(ARTIFACTS) + +%.mjs: %.cpp + $(EMCC) $< $(EMCFLAGS) -sSINGLE_FILE=1 -o $@ + +# this rule creates a non-es6 loadable library +%.js: %.cpp + $(EMCC) $< $(EMCFLAGS) -sSINGLE_FILE=1 -o $@ + +%.wasm: %.cpp + $(EMCC) $< $(EMCFLAGS) -sSTANDALONE_WASM=1 -o $@ + +clean: + $(RM) $(ARTIFACTS) + +upload: all + @for file in $(ARTIFACTS); do \ + gcloud storage cp $$file $(JS_BUCKET)/ ; \ + done + +create: + cd .. && dataform run --tags req + +install: upload + +test: + @for file in $(wildcard test/*sql); do \ + ../substitute_and_run.sh $$file ; \ + done + +.PHONY: all clean install upload create test diff --git a/req/crypto.js b/req/crypto.js new file mode 100644 index 0000000..f90e472 --- /dev/null +++ b/req/crypto.js @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +var crypto = { getRandomValues: (array) => { for (var i = 0; i < array.length; i++) array[i] = (Math.random()*256)|0 } }; diff --git a/req/req_sketch_float.cpp b/req/req_sketch_float.cpp new file mode 100644 index 0000000..fec8367 --- /dev/null +++ b/req/req_sketch_float.cpp @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include <emscripten/bind.h> + +#include <req_sketch.hpp> + +using req_sketch_float = datasketches::req_sketch<float>; + +const emscripten::val Uint8Array = emscripten::val::global("Uint8Array"); + +EMSCRIPTEN_BINDINGS(req_sketch_float) { + emscripten::function("getExceptionMessage", emscripten::optional_override([](intptr_t ptr) { + return std::string(reinterpret_cast<std::exception*>(ptr)->what()); + })); + + emscripten::register_vector<float>("VectorFloat"); + + emscripten::constant("DEFAULT_K", 12); + + emscripten::class_<req_sketch_float>("req_sketch_float") + .constructor(emscripten::optional_override([](uint16_t k, bool hra) { + return new req_sketch_float(k, hra); + })) + .function("isEmpty", &req_sketch_float::is_empty) + .function("update", emscripten::optional_override([](req_sketch_float& self, float value) { + self.update(value); + })) + .function("merge", emscripten::optional_override([](req_sketch_float& self, const std::string& bytes) { + self.merge(req_sketch_float::deserialize(bytes.data(), bytes.size())); + })) + .function("serializeAsUint8Array", emscripten::optional_override([](const req_sketch_float& self) { + auto bytes = self.serialize(); + return Uint8Array.new_(emscripten::typed_memory_view(bytes.size(), bytes.data())); + })) + .class_function("deserialize", emscripten::optional_override([](const std::string& bytes) { + return new req_sketch_float(req_sketch_float::deserialize(bytes.data(), bytes.size())); + }), emscripten::allow_raw_pointers()) + .function("getN", &req_sketch_float::get_n) + .function("getNumRetained", &req_sketch_float::get_num_retained) + .function("getMinValue", &req_sketch_float::get_min_item) + .function("getMaxValue", &req_sketch_float::get_max_item) + .function("getRank", &req_sketch_float::get_rank) + .function("getQuantile", &req_sketch_float::get_quantile) + .function("getPMF", emscripten::optional_override([](const req_sketch_float& self, const std::vector<float>& split_points, bool inclusive) { + const 
auto pmf = self.get_PMF(split_points.data(), split_points.size(), inclusive); + return emscripten::val::array(pmf.begin(), pmf.end()); + })) + .function("getCDF", emscripten::optional_override([](const req_sketch_float& self, const std::vector<float>& split_points, bool inclusive) { + const auto cdf = self.get_CDF(split_points.data(), split_points.size(), inclusive); + return emscripten::val::array(cdf.begin(), cdf.end()); + })) + .function("toString", emscripten::optional_override([](const req_sketch_float& self) { + return self.to_string(); + })) + .function("getRankLowerBound", emscripten::optional_override([](const req_sketch_float& self, double rank, uint8_t num_std_dev) { + return self.get_rank_lower_bound(rank, num_std_dev); + })) + .function("getRankUpperBound", emscripten::optional_override([](const req_sketch_float& self, double rank, uint8_t num_std_dev) { + return self.get_rank_upper_bound(rank, num_std_dev); + })) + ; +} diff --git a/req/test/req_sketch_float_test.sql b/req/test/req_sketch_float_test.sql new file mode 100644 index 0000000..1349177 --- /dev/null +++ b/req/test/req_sketch_float_test.sql @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +# using defaults + +create or replace table `$BQ_DATASET`.req_sketch(sketch bytes); + +insert into `$BQ_DATASET`.req_sketch +(select `$BQ_DATASET`.req_sketch_float_build(value) from unnest([1,2,3,4,5,6,7,8,9,10]) as value); + +insert into `$BQ_DATASET`.req_sketch +(select `$BQ_DATASET`.req_sketch_float_build(value) from unnest([11,12,13,14,15,16,17,18,19,20]) as value); + +select `$BQ_DATASET`.req_sketch_float_to_string(`$BQ_DATASET`.req_sketch_float_merge(sketch)) from `$BQ_DATASET`.req_sketch; + +# expected 0.5 +select `$BQ_DATASET`.req_sketch_float_get_rank(`$BQ_DATASET`.req_sketch_float_merge(sketch), 10, true) from `$BQ_DATASET`.req_sketch; + +# expected 10 +select `$BQ_DATASET`.req_sketch_float_get_quantile(`$BQ_DATASET`.req_sketch_float_merge(sketch), 0.5, true) from `$BQ_DATASET`.req_sketch; + +# expected 0.5, 0.5 +select `$BQ_DATASET`.req_sketch_float_get_pmf(`$BQ_DATASET`.req_sketch_float_merge(sketch), [10.0], true) from `$BQ_DATASET`.req_sketch; + +# expected 0.5, 1 +select `$BQ_DATASET`.req_sketch_float_get_cdf(`$BQ_DATASET`.req_sketch_float_merge(sketch), [10.0], true) from `$BQ_DATASET`.req_sketch; + +# expected 1 +select `$BQ_DATASET`.req_sketch_float_get_min_value(`$BQ_DATASET`.req_sketch_float_merge(sketch)) from `$BQ_DATASET`.req_sketch; + +# expected 20 +select `$BQ_DATASET`.req_sketch_float_get_max_value(`$BQ_DATASET`.req_sketch_float_merge(sketch)) from `$BQ_DATASET`.req_sketch; + +# expected 20 +select `$BQ_DATASET`.req_sketch_float_get_n(`$BQ_DATASET`.req_sketch_float_merge(sketch)) from `$BQ_DATASET`.req_sketch; + +# expected 20 +select `$BQ_DATASET`.req_sketch_float_get_num_retained(`$BQ_DATASET`.req_sketch_float_merge(sketch)) from `$BQ_DATASET`.req_sketch; + +drop table `$BQ_DATASET`.req_sketch; + +# using full signatures + +create or replace table `$BQ_DATASET`.req_sketch(sketch bytes); + +insert into `$BQ_DATASET`.req_sketch +(select `$BQ_DATASET`.req_sketch_float_build_k_hra(value, struct<int, bool>(10, false)) from 
unnest([1,2,3,4,5,6,7,8,9,10]) as value); + +insert into `$BQ_DATASET`.req_sketch +(select `$BQ_DATASET`.req_sketch_float_build_k_hra(value, struct<int, bool>(10, false)) from unnest([11,12,13,14,15,16,17,18,19,20]) as value); + +select `$BQ_DATASET`.req_sketch_float_to_string(`$BQ_DATASET`.req_sketch_float_merge_k_hra(sketch, struct<int, bool>(10, false))) from `$BQ_DATASET`.req_sketch; + +drop table `$BQ_DATASET`.req_sketch; --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
