This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch theta_docs
in repository https://gitbox.apache.org/repos/asf/datasketches-bigquery.git

commit 195c07f6fffdf09d7b913059540ba0c857c71e67
Author: AlexanderSaydakov <[email protected]>
AuthorDate: Fri Jan 31 18:40:06 2025 -0800

    better wording
---
 theta/sqlx/theta_sketch_agg_int64_nop.sqlx         | 140 +++++++++++++++++++++
 theta/sqlx/theta_sketch_get_estimate.sqlx          |   6 +-
 .../sqlx/theta_sketch_get_estimate_and_bounds.sqlx |   2 +-
 .../theta_sketch_get_estimate_and_bounds_seed.sqlx |   2 +-
 theta/sqlx/theta_sketch_get_estimate_seed.sqlx     |   4 +-
 5 files changed, 147 insertions(+), 7 deletions(-)

diff --git a/theta/sqlx/theta_sketch_agg_int64_nop.sqlx b/theta/sqlx/theta_sketch_agg_int64_nop.sqlx
new file mode 100644
index 0000000..4dd2bd3
--- /dev/null
+++ b/theta/sqlx/theta_sketch_agg_int64_nop.sqlx
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+config { hasOutput: true, tags: ["theta", "udfs"] }
+
+CREATE OR REPLACE AGGREGATE FUNCTION ${self()}(value INT64, params STRUCT<lg_k BYTEINT, seed INT64, p FLOAT64> NOT AGGREGATE)
+RETURNS BYTES 
+LANGUAGE js
+OPTIONS (
+  library=["${dataform.projectConfig.vars.jsBucket}/theta.mjs"],
+  description = '''Creates a sketch that represents the cardinality of the given INT64 column.
+
+Param value: the INT64 column of identifiers.
+Param lg_k: the sketch accuracy/size parameter as a BYTEINT in the range [4, 26]. A NULL specifies the default of 12.
+Param seed: the seed to be used by the underlying hash function. A NULL specifies the default of 9001.
+Param p: up-front sampling probability. A NULL specifies the default of 1.0.
+Returns: a Compact, Compressed Theta Sketch, as BYTES.
+
+For more information:
+ - https://datasketches.apache.org/docs/Theta/ThetaSketches.html
+ '''
+) AS R"""
+import ModuleFactory from "${dataform.projectConfig.vars.jsBucket}/theta.mjs";
+var Module = await ModuleFactory();
+const default_lg_k = Number(Module.DEFAULT_LG_K);
+const default_seed = BigInt(Module.DEFAULT_SEED);
+const default_p = 1.0;
+
+// UDAF interface
+export function initialState(params) {
+  return {
+    lg_k: params.lg_k == null ? default_lg_k : Number(params.lg_k),
+    seed: params.seed == null ? default_seed : BigInt(params.seed),
+    p: params.p == null ? default_p : params.p
+  };
+}
+
+export function aggregate(state, value) {
+  if (value == null) return;
+//  try {
+//    if (state.sketch == null) {
+//      state.sketch = new Module.update_theta_sketch(state.lg_k, state.seed, state.p);
+//    }
+//    state.sketch.updateInt64(value);
+//  } catch (e) {
+//    if (e.message != null) throw e;
+//    throw new Error(Module.getExceptionMessage(e));
+//  }
+}
+
+export function serialize(state) {
+  if (state.sketch == null && state.union == null) return state; // for transition deserialize-serialize
+  return state;
+/*  try {
+    // for prior transition deserialize-aggregate
+    // merge aggregated and serialized state
+    if (state.sketch != null) {
+      if (state.serialized != null) {
+        var u = null;
+        try {
+          u = new Module.theta_union(state.lg_k, state.seed);
+          u.updateWithUpdateSketch(state.sketch);
+          u.updateWithBytes(state.serialized, state.seed);
+          state.serialized = u.getResultAsUint8ArrayCompressed();
+        } finally {
+          if (u != null) u.delete();
+        }
+      } else {
+        state.serialized = state.sketch.serializeAsUint8ArrayCompressed();
+      }
+      state.sketch.delete();
+      delete state.sketch;
+    } else if (state.union != null) {
+      state.serialized = state.union.getResultAsUint8ArrayCompressed();
+      state.union.delete();
+      delete state.union;
+    }
+    return state;
+  } catch (e) {
+    if (e.message != null) throw e;
+    throw new Error(Module.getExceptionMessage(e));
+  } finally {
+    if (state.sketch != null) state.sketch.delete();
+    if (state.union != null) state.union.delete();
+  }*/
+}
+
+export function deserialize(state) {
+  return state;
+}
+
+export function merge(state, other_state) {
+/*  try {
+    if (state.union == null) {
+      state.union = new Module.theta_union(state.lg_k, state.seed);
+    }
+    if (state.serialized != null) {
+      state.union.updateWithBytes(state.serialized, state.seed);
+      delete state.serialized;
+    }
+    if (other_state.serialized != null) {
+      state.union.updateWithBytes(other_state.serialized, state.seed);
+      delete other_state.serialized;
+    }
+  } catch (e) {
+    if (e.message != null) throw e;
+    throw new Error(Module.getExceptionMessage(e));
+  }*/
+}
+
+export function finalize(state) {
+//  return serialize(state).serialized
+  var sketch = null;
+  try {
+    sketch = new Module.update_theta_sketch(state.lg_k, state.seed, state.p);
+    return sketch.serializeAsUint8ArrayCompressed();
+  } catch (e) {
+    if (e.message != null) throw e;
+    throw new Error(Module.getExceptionMessage(e));
+  } finally {
+    if (sketch != null) sketch.delete();
+  }
+}
+""";
diff --git a/theta/sqlx/theta_sketch_get_estimate.sqlx b/theta/sqlx/theta_sketch_get_estimate.sqlx
index ec0af76..2cdd52a 100644
--- a/theta/sqlx/theta_sketch_get_estimate.sqlx
+++ b/theta/sqlx/theta_sketch_get_estimate.sqlx
@@ -22,11 +22,11 @@ config { hasOutput: true, tags: ["theta", "udfs"] }
 CREATE OR REPLACE FUNCTION ${self()}(sketch BYTES)
 RETURNS FLOAT64
 OPTIONS (
-  description = '''Gets cardinality estimate and bounds from given sketch.
-  
+  description = '''Gets distinct count estimate from a given sketch.
+
 Param sketch: The given sketch to query as BYTES.
 Defaults: seed = 9001.
-Returns: a FLOAT64 value as the cardinality estimate.
+Returns: distinct count estimate as FLOAT64.
 
 For more information:
  - https://datasketches.apache.org/docs/Theta/ThetaSketches.html
diff --git a/theta/sqlx/theta_sketch_get_estimate_and_bounds.sqlx b/theta/sqlx/theta_sketch_get_estimate_and_bounds.sqlx
index c2a9e68..12b1266 100644
--- a/theta/sqlx/theta_sketch_get_estimate_and_bounds.sqlx
+++ b/theta/sqlx/theta_sketch_get_estimate_and_bounds.sqlx
@@ -22,7 +22,7 @@ config { hasOutput: true, tags: ["theta", "udfs"] }
 CREATE OR REPLACE FUNCTION ${self()}(sketch BYTES, num_std_devs BYTEINT)
 RETURNS STRUCT<estimate FLOAT64, lower_bound FLOAT64, upper_bound FLOAT64>
 OPTIONS (
-  description = '''Gets cardinality estimate and bounds from given sketch.
+  description = '''Gets distinct count estimate and bounds from a given sketch.
 
 Param sketch: The given sketch to query as BYTES.
 Param num_std_devs: The returned bounds will be based on the statistical confidence interval
diff --git a/theta/sqlx/theta_sketch_get_estimate_and_bounds_seed.sqlx b/theta/sqlx/theta_sketch_get_estimate_and_bounds_seed.sqlx
index 579edf4..3702e4b 100644
--- a/theta/sqlx/theta_sketch_get_estimate_and_bounds_seed.sqlx
+++ b/theta/sqlx/theta_sketch_get_estimate_and_bounds_seed.sqlx
@@ -25,7 +25,7 @@ LANGUAGE js
 OPTIONS (
   library=["${dataform.projectConfig.vars.jsBucket}/theta.js"],
   js_parameter_encoding_mode='STANDARD',
-  description = '''Gets cardinality estimate and bounds from given sketch.
+  description = '''Gets distinct count estimate and bounds from a given sketch.
 
 Param sketch: The given sketch to query as BYTES.
 Param num_std_devs: The returned bounds will be based on the statistical confidence interval
diff --git a/theta/sqlx/theta_sketch_get_estimate_seed.sqlx b/theta/sqlx/theta_sketch_get_estimate_seed.sqlx
index 78163cf..7abde98 100644
--- a/theta/sqlx/theta_sketch_get_estimate_seed.sqlx
+++ b/theta/sqlx/theta_sketch_get_estimate_seed.sqlx
@@ -25,11 +25,11 @@ LANGUAGE js
 OPTIONS (
   library=["${dataform.projectConfig.vars.jsBucket}/theta.js"],
   js_parameter_encoding_mode='STANDARD',
-  description = '''Gets cardinality estimate and bounds from given sketch.
+  description = '''Gets distinct count estimate from a given sketch.
   
 Param sketch: The given sketch to query as BYTES.
 Param seed: This is used to confirm that the given sketch was configured with the correct seed.
-Returns: a FLOAT64 value as the cardinality estimate.
+Returns: distinct count estimate as FLOAT64.
 
 For more information:
  - https://datasketches.apache.org/docs/Theta/ThetaSketches.html


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to