This is an automated email from the ASF dual-hosted git repository. alsay pushed a commit to branch cpc_no_base64 in repository https://gitbox.apache.org/repos/asf/datasketches-bigquery.git
commit 853aefac58238a83fa8b3c0c04ffd3de9a723032 Author: AlexanderSaydakov <[email protected]> AuthorDate: Wed Sep 25 14:40:00 2024 -0700 removed base64 encoding-decoding from CPC --- cpc/cpc_sketch.cpp | 52 ++++++++-------------- .../cpc_sketch_get_estimate_and_bounds_seed.sqlx | 19 ++++---- cpc/sqlx/cpc_sketch_get_estimate_seed.sqlx | 10 ++--- cpc/sqlx/cpc_sketch_to_string_seed.sqlx | 10 ++--- cpc/sqlx/cpc_sketch_union_lgk_seed.sqlx | 14 ++---- 5 files changed, 36 insertions(+), 69 deletions(-) diff --git a/cpc/cpc_sketch.cpp b/cpc/cpc_sketch.cpp index fcb5eb8..bb70a64 100644 --- a/cpc/cpc_sketch.cpp +++ b/cpc/cpc_sketch.cpp @@ -23,11 +23,10 @@ #include <cpc_sketch.hpp> #include <cpc_union.hpp> -#include "../base64.hpp" - const emscripten::val Uint8Array = emscripten::val::global("Uint8Array"); EMSCRIPTEN_BINDINGS(cpc_sketch) { + emscripten::register_vector<double>("VectorDouble"); emscripten::function("getExceptionMessage", emscripten::optional_override([](intptr_t ptr) { return std::string(reinterpret_cast<std::exception*>(ptr)->what()); @@ -45,19 +44,16 @@ EMSCRIPTEN_BINDINGS(cpc_sketch) { auto bytes = self.serialize(); return Uint8Array.new_(emscripten::typed_memory_view(bytes.size(), bytes.data())); })) - .class_function("deserializeFromB64", emscripten::optional_override([](const std::string& b64, uint64_t seed) { - std::vector<char> bytes(b64_dec_len(b64.data(), b64.size())); - b64_decode(b64.data(), b64.size(), bytes.data()); - return new datasketches::cpc_sketch(datasketches::cpc_sketch::deserialize(bytes.data(), bytes.size(), seed)); - }), emscripten::allow_raw_pointers()) - .class_function("deserializeFromBytes", emscripten::optional_override([](const std::string& bytes, uint64_t seed) { - return new datasketches::cpc_sketch(datasketches::cpc_sketch::deserialize(bytes.data(), bytes.size(), seed)); - }), emscripten::allow_raw_pointers()) - .function("getEstimate", &datasketches::cpc_sketch::get_estimate) - .function("getLowerBound", &datasketches::cpc_sketch::get_lower_bound) - .function("getUpperBound", &datasketches::cpc_sketch::get_upper_bound) - .function("toString", &datasketches::cpc_sketch::to_string) - .class_function("getMaxSerializedSizeBytes", &datasketches::cpc_sketch::get_max_serialized_size_bytes) + .class_function("getEstimate", emscripten::optional_override([](const std::string& sketch_bytes, uint64_t seed) { + return datasketches::cpc_sketch::deserialize(sketch_bytes.data(), sketch_bytes.size(), seed).get_estimate(); + })) + .class_function("getEstimateAndBounds", emscripten::optional_override([](const std::string& sketch_bytes, uint8_t num_std_devs, uint64_t seed) { + const auto sketch = datasketches::cpc_sketch::deserialize(sketch_bytes.data(), sketch_bytes.size(), seed); + return std::vector<double>{sketch.get_estimate(), sketch.get_lower_bound(num_std_devs), sketch.get_upper_bound(num_std_devs)}; + })) + .class_function("toString", emscripten::optional_override([](const std::string& sketch_bytes, uint64_t seed) { + return datasketches::cpc_sketch::deserialize(sketch_bytes.data(), sketch_bytes.size(), seed).to_string(); + })) ; emscripten::class_<datasketches::cpc_union>("cpc_union") @@ -70,29 +66,17 @@ EMSCRIPTEN_BINDINGS(cpc_sketch) { .function("updateWithBytes", emscripten::optional_override([](datasketches::cpc_union& self, const std::string& bytes, uint64_t seed) { self.update(datasketches::cpc_sketch::deserialize(bytes.data(), bytes.size(), seed)); }), emscripten::allow_raw_pointers()) - .function("updateWithB64", emscripten::optional_override([](datasketches::cpc_union& self, const std::string& b64, uint64_t seed) { - std::vector<char> bytes(b64_dec_len(b64.data(), b64.size())); - b64_decode(b64.data(), b64.size(), bytes.data()); - self.update(datasketches::cpc_sketch::deserialize(bytes.data(), bytes.size(), seed)); - }), emscripten::allow_raw_pointers()) - .function("updateWithBuffer", emscripten::optional_override([](datasketches::cpc_union& self, intptr_t bytes, size_t size, uint64_t seed) { - self.update(datasketches::cpc_sketch::deserialize(reinterpret_cast<void*>(bytes), size, seed)); - })) -// .function("getResultStream", emscripten::optional_override([](datasketches::cpc_union& self, intptr_t bytes, size_t size) { -// std::strstream stream(reinterpret_cast<char*>(bytes), size); -// self.get_result().serialize(stream); -// return (int) stream.tellp(); -// })) .function("getResultAsUint8Array", emscripten::optional_override([](datasketches::cpc_union& self) { auto bytes = self.get_result().serialize(); return Uint8Array.new_(emscripten::typed_memory_view(bytes.size(), bytes.data())); })) - .function("getResultB64", emscripten::optional_override([](datasketches::cpc_union& self) { - auto bytes = self.get_result().serialize(); - std::vector<char> b64(b64_enc_len(bytes.size())); - b64_encode((const char*) bytes.data(), bytes.size(), b64.data()); - return std::string(b64.data(), b64.size()); - })) ; + emscripten::function("cpcUnion", emscripten::optional_override([](const std::string& bytes1, const std::string& bytes2, uint8_t lg_k, uint64_t seed) { + datasketches::cpc_union u(lg_k, seed); + u.update(datasketches::cpc_sketch::deserialize(bytes1.data(), bytes1.size(), seed)); + u.update(datasketches::cpc_sketch::deserialize(bytes2.data(), bytes2.size(), seed)); + const auto bytes = u.get_result().serialize(); + return Uint8Array.new_(emscripten::typed_memory_view(bytes.size(), bytes.data())); + })); } diff --git a/cpc/sqlx/cpc_sketch_get_estimate_and_bounds_seed.sqlx b/cpc/sqlx/cpc_sketch_get_estimate_and_bounds_seed.sqlx index 1559592..d47be0c 100644 --- a/cpc/sqlx/cpc_sketch_get_estimate_and_bounds_seed.sqlx +++ b/cpc/sqlx/cpc_sketch_get_estimate_and_bounds_seed.sqlx @@ -24,6 +24,7 @@ RETURNS STRUCT<estimate FLOAT64, lower_bound FLOAT64, upper_bound FLOAT64> LANGUAGE js OPTIONS ( library=["gs://$GCS_BUCKET/cpc_sketch.js"], + js_parameter_encoding_mode='STANDARD', description = '''Gets cardinality estimate and bounds from given sketch. Param sketch: The given sketch to query as bytes. @@ -38,18 +39,14 @@ For more information: ''' ) AS R""" try { - var sketchObject = null; - try { - sketchObject = Module.cpc_sketch.deserializeFromB64(sketch, seed ? BigInt(seed) : BigInt(Module.DEFAULT_SEED)); - return { - estimate: sketchObject.getEstimate(), - lower_bound: sketchObject.getLowerBound(num_std_devs), - upper_bound: sketchObject.getUpperBound(num_std_devs) - }; - } finally { - if (sketchObject != null) sketchObject.delete(); - } + const result = Module.cpc_sketch.getEstimateAndBounds(sketch, Number(num_std_devs), seed ? BigInt(seed) : BigInt(Module.DEFAULT_SEED)); + return { + estimate: result.get(0), + lower_bound: result.get(1), + upper_bound: result.get(2) + }; } catch (e) { + if (e.message != null) throw e; throw new Error(Module.getExceptionMessage(e)); } """; diff --git a/cpc/sqlx/cpc_sketch_get_estimate_seed.sqlx b/cpc/sqlx/cpc_sketch_get_estimate_seed.sqlx index 520eb4c..90722ad 100644 --- a/cpc/sqlx/cpc_sketch_get_estimate_seed.sqlx +++ b/cpc/sqlx/cpc_sketch_get_estimate_seed.sqlx @@ -24,6 +24,7 @@ RETURNS FLOAT64 LANGUAGE js OPTIONS ( library=["gs://$GCS_BUCKET/cpc_sketch.js"], + js_parameter_encoding_mode='STANDARD', description = '''Gets cardinality estimate and bounds from given sketch. Param sketch: The given sketch to query as BYTES. @@ -35,14 +36,9 @@ For more information: ''' ) AS R""" try { - var sketchObject = null; - try { - sketchObject = Module.cpc_sketch.deserializeFromB64(sketch, seed ? BigInt(seed) : BigInt(Module.DEFAULT_SEED)); - return sketchObject.getEstimate(); - } finally { - if (sketchObject != null) sketchObject.delete(); - } + return Module.cpc_sketch.getEstimate(sketch, seed ? BigInt(seed) : BigInt(Module.DEFAULT_SEED)); } catch (e) { + if (e.message != null) throw e; throw new Error(Module.getExceptionMessage(e)); } """; diff --git a/cpc/sqlx/cpc_sketch_to_string_seed.sqlx b/cpc/sqlx/cpc_sketch_to_string_seed.sqlx index ae97dda..3d74051 100644 --- a/cpc/sqlx/cpc_sketch_to_string_seed.sqlx +++ b/cpc/sqlx/cpc_sketch_to_string_seed.sqlx @@ -24,6 +24,7 @@ RETURNS STRING LANGUAGE js OPTIONS ( library=["gs://$GCS_BUCKET/cpc_sketch.js"], + js_parameter_encoding_mode='STANDARD', description = '''Returns a summary string that represents the state of the given sketch. Param sketch the given sketch as BYTES. @@ -36,14 +37,9 @@ For more information: ) AS R""" const default_seed = BigInt(Module.DEFAULT_SEED); try { - var sketchObject = null; - try { - sketchObject = Module.cpc_sketch.deserializeFromB64(sketch, seed ? BigInt(seed) : default_seed); - return sketchObject.toString(); - } finally { - if (sketchObject != null) sketchObject.delete(); - } + return Module.cpc_sketch.toString(sketch, seed ? BigInt(seed) : default_seed); } catch (e) { + if (e.message != null) throw e; throw new Error(Module.getExceptionMessage(e)); } """; diff --git a/cpc/sqlx/cpc_sketch_union_lgk_seed.sqlx b/cpc/sqlx/cpc_sketch_union_lgk_seed.sqlx index 7d82223..5089d5e 100644 --- a/cpc/sqlx/cpc_sketch_union_lgk_seed.sqlx +++ b/cpc/sqlx/cpc_sketch_union_lgk_seed.sqlx @@ -24,6 +24,7 @@ RETURNS BYTES LANGUAGE js OPTIONS ( library=["gs://$GCS_BUCKET/cpc_sketch.js"], + js_parameter_encoding_mode='STANDARD', description = '''Computes a sketch that represents the scalar union of the two given sketches. Param sketchA: the first sketch as BYTES. @@ -36,19 +37,12 @@ For more information: - https://datasketches.apache.org/docs/CPC/CpcSketches.html ''' ) AS R""" -const default_lg_k = 12; +const default_lg_k = Number(12); const default_seed = BigInt(Module.DEFAULT_SEED); try { - var union = null; - try { - union = new Module.cpc_union(lg_k ? lg_k : default_lg_k, seed ? BigInt(seed) : default_seed); - union.updateWithB64(sketchA, seed ? BigInt(seed) : default_seed) - union.updateWithB64(sketchB, seed ? BigInt(seed) : default_seed) - return union.getResultB64(); - } finally { - if (union != null) union.delete(); - } + return Module.cpcUnion(sketchA, sketchB, lg_k ? Number(lg_k) : default_lg_k, seed ? BigInt(seed) : default_seed); } catch (e) { + if (e.message != null) throw e; throw new Error(Module.getExceptionMessage(e)); } """; --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
