This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch cpc_tests
in repository https://gitbox.apache.org/repos/asf/datasketches-bigquery.git

commit c21730e83dba354d187737b006c9564c6b5d9a09
Author: AlexanderSaydakov <[email protected]>
AuthorDate: Tue Nov 5 17:09:19 2024 -0800

    added CPC sketch tests
---
 tests/cpc_sketch_test.js    | 153 ++++++++++++++++++++++++++++++++++++++++++++
 tests/run_dataform_tests.sh |   5 +-
 2 files changed, 155 insertions(+), 3 deletions(-)

diff --git a/tests/cpc_sketch_test.js b/tests/cpc_sketch_test.js
new file mode 100644
index 0000000..4893efb
--- /dev/null
+++ b/tests/cpc_sketch_test.js
@@ -0,0 +1,153 @@
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+const { generate_udf_test, generate_udaf_test } = require('./unit_test_utils');
+
+// using defaults
+
+const cpc_1 = `FROM_BASE64('CAEQCwAOzJMDAAAAAgAAAAAAAAAA+p9AiIAEKIABCEC+FRhuAwAAAA==')`;
+
+generate_udaf_test("cpc_sketch_agg_string", {
+  input_columns: [`str`],
+  input_rows: `SELECT * FROM UNNEST(['a', 'b', 'c']) AS str`,
+  expected_output: cpc_1
+});
+
+const cpc_2 = `FROM_BASE64('CAEQCwAOzJMDAAAAAgAAAAAAAADA/J9AVTYhEhABCEC+rcvjSAAAAA==')`;
+
+generate_udaf_test("cpc_sketch_agg_string", {
+  input_columns: [`str`],
+  input_rows: `SELECT * FROM UNNEST(['c', 'd', 'e']) AS str`,
+  expected_output: cpc_2
+});
+
+const cpc_union_1 = `FROM_BASE64('BAEQCwAKzJMFAAAAAgAAAHwTuG5g27UF')`;
+
+generate_udf_test("cpc_sketch_union", [{
+  inputs: [ cpc_1, cpc_2 ],
+  expected_output: cpc_union_1
+}]);
+
+generate_udf_test("cpc_sketch_get_estimate", [{
+  inputs: [ cpc_union_1 ],
+  expected_output: 5.00162840932184
+}]);
+
+generate_udf_test("cpc_sketch_to_string", [{
+  inputs: [ cpc_union_1 ],
+  expected_output: `'''### CPC sketch summary:
+   lg_k           : 11
+   seed hash      : 93cc
+   C              : 5
+   flavor         : 1
+   merged         : true
+   interesting col: 0
+   table entries  : 5
+   window         : not allocated
+### End sketch summary
+'''`
+}]);
+
+const cpc_3 = 
`FROM_BASE64('CgEQCwAezJMAFQAAJgAAAAAAACDomYJAR9CpOgRcw0AMAAAAJgEAALFuuPtTekqPjdsIcV1W8eGS92h1HRSv4ygo4NXXjivr+bTu1EUfnE79HYVQ6agvNz9+Hagp1a4EHx+EY+k2p6o6x2pk0Sliv3NPNigUgRBaQwl3C2j+uFkMGsx4OdrqcdlE53p48+Coa3LN7ZYop6ESxxYUJi83zSxi23wia67Sy/uMTVt3ZCZHMeneexhsvD1oUKDzqjs50zwlZLgrVZhOWEYaR6r92giX4haa9Dw7H3L39AmOVe7RdTL2jD2NDsMWWRKpMT01VCGLWO+NsLO9KudSPM4YwuzWcT8UBng9o84WNabrHqWLWJ/MPLn13rCcdTFTg8V0u3gWDQsbucasWB7U2AWe4CFvqw5ZnlhhMIxuPNOJuW4f6t5ocuJg1jB7LaLfnxelVtFUjKbikMrBq3c8
 [...]
+
+generate_udaf_test("cpc_sketch_agg_int64", {
+  input_columns: [`value`],
+  input_rows: `SELECT * FROM UNNEST(GENERATE_ARRAY(1, 10000, 1)) AS value`,
+  expected_output: cpc_3
+});
+
+const cpc_4 = 
`FROM_BASE64('CgEQCwAezJPwFAAAHQAAAAAAAJBRgoJADAspKmFqw0AJAAAAIwEAAI+bD2HczIQh3KVdHCIUeTRCZ4YUVruJS2nR1MIzqGU7kEVDrL2y6DiaK2WFk3WzpbIX5B56qs4GOZIth55op49CDCiv3ii6tBe57pogsS4YxBOjLFanxXrQFNv7cHzraiAxsrpvJpH7cbNfKQsQFnqy2jZCXnazVq2rUqWvIirYQV2n9TcjfWf7yvFMwbr3yL5WXQejcUVuqE93i6DgYFqdTZ5SVJ7VrYzKSuWOEvYi0cdjqsIBMW1b2PBkkIN0hm2k/u6ZU4NpkHNCFtcLD8Ib+EzED7uGwJU9sTIrZYxLqxzizlSPdULMU5Qweplci6cJM+tLN5i4iVSnRP7RUx3u9PmUYWe4euLcOvmg9apJ5kGdOoaGFLIKCwlsSClYwbOjiv6EDZpP9tnyWYT5FAWfcyxg
 [...]
+
+generate_udaf_test("cpc_sketch_agg_int64", {
+  input_columns: [`value`],
+  input_rows: `SELECT * FROM UNNEST(GENERATE_ARRAY(100000, 110000, 1)) AS value`,
+  expected_output: cpc_4
+});
+
+const cpc_union_2 = 
`FROM_BASE64('BgEQCwAazJP3HAAALQAAAA4AAAAmAQAAJ0NIHW+tM67tHUnwsPbKORpUC7EyQ5MsfHRXN0Q+6Z1Ubi/XQo58XeNUTkXixXesrY8tQlBMsrOz4j2doKYCj80DxdXTVA9ELykthMswmdJ2l9VpOHILjhm9pTrjBXDMMYzFMte5KjrQinVsZd4c1+Mv9upeu/k0i+J+LTgwFYXCX+1Sxok6Vi4mC7U4SVfGVlIMtF3hNGtEJ0AdnaQLZ1yahu1IIzRY84ThaGC5Pb7YSds2cquigRVysFpDprMjUy+24mlCu21QZEtlV5pt513c2PPIY1s45m+7rZIjceRDqXMvvlxLvl2+5LA1uzUJc1dFEvKhbuedJPWzsNXdyrDMounWGNqiM4hj4xwtT+emTuQQO5adOFmTCsCzhqpCGGpoKGmx9kLQGE20/k505of0vy1JF9YYgsELEiPzU0
 [...]
+
+generate_udaf_test("cpc_sketch_agg_union", {
+  input_columns: [`sketch`],
+  input_rows: `SELECT * FROM UNNEST([${cpc_3}, ${cpc_4}]) AS sketch`,
+  expected_output: cpc_union_2
+});
+
+generate_udf_test("cpc_sketch_get_estimate_and_bounds", [{
+  inputs: [ cpc_union_2, 3 ],
+  expected_output: `STRUCT(20000.731632174215 AS estimate, 19103.49112120969 AS lower_bound, 20932 AS upper_bound)`
+}]);
+
+
+// using full signatures
+
+const cpc_10_111_1 = `FROM_BASE64('CAEQCgAOjNMDAAAAAgAAAAAAAAAA+I9AkAIOUAACCEApL1e3AAAAAA==')`;
+
+generate_udaf_test("cpc_sketch_agg_string_lgk_seed", {
+  input_columns: [`str`, 'STRUCT(10 AS lgk, 111 AS seed) NOT AGGREGATE'],
+  input_rows: `SELECT * FROM UNNEST(['a', 'b', 'c']) AS str`,
+  expected_output: cpc_10_111_1
+});
+
+const cpc_10_111_2 = `FROM_BASE64('CAEQCgAOjNMDAAAAAgAAAAAAAACA+49AHHqBFCABCEDnccXLCQAAAA==')`;
+
+generate_udaf_test("cpc_sketch_agg_string_lgk_seed", {
+  input_columns: [`str`, 'STRUCT(10 AS lgk, 111 AS seed) NOT AGGREGATE'],
+  input_rows: `SELECT * FROM UNNEST(['c', 'd', 'e']) AS str`,
+  expected_output: cpc_10_111_2
+});
+
+const cpc_union_10_111_1 = `FROM_BASE64('BAEQCgAKjNMFAAAAAgAAAIfHruR2mxwA')`;
+
+generate_udf_test("cpc_sketch_union_lgk_seed", [{
+  inputs: [ cpc_10_111_1, cpc_10_111_2, 10, 111 ],
+  expected_output: cpc_union_10_111_1
+}]);
+
+generate_udf_test("cpc_sketch_get_estimate_seed", [{
+  inputs: [ cpc_union_10_111_1, 111 ],
+  expected_output: 5.003258518185566
+}]);
+
+const cpc_10_111_3 = 
`FROM_BASE64('CgEQCgAejNObDgAAEwAAAAAAAODTamJAEuxQsYzzw0AHAAAAkwAAAItbgcaBtLoCnGenKGkCETFyOS7wvKi2gMwNzDqqRZ/mxry4yzw35mVS/KqEzafPfdfa8cjkrnqKux4nnm/ltHoIYSSxQBrnJnF0nYvhGpSt2Kp7XVBocExL1HRWxmbzSsURbgKE2j3kvk7a0cp33F5fskKIRnam+oovpGrsDvAITGp0QhFm72qi3Cwfu6bWwX727GC96atVa1q4EQuKH90kW+QA8PoNyJkDMeW9W5NsZ0ivKwiIW43R1x0aJkJ66UvCCrRBzGvIqml9M02+vmq6aiQCi8uMnpFsXLPku+E4Wh6iI/eERBdqvcdKkkcmUNZXe5Edqp5jfrW+riW6lZYhByM3Nwmq5RqJE3fdGMQyXialhnb3IZZWhiotTGUIl6lz7XcxMGaNFWAhPM4sV
 [...]
+
+generate_udaf_test("cpc_sketch_agg_int64_lgk_seed", {
+  input_columns: [`value`, 'STRUCT(10 AS lgk, 111 AS seed) NOT AGGREGATE'],
+  input_rows: `SELECT * FROM UNNEST(GENERATE_ARRAY(1, 10000, 1)) AS value`,
+  expected_output: cpc_10_111_3
+});
+
+const cpc_10_111_4 = 
`FROM_BASE64('CgEQCgAejNNPDgAAHgAAAAAAAABC02JADI03YoJjw0AJAAAAjgAAABaSEL4NyUXqagO6ZjJQMLDbybVyp3cuDaOSvCpt0YILk8N6gxzTVqX2pxUUvN5bbkJJ5x4uerdUH8j5aF/Jl1237GFnlrjJNmjrd7XaETjbRJKhSwO8Ep6tHh5Z1ZGnth0tsMOJR+xVyxlrMAxk6pepY0gYJbPTfKRXdIQYIsRjSPrBZT7FCuQIgQvU7DEpGef39md7YskoD6KwlN6X28aBbtQmYc/X8eMo5GiATtYsox7KxCCNWg/HgO+BGjbmrQ0ahAXporzKhq89cCSsX9lAkWr1NbrEUs8U91SLcwUaGuW6QhbZjtR6VYJH1jXbJTg2I2xyuW9qvpKio9wvZakzDFMPVdTmMkK6CqRWKtq3Hw2z2rIiMaLSsUWhL0fIYzC67RavIVnsPFpF5ksbZ
 [...]
+
+generate_udaf_test("cpc_sketch_agg_int64_lgk_seed", {
+  input_columns: [`value`, 'STRUCT(10 AS lgk, 111 AS seed) NOT AGGREGATE'],
+  input_rows: `SELECT * FROM UNNEST(GENERATE_ARRAY(100000, 110000, 1)) AS value`,
+  expected_output: cpc_10_111_4
+});
+
+const cpc_union_10_111_2 = 
`FROM_BASE64('BgEQCgEajNN5EgAAGAAAAAgAAACTAAAAsyPBi2WqQwBo3cJcFgbmGiz9aHOsGDpMSteKVlRvU6162XVKI0f0LpxTazu+ea3YURXbk+aIY3TSqUXnAjdkt7oBFjUCGx+2XtCjlfPZvB3D5FJnVQ7BqS6n1RW9InJ/FrwUUWMIifVqfNZHP9u3Q5qbY4Egsj3biopdPsd9oy3hhuNyv2nHpkkvTN0paS/a7Q1JWOkCI+w66PxpV0PHksJezDbNXSvyYXtIT11nFTFhHE/t2OToKFQYsOibD8lzf9x7USK1rIOhQs1Jl3T1sKFmJhOYr8byJimRUJN+6qsi95LakyxlMZThUVxoZWpIZX0z7id2FFp5IFNxJGsObiPKq+NLdIAGU9LUx9OFuW5Doog9ZnLpAEwMcmL2Qu/IUFqoDIGbNZAeMxFbqTOrq5FMdmj6aJ2ER3a
 [...]
+
+generate_udaf_test("cpc_sketch_agg_union_lgk_seed", {
+  input_columns: [`sketch`, 'STRUCT(10 AS lgk, 111 AS seed) NOT AGGREGATE'],
+  input_rows: `SELECT * FROM UNNEST([${cpc_10_111_3}, ${cpc_10_111_4}]) AS sketch`,
+  expected_output: cpc_union_10_111_2
+});
+
+generate_udf_test("cpc_sketch_get_estimate_and_bounds_seed", [{
+  inputs: [ cpc_union_10_111_2, 3, 111 ],
+  expected_output: `STRUCT(19967.703650307845 AS estimate, 18707.573211045103 AS lower_bound, 21304 AS upper_bound)`
+}]);
diff --git a/tests/run_dataform_tests.sh b/tests/run_dataform_tests.sh
index 00a1481..b357a3b 100755
--- a/tests/run_dataform_tests.sh
+++ b/tests/run_dataform_tests.sh
@@ -56,8 +56,7 @@ EOF
   mkdir "$TEST_DIR/definitions"
 
   # Copy the test files to the definitions directory
-  cp unit_test_cases.js "$TEST_DIR/definitions/"
-  cp unit_test_utils.js "$TEST_DIR/definitions/"
+  cp *.js "$TEST_DIR/definitions/"
 
   # Run Dataform tests
   cd "$TEST_DIR"
@@ -84,4 +83,4 @@ if [[ -z "$BQ_LOCATION" ]]; then
 fi
 
 # Run the tests
-run_dataform_tests
\ No newline at end of file
+run_dataform_tests


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to