This is an automated email from the ASF dual-hosted git repository. alsay pushed a commit to branch cpc_tests in repository https://gitbox.apache.org/repos/asf/datasketches-bigquery.git
commit c21730e83dba354d187737b006c9564c6b5d9a09 Author: AlexanderSaydakov <[email protected]> AuthorDate: Tue Nov 5 17:09:19 2024 -0800 added CPC sketch tests --- tests/cpc_sketch_test.js | 153 ++++++++++++++++++++++++++++++++++++++++++++ tests/run_dataform_tests.sh | 5 +- 2 files changed, 155 insertions(+), 3 deletions(-) diff --git a/tests/cpc_sketch_test.js b/tests/cpc_sketch_test.js new file mode 100644 index 0000000..4893efb --- /dev/null +++ b/tests/cpc_sketch_test.js @@ -0,0 +1,153 @@ + +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +const { generate_udf_test, generate_udaf_test } = require('./unit_test_utils'); + +// using defaults + +const cpc_1 = `FROM_BASE64('CAEQCwAOzJMDAAAAAgAAAAAAAAAA+p9AiIAEKIABCEC+FRhuAwAAAA==')`; + +generate_udaf_test("cpc_sketch_agg_string", { + input_columns: [`str`], + input_rows: `SELECT * FROM UNNEST(['a', 'b', 'c']) AS str`, + expected_output: cpc_1 +}); + +const cpc_2 = `FROM_BASE64('CAEQCwAOzJMDAAAAAgAAAAAAAADA/J9AVTYhEhABCEC+rcvjSAAAAA==')`; + +generate_udaf_test("cpc_sketch_agg_string", { + input_columns: [`str`], + input_rows: `SELECT * FROM UNNEST(['c', 'd', 'e']) AS str`, + expected_output: cpc_2 +}); + +const cpc_union_1 = `FROM_BASE64('BAEQCwAKzJMFAAAAAgAAAHwTuG5g27UF')`; + +generate_udf_test("cpc_sketch_union", [{ + inputs: [ cpc_1, cpc_2 ], + expected_output: cpc_union_1 +}]); + +generate_udf_test("cpc_sketch_get_estimate", [{ + inputs: [ cpc_union_1 ], + expected_output: 5.00162840932184 +}]); + +generate_udf_test("cpc_sketch_to_string", [{ + inputs: [ cpc_union_1 ], + expected_output: `'''### CPC sketch summary: + lg_k : 11 + seed hash : 93cc + C : 5 + flavor : 1 + merged : true + interesting col: 0 + table entries : 5 + window : not allocated +### End sketch summary +'''` +}]); + +const cpc_3 = `FROM_BASE64('CgEQCwAezJMAFQAAJgAAAAAAACDomYJAR9CpOgRcw0AMAAAAJgEAALFuuPtTekqPjdsIcV1W8eGS92h1HRSv4ygo4NXXjivr+bTu1EUfnE79HYVQ6agvNz9+Hagp1a4EHx+EY+k2p6o6x2pk0Sliv3NPNigUgRBaQwl3C2j+uFkMGsx4OdrqcdlE53p48+Coa3LN7ZYop6ESxxYUJi83zSxi23wia67Sy/uMTVt3ZCZHMeneexhsvD1oUKDzqjs50zwlZLgrVZhOWEYaR6r92giX4haa9Dw7H3L39AmOVe7RdTL2jD2NDsMWWRKpMT01VCGLWO+NsLO9KudSPM4YwuzWcT8UBng9o84WNabrHqWLWJ/MPLn13rCcdTFTg8V0u3gWDQsbucasWB7U2AWe4CFvqw5ZnlhhMIxuPNOJuW4f6t5ocuJg1jB7LaLfnxelVtFUjKbikMrBq3c8 [...] + +generate_udaf_test("cpc_sketch_agg_int64", { + input_columns: [`value`], + input_rows: `SELECT * FROM UNNEST(GENERATE_ARRAY(1, 10000, 1)) AS value`, + expected_output: cpc_3 +}); + +const cpc_4 = `FROM_BASE64('CgEQCwAezJPwFAAAHQAAAAAAAJBRgoJADAspKmFqw0AJAAAAIwEAAI+bD2HczIQh3KVdHCIUeTRCZ4YUVruJS2nR1MIzqGU7kEVDrL2y6DiaK2WFk3WzpbIX5B56qs4GOZIth55op49CDCiv3ii6tBe57pogsS4YxBOjLFanxXrQFNv7cHzraiAxsrpvJpH7cbNfKQsQFnqy2jZCXnazVq2rUqWvIirYQV2n9TcjfWf7yvFMwbr3yL5WXQejcUVuqE93i6DgYFqdTZ5SVJ7VrYzKSuWOEvYi0cdjqsIBMW1b2PBkkIN0hm2k/u6ZU4NpkHNCFtcLD8Ib+EzED7uGwJU9sTIrZYxLqxzizlSPdULMU5Qweplci6cJM+tLN5i4iVSnRP7RUx3u9PmUYWe4euLcOvmg9apJ5kGdOoaGFLIKCwlsSClYwbOjiv6EDZpP9tnyWYT5FAWfcyxg [...] + +generate_udaf_test("cpc_sketch_agg_int64", { + input_columns: [`value`], + input_rows: `SELECT * FROM UNNEST(GENERATE_ARRAY(100000, 110000, 1)) AS value`, + expected_output: cpc_4 +}); + +const cpc_union_2 = `FROM_BASE64('BgEQCwAazJP3HAAALQAAAA4AAAAmAQAAJ0NIHW+tM67tHUnwsPbKORpUC7EyQ5MsfHRXN0Q+6Z1Ubi/XQo58XeNUTkXixXesrY8tQlBMsrOz4j2doKYCj80DxdXTVA9ELykthMswmdJ2l9VpOHILjhm9pTrjBXDMMYzFMte5KjrQinVsZd4c1+Mv9upeu/k0i+J+LTgwFYXCX+1Sxok6Vi4mC7U4SVfGVlIMtF3hNGtEJ0AdnaQLZ1yahu1IIzRY84ThaGC5Pb7YSds2cquigRVysFpDprMjUy+24mlCu21QZEtlV5pt513c2PPIY1s45m+7rZIjceRDqXMvvlxLvl2+5LA1uzUJc1dFEvKhbuedJPWzsNXdyrDMounWGNqiM4hj4xwtT+emTuQQO5adOFmTCsCzhqpCGGpoKGmx9kLQGE20/k505of0vy1JF9YYgsELEiPzU0 [...] + +generate_udaf_test("cpc_sketch_agg_union", { + input_columns: [`sketch`], + input_rows: `SELECT * FROM UNNEST([${cpc_3}, ${cpc_4}]) AS sketch`, + expected_output: cpc_union_2 +}); + +generate_udf_test("cpc_sketch_get_estimate_and_bounds", [{ + inputs: [ cpc_union_2, 3 ], + expected_output: `STRUCT(20000.731632174215 AS estimate, 19103.49112120969 AS lower_bound, 20932 AS upper_bound)` +}]); + + +// using full signatures + +const cpc_10_111_1 = `FROM_BASE64('CAEQCgAOjNMDAAAAAgAAAAAAAAAA+I9AkAIOUAACCEApL1e3AAAAAA==')`; + +generate_udaf_test("cpc_sketch_agg_string_lgk_seed", { + input_columns: [`str`, 'STRUCT(10 AS lgk, 111 AS seed) NOT AGGREGATE'], + input_rows: `SELECT * FROM UNNEST(['a', 'b', 'c']) AS str`, + expected_output: cpc_10_111_1 +}); + +const cpc_10_111_2 = `FROM_BASE64('CAEQCgAOjNMDAAAAAgAAAAAAAACA+49AHHqBFCABCEDnccXLCQAAAA==')`; + +generate_udaf_test("cpc_sketch_agg_string_lgk_seed", { + input_columns: [`str`, 'STRUCT(10 AS lgk, 111 AS seed) NOT AGGREGATE'], + input_rows: `SELECT * FROM UNNEST(['c', 'd', 'e']) AS str`, + expected_output: cpc_10_111_2 +}); + +const cpc_union_10_111_1 = `FROM_BASE64('BAEQCgAKjNMFAAAAAgAAAIfHruR2mxwA')`; + +generate_udf_test("cpc_sketch_union_lgk_seed", [{ + inputs: [ cpc_10_111_1, cpc_10_111_2, 10, 111 ], + expected_output: cpc_union_10_111_1 +}]); + +generate_udf_test("cpc_sketch_get_estimate_seed", [{ + inputs: [ cpc_union_10_111_1, 111 ], + expected_output: 5.003258518185566 +}]); + +const cpc_10_111_3 = `FROM_BASE64('CgEQCgAejNObDgAAEwAAAAAAAODTamJAEuxQsYzzw0AHAAAAkwAAAItbgcaBtLoCnGenKGkCETFyOS7wvKi2gMwNzDqqRZ/mxry4yzw35mVS/KqEzafPfdfa8cjkrnqKux4nnm/ltHoIYSSxQBrnJnF0nYvhGpSt2Kp7XVBocExL1HRWxmbzSsURbgKE2j3kvk7a0cp33F5fskKIRnam+oovpGrsDvAITGp0QhFm72qi3Cwfu6bWwX727GC96atVa1q4EQuKH90kW+QA8PoNyJkDMeW9W5NsZ0ivKwiIW43R1x0aJkJ66UvCCrRBzGvIqml9M02+vmq6aiQCi8uMnpFsXLPku+E4Wh6iI/eERBdqvcdKkkcmUNZXe5Edqp5jfrW+riW6lZYhByM3Nwmq5RqJE3fdGMQyXialhnb3IZZWhiotTGUIl6lz7XcxMGaNFWAhPM4sV [...] + +generate_udaf_test("cpc_sketch_agg_int64_lgk_seed", { + input_columns: [`value`, 'STRUCT(10 AS lgk, 111 AS seed) NOT AGGREGATE'], + input_rows: `SELECT * FROM UNNEST(GENERATE_ARRAY(1, 10000, 1)) AS value`, + expected_output: cpc_10_111_3 +}); + +const cpc_10_111_4 = `FROM_BASE64('CgEQCgAejNNPDgAAHgAAAAAAAABC02JADI03YoJjw0AJAAAAjgAAABaSEL4NyUXqagO6ZjJQMLDbybVyp3cuDaOSvCpt0YILk8N6gxzTVqX2pxUUvN5bbkJJ5x4uerdUH8j5aF/Jl1237GFnlrjJNmjrd7XaETjbRJKhSwO8Ep6tHh5Z1ZGnth0tsMOJR+xVyxlrMAxk6pepY0gYJbPTfKRXdIQYIsRjSPrBZT7FCuQIgQvU7DEpGef39md7YskoD6KwlN6X28aBbtQmYc/X8eMo5GiATtYsox7KxCCNWg/HgO+BGjbmrQ0ahAXporzKhq89cCSsX9lAkWr1NbrEUs8U91SLcwUaGuW6QhbZjtR6VYJH1jXbJTg2I2xyuW9qvpKio9wvZakzDFMPVdTmMkK6CqRWKtq3Hw2z2rIiMaLSsUWhL0fIYzC67RavIVnsPFpF5ksbZ [...] + +generate_udaf_test("cpc_sketch_agg_int64_lgk_seed", { + input_columns: [`value`, 'STRUCT(10 AS lgk, 111 AS seed) NOT AGGREGATE'], + input_rows: `SELECT * FROM UNNEST(GENERATE_ARRAY(100000, 110000, 1)) AS value`, + expected_output: cpc_10_111_4 +}); + +const cpc_union_10_111_2 = `FROM_BASE64('BgEQCgEajNN5EgAAGAAAAAgAAACTAAAAsyPBi2WqQwBo3cJcFgbmGiz9aHOsGDpMSteKVlRvU6162XVKI0f0LpxTazu+ea3YURXbk+aIY3TSqUXnAjdkt7oBFjUCGx+2XtCjlfPZvB3D5FJnVQ7BqS6n1RW9InJ/FrwUUWMIifVqfNZHP9u3Q5qbY4Egsj3biopdPsd9oy3hhuNyv2nHpkkvTN0paS/a7Q1JWOkCI+w66PxpV0PHksJezDbNXSvyYXtIT11nFTFhHE/t2OToKFQYsOibD8lzf9x7USK1rIOhQs1Jl3T1sKFmJhOYr8byJimRUJN+6qsi95LakyxlMZThUVxoZWpIZX0z7id2FFp5IFNxJGsObiPKq+NLdIAGU9LUx9OFuW5Doog9ZnLpAEwMcmL2Qu/IUFqoDIGbNZAeMxFbqTOrq5FMdmj6aJ2ER3a [...] + +generate_udaf_test("cpc_sketch_agg_union_lgk_seed", { + input_columns: [`sketch`, 'STRUCT(10 AS lgk, 111 AS seed) NOT AGGREGATE'], + input_rows: `SELECT * FROM UNNEST([${cpc_10_111_3}, ${cpc_10_111_4}]) AS sketch`, + expected_output: cpc_union_10_111_2 +}); + +generate_udf_test("cpc_sketch_get_estimate_and_bounds_seed", [{ + inputs: [ cpc_union_10_111_2, 3, 111 ], + expected_output: `STRUCT(19967.703650307845 AS estimate, 18707.573211045103 AS lower_bound, 21304 AS upper_bound)` +}]); diff --git a/tests/run_dataform_tests.sh b/tests/run_dataform_tests.sh index 00a1481..b357a3b 100755 --- a/tests/run_dataform_tests.sh +++ b/tests/run_dataform_tests.sh @@ -56,8 +56,7 @@ EOF mkdir "$TEST_DIR/definitions" # Copy the test files to the definitions directory - cp unit_test_cases.js "$TEST_DIR/definitions/" - cp unit_test_utils.js "$TEST_DIR/definitions/" + cp *.js "$TEST_DIR/definitions/" # Run Dataform tests cd "$TEST_DIR" @@ -84,4 +83,4 @@ if [[ -z "$BQ_LOCATION" ]]; then fi # Run the tests -run_dataform_tests \ No newline at end of file +run_dataform_tests --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
