This is an automated email from the ASF dual-hosted git repository.

jmalkin pushed a commit to branch python
in repository https://gitbox.apache.org/repos/asf/datasketches-spark.git


The following commit(s) were added to refs/heads/python by this push:
     new 50794a0  improve kll merge test
50794a0 is described below

commit 50794a0ac7f54d89a8af27d1c96e6ac92bc9642b
Author: Jon Malkin <[email protected]>
AuthorDate: Fri Feb 14 09:47:31 2025 -0800

    improve kll merge test
---
 python/tests/kll_test.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/python/tests/kll_test.py b/python/tests/kll_test.py
index 1c3cf38..a37c2f3 100644
--- a/python/tests/kll_test.py
+++ b/python/tests/kll_test.py
@@ -57,12 +57,17 @@ def test_kll_merge(spark):
   df_agg = df.groupBy("id").agg(kll_sketch_double_agg_build("value", k).alias("sketch"))
   assert(df_agg.count() == 2)
 
+  # merge and get a few attributes to check
   result = df_agg.select(
     kll_sketch_double_agg_merge("sketch").alias("sketch")
+  ).select(
+    "sketch",
+    kll_sketch_double_get_min("sketch").alias("min"),
+    kll_sketch_double_get_max("sketch").alias("max")
   ).first()
   sk = result["sketch"]
 
   assert(sk.n == 2 * n)
   assert(sk.k == k)
-  assert(sk.get_min_value() == 1.0)
-  assert(sk.get_max_value() == 2 * n)
+  assert(sk.get_min_value() == result["min"])
+  assert(sk.get_max_value() == result["max"])


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to