Re: [PR] add unit tests for expression functions [datafusion-python]

via GitHub Fri, 16 May 2025 08:50:04 -0700


kevinjqliu commented on code in PR #1121:
URL: 
https://github.com/apache/datafusion-python/pull/1121#discussion_r2093257339



##########
python/tests/test_expr.py:
##########
@@ -275,3 +277,465 @@ def test_col_getattr():
 def test_alias_with_metadata(df):
     df = df.select(col("a").alias("b", {"key": "value"}))
     assert df.schema().field("b").metadata == {b"key": b"value"}
+
+
+# These unit tests are to ensure the expression functions do not regress
+# For the math functions we will use `functions.round` so we can more
+# easily test for equivalence and not worry about floating point precision
+@pytest.mark.parametrize(
+    ("function", "expected_result"),
+    [
+        # Math Functions
+        pytest.param(
+            functions.round(col("a").asin(), lit(4)),
+            pa.array([-0.8481, 0.5236, 0.0, None], type=pa.float64()),
+            id="asin",
+        ),
+        pytest.param(
+            functions.round(col("a").sin(), lit(4)),
+            pa.array([-0.6816, 0.4794, 0.0, None], type=pa.float64()),
+            id="sin",
+        ),
+        pytest.param(
+            # Since log10 of negative returns NaN and you can't test NaN for
+            # equivalence, also do an abs() here.
+            functions.round(col("a").abs().log10(), lit(4)),
+            pa.array([-0.1249, -0.301, -float("inf"), None], 
type=pa.float64()),
+            id="log10",
+        ),
+        pytest.param(
+            col("a").iszero(),
+            pa.array([False, False, True, None], type=pa.bool_()),
+            id="iszero",
+        ),
+        pytest.param(
+            functions.round(col("a").acos(), lit(4)),
+            pa.array([2.4189, 1.0472, 1.5708, None], type=pa.float64()),
+            id="acos",
+        ),
+        pytest.param(
+            col("e").isnan(),
+            pa.array([False, True, False, None], type=pa.bool_()),
+            id="isnan",
+        ),
+        pytest.param(
+            functions.round(col("a").degrees(), lit(4)),
+            pa.array([-42.9718, 28.6479, 0.0, None], type=pa.float64()),
+            id="degrees",
+        ),
+        pytest.param(
+            functions.round(col("a").asinh(), lit(4)),
+            pa.array([-0.6931, 0.4812, 0.0, None], type=pa.float64()),
+            id="asinh",
+        ),
+        pytest.param(
+            col("a").abs(),
+            pa.array([0.75, 0.5, 0.0, None], type=pa.float64()),
+            id="abs",
+        ),
+        pytest.param(
+            functions.round(col("a").exp(), lit(4)),
+            pa.array([0.4724, 1.6487, 1.0, None], type=pa.float64()),
+            id="exp",
+        ),
+        pytest.param(
+            functions.round(col("a").cosh(), lit(4)),
+            pa.array([1.2947, 1.1276, 1.0, None], type=pa.float64()),
+            id="cosh",
+        ),
+        pytest.param(
+            functions.round(col("a").radians(), lit(4)),
+            pa.array([-0.0131, 0.0087, 0.0, None], type=pa.float64()),
+            id="radians",
+        ),
+        pytest.param(
+            functions.round(col("a").abs().sqrt(), lit(4)),
+            pa.array([0.866, 0.7071, 0.0, None], type=pa.float64()),
+            id="sqrt",
+        ),
+        pytest.param(
+            functions.round(col("a").tanh(), lit(4)),
+            pa.array([-0.6351, 0.4621, 0.0, None], type=pa.float64()),
+            id="tanh",
+        ),
+        pytest.param(
+            functions.round(col("a").atan(), lit(4)),
+            pa.array([-0.6435, 0.4636, 0.0, None], type=pa.float64()),
+            id="atan",
+        ),
+        pytest.param(
+            functions.round(col("a").atanh(), lit(4)),
+            pa.array([-0.973, 0.5493, 0.0, None], type=pa.float64()),
+            id="atanh",
+        ),
+        pytest.param(
+            # large numbers cause an integer overflow so divid to make smaller
+            (col("b") / lit(4)).factorial(),
+            pa.array([1, 3628800, 1, None], type=pa.int64()),
+            id="factorial",
+        ),
+        pytest.param(
+            # Valid values of acosh must be >= 1.0
+            functions.round((col("a").abs() + lit(1.0)).abs().acosh(), lit(4)),
+            pa.array([1.1588, 0.9624, 0.0, None], type=pa.float64()),
+            id="acosh",
+        ),
+        pytest.param(
+            col("a").floor(),
+            pa.array([-1.0, 0.0, 0.0, None], type=pa.float64()),
+            id="floor",
+        ),
+        pytest.param(
+            col("a").ceil(),
+            pa.array([-0.0, 1.0, 0.0, None], type=pa.float64()),
+            id="ceil",
+        ),
+        pytest.param(
+            functions.round(col("a").abs().ln(), lit(4)),
+            pa.array([-0.2877, -0.6931, float("-inf"), None], 
type=pa.float64()),
+            id="ln",
+        ),
+        pytest.param(
+            functions.round(col("a").tan(), lit(4)),
+            pa.array([-0.9316, 0.5463, 0.0, None], type=pa.float64()),
+            id="tan",
+        ),
+        pytest.param(
+            functions.round(col("a").cbrt(), lit(4)),
+            pa.array([-0.9086, 0.7937, 0.0, None], type=pa.float64()),
+            id="cbrt",
+        ),
+        pytest.param(
+            functions.round(col("a").cos(), lit(4)),
+            pa.array([0.7317, 0.8776, 1.0, None], type=pa.float64()),
+            id="cos",
+        ),
+        pytest.param(
+            functions.round(col("a").sinh(), lit(4)),
+            pa.array([-0.8223, 0.5211, 0.0, None], type=pa.float64()),
+            id="sinh",
+        ),
+        pytest.param(
+            col("a").signum(),
+            pa.array([-1.0, 1.0, 0.0, None], type=pa.float64()),
+            id="signum",
+        ),
+        pytest.param(
+            functions.round(col("a").abs().log2(), lit(4)),
+            pa.array([-0.415, -1.0, float("-inf"), None], type=pa.float64()),
+            id="log2",
+        ),
+        pytest.param(
+            functions.round(col("a").cot(), lit(4)),
+            pa.array([-1.0734, 1.8305, float("inf"), None], type=pa.float64()),
+            id="cot",
+        ),
+        #
+        # String Functions
+        #
+        pytest.param(
+            col("c").reverse(),
+            pa.array(["olleH", " dlrow ", "!", None], type=pa.string()),
+            id="reverse",
+        ),
+        pytest.param(
+            col("c").bit_length(),
+            pa.array([40, 56, 8, None], type=pa.int32()),
+            id="bit_length",
+        ),
+        pytest.param(
+            col("b").to_hex(),
+            pa.array(["ffffffffffffffe2", "2a", "0", None], type=pa.string()),
+            id="to_hex",
+        ),
+        pytest.param(
+            col("c").length(),
+            pa.array([5, 7, 1, None], type=pa.int32()),
+            id="length",
+        ),
+        pytest.param(
+            col("c").lower(),
+            pa.array(["hello", " world ", "!", None], type=pa.string()),
+            id="lower",
+        ),
+        pytest.param(
+            col("c").ascii(),
+            pa.array([72, 32, 33, None], type=pa.int32()),
+            id="ascii",
+        ),
+        pytest.param(
+            col("c").sha512(),
+            pa.array(
+                [
+                    bytes.fromhex(
+                        
"3615F80C9D293ED7402687F94B22D58E529B8CC7916F8FAC7FDDF7FBD5AF4CF777D3D795A7A00A16BF7E7F3FB9561EE9BAAE480DA9FE7A18769E71886B03F315"
+                    ),
+                    bytes.fromhex(
+                        
"A6758FDA3C2F0B554084E18308EA99B94B54EEE8FDA72697CEA7844E524CC2F2F2EE4CC8BAC87D2E3E7222959FE3D0CA1A841761FDC0D1780F6FE9E39E369500"
+                    ),
+                    bytes.fromhex(
+                        
"3831A6A6155E509DEE59A7F451EB35324D8F8F2DF6E3708894740F98FDEE23889F4DE5ADB0C5010DFB555CDA77C8AB5DC902094C52DE3278F35A75EBC25F093A"
+                    ),
+                    None,
+                ],
+                type=pa.binary(),
+            ),
+            id="sha512",
+        ),
+        pytest.param(
+            col("c").sha384(),
+            pa.array(
+                [
+                    bytes.fromhex(
+                        
"3519FE5AD2C596EFE3E276A6F351B8FC0B03DB861782490D45F7598EBD0AB5FD5520ED102F38C4A5EC834E98668035FC"
+                    ),
+                    bytes.fromhex(
+                        
"A6A38A9AE2CFD0D67F49989AD584632BF7D7A07DAD2277E92326A6A0B37F884A871D6173FB342CFE258E375258ACAAEC"
+                    ),
+                    bytes.fromhex(
+                        
"1D0EC8C84EE9521E21F06774DE232367B64DE628474CB5B2E372B699A1F55AE335CC37193EF823E33324DFD9A70738A6"
+                    ),
+                    None,
+                ],
+                type=pa.binary(),
+            ),
+            id="sha384",
+        ),
+        pytest.param(
+            col("c").sha256(),
+            pa.array(
+                [
+                    bytes.fromhex(
+                        
"185F8DB32271FE25F561A6FC938B2E264306EC304EDA518007D1764826381969"
+                    ),
+                    bytes.fromhex(
+                        
"DE2EF0D77D456EC1CDE2C52F75996F6636A64079297213D548D875A488B03A75"
+                    ),
+                    bytes.fromhex(
+                        
"BB7208BC9B5D7C04F1236A82A0093A5E33F40423D5BA8D4266F7092C3BA43B62"
+                    ),
+                    None,
+                ],
+                type=pa.binary(),
+            ),
+            id="sha256",
+        ),
+        pytest.param(
+            col("c").sha224(),
+            pa.array(
+                [
+                    bytes.fromhex(
+                        
"4149DA18AA8BFC2B1E382C6C26556D01A92C261B6436DAD5E3BE3FCC"
+                    ),
+                    bytes.fromhex(
+                        
"AD6DF6D9ECDDF50AF2A72D5E3144BA813EE954537572C0E8AB3066BE"
+                    ),
+                    bytes.fromhex(
+                        
"6641A7E8278BCD49E476E7ACAE158F4105B2952D22AEB2E0B9A231A0"
+                    ),
+                    None,
+                ],
+                type=pa.binary(),
+            ),
+            id="sha224",
+        ),
+        pytest.param(
+            col("c").btrim(),
+            pa.array(["Hello", "world", "!", None], type=pa.string_view()),
+            id="btrim",
+        ),
+        pytest.param(
+            col("c").trim(),
+            pa.array(["Hello", "world", "!", None], type=pa.string_view()),
+            id="trim",
+        ),
+        pytest.param(
+            col("c").md5(),
+            pa.array(
+                [
+                    "8b1a9953c4611296a827abf8c47804d7",
+                    "de802497c24568d9a85d4eb8c2b6e8fe",
+                    "9033e0e305f247c0c3c80d0c7848c8b3",
+                    None,
+                ],
+                type=pa.string(),
+            ),
+            id="md5",
+        ),
+        pytest.param(
+            col("c").octet_length(),
+            pa.array([5, 7, 1, None], type=pa.int32()),
+            id="octet_length",
+        ),
+        pytest.param(
+            col("c").character_length(),
+            pa.array([5, 7, 1, None], type=pa.int32()),
+            id="character_length",
+        ),
+        pytest.param(
+            col("c").char_length(),
+            pa.array([5, 7, 1, None], type=pa.int32()),
+            id="char_length",
+        ),
+        pytest.param(
+            col("c").rtrim(),
+            pa.array(["Hello", " world", "!", None], type=pa.string_view()),
+            id="rtrim",
+        ),
+        pytest.param(
+            col("c").ltrim(),
+            pa.array(["Hello", "world ", "!", None], type=pa.string_view()),
+            id="ltrim",
+        ),
+        pytest.param(
+            col("c").upper(),
+            pa.array(["HELLO", " WORLD ", "!", None], type=pa.string()),
+            id="upper",
+        ),
+        pytest.param(
+            lit(65).chr(),
+            pa.array(["A", "A", "A", "A"], type=pa.string()),
+            id="chr",
+        ),
+        #
+        # Time Functions
+        #
+        pytest.param(
+            col("b").from_unixtime(),
+            pa.array(
+                [
+                    datetime(1969, 12, 31, 23, 59, 30, tzinfo=timezone.utc),
+                    datetime(1970, 1, 1, 0, 0, 42, tzinfo=timezone.utc),
+                    datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
+                    None,
+                ],
+                type=pa.timestamp("s"),
+            ),
+            id="from_unixtime",
+        ),
+        pytest.param(
+            col("c").initcap(),
+            pa.array(["Hello", " World ", "!", None], type=pa.string_view()),
+            id="initcap",
+        ),
+        #
+        # Array Functions
+        #
+        pytest.param(
+            col("d").array_pop_back(),
+            pa.array([[-1, 1], [5, 10, 15], [], None], 
type=pa.list_(pa.int64())),
+            id="array_pop_back",
+        ),
+        pytest.param(
+            col("d").array_pop_front(),
+            pa.array([[1, 0], [10, 15, 20], [], None], 
type=pa.list_(pa.int64())),
+            id="array_pop_front",
+        ),
+        pytest.param(
+            col("d").array_length(),
+            pa.array([3, 4, 0, None], type=pa.uint64()),
+            id="array_length",
+        ),
+        pytest.param(
+            col("d").list_length(),
+            pa.array([3, 4, 0, None], type=pa.uint64()),
+            id="list_length",
+        ),
+        pytest.param(
+            col("d").array_ndims(),
+            pa.array([1, 1, 1, None], type=pa.uint64()),
+            id="array_ndims",
+        ),
+        pytest.param(
+            col("d").list_ndims(),
+            pa.array([1, 1, 1, None], type=pa.uint64()),
+            id="list_ndims",
+        ),
+        pytest.param(
+            col("d").array_dims(),
+            pa.array([[3], [4], None, None], type=pa.list_(pa.uint64())),
+            id="array_dims",
+        ),
+        pytest.param(
+            col("d").array_empty(),
+            pa.array([False, False, True, None], type=pa.bool_()),
+            id="array_empty",
+        ),
+        pytest.param(
+            col("d").list_distinct(),
+            pa.array(
+                [[-1, 0, 1], [5, 10, 15, 20], [], None], 
type=pa.list_(pa.int64())
+            ),
+            id="list_distinct",
+        ),
+        pytest.param(
+            col("d").array_distinct(),
+            pa.array(
+                [[-1, 0, 1], [5, 10, 15, 20], [], None], 
type=pa.list_(pa.int64())
+            ),
+            id="array_distinct",
+        ),
+        pytest.param(
+            col("d").cardinality(),
+            pa.array([3, 4, None, None], type=pa.uint64()),
+            id="cardinality",
+        ),
+        pytest.param(
+            col("f").flatten(),
+            pa.array(
+                [[-1, 1, 0, 4, 4], [5, 10, 15, 20, 3], [], []],
+                type=pa.list_(pa.int64()),
+            ),
+            id="flatten",
+        ),
+        pytest.param(
+            col("d").list_dims(),
+            pa.array([[3], [4], None, None], type=pa.list_(pa.uint64())),
+            id="list_dims",
+        ),
+        pytest.param(
+            col("d").empty(),
+            pa.array([False, False, True, None], type=pa.bool_()),
+            id="empty",
+        ),
+        #
+        # Other Tests
+        #
+        pytest.param(
+            col("d").arrow_typeof(),
+            pa.array(
+                [
+                    'List(Field { name: "item", data_type: Int64, nullable: 
true, dict_id: 0, dict_is_ordered: false, metadata: {} })',  # noqa: E501
+                    'List(Field { name: "item", data_type: Int64, nullable: 
true, dict_id: 0, dict_is_ordered: false, metadata: {} })',  # noqa: E501
+                    'List(Field { name: "item", data_type: Int64, nullable: 
true, dict_id: 0, dict_is_ordered: false, metadata: {} })',  # noqa: E501
+                    'List(Field { name: "item", data_type: Int64, nullable: 
true, dict_id: 0, dict_is_ordered: false, metadata: {} })',  # noqa: E501
+                ],
+                type=pa.string(),
+            ),
+            id="arrow_typeof",
+        ),
+    ],
+)
+def test_expr_functions(ctx, function, expected_result):
+    ctx = SessionContext()

Review Comment:
   ```suggestion
   ```



##########
python/tests/test_expr.py:
##########
@@ -275,3 +277,465 @@ def test_col_getattr():
 def test_alias_with_metadata(df):
     df = df.select(col("a").alias("b", {"key": "value"}))
     assert df.schema().field("b").metadata == {b"key": b"value"}
+
+
+# These unit tests are to ensure the expression functions do not regress
+# For the math functions we will use `functions.round` so we can more
+# easily test for equivalence and not worry about floating point precision
+@pytest.mark.parametrize(
+    ("function", "expected_result"),
+    [
+        # Math Functions
+        pytest.param(
+            functions.round(col("a").asin(), lit(4)),
+            pa.array([-0.8481, 0.5236, 0.0, None], type=pa.float64()),
+            id="asin",
+        ),
+        pytest.param(
+            functions.round(col("a").sin(), lit(4)),
+            pa.array([-0.6816, 0.4794, 0.0, None], type=pa.float64()),
+            id="sin",
+        ),
+        pytest.param(
+            # Since log10 of negative returns NaN and you can't test NaN for
+            # equivalence, also do an abs() here.
+            functions.round(col("a").abs().log10(), lit(4)),
+            pa.array([-0.1249, -0.301, -float("inf"), None], 
type=pa.float64()),
+            id="log10",
+        ),
+        pytest.param(
+            col("a").iszero(),
+            pa.array([False, False, True, None], type=pa.bool_()),
+            id="iszero",
+        ),
+        pytest.param(
+            functions.round(col("a").acos(), lit(4)),
+            pa.array([2.4189, 1.0472, 1.5708, None], type=pa.float64()),
+            id="acos",
+        ),
+        pytest.param(
+            col("e").isnan(),
+            pa.array([False, True, False, None], type=pa.bool_()),
+            id="isnan",
+        ),
+        pytest.param(
+            functions.round(col("a").degrees(), lit(4)),
+            pa.array([-42.9718, 28.6479, 0.0, None], type=pa.float64()),
+            id="degrees",
+        ),
+        pytest.param(
+            functions.round(col("a").asinh(), lit(4)),
+            pa.array([-0.6931, 0.4812, 0.0, None], type=pa.float64()),
+            id="asinh",
+        ),
+        pytest.param(
+            col("a").abs(),
+            pa.array([0.75, 0.5, 0.0, None], type=pa.float64()),
+            id="abs",
+        ),
+        pytest.param(
+            functions.round(col("a").exp(), lit(4)),
+            pa.array([0.4724, 1.6487, 1.0, None], type=pa.float64()),
+            id="exp",
+        ),
+        pytest.param(
+            functions.round(col("a").cosh(), lit(4)),
+            pa.array([1.2947, 1.1276, 1.0, None], type=pa.float64()),
+            id="cosh",
+        ),
+        pytest.param(
+            functions.round(col("a").radians(), lit(4)),
+            pa.array([-0.0131, 0.0087, 0.0, None], type=pa.float64()),
+            id="radians",
+        ),
+        pytest.param(
+            functions.round(col("a").abs().sqrt(), lit(4)),
+            pa.array([0.866, 0.7071, 0.0, None], type=pa.float64()),
+            id="sqrt",
+        ),
+        pytest.param(
+            functions.round(col("a").tanh(), lit(4)),
+            pa.array([-0.6351, 0.4621, 0.0, None], type=pa.float64()),
+            id="tanh",
+        ),
+        pytest.param(
+            functions.round(col("a").atan(), lit(4)),
+            pa.array([-0.6435, 0.4636, 0.0, None], type=pa.float64()),
+            id="atan",
+        ),
+        pytest.param(
+            functions.round(col("a").atanh(), lit(4)),
+            pa.array([-0.973, 0.5493, 0.0, None], type=pa.float64()),
+            id="atanh",
+        ),
+        pytest.param(
+            # large numbers cause an integer overflow so divid to make smaller
+            (col("b") / lit(4)).factorial(),
+            pa.array([1, 3628800, 1, None], type=pa.int64()),
+            id="factorial",
+        ),
+        pytest.param(
+            # Valid values of acosh must be >= 1.0
+            functions.round((col("a").abs() + lit(1.0)).abs().acosh(), lit(4)),
+            pa.array([1.1588, 0.9624, 0.0, None], type=pa.float64()),
+            id="acosh",
+        ),
+        pytest.param(
+            col("a").floor(),
+            pa.array([-1.0, 0.0, 0.0, None], type=pa.float64()),
+            id="floor",
+        ),
+        pytest.param(
+            col("a").ceil(),
+            pa.array([-0.0, 1.0, 0.0, None], type=pa.float64()),
+            id="ceil",
+        ),
+        pytest.param(
+            functions.round(col("a").abs().ln(), lit(4)),
+            pa.array([-0.2877, -0.6931, float("-inf"), None], 
type=pa.float64()),
+            id="ln",
+        ),
+        pytest.param(
+            functions.round(col("a").tan(), lit(4)),
+            pa.array([-0.9316, 0.5463, 0.0, None], type=pa.float64()),
+            id="tan",
+        ),
+        pytest.param(
+            functions.round(col("a").cbrt(), lit(4)),
+            pa.array([-0.9086, 0.7937, 0.0, None], type=pa.float64()),
+            id="cbrt",
+        ),
+        pytest.param(
+            functions.round(col("a").cos(), lit(4)),
+            pa.array([0.7317, 0.8776, 1.0, None], type=pa.float64()),
+            id="cos",
+        ),
+        pytest.param(
+            functions.round(col("a").sinh(), lit(4)),
+            pa.array([-0.8223, 0.5211, 0.0, None], type=pa.float64()),
+            id="sinh",
+        ),
+        pytest.param(
+            col("a").signum(),
+            pa.array([-1.0, 1.0, 0.0, None], type=pa.float64()),
+            id="signum",
+        ),
+        pytest.param(
+            functions.round(col("a").abs().log2(), lit(4)),
+            pa.array([-0.415, -1.0, float("-inf"), None], type=pa.float64()),
+            id="log2",
+        ),
+        pytest.param(
+            functions.round(col("a").cot(), lit(4)),
+            pa.array([-1.0734, 1.8305, float("inf"), None], type=pa.float64()),
+            id="cot",
+        ),
+        #
+        # String Functions
+        #
+        pytest.param(
+            col("c").reverse(),
+            pa.array(["olleH", " dlrow ", "!", None], type=pa.string()),
+            id="reverse",
+        ),
+        pytest.param(
+            col("c").bit_length(),
+            pa.array([40, 56, 8, None], type=pa.int32()),
+            id="bit_length",
+        ),
+        pytest.param(
+            col("b").to_hex(),
+            pa.array(["ffffffffffffffe2", "2a", "0", None], type=pa.string()),
+            id="to_hex",
+        ),
+        pytest.param(
+            col("c").length(),
+            pa.array([5, 7, 1, None], type=pa.int32()),
+            id="length",
+        ),
+        pytest.param(
+            col("c").lower(),
+            pa.array(["hello", " world ", "!", None], type=pa.string()),
+            id="lower",
+        ),
+        pytest.param(
+            col("c").ascii(),
+            pa.array([72, 32, 33, None], type=pa.int32()),
+            id="ascii",
+        ),
+        pytest.param(
+            col("c").sha512(),
+            pa.array(
+                [
+                    bytes.fromhex(
+                        
"3615F80C9D293ED7402687F94B22D58E529B8CC7916F8FAC7FDDF7FBD5AF4CF777D3D795A7A00A16BF7E7F3FB9561EE9BAAE480DA9FE7A18769E71886B03F315"
+                    ),
+                    bytes.fromhex(
+                        
"A6758FDA3C2F0B554084E18308EA99B94B54EEE8FDA72697CEA7844E524CC2F2F2EE4CC8BAC87D2E3E7222959FE3D0CA1A841761FDC0D1780F6FE9E39E369500"
+                    ),
+                    bytes.fromhex(
+                        
"3831A6A6155E509DEE59A7F451EB35324D8F8F2DF6E3708894740F98FDEE23889F4DE5ADB0C5010DFB555CDA77C8AB5DC902094C52DE3278F35A75EBC25F093A"
+                    ),
+                    None,
+                ],
+                type=pa.binary(),
+            ),
+            id="sha512",
+        ),
+        pytest.param(
+            col("c").sha384(),
+            pa.array(
+                [
+                    bytes.fromhex(
+                        
"3519FE5AD2C596EFE3E276A6F351B8FC0B03DB861782490D45F7598EBD0AB5FD5520ED102F38C4A5EC834E98668035FC"
+                    ),
+                    bytes.fromhex(
+                        
"A6A38A9AE2CFD0D67F49989AD584632BF7D7A07DAD2277E92326A6A0B37F884A871D6173FB342CFE258E375258ACAAEC"
+                    ),
+                    bytes.fromhex(
+                        
"1D0EC8C84EE9521E21F06774DE232367B64DE628474CB5B2E372B699A1F55AE335CC37193EF823E33324DFD9A70738A6"
+                    ),
+                    None,
+                ],
+                type=pa.binary(),
+            ),
+            id="sha384",
+        ),
+        pytest.param(
+            col("c").sha256(),
+            pa.array(
+                [
+                    bytes.fromhex(
+                        
"185F8DB32271FE25F561A6FC938B2E264306EC304EDA518007D1764826381969"
+                    ),
+                    bytes.fromhex(
+                        
"DE2EF0D77D456EC1CDE2C52F75996F6636A64079297213D548D875A488B03A75"
+                    ),
+                    bytes.fromhex(
+                        
"BB7208BC9B5D7C04F1236A82A0093A5E33F40423D5BA8D4266F7092C3BA43B62"
+                    ),
+                    None,
+                ],
+                type=pa.binary(),
+            ),
+            id="sha256",
+        ),
+        pytest.param(
+            col("c").sha224(),
+            pa.array(
+                [
+                    bytes.fromhex(
+                        
"4149DA18AA8BFC2B1E382C6C26556D01A92C261B6436DAD5E3BE3FCC"
+                    ),
+                    bytes.fromhex(
+                        
"AD6DF6D9ECDDF50AF2A72D5E3144BA813EE954537572C0E8AB3066BE"
+                    ),
+                    bytes.fromhex(
+                        
"6641A7E8278BCD49E476E7ACAE158F4105B2952D22AEB2E0B9A231A0"
+                    ),
+                    None,
+                ],
+                type=pa.binary(),
+            ),
+            id="sha224",
+        ),
+        pytest.param(
+            col("c").btrim(),
+            pa.array(["Hello", "world", "!", None], type=pa.string_view()),
+            id="btrim",
+        ),
+        pytest.param(
+            col("c").trim(),
+            pa.array(["Hello", "world", "!", None], type=pa.string_view()),
+            id="trim",
+        ),
+        pytest.param(
+            col("c").md5(),
+            pa.array(
+                [
+                    "8b1a9953c4611296a827abf8c47804d7",
+                    "de802497c24568d9a85d4eb8c2b6e8fe",
+                    "9033e0e305f247c0c3c80d0c7848c8b3",
+                    None,
+                ],
+                type=pa.string(),
+            ),
+            id="md5",
+        ),
+        pytest.param(
+            col("c").octet_length(),
+            pa.array([5, 7, 1, None], type=pa.int32()),
+            id="octet_length",
+        ),
+        pytest.param(
+            col("c").character_length(),
+            pa.array([5, 7, 1, None], type=pa.int32()),
+            id="character_length",
+        ),
+        pytest.param(
+            col("c").char_length(),
+            pa.array([5, 7, 1, None], type=pa.int32()),
+            id="char_length",
+        ),
+        pytest.param(
+            col("c").rtrim(),
+            pa.array(["Hello", " world", "!", None], type=pa.string_view()),
+            id="rtrim",
+        ),
+        pytest.param(
+            col("c").ltrim(),
+            pa.array(["Hello", "world ", "!", None], type=pa.string_view()),
+            id="ltrim",
+        ),
+        pytest.param(
+            col("c").upper(),
+            pa.array(["HELLO", " WORLD ", "!", None], type=pa.string()),
+            id="upper",
+        ),
+        pytest.param(
+            lit(65).chr(),
+            pa.array(["A", "A", "A", "A"], type=pa.string()),
+            id="chr",
+        ),
+        #
+        # Time Functions
+        #
+        pytest.param(
+            col("b").from_unixtime(),
+            pa.array(
+                [
+                    datetime(1969, 12, 31, 23, 59, 30, tzinfo=timezone.utc),
+                    datetime(1970, 1, 1, 0, 0, 42, tzinfo=timezone.utc),
+                    datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
+                    None,
+                ],
+                type=pa.timestamp("s"),
+            ),
+            id="from_unixtime",
+        ),
+        pytest.param(
+            col("c").initcap(),
+            pa.array(["Hello", " World ", "!", None], type=pa.string_view()),
+            id="initcap",
+        ),
+        #
+        # Array Functions
+        #
+        pytest.param(
+            col("d").array_pop_back(),
+            pa.array([[-1, 1], [5, 10, 15], [], None], 
type=pa.list_(pa.int64())),
+            id="array_pop_back",
+        ),
+        pytest.param(
+            col("d").array_pop_front(),
+            pa.array([[1, 0], [10, 15, 20], [], None], 
type=pa.list_(pa.int64())),
+            id="array_pop_front",
+        ),
+        pytest.param(
+            col("d").array_length(),
+            pa.array([3, 4, 0, None], type=pa.uint64()),
+            id="array_length",
+        ),
+        pytest.param(
+            col("d").list_length(),
+            pa.array([3, 4, 0, None], type=pa.uint64()),
+            id="list_length",
+        ),
+        pytest.param(
+            col("d").array_ndims(),
+            pa.array([1, 1, 1, None], type=pa.uint64()),
+            id="array_ndims",
+        ),
+        pytest.param(
+            col("d").list_ndims(),
+            pa.array([1, 1, 1, None], type=pa.uint64()),
+            id="list_ndims",
+        ),
+        pytest.param(
+            col("d").array_dims(),
+            pa.array([[3], [4], None, None], type=pa.list_(pa.uint64())),
+            id="array_dims",
+        ),
+        pytest.param(
+            col("d").array_empty(),
+            pa.array([False, False, True, None], type=pa.bool_()),
+            id="array_empty",
+        ),
+        pytest.param(
+            col("d").list_distinct(),
+            pa.array(
+                [[-1, 0, 1], [5, 10, 15, 20], [], None], 
type=pa.list_(pa.int64())
+            ),
+            id="list_distinct",
+        ),
+        pytest.param(
+            col("d").array_distinct(),
+            pa.array(
+                [[-1, 0, 1], [5, 10, 15, 20], [], None], 
type=pa.list_(pa.int64())
+            ),
+            id="array_distinct",
+        ),
+        pytest.param(
+            col("d").cardinality(),
+            pa.array([3, 4, None, None], type=pa.uint64()),
+            id="cardinality",
+        ),
+        pytest.param(
+            col("f").flatten(),
+            pa.array(
+                [[-1, 1, 0, 4, 4], [5, 10, 15, 20, 3], [], []],
+                type=pa.list_(pa.int64()),
+            ),
+            id="flatten",
+        ),
+        pytest.param(
+            col("d").list_dims(),
+            pa.array([[3], [4], None, None], type=pa.list_(pa.uint64())),
+            id="list_dims",
+        ),
+        pytest.param(
+            col("d").empty(),
+            pa.array([False, False, True, None], type=pa.bool_()),
+            id="empty",
+        ),
+        #
+        # Other Tests
+        #
+        pytest.param(
+            col("d").arrow_typeof(),
+            pa.array(
+                [
+                    'List(Field { name: "item", data_type: Int64, nullable: 
true, dict_id: 0, dict_is_ordered: false, metadata: {} })',  # noqa: E501
+                    'List(Field { name: "item", data_type: Int64, nullable: 
true, dict_id: 0, dict_is_ordered: false, metadata: {} })',  # noqa: E501
+                    'List(Field { name: "item", data_type: Int64, nullable: 
true, dict_id: 0, dict_is_ordered: false, metadata: {} })',  # noqa: E501
+                    'List(Field { name: "item", data_type: Int64, nullable: 
true, dict_id: 0, dict_is_ordered: false, metadata: {} })',  # noqa: E501
+                ],
+                type=pa.string(),
+            ),
+            id="arrow_typeof",
+        ),
+    ],
+)
+def test_expr_functions(ctx, function, expected_result):
+    ctx = SessionContext()
+    batch = pa.RecordBatch.from_arrays(
+        [
+            pa.array([-0.75, 0.5, 0.0, None], type=pa.float64()),
+            pa.array([-30, 42, 0, None], type=pa.int64()),
+            pa.array(["Hello", " world ", "!", None], type=pa.string_view()),
+            pa.array(
+                [[-1, 1, 0], [5, 10, 15, 20], [], None], 
type=pa.list_(pa.int64())
+            ),
+            pa.array([-0.75, float("nan"), 0.0, None], type=pa.float64()),
+            pa.array(
+                [[[-1, 1, 0], [4, 4]], [[5, 10, 15, 20], [3]], [[]], [None]],
+                type=pa.list_(pa.list_(pa.int64())),
+            ),
+        ],
+        names=["a", "b", "c", "d", "e", "f"],
+    )
+    df = ctx.create_dataframe([[batch]]).select(function)
+    result = df.collect()
+
+    assert len(result) == 1
+    assert result[0].column(0) == expected_result

Review Comment:
   ```suggestion
       assert result[0].column(0).equals(expected_result)
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Re: [PR] add unit tests for expression functions [datafusion-python]

Reply via email to