This is an automated email from the ASF dual-hosted git repository.

liuxun pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
     new e9d8ee7bc [#5203] feat(client-python): porting partitions from java 
client (#5964)
e9d8ee7bc is described below

commit e9d8ee7bc05d3226c5f0ce0b492b2c207018ed73
Author: Eric Chang <e850...@gmail.com>
AuthorDate: Thu Jan 9 11:32:48 2025 +0800

    [#5203] feat(client-python): porting partitions from java client (#5964)
    
    ### What changes were proposed in this pull request?
    
    Porting `interface Partition`, `interface IdentityPartition`,
    `interface ListPartition`, `interface RangePartition`, and
    `class Partitions` from Java to Python.
    
    Fix: #5203
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes.
    
    ### How was this patch tested?
    
    Unit tests.
---
 .../expressions/partitions/identity_partition.py   |  54 +++++
 .../api/expressions/partitions/list_partition.py   |  47 +++++
 .../api/expressions/partitions/partition.py        |  44 ++++
 .../api/expressions/partitions/partitions.py       | 231 +++++++++++++++++++++
 .../api/expressions/partitions/range_partition.py  |  52 +++++
 .../tests/unittests/rel/test_partitions.py         | 108 ++++++++++
 6 files changed, 536 insertions(+)

diff --git 
a/clients/client-python/gravitino/api/expressions/partitions/identity_partition.py
 
b/clients/client-python/gravitino/api/expressions/partitions/identity_partition.py
new file mode 100644
index 000000000..e4b660c09
--- /dev/null
+++ 
b/clients/client-python/gravitino/api/expressions/partitions/identity_partition.py
@@ -0,0 +1,54 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from abc import abstractmethod
+from typing import List, Any
+
+from .partition import Partition
+from ..literals.literal import Literal
+
+
+class IdentityPartition(Partition):
+    """
+    The result of identity partitioning.
+
+    For example, for the Hive partition
+
+    ```
+    PARTITION (dt='2008-08-08',country='us')
+    ```
+
+    the partition name is "dt=2008-08-08/country=us", the field names are
+    [["dt"], ["country"]] and the values are ["2008-08-08", "us"].
+
+    This API is still evolving towards becoming stable and can change from
+    one feature release to another (0.5.0 to 0.6.0).
+    """
+
+    @abstractmethod
+    def field_names(self) -> List[List[str]]:
+        """
+        Returns:
+            List[List[str]]: A list of lists holding the field names of the
+            identity partition.
+        """
+
+    @abstractmethod
+    def values(self) -> List[Literal[Any]]:
+        """
+        Returns:
+            List[Literal[Any]]: The values of the identity partition.
+        """
diff --git 
a/clients/client-python/gravitino/api/expressions/partitions/list_partition.py 
b/clients/client-python/gravitino/api/expressions/partitions/list_partition.py
new file mode 100644
index 000000000..8316e4daa
--- /dev/null
+++ 
b/clients/client-python/gravitino/api/expressions/partitions/list_partition.py
@@ -0,0 +1,47 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from abc import abstractmethod
+from typing import List, Any
+
+from gravitino.api.expressions.literals.literal import Literal
+from gravitino.api.expressions.partitions.partition import Partition
+
+
+class ListPartition(Partition):
+    """
+    The result of list partitioning.
+
+    For example, for the list partition
+
+    ```
+    PARTITION p202204_California VALUES IN (
+      ("2022-04-01", "Los Angeles"),
+      ("2022-04-01", "San Francisco")
+    )
+    ```
+
+    the name is "p202204_California" and the lists are
+    [["2022-04-01", "Los Angeles"], ["2022-04-01", "San Francisco"]].
+
+    This API is still evolving towards becoming stable and can change from
+    one feature release to another (0.5.0 to 0.6.0).
+    """
+
+    @abstractmethod
+    def lists(self) -> List[List[Literal[Any]]]:
+        """
+        Returns:
+            List[List[Literal[Any]]]: The values of the list partition.
+        """
diff --git 
a/clients/client-python/gravitino/api/expressions/partitions/partition.py 
b/clients/client-python/gravitino/api/expressions/partitions/partition.py
new file mode 100644
index 000000000..7f9a0b873
--- /dev/null
+++ b/clients/client-python/gravitino/api/expressions/partitions/partition.py
@@ -0,0 +1,44 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from abc import ABC, abstractmethod
+from typing import Dict
+
+
+class Partition(ABC):
+    """
+    The result of partitioning a table.
+
+    A partition is one of `IdentityPartition`, `ListPartition` or
+    `RangePartition`; which one depends on the `Table.partitioning()`.
+
+    This API is still evolving towards becoming stable and can change from
+    one feature release to another (0.5.0 to 0.6.0).
+    """
+
+    @abstractmethod
+    def name(self) -> str:
+        """
+        Returns:
+            str: The name of the partition.
+        """
+
+    @abstractmethod
+    def properties(self) -> Dict[str, str]:
+        """
+        Returns:
+            Dict[str, str]: The properties of the partition, such as
+            statistics, location, etc.
+        """
diff --git 
a/clients/client-python/gravitino/api/expressions/partitions/partitions.py 
b/clients/client-python/gravitino/api/expressions/partitions/partitions.py
new file mode 100644
index 000000000..6cb4b4a47
--- /dev/null
+++ b/clients/client-python/gravitino/api/expressions/partitions/partitions.py
@@ -0,0 +1,231 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import List, Dict, Any, Optional
+
+from gravitino.api.expressions.literals.literal import Literal
+from gravitino.api.expressions.partitions.identity_partition import 
IdentityPartition
+from gravitino.api.expressions.partitions.list_partition import ListPartition
+from gravitino.api.expressions.partitions.partition import Partition
+from gravitino.api.expressions.partitions.range_partition import RangePartition
+
+
+class Partitions:
+    """
+    The helper class for partition expressions.
+
+    Each static factory builds one concrete partition. A missing or empty
+    `properties` argument is replaced by a fresh empty dict before the
+    partition is constructed, so `properties()` on the result never returns
+    `None`.
+    """
+
+    # An empty list of partitions. The same list object is shared by every
+    # reader of this attribute.
+    EMPTY_PARTITIONS: List[Partition] = []
+
+    @staticmethod
+    def range(
+        name: str,
+        upper: Literal[Any],
+        lower: Literal[Any],
+        properties: Optional[Dict[str, str]] = None,
+    ) -> RangePartition:
+        """
+        Creates a range partition.
+
+        Args:
+            name: The name of the partition.
+            upper: The upper bound of the partition.
+            lower: The lower bound of the partition.
+            properties: The properties of the partition. Defaults to an empty
+                dict when omitted or `None`.
+
+        Returns:
+            The created partition.
+        """
+        # Normalize like `list` and `identity` do; a stored `None` would make
+        # the partition's `__hash__` fail on `None.items()`.
+        return RangePartitionImpl(name, upper, lower, properties or {})
+
+    @staticmethod
+    def list(
+        name: str,
+        lists: List[List[Literal[Any]]],
+        properties: Optional[Dict[str, str]] = None,
+    ) -> ListPartition:
+        """
+        Creates a list partition.
+
+        Args:
+            name: The name of the partition.
+            lists: The values of the list partition.
+            properties: The properties of the partition. Defaults to an empty
+                dict when omitted or `None`.
+
+        Returns:
+            The created partition.
+        """
+        return ListPartitionImpl(name, lists, properties or {})
+
+    @staticmethod
+    def identity(
+        name: Optional[str],
+        field_names: List[List[str]],
+        values: List[Literal[Any]],
+        properties: Optional[Dict[str, str]] = None,
+    ) -> IdentityPartition:
+        """
+        Creates an identity partition.
+
+        The `values` must correspond to the `field_names`.
+
+        Args:
+            name: The name of the partition.
+            field_names: The field names of the identity partition.
+            values: The value of the identity partition.
+            properties: The properties of the partition. Defaults to an empty
+                dict when omitted or `None`.
+
+        Returns:
+            The created partition.
+        """
+        return IdentityPartitionImpl(name, field_names, values, properties or {})
+
+
+class RangePartitionImpl(RangePartition):
+    """
+    Represents a result of range partitioning.
+
+    Two instances are equal when their name, bounds and properties are equal.
+    The hash is built from the same fields, so the bounds must be hashable
+    for `hash()` to succeed.
+    """
+
+    def __init__(
+        self,
+        name: str,
+        upper: Literal[Any],
+        lower: Literal[Any],
+        properties: Optional[Dict[str, str]],
+    ):
+        """
+        Args:
+            name: The name of the partition.
+            upper: The upper bound of the partition.
+            lower: The lower bound of the partition.
+            properties: The properties of the partition; `None` is stored as
+                an empty dict.
+        """
+        self._name = name
+        self._upper = upper
+        self._lower = lower
+        # `properties()` is declared to return a dict and `__hash__` calls
+        # `.items()` on it, so never keep `None` here.
+        self._properties = {} if properties is None else properties
+
+    def upper(self) -> Literal[Any]:
+        """Returns the upper bound of the partition."""
+        return self._upper
+
+    def lower(self) -> Literal[Any]:
+        """Returns the lower bound of the partition."""
+        return self._lower
+
+    def name(self) -> str:
+        """Returns the name of the partition."""
+        return self._name
+
+    def properties(self) -> Dict[str, str]:
+        """Returns the properties of the partition (never `None`)."""
+        return self._properties
+
+    def __eq__(self, other: Any) -> bool:
+        if not isinstance(other, RangePartitionImpl):
+            return False
+        return (
+            self._name == other._name
+            and self._upper == other._upper
+            and self._lower == other._lower
+            and self._properties == other._properties
+        )
+
+    def __hash__(self) -> int:
+        # A dict is unhashable; a frozenset of its items is hashable and
+        # independent of insertion order, matching dict equality.
+        return hash(
+            (
+                self._name,
+                self._upper,
+                self._lower,
+                frozenset(self._properties.items()),
+            )
+        )
+
+
+class ListPartitionImpl(ListPartition):
+    """
+    Represents a result of list partitioning.
+
+    Two instances are equal when their name, lists and properties are equal.
+    The hash is built from the same fields, so the list values must be
+    hashable for `hash()` to succeed.
+    """
+
+    def __init__(
+        self,
+        name: str,
+        lists: List[List[Literal[Any]]],
+        properties: Optional[Dict[str, str]],
+    ):
+        """
+        Args:
+            name: The name of the partition.
+            lists: The values of the list partition.
+            properties: The properties of the partition; `None` is stored as
+                an empty dict.
+        """
+        self._name = name
+        self._lists = lists
+        # `properties()` is declared to return a dict and `__hash__` calls
+        # `.items()` on it, so never keep `None` here.
+        self._properties = {} if properties is None else properties
+
+    def lists(self) -> List[List[Literal[Any]]]:
+        """Returns the values of the list partition."""
+        return self._lists
+
+    def name(self) -> str:
+        """Returns the name of the partition."""
+        return self._name
+
+    def properties(self) -> Dict[str, str]:
+        """Returns the properties of the partition (never `None`)."""
+        return self._properties
+
+    def __eq__(self, other: Any) -> bool:
+        if not isinstance(other, ListPartitionImpl):
+            return False
+        return (
+            self._name == other._name
+            and self._lists == other._lists
+            and self._properties == other._properties
+        )
+
+    def __hash__(self) -> int:
+        # Lists and dicts are unhashable; convert to tuples and a frozenset.
+        return hash(
+            (
+                self._name,
+                tuple(tuple(row) for row in self._lists),
+                frozenset(self._properties.items()),
+            )
+        )
+
+
+class IdentityPartitionImpl(IdentityPartition):
+    """
+    Represents a result of identity partitioning.
+
+    Two instances are equal when their name, field names, values and
+    properties are equal. The hash is built from the same fields, so the
+    values must be hashable for `hash()` to succeed.
+    """
+
+    def __init__(
+        self,
+        name: str,
+        field_names: List[List[str]],
+        values: List[Literal[Any]],
+        properties: Optional[Dict[str, str]],
+    ):
+        """
+        Args:
+            name: The name of the partition.
+            field_names: The field names of the identity partition.
+            values: The values of the identity partition.
+            properties: The properties of the partition; `None` is stored as
+                an empty dict.
+        """
+        self._name = name
+        self._field_names = field_names
+        self._values = values
+        # Tolerate `None` like the sibling implementations declare they do;
+        # `__hash__` calls `.items()` on this attribute.
+        self._properties = {} if properties is None else properties
+
+    def field_names(self) -> List[List[str]]:
+        """Returns the field names of the identity partition."""
+        return self._field_names
+
+    def values(self) -> List[Literal[Any]]:
+        """Returns the values of the identity partition."""
+        return self._values
+
+    def name(self) -> str:
+        """Returns the name of the partition."""
+        return self._name
+
+    def properties(self) -> Dict[str, str]:
+        """Returns the properties of the partition (never `None`)."""
+        return self._properties
+
+    def __eq__(self, other: Any) -> bool:
+        if not isinstance(other, IdentityPartitionImpl):
+            return False
+        return (
+            self._name == other._name
+            and self._field_names == other._field_names
+            and self._values == other._values
+            and self._properties == other._properties
+        )
+
+    def __hash__(self) -> int:
+        # Lists and dicts are unhashable; convert to tuples and a frozenset.
+        return hash(
+            (
+                self._name,
+                tuple(tuple(field) for field in self._field_names),
+                tuple(self._values),
+                frozenset(self._properties.items()),
+            )
+        )
diff --git 
a/clients/client-python/gravitino/api/expressions/partitions/range_partition.py 
b/clients/client-python/gravitino/api/expressions/partitions/range_partition.py
new file mode 100644
index 000000000..7155c033c
--- /dev/null
+++ 
b/clients/client-python/gravitino/api/expressions/partitions/range_partition.py
@@ -0,0 +1,52 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from abc import abstractmethod
+from typing import Any
+
+from gravitino.api.expressions.literals.literal import Literal
+from gravitino.api.expressions.partitions.partition import Partition
+
+
+class RangePartition(Partition):
+    """
+    The result of range partitioning.
+
+    For example, for the range partition
+
+    ```
+    PARTITION p20200321 VALUES LESS THAN ("2020-03-22")
+    ```
+
+    the upper bound is "2020-03-22" and the lower bound is null.
+
+    This API is still evolving towards becoming stable and can change from
+    one feature release to another (0.5.0 to 0.6.0).
+    """
+
+    @abstractmethod
+    def upper(self) -> Literal[Any]:
+        """
+        Returns:
+            Literal[Any]: The upper bound of the partition.
+        """
+
+    @abstractmethod
+    def lower(self) -> Literal[Any]:
+        """
+        Returns:
+            Literal[Any]: The lower bound of the partition.
+        """
diff --git a/clients/client-python/tests/unittests/rel/test_partitions.py 
b/clients/client-python/tests/unittests/rel/test_partitions.py
new file mode 100644
index 000000000..a14eb079d
--- /dev/null
+++ b/clients/client-python/tests/unittests/rel/test_partitions.py
@@ -0,0 +1,108 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import unittest
+from datetime import date
+
+from gravitino.api.expressions.literals.literals import Literals
+from gravitino.api.expressions.partitions.partitions import Partitions
+
+
+class TestPartitions(unittest.TestCase):
+    """Unit tests for the `Partitions` factories and the partitions they build."""
+
+    def test_partitions(self):
+        """Check the accessors of range, list and identity partitions."""
+        # Range partition: NULL upper bound, integer lower bound.
+        range_partition = Partitions.range(
+            "p0", Literals.NULL, Literals.integer_literal(6), {}
+        )
+        self.assertEqual("p0", range_partition.name())
+        self.assertEqual({}, range_partition.properties())
+        self.assertEqual(Literals.NULL, range_partition.upper())
+        self.assertEqual(Literals.integer_literal(6), range_partition.lower())
+
+        # List partition: two (date, city) rows sharing the same date.
+        first_of_april = date(2022, 4, 1)
+        cities = ["Los Angeles", "San Francisco"]
+        list_partition = Partitions.list(
+            "p202204_California",
+            [
+                [
+                    Literals.date_literal(first_of_april),
+                    Literals.string_literal(city),
+                ]
+                for city in cities
+            ],
+            {},
+        )
+        self.assertEqual("p202204_California", list_partition.name())
+        self.assertEqual({}, list_partition.properties())
+        for index, city in enumerate(cities):
+            self.assertEqual(
+                Literals.date_literal(first_of_april),
+                list_partition.lists()[index][0],
+            )
+            self.assertEqual(
+                Literals.string_literal(city), list_partition.lists()[index][1]
+            )
+
+        # Identity partition: two single-level fields plus a location property.
+        warehouse_path = "/user/hive/warehouse/tpch_flat_orc_2.db/orders"
+        identity_partition = Partitions.identity(
+            "dt=2008-08-08/country=us",
+            [["dt"], ["country"]],
+            [
+                Literals.date_literal(date(2008, 8, 8)),
+                Literals.string_literal("us"),
+            ],
+            {"location": warehouse_path},
+        )
+        self.assertEqual("dt=2008-08-08/country=us", identity_partition.name())
+        self.assertEqual(
+            {"location": warehouse_path}, identity_partition.properties()
+        )
+        self.assertEqual(["dt"], identity_partition.field_names()[0])
+        self.assertEqual(["country"], identity_partition.field_names()[1])
+        self.assertEqual(
+            Literals.date_literal(date(2008, 8, 8)), identity_partition.values()[0]
+        )
+        self.assertEqual(
+            Literals.string_literal("us"), identity_partition.values()[1]
+        )
+
+    def test_eq(self):
+        """
+        Test the correctness of the __eq__ method.
+        """
+
+        def build(name, lower_value):
+            # Range partition with a NULL upper bound and no properties.
+            return Partitions.range(
+                name, Literals.NULL, Literals.integer_literal(lower_value), {}
+            )
+
+        baseline = build("p1", 6)
+        same_fields = build("p1", 6)
+        other_name_and_lower = build("p2", 10)
+        other_lower_only = build("p1", 10)
+
+        # Separately built partitions with identical fields compare equal.
+        self.assertEqual(baseline, same_fields)
+
+        # Any differing field makes the partitions unequal.
+        self.assertNotEqual(baseline, other_name_and_lower)
+        self.assertNotEqual(baseline, other_lower_only)
+
+        # Comparison with unrelated types is never equal.
+        self.assertNotEqual(baseline, "not_a_partition")
+        self.assertNotEqual(baseline, None)

Reply via email to