This is an automated email from the ASF dual-hosted git repository. liuxun pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push: new e9d8ee7bc [#5203] feat(client-python): porting partitions from java client (#5964) e9d8ee7bc is described below commit e9d8ee7bc05d3226c5f0ce0b492b2c207018ed73 Author: Eric Chang <e850...@gmail.com> AuthorDate: Thu Jan 9 11:32:48 2025 +0800 [#5203] feat(client-python): porting partitions from java client (#5964) ### What changes were proposed in this pull request? Porting `interface Partition`, `interface IdentityPartition`, `interface ListPartition`, `interface RangePartition`, and `class Partitions` from Java to Python. Fix: #5203 ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? Unit tests. --- .../expressions/partitions/identity_partition.py | 54 +++++ .../api/expressions/partitions/list_partition.py | 47 +++++ .../api/expressions/partitions/partition.py | 44 ++++ .../api/expressions/partitions/partitions.py | 231 +++++++++++++++++++++ .../api/expressions/partitions/range_partition.py | 52 +++++ .../tests/unittests/rel/test_partitions.py | 108 ++++++++++ 6 files changed, 536 insertions(+) diff --git a/clients/client-python/gravitino/api/expressions/partitions/identity_partition.py b/clients/client-python/gravitino/api/expressions/partitions/identity_partition.py new file mode 100644 index 000000000..e4b660c09 --- /dev/null +++ b/clients/client-python/gravitino/api/expressions/partitions/identity_partition.py @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from abc import abstractmethod
from typing import List, Any

from .partition import Partition
from ..literals.literal import Literal


class IdentityPartition(Partition):
    """
    Interface for the result of identity partitioning.

    Taking the Hive partition

    ```
    PARTITION (dt='2008-08-08',country='us')
    ```

    as an example: the partition name is "dt=2008-08-08/country=us", the field
    names are [["dt"], ["country"]] and the values are ["2008-08-08", "us"].

    This API is still evolving towards a stable API and can change from one
    feature release to another (0.5.0 to 0.6.0).
    """

    @abstractmethod
    def field_names(self) -> List[List[str]]:
        """
        Returns:
            List[List[str]]: The field names of the identity partition, one
            inner list per field.
        """

    @abstractmethod
    def values(self) -> List[Literal[Any]]:
        """
        Returns:
            List[Literal[Any]]: The literal values of the identity partition.
        """


# diff --git a/clients/client-python/gravitino/api/expressions/partitions/list_partition.py b/clients/client-python/gravitino/api/expressions/partitions/list_partition.py
# new file mode 100644
# index 000000000..8316e4daa
# --- /dev/null
# +++ b/clients/client-python/gravitino/api/expressions/partitions/list_partition.py
# @@ -0,0 +1,47 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.
# The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from abc import abstractmethod
from typing import List, Any

from gravitino.api.expressions.literals.literal import Literal
from gravitino.api.expressions.partitions.partition import Partition


class ListPartition(Partition):
    """
    Interface for the result of list partitioning.

    Taking the list partition

    ```
    PARTITION p202204_California VALUES IN (
      ("2022-04-01", "Los Angeles"),
      ("2022-04-01", "San Francisco")
    )
    ```

    as an example: the name is "p202204_California" and the lists are
    [["2022-04-01","Los Angeles"], ["2022-04-01", "San Francisco"]].

    This API is still evolving towards a stable API and can change from one
    feature release to another (0.5.0 to 0.6.0).
    """

    @abstractmethod
    def lists(self) -> List[List[Literal[Any]]]:
        """
        Returns:
            List[List[Literal[Any]]]: The values of the list partition, one
            inner list per value tuple.
        """


# diff --git a/clients/client-python/gravitino/api/expressions/partitions/partition.py b/clients/client-python/gravitino/api/expressions/partitions/partition.py
# new file mode 100644
# index 000000000..7f9a0b873
# --- /dev/null
# +++ b/clients/client-python/gravitino/api/expressions/partitions/partition.py
# @@ -0,0 +1,44 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.
# The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from abc import ABC, abstractmethod
from typing import Dict


class Partition(ABC):
    """
    Interface for the result of partitioning a table.

    A partition is one of `IdentityPartition`, `ListPartition`, or
    `RangePartition`; which one depends on the `Table.partitioning()`.

    This API is still evolving towards a stable API and can change from one
    feature release to another (0.5.0 to 0.6.0).
    """

    @abstractmethod
    def name(self) -> str:
        """
        Returns:
            str: The name of the partition.
        """

    @abstractmethod
    def properties(self) -> Dict[str, str]:
        """
        Returns:
            Dict[str, str]: The properties of the partition, such as
            statistics, location, etc.
        """


# diff --git a/clients/client-python/gravitino/api/expressions/partitions/partitions.py b/clients/client-python/gravitino/api/expressions/partitions/partitions.py
# new file mode 100644
# index 000000000..6cb4b4a47
# --- /dev/null
# +++ b/clients/client-python/gravitino/api/expressions/partitions/partitions.py
# @@ -0,0 +1,231 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from typing import List, Dict, Any, Optional

from gravitino.api.expressions.literals.literal import Literal
from gravitino.api.expressions.partitions.identity_partition import IdentityPartition
from gravitino.api.expressions.partitions.list_partition import ListPartition
from gravitino.api.expressions.partitions.partition import Partition
from gravitino.api.expressions.partitions.range_partition import RangePartition


class Partitions:
    """The helper class for partition expressions."""

    # An empty list of partitions. This list object is shared by every user of
    # the class attribute, so treat it as read-only.
    EMPTY_PARTITIONS: List[Partition] = []

    @staticmethod
    def range(
        name: str,
        upper: Literal[Any],
        lower: Literal[Any],
        properties: Optional[Dict[str, str]] = None,
    ) -> RangePartition:
        """
        Creates a range partition.

        Args:
            name: The name of the partition.
            upper: The upper bound of the partition.
            lower: The lower bound of the partition.
            properties: The properties of the partition. `None` is treated as
                an empty dict, matching `list` and `identity`.

        Returns:
            The created partition.
        """
        return RangePartitionImpl(name, upper, lower, properties or {})

    @staticmethod
    def list(
        name: str,
        lists: List[List[Literal[Any]]],
        properties: Optional[Dict[str, str]] = None,
    ) -> ListPartition:
        """
        Creates a list partition.

        Args:
            name: The name of the partition.
            lists: The values of the list partition.
            properties: The properties of the partition. `None` is treated as
                an empty dict.

        Returns:
            The created partition.
        """
        return ListPartitionImpl(name, lists, properties or {})

    @staticmethod
    def identity(
        name: Optional[str],
        field_names: List[List[str]],
        values: List[Literal[Any]],
        properties: Optional[Dict[str, str]] = None,
    ) -> IdentityPartition:
        """
        Creates an identity partition.

        The `values` must correspond to the `field_names`.

        Args:
            name: The name of the partition.
            field_names: The field names of the identity partition.
            values: The value of the identity partition.
            properties: The properties of the partition. `None` is treated as
                an empty dict.

        Returns:
            The created partition.
        """
        return IdentityPartitionImpl(name, field_names, values, properties or {})


class RangePartitionImpl(RangePartition):
    """
    Represents a result of range partitioning.
    """

    def __init__(
        self,
        name: str,
        upper: Literal[Any],
        lower: Literal[Any],
        properties: Optional[Dict[str, str]],
    ):
        self._name = name
        self._upper = upper
        self._lower = lower
        # Normalise None so properties() honours its Dict return type and
        # __hash__ can always call .items().
        self._properties: Dict[str, str] = {} if properties is None else properties

    def upper(self) -> Literal[Any]:
        """Returns the upper bound of the partition."""
        return self._upper

    def lower(self) -> Literal[Any]:
        """Returns the lower bound of the partition."""
        return self._lower

    def name(self) -> str:
        """Returns the name of the partition."""
        return self._name

    def properties(self) -> Dict[str, str]:
        """Returns the properties of the partition."""
        return self._properties

    def __eq__(self, other: Any) -> bool:
        """Two range partitions are equal when name, bounds and properties match."""
        if not isinstance(other, RangePartitionImpl):
            return False
        return (
            self._name == other._name
            and self._upper == other._upper
            and self._lower == other._lower
            and self._properties == other._properties
        )

    def __hash__(self) -> int:
        # The dict is folded into a frozenset of items so it can be hashed.
        return hash(
            (self._name, self._upper, self._lower, frozenset(self._properties.items()))
        )


class ListPartitionImpl(ListPartition):
    """
    Represents a result of list partitioning.
    """

    def __init__(
        self,
        name: str,
        lists: List[List[Literal[Any]]],
        properties: Optional[Dict[str, str]],
    ):
        self._name = name
        self._lists = lists
        # Normalise None so properties() honours its Dict return type and
        # __hash__ can always call .items().
        self._properties: Dict[str, str] = {} if properties is None else properties

    def lists(self) -> List[List[Literal[Any]]]:
        """Returns the values of the list partition."""
        return self._lists

    def name(self) -> str:
        """Returns the name of the partition."""
        return self._name

    def properties(self) -> Dict[str, str]:
        """Returns the properties of the partition."""
        return self._properties

    def __eq__(self, other: Any) -> bool:
        """Two list partitions are equal when name, lists and properties match."""
        if not isinstance(other, ListPartitionImpl):
            return False
        return (
            self._name == other._name
            and self._lists == other._lists
            and self._properties == other._properties
        )

    def __hash__(self) -> int:
        # Nested lists are converted to nested tuples so they can be hashed.
        return hash(
            (
                self._name,
                tuple(tuple(row) for row in self._lists),
                frozenset(self._properties.items()),
            )
        )


class IdentityPartitionImpl(IdentityPartition):
    """
    Represents a result of identity partitioning.
    """

    def __init__(
        self,
        name: Optional[str],
        field_names: List[List[str]],
        values: List[Literal[Any]],
        properties: Optional[Dict[str, str]],
    ):
        self._name = name
        self._field_names = field_names
        self._values = values
        # Normalise None so properties() honours its Dict return type and
        # __hash__ can always call .items().
        self._properties: Dict[str, str] = {} if properties is None else properties

    def field_names(self) -> List[List[str]]:
        """Returns the field names of the identity partition."""
        return self._field_names

    def values(self) -> List[Literal[Any]]:
        """Returns the values of the identity partition."""
        return self._values

    def name(self) -> Optional[str]:
        """Returns the name of the partition as given to the constructor."""
        return self._name

    def properties(self) -> Dict[str, str]:
        """Returns the properties of the partition."""
        return self._properties

    def __eq__(self, other: Any) -> bool:
        """Two identity partitions are equal when all four fields match."""
        if not isinstance(other, IdentityPartitionImpl):
            return False
        return (
            self._name == other._name
            and self._field_names == other._field_names
            and self._values == other._values
            and self._properties == other._properties
        )

    def __hash__(self) -> int:
        # Lists are converted to tuples and the dict to a frozenset of items
        # so the combined key can be hashed.
        return hash(
            (
                self._name,
                tuple(tuple(fn) for fn in self._field_names),
                tuple(self._values),
                frozenset(self._properties.items()),
            )
        )


# diff --git a/clients/client-python/gravitino/api/expressions/partitions/range_partition.py b/clients/client-python/gravitino/api/expressions/partitions/range_partition.py
# new file mode 100644
# index 000000000..7155c033c
# --- /dev/null
# +++
# b/clients/client-python/gravitino/api/expressions/partitions/range_partition.py
# @@ -0,0 +1,52 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from abc import abstractmethod
from typing import Any

from gravitino.api.expressions.literals.literal import Literal
from gravitino.api.expressions.partitions.partition import Partition


class RangePartition(Partition):
    """
    Interface for the result of range partitioning.

    Taking the range partition

    ```
    PARTITION p20200321 VALUES LESS THAN ("2020-03-22")
    ```

    as an example: the upper bound is "2020-03-22" and the lower bound is null.

    This API is still evolving towards a stable API and can change from one
    feature release to another (0.5.0 to 0.6.0).
    """

    @abstractmethod
    def upper(self) -> Literal[Any]:
        """
        Returns:
            Literal[Any]: The upper bound of the partition.
        """

    @abstractmethod
    def lower(self) -> Literal[Any]:
        """
        Returns:
            Literal[Any]: The lower bound of the partition.
        """


# diff --git a/clients/client-python/tests/unittests/rel/test_partitions.py b/clients/client-python/tests/unittests/rel/test_partitions.py
# new file mode 100644
# index 000000000..a14eb079d
# --- /dev/null
# +++ b/clients/client-python/tests/unittests/rel/test_partitions.py
# @@ -0,0 +1,108 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
import unittest
from datetime import date

from gravitino.api.expressions.literals.literals import Literals
from gravitino.api.expressions.partitions.partitions import Partitions


class TestPartitions(unittest.TestCase):
    """Unit tests for the partition factories on `Partitions`."""

    def test_partitions(self):
        """Each factory returns a partition exposing the values it was given."""
        # Range partition: the arguments are (name, upper, lower, properties).
        range_partition = Partitions.range(
            "p0", Literals.NULL, Literals.integer_literal(6), {}
        )
        self.assertEqual("p0", range_partition.name())
        self.assertEqual({}, range_partition.properties())
        self.assertEqual(Literals.NULL, range_partition.upper())
        self.assertEqual(Literals.integer_literal(6), range_partition.lower())

        # List partition. The rows are built twice (input and expectation) so
        # the comparison relies on literal equality, not on object identity.
        def build_rows():
            return [
                [
                    Literals.date_literal(date(2022, 4, 1)),
                    Literals.string_literal(city),
                ]
                for city in ("Los Angeles", "San Francisco")
            ]

        list_partition = Partitions.list("p202204_California", build_rows(), {})
        self.assertEqual("p202204_California", list_partition.name())
        self.assertEqual({}, list_partition.properties())
        for row_idx, expected_row in enumerate(build_rows()):
            for col_idx, expected in enumerate(expected_row):
                self.assertEqual(expected, list_partition.lists()[row_idx][col_idx])

        # Identity partition.
        identity_partition = Partitions.identity(
            "dt=2008-08-08/country=us",
            [["dt"], ["country"]],
            [Literals.date_literal(date(2008, 8, 8)), Literals.string_literal("us")],
            {"location": "/user/hive/warehouse/tpch_flat_orc_2.db/orders"},
        )
        self.assertEqual("dt=2008-08-08/country=us", identity_partition.name())
        self.assertEqual(
            {"location": "/user/hive/warehouse/tpch_flat_orc_2.db/orders"},
            identity_partition.properties(),
        )
        names = identity_partition.field_names()
        self.assertEqual(["dt"], names[0])
        self.assertEqual(["country"], names[1])
        self.assertEqual(
            Literals.date_literal(date(2008, 8, 8)), identity_partition.values()[0]
        )
        self.assertEqual(
            Literals.string_literal("us"), identity_partition.values()[1]
        )

    def test_eq(self):
        """
        Test the correctness of the __eq__ method.
        """

        def make(name, lower_value):
            # Only the name and the lower bound vary between the cases below.
            return Partitions.range(
                name, Literals.NULL, Literals.integer_literal(lower_value), {}
            )

        first = make("p1", 6)
        same_as_first = make("p1", 6)
        different_name_and_bound = make("p2", 10)

        # Partitions built from the same arguments compare equal.
        self.assertEqual(first, same_as_first)
        self.assertNotEqual(first, different_name_and_bound)

        # Same name but a different lower bound is not equal.
        different_bound = make("p1", 10)
        self.assertNotEqual(first, different_bound)

        # Comparison with values of other types is never equal.
        self.assertNotEqual(first, "not_a_partition")
        self.assertNotEqual(first, None)