This is an automated email from the ASF dual-hosted git repository.
jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push:
new 2b88148375 [#5199] feat(client-python): Add Partition DTOs (#7902)
2b88148375 is described below
commit 2b88148375a80a7e1034cc7d8aa8f2793293c3d0
Author: George T. C. Lai <[email protected]>
AuthorDate: Thu Aug 7 12:07:20 2025 +0800
[#5199] feat(client-python): Add Partition DTOs (#7902)
### What changes were proposed in this pull request?
This PR is aimed at implementing the following Java classes in
client-python.
- PartitionDTO.java
- IdentityPartitionDTO.java
- RangePartitionDTO.java
- ListPartitionDTO.java
**NOTE** that the implementation does not include the `Builder`, since it
does not seem necessary in Python. We can discuss adding the `Builder`
further if a reviewer thinks it is required.
### Why are the changes needed?
We need to support table partitioning, bucketing, sort ordering, and
indexes.
#5199
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Unit tests
---------
Signed-off-by: George T. C. Lai <[email protected]>
---
.../gravitino/dto/rel/partitions/__init__.py | 16 ++
.../dto/rel/partitions/identity_partition_dto.py | 75 ++++++++
.../dto/rel/partitions/list_partition_dto.py | 66 ++++++++
.../gravitino/dto/rel/partitions/partition_dto.py | 45 +++++
.../dto/rel/partitions/range_partition_dto.py | 71 ++++++++
.../tests/unittests/dto/rel/test_partition_dtos.py | 188 +++++++++++++++++++++
6 files changed, 461 insertions(+)
diff --git a/clients/client-python/gravitino/dto/rel/partitions/__init__.py
b/clients/client-python/gravitino/dto/rel/partitions/__init__.py
new file mode 100644
index 0000000000..13a83393a9
--- /dev/null
+++ b/clients/client-python/gravitino/dto/rel/partitions/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git
a/clients/client-python/gravitino/dto/rel/partitions/identity_partition_dto.py
b/clients/client-python/gravitino/dto/rel/partitions/identity_partition_dto.py
new file mode 100644
index 0000000000..ab675cd769
--- /dev/null
+++
b/clients/client-python/gravitino/dto/rel/partitions/identity_partition_dto.py
@@ -0,0 +1,75 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+from typing import Dict, List
+
+from gravitino.api.expressions.partitions.identity_partition import
IdentityPartition
+from gravitino.dto.rel.expressions.literal_dto import LiteralDTO
+from gravitino.dto.rel.partitions.partition_dto import PartitionDTO
+
+
+class IdentityPartitionDTO(PartitionDTO, IdentityPartition):
+    """Identity partition Data Transfer Object (DTO) implementing IdentityPartition.
+
+    Instances compare by value (name, values, field names and properties) and
+    are hashable, provided the contained ``LiteralDTO`` values are hashable.
+    """
+
+    def __init__(
+        self,
+        name: str,
+        values: List[LiteralDTO],
+        field_names: List[List[str]],
+        properties: Dict[str, str],
+    ):
+        """Create an identity partition DTO.
+
+        Args:
+            name: The name of the partition.
+            values: The literal values of the partition.
+            field_names: The field names of the partition; each entry is itself
+                a list of strings (presumably the parts of a nested field
+                path -- confirm against ``IdentityPartition``).
+            properties: The properties of the partition.
+        """
+        self._name = name
+        self._values = values
+        self._field_names = field_names
+        self._properties = properties
+
+    def name(self) -> str:
+        """Return the partition name."""
+        return self._name
+
+    def values(self) -> List[LiteralDTO]:
+        """Return the literal values of the partition."""
+        return self._values
+
+    def field_names(self) -> List[List[str]]:
+        """Return the field names of the partition."""
+        return self._field_names
+
+    def properties(self) -> Dict[str, str]:
+        """Return the partition properties."""
+        return self._properties
+
+    def type(self) -> PartitionDTO.Type:
+        """Return ``PartitionDTO.Type.IDENTITY``."""
+        return self.Type.IDENTITY
+
+    def __eq__(self, value: object) -> bool:
+        """Return True when ``value`` is an IdentityPartitionDTO with equal fields."""
+        if not isinstance(value, IdentityPartitionDTO):
+            return False
+        if self is value:
+            return True
+        return (
+            self._name == value.name()
+            and self._values == value.values()
+            and self._field_names == value.field_names()
+            and self._properties == value.properties()
+        )
+
+    def __hash__(self) -> int:
+        """Return a hash consistent with ``__eq__``."""
+        return hash(
+            (
+                self._name,
+                tuple(self._values),
+                tuple(tuple(field) for field in self._field_names),
+                # Dict equality ignores insertion order, so the hash must too:
+                # a frozenset of items is order-independent, whereas a tuple of
+                # items would give equal DTOs different hashes.
+                frozenset(self._properties.items()),
+            )
+        )
diff --git
a/clients/client-python/gravitino/dto/rel/partitions/list_partition_dto.py
b/clients/client-python/gravitino/dto/rel/partitions/list_partition_dto.py
new file mode 100644
index 0000000000..9ccd542b75
--- /dev/null
+++ b/clients/client-python/gravitino/dto/rel/partitions/list_partition_dto.py
@@ -0,0 +1,66 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+from typing import Dict, List
+
+from gravitino.api.expressions.partitions.list_partition import ListPartition
+from gravitino.dto.rel.expressions.literal_dto import LiteralDTO
+from gravitino.dto.rel.partitions.partition_dto import PartitionDTO
+
+
+class ListPartitionDTO(PartitionDTO, ListPartition):
+    """List partition Data Transfer Object (DTO) implementing ListPartition.
+
+    Instances compare by value (name, properties and lists) and are hashable,
+    provided the contained ``LiteralDTO`` values are hashable.
+    """
+
+    def __init__(
+        self,
+        name: str,
+        lists: List[List[LiteralDTO]],
+        properties: Dict[str, str],
+    ):
+        """Create a list partition DTO.
+
+        Args:
+            name: The name of the partition.
+            lists: The values of the list partition, as a list of lists of
+                literals.
+            properties: The properties of the partition.
+        """
+        self._name = name
+        self._lists = lists
+        self._properties = properties
+
+    def name(self) -> str:
+        """Return the partition name."""
+        return self._name
+
+    def lists(self) -> List[List[LiteralDTO]]:
+        """Return the literal lists of the partition."""
+        return self._lists
+
+    def properties(self) -> Dict[str, str]:
+        """Return the partition properties."""
+        return self._properties
+
+    def type(self) -> PartitionDTO.Type:
+        """Return ``PartitionDTO.Type.LIST``."""
+        return self.Type.LIST
+
+    def __eq__(self, value: object) -> bool:
+        """Return True when ``value`` is a ListPartitionDTO with equal fields."""
+        if not isinstance(value, ListPartitionDTO):
+            return False
+        if self is value:
+            return True
+        return (
+            self._name == value.name()
+            and self._properties == value.properties()
+            and self._lists == value.lists()
+        )
+
+    def __hash__(self) -> int:
+        """Return a hash consistent with ``__eq__``."""
+        return hash(
+            (
+                self._name,
+                # Dict equality ignores insertion order, so hash the items as a
+                # frozenset; a tuple of items would make equal DTOs hash
+                # differently when their properties were inserted in another
+                # order.
+                frozenset(self._properties.items()),
+                tuple(tuple(item) for item in self._lists),
+            )
+        )
diff --git
a/clients/client-python/gravitino/dto/rel/partitions/partition_dto.py
b/clients/client-python/gravitino/dto/rel/partitions/partition_dto.py
new file mode 100644
index 0000000000..cbfddd0ab5
--- /dev/null
+++ b/clients/client-python/gravitino/dto/rel/partitions/partition_dto.py
@@ -0,0 +1,45 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+from abc import abstractmethod
+from enum import Enum
+
+from gravitino.api.expressions.partitions.partition import Partition
+
+
+class PartitionDTO(Partition):
+    """Base Data Transfer Object (DTO) for partitions, built on the Partition interface."""
+
+    class Type(Enum):
+        """Enumerates the kinds of partition a DTO can represent."""
+
+        #: The range partition type.
+        RANGE = "range"
+        #: The list partition type.
+        LIST = "list"
+        #: The identity partition type.
+        IDENTITY = "identity"
+
+    @abstractmethod
+    def type(self) -> Type:
+        """Report which kind of partition this DTO represents.
+
+        Returns:
+            Type: The ``Type`` member for this partition.
+        """
+        pass  # pragma: no cover
diff --git
a/clients/client-python/gravitino/dto/rel/partitions/range_partition_dto.py
b/clients/client-python/gravitino/dto/rel/partitions/range_partition_dto.py
new file mode 100644
index 0000000000..967ff1a2b7
--- /dev/null
+++ b/clients/client-python/gravitino/dto/rel/partitions/range_partition_dto.py
@@ -0,0 +1,71 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+from typing import Dict
+
+from gravitino.api.expressions.partitions.range_partition import RangePartition
+from gravitino.dto.rel.expressions.literal_dto import LiteralDTO
+from gravitino.dto.rel.partitions.partition_dto import PartitionDTO
+
+
+class RangePartitionDTO(PartitionDTO, RangePartition):
+    """Range partition Data Transfer Object (DTO) implementing RangePartition.
+
+    Instances compare by value (name, properties, upper and lower bound) and
+    are hashable, provided the ``LiteralDTO`` bounds are hashable.
+    """
+
+    def __init__(
+        self,
+        name: str,
+        properties: Dict[str, str],
+        upper: LiteralDTO,
+        lower: LiteralDTO,
+    ):
+        """Create a range partition DTO.
+
+        Args:
+            name: The name of the partition.
+            properties: The properties of the partition.
+            upper: The upper bound of the partition.
+            lower: The lower bound of the partition.
+        """
+        self._name = name
+        self._properties = properties
+        self._upper = upper
+        self._lower = lower
+
+    def name(self) -> str:
+        """Return the partition name."""
+        return self._name
+
+    def properties(self) -> Dict[str, str]:
+        """Return the partition properties."""
+        return self._properties
+
+    def upper(self) -> LiteralDTO:
+        """Return the upper bound of the partition."""
+        return self._upper
+
+    def lower(self) -> LiteralDTO:
+        """Return the lower bound of the partition."""
+        return self._lower
+
+    def type(self) -> PartitionDTO.Type:
+        """Return ``PartitionDTO.Type.RANGE``."""
+        return self.Type.RANGE
+
+    def __eq__(self, value: object) -> bool:
+        """Return True when ``value`` is a RangePartitionDTO with equal fields."""
+        if not isinstance(value, RangePartitionDTO):
+            return False
+        if self is value:
+            return True
+        return (
+            self._name == value.name()
+            and self._properties == value.properties()
+            and self._upper == value.upper()
+            and self._lower == value.lower()
+        )
+
+    def __hash__(self) -> int:
+        """Return a hash consistent with ``__eq__``."""
+        return hash(
+            (
+                self._name,
+                # Dict equality ignores insertion order, so hash the items as a
+                # frozenset rather than an order-sensitive tuple.
+                frozenset(self._properties.items()),
+                self._upper,
+                self._lower,
+            )
+        )
diff --git
a/clients/client-python/tests/unittests/dto/rel/test_partition_dtos.py
b/clients/client-python/tests/unittests/dto/rel/test_partition_dtos.py
new file mode 100644
index 0000000000..9bcf9f106a
--- /dev/null
+++ b/clients/client-python/tests/unittests/dto/rel/test_partition_dtos.py
@@ -0,0 +1,188 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import unittest
+
+from gravitino.api.types.types import Types
+from gravitino.dto.rel.expressions.literal_dto import LiteralDTO
+from gravitino.dto.rel.partitions.identity_partition_dto import
IdentityPartitionDTO
+from gravitino.dto.rel.partitions.list_partition_dto import ListPartitionDTO
+from gravitino.dto.rel.partitions.partition_dto import PartitionDTO
+from gravitino.dto.rel.partitions.range_partition_dto import RangePartitionDTO
+
+
+class TestPartitionDTOs(unittest.TestCase):
+    # Covers accessors, equality and hashing of the three partition DTOs.
+
+    @staticmethod
+    def _literal(data_type, value):
+        # Build a LiteralDTO through its builder for the given type and value.
+        return (
+            LiteralDTO.builder()
+            .with_data_type(data_type=data_type)
+            .with_value(value=value)
+            .build()
+        )
+
+    def test_identity_partition_dto(self):
+        name = "dt=2025-08-08/country=us"
+        fields = [["dt"], ["country"]]
+        props = {}
+        literals = [
+            self._literal(Types.DateType.get(), "2025-08-08"),
+            self._literal(Types.StringType.get(), "us"),
+        ]
+        dto = IdentityPartitionDTO(
+            name=name,
+            field_names=fields,
+            values=literals,
+            properties=props,
+        )
+        twin = IdentityPartitionDTO(
+            name=name,
+            field_names=fields,
+            values=literals,
+            properties=props,
+        )
+        other = IdentityPartitionDTO(
+            name="different_partition",
+            field_names=fields,
+            values=literals,
+            properties={},
+        )
+
+        # Equal DTOs must collapse onto one key; the later value wins.
+        by_dto = {dto: 1, twin: 2, other: 3}
+
+        self.assertIsInstance(dto, IdentityPartitionDTO)
+        self.assertIs(dto.type(), PartitionDTO.Type.IDENTITY)
+        self.assertEqual(dto.name(), name)
+        self.assertListEqual(dto.field_names(), fields)
+        self.assertListEqual(dto.values(), literals)
+        self.assertDictEqual(dto.properties(), props)
+
+        self.assertTrue(dto == twin)
+        self.assertFalse(dto == other)
+        self.assertFalse(dto == "dummy_string")
+
+        self.assertEqual(len(by_dto), 2)
+        self.assertEqual(by_dto[dto], 2)
+
+    def test_list_partition_dto(self):
+        name = "p202508_California"
+        props = {}
+        literal_lists = [
+            [
+                self._literal(Types.DateType.get(), "2025-08-08"),
+                self._literal(Types.StringType.get(), "Los Angeles"),
+            ],
+            [
+                self._literal(Types.DateType.get(), "2025-08-08"),
+                self._literal(Types.StringType.get(), "San Francisco"),
+            ],
+        ]
+        dto = ListPartitionDTO(name=name, lists=literal_lists, properties=props)
+        twin = ListPartitionDTO(name=name, lists=literal_lists, properties=props)
+        other = ListPartitionDTO(
+            name="different_partition",
+            lists=literal_lists,
+            properties=props,
+        )
+
+        # Equal DTOs must collapse onto one key; the later value wins.
+        by_dto = {dto: 1, twin: 2, other: 3}
+
+        self.assertIsInstance(dto, ListPartitionDTO)
+        self.assertIs(dto.type(), PartitionDTO.Type.LIST)
+        self.assertEqual(dto.name(), name)
+        self.assertListEqual(dto.lists(), literal_lists)
+        self.assertDictEqual(dto.properties(), props)
+
+        self.assertTrue(dto == twin)
+        self.assertFalse(dto == other)
+        self.assertFalse(dto == "dummy_string")
+
+        self.assertEqual(len(by_dto), 2)
+        self.assertEqual(by_dto[dto], 2)
+
+    def test_range_partition_dto(self):
+        name = "p20250808"
+        props = {}
+        upper_bound = self._literal(Types.DateType.get(), "2025-08-08")
+        lower_bound = self._literal(Types.NullType.get(), "null")
+        dto = RangePartitionDTO(
+            name=name,
+            properties=props,
+            upper=upper_bound,
+            lower=lower_bound,
+        )
+        twin = RangePartitionDTO(
+            name=name,
+            properties=props,
+            upper=upper_bound,
+            lower=lower_bound,
+        )
+        other = RangePartitionDTO(
+            name="different_partition",
+            properties=props,
+            upper=upper_bound,
+            lower=lower_bound,
+        )
+
+        # Equal DTOs must collapse onto one key; the later value wins.
+        by_dto = {dto: 1, twin: 2, other: 3}
+
+        self.assertIsInstance(dto, RangePartitionDTO)
+        self.assertIs(dto.type(), PartitionDTO.Type.RANGE)
+        self.assertEqual(dto.name(), name)
+        self.assertEqual(dto.upper(), upper_bound)
+        self.assertEqual(dto.lower(), lower_bound)
+        self.assertDictEqual(dto.properties(), props)
+
+        self.assertTrue(dto == twin)
+        self.assertFalse(dto == other)
+        self.assertFalse(dto == "dummy_string")
+
+        self.assertEqual(len(by_dto), 2)
+        self.assertEqual(by_dto[dto], 2)