This is an automated email from the ASF dual-hosted git repository.
jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push:
new 35222d172f [#5199] refactor(client-python): refactor of serdes utils (#7904)
35222d172f is described below
commit 35222d172fcbbec47fb356c8c62f1aabce95fc78
Author: George T. C. Lai <[email protected]>
AuthorDate: Mon Aug 11 10:15:28 2025 +0800
[#5199] refactor(client-python): refactor of serdes utils (#7904)
### What changes were proposed in this pull request?
This PR introduces a `SerdesUtilsBase` class that holds the shared class
variables so that all of its subclasses, such as `SerdesUtils`, inherit
them from one place. In doing so, we
- establish a single source of truth for the shared class variables,
- reduce the maintenance effort.
### Why are the changes needed?
The current implementation of `SerdesUtils` in client-python splits
`JsonUtils` from the Java client into several classes, listed below.
- [gravitino.api.types.json_serdes._helper.serdes_utils.SerdesUtils](https://github.com/apache/gravitino/blob/main/clients/client-python/gravitino/api/types/json_serdes/_helper/serdes_utils.py)
- [gravitino.dto.rel.expressions.json_serdes._helper.serdes_utils.SerdesUtils](https://github.com/apache/gravitino/blob/main/clients/client-python/gravitino/dto/rel/expressions/json_serdes/_helper/serdes_utils.py)
Many class variables (the serialized keys) are shared between these
classes and are therefore defined more than once, once in each
`SerdesUtils` class. As more `SerdesUtils` classes are added, the same
shared class variables would have to be defined in each of them. This
breaks the single source of truth for these class variables (serialized
keys) and makes it difficult to keep them consistent.
#5199
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Unit tests
---------
Signed-off-by: George T. C. Lai <[email protected]>
---
.../api/types/json_serdes/_helper/serdes_utils.py | 80 +---------------
.../json_serdes/_helper/serdes_utils.py | 13 +--
clients/client-python/gravitino/utils/serdes.py | 104 +++++++++++++++++++++
3 files changed, 110 insertions(+), 87 deletions(-)
diff --git a/clients/client-python/gravitino/api/types/json_serdes/_helper/serdes_utils.py b/clients/client-python/gravitino/api/types/json_serdes/_helper/serdes_utils.py
index 0bff743414..6aaa12a45e 100644
--- a/clients/client-python/gravitino/api/types/json_serdes/_helper/serdes_utils.py
+++ b/clients/client-python/gravitino/api/types/json_serdes/_helper/serdes_utils.py
@@ -16,91 +16,17 @@
# under the License.
import json
-import re
-from types import MappingProxyType
-from typing import Any, ClassVar, Dict, Mapping, Pattern, Set, Union, overload
+from typing import Any, Dict, Union, overload
from dataclasses_json.core import Json
from gravitino.api.types.type import Name, Type
from gravitino.api.types.types import Types
from gravitino.utils.precondition import Precondition
+from gravitino.utils.serdes import SerdesUtilsBase
-class SerdesUtils:
- EXPRESSION_TYPE: ClassVar[str] = "type"
- DATA_TYPE: ClassVar[str] = "dataType"
- LITERAL_VALUE: ClassVar[str] = "value"
- FIELD_NAME: ClassVar[str] = "fieldName"
- FUNCTION_NAME: ClassVar[str] = "funcName"
- FUNCTION_ARGS: ClassVar[str] = "funcArgs"
- UNPARSED_EXPRESSION: ClassVar[str] = "unparsedExpression"
- TYPE: ClassVar[str] = "type"
- STRUCT: ClassVar[str] = "struct"
- FIELDS: ClassVar[str] = "fields"
- STRUCT_FIELD_NAME: ClassVar[str] = "name"
- STRUCT_FIELD_NULLABLE: ClassVar[str] = "nullable"
- STRUCT_FIELD_COMMENT: ClassVar[str] = "comment"
- LIST: ClassVar[str] = "list"
- LIST_ELEMENT_TYPE: ClassVar[str] = "elementType"
- LIST_ELEMENT_NULLABLE: ClassVar[str] = "containsNull"
- MAP: ClassVar[str] = "map"
- MAP_KEY_TYPE: ClassVar[str] = "keyType"
- MAP_VALUE_TYPE: ClassVar[str] = "valueType"
- MAP_VALUE_NULLABLE: ClassVar[str] = "valueContainsNull"
- UNION: ClassVar[str] = "union"
- UNION_TYPES: ClassVar[str] = "types"
- UNPARSED: ClassVar[str] = "unparsed"
- UNPARSED_TYPE: ClassVar[str] = "unparsedType"
- EXTERNAL: ClassVar[str] = "external"
- CATALOG_STRING: ClassVar[str] = "catalogString"
-
- NON_PRIMITIVE_TYPES: ClassVar[Set[Name]] = {
- Name.STRUCT,
- Name.LIST,
- Name.MAP,
- Name.UNION,
- Name.UNPARSED,
- Name.EXTERNAL,
- }
- PRIMITIVE_AND_NULL_TYPES: ClassVar[Set[Name]] = (
- set(list(Name)) - NON_PRIMITIVE_TYPES
- )
-
- DECIMAL_PATTERN: ClassVar[Pattern[str]] = re.compile(
- r"decimal\(\s*(\d+)\s*,\s*(\d+)\s*\)"
- )
- FIXED_PATTERN: ClassVar[Pattern[str]] = re.compile(r"fixed\(\s*(\d+)\s*\)")
- FIXEDCHAR_PATTERN: ClassVar[Pattern[str]] = re.compile(r"char\(\s*(\d+)\s*\)")
- VARCHAR_PATTERN: ClassVar[Pattern[str]] = re.compile(r"varchar\(\s*(\d+)\s*\)")
- TYPES: ClassVar[Mapping] = MappingProxyType(
- {
- type_instance.simple_string(): type_instance
- for type_instance in {
- Types.NullType.get(),
- Types.BooleanType.get(),
- Types.ByteType.get(),
- Types.ByteType.unsigned(),
- Types.IntegerType.get(),
- Types.IntegerType.unsigned(),
- Types.ShortType.get(),
- Types.ShortType.unsigned(),
- Types.LongType.get(),
- Types.LongType.unsigned(),
- Types.FloatType.get(),
- Types.DoubleType.get(),
- Types.DateType.get(),
- Types.TimeType.get(),
- Types.TimestampType.with_time_zone(),
- Types.TimestampType.without_time_zone(),
- Types.IntervalYearType.get(),
- Types.IntervalDayType.get(),
- Types.StringType.get(),
- Types.UUIDType.get(),
- }
- }
- )
-
+class SerdesUtils(SerdesUtilsBase):
@classmethod
def write_data_type(cls, data_type: Type) -> Union[str, Dict[str, Any]]:
"""Write Gravitino Type to JSON data. Used for Gravitino Type JSON
Serialization.
diff --git a/clients/client-python/gravitino/dto/rel/expressions/json_serdes/_helper/serdes_utils.py b/clients/client-python/gravitino/dto/rel/expressions/json_serdes/_helper/serdes_utils.py
index fc4b7734cd..4edfc133f7 100644
--- a/clients/client-python/gravitino/dto/rel/expressions/json_serdes/_helper/serdes_utils.py
+++ b/clients/client-python/gravitino/dto/rel/expressions/json_serdes/_helper/serdes_utils.py
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
-from typing import Any, ClassVar, Dict, cast
+from typing import Any, Dict, cast
from gravitino.api.types.json_serdes._helper.serdes_utils import (
SerdesUtils as TypesSerdesUtils,
@@ -27,17 +27,10 @@ from gravitino.dto.rel.expressions.literal_dto import LiteralDTO
from gravitino.dto.rel.expressions.unparsed_expression_dto import UnparsedExpressionDTO
from gravitino.exceptions.base import IllegalArgumentException
from gravitino.utils.precondition import Precondition
+from gravitino.utils.serdes import SerdesUtilsBase
-class SerdesUtils:
- EXPRESSION_TYPE: ClassVar[str] = "type"
- DATA_TYPE: ClassVar[str] = "dataType"
- LITERAL_VALUE: ClassVar[str] = "value"
- FIELD_NAME: ClassVar[str] = "fieldName"
- FUNCTION_NAME: ClassVar[str] = "funcName"
- FUNCTION_ARGS: ClassVar[str] = "funcArgs"
- UNPARSED_EXPRESSION: ClassVar[str] = "unparsedExpression"
-
+class SerdesUtils(SerdesUtilsBase):
@classmethod
def write_function_arg(cls, arg: FunctionArg) -> Dict[str, Any]:
arg_type = arg.arg_type()
diff --git a/clients/client-python/gravitino/utils/serdes.py b/clients/client-python/gravitino/utils/serdes.py
new file mode 100644
index 0000000000..e67f03edc2
--- /dev/null
+++ b/clients/client-python/gravitino/utils/serdes.py
@@ -0,0 +1,104 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import re
+from collections.abc import Mapping
+from types import MappingProxyType
+from typing import Final, Pattern, Set
+
+from gravitino.api.types.types import Name, Types
+
+
+class SerdesUtilsBase:
+ EXPRESSION_TYPE: Final[str] = "type"
+ DATA_TYPE: Final[str] = "dataType"
+ LITERAL_VALUE: Final[str] = "value"
+ FIELD_NAME: Final[str] = "fieldName"
+ FUNCTION_NAME: Final[str] = "funcName"
+ FUNCTION_ARGS: Final[str] = "funcArgs"
+ UNPARSED_EXPRESSION: Final[str] = "unparsedExpression"
+ TYPE: Final[str] = "type"
+ STRUCT: Final[str] = "struct"
+ FIELDS: Final[str] = "fields"
+ STRUCT_FIELD_NAME: Final[str] = "name"
+ STRUCT_FIELD_NULLABLE: Final[str] = "nullable"
+ STRUCT_FIELD_COMMENT: Final[str] = "comment"
+ LIST: Final[str] = "list"
+ LIST_ELEMENT_TYPE: Final[str] = "elementType"
+ LIST_ELEMENT_NULLABLE: Final[str] = "containsNull"
+ MAP: Final[str] = "map"
+ MAP_KEY_TYPE: Final[str] = "keyType"
+ MAP_VALUE_TYPE: Final[str] = "valueType"
+ MAP_VALUE_NULLABLE: Final[str] = "valueContainsNull"
+ UNION: Final[str] = "union"
+ UNION_TYPES: Final[str] = "types"
+ UNPARSED: Final[str] = "unparsed"
+ UNPARSED_TYPE: Final[str] = "unparsedType"
+ EXTERNAL: Final[str] = "external"
+ CATALOG_STRING: Final[str] = "catalogString"
+
+ PARTITION_TYPE: Final[str] = "type"
+ PARTITION_NAME: Final[str] = "name"
+ FIELD_NAMES: Final[str] = "fieldNames"
+ IDENTITY_PARTITION_VALUES: Final[str] = "values"
+ LIST_PARTITION_LISTS: Final[str] = "lists"
+ RANGE_PARTITION_UPPER: Final[str] = "upper"
+ RANGE_PARTITION_LOWER: Final[str] = "lower"
+
+ NON_PRIMITIVE_TYPES: Final[Set[Name]] = {
+ Name.STRUCT,
+ Name.LIST,
+ Name.MAP,
+ Name.UNION,
+ Name.UNPARSED,
+ Name.EXTERNAL,
+ }
+ PRIMITIVE_AND_NULL_TYPES: Final[Set[Name]] = set(list(Name)) - NON_PRIMITIVE_TYPES
+
+ DECIMAL_PATTERN: Final[Pattern[str]] = re.compile(
+ r"decimal\(\s*(\d+)\s*,\s*(\d+)\s*\)"
+ )
+ FIXED_PATTERN: Final[Pattern[str]] = re.compile(r"fixed\(\s*(\d+)\s*\)")
+ FIXEDCHAR_PATTERN: Final[Pattern[str]] = re.compile(r"char\(\s*(\d+)\s*\)")
+ VARCHAR_PATTERN: Final[Pattern[str]] = re.compile(r"varchar\(\s*(\d+)\s*\)")
+ TYPES: Final[Mapping] = MappingProxyType(
+ {
+ type_instance.simple_string(): type_instance
+ for type_instance in (
+ Types.NullType.get(),
+ Types.BooleanType.get(),
+ Types.ByteType.get(),
+ Types.ByteType.unsigned(),
+ Types.IntegerType.get(),
+ Types.IntegerType.unsigned(),
+ Types.ShortType.get(),
+ Types.ShortType.unsigned(),
+ Types.LongType.get(),
+ Types.LongType.unsigned(),
+ Types.FloatType.get(),
+ Types.DoubleType.get(),
+ Types.DateType.get(),
+ Types.TimeType.get(),
+ Types.TimestampType.with_time_zone(),
+ Types.TimestampType.without_time_zone(),
+ Types.IntervalYearType.get(),
+ Types.IntervalDayType.get(),
+ Types.StringType.get(),
+ Types.UUIDType.get(),
+ )
+ }
+ )