This is an automated email from the ASF dual-hosted git repository.
jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push:
new 106ba422be [#5199] feat(client-python): add index and indexes (#8143)
106ba422be is described below
commit 106ba422bef3520a46e4725441552bc2886894e8
Author: George T. C. Lai <[email protected]>
AuthorDate: Wed Aug 20 17:34:23 2025 +0800
[#5199] feat(client-python): add index and indexes (#8143)
### What changes were proposed in this pull request?
This PR implements the following Python client classes, mirroring their
counterparts in the Java client.
- `Index` in Index.java
- `Indexes` in Indexes.java
### Why are the changes needed?
We need to support table partitioning, bucketing, sort ordering, and
indexes.
#5199
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Unit tests
---------
Signed-off-by: George T. C. Lai <[email protected]>
---
.../gravitino/api/expressions/indexes/__init__.py | 16 +++++
.../gravitino/api/expressions/indexes/index.py | 81 ++++++++++++++++++++++
.../gravitino/api/expressions/indexes/indexes.py | 66 ++++++++++++++++++
.../tests/unittests/rel/test_indexes.py | 45 ++++++++++++
4 files changed, 208 insertions(+)
diff --git
a/clients/client-python/gravitino/api/expressions/indexes/__init__.py
b/clients/client-python/gravitino/api/expressions/indexes/__init__.py
new file mode 100644
index 0000000000..13a83393a9
--- /dev/null
+++ b/clients/client-python/gravitino/api/expressions/indexes/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/clients/client-python/gravitino/api/expressions/indexes/index.py
b/clients/client-python/gravitino/api/expressions/indexes/index.py
new file mode 100644
index 0000000000..82d0931d8f
--- /dev/null
+++ b/clients/client-python/gravitino/api/expressions/indexes/index.py
@@ -0,0 +1,81 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+from abc import ABC, abstractmethod
+from enum import Enum, unique
+from typing import List
+
+
+class Index(ABC):
+ """The Index interface defines methods for implementing table index
columns.
+
+ Currently, settings for `PRIMARY_KEY` and `UNIQUE_KEY` are provided.
+ """
+
+ @unique
+ class IndexType(str, Enum):
+ """The enum IndexType defines the type of the index.
+
+ Currently, `PRIMARY_KEY` and `UNIQUE_KEY` are supported.
+ """
+
+ PRIMARY_KEY = "PRIMARY_KEY"
+ """`PRIMARY` KEY index in a relational database is a field or a
combination of fields that
+ uniquely identifies each record in a table. It serves as a unique
identifier for each row,
+ ensuring that no two rows have the same key. The PRIMARY KEY is used
to establish
+ relationships between tables and enforce the entity integrity of a
database. Additionally, it
+ helps in indexing and organizing the data for efficient retrieval and
maintenance."""
+
+ UNIQUE_KEY = "UNIQUE_KEY"
+ """UNIQUE KEY in a relational database is a field or a combination of
fields that ensures each
+ record in a table has a distinct value or combination of values.
Unlike a primary key, a
+ UNIQUE KEY allows for the presence of null values, but it still
enforces the constraint that
+ no two records can have the same unique key value(s). UNIQUE KEYs are
used to maintain data
+ integrity by preventing duplicate entries in specific columns, and
they can be applied to
+ columns that are not designated as the primary key. The uniqueness
constraint imposed by
+ UNIQUE KEY helps in avoiding redundancy and ensuring data accuracy in
the database."""
+
+ @abstractmethod
+ def type(self) -> IndexType:
+ """Returns the type of the index. eg: `PRIMARY_KEY` and `UNIQUE_KEY`.
+
+ Returns:
+ IndexType: The type of the index. eg: `PRIMARY_KEY` and
`UNIQUE_KEY`.
+ """
+ pass # pragma: no cover
+
+ @abstractmethod
+ def name(self) -> str:
+ """Returns the name of the index.
+
+ Returns:
+ str: The name of the index.
+ """
+ pass # pragma: no cover
+
+ @abstractmethod
+ def field_names(self) -> List[List[str]]:
+ """Returns the field name under the table contained in the index.
+
+ It is the column names, could be "a.b.c" for nested column, but
normally it
+ could only be "a".
+
+ Returns:
+ List[List[str]]: The field name under the table contained in the
index.
+ """
+ pass # pragma: no cover
diff --git a/clients/client-python/gravitino/api/expressions/indexes/indexes.py
b/clients/client-python/gravitino/api/expressions/indexes/indexes.py
new file mode 100644
index 0000000000..f2a75ba768
--- /dev/null
+++ b/clients/client-python/gravitino/api/expressions/indexes/indexes.py
@@ -0,0 +1,66 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+from typing import ClassVar, List, final
+
+from gravitino.api.expressions.indexes.index import Index
+
+
+class Indexes:
+ """Helper methods to create index to pass into Apache Gravitino.
+
+ Attributes:
+ EMPTY_INDEXES (List[Index]):
+ An empty array of indexes.
+ DEFAULT_MYSQL_PRIMARY_KEY_NAME (str):
+ MySQL does not support setting the name of the primary key,
+ so the default name is used.
+ """
+
+ EMPTY_INDEXES: ClassVar[List[Index]] = []
+ DEFAULT_MYSQL_PRIMARY_KEY_NAME: ClassVar[str] = "PRIMARY"
+
+ @staticmethod
+ def unique(name: str, field_names: List[List[str]]) -> Index:
+ return Indexes.IndexImpl(Index.IndexType.UNIQUE_KEY, name, field_names)
+
+ @staticmethod
+ def primary(name: str, field_names: List[List[str]]) -> Index:
+ return Indexes.IndexImpl(Index.IndexType.PRIMARY_KEY, name,
field_names)
+
+ @staticmethod
+ def create_mysql_primary_key(field_names: List[List[str]]) -> Index:
+ return Indexes.primary(Indexes.DEFAULT_MYSQL_PRIMARY_KEY_NAME,
field_names)
+
+ @final
+ class IndexImpl(Index):
+ def __init__(
+ self, index_type: Index.IndexType, name: str, field_names:
List[List[str]]
+ ):
+ self._index_type = index_type
+ self._name = name
+ self._field_names = field_names
+
+ def type(self) -> Index.IndexType:
+ return self._index_type
+
+ def name(self) -> str:
+ return self._name
+
+ def field_names(self) -> List[List[str]]:
+ return self._field_names
diff --git a/clients/client-python/tests/unittests/rel/test_indexes.py
b/clients/client-python/tests/unittests/rel/test_indexes.py
new file mode 100644
index 0000000000..6633d07efe
--- /dev/null
+++ b/clients/client-python/tests/unittests/rel/test_indexes.py
@@ -0,0 +1,45 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import unittest
+
+from gravitino.api.expressions.indexes.index import Index
+from gravitino.api.expressions.indexes.indexes import Indexes
+
+
+class TestIndexes(unittest.TestCase):
+ def test_indexes_class_vars(self):
+ self.assertEqual(Indexes.EMPTY_INDEXES, [])
+ self.assertEqual(Indexes.DEFAULT_MYSQL_PRIMARY_KEY_NAME, "PRIMARY")
+
+ def test_indexes_create_index(self):
+ field_names = [["col_1"], ["col_2"]]
+ unique = Indexes.unique(name="unique", field_names=field_names)
+ primary = Indexes.primary(name="primary", field_names=field_names)
+ mysql_primary =
Indexes.create_mysql_primary_key(field_names=field_names)
+
+ self.assertIs(unique.type(), Index.IndexType.UNIQUE_KEY)
+ self.assertIs(primary.type(), Index.IndexType.PRIMARY_KEY)
+ self.assertIs(mysql_primary.type(), Index.IndexType.PRIMARY_KEY)
+
+ self.assertEqual(unique.name(), "unique")
+ self.assertEqual(primary.name(), "primary")
+ self.assertEqual(mysql_primary.name(), "PRIMARY")
+
+ self.assertListEqual(unique.field_names(), field_names)
+ self.assertListEqual(primary.field_names(), field_names)
+ self.assertListEqual(mysql_primary.field_names(), field_names)