This is an automated email from the ASF dual-hosted git repository.
jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push:
new b888447c24 [#7670] feat(client-python): Support config request timeout
for Gravitino Python client (#7679)
b888447c24 is described below
commit b888447c2490d846bba7fe85e1e526631f112a96
Author: qbhan <[email protected]>
AuthorDate: Mon Aug 11 10:27:21 2025 +0800
[#7670] feat(client-python): Support config request timeout for Gravitino
Python client (#7679)
### What changes were proposed in this pull request?
Support configuring the request timeout for the Gravitino Python client.
### Why are the changes needed?
Currently, the Python client request timeout is a constant value (10s).
Fix: #7670
### Does this PR introduce _any_ user-facing change?
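- add a `client_config` parameter to `GravitinoClientBase` (and `GravitinoClient`)
- add the `gvfs_gravitino_client_` config key prefix (`GVFS_FILESYSTEM_CLIENT_CONFIG_PREFIX`) to `GVFSConfig`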
### How was this patch tested?
Tested locally; unit tests added in `test_gravitino_client.py` and `test_gvfs_with_local.py`.
---
.../gravitino/client/gravitino_client.py | 6 +-
.../gravitino/client/gravitino_client_base.py | 6 +-
.../gravitino/client/gravitino_client_config.py | 130 +++++++++++++++++++++
.../gravitino/filesystem/gvfs_base_operations.py | 17 ++-
.../gravitino/filesystem/gvfs_config.py | 3 +
.../gravitino/filesystem/gvfs_utils.py | 4 +
.../client-python/gravitino/utils/http_client.py | 10 +-
.../tests/unittests/test_gravitino_client.py | 48 ++++++++
.../tests/unittests/test_gvfs_with_local.py | 20 ++++
...va-client.md => how-to-use-gravitino-client.md} | 41 ++++++-
docs/how-to-use-gvfs.md | 7 ++
11 files changed, 279 insertions(+), 13 deletions(-)
diff --git a/clients/client-python/gravitino/client/gravitino_client.py
b/clients/client-python/gravitino/client/gravitino_client.py
index a8c3cd8535..cc70cdd971 100644
--- a/clients/client-python/gravitino/client/gravitino_client.py
+++ b/clients/client-python/gravitino/client/gravitino_client.py
@@ -40,6 +40,7 @@ class GravitinoClient(GravitinoClientBase):
check_version: bool = True,
auth_data_provider: AuthDataProvider = None,
request_headers: dict = None,
+ client_config: dict = None,
):
"""Constructs a new GravitinoClient with the given URI, authenticator
and AuthDataProvider.
@@ -48,11 +49,14 @@ class GravitinoClient(GravitinoClientBase):
metalake_name: The specified metalake name.
auth_data_provider: The provider of the data which is used for
authentication.
request_headers: The headers to be included in the HTTP requests.
+ client_config: The config properties for the HTTP Client
Raises:
NoSuchMetalakeException if the metalake with specified name does
not exist.
"""
- super().__init__(uri, check_version, auth_data_provider,
request_headers)
+ super().__init__(
+ uri, check_version, auth_data_provider, request_headers,
client_config
+ )
self.check_metalake_name(metalake_name)
self._metalake = super().load_metalake(metalake_name)
diff --git a/clients/client-python/gravitino/client/gravitino_client_base.py
b/clients/client-python/gravitino/client/gravitino_client_base.py
index 7a1744595f..2aeeba99d3 100644
--- a/clients/client-python/gravitino/client/gravitino_client_base.py
+++ b/clients/client-python/gravitino/client/gravitino_client_base.py
@@ -57,9 +57,13 @@ class GravitinoClientBase:
check_version: bool = True,
auth_data_provider: AuthDataProvider = None,
request_headers: dict = None,
+ client_config: dict = None,
):
self._rest_client = HTTPClient(
- uri, auth_data_provider=auth_data_provider,
request_headers=request_headers
+ uri,
+ auth_data_provider=auth_data_provider,
+ request_headers=request_headers,
+ client_config=client_config,
)
if check_version:
self.check_version()
diff --git a/clients/client-python/gravitino/client/gravitino_client_config.py
b/clients/client-python/gravitino/client/gravitino_client_config.py
new file mode 100644
index 0000000000..fac53a90cd
--- /dev/null
+++ b/clients/client-python/gravitino/client/gravitino_client_config.py
@@ -0,0 +1,130 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import TypeVar
+
+from gravitino.constants.timeout import TIMEOUT
+
+T = TypeVar("T")
+
+
+class GravitinoClientConfig:
+ """
+ Configuration class for Gravitino Python client;
+ It encapsulates HTTP connection configurations and validates client
properties.
+ """
+
+ POSITIVE_NUMBER_ERROR_MSG: str = "The value must be a positive number"
+ """Error message for positive number validation"""
+
+ GRAVITINO_CLIENT_CONFIG_PREFIX: str = "gravitino_client_"
+ """Configuration key prefix for Gravitino client config"""
+
+ CLIENT_REQUEST_TIMEOUT_DEFAULT: int = TIMEOUT
+ """Default HTTP request timeout in seconds"""
+
+ CLIENT_REQUEST_TIMEOUT: str = "gravitino_client_request_timeout"
+ """Configuration key for request timeout"""
+
+ SUPPORT_CLIENT_CONFIG_KEYS: set = {CLIENT_REQUEST_TIMEOUT}
+ """Set of supported configuration keys"""
+
+ def __init__(self, properties: dict):
+ """Initializes the configuration with validated properties
+
+ Args:
+ properties: Key-value pairs of configuration parameters
+ """
+ self._properties = properties
+
+ @classmethod
+ def build_from_properties(cls, properties: dict) ->
"GravitinoClientConfig":
+ """Factory method to create configuration from properties
+
+ Args:
+ properties: Input configuration dictionary
+
+ Returns:
+ GravitinoClientConfig instance
+
+ Raises:
+ ValueError: If unsupported keys are provided
+ """
+ if properties is None:
+ return cls({})
+ for key in properties:
+ if key not in cls.SUPPORT_CLIENT_CONFIG_KEYS:
+ raise ValueError(f"Invalid property for client: {key}")
+ return cls({key: properties[key] for key in properties})
+
+ def get_client_request_timeout(self) -> int:
+ """Retrieves and validates HTTP request timeout
+
+ Returns:
+ Timeout value in seconds
+
+ Raises:
+ IllegalArgumentException: If value is negative or value cannot be
converted to integer
+ """
+ timeout = self._property_as_int(
+ self.CLIENT_REQUEST_TIMEOUT, self.CLIENT_REQUEST_TIMEOUT_DEFAULT
+ )
+ self._check_value(
+ self.CLIENT_REQUEST_TIMEOUT,
+ timeout,
+ lambda x: x >= 0,
+ self.POSITIVE_NUMBER_ERROR_MSG,
+ )
+ return timeout
+
+ def _property_as_int(self, key: str, default: int) -> int:
+ """Safely converts property value to integer
+
+ Args:
+ key: Configuration key to retrieve
+ default: Default value if key not found
+
+ Returns:
+ Converted integer value
+
+ Raises:
+ ValueError: If value cannot be converted to integer
+ """
+ value = self._properties.get(key)
+ if value is None:
+ return default
+ try:
+ return int(value)
+ except (TypeError, ValueError) as err:
+ raise ValueError(
+ f"Value '{value}' for key '{key}' must be an integer"
+ ) from err
+
+ def _check_value(self, key: str, value: T, validator: callable, error_msg:
str):
+ """Generic validation method for configuration values
+
+ Args:
+ key: Config key
+ value: Value to validate
+ validator: Validation function returning boolean
+ error_msg: Error message template
+
+ Raises:
+ ValueError: If validation fails
+ """
+ if not validator(value):
+ raise ValueError(f"Value '{value}' for key '{key}' is invalid.
{error_msg}")
diff --git a/clients/client-python/gravitino/filesystem/gvfs_base_operations.py
b/clients/client-python/gravitino/filesystem/gvfs_base_operations.py
index 02eb1cca0a..30cd5dd048 100644
--- a/clients/client-python/gravitino/filesystem/gvfs_base_operations.py
+++ b/clients/client-python/gravitino/filesystem/gvfs_base_operations.py
@@ -34,6 +34,7 @@ from gravitino.audit.fileset_data_operation import
FilesetDataOperation
from gravitino.audit.internal_client_type import InternalClientType
from gravitino.client.fileset_catalog import FilesetCatalog
from gravitino.client.generic_fileset import GenericFileset
+from gravitino.client.gravitino_client_config import GravitinoClientConfig
from gravitino.exceptions.base import (
GravitinoRuntimeException,
NoSuchLocationNameException,
@@ -101,8 +102,22 @@ class BaseGVFSOperations(ABC):
)
}
)
+
+ client_config = (
+ None
+ if options is None
+ else {
+ key.replace(
+ GVFSConfig.GVFS_FILESYSTEM_CLIENT_CONFIG_PREFIX,
+ GravitinoClientConfig.GRAVITINO_CLIENT_CONFIG_PREFIX,
+ ): value
+ for key, value in options.items()
+ if
key.startswith(GVFSConfig.GVFS_FILESYSTEM_CLIENT_CONFIG_PREFIX)
+ }
+ )
+
self._client = create_client(
- options, server_uri, metalake_name, request_headers
+ options, server_uri, metalake_name, request_headers, client_config
)
cache_size = (
diff --git a/clients/client-python/gravitino/filesystem/gvfs_config.py
b/clients/client-python/gravitino/filesystem/gvfs_config.py
index 9dd5aab451..507528ecd0 100644
--- a/clients/client-python/gravitino/filesystem/gvfs_config.py
+++ b/clients/client-python/gravitino/filesystem/gvfs_config.py
@@ -73,3 +73,6 @@ class GVFSConfig:
# The configuration key for whether to enable credential vending. The
default is false.
GVFS_FILESYSTEM_ENABLE_CREDENTIAL_VENDING = "enable_credential_vending"
+
+ # The configuration key prefix for the client.
+ GVFS_FILESYSTEM_CLIENT_CONFIG_PREFIX = "gvfs_gravitino_client_"
diff --git a/clients/client-python/gravitino/filesystem/gvfs_utils.py
b/clients/client-python/gravitino/filesystem/gvfs_utils.py
index bb4553288a..ac71af815a 100644
--- a/clients/client-python/gravitino/filesystem/gvfs_utils.py
+++ b/clients/client-python/gravitino/filesystem/gvfs_utils.py
@@ -48,12 +48,14 @@ def create_client(
server_uri: str,
metalake_name: str,
request_headers: dict = None,
+ client_config: dict = None,
):
"""Create the Gravitino client.
:param options: The options
:param server_uri: The server URI
:param metalake_name: The metalake name
:param request_headers: The request headers
+ :param client_config: The client config
:return The Gravitino client
"""
auth_type = (
@@ -68,6 +70,7 @@ def create_client(
metalake_name=metalake_name,
auth_data_provider=SimpleAuthProvider(),
request_headers=request_headers,
+ client_config=client_config,
)
if auth_type == GVFSConfig.OAUTH2_AUTH_TYPE:
@@ -92,6 +95,7 @@ def create_client(
metalake_name=metalake_name,
auth_data_provider=oauth2_token_provider,
request_headers=request_headers,
+ client_config=client_config,
)
raise GravitinoRuntimeException(
diff --git a/clients/client-python/gravitino/utils/http_client.py
b/clients/client-python/gravitino/utils/http_client.py
index a99b5bed0c..0aaa0e3940 100644
--- a/clients/client-python/gravitino/utils/http_client.py
+++ b/clients/client-python/gravitino/utils/http_client.py
@@ -31,10 +31,9 @@ import json as _json
from gravitino.auth.auth_constants import AuthConstants
from gravitino.auth.auth_data_provider import AuthDataProvider
+from gravitino.client.gravitino_client_config import GravitinoClientConfig
from gravitino.typing import JSONType
-from gravitino.constants.timeout import TIMEOUT
-
from gravitino.dto.responses.error_response import ErrorResponse
from gravitino.dto.responses.oauth2_error_response import OAuth2ErrorResponse
from gravitino.exceptions.base import RESTException, UnknownError
@@ -86,13 +85,16 @@ class HTTPClient:
host,
*,
request_headers=None,
- timeout=TIMEOUT,
+ client_config=None,
is_debug=False,
auth_data_provider: AuthDataProvider = None,
) -> None:
+ gravitino_client_config = GravitinoClientConfig.build_from_properties(
+ client_config
+ )
self.host = host
self.request_headers = request_headers or {}
- self.timeout = timeout
+ self.timeout = gravitino_client_config.get_client_request_timeout()
self.is_debug = is_debug
self.auth_data_provider = auth_data_provider
diff --git a/clients/client-python/tests/unittests/test_gravitino_client.py
b/clients/client-python/tests/unittests/test_gravitino_client.py
index ef8e87615a..103b8cd396 100644
--- a/clients/client-python/tests/unittests/test_gravitino_client.py
+++ b/clients/client-python/tests/unittests/test_gravitino_client.py
@@ -17,6 +17,8 @@
import unittest
from gravitino import GravitinoAdminClient, GravitinoClient
+from gravitino.client.gravitino_client_config import GravitinoClientConfig
+from gravitino.constants.timeout import TIMEOUT
from tests.unittests import mock_base
@@ -43,3 +45,49 @@ class TestMetalake(unittest.TestCase):
self.assertEqual(
expected_headers, gravitino_client._rest_client.request_headers
)
+
+ def test_gravitino_client_timeout(self, *mock_methods):
+ gravitino_admin_client = GravitinoAdminClient(
+ uri="http://localhost:8090",
+ )
+ self.assertEqual(TIMEOUT, gravitino_admin_client._rest_client.timeout)
+
+ gravitino_admin_client = GravitinoAdminClient(
+ uri="http://localhost:8090",
+ client_config={"gravitino_client_request_timeout": 60},
+ )
+ self.assertEqual(60, gravitino_admin_client._rest_client.timeout)
+
+ gravitino_client = GravitinoClient(
+ uri="http://localhost:8090",
+ metalake_name="test",
+ client_config={"gravitino_client_request_timeout": 60},
+ )
+ self.assertEqual(60, gravitino_client._rest_client.timeout)
+
+ def test_invalid_gravitino_client_config(self, *mock_methods):
+ # test invalid config
+ self.assertRaisesRegex(
+ ValueError,
+ "Invalid property for client:
gravitino_client_request_timeout_xxxxxx",
+ GravitinoClientConfig.build_from_properties,
+ {"gravitino_client_request_timeout_xxxxxx": 1},
+ )
+
+ client_config = GravitinoClientConfig.build_from_properties(
+ {"gravitino_client_request_timeout": -1}
+ )
+ self.assertRaisesRegex(
+ ValueError,
+ "Value '-1' for key 'gravitino_client_request_timeout' is invalid.
The value must be a positive number",
+ client_config.get_client_request_timeout,
+ )
+
+ client_config = GravitinoClientConfig.build_from_properties(
+ {"gravitino_client_request_timeout": "a"}
+ )
+ self.assertRaisesRegex(
+ ValueError,
+ "Value 'a' for key 'gravitino_client_request_timeout' must be an
integer",
+ client_config.get_client_request_timeout,
+ )
diff --git a/clients/client-python/tests/unittests/test_gvfs_with_local.py
b/clients/client-python/tests/unittests/test_gvfs_with_local.py
index 30b5c0f436..bcc873b9d3 100644
--- a/clients/client-python/tests/unittests/test_gvfs_with_local.py
+++ b/clients/client-python/tests/unittests/test_gvfs_with_local.py
@@ -32,6 +32,7 @@ from fsspec.implementations.local import LocalFileSystem
from gravitino import gvfs, NameIdentifier, Fileset
from gravitino.auth.auth_constants import AuthConstants
+from gravitino.constants.timeout import TIMEOUT
from gravitino.exceptions.base import (
GravitinoRuntimeException,
IllegalArgumentException,
@@ -95,6 +96,25 @@ class TestLocalFilesystem(unittest.TestCase):
headers = fs._operations._client._rest_client.request_headers
self.assertEqual(headers["k1"], "v1")
+ def test_request_timeout(self, *mock_methods):
+ fs = gvfs.GravitinoVirtualFileSystem(
+ server_uri="http://localhost:9090",
+ metalake_name="metalake_demo",
+ skip_instance_cache=True,
+ )
+ self.assertEqual(fs._operations._client._rest_client.timeout, TIMEOUT)
+
+ options = {
+
f"{GVFSConfig.GVFS_FILESYSTEM_CLIENT_CONFIG_PREFIX}request_timeout": 60,
+ }
+ fs = gvfs.GravitinoVirtualFileSystem(
+ server_uri="http://localhost:9090",
+ metalake_name="metalake_demo",
+ options=options,
+ skip_instance_cache=True,
+ )
+ self.assertEqual(fs._operations._client._rest_client.timeout, 60)
+
def test_cache(self, *mock_methods):
fileset_storage_location = f"{self._fileset_dir}/test_cache"
fileset_virtual_location = "fileset/fileset_catalog/tmp/test_cache"
diff --git a/docs/how-to-use-java-client.md
b/docs/how-to-use-gravitino-client.md
similarity index 59%
rename from docs/how-to-use-java-client.md
rename to docs/how-to-use-gravitino-client.md
index b78e216989..2f559f3a27 100644
--- a/docs/how-to-use-java-client.md
+++ b/docs/how-to-use-gravitino-client.md
@@ -1,22 +1,24 @@
---
-title: "How to use Apache Gravitino Java client"
-slug: /how-to-use-gravitino-java-client
+title: "How to use Apache Gravitino client"
+slug: /how-to-use-gravitino-client
date: 2025-07-09
-keyword: Gravitino Java client
+keyword: Gravitino client
license: This software is licensed under the Apache License version 2.
---
## Introduction
-You can use Gravitino Java client library with Spark, Spring and other Java
environment.
+You can use Gravitino Java client library with Spark, Spring and other Java
environment or
+use the Gravitino Python client library with Spark, PyTorch, TensorFlow, Ray and
other Python environments.
First of all, you must have a Gravitino server set up and run, you can refer
document of
[how to install Gravitino](./how-to-install.md) to build Gravitino server from
source code and
install it in your local.
-## Gravitino Java client configurations
+## Gravitino Java client
+
+You can customize the Gravitino Java client by using `withClientConfig` like
this:
-You can customize the Gravitino Java client by using `withClientConfig`.
```java
Map<String, String> properties =
ImmutableMap.of(
@@ -42,4 +44,31 @@ GravitinoAdminClient gravitinoAdminClient =
GravitinoAdminClient.builder("http:/
| `gravitino.client.connectionTimeoutMs` | An optional http connection timeout
in milliseconds. | `180000`(3 minutes) | No | 1.0.0 |
| `gravitino.client.socketTimeoutMs` | An optional http socket timeout in
milliseconds. | `180000`(3 minutes) | No | 1.0.0 |
+**Note:** Invalid configuration properties will result in exceptions.
+
+## Gravitino Python client
+
+You can customize the Gravitino Python client with config properties like this:
+
+```python
+gravitino_admin_client = GravitinoAdminClient(
+ uri="http://localhost:8090",
+ client_config={"gravitino_client_request_timeout": 60},
+)
+# ...
+
+gravitino_client = GravitinoClient(
+ uri="http://localhost:8090",
+ metalake_name="test",
+ client_config={"gravitino_client_request_timeout": 60},
+)
+# ...
+```
+
+### Gravitino Python client configuration
+
+| Configuration item | Description
| Default value | Required | Since version |
+|------------------------------------|----------------------------------------|---------------|----------|---------------|
+| `gravitino_client_request_timeout` | An optional client timeout in seconds.
| `10` | No | 1.0.0 |
+
**Note:** Invalid configuration properties will result in exceptions.
diff --git a/docs/how-to-use-gvfs.md b/docs/how-to-use-gvfs.md
index c3648f35c7..4a197862ef 100644
--- a/docs/how-to-use-gvfs.md
+++ b/docs/how-to-use-gvfs.md
@@ -387,6 +387,13 @@ to recompile the native libraries like `libhdfs` and
others, and completely repl
| `hook_class` | The hook class to inject into the
Gravitino Virtual File System. Users can implement their own
`GravitinoVirtualFileSystemHook` and configure the class name in this conf to
inject custom code.
|
`gravitino.filesystem.gvfs_hook.NoOpHook` | No
| 0.9.0-incubating |
| `client_request_header_` | The configuration key prefix for the
Gravitino client request header. You can set the request header for the
Gravitino client.
| (none)
| No | 0.9.0-incubating |
| `enable_credential_vending` | Whether to enable credential vending for
the Gravitino Virtual File System.
| `false`
| No | 0.9.0-incubating |
+| `gvfs_gravitino_client_` | The configuration key prefix for the
Gravitino client. You can set the config for the Gravitino client.
| (none)
| No | 1.0.0 |
+
+To configure the Gravitino Python client, use properties prefixed with
`gvfs_gravitino_client_`. These properties undergo automatic transformation:
the prefix is replaced with `gravitino_client_` and passed to the Gravitino
Python client.
+
+**Example:** Setting `gvfs_gravitino_client_request_timeout` is equivalent to
setting `gravitino_client_request_timeout` for the Gravitino Python client.
+
+**Note:** Invalid configuration properties will result in exceptions. Please
see [Gravitino Python client
configurations](./how-to-use-gravitino-client.md#gravitino-python-client-configuration)
for the supported client configuration properties.
#### Configurations for S3, GCS, OSS and Azure Blob storage fileset