This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new a32b94021f [python] Add doc and refine config for using pyjindosdk in pypaimon (#7565)
a32b94021f is described below

commit a32b94021fefa95e43d8d935b2db337da3e75ed9
Author: timmyyao <[email protected]>
AuthorDate: Tue Mar 31 20:18:27 2026 +0800

    [python] Add doc and refine config for using pyjindosdk in pypaimon (#7565)
    
    Use pyjindosdk as the default OSS implementation for pypaimon whenever
    pyjindosdk is installed. How to fall back to the legacy implementation
    is described in the doc.
---
 docs/content/pypaimon/pyjindosdk-support.md        | 56 ++++++++++++++++++++++
 paimon-python/pypaimon/common/options/config.py    |  4 +-
 .../pypaimon/filesystem/pyarrow_file_io.py         | 23 +++++++--
 paimon-python/pypaimon/tests/file_io_test.py       |  2 +
 4 files changed, 78 insertions(+), 7 deletions(-)

diff --git a/docs/content/pypaimon/pyjindosdk-support.md 
b/docs/content/pypaimon/pyjindosdk-support.md
new file mode 100644
index 0000000000..7db012c928
--- /dev/null
+++ b/docs/content/pypaimon/pyjindosdk-support.md
@@ -0,0 +1,56 @@
+---
+title: "PyJindoSDK Support"
+weight: 8
+type: docs
+aliases:
+  - /pypaimon/pyjindosdk-support.html
+---
+
+# PyJindoSDK Support
+
+## Introduction
+
+[JindoSDK](https://github.com/aliyun/alibabacloud-jindodata) is a high-performance storage SDK developed by Alibaba Cloud for accessing OSS (Object Storage Service) and other cloud storage systems. It provides optimized I/O performance and deep integration with the Alibaba Cloud ecosystem.
+
+PyPaimon now supports using [PyJindoSDK](https://github.com/aliyun/alibabacloud-jindodata) (the Python binding of JindoSDK) to access OSS. Compared to the legacy implementation based on PyArrow's S3FileSystem, PyJindoSDK offers better performance and compatibility when working with OSS.
+
+## Usage
+
+### Installation
+
+Install `pyjindosdk` via pip:
+
+```shell
+pip install pyjindosdk
+```
+
+Once installed, PyPaimon will automatically use PyJindoSDK as the default file I/O implementation for accessing OSS. No additional configuration is required.
+
+### Fallback to Legacy Implementation
+
+Since JindoSDK is a native implementation, pre-built Python packages may not be available for every operating system or platform version. If you need to fall back to the legacy PyArrow-based implementation for any reason, there are two ways to do so:
+
+**Option 1: Set catalog option `fs.oss.impl` to `legacy`**
+
+```python
+from pypaimon import CatalogFactory
+
+catalog_options = {
+    'metastore': 'rest',
+    'uri': 'http://rest-server:8080',
+    'warehouse': 'oss://my-bucket/warehouse',
+
+    # Fall back to the legacy PyArrow S3FileSystem implementation
+    'fs.oss.impl': 'legacy',
+}
+
+catalog = CatalogFactory.create(catalog_options)
+```
+
+**Option 2: Uninstall pyjindosdk**
+
+Simply uninstalling the `pyjindosdk` package will cause PyPaimon to automatically fall back to the legacy implementation:
+
+```shell
+pip uninstall pyjindosdk
+```
diff --git a/paimon-python/pypaimon/common/options/config.py 
b/paimon-python/pypaimon/common/options/config.py
index 83d46c85bf..249ad810a2 100644
--- a/paimon-python/pypaimon/common/options/config.py
+++ b/paimon-python/pypaimon/common/options/config.py
@@ -18,8 +18,8 @@ from pypaimon.common.options.config_options import 
ConfigOptions
 
 
 class OssOptions:
-    OSS_IMPL = 
ConfigOptions.key("fs.oss.impl").string_type().default_value("default").with_description(
-        "OSS filesystem implementation: default or jindo")
+    OSS_IMPL = 
ConfigOptions.key("fs.oss.impl").string_type().default_value("jindo").with_description(
+        "OSS filesystem implementation: legacy or jindo")
     OSS_ACCESS_KEY_ID = 
ConfigOptions.key("fs.oss.accessKeyId").string_type().no_default_value().with_description(
         "OSS access key ID")
     OSS_ACCESS_KEY_SECRET = ConfigOptions.key(
diff --git a/paimon-python/pypaimon/filesystem/pyarrow_file_io.py 
b/paimon-python/pypaimon/filesystem/pyarrow_file_io.py
index 2f4f8fd974..db06eb6f99 100644
--- a/paimon-python/pypaimon/filesystem/pyarrow_file_io.py
+++ b/paimon-python/pypaimon/filesystem/pyarrow_file_io.py
@@ -34,7 +34,7 @@ from pypaimon.common.file_io import FileIO
 from pypaimon.common.options import Options
 from pypaimon.common.options.config import OssOptions, S3Options
 from pypaimon.common.uri_reader import UriReaderFactory
-from pypaimon.filesystem.jindo_file_system_handler import 
JindoFileSystemHandler
+from pypaimon.filesystem.jindo_file_system_handler import 
JindoFileSystemHandler, JINDO_AVAILABLE
 from pypaimon.schema.data_types import (AtomicType, DataField,
                                         PyarrowFieldParser)
 from pypaimon.table.row.blob import Blob, BlobData, BlobDescriptor
@@ -57,12 +57,24 @@ class PyArrowFileIO(FileIO):
         self.uri_reader_factory = UriReaderFactory(catalog_options)
         self._is_oss = scheme in {"oss"}
         self._oss_bucket = None
-        self._oss_impl = self.properties.get(OssOptions.OSS_IMPL)
+        _oss_impl = self.properties.get(OssOptions.OSS_IMPL)
+        self._use_jindo = False
+
         if self._is_oss:
             self._oss_bucket = self._extract_oss_bucket(path)
-            if self._oss_impl == "jindo":
+            if _oss_impl not in ("jindo", "legacy"):
+                raise ValueError(
+                    f"Unsupported fs.oss.impl value: '{_oss_impl}'. "
+                    f"Supported values are 'jindo' and 'legacy'.")
+            if _oss_impl == "legacy":
+                self.filesystem = self._initialize_oss_fs(path)
+            elif JINDO_AVAILABLE:
                 self.filesystem = self._initialize_jindo_fs(path)
             else:
+                self.logger.info(
+                    "fs.oss.impl is 'jindo' but pyjindosdk is not installed. "
+                    "Falling back to legacy PyArrow S3FileSystem 
implementation. "
+                    "Install pyjindosdk for better performance: pip install 
pyjindosdk")
                 self.filesystem = self._initialize_oss_fs(path)
         elif scheme in {"s3", "s3a", "s3n"}:
             self.filesystem = self._initialize_s3_fs()
@@ -126,6 +138,7 @@ class PyArrowFileIO(FileIO):
         self.logger.info(f"Initializing JindoFileSystem for OSS access: 
{path}")
         root_path = f"oss://{self._oss_bucket}/"
         fs_handler = JindoFileSystemHandler(root_path, self.properties)
+        self._use_jindo = True
         return pafs.PyFileSystem(fs_handler)
 
     def _initialize_oss_fs(self, path) -> FileSystem:
@@ -210,7 +223,7 @@ class PyArrowFileIO(FileIO):
     def new_output_stream(self, path: str):
         path_str = self.to_filesystem_path(path)
 
-        if self._oss_impl == "jindo":
+        if self._use_jindo:
             pass
         elif self._is_oss and not self._pyarrow_gte_7:
             # For PyArrow 6.x + OSS, path_str is already just the key part
@@ -592,7 +605,7 @@ class PyArrowFileIO(FileIO):
             path_part = normalized_path.lstrip('/')
             return f"{drive_letter}:/{path_part}" if path_part else 
f"{drive_letter}:"
 
-        if self._oss_impl == "jindo":
+        if self._use_jindo:
             # For JindoFileSystem, pass key only
             path_part = normalized_path.lstrip('/')
             return path_part if path_part else '.'
diff --git a/paimon-python/pypaimon/tests/file_io_test.py 
b/paimon-python/pypaimon/tests/file_io_test.py
index 5cc4d7a821..d39d0c6461 100644
--- a/paimon-python/pypaimon/tests/file_io_test.py
+++ b/paimon-python/pypaimon/tests/file_io_test.py
@@ -70,6 +70,7 @@ class FileIOTest(unittest.TestCase):
             OssOptions.OSS_ENDPOINT.key(): 'oss-cn-hangzhou.aliyuncs.com',
             OssOptions.OSS_ACCESS_KEY_ID.key(): 'test-key',
             OssOptions.OSS_ACCESS_KEY_SECRET.key(): 'test-secret',
+            OssOptions.OSS_IMPL.key(): 'legacy',
         }))
         got = oss_io.to_filesystem_path("oss://test-bucket/path/to/file.txt")
         self.assertEqual(got, "path/to/file.txt" if lt7 else 
"test-bucket/path/to/file.txt")
@@ -291,6 +292,7 @@ class FileIOTest(unittest.TestCase):
                 OssOptions.OSS_ENDPOINT.key(): 'oss-cn-hangzhou.aliyuncs.com',
                 OssOptions.OSS_ACCESS_KEY_ID.key(): 'test-key',
                 OssOptions.OSS_ACCESS_KEY_SECRET.key(): 'test-secret',
+                OssOptions.OSS_IMPL.key(): 'legacy',
             }))
             mock_fs = MagicMock()
             mock_fs.get_file_info.return_value = [

Reply via email to