This is an automated email from the ASF dual-hosted git repository.

raulcd pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 0124d5b5b7 GH-49078: [FS][Azure] Fix lossy pickling of 
`SubTreeFileSystem(base_path, AzureFileSystem(...))` (#49140)
0124d5b5b7 is described below

commit 0124d5b5b7f60b6ae6312bbb962dcff6dd4bc641
Author: Thomas Newton <[email protected]>
AuthorDate: Mon Mar 2 10:28:25 2026 +0000

    GH-49078: [FS][Azure] Fix lossy pickling of `SubTreeFileSystem(base_path, 
AzureFileSystem(...))` (#49140)
    
    ### Rationale for this change
    Fix https://github.com/apache/arrow/issues/49078
    
    ### What changes are included in this PR?
    - Implement getters on the C++ side of `AzureOptions`, for the values that 
are currently stored only on the python side.
      - This required adding some more member variables
    - I decided to add `ClearCredentials`, so that it can't get into strange 
states by configuring one credential type then another. IMO configuring the 
credentials during initialisation on the `AzureOptions` would be neater but I 
don't want to make this PR too big.
    - Update the C++ side `AzureOptions::Equals`
    - Remove python side `self` attributes and instead depend on getters from 
C++ side.
    
    ### Are these changes tested?
    - Updated tests on the C++ side for the updated `Equals` and newly added 
getter methods.
    - Added a fixture `pickle_with_and_without_subtree_filesystem`, which can 
be used in place of the `pickle_module`. This adds test combinations with and 
without wrapping the filesystem in a `SubTreeFileSystem` before pickling it.
    
    ### Are there any user-facing changes?
    Only that pickling `SubTreeFileSystem(base_path, AzureFileSystem(...))` now 
works properly.
    
    * GitHub Issue: #49078
    
    Authored-by: Thomas Newton <[email protected]>
    Signed-off-by: Raúl Cumplido <[email protected]>
---
 cpp/src/arrow/filesystem/azurefs.cc      | 45 +++++++++++++---
 cpp/src/arrow/filesystem/azurefs.h       | 15 ++++--
 cpp/src/arrow/filesystem/azurefs_test.cc | 92 ++++++++++++++++++++++++++++++--
 python/pyarrow/_azurefs.pyx              | 41 ++++++--------
 python/pyarrow/includes/libarrow_fs.pxd  |  5 ++
 python/pyarrow/tests/test_fs.py          | 47 +++++++++++++---
 6 files changed, 194 insertions(+), 51 deletions(-)

diff --git a/cpp/src/arrow/filesystem/azurefs.cc 
b/cpp/src/arrow/filesystem/azurefs.cc
index 7aa3e58c1d..20b0a655d8 100644
--- a/cpp/src/arrow/filesystem/azurefs.cc
+++ b/cpp/src/arrow/filesystem/azurefs.cc
@@ -248,13 +248,17 @@ Result<AzureOptions> AzureOptions::FromUri(const 
std::string& uri_string,
 }
 
 bool AzureOptions::Equals(const AzureOptions& other) const {
-  const bool equals = blob_storage_authority == other.blob_storage_authority &&
-                      dfs_storage_authority == other.dfs_storage_authority &&
-                      blob_storage_scheme == other.blob_storage_scheme &&
-                      dfs_storage_scheme == other.dfs_storage_scheme &&
-                      default_metadata == other.default_metadata &&
-                      account_name == other.account_name &&
-                      credential_kind_ == other.credential_kind_;
+  const bool equals =
+      account_name == other.account_name &&
+      blob_storage_authority == other.blob_storage_authority &&
+      dfs_storage_authority == other.dfs_storage_authority &&
+      blob_storage_scheme == other.blob_storage_scheme &&
+      dfs_storage_scheme == other.dfs_storage_scheme &&
+      default_metadata == other.default_metadata &&
+      background_writes == other.background_writes &&
+      credential_kind_ == other.credential_kind_ && account_key_ == 
other.account_key_ &&
+      sas_token_ == other.sas_token_ && tenant_id_ == other.tenant_id_ &&
+      client_id_ == other.client_id_ && client_secret_ == other.client_secret_;
   if (!equals) {
     return false;
   }
@@ -318,39 +322,59 @@ std::string AzureOptions::AccountDfsUrl(const 
std::string& account_name) const {
   return BuildBaseUrl(dfs_storage_scheme, dfs_storage_authority, account_name);
 }
 
+void AzureOptions::ClearCredentials() {
+  credential_kind_ = CredentialKind::kDefault;
+  storage_shared_key_credential_ = nullptr;
+  account_key_.clear();
+  sas_token_.clear();
+  tenant_id_.clear();
+  client_id_.clear();
+  client_secret_.clear();
+  token_credential_ = nullptr;
+}
+
 Status AzureOptions::ConfigureDefaultCredential() {
+  ClearCredentials();
   credential_kind_ = CredentialKind::kDefault;
   token_credential_ = 
std::make_shared<Azure::Identity::DefaultAzureCredential>();
   return Status::OK();
 }
 
 Status AzureOptions::ConfigureAnonymousCredential() {
+  ClearCredentials();
   credential_kind_ = CredentialKind::kAnonymous;
   return Status::OK();
 }
 
 Status AzureOptions::ConfigureAccountKeyCredential(const std::string& 
account_key) {
+  ClearCredentials();
   credential_kind_ = CredentialKind::kStorageSharedKey;
   if (account_name.empty()) {
     return Status::Invalid("AzureOptions doesn't contain a valid account 
name");
   }
+  account_key_ = account_key;
   storage_shared_key_credential_ =
       std::make_shared<Storage::StorageSharedKeyCredential>(account_name, 
account_key);
   return Status::OK();
 }
 
 Status AzureOptions::ConfigureSASCredential(const std::string& sas_token) {
-  credential_kind_ = CredentialKind::kSASToken;
+  ClearCredentials();
   if (account_name.empty()) {
     return Status::Invalid("AzureOptions doesn't contain a valid account 
name");
   }
   sas_token_ = sas_token;
+  credential_kind_ = CredentialKind::kSASToken;
   return Status::OK();
 }
 
 Status AzureOptions::ConfigureClientSecretCredential(const std::string& 
tenant_id,
                                                      const std::string& 
client_id,
                                                      const std::string& 
client_secret) {
+  ClearCredentials();
+  tenant_id_ = tenant_id;
+  client_id_ = client_id;
+  client_secret_ = client_secret;
   credential_kind_ = CredentialKind::kClientSecret;
   token_credential_ = 
std::make_shared<Azure::Identity::ClientSecretCredential>(
       tenant_id, client_id, client_secret);
@@ -358,6 +382,8 @@ Status AzureOptions::ConfigureClientSecretCredential(const 
std::string& tenant_i
 }
 
 Status AzureOptions::ConfigureManagedIdentityCredential(const std::string& 
client_id) {
+  ClearCredentials();
+  client_id_ = client_id;
   credential_kind_ = CredentialKind::kManagedIdentity;
   token_credential_ =
       std::make_shared<Azure::Identity::ManagedIdentityCredential>(client_id);
@@ -365,18 +391,21 @@ Status 
AzureOptions::ConfigureManagedIdentityCredential(const std::string& clien
 }
 
 Status AzureOptions::ConfigureCLICredential() {
+  ClearCredentials();
   credential_kind_ = CredentialKind::kCLI;
   token_credential_ = std::make_shared<Azure::Identity::AzureCliCredential>();
   return Status::OK();
 }
 
 Status AzureOptions::ConfigureWorkloadIdentityCredential() {
+  ClearCredentials();
   credential_kind_ = CredentialKind::kWorkloadIdentity;
   token_credential_ = 
std::make_shared<Azure::Identity::WorkloadIdentityCredential>();
   return Status::OK();
 }
 
 Status AzureOptions::ConfigureEnvironmentCredential() {
+  ClearCredentials();
   credential_kind_ = CredentialKind::kEnvironment;
   token_credential_ = 
std::make_shared<Azure::Identity::EnvironmentCredential>();
   return Status::OK();
diff --git a/cpp/src/arrow/filesystem/azurefs.h 
b/cpp/src/arrow/filesystem/azurefs.h
index ae374d487b..d21bcd08df 100644
--- a/cpp/src/arrow/filesystem/azurefs.h
+++ b/cpp/src/arrow/filesystem/azurefs.h
@@ -99,10 +99,6 @@ struct ARROW_EXPORT AzureOptions {
   /// Default: "https"
   std::string dfs_storage_scheme = "https";
 
-  // TODO(GH-38598): Add support for more auth methods.
-  // std::string connection_string;
-  // std::string sas_token;
-
   /// \brief Default metadata for OpenOutputStream.
   ///
   /// This will be ignored if non-empty metadata is passed to OpenOutputStream.
@@ -126,7 +122,11 @@ struct ARROW_EXPORT AzureOptions {
 
   std::shared_ptr<Azure::Storage::StorageSharedKeyCredential>
       storage_shared_key_credential_;
+  std::string account_key_;
   std::string sas_token_;
+  std::string tenant_id_;
+  std::string client_id_;
+  std::string client_secret_;
   mutable std::shared_ptr<Azure::Core::Credentials::TokenCredential> 
token_credential_;
 
  public:
@@ -136,6 +136,7 @@ struct ARROW_EXPORT AzureOptions {
  private:
   void ExtractFromUriSchemeAndHierPart(const Uri& uri, std::string* out_path);
   Status ExtractFromUriQuery(const Uri& uri);
+  void ClearCredentials();
 
  public:
   /// \brief Construct a new AzureOptions from an URI.
@@ -204,6 +205,12 @@ struct ARROW_EXPORT AzureOptions {
   std::string AccountBlobUrl(const std::string& account_name) const;
   std::string AccountDfsUrl(const std::string& account_name) const;
 
+  std::string AccountKey() const { return account_key_; }
+  std::string SasToken() const { return sas_token_; }
+  std::string TenantId() const { return tenant_id_; }
+  std::string ClientId() const { return client_id_; }
+  std::string ClientSecret() const { return client_secret_; }
+
   Result<std::unique_ptr<Azure::Storage::Blobs::BlobServiceClient>>
   MakeBlobServiceClient() const;
 
diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc 
b/cpp/src/arrow/filesystem/azurefs_test.cc
index c3af6fb079..4cd4250555 100644
--- a/cpp/src/arrow/filesystem/azurefs_test.cc
+++ b/cpp/src/arrow/filesystem/azurefs_test.cc
@@ -493,6 +493,11 @@ TEST(AzureFileSystem, InitializeWithDefaultCredential) {
   AzureOptions options;
   options.account_name = "dummy-account-name";
   ARROW_EXPECT_OK(options.ConfigureDefaultCredential());
+  ASSERT_EQ(options.AccountKey(), "");
+  ASSERT_EQ(options.SasToken(), "");
+  ASSERT_EQ(options.TenantId(), "");
+  ASSERT_EQ(options.ClientId(), "");
+  ASSERT_EQ(options.ClientSecret(), "");
   EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
 }
 
@@ -509,6 +514,23 @@ TEST(AzureFileSystem, InitializeWithAnonymousCredential) {
   AzureOptions options;
   options.account_name = "dummy-account-name";
   ARROW_EXPECT_OK(options.ConfigureAnonymousCredential());
+  ASSERT_EQ(options.AccountKey(), "");
+  ASSERT_EQ(options.SasToken(), "");
+  ASSERT_EQ(options.TenantId(), "");
+  ASSERT_EQ(options.ClientId(), "");
+  ASSERT_EQ(options.ClientSecret(), "");
+  EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
+}
+
+TEST(AzureFileSystem, InitializeWithAccountKeyCredential) {
+  AzureOptions options;
+  options.account_name = "dummy-account-name";
+  ARROW_EXPECT_OK(options.ConfigureAccountKeyCredential("account_key"));
+  ASSERT_EQ(options.AccountKey(), "account_key");
+  ASSERT_EQ(options.SasToken(), "");
+  ASSERT_EQ(options.TenantId(), "");
+  ASSERT_EQ(options.ClientId(), "");
+  ASSERT_EQ(options.ClientSecret(), "");
   EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
 }
 
@@ -517,6 +539,11 @@ TEST(AzureFileSystem, 
InitializeWithClientSecretCredential) {
   options.account_name = "dummy-account-name";
   ARROW_EXPECT_OK(
       options.ConfigureClientSecretCredential("tenant_id", "client_id", 
"client_secret"));
+  ASSERT_EQ(options.AccountKey(), "");
+  ASSERT_EQ(options.SasToken(), "");
+  ASSERT_EQ(options.TenantId(), "tenant_id");
+  ASSERT_EQ(options.ClientId(), "client_id");
+  ASSERT_EQ(options.ClientSecret(), "client_secret");
   EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
 }
 
@@ -524,9 +551,19 @@ TEST(AzureFileSystem, 
InitializeWithManagedIdentityCredential) {
   AzureOptions options;
   options.account_name = "dummy-account-name";
   ARROW_EXPECT_OK(options.ConfigureManagedIdentityCredential());
+  ASSERT_EQ(options.AccountKey(), "");
+  ASSERT_EQ(options.SasToken(), "");
+  ASSERT_EQ(options.TenantId(), "");
+  ASSERT_EQ(options.ClientId(), "");
+  ASSERT_EQ(options.ClientSecret(), "");
   EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
 
   
ARROW_EXPECT_OK(options.ConfigureManagedIdentityCredential("specific-client-id"));
+  ASSERT_EQ(options.AccountKey(), "");
+  ASSERT_EQ(options.SasToken(), "");
+  ASSERT_EQ(options.TenantId(), "");
+  ASSERT_EQ(options.ClientId(), "specific-client-id");
+  ASSERT_EQ(options.ClientSecret(), "");
   EXPECT_OK_AND_ASSIGN(fs, AzureFileSystem::Make(options));
 }
 
@@ -534,6 +571,11 @@ TEST(AzureFileSystem, InitializeWithCLICredential) {
   AzureOptions options;
   options.account_name = "dummy-account-name";
   ARROW_EXPECT_OK(options.ConfigureCLICredential());
+  ASSERT_EQ(options.AccountKey(), "");
+  ASSERT_EQ(options.SasToken(), "");
+  ASSERT_EQ(options.TenantId(), "");
+  ASSERT_EQ(options.ClientId(), "");
+  ASSERT_EQ(options.ClientSecret(), "");
   EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
 }
 
@@ -541,6 +583,11 @@ TEST(AzureFileSystem, 
InitializeWithWorkloadIdentityCredential) {
   AzureOptions options;
   options.account_name = "dummy-account-name";
   ARROW_EXPECT_OK(options.ConfigureWorkloadIdentityCredential());
+  ASSERT_EQ(options.AccountKey(), "");
+  ASSERT_EQ(options.SasToken(), "");
+  ASSERT_EQ(options.TenantId(), "");
+  ASSERT_EQ(options.ClientId(), "");
+  ASSERT_EQ(options.ClientSecret(), "");
   EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
 }
 
@@ -548,12 +595,42 @@ TEST(AzureFileSystem, 
InitializeWithEnvironmentCredential) {
   AzureOptions options;
   options.account_name = "dummy-account-name";
   ARROW_EXPECT_OK(options.ConfigureEnvironmentCredential());
+  ASSERT_EQ(options.AccountKey(), "");
+  ASSERT_EQ(options.SasToken(), "");
+  ASSERT_EQ(options.TenantId(), "");
+  ASSERT_EQ(options.ClientId(), "");
+  ASSERT_EQ(options.ClientSecret(), "");
   EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
 }
 
 TEST(AzureFileSystem, OptionsCompare) {
-  AzureOptions options;
-  EXPECT_TRUE(options.Equals(options));
+  AzureOptions options0;
+  EXPECT_TRUE(options0.Equals(options0));
+
+  AzureOptions options1;
+  options1.account_name = "account_name";
+  EXPECT_FALSE(options1.Equals(options0));
+
+  AzureOptions options2;
+  options2.account_name = "account_name";
+  ASSERT_OK(options2.ConfigureAccountKeyCredential("fake_account_key"));
+  EXPECT_FALSE(options2.Equals(options1));
+
+  AzureOptions options3;
+  options3.account_name = "account_name";
+  
ASSERT_OK(options3.ConfigureAccountKeyCredential("different_fake_account_key"));
+  EXPECT_FALSE(options3.Equals(options2));
+
+  AzureOptions options4;
+  options4.account_name = "account_name";
+  ASSERT_OK(options4.ConfigureSASCredential("fake_sas_token"));
+  EXPECT_FALSE(options4.Equals(options3));
+
+  AzureOptions options5;
+  options5.account_name = "account_name";
+  ASSERT_OK(options5.ConfigureClientSecretCredential("fake_tenant_id", 
"fake_client_id",
+                                                     "fake_client_secret"));
+  EXPECT_FALSE(options5.Equals(options4));
 }
 
 class TestAzureOptions : public ::testing::Test {
@@ -1679,9 +1756,14 @@ class TestAzureFileSystem : public ::testing::Test {
                                  env->account_name(), env->account_key())));
     // AzureOptions::FromUri will not cut off extra query parameters that it 
consumes, so
     // make sure these don't cause problems.
-    ARROW_EXPECT_OK(options.ConfigureSASCredential(
-        "?blob_storage_authority=dummy_value0&" + sas_token.substr(1) +
-        "&credential_kind=dummy-value1"));
+    auto polluted_sas_token = "?blob_storage_authority=dummy_value0&" +
+                              sas_token.substr(1) + 
"&credential_kind=dummy-value1";
+    ARROW_EXPECT_OK(options.ConfigureSASCredential(polluted_sas_token));
+    ASSERT_EQ(options.AccountKey(), "");
+    ASSERT_EQ(options.SasToken(), polluted_sas_token);
+    ASSERT_EQ(options.TenantId(), "");
+    ASSERT_EQ(options.ClientId(), "");
+    ASSERT_EQ(options.ClientSecret(), "");
     EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
 
     AssertFileInfo(fs.get(), data.ObjectPath(), FileType::File);
diff --git a/python/pyarrow/_azurefs.pyx b/python/pyarrow/_azurefs.pyx
index aa6ad1d90e..deb58b0aed 100644
--- a/python/pyarrow/_azurefs.pyx
+++ b/python/pyarrow/_azurefs.pyx
@@ -28,33 +28,33 @@ cdef class AzureFileSystem(FileSystem):
     Azure Blob Storage backed FileSystem implementation
 
     This implementation supports flat namespace and hierarchical namespace 
(HNS) a.k.a.
-    Data Lake Gen2 storage accounts. HNS will be automatically detected and 
HNS specific 
-    features will be used when they provide a performance advantage. Azurite 
emulator is 
+    Data Lake Gen2 storage accounts. HNS will be automatically detected and 
HNS specific
+    features will be used when they provide a performance advantage. Azurite 
emulator is
     also supported. Note: `/` is the only supported delimiter.
 
-    The storage account is considered the root of the filesystem. When 
enabled, containers 
-    will be created or deleted during relevant directory operations. 
Obviously, this also 
-    requires authentication with the additional permissions. 
+    The storage account is considered the root of the filesystem. When 
enabled, containers
+    will be created or deleted during relevant directory operations. 
Obviously, this also
+    requires authentication with the additional permissions.
 
-    By default `DefaultAzureCredential 
<https://github.com/Azure/azure-sdk-for-cpp/blob/main/sdk/identity/azure-identity/README.md#defaultazurecredential>`__
 
+    By default `DefaultAzureCredential 
<https://github.com/Azure/azure-sdk-for-cpp/blob/main/sdk/identity/azure-identity/README.md#defaultazurecredential>`__
     is used for authentication. This means it will try several types of 
authentication
-    and go with the first one that works. If any authentication parameters are 
provided when 
+    and go with the first one that works. If any authentication parameters are 
provided when
     initialising the FileSystem, they will be used instead of the default 
credential.
 
     Parameters
     ----------
     account_name : str
-        Azure Blob Storage account name. This is the globally unique 
identifier for the 
+        Azure Blob Storage account name. This is the globally unique 
identifier for the
         storage account.
     account_key : str, default None
-        Account key of the storage account. If sas_token and account_key are 
None the 
+        Account key of the storage account. If sas_token and account_key are 
None the
         default credential will be used. The parameters account_key and 
sas_token are
         mutually exclusive.
     blob_storage_authority : str, default None
         hostname[:port] of the Blob Service. Defaults to 
`.blob.core.windows.net`. Useful
         for connecting to a local emulator, like Azurite.
     blob_storage_scheme : str, default None
-        Either `http` or `https`. Defaults to `https`. Useful for connecting 
to a local 
+        Either `http` or `https`. Defaults to `https`. Useful for connecting 
to a local
         emulator, like Azurite.
     client_id : str, default None
         The client ID (Application ID) for Azure Active Directory 
authentication.
@@ -101,11 +101,6 @@ cdef class AzureFileSystem(FileSystem):
     """
     cdef:
         CAzureFileSystem* azurefs
-        c_string account_key
-        c_string sas_token
-        c_string tenant_id
-        c_string client_id
-        c_string client_secret
 
     def __init__(self, account_name, *, account_key=None, 
blob_storage_authority=None,
                  blob_storage_scheme=None, client_id=None, client_secret=None,
@@ -133,14 +128,10 @@ cdef class AzureFileSystem(FileSystem):
                 raise ValueError("client_id must be specified")
             if not tenant_id and not client_secret:
                 options.ConfigureManagedIdentityCredential(tobytes(client_id))
-                self.client_id = tobytes(client_id)
             elif tenant_id and client_secret:
                 options.ConfigureClientSecretCredential(
                     tobytes(tenant_id), tobytes(client_id), 
tobytes(client_secret)
                 )
-                self.tenant_id = tobytes(tenant_id)
-                self.client_id = tobytes(client_id)
-                self.client_secret = tobytes(client_secret)
             else:
                 raise ValueError(
                     "Invalid Azure credential configuration: "
@@ -149,10 +140,8 @@ cdef class AzureFileSystem(FileSystem):
                 )
         elif account_key:
             options.ConfigureAccountKeyCredential(tobytes(account_key))
-            self.account_key = tobytes(account_key)
         elif sas_token:
             options.ConfigureSASCredential(tobytes(sas_token))
-            self.sas_token = tobytes(sas_token)
         else:
             options.ConfigureDefaultCredential()
 
@@ -176,13 +165,13 @@ cdef class AzureFileSystem(FileSystem):
         return (
             AzureFileSystem._reconstruct, (dict(
                 account_name=frombytes(opts.account_name),
-                account_key=frombytes(self.account_key),
+                account_key=frombytes(opts.AccountKey()),
                 blob_storage_authority=frombytes(opts.blob_storage_authority),
                 blob_storage_scheme=frombytes(opts.blob_storage_scheme),
-                client_id=frombytes(self.client_id),
-                client_secret=frombytes(self.client_secret),
+                client_id=frombytes(opts.ClientId()),
+                client_secret=frombytes(opts.ClientSecret()),
                 dfs_storage_authority=frombytes(opts.dfs_storage_authority),
                 dfs_storage_scheme=frombytes(opts.dfs_storage_scheme),
-                sas_token=frombytes(self.sas_token),
-                tenant_id=frombytes(self.tenant_id)
+                sas_token=frombytes(opts.SasToken()),
+                tenant_id=frombytes(opts.TenantId())
             ),))
diff --git a/python/pyarrow/includes/libarrow_fs.pxd 
b/python/pyarrow/includes/libarrow_fs.pxd
index af01c47c8c..d18dc2d2bd 100644
--- a/python/pyarrow/includes/libarrow_fs.pxd
+++ b/python/pyarrow/includes/libarrow_fs.pxd
@@ -259,6 +259,11 @@ cdef extern from "arrow/filesystem/api.h" namespace 
"arrow::fs" nogil:
         CStatus ConfigureClientSecretCredential(c_string tenant_id,
                                                 c_string client_id,
                                                 c_string client_secret)
+        c_string SasToken()
+        c_string AccountKey()
+        c_string TenantId()
+        c_string ClientId()
+        c_string ClientSecret()
 
     cdef cppclass CAzureFileSystem "arrow::fs::AzureFileSystem":
         @staticmethod
diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py
index 376398baa0..5bf1950c06 100644
--- a/python/pyarrow/tests/test_fs.py
+++ b/python/pyarrow/tests/test_fs.py
@@ -636,7 +636,29 @@ def test_subtree_filesystem():
                                   ' base_fs=<pyarrow._fs.LocalFileSystem')
 
 
-def test_filesystem_pickling(fs, pickle_module):
+class _PickleModuleSubTreeFileSystemWrapper:
+    def __init__(self, pickle_module):
+        self.pickle_module = pickle_module
+
+    def dumps(self, obj):
+        return self.pickle_module.dumps(SubTreeFileSystem("/", obj))
+
+    def loads(self, data):
+        return self.pickle_module.loads(data).base_fs
+
+
[email protected](params=[True, False])
+def pickle_with_and_without_subtree_filesystem(pickle_module, request):
+    # When creating a SubTreeFileSystem, the python side object for the base
+    # filesystem is lost. This makes it a pickling scenario worth testing.
+    wrap_with_subtree_filesystem = request.param
+    if wrap_with_subtree_filesystem:
+        return _PickleModuleSubTreeFileSystemWrapper(pickle_module)
+    return pickle_module
+
+
+def test_filesystem_pickling(fs, pickle_with_and_without_subtree_filesystem):
+    pickle_module = pickle_with_and_without_subtree_filesystem
     if fs.type_name.split('::')[-1] == 'mock':
         pytest.xfail(reason='MockFileSystem is not serializable')
 
@@ -646,7 +668,10 @@ def test_filesystem_pickling(fs, pickle_module):
     assert restored.equals(fs)
 
 
-def test_filesystem_is_functional_after_pickling(fs, pathfn, pickle_module):
+def test_filesystem_is_functional_after_pickling(
+    fs, pathfn, pickle_with_and_without_subtree_filesystem
+):
+    pickle_module = pickle_with_and_without_subtree_filesystem
     if fs.type_name.split('::')[-1] == 'mock':
         pytest.xfail(reason='MockFileSystem is not serializable')
     skip_fsspec_s3fs(fs)
@@ -1171,7 +1196,8 @@ def test_mockfs_mtime_roundtrip(mockfs):
 
 
 @pytest.mark.gcs
-def test_gcs_options(pickle_module):
+def test_gcs_options(pickle_with_and_without_subtree_filesystem):
+    pickle_module = pickle_with_and_without_subtree_filesystem
     from pyarrow.fs import GcsFileSystem
     dt = datetime.now()
     fs = GcsFileSystem(access_token='abc',
@@ -1209,7 +1235,8 @@ def test_gcs_options(pickle_module):
 
 
 @pytest.mark.s3
-def test_s3_options(pickle_module):
+def test_s3_options(pickle_with_and_without_subtree_filesystem):
+    pickle_module = pickle_with_and_without_subtree_filesystem
     from pyarrow.fs import (AwsDefaultS3RetryStrategy,
                             AwsStandardS3RetryStrategy, S3FileSystem,
                             S3RetryStrategy)
@@ -1313,7 +1340,8 @@ def test_s3_options(pickle_module):
 
 
 @pytest.mark.s3
-def test_s3_proxy_options(monkeypatch, pickle_module):
+def test_s3_proxy_options(monkeypatch, 
pickle_with_and_without_subtree_filesystem):
+    pickle_module = pickle_with_and_without_subtree_filesystem
     from pyarrow.fs import S3FileSystem
 
     # The following two are equivalent:
@@ -1478,7 +1506,8 @@ def test_s3fs_wrong_region():
 
 
 @pytest.mark.azure
-def test_azurefs_options(pickle_module):
+def test_azurefs_options(pickle_with_and_without_subtree_filesystem):
+    pickle_module = pickle_with_and_without_subtree_filesystem
     from pyarrow.fs import AzureFileSystem
 
     fs1 = AzureFileSystem(account_name='fake-account-name')
@@ -1572,7 +1601,8 @@ def test_azurefs_options(pickle_module):
 
 
 @pytest.mark.hdfs
-def test_hdfs_options(hdfs_connection, pickle_module):
+def test_hdfs_options(hdfs_connection, 
pickle_with_and_without_subtree_filesystem):
+    pickle_module = pickle_with_and_without_subtree_filesystem
     from pyarrow.fs import HadoopFileSystem
     if not pa.have_libhdfs():
         pytest.skip('Cannot locate libhdfs')
@@ -1765,7 +1795,8 @@ def test_py_filesystem_equality():
     assert fs1 != object()
 
 
-def test_py_filesystem_pickling(pickle_module):
+def test_py_filesystem_pickling(pickle_with_and_without_subtree_filesystem):
+    pickle_module = pickle_with_and_without_subtree_filesystem
     handler = DummyHandler()
     fs = PyFileSystem(handler)
 

Reply via email to