This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 900ca7bd6e8 [fix](cloud) Always use absolute path in hdfs (#33259)
900ca7bd6e8 is described below

commit 900ca7bd6e89b1a6a3f7a008c71f95a832d42429
Author: plat1ko <platonekos...@gmail.com>
AuthorDate: Fri Apr 5 08:52:06 2024 +0800

    [fix](cloud) Always use absolute path in hdfs (#33259)
---
 be/src/io/hdfs_util.cpp              | 23 +++++++++++++----------
 cloud/src/recycler/hdfs_accessor.cpp | 20 +++-----------------
 2 files changed, 16 insertions(+), 27 deletions(-)

diff --git a/be/src/io/hdfs_util.cpp b/be/src/io/hdfs_util.cpp
index 4ee5e8da29b..98e2e223c70 100644
--- a/be/src/io/hdfs_util.cpp
+++ b/be/src/io/hdfs_util.cpp
@@ -130,17 +130,20 @@ Status HdfsHandlerCache::get_connection(const THdfsParams& hdfs_params, const st
 }
 
 Path convert_path(const Path& path, const std::string& namenode) {
-    Path real_path(path);
-    if (path.string().find(namenode) != std::string::npos) {
-        std::string real_path_str = path.string().substr(namenode.size());
-        if (!real_path_str.starts_with("/")) {
-            // The real path must starts with "/"
-            // Or the hadoop client will add a prefix like "/user/hadoop".
-            real_path_str = "/" + real_path_str;
-        }
-        real_path = real_path_str;
+    std::string fs_path;
+    if (path.native().find(namenode) != std::string::npos) {
+        // `path` is uri format, remove the namenode part in `path`
+        // FIXME(plat1ko): Not robust if `namenode` doesn't appear at the beginning of `path`
+        fs_path = path.native().substr(namenode.size());
+    } else {
+        fs_path = path;
+    }
+
+    // Always use absolute path (start with '/') in hdfs
+    if (fs_path.empty() || fs_path[0] != '/') {
+        fs_path.insert(fs_path.begin(), '/');
     }
-    return real_path;
+    return fs_path;
 }
 
 bool is_hdfs(const std::string& path_or_fs) {
diff --git a/cloud/src/recycler/hdfs_accessor.cpp b/cloud/src/recycler/hdfs_accessor.cpp
index 70197581601..2ec85621742 100644
--- a/cloud/src/recycler/hdfs_accessor.cpp
+++ b/cloud/src/recycler/hdfs_accessor.cpp
@@ -27,21 +27,6 @@
 namespace doris::cloud {
 namespace {
 
-// Removes any leading, and trailing `c`
-void strip(std::string& str, char c) {
-    if (!str.empty()) {
-        size_t start = str.find_first_not_of(c);
-        if (start == std::string::npos) {
-            str = "";
-        } else {
-            size_t end = str.find_last_not_of(c);
-            if (start > 0 || end < str.size() - 1) {
-                str = str.substr(start, end - start + 1);
-            }
-        }
-    }
-}
-
 std::string hdfs_error() {
 #ifdef USE_HADOOP_HDFS
     const char* err_msg = hdfsGetLastExceptionRootCause();
@@ -173,8 +158,9 @@ private:
 };
 
 HdfsAccessor::HdfsAccessor(const HdfsVaultInfo& info) : info_(info), prefix_(info.prefix()) {
-    strip(prefix_, '/');
-    prefix_ = "/" + prefix_;
+    if (!prefix_.empty() && prefix_[0] != '/') {
+        prefix_.insert(prefix_.begin(), '/');
+    }
     uri_ = info.build_conf().fs_name() + prefix_;
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to