This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit e26a4057edb3ae9e1960f3aeaffe7a92271abe33
Author: Tamas Mate <[email protected]>
AuthorDate: Tue Jan 10 12:47:47 2023 +0100

    IMPALA-11834: Fix Iceberg LOAD DATA hdfsDelete JVM crash
    
    The LOAD DATA statement could crash the JVM when the loaded paths were on
    a different filesystem than 'fs.defaultFS'. This happened because the
    hdfsFS object was initialized with the default FS instead of the
    filesystem that each path actually belongs to.
    
    This commit fixes the hdfsFS object initialization.
    
    Testing:
     - Ran the existing tests locally.
    
    Change-Id: Ifb8f6ebf5b7100e69c1b02137d03fe70c331c30f
    Reviewed-on: http://gerrit.cloudera.org:8080/19410
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 be/src/service/client-request-state.cc | 20 ++++++++++++++------
 be/src/service/client-request-state.h  |  1 +
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/be/src/service/client-request-state.cc b/be/src/service/client-request-state.cc
index a641600b9..13c9bef63 100644
--- a/be/src/service/client-request-state.cc
+++ b/be/src/service/client-request-state.cc
@@ -870,18 +870,26 @@ void ClientRequestState::ExecLoadIcebergDataRequestImpl(TLoadDataResp response)
   RETURN_VOID_IF_ERROR(child_query_executor_->ExecAsync(move(child_queries)));
   vector<ChildQuery*>* completed_queries = new vector<ChildQuery*>();
   Status query_status = child_query_executor_->WaitForAll(completed_queries);
-  hdfsFS fs = hdfsConnect("default", 0);
   if (query_status.ok()) {
-    if (hdfsDelete(fs, response.create_location.c_str(), 1)) {
-      Status hdfs_ret("Failed to remove staging data under '" + 
response.create_location
+    const char* path = response.create_location.c_str();
+    hdfsFS hdfs_conn;
+    Status hdfs_ret = HdfsFsCache::instance()->GetConnection(path, &hdfs_conn);
+    if (!hdfs_ret.ok() || hdfsDelete(hdfs_conn, path, 1)) {
+      hdfs_ret = Status("Failed to remove staging data under '" + 
response.create_location
           + "' after query failure: " + query_status.msg().msg());
       lock_guard<mutex> l(lock_);
       RETURN_VOID_IF_ERROR(UpdateQueryStatus(hdfs_ret));
     }
   } else {
-    for (string path : response.loaded_files) {
-      if (hdfsMove(fs, path.c_str(), fs, load_data_req.source_path.c_str())) {
-        Status hdfs_ret("Failed to revert data movement, some files might 
still be in '"
+    const char* dst_path = load_data_req.source_path.c_str();
+    hdfsFS hdfs_dst_conn;
+    Status hdfs_ret = HdfsFsCache::instance()->GetConnection(dst_path, 
&hdfs_dst_conn);
+    for (string src_path : response.loaded_files) {
+      hdfsFS hdfs_src_conn;
+      hdfs_ret = Status(HdfsFsCache::instance()->GetConnection(dst_path, 
&hdfs_src_conn));
+      if (!hdfs_ret.ok() || hdfsMove(hdfs_src_conn, src_path.c_str(), 
hdfs_dst_conn,
+          dst_path)) {
+        hdfs_ret = Status("Failed to revert data movement, some files might 
still be in '"
             + response.create_location + "' after query failure: "
             + query_status.msg().msg());
         lock_guard<mutex> l(lock_);
diff --git a/be/src/service/client-request-state.h b/be/src/service/client-request-state.h
index c0458c72b..4a98b9cef 100644
--- a/be/src/service/client-request-state.h
+++ b/be/src/service/client-request-state.h
@@ -24,6 +24,7 @@
 #include "service/child-query.h"
 #include "service/impala-server.h"
 #include "service/query-result-set.h"
+#include "runtime/hdfs-fs-cache.h"
 #include "util/condition-variable.h"
 #include "util/runtime-profile.h"
 #include "gen-cpp/Frontend_types.h"

Reply via email to