This is an automated email from the ASF dual-hosted git repository. stigahuang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit e26a4057edb3ae9e1960f3aeaffe7a92271abe33 Author: Tamas Mate <[email protected]> AuthorDate: Tue Jan 10 12:47:47 2023 +0100 IMPALA-11834: Fix Iceberg LOAD DATA hdfsDelete JVM crash The LOAD DATA statement could crash the JVM when there were differences between the 'fs.defaultFS' and loaded paths. This happened because the hdfsFS object was initialized with the default FS instead of the correct ones for the paths. This commit fixes the hdfsFS object initialization. Testing: - Ran the existing tests locally. Change-Id: Ifb8f6ebf5b7100e69c1b02137d03fe70c331c30f Reviewed-on: http://gerrit.cloudera.org:8080/19410 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- be/src/service/client-request-state.cc | 20 ++++++++++++++------ be/src/service/client-request-state.h | 1 + 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/be/src/service/client-request-state.cc b/be/src/service/client-request-state.cc index a641600b9..13c9bef63 100644 --- a/be/src/service/client-request-state.cc +++ b/be/src/service/client-request-state.cc @@ -870,18 +870,26 @@ void ClientRequestState::ExecLoadIcebergDataRequestImpl(TLoadDataResp response) RETURN_VOID_IF_ERROR(child_query_executor_->ExecAsync(move(child_queries))); vector<ChildQuery*>* completed_queries = new vector<ChildQuery*>(); Status query_status = child_query_executor_->WaitForAll(completed_queries); - hdfsFS fs = hdfsConnect("default", 0); if (query_status.ok()) { - if (hdfsDelete(fs, response.create_location.c_str(), 1)) { - Status hdfs_ret("Failed to remove staging data under '" + response.create_location + const char* path = response.create_location.c_str(); + hdfsFS hdfs_conn; + Status hdfs_ret = HdfsFsCache::instance()->GetConnection(path, &hdfs_conn); + if (!hdfs_ret.ok() || hdfsDelete(hdfs_conn, path, 1)) { + hdfs_ret = Status("Failed to remove staging data under '" + response.create_location + "' after query failure: " + query_status.msg().msg()); lock_guard<mutex> l(lock_); RETURN_VOID_IF_ERROR(UpdateQueryStatus(hdfs_ret)); } } else { - for (string path : response.loaded_files) { - if (hdfsMove(fs, path.c_str(), fs, load_data_req.source_path.c_str())) { - Status hdfs_ret("Failed to revert data movement, some files might still be in '" + const char* dst_path = load_data_req.source_path.c_str(); + hdfsFS hdfs_dst_conn; + Status hdfs_ret = HdfsFsCache::instance()->GetConnection(dst_path, &hdfs_dst_conn); + for (string src_path : response.loaded_files) { + hdfsFS hdfs_src_conn; + hdfs_ret = Status(HdfsFsCache::instance()->GetConnection(dst_path, &hdfs_src_conn)); + if (!hdfs_ret.ok() || hdfsMove(hdfs_src_conn, src_path.c_str(), hdfs_dst_conn, + dst_path)) { + hdfs_ret = Status("Failed to revert data movement, some files might still be in '" + response.create_location + "' after query failure: " + query_status.msg().msg()); lock_guard<mutex> l(lock_); diff --git a/be/src/service/client-request-state.h b/be/src/service/client-request-state.h index c0458c72b..4a98b9cef 100644 --- a/be/src/service/client-request-state.h +++ b/be/src/service/client-request-state.h @@ -24,6 +24,7 @@ #include "service/child-query.h" #include "service/impala-server.h" #include "service/query-result-set.h" +#include "runtime/hdfs-fs-cache.h" #include "util/condition-variable.h" #include "util/runtime-profile.h" #include "gen-cpp/Frontend_types.h"
