This is an automated email from the ASF dual-hosted git repository.

csringhofer pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 30ccfce590903ab64af1ca10b9d42fe3f665aac3
Author: Csaba Ringhofer <[email protected]>
AuthorDate: Tue Sep 10 18:19:53 2024 +0200

    IMPALA-13371: Avoid throwing exception in FindFileInPath()
    
    Some boost::filesystem:: functions can throw exceptions, which led to
    crash without informative error when FileSystemUtil::FindFileInPath()
    bumped into a problematic path in JavaAddJammAgent().
    Fixed by switching to overloads of the functions that can't throw
    exception (other functions in FileSystemUtil already used these).
    
    Testing:
    - Added tests with paths where Impala has no permissions (/root)
    
    Change-Id: I6ed9f288ac5c400778a6b1215e16baf191bf5d0c
    Reviewed-on: http://gerrit.cloudera.org:8080/21778
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 be/src/common/init.cc               |  6 ++++-
 be/src/util/filesystem-util-test.cc | 36 +++++++++++++++-----------
 be/src/util/filesystem-util.cc      | 50 ++++++++++++++++++++++++++-----------
 be/src/util/filesystem-util.h       |  3 ++-
 4 files changed, 65 insertions(+), 30 deletions(-)

diff --git a/be/src/common/init.cc b/be/src/common/init.cc
index 0eb085e39..2cf048711 100644
--- a/be/src/common/init.cc
+++ b/be/src/common/init.cc
@@ -352,7 +352,11 @@ static Status JavaAddJammAgent() {
   istringstream classpath {getenv("CLASSPATH")};
   string jamm_path, test_path;
   while (getline(classpath, test_path, ':')) {
-    jamm_path = FileSystemUtil::FindFileInPath(test_path, "jamm-.*.jar");
+    Status status = FileSystemUtil::FindFileInPath(test_path, "jamm-.*.jar", 
&jamm_path);
+    // Error during FindFileInPath is not fatal if jamm path is found in 
another path.
+    if (!status.ok()) {
+      LOG(ERROR) << "Error when processing class path: " << status.msg().msg();
+    }
     if (!jamm_path.empty()) break;
   }
   if (jamm_path.empty()) {
diff --git a/be/src/util/filesystem-util-test.cc 
b/be/src/util/filesystem-util-test.cc
index 57d7d87e7..6d21af4ac 100644
--- a/be/src/util/filesystem-util-test.cc
+++ b/be/src/util/filesystem-util-test.cc
@@ -137,39 +137,47 @@ TEST(FilesystemUtil, FindFileInPath) {
   path file3 = subdir1 / "a_file3";
   ASSERT_OK(FileSystemUtil::CreateFile(file3.string()));
 
-  string path = FileSystemUtil::FindFileInPath(dir.string(), "a.*1");
+  // Impala has no permission for /root so these will lead to errors in 
FindFileInPath().
+  path root_path1 = "/root/"; // permission error in 
filesystem::directory_iterator()
+  path root_path2 = "/root/a"; // permission error in filesystem::exists()
+
+  string path;
+  ASSERT_OK(FileSystemUtil::FindFileInPath(dir.string(), "a.*1", &path));
   EXPECT_EQ(file1.string(), path);
-  path = FileSystemUtil::FindFileInPath(dir.string(), "a.*2");
+  ASSERT_OK(FileSystemUtil::FindFileInPath(dir.string(), "a.*2", &path));
   EXPECT_EQ("", path);
-  path = FileSystemUtil::FindFileInPath(dir.string(), "a.*3");
+  ASSERT_OK(FileSystemUtil::FindFileInPath(dir.string(), "a.*3", &path));
   EXPECT_EQ("", path);
-  path = FileSystemUtil::FindFileInPath(dir.string(), "");
+  ASSERT_OK(FileSystemUtil::FindFileInPath(dir.string(), "", &path));
   EXPECT_EQ("", path);
 
-  path = FileSystemUtil::FindFileInPath(dir.string() + "/*", "a_file1");
+  ASSERT_OK(FileSystemUtil::FindFileInPath(dir.string() + "/*", "a_file1", 
&path));
   EXPECT_EQ(file1.string(), path);
-  path = FileSystemUtil::FindFileInPath(dir.string(), "a_file2");
+  ASSERT_OK(FileSystemUtil::FindFileInPath(dir.string(), "a_file2", &path));
   EXPECT_EQ("", path);
 
-  path = FileSystemUtil::FindFileInPath(dir.string(), "impala1");
+  ASSERT_OK(FileSystemUtil::FindFileInPath(dir.string(), "impala1", &path));
   EXPECT_EQ(subdir1.string(), path);
-  path = FileSystemUtil::FindFileInPath(dir.string(), "impala2");
+  ASSERT_OK(FileSystemUtil::FindFileInPath(dir.string(), "impala2", &path));
   EXPECT_EQ("", path);
 
-  path = FileSystemUtil::FindFileInPath(subdir1.string(), "a.*3");
+  ASSERT_OK(FileSystemUtil::FindFileInPath(subdir1.string(), "a.*3", &path));
   EXPECT_EQ(file3.string(), path);
-  path = FileSystemUtil::FindFileInPath(subdir1.string(), "anything");
+  ASSERT_OK(FileSystemUtil::FindFileInPath(subdir1.string(), "anything", 
&path));
   EXPECT_EQ("", path);
-  path = FileSystemUtil::FindFileInPath(subdir2.string(), ".*");
+  ASSERT_OK(FileSystemUtil::FindFileInPath(subdir2.string(), ".*", &path));
   EXPECT_EQ("", path);
 
-  path = FileSystemUtil::FindFileInPath(file1.string(), "a.*1");
+  ASSERT_OK(FileSystemUtil::FindFileInPath(file1.string(), "a.*1", &path));
   EXPECT_EQ(file1.string(), path);
-  path = FileSystemUtil::FindFileInPath(file1.string(), "a.*2");
+  ASSERT_OK(FileSystemUtil::FindFileInPath(file1.string(), "a.*2", &path));
   EXPECT_EQ("", path);
-  path = FileSystemUtil::FindFileInPath(file2.string(), "a.*2");
+  ASSERT_OK(FileSystemUtil::FindFileInPath(file2.string(), "a.*2", &path));
   EXPECT_EQ("", path);
 
+  EXPECT_FALSE(FileSystemUtil::FindFileInPath(root_path1.string(), "a", 
&path).ok());
+  EXPECT_FALSE(FileSystemUtil::FindFileInPath(root_path2.string(), "a", 
&path).ok());
+
   // Cleanup
   filesystem::remove_all(dir);
 }
diff --git a/be/src/util/filesystem-util.cc b/be/src/util/filesystem-util.cc
index a46e431ac..2975f07af 100644
--- a/be/src/util/filesystem-util.cc
+++ b/be/src/util/filesystem-util.cc
@@ -139,24 +139,31 @@ Status FileSystemUtil::CreateFile(const string& 
file_path) {
   return Status::OK();
 }
 
-Status FileSystemUtil::VerifyIsDirectory(const string& directory_path) {
+Status IsDirectory(const string& path, bool* is_dir) {
   error_code errcode;
-  bool exists = filesystem::exists(directory_path, errcode);
+  bool exists = filesystem::exists(path, errcode);
   if (errcode != errc::success) {
     return Status(ErrorMsg(TErrorCode::RUNTIME_ERROR, Substitute(
         "Encountered exception while verifying existence of directory path $0: 
$1",
-        directory_path, errcode.message())));
+        path, errcode.message())));
   }
   if (!exists) {
     return Status(ErrorMsg(TErrorCode::RUNTIME_ERROR, Substitute(
-        "Directory path $0 does not exist", directory_path)));
+        "Directory path $0 does not exist", path)));
   }
-  bool is_dir = filesystem::is_directory(directory_path, errcode);
+  DCHECK(is_dir != nullptr);
+  *is_dir = filesystem::is_directory(path, errcode);
   if (errcode != errc::success) {
     return Status(ErrorMsg(TErrorCode::RUNTIME_ERROR, Substitute(
         "Encountered exception while verifying existence of directory path $0: 
$1",
-        directory_path, errcode.message())));
+        path, errcode.message())));
   }
+  return Status::OK();
+}
+
+Status FileSystemUtil::VerifyIsDirectory(const string& directory_path) {
+  bool is_dir = false;
+  RETURN_IF_ERROR(IsDirectory(directory_path, &is_dir));
   if (!is_dir) {
     return Status(ErrorMsg(TErrorCode::RUNTIME_ERROR, Substitute(
         "Path $0 is not a directory", directory_path)));
@@ -264,23 +271,38 @@ bool FileSystemUtil::GetRelativePath(const string& path, 
const string& start,
   return false;
 }
 
-std::string FileSystemUtil::FindFileInPath(string path, const std::string& 
regex) {
+Status FileSystemUtil::FindFileInPath(string path, const std::string& regex,
+    std::string* result) {
   // Wildcard at the end matches any file, so trim and treat as a directory.
   boost::algorithm::trim_right_if(path, boost::algorithm::is_any_of("*"));
-  if (path.empty()) return "";
-  if (!filesystem::exists(path)) return "";
+  DCHECK(result != nullptr);
+  *result = "";
+  if (path.empty()) return Status::OK();
+  bool exists = false;
+  RETURN_IF_ERROR(PathExists(path, &exists));
+  if (!exists) return Status::OK();
+  bool is_dir = false;
+  RETURN_IF_ERROR(IsDirectory(path, &is_dir));
   std::regex pattern{regex};
-  if (filesystem::is_directory(path)) {
-    for (filesystem::directory_entry& child : 
filesystem::directory_iterator(path)) {
+  if (is_dir) {
+    error_code errcode;
+    for (filesystem::directory_entry& child :
+        filesystem::directory_iterator(path, errcode)) {
       // If child matches pattern, use it
       if (std::regex_match(child.path().filename().string(), pattern)) {
-        return child.path().string();
+        *result = child.path().string();
+        break;
       }
     }
+    if (errcode != errc::success) {
+      return Status(ErrorMsg(TErrorCode::RUNTIME_ERROR, Substitute(
+          "Encountered exception while iterating over directory path $0: $1",
+          path, errcode.message())));
+    }
   } else if (std::regex_match(filesystem::path(path).filename().string(), 
pattern)) {
-    return path;
+    *result = path;
   }
-  return "";
+  return Status::OK();
 }
 
 FileSystemUtil::Directory::Directory(const string& path)
diff --git a/be/src/util/filesystem-util.h b/be/src/util/filesystem-util.h
index 916cb5ec2..819369872 100644
--- a/be/src/util/filesystem-util.h
+++ b/be/src/util/filesystem-util.h
@@ -95,7 +95,8 @@ class FileSystemUtil {
   /// any files in that directory.
   /// If 'path' ends with a wildcard - as in "/lib/*" - it will treat path as 
a directory
   /// excluding the wildcard.
-  static std::string FindFileInPath(string path, const std::string& regex);
+  static Status FindFileInPath(string path, const std::string& regex,
+      std::string* result);
 
   /// Ext filesystem on certain kernel versions may result in inconsistent 
metadata after
   /// punching holes in files. The filesystem may require fsck repair on next 
reboot.

Reply via email to