This is an automated email from the ASF dual-hosted git repository.

dockerzhang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/inlong.git


The following commit(s) were added to refs/heads/master by this push:
     new 06c7b829f1 [INLONG-9102][Agent] Add file utils (#9103)
06c7b829f1 is described below

commit 06c7b829f170698e65c7ffd822b4db9c4078aef7
Author: justinwwhuang <hww_jus...@163.com>
AuthorDate: Wed Oct 25 03:38:44 2023 -0500

    [INLONG-9102][Agent] Add file utils (#9103)
---
 .../utils/file/DirNameRegexMatchPredicate.java     |  46 ++++
 .../apache/inlong/agent/utils/file/FileFinder.java | 296 +++++++++++++++++++++
 .../agent/utils/file/FileFinderIterator.java       | 162 +++++++++++
 .../utils/file/FileNameRegexMatchPredicate.java    |  43 +++
 .../apache/inlong/agent/utils/file/FileUtils.java  |  76 ++++++
 5 files changed, 623 insertions(+)

diff --git 
a/inlong-agent/agent-common/src/main/java/org/apache/inlong/agent/utils/file/DirNameRegexMatchPredicate.java
 
b/inlong-agent/agent-common/src/main/java/org/apache/inlong/agent/utils/file/DirNameRegexMatchPredicate.java
new file mode 100644
index 0000000000..cd22a730b7
--- /dev/null
+++ 
b/inlong-agent/agent-common/src/main/java/org/apache/inlong/agent/utils/file/DirNameRegexMatchPredicate.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.agent.utils.file;
+
+import com.google.common.base.Predicate;
+
+import java.io.File;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * A {@link Predicate} that tests the absolute path of a {@link File} against a regular expression.
+ * <p>
+ * The expression is applied with {@link Matcher#lookingAt()} semantics: it has to match starting at
+ * the first character of the absolute path, but it does not have to consume the whole path.
+ */
+public class DirNameRegexMatchPredicate implements Predicate<File> {
+
+    /** Expression used when the caller supplies none. */
+    private static final String MATCH_ALL = ".*";
+
+    private final Pattern pattern;
+
+    /**
+     * Compiles the expression used by {@link #apply(File)}.
+     *
+     * @param regex expression tested against the absolute path; {@code null} or an empty string
+     *         selects {@code ".*"}
+     * @param caseSensitive {@code false} to compile with {@link Pattern#CASE_INSENSITIVE}
+     */
+    public DirNameRegexMatchPredicate(String regex, boolean caseSensitive) {
+        String effectiveRegex = (regex == null || regex.isEmpty()) ? MATCH_ALL : regex;
+        int flags = caseSensitive ? 0 : Pattern.CASE_INSENSITIVE;
+        this.pattern = Pattern.compile(effectiveRegex, flags);
+    }
+
+    /**
+     * @param input the file or directory whose absolute path is tested
+     * @return {@code true} when the expression matches a prefix of the absolute path
+     */
+    @Override
+    public boolean apply(File input) {
+        // A full match always implies a prefix match, so the prefix test alone decides the result.
+        return pattern.matcher(input.getAbsolutePath()).lookingAt();
+    }
+
+}
diff --git 
a/inlong-agent/agent-common/src/main/java/org/apache/inlong/agent/utils/file/FileFinder.java
 
b/inlong-agent/agent-common/src/main/java/org/apache/inlong/agent/utils/file/FileFinder.java
new file mode 100644
index 0000000000..c8addc0d78
--- /dev/null
+++ 
b/inlong-agent/agent-common/src/main/java/org/apache/inlong/agent/utils/file/FileFinder.java
@@ -0,0 +1,296 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.agent.utils.file;
+
+import com.google.common.base.Predicate;
+import com.google.common.base.Predicates;
+import com.google.common.collect.Lists;
+
+import java.io.File;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * Builder-style configuration for a file search below a base directory.
+ * <p>
+ * Obtain an instance through {@link FileUtils#find(String)} or {@link FileUtils#find(File)} and
+ * chain the configuration methods:
+ * <ul>
+ * <li>{@link #yieldFiles()}, {@link #yieldDirectories()} and {@link #yieldFilesAndDirectories()}
+ * select which kind of entry passes the yield filter. Only one of them makes sense; if several
+ * are used the last one wins.</li>
+ * <li>{@link #recursive()} and {@link #recursive(Predicate)} select the sub directories that may
+ * be descended into and {@link #withDepth(int)} limits how deep the search goes. Without these
+ * options only the base directory itself is searched.</li>
+ * <li>{@link #withFileNameRegex(String)}, {@link #withDirNameRegex(String)},
+ * {@link #withFileFilter(Predicate)} and {@link #withDirFilter(Predicate)} narrow the search.
+ * Filters of the same kind are combined with AND.</li>
+ * <li>{@link #caseSensitive()} and {@link #ignoreCase()} only affect regex filters that are added
+ * after they have been called.</li>
+ * </ul>
+ * <p>
+ * The search is executed either with {@link #list()}, which collects every result into a
+ * {@code List<File>}, or with {@link #iterator()}, which produces the results piece by piece.
+ * Because {@link FileFinder} implements {@link Iterable} it can be used in a for-each loop.
+ * <p>
+ * As implemented by {@link FileFinderIterator}, a directory is only descended into when it also
+ * passes the yield filter, and only regular files are returned. A recursive search therefore
+ * needs {@link #yieldFilesAndDirectories()} together with a depth greater than 1.
+ * <p>
+ * Examples:
+ * <pre>{@code
+ * // Iterate over all files directly inside a directory
+ * for (File f : FileUtils.find("c:\\windows")) { ... }
+ *
+ * // Get all the files in a directory as a list of files
+ * List<File> allFiles = FileUtils.find(somedir).list();
+ *
+ * // Skip all .svn directories within a source tree
+ * Predicate<File> noSvnDirs = new Predicate<File>() {
+ *   public boolean apply(File file) {
+ *     return !file.getName().equals(".svn");
+ *   }
+ * };
+ * for (File f : FileUtils.find("src/java/").yieldFilesAndDirectories().withDepth(10)
+ *     .recursive(noSvnDirs)) { ... }
+ * }</pre>
+ */
+public class FileFinder implements Iterable<File> {
+
+    /**
+     * Predicate that returns true when a {@link File} object actually points to a file.
+     */
+    private static final Predicate<File> IS_FILE = new Predicate<File>() {
+
+        @Override
+        public boolean apply(File input) {
+            return input.isFile();
+        }
+    };
+
+    /**
+     * Predicate that returns true when a {@link File} object points to a directory.
+     */
+    private static final Predicate<File> IS_DIRECTORY = new Predicate<File>() {
+
+        @Override
+        public boolean apply(File input) {
+            return input.isDirectory();
+        }
+    };
+
+    /**
+     * The base directory which will be used for the search.
+     */
+    private final File baseDir;
+
+    /**
+     * A filter which determines which type of entry is considered (files, directories or both).
+     */
+    private Predicate<File> yieldFilter = IS_FILE;
+
+    /**
+     * A filter which determines which directories will be processed recursively.
+     */
+    private Predicate<File> branchFilter = Predicates.alwaysFalse();
+
+    /**
+     * A filter that can be used to select specific files.
+     */
+    private Predicate<File> fileFilter = Predicates.alwaysTrue();
+
+    /**
+     * A filter that can be used to select specific directories.
+     */
+    private Predicate<File> dirFilter = Predicates.alwaysTrue();
+
+    /**
+     * Defines whether regex filters created by this builder are compiled case sensitive or case
+     * insensitive. The default is case sensitive.
+     */
+    private boolean caseSensitive = true;
+
+    /**
+     * Depth limit handed to the iterator; entries directly inside the base directory have depth 1.
+     */
+    private int maxDepth = 1;
+
+    /**
+     * Creates a new {@link FileFinder} object for a given base directory.
+     *
+     * @param baseDir The base directory where the search starts.
+     */
+    public FileFinder(File baseDir) {
+        this.baseDir = baseDir;
+    }
+
+    /**
+     * Sets the depth limit of the search. Entries directly inside the base directory have depth 1
+     * and a directory is only descended into while its depth is below the limit, so the default of
+     * 1 never leaves the base directory.
+     *
+     * @param depth The maximum depth of entries that are examined.
+     * @return The current {@link FileFinder} to perform method chaining.
+     */
+    public FileFinder withDepth(int depth) {
+        maxDepth = depth;
+        return this;
+    }
+
+    /**
+     * Returns the result of the search as a list.
+     *
+     * @return A list with the files that were found.
+     */
+    public List<File> list() {
+        return Lists.newArrayList(iterator());
+    }
+
+    /**
+     * Creates an Iterator that can be used to iterate through the results of the search. The
+     * directory tree is explored while results are fetched from the iterator; the result is not
+     * collected into a list up front.
+     *
+     * @return An {@code Iterator<File>} that retrieves the results bit by bit.
+     * @see Iterable#iterator()
+     */
+    @Override
+    public Iterator<File> iterator() {
+        return new FileFinderIterator(baseDir, yieldFilter, branchFilter,
+                fileFilter, dirFilter, maxDepth);
+    }
+
+    /**
+     * Configures the {@link FileFinder} to consider files only.
+     *
+     * @return The current {@link FileFinder} to perform method chaining.
+     */
+    public FileFinder yieldFiles() {
+        yieldFilter = IS_FILE;
+        return this;
+    }
+
+    /**
+     * Configures the {@link FileFinder} to consider directories only.
+     *
+     * @return The current {@link FileFinder} to perform method chaining.
+     */
+    public FileFinder yieldDirectories() {
+        yieldFilter = IS_DIRECTORY;
+        return this;
+    }
+
+    /**
+     * Configures the {@link FileFinder} to consider files and directories.
+     *
+     * @return The current {@link FileFinder} to perform method chaining.
+     */
+    public FileFinder yieldFilesAndDirectories() {
+        yieldFilter = Predicates.or(IS_FILE, IS_DIRECTORY);
+        return this;
+    }
+
+    /**
+     * Configures the {@link FileFinder} to use case sensitive comparisons for regex filters that
+     * are added afterwards.
+     *
+     * @return The current {@link FileFinder} to perform method chaining.
+     */
+    public FileFinder caseSensitive() {
+        caseSensitive = true;
+        return this;
+    }
+
+    /**
+     * Configures the {@link FileFinder} to ignore the case for regex filters that are added
+     * afterwards.
+     *
+     * @return The current {@link FileFinder} to perform method chaining.
+     */
+    public FileFinder ignoreCase() {
+        caseSensitive = false;
+        return this;
+    }
+
+    /**
+     * Adds a file filter backed by a {@link FileNameRegexMatchPredicate}, using the case
+     * sensitivity that is configured at the time of the call.
+     *
+     * @param name The regular expression handed to the predicate.
+     * @return The current {@link FileFinder} to perform method chaining.
+     */
+    public FileFinder withFileNameRegex(final String name) {
+        return withFileFilter(new FileNameRegexMatchPredicate(name, caseSensitive));
+    }
+
+    /**
+     * Adds a directory filter backed by a {@link DirNameRegexMatchPredicate}, using the case
+     * sensitivity that is configured at the time of the call.
+     *
+     * @param name The regular expression handed to the predicate.
+     * @return The current {@link FileFinder} to perform method chaining.
+     */
+    public FileFinder withDirNameRegex(final String name) {
+        return withDirFilter(new DirNameRegexMatchPredicate(name, caseSensitive));
+    }
+
+    /**
+     * Enables a recursive search in which every sub directory may be descended into.
+     *
+     * @return The current {@link FileFinder} to perform method chaining.
+     */
+    public FileFinder recursive() {
+        branchFilter = Predicates.alwaysTrue();
+        return this;
+    }
+
+    /**
+     * Enables a recursive search that only descends into sub directories accepted by the given
+     * {@link Predicate}. A previously configured branch filter is replaced.
+     *
+     * @param branchFilter The {@code Predicate<File>} that returns true for all directories
+     *         that should be used in the search.
+     * @return The current {@link FileFinder} to perform method chaining.
+     */
+    public FileFinder recursive(Predicate<File> branchFilter) {
+        this.branchFilter = branchFilter;
+        return this;
+    }
+
+    /**
+     * Accepts a custom filter to search for specific files.
+     * <p>
+     * Multiple calls of {@link #withFileFilter(Predicate)} as well as other file filter methods
+     * like {@link #withFileNameRegex(String)} are combined with an AND condition, i.e. all filters
+     * have to match.
+     *
+     * @param filter The {@link Predicate} that should be used to filter files.
+     * @return The current {@link FileFinder} to perform method chaining.
+     */
+    public FileFinder withFileFilter(Predicate<File> filter) {
+        this.fileFilter = Predicates.and(fileFilter, filter);
+        return this;
+    }
+
+    /**
+     * Accepts a custom filter to select specific directories.
+     * <p>
+     * Multiple calls of {@link #withDirFilter(Predicate)} as well as
+     * {@link #withDirNameRegex(String)} are combined with an AND condition, i.e. all filters have
+     * to match.
+     *
+     * @param filter The {@link Predicate} that should be used to filter directories.
+     * @return The current {@link FileFinder} to perform method chaining.
+     */
+    public FileFinder withDirFilter(Predicate<File> filter) {
+        this.dirFilter = Predicates.and(dirFilter, filter);
+        return this;
+    }
+
+    /**
+     * Adds a file filter that accepts an entry only when it is a directory with at least one
+     * direct child accepted by the given predicate.
+     *
+     * @param fileInDir The {@link Predicate} applied to the direct children of a candidate.
+     * @return The current {@link FileFinder} to perform method chaining.
+     */
+    public FileFinder containingFile(final Predicate<File> fileInDir) {
+        // NOTE(review): this is registered as a *file* filter, yet FileFinderIterator applies file
+        // filters to regular files only, for which the directory test below is always false.
+        // It looks like this filter rejects every result - confirm the intent before relying on it.
+        return withFileFilter(new Predicate<File>() {
+
+            @Override
+            public boolean apply(File directory) {
+                return directoryContainsFile(directory, fileInDir);
+            }
+        });
+    }
+
+    /**
+     * Not implemented.
+     *
+     * @param bytes ignored
+     * @return never returns normally
+     * @throws IllegalArgumentException always
+     */
+    public FileFinder contains(byte[] bytes) {
+        throw new IllegalArgumentException();
+    }
+
+    /**
+     * Tests whether at least one direct child of the given directory is accepted by the predicate.
+     *
+     * @param directory The candidate directory.
+     * @param containedFileFilter The {@link Predicate} applied to each direct child.
+     * @return true if a direct child matches; false if none matches or the children cannot be
+     *         listed.
+     */
+    private static boolean directoryContainsFile(File directory,
+            final Predicate<File> containedFileFilter) {
+        // listFiles() returns null when the path is not a directory or cannot be read, so this
+        // single check covers both cases and prevents a NullPointerException in the loop.
+        File[] children = directory.listFiles();
+        if (children == null) {
+            return false;
+        }
+        for (File child : children) {
+            if (containedFileFilter.apply(child)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+}
\ No newline at end of file
diff --git 
a/inlong-agent/agent-common/src/main/java/org/apache/inlong/agent/utils/file/FileFinderIterator.java
 
b/inlong-agent/agent-common/src/main/java/org/apache/inlong/agent/utils/file/FileFinderIterator.java
new file mode 100644
index 0000000000..051790b0b1
--- /dev/null
+++ 
b/inlong-agent/agent-common/src/main/java/org/apache/inlong/agent/utils/file/FileFinderIterator.java
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.agent.utils.file;
+
+import com.google.common.base.Predicate;
+
+import java.io.File;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.Queue;
+
+/**
+ * Implements an {@link Iterator} that runs the search that has been defined by the
+ * {@link FileFinder} builder object.
+ * <p>
+ * Entries that still have to be examined are kept in a first-in-first-out list, so the tree below
+ * the base directory is visited level by level. Only regular files are ever returned; directories
+ * are merely expanded to discover further entries.
+ */
+public class FileFinderIterator implements Iterator<File> {
+
+    /**
+     * The {@link Predicate} every entry has to pass before it is looked at any further.
+     */
+    private final Predicate<File> yieldFilter;
+    /**
+     * The {@link Predicate} that is used for branching with recursion.
+     */
+    private final Predicate<File> branchFilter;
+    /**
+     * The {@link Predicate} that is used as filter to find specific files.
+     */
+    private final Predicate<File> fileFilter;
+    /**
+     * The {@link Predicate} a directory has to pass before it is considered for branching.
+     */
+    private final Predicate<File> dirFilter;
+    /**
+     * All files and directories that still have to be processed. Entries are taken from the head
+     * and appended at the tail.
+     */
+    private final LinkedList<DepthControl> pending = new LinkedList<DepthControl>();
+    /**
+     * A queue to cache results that will be offered one by one by the {@link Iterator}.
+     */
+    private final Queue<File> resultQueue = new LinkedList<File>();
+    /**
+     * Directories at this depth or deeper are not expanded. Children of the base directory have
+     * depth 1.
+     */
+    private final int maxDepth;
+
+    /**
+     * Creates the Iterator with all configuration options for the search and queues the direct
+     * children of the base directory.
+     *
+     * @param baseDir The directory where the search will start.
+     * @param yieldFilter The {@link Predicate} every entry has to pass to be processed.
+     * @param branchFilter The {@link Predicate} that is used for branching with recursion.
+     * @param fileFilter The {@link Predicate} that is used as filter to find specific files.
+     * @param dirFilter The {@link Predicate} that is used as filter to select directories.
+     * @param maxDepth The depth from which on directories are no longer expanded.
+     */
+    public FileFinderIterator(File baseDir, Predicate<File> yieldFilter,
+            Predicate<File> branchFilter, Predicate<File> fileFilter,
+            Predicate<File> dirFilter, int maxDepth) {
+        this.yieldFilter = yieldFilter;
+        this.branchFilter = branchFilter;
+        this.fileFilter = fileFilter;
+        this.dirFilter = dirFilter;
+        this.maxDepth = maxDepth;
+        enqueueChildren(baseDir, 1);
+    }
+
+    /**
+     * Fills the result queue if necessary and tests if another result is available.
+     *
+     * @see Iterator#hasNext()
+     */
+    @Override
+    public boolean hasNext() {
+        if (resultQueue.isEmpty()) {
+            populateResults();
+        }
+        return !resultQueue.isEmpty();
+    }
+
+    /**
+     * Returns the next file from the result queue.
+     *
+     * @throws java.util.NoSuchElementException if the search has no further results
+     * @see Iterator#next()
+     */
+    @Override
+    public File next() {
+        // The Iterator contract requires an exception rather than a null result once the
+        // iteration is exhausted.
+        if (!hasNext()) {
+            throw new java.util.NoSuchElementException("the file search has no more results");
+        }
+        return resultQueue.poll();
+    }
+
+    /**
+     * Examines pending entries until one result has been found or nothing is left to examine.
+     */
+    private void populateResults() {
+        while (resultQueue.isEmpty() && !pending.isEmpty()) {
+            DepthControl current = pending.removeFirst();
+            File candidate = current.getFile();
+
+            // Entries rejected by the yield filter are neither returned nor expanded.
+            if (!yieldFilter.apply(candidate)) {
+                continue;
+            }
+            if (candidate.isDirectory() && dirFilter.apply(candidate)) {
+                if (branchFilter.apply(candidate) && current.getDepth() < maxDepth) {
+                    enqueueChildren(candidate, current.getDepth() + 1);
+                }
+            } else if (candidate.isFile() && fileFilter.apply(candidate)) {
+                resultQueue.offer(candidate);
+            }
+        }
+    }
+
+    /**
+     * Appends the direct children of a directory to the pending entries.
+     *
+     * @param directory The directory whose children are queued.
+     * @param childDepth The depth that is recorded for every child.
+     */
+    private void enqueueChildren(File directory, int childDepth) {
+        // listFiles() returns null when the path is not a directory or cannot be read.
+        File[] children = directory.listFiles();
+        if (children == null) {
+            return;
+        }
+        for (File child : children) {
+            pending.add(new DepthControl(childDepth, child));
+        }
+    }
+
+    /**
+     * Removing files through this {@link Iterator} is not supported.
+     *
+     * @throws UnsupportedOperationException always
+     * @see Iterator#remove()
+     */
+    @Override
+    public void remove() {
+        // Fail loudly instead of silently ignoring the request.
+        throw new UnsupportedOperationException("remove");
+    }
+
+    /**
+     * Pairs a pending file or directory with the depth at which it was discovered.
+     */
+    public class DepthControl {
+
+        private final int depth;
+        private final File file;
+
+        public DepthControl(int depth, File file) {
+            this.depth = depth;
+            this.file = file;
+        }
+
+        public File getFile() {
+            return file;
+        }
+
+        public int getDepth() {
+            return depth;
+        }
+    }
+}
diff --git 
a/inlong-agent/agent-common/src/main/java/org/apache/inlong/agent/utils/file/FileNameRegexMatchPredicate.java
 
b/inlong-agent/agent-common/src/main/java/org/apache/inlong/agent/utils/file/FileNameRegexMatchPredicate.java
new file mode 100644
index 0000000000..593a237e5e
--- /dev/null
+++ 
b/inlong-agent/agent-common/src/main/java/org/apache/inlong/agent/utils/file/FileNameRegexMatchPredicate.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.agent.utils.file;
+
+import com.google.common.base.Predicate;
+
+import java.io.File;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * A {@link Predicate} that tests the absolute path of a {@link File} against a regular expression.
+ * <p>
+ * The expression is applied with {@link Matcher#lookingAt()} semantics: it has to match starting at
+ * the first character of the absolute path, but it does not have to consume the whole path.
+ */
+public class FileNameRegexMatchPredicate implements Predicate<File> {
+
+    private final Pattern pattern;
+
+    /**
+     * Compiles the expression used by {@link #apply(File)}.
+     *
+     * @param regex expression tested against the absolute path; {@code null} or an empty string
+     *         selects {@code ".*"}
+     * @param caseSensitive {@code false} to compile with {@link Pattern#CASE_INSENSITIVE}
+     */
+    public FileNameRegexMatchPredicate(String regex, boolean caseSensitive) {
+        // A null expression used to fail inside Pattern.compile with a NullPointerException.
+        // Fall back to match-all, which is what DirNameRegexMatchPredicate does as well; an empty
+        // expression already accepted every path, so its behavior is unchanged.
+        String effectiveRegex = (regex == null || regex.isEmpty()) ? ".*" : regex;
+        int flags = caseSensitive ? 0 : Pattern.CASE_INSENSITIVE;
+        this.pattern = Pattern.compile(effectiveRegex, flags);
+    }
+
+    /**
+     * @param input the file whose absolute path is tested
+     * @return {@code true} when the expression matches a prefix of the absolute path
+     */
+    @Override
+    public boolean apply(File input) {
+        // A full match always implies a prefix match, so the prefix test alone decides the result.
+        return pattern.matcher(input.getAbsolutePath()).lookingAt();
+    }
+
+}
diff --git 
a/inlong-agent/agent-common/src/main/java/org/apache/inlong/agent/utils/file/FileUtils.java
 
b/inlong-agent/agent-common/src/main/java/org/apache/inlong/agent/utils/file/FileUtils.java
new file mode 100644
index 0000000000..b141bad43a
--- /dev/null
+++ 
b/inlong-agent/agent-common/src/main/java/org/apache/inlong/agent/utils/file/FileUtils.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.agent.utils.file;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.nio.file.attribute.BasicFileAttributes;
+
+/**
+ * Static helpers to start a file search and to read basic file timestamps.
+ */
+public class FileUtils {
+
+    private static final Logger LOGGER = LoggerFactory.getLogger(FileUtils.class);
+
+    /**
+     * Finds files or sub directories within a given base directory.
+     *
+     * @param baseDirectory A path string representing a directory to search within.
+     * @return A {@link FileFinder} object to specify the search parameters using a builder
+     *         pattern.
+     */
+    public static FileFinder find(String baseDirectory) {
+        return find(new File(baseDirectory));
+    }
+
+    /**
+     * Finds files or sub directories within a given base directory.
+     *
+     * @param baseDirectory A path as {@link File} object to search within.
+     * @return A {@link FileFinder} object to specify the search parameters using a builder
+     *         pattern.
+     */
+    public static FileFinder find(File baseDirectory) {
+        return new FileFinder(baseDirectory);
+    }
+
+    /**
+     * Reads the creation time of a file.
+     *
+     * @param fileName The path of the file to inspect.
+     * @return The creation time in milliseconds since the epoch, or 0 when the file attributes
+     *         cannot be read.
+     */
+    public static long getFileCreationTime(String fileName) {
+        try {
+            BasicFileAttributes attributes =
+                    Files.readAttributes(Paths.get(fileName), BasicFileAttributes.class);
+            return attributes.creationTime().toMillis();
+        } catch (IOException e) {
+            // Log the affected path and hand over the exception itself so that its type and
+            // stack trace are not lost; the failure is still reported to the caller as 0.
+            LOGGER.error("getFileCreationTime error {}", fileName, e);
+            return 0L;
+        }
+    }
+
+    /**
+     * Reads the last modification time of a file.
+     *
+     * @param fileName The path of the file to inspect.
+     * @return The last modification time in milliseconds since the epoch, or 0 when it cannot be
+     *         read.
+     */
+    public static long getFileLastModifyTime(String fileName) {
+        try {
+            return Files.getLastModifiedTime(Paths.get(fileName)).toMillis();
+        } catch (IOException e) {
+            // Log the affected path and hand over the exception itself so that its type and
+            // stack trace are not lost; the failure is still reported to the caller as 0.
+            LOGGER.error("getFileLastModifyTime error {}", fileName, e);
+            return 0L;
+        }
+    }
+
+}
\ No newline at end of file

Reply via email to