This is an automated email from the ASF dual-hosted git repository. dockerzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/inlong.git
The following commit(s) were added to refs/heads/master by this push: new 06c7b829f1 [INLONG-9102][Agent] Add file utils (#9103) 06c7b829f1 is described below commit 06c7b829f170698e65c7ffd822b4db9c4078aef7 Author: justinwwhuang <hww_jus...@163.com> AuthorDate: Wed Oct 25 03:38:44 2023 -0500 [INLONG-9102][Agent] Add file utils (#9103) --- .../utils/file/DirNameRegexMatchPredicate.java | 46 ++++ .../apache/inlong/agent/utils/file/FileFinder.java | 296 +++++++++++++++++++++ .../agent/utils/file/FileFinderIterator.java | 162 +++++++++++ .../utils/file/FileNameRegexMatchPredicate.java | 43 +++ .../apache/inlong/agent/utils/file/FileUtils.java | 76 ++++++ 5 files changed, 623 insertions(+) diff --git a/inlong-agent/agent-common/src/main/java/org/apache/inlong/agent/utils/file/DirNameRegexMatchPredicate.java b/inlong-agent/agent-common/src/main/java/org/apache/inlong/agent/utils/file/DirNameRegexMatchPredicate.java new file mode 100644 index 0000000000..cd22a730b7 --- /dev/null +++ b/inlong-agent/agent-common/src/main/java/org/apache/inlong/agent/utils/file/DirNameRegexMatchPredicate.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
package org.apache.inlong.agent.utils.file;

import com.google.common.base.Predicate;

import java.io.File;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * A {@link Predicate} that tests the absolute path of a {@link File} against a regular
 * expression.
 *
 * <p>A path is accepted when the expression matches the whole path or a leading part of it.
 */
public class DirNameRegexMatchPredicate implements Predicate<File> {

    /** The compiled expression every candidate path is tested against. */
    private final Pattern pattern;

    /**
     * Compiles the expression used by this predicate.
     *
     * @param regex         the regular expression; {@code null} or an empty string is replaced
     *                      by {@code ".*"}
     * @param caseSensitive {@code false} to compile the expression with
     *                      {@link Pattern#CASE_INSENSITIVE}
     */
    public DirNameRegexMatchPredicate(String regex, boolean caseSensitive) {
        String effectiveRegex = (regex == null || regex.isEmpty()) ? ".*" : regex;
        // A flag value of 0 is what the single-argument Pattern.compile uses.
        int flags = caseSensitive ? 0 : Pattern.CASE_INSENSITIVE;
        this.pattern = Pattern.compile(effectiveRegex, flags);
    }

    /**
     * Tests the absolute path of the given file.
     *
     * @param input the file or directory whose absolute path is tested
     * @return {@code true} if the expression matches the whole path or a prefix of it
     */
    @Override
    public boolean apply(File input) {
        Matcher pathMatcher = pattern.matcher(input.getAbsolutePath());
        if (pathMatcher.matches()) {
            return true;
        }
        return pathMatcher.lookingAt();
    }

}
package org.apache.inlong.agent.utils.file;

import com.google.common.base.Predicate;
import com.google.common.base.Predicates;
import com.google.common.collect.Lists;

import java.io.File;
import java.util.Iterator;
import java.util.List;

/**
 * The {@link FileFinder} is used to configure the options for a file search, following the
 * builder pattern. Several methods can be used to influence the search.
 *
 * <p>To define whether files, directories or both are requested, use the {@link #yieldFiles()},
 * {@link #yieldDirectories()} or {@link #yieldFilesAndDirectories()} methods. The default is
 * {@link #yieldFiles()}.
 *
 * <p>Without any options sub directories are not processed. The {@link #recursive()} and
 * {@link #recursive(Predicate)} methods enable processing of sub directories, and
 * {@link #withDepth(int)} sets the maximum depth (default {@code 1}).
 *
 * <p>Results can be narrowed with {@link #withFileNameRegex(String)},
 * {@link #withDirNameRegex(String)} or the custom {@link #withFileFilter(Predicate)} and
 * {@link #withDirFilter(Predicate)} methods. The regular expression comparison can be tuned with
 * the {@link #caseSensitive()} and {@link #ignoreCase()} methods.
 *
 * <p>All these methods can be chained together with the following limitations.
 * <ul>
 * <li>{@link #caseSensitive()} and {@link #ignoreCase()} only affect the regex filters that are
 * defined after them.
 * <li>Only one yield* call makes sense. If several are used, the last one wins.
 * </ul>
 *
 * <p>To finally execute the search there are two options.
 * <ul>
 * <li>Use the {@link #list()} method to execute the search in one go and get all the results in
 * a {@code List<File>}.
 * <li>Call {@link #iterator()} and get the results piece by piece. Since the {@link FileFinder}
 * implements {@link Iterable} it can be used in a for-each loop.
 * </ul>
 *
 * <p>Examples:
 * <pre>{@code
 * // Iterate over the search results for a directory
 * for (File f : FileUtils.find("/data/logs")) {
 *     ...
 * }
 *
 * // Get the search results for a directory as a list
 * List<File> allFiles = FileUtils.find(someDir).list();
 *
 * // Skip all .svn directories within a source tree
 * Predicate<File> noSvnDirs = new Predicate<File>() {
 *     public boolean apply(File file) {
 *         return !file.getName().equals(".svn");
 *     }
 * };
 * for (File f : FileUtils.find("src/java/").recursive(noSvnDirs)) {
 *     ...
 * }
 * }</pre>
 */
public class FileFinder implements Iterable<File> {

    /**
     * Predicate that returns true when a {@link File} object actually points to a file.
     */
    private static final Predicate<File> IS_FILE = new Predicate<File>() {

        @Override
        public boolean apply(File input) {
            return input.isFile();
        }
    };

    /**
     * Predicate that returns true when a {@link File} object points to a directory.
     */
    private static final Predicate<File> IS_DIRECTORY = new Predicate<File>() {

        @Override
        public boolean apply(File input) {
            return input.isDirectory();
        }
    };

    /**
     * The base directory which will be used for the search.
     */
    private final File baseDir;

    /**
     * A filter which determines which type of entries is requested (files, directories or both).
     */
    private Predicate<File> yieldFilter = IS_FILE;

    /**
     * A filter which determines which directories will be processed recursively. By default no
     * directory is.
     */
    private Predicate<File> branchFilter = Predicates.alwaysFalse();

    /**
     * A filter that can be used to select specific files. By default every file is accepted.
     */
    private Predicate<File> fileFilter = Predicates.alwaysTrue();

    /**
     * A filter that can be used to select specific directories. By default every directory is
     * accepted.
     */
    private Predicate<File> dirFilter = Predicates.alwaysTrue();

    /**
     * Defines whether the regex filters created by {@link #withFileNameRegex(String)} and
     * {@link #withDirNameRegex(String)} are case sensitive. The default is case sensitive.
     */
    private boolean caseSensitive = true;

    /**
     * The maximum depth handed to the iterator.
     */
    private int maxDepth = 1;

    /**
     * Creates a new {@link FileFinder} object for a given base directory.
     *
     * @param baseDir The base directory where the search starts.
     */
    public FileFinder(File baseDir) {
        this.baseDir = baseDir;
    }

    /**
     * Sets the maximum depth of the search. The value is handed unchanged to the
     * {@link FileFinderIterator} created by {@link #iterator()}.
     *
     * @param depth The maximum depth.
     * @return The current {@link FileFinder} to perform method chaining.
     */
    public FileFinder withDepth(int depth) {
        maxDepth = depth;
        return this;
    }

    /**
     * Returns the result of the search as a list.
     *
     * @return A list with the files that were found.
     */
    public List<File> list() {
        return Lists.newArrayList(iterator());
    }

    /**
     * Creates an Iterator that can be used to iterate through the results of the search. A new
     * {@link FileFinderIterator} is built from the current configuration on every call.
     *
     * @return An {@code Iterator<File>} over the results.
     * @see Iterable#iterator()
     */
    @Override
    public Iterator<File> iterator() {
        return new FileFinderIterator(baseDir, yieldFilter, branchFilter,
                fileFilter, dirFilter, maxDepth);
    }

    /**
     * Configures the {@link FileFinder} to return files.
     *
     * @return The current {@link FileFinder} to perform method chaining.
     */
    public FileFinder yieldFiles() {
        yieldFilter = IS_FILE;
        return this;
    }

    /**
     * Configures the {@link FileFinder} to return directories.
     *
     * @return The current {@link FileFinder} to perform method chaining.
     */
    public FileFinder yieldDirectories() {
        yieldFilter = IS_DIRECTORY;
        return this;
    }

    /**
     * Configures the {@link FileFinder} to return files and directories.
     *
     * @return The current {@link FileFinder} to perform method chaining.
     */
    public FileFinder yieldFilesAndDirectories() {
        yieldFilter = Predicates.or(IS_FILE, IS_DIRECTORY);
        return this;
    }

    /**
     * Configures the {@link FileFinder} to use case sensitive comparisons for the regex filters
     * defined afterwards.
     *
     * @return The current {@link FileFinder} to perform method chaining.
     */
    public FileFinder caseSensitive() {
        caseSensitive = true;
        return this;
    }

    /**
     * Configures the {@link FileFinder} to ignore the case for the regex filters defined
     * afterwards.
     *
     * @return The current {@link FileFinder} to perform method chaining.
     */
    public FileFinder ignoreCase() {
        caseSensitive = false;
        return this;
    }

    /**
     * Adds a {@link FileNameRegexMatchPredicate} built from the given expression and the current
     * case sensitivity setting to the file filter.
     *
     * @param name The regular expression.
     * @return The current {@link FileFinder} to perform method chaining.
     */
    public FileFinder withFileNameRegex(final String name) {
        return withFileFilter(new FileNameRegexMatchPredicate(name, caseSensitive));
    }

    /**
     * Adds a {@link DirNameRegexMatchPredicate} built from the given expression and the current
     * case sensitivity setting to the directory filter.
     *
     * @param name The regular expression.
     * @return The current {@link FileFinder} to perform method chaining.
     */
    public FileFinder withDirNameRegex(final String name) {
        return withDirFilter(new DirNameRegexMatchPredicate(name, caseSensitive));
    }

    /**
     * Enables a recursive search in which every sub directory is a candidate for further
     * processing.
     *
     * @return The current {@link FileFinder} to perform method chaining.
     */
    public FileFinder recursive() {
        branchFilter = Predicates.alwaysTrue();
        return this;
    }

    /**
     * Enables a recursive search that only processes sub directories matching the given
     * {@link Predicate}. The predicate replaces any previously configured branch filter.
     *
     * @param branchFilter The {@code Predicate<File>} that returns true for all directories
     *                     that should be used in the search.
     * @return The current {@link FileFinder} to perform method chaining.
     */
    public FileFinder recursive(Predicate<File> branchFilter) {
        this.branchFilter = branchFilter;
        return this;
    }

    /**
     * Accepts a custom filter to search for specific files.
     *
     * <p>Multiple calls of {@link #withFileFilter(Predicate)} as well as other file filter
     * methods like {@link #withFileNameRegex(String)} are combined with an AND condition, i.e.
     * all filters have to match.
     *
     * @param filter The {@link Predicate} that should be used to filter files.
     * @return The current {@link FileFinder} to perform method chaining.
     */
    public FileFinder withFileFilter(Predicate<File> filter) {
        this.fileFilter = Predicates.and(fileFilter, filter);
        return this;
    }

    /**
     * Accepts a custom filter to select specific directories. Multiple directory filters are
     * combined with an AND condition, i.e. all filters have to match.
     *
     * @param filter The {@link Predicate} that should be used to filter directories.
     * @return The current {@link FileFinder} to perform method chaining.
     */
    public FileFinder withDirFilter(Predicate<File> filter) {
        this.dirFilter = Predicates.and(dirFilter, filter);
        return this;
    }

    /**
     * Adds a file filter that accepts an entry only if it is a directory with at least one
     * direct child matching the given predicate.
     *
     * @param fileInDir The {@link Predicate} that is applied to the direct children.
     * @return The current {@link FileFinder} to perform method chaining.
     */
    public FileFinder containingFile(final Predicate<File> fileInDir) {
        // NOTE(review): this is registered as a *file* filter although the predicate only ever
        // accepts directories - confirm that this combination is intended.
        return withFileFilter(new Predicate<File>() {

            @Override
            public boolean apply(File directory) {
                return directoryContainsFile(directory, fileInDir);
            }
        });
    }

    /**
     * Not implemented.
     *
     * @param bytes Unused.
     * @return Never returns normally.
     * @throws IllegalArgumentException always.
     */
    public FileFinder contains(byte[] bytes) {
        throw new IllegalArgumentException();
    }

    /**
     * Checks whether a directory has a direct child accepted by the given predicate.
     *
     * @param directory          The entry to inspect.
     * @param containgFileFilter The predicate applied to every direct child.
     * @return true if {@code directory} is a directory and at least one child matches, false
     *         otherwise (including when the directory cannot be listed).
     */
    private boolean directoryContainsFile(File directory,
            final Predicate<File> containgFileFilter) {
        if (!directory.isDirectory()) {
            return false;
        }
        File[] allFiles = directory.listFiles();
        if (allFiles == null) {
            // listFiles() returns null on an I/O error, e.g. when the directory is unreadable.
            return false;
        }
        for (File file : allFiles) {
            if (containgFileFilter.apply(file)) {
                return true;
            }
        }
        return false;
    }

}
package org.apache.inlong.agent.utils.file;

import com.google.common.base.Predicate;

import java.io.File;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.NoSuchElementException;
import java.util.Queue;

/**
 * Implements an {@code Iterator<File>} that runs the search that has been defined by the
 * {@link FileFinder} builder object.
 *
 * <p>Entries are processed lazily: pending entries are only examined when a result is requested
 * and the result queue is empty. Only regular files are ever returned. A directory is only
 * descended into when the yield filter, the directory filter and the branch filter all accept it
 * and its depth is below the maximum depth.
 */
public class FileFinderIterator implements Iterator<File> {

    /**
     * The {@link Predicate} that every entry has to pass before it is examined any further.
     */
    private final Predicate<File> yieldFilter;
    /**
     * The {@link Predicate} that is used for branching with recursion.
     */
    private final Predicate<File> branchFilter;
    /**
     * The {@link Predicate} that is used as filter to find specific files.
     */
    private final Predicate<File> fileFilter;
    /**
     * The {@link Predicate} that a directory has to pass before it can be descended into.
     */
    private final Predicate<File> dirFilter;
    /**
     * The files and directories that still have to be processed. Entries are appended at the
     * tail and taken from the head.
     */
    private final LinkedList<DepthControl> depthStack = new LinkedList<DepthControl>();
    /**
     * A queue to cache results that will be offered one by one by the {@link Iterator}.
     */
    private final Queue<File> resultQueue = new LinkedList<File>();
    /**
     * Directories at this depth or deeper are not descended into.
     */
    private final int maxDepth;

    /**
     * Creates the Iterator with all configuration options for the search. The direct children of
     * {@code baseDir} are registered with depth 1.
     *
     * @param baseDir      The directory where the search will start.
     * @param yieldFilter  The {@link Predicate} every entry has to pass before it is examined.
     * @param branchFilter The {@link Predicate} that is used for branching with recursion.
     * @param fileFilter   The {@link Predicate} that is used as filter to find specific files.
     * @param dirFilter    The {@link Predicate} that is used as filter for directories.
     * @param maxDepth     The maximum depth; only directories with a smaller depth are descended
     *                     into.
     */
    public FileFinderIterator(File baseDir, Predicate<File> yieldFilter,
            Predicate<File> branchFilter, Predicate<File> fileFilter,
            Predicate<File> dirFilter, int maxDepth) {
        this.yieldFilter = yieldFilter;
        this.branchFilter = branchFilter;
        this.fileFilter = fileFilter;
        this.maxDepth = maxDepth;
        this.dirFilter = dirFilter;
        enqueueChildren(baseDir, 1);
    }

    /**
     * Fills the result queue if necessary and tests if another result is available.
     *
     * @see Iterator#hasNext()
     */
    @Override
    public boolean hasNext() {
        if (resultQueue.isEmpty()) {
            populateResults();
        }
        return !resultQueue.isEmpty();
    }

    /**
     * Returns the next file from the result queue.
     *
     * @return The next matching file.
     * @throws NoSuchElementException if the search has no more results.
     * @see Iterator#next()
     */
    @Override
    public File next() {
        if (!hasNext()) {
            throw new NoSuchElementException();
        }
        return resultQueue.poll();
    }

    /**
     * Processes pending entries until one result is available or nothing is left to process.
     */
    private void populateResults() {
        while (!depthStack.isEmpty() && resultQueue.isEmpty()) {
            DepthControl currentDepthControl = depthStack.pop();
            File currentFile = currentDepthControl.getFile();
            int currentDepth = currentDepthControl.getDepth();

            // Entries rejected by the yield filter are dropped entirely; a rejected directory
            // is therefore not descended into either.
            if (!yieldFilter.apply(currentFile)) {
                continue;
            }
            if (currentFile.isDirectory() && dirFilter.apply(currentFile)) {
                if (branchFilter.apply(currentFile) && currentDepth < maxDepth) {
                    enqueueChildren(currentFile, currentDepth + 1);
                }
            } else if (currentFile.isFile() && fileFilter.apply(currentFile)) {
                resultQueue.offer(currentFile);
            }
        }
    }

    /**
     * Appends the direct children of a directory to the pending entries.
     *
     * @param directory The directory to list.
     * @param depth     The depth that is recorded for every child.
     */
    private void enqueueChildren(File directory, int depth) {
        File[] children = directory.listFiles();
        if (children == null) {
            // listFiles() returns null when the path is not a directory or cannot be read.
            return;
        }
        for (File child : children) {
            depthStack.add(new DepthControl(depth, child));
        }
    }

    /**
     * Removing elements is not supported by this {@link Iterator}.
     *
     * @throws UnsupportedOperationException always.
     * @see Iterator#remove()
     */
    @Override
    public void remove() {
        throw new UnsupportedOperationException("remove");
    }

    /**
     * Pairs a pending file or directory with the depth at which it was found.
     */
    public class DepthControl {

        private final int depth;
        private final File file;

        /**
         * @param depth The depth of the entry; direct children of the base directory have
         *              depth 1.
         * @param file  The pending file or directory.
         */
        public DepthControl(int depth, File file) {
            this.depth = depth;
            this.file = file;
        }

        public File getFile() {
            return file;
        }

        public int getDepth() {
            return depth;
        }
    }
}
package org.apache.inlong.agent.utils.file;

import com.google.common.base.Predicate;

import java.io.File;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * A {@link Predicate} that tests a {@link File} against a regular expression.
 *
 * <p>The expression is applied to the absolute path of the file, not only to its name. A file is
 * accepted when the expression matches the whole path or a leading part of it.
 */
public class FileNameRegexMatchPredicate implements Predicate<File> {

    /** The compiled expression every candidate path is tested against. */
    private final Pattern pattern;

    /**
     * Compiles the expression used by this predicate.
     *
     * @param regex         the regular expression; {@code null} or an empty string is replaced
     *                      by {@code ".*"}, the same default that
     *                      {@link DirNameRegexMatchPredicate} applies
     * @param caseSensitive {@code false} to compile the expression with
     *                      {@link Pattern#CASE_INSENSITIVE}
     */
    public FileNameRegexMatchPredicate(String regex, boolean caseSensitive) {
        // Pattern.compile(null) would throw a NullPointerException.
        if (regex == null || regex.isEmpty()) {
            regex = ".*";
        }
        if (!caseSensitive) {
            pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
        } else {
            pattern = Pattern.compile(regex);
        }
    }

    /**
     * Tests the absolute path of the given file.
     *
     * @param input the file whose absolute path is tested
     * @return {@code true} if the expression matches the whole path or a prefix of it
     */
    @Override
    public boolean apply(File input) {
        Matcher matcher = pattern.matcher(input.getAbsolutePath());
        return matcher.matches() || matcher.lookingAt();
    }

}
package org.apache.inlong.agent.utils.file;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.attribute.BasicFileAttributes;

/**
 * Static helpers for searching directories and reading file timestamps.
 */
public class FileUtils {

    private static final Logger LOGGER = LoggerFactory.getLogger(FileUtils.class);

    /**
     * Finds files or sub directories within a given base directory.
     *
     * @param baseDirectory A path string representing a directory to search within.
     * @return A {@link FileFinder} object to specify the search parameters using a builder
     *         pattern.
     */
    public static FileFinder find(String baseDirectory) {
        return find(new File(baseDirectory));
    }

    /**
     * Finds files or sub directories within a given base directory.
     *
     * @param baseDirectory A path as {@link File} object to search within.
     * @return A {@link FileFinder} object to specify the search parameters using a builder
     *         pattern.
     */
    public static FileFinder find(File baseDirectory) {
        return new FileFinder(baseDirectory);
    }

    /**
     * Reads the creation time of a file.
     *
     * @param fileName The path of the file.
     * @return The creation time in milliseconds since the epoch, or {@code 0} if the attributes
     *         could not be read.
     */
    public static long getFileCreationTime(String fileName) {
        try {
            BasicFileAttributes attributes =
                    Files.readAttributes(Paths.get(fileName), BasicFileAttributes.class);
            return attributes.creationTime().toMillis();
        } catch (IOException e) {
            // Pass the exception itself so that the cause and stack trace are not lost.
            LOGGER.error("getFileCreationTime error, file {}", fileName, e);
            return 0L;
        }
    }

    /**
     * Reads the last modified time of a file.
     *
     * @param fileName The path of the file.
     * @return The last modified time in milliseconds since the epoch, or {@code 0} if it could
     *         not be read.
     */
    public static long getFileLastModifyTime(String fileName) {
        try {
            return Files.getLastModifiedTime(Paths.get(fileName)).toMillis();
        } catch (IOException e) {
            // Pass the exception itself so that the cause and stack trace are not lost.
            LOGGER.error("getFileLastModifyTime error, file {}", fileName, e);
            return 0L;
        }
    }

}