This is an automated email from the ASF dual-hosted git repository. mawiesne pushed a commit to branch OPENNLP-1829-Transfer-DirectoryModelFinder-to-core in repository https://gitbox.apache.org/repos/asf/opennlp.git
commit bd45c1d9235e6ab776bd5d2a925e94b9c47332d1 Author: Martin Wiesner <[email protected]> AuthorDate: Tue May 12 10:02:41 2026 +0200 OPENNLP-1829: Transfer DirectoryModelFinder to OpenNLP core --- .../tools/models/AbstractClassPathModelFinder.java | 70 ++++++++++ .../tools/models/dir/DirectoryModelFinder.java | 145 +++++++++++++++++++++ .../models/simple/SimpleClassPathModelFinder.java | 59 --------- 3 files changed, 215 insertions(+), 59 deletions(-) diff --git a/opennlp-core/opennlp-model-resolver/src/main/java/opennlp/tools/models/AbstractClassPathModelFinder.java b/opennlp-core/opennlp-model-resolver/src/main/java/opennlp/tools/models/AbstractClassPathModelFinder.java index b31128dd8..2050892c8 100644 --- a/opennlp-core/opennlp-model-resolver/src/main/java/opennlp/tools/models/AbstractClassPathModelFinder.java +++ b/opennlp-core/opennlp-model-resolver/src/main/java/opennlp/tools/models/AbstractClassPathModelFinder.java @@ -16,12 +16,22 @@ */ package opennlp.tools.models; +import java.io.IOException; +import java.net.JarURLConnection; import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.util.ArrayList; +import java.util.Enumeration; import java.util.HashSet; import java.util.List; +import java.util.Locale; import java.util.Objects; import java.util.Optional; import java.util.Set; +import java.util.jar.JarEntry; +import java.util.jar.JarFile; +import java.util.regex.Pattern; /** * A base implementation of a {@link ClassPathModelFinder} for the detection of @@ -126,4 +136,64 @@ public abstract class AbstractClassPathModelFinder implements ClassPathModelFind return jarModelPrefix; } + /** + * Escapes a {@code wildcard} expressions for usage as a Java regular expression. + * + * @param wildcard A valid expression. It must not be {@code null}. + * @return The escaped regex. + */ + protected String asRegex(String wildcard) { + return wildcard + .replace(".", "\\.") + .replace("*", ".*") + .replace("?", "."); + } + + protected boolean matchesPattern(URL url, Pattern pattern) { + return pattern.matcher(url.getFile()).matches(); + } + + /** + * Converts a {@code location} in String form to a {@link URL}. + * + * @param location The resource path and/or reference. + * @return The converted {@link URL} form. + * @throws IOException Thrown if IO errors occurred during conversion + */ + protected static URL toURL(String location) throws IOException { + try { + return new URI(location).toURL(); + } catch (URISyntaxException e) { + throw new IOException(e); + } + } + + protected List<URI> getURIsFromJar(URL fileUrl, boolean isWindows) throws IOException { + final List<URI> uris = new ArrayList<>(); + final String location = JAR + ":" + + (isWindows ? fileUrl.toString().replace("\\", "/") + : fileUrl.toString()) + "!/"; + final URL jarUrl = toURL(location); + final JarURLConnection jarConnection = (JarURLConnection) jarUrl.openConnection(); + try (JarFile jarFile = jarConnection.getJarFile()) { + final Enumeration<JarEntry> entries = jarFile.entries(); + while (entries.hasMoreElements()) { + final JarEntry entry = entries.nextElement(); + if (!entry.isDirectory()) { + try { + uris.add(new URI(jarUrl + entry.getName())); + } catch (URISyntaxException ignored) { + //if we cannot convert to URI here, we ignore that entry. + } + } + } + } + + return uris; + } + + protected boolean isWindows() { + return System.getProperty("os.name", "unknown").toLowerCase(Locale.ROOT).contains("win"); + } + } diff --git a/opennlp-core/opennlp-model-resolver/src/main/java/opennlp/tools/models/dir/DirectoryModelFinder.java b/opennlp-core/opennlp-model-resolver/src/main/java/opennlp/tools/models/dir/DirectoryModelFinder.java new file mode 100644 index 000000000..e31c7552c --- /dev/null +++ b/opennlp-core/opennlp-model-resolver/src/main/java/opennlp/tools/models/dir/DirectoryModelFinder.java @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package opennlp.tools.models.dir; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URL; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.regex.Pattern; +import java.util.stream.Stream; + +import org.slf4j.LoggerFactory; + +import opennlp.tools.models.AbstractClassPathModelFinder; +import opennlp.tools.models.ClassPathModelFinder; + +/** + * The {@code DirectoryModelFinder} class is responsible for finding model files in a given directory + * on the classpath. + * + * <p>This class allows searching for models based on wildcard patterns, either in plain directory structures + * or within JAR files. The search can be performed recursively depending on the specified configuration. + * + * <p><b>Usage:</b> + * <ul> + * <li>Provide the prefix for models to be found in JAR files using the {@code jarModelPrefix} parameter.</li> + * <li>Specify the directory to search and whether to enable recursive scanning.</li> + * <li>The class supports resolving both direct file matches and entries within JAR archives.</li> + * </ul> + * + * @see AbstractClassPathModelFinder + * @see ClassPathModelFinder + */ +public class DirectoryModelFinder extends AbstractClassPathModelFinder implements ClassPathModelFinder { + + private static final org.slf4j.Logger logger = LoggerFactory.getLogger(DirectoryModelFinder.class); + + private final Path directory; + private final boolean recursive; + private final Pattern jarPattern; + private Pattern filePattern; + private String prevFilePattern; + + /** + * Instantiates a new {@link DirectoryModelFinder} with the specified parameters. + * + * @param jarModelPrefix The prefix for identifying model files in JAR archives; may be {@code null}. + * If it is {@code null}, {@link ClassPathModelFinder#OPENNLP_MODEL_JAR_PREFIX} is used. + * @param directory The root directory to scan from for model files; must not be {@code null}. + * @param recursive {@code true} if the search should include subdirectories, {@code false} otherwise. + * @throws IllegalArgumentException Thrown if {@code directory} is {@code null}. + */ + public DirectoryModelFinder(String jarModelPrefix, Path directory, boolean recursive) { + super(jarModelPrefix == null ? OPENNLP_MODEL_JAR_PREFIX : jarModelPrefix); + if (directory == null) { + throw new IllegalArgumentException("Given directory must not be NULL"); + } + this.directory = directory; + this.recursive = recursive; + this.jarPattern = Pattern.compile(asRegex("*" + getJarModelPrefix())); + } + + /** + * @return Always {@code null} as it is not needed for the directory case. + */ + @Override + protected Object getContext() { + return null; //not needed for the simple case. Just return NULL. + } + + /** + * {@inheritDoc} + */ + @Override + protected List<URI> getMatchingURIs(String wildcardPattern, Object context) { + if (wildcardPattern == null) { + return Collections.emptyList(); + } + + final boolean isWindows = isWindows(); + final List<URL> cp = getDirectoryContent(); + final List<URI> cpu = new ArrayList<>(); + final String filePatternString = asRegex("*" + wildcardPattern); + if(!filePatternString.equals(prevFilePattern)) { + this.filePattern = Pattern.compile(filePatternString); + this.prevFilePattern = filePatternString; + } + + for (URL url : cp) { + if (matchesPattern(url, jarPattern)) { + try { + for (URI u : getURIsFromJar(url, isWindows)) { + if (matchesPattern(u.toURL(), filePattern)) { + cpu.add(u); + } + } + } catch (IOException e) { + logger.warn("Cannot read content of {}.", url, e); + } + } + } + + return cpu; + } + + private List<URL> getDirectoryContent() { + final List<URL> fileList = new ArrayList<>(); + try (Stream<Path> files = Files.walk(directory, recursive ? Integer.MAX_VALUE : 1)) { + files.filter(Files::isRegularFile).forEach(path -> { + try { + fileList.add(path.toUri().toURL()); + } catch (MalformedURLException ignored) { + + } + }); + } catch (IOException e) { + logger.warn(e.getLocalizedMessage(), e); + } + return fileList; + } + + + +} diff --git a/opennlp-core/opennlp-model-resolver/src/main/java/opennlp/tools/models/simple/SimpleClassPathModelFinder.java b/opennlp-core/opennlp-model-resolver/src/main/java/opennlp/tools/models/simple/SimpleClassPathModelFinder.java index 44bf4d61a..10b83f938 100644 --- a/opennlp-core/opennlp-model-resolver/src/main/java/opennlp/tools/models/simple/SimpleClassPathModelFinder.java +++ b/opennlp-core/opennlp-model-resolver/src/main/java/opennlp/tools/models/simple/SimpleClassPathModelFinder.java @@ -19,21 +19,15 @@ package opennlp.tools.models.simple; import java.io.IOException; import java.lang.reflect.Field; import java.lang.reflect.Method; -import java.net.JarURLConnection; import java.net.MalformedURLException; import java.net.URI; -import java.net.URISyntaxException; import java.net.URL; import java.net.URLClassLoader; import java.nio.file.Path; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; -import java.util.Enumeration; import java.util.List; -import java.util.Locale; -import java.util.jar.JarEntry; -import java.util.jar.JarFile; import java.util.regex.Pattern; import org.slf4j.Logger; @@ -131,59 +125,6 @@ public class SimpleClassPathModelFinder extends AbstractClassPathModelFinder imp return cpu; } - /** - * Escapes a {@code wildcard} expressions for usage as a Java regular expression. - * - * @param wildcard A valid expression. It must not be {@code null}. - * @return The escaped regex. - */ - private String asRegex(String wildcard) { - return wildcard - .replace(".", "\\.") - .replace("*", ".*") - .replace("?", "."); - } - - private boolean matchesPattern(URL url, Pattern pattern) { - return pattern.matcher(url.getFile()).matches(); - } - - private static URL toURL(String location) throws IOException { - try { - return new URI(location).toURL(); - } catch (URISyntaxException e) { - throw new IOException(e); - } - } - - private List<URI> getURIsFromJar(URL fileUrl, boolean isWindows) throws IOException { - final List<URI> uris = new ArrayList<>(); - final String location = JAR + ":" + - (isWindows ? fileUrl.toString().replace("\\", "/") - : fileUrl.toString()) + "!/"; - final URL jarUrl = toURL(location); - final JarURLConnection jarConnection = (JarURLConnection) jarUrl.openConnection(); - try (JarFile jarFile = jarConnection.getJarFile()) { - final Enumeration<JarEntry> entries = jarFile.entries(); - while (entries.hasMoreElements()) { - final JarEntry entry = entries.nextElement(); - if (!entry.isDirectory()) { - try { - uris.add(new URI(jarUrl + entry.getName())); - } catch (URISyntaxException ignored) { - //if we cannot convert to URI here, we ignore that entry. - } - } - } - } - - return uris; - } - - private boolean isWindows() { - return System.getProperty("os.name", "unknown").toLowerCase(Locale.ROOT).contains("win"); - } - /** * Attempts to obtain {@link URL URLs} from the classpath in the following order: * <p>
