This is an automated email from the ASF dual-hosted git repository. krickert pushed a commit to branch OPENNLP-1850-2-tokenizer in repository https://gitbox.apache.org/repos/asf/opennlp.git
commit f48f50f1f69a3dd7a96ec1f3dc71b707a9508400 Author: Kristian Rickert <[email protected]> AuthorDate: Sat Jun 20 13:05:02 2026 -0400 OPENNLP-1850 Address Copilot review on the UAX #29 tokenizer - WordBoundaryConformanceTest: guard the conformance resource stream with Objects.requireNonNull and a clear message instead of an opaque NPE in InputStreamReader, and remove the unused NO_BOUNDARY constant. - NormalizationProfiles.forLanguage: fail loud on a null language argument at the public entry point, with a null-rejection test. --- .../java/opennlp/tools/util/normalizer/NormalizationProfiles.java | 2 ++ .../opennlp/tools/tokenize/uax29/WordBoundaryConformanceTest.java | 6 ++++-- .../opennlp/tools/util/normalizer/NormalizationProfilesTest.java | 6 ++++++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/util/normalizer/NormalizationProfiles.java b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/util/normalizer/NormalizationProfiles.java index 4cd93f287..f46c57dc5 100644 --- a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/util/normalizer/NormalizationProfiles.java +++ b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/util/normalizer/NormalizationProfiles.java @@ -20,6 +20,7 @@ import java.util.HashMap; import java.util.Locale; import java.util.Map; import java.util.MissingResourceException; +import java.util.Objects; import java.util.Optional; import java.util.Set; @@ -83,6 +84,7 @@ public final class NormalizationProfiles { * @return The profile, or empty if the language has no Snowball stemmer. */ public static Optional<NormalizationProfile> forLanguage(String language) { + Objects.requireNonNull(language, "language"); String code = language.strip().toLowerCase(Locale.ROOT); if (code.length() == 2) { try { diff --git a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/tokenize/uax29/WordBoundaryConformanceTest.java b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/tokenize/uax29/WordBoundaryConformanceTest.java index e1bc8231d..80339a801 100644 --- a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/tokenize/uax29/WordBoundaryConformanceTest.java +++ b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/tokenize/uax29/WordBoundaryConformanceTest.java @@ -24,6 +24,7 @@ import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.Objects; import org.junit.jupiter.api.Test; @@ -37,7 +38,6 @@ import static org.junit.jupiter.api.Assertions.assertTrue; public class WordBoundaryConformanceTest { private static final int BOUNDARY = 0x00F7; // division sign - private static final int NO_BOUNDARY = 0x00D7; // multiplication sign @Test void testOfficialUnicodeWordBreakConformance() throws IOException { @@ -45,7 +45,9 @@ public class WordBoundaryConformanceTest { int passed = 0; final List<String> failures = new ArrayList<>(); - try (InputStream in = WordBoundaryConformanceTest.class.getResourceAsStream("WordBreakTest.txt"); + try (InputStream in = Objects.requireNonNull( + WordBoundaryConformanceTest.class.getResourceAsStream("WordBreakTest.txt"), + "Missing test resource: WordBreakTest.txt"); BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) { String raw; diff --git a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/util/normalizer/NormalizationProfilesTest.java b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/util/normalizer/NormalizationProfilesTest.java index 2d2c02b38..6dbe95260 100644 --- a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/util/normalizer/NormalizationProfilesTest.java +++ b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/util/normalizer/NormalizationProfilesTest.java @@ -27,6 +27,7 @@ import opennlp.tools.stemmer.snowball.SnowballStemmer; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; public class NormalizationProfilesTest { @@ -131,4 +132,9 @@ public class NormalizationProfilesTest { assertEquals(19, NormalizationProfiles.supportedLanguages().size()); assertTrue(NormalizationProfiles.supportedLanguages().containsAll(List.of("eng", "deu", "fra"))); } + + @Test + void testForLanguageRejectsNull() { + assertThrows(NullPointerException.class, () -> NormalizationProfiles.forLanguage(null)); + } }
