This is an automated email from the ASF dual-hosted git repository. krickert pushed a commit to branch OPENNLP-1850-2-tokenizer in repository https://gitbox.apache.org/repos/asf/opennlp.git
commit a75f272f93bfb9af836d246ee968a3fb78d0ed36 Author: Kristian Rickert <[email protected]> AuthorDate: Sun Jun 21 23:57:20 2026 -0400 OPENNLP-1850 Fail fast on null public-entry arguments (review nits) NormalizationProfiles.detect now rejects a null text or detector with a clear NullPointerException instead of failing deeper inside language detection. The TermAnalyzer caseFold(Locale) builder step rejects a null locale up front. ExtendedPictographic names the missing resource in its read-failure message, matching WordBreakProperty. --- .../main/java/opennlp/tools/tokenize/uax29/ExtendedPictographic.java | 3 ++- .../main/java/opennlp/tools/util/normalizer/NormalizationProfiles.java | 2 ++ .../src/main/java/opennlp/tools/util/normalizer/TermAnalyzer.java | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/tokenize/uax29/ExtendedPictographic.java b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/tokenize/uax29/ExtendedPictographic.java index 46903bc1a..2fac2740a 100644 --- a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/tokenize/uax29/ExtendedPictographic.java +++ b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/tokenize/uax29/ExtendedPictographic.java @@ -45,7 +45,8 @@ public final class ExtendedPictographic { } load(in); } catch (IOException e) { - throw new UncheckedIOException("Unable to read Extended_Pictographic data resource", e); + throw new UncheckedIOException( + "Unable to read Extended_Pictographic data resource " + RESOURCE, e); } } diff --git a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/util/normalizer/NormalizationProfiles.java b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/util/normalizer/NormalizationProfiles.java index f46c57dc5..c39abee43 100644 --- a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/util/normalizer/NormalizationProfiles.java +++ b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/util/normalizer/NormalizationProfiles.java @@ -108,6 +108,8 @@ public final class NormalizationProfiles { */ public static Optional<NormalizationProfile> detect(CharSequence text, LanguageDetector detector) { + Objects.requireNonNull(text, "text"); + Objects.requireNonNull(detector, "detector"); return forLanguage(detector.predictLanguage(text).getLang()); } diff --git a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/util/normalizer/TermAnalyzer.java b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/util/normalizer/TermAnalyzer.java index 7262d580d..d382fc09c 100644 --- a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/util/normalizer/TermAnalyzer.java +++ b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/util/normalizer/TermAnalyzer.java @@ -248,6 +248,7 @@ public final class TermAnalyzer { * @return this builder */ public Builder caseFold(Locale locale) { + Objects.requireNonNull(locale, "locale"); return transform(Dimension.CASE_FOLD, CaseFoldCharSequenceNormalizer.getInstance(locale)); }
