This is an automated email from the ASF dual-hosted git repository.
mawiesne pushed a commit to branch opennlp-1.x
in repository https://gitbox.apache.org/repos/asf/opennlp.git
The following commit(s) were added to refs/heads/opennlp-1.x by this push:
new dfd561a22 [1.x] OPENNLP-1821: Prevent OutOfMemory due to huge array
allocation (#1079)
dfd561a22 is described below
commit dfd561a22ecb95cb43f452edd4ad478fa14c97bc
Author: Richard Zowalla <[email protected]>
AuthorDate: Fri Jun 12 16:29:00 2026 +0200
[1.x] OPENNLP-1821: Prevent OutOfMemory due to huge array allocation (#1079)
Backport of #1022 to opennlp-1.x.
Previously, AbstractModelReader read the count fields (outcomes, outcome
patterns, predicates) straight from the model stream and used them,
unchecked, as array sizes. A crafted model could declare an oversized
count and trigger an OutOfMemoryError (DoS), or a negative count and
trigger a NegativeArraySizeException, before any data was read.
Each count is now checked before allocation: values that are negative or
that exceed a configurable upper bound (MAX_ENTRIES, default 10_000_000,
overridable via the OPENNLP_MAX_ENTRIES system property) are rejected
with an IllegalArgumentException.
Adaptations for opennlp-1.x: the test is written for JUnit 4, and the 2.x
ModelParameterChunker hunk is omitted (that class does not exist on 1.x,
and the change there was only a cosmetic blank line).
---
.../tools/ml/model/AbstractModelReader.java | 38 ++++++
.../tools/ml/model/AbstractModelReaderOomTest.java | 133 +++++++++++++++++++++
2 files changed, 171 insertions(+)
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModelReader.java
b/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModelReader.java
index 085b85608..433e2c265 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModelReader.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModelReader.java
@@ -27,6 +27,32 @@ import java.util.zip.GZIPInputStream;
public abstract class AbstractModelReader {
+ /**
+ * System property for overriding the maximum number of entries (outcomes, predicates,
+ * outcome patterns, chunk counts) that may be read from a model file or training data.
+ * Set at JVM startup, e.g. {@code -DOPENNLP_MAX_ENTRIES=5000000}.
+ * Falls back to {@code 10_000_000} if absent or invalid.
+ */
+ public static final String MAX_ENTRIES_PROPERTY = "OPENNLP_MAX_ENTRIES";
+
+ /**
+ * Upper bound on count fields read from a model file.
+ * Prevents OOM on crafted inputs with oversized array size declarations.
+ * Configurable via the {@link #MAX_ENTRIES_PROPERTY} system property.
+ */
+ static final int MAX_ENTRIES = initMaxEntries();
+
+ private static int initMaxEntries() {
+ String prop = System.getProperty(MAX_ENTRIES_PROPERTY, "").trim();
+ if (!prop.isEmpty()) {
+ try {
+ int val = Integer.parseInt(prop);
+ if (val > 0) return val;
+ } catch (NumberFormatException ignore) { }
+ }
+ return 10_000_000;
+ }
+
/**
* The number of predicates contained in the model.
*/
@@ -91,6 +117,10 @@ public abstract class AbstractModelReader {
protected String[] getOutcomes() throws java.io.IOException {
int numOutcomes = readInt();
+ if (numOutcomes < 0 || numOutcomes > MAX_ENTRIES) {
+ throw new IllegalArgumentException(
+ "Outcome count " + numOutcomes + " exceeds safe limit of " +
MAX_ENTRIES);
+ }
String[] outcomeLabels = new String[numOutcomes];
for (int i = 0; i < numOutcomes; i++) outcomeLabels[i] = readUTF();
return outcomeLabels;
@@ -98,6 +128,10 @@ public abstract class AbstractModelReader {
protected int[][] getOutcomePatterns() throws java.io.IOException {
int numOCTypes = readInt();
+ if (numOCTypes < 0 || numOCTypes > MAX_ENTRIES) {
+ throw new IllegalArgumentException(
+ "Outcome pattern count " + numOCTypes + " exceeds safe limit of " +
MAX_ENTRIES);
+ }
int[][] outcomePatterns = new int[numOCTypes][];
for (int i = 0; i < numOCTypes; i++) {
StringTokenizer tok = new StringTokenizer(readUTF(), " ");
@@ -112,6 +146,10 @@ public abstract class AbstractModelReader {
protected String[] getPredicates() throws java.io.IOException {
NUM_PREDS = readInt();
+ if (NUM_PREDS < 0 || NUM_PREDS > MAX_ENTRIES) {
+ throw new IllegalArgumentException(
+ "Predicate count " + NUM_PREDS + " exceeds safe limit of " +
MAX_ENTRIES);
+ }
String[] predLabels = new String[NUM_PREDS];
for (int i = 0; i < NUM_PREDS; i++)
predLabels[i] = readUTF();
diff --git
a/opennlp-tools/src/test/java/opennlp/tools/ml/model/AbstractModelReaderOomTest.java
b/opennlp-tools/src/test/java/opennlp/tools/ml/model/AbstractModelReaderOomTest.java
new file mode 100644
index 000000000..96b09b43d
--- /dev/null
+++
b/opennlp-tools/src/test/java/opennlp/tools/ml/model/AbstractModelReaderOomTest.java
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.ml.model;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Verifies that crafted model files with oversized count fields are rejected before array
+ * allocation occurs, preventing OOM DoS. See OPENNLP-1821.
+ */
+public class AbstractModelReaderOomTest {
+
+ /**
+ * Minimal concrete subclass that exposes the three protected methods under test.
+ */
+ static class TestableReader extends AbstractModelReader {
+ TestableReader(DataReader dr) {
+ super(dr);
+ }
+
+ @Override
+ public void checkModelType() {
+ }
+
+ @Override
+ public AbstractModel constructModel() {
+ return null;
+ }
+
+ String[] outcomes() throws IOException {
+ return getOutcomes();
+ }
+
+ int[][] outcomePatterns() throws IOException {
+ return getOutcomePatterns();
+ }
+
+ String[] predicates() throws IOException {
+ return getPredicates();
+ }
+ }
+
+ /** Reader whose stream starts with a single int (the count field). */
+ private static TestableReader readerFor(int countValue) throws IOException {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ DataOutputStream dos = new DataOutputStream(baos);
+ dos.writeInt(countValue);
+ dos.flush();
+ DataInputStream dis = new DataInputStream(new
ByteArrayInputStream(baos.toByteArray()));
+ return new TestableReader(new BinaryFileDataReader(dis));
+ }
+
+ @Test
+ public void testGetOutcomes_RejectsMaxValue() throws IOException {
+ Assert.assertThrows(IllegalArgumentException.class,
readerFor(Integer.MAX_VALUE)::outcomes);
+ }
+
+ @Test
+ public void testGetOutcomePatterns_RejectsMaxValue() throws IOException {
+ Assert.assertThrows(IllegalArgumentException.class,
readerFor(Integer.MAX_VALUE)::outcomePatterns);
+ }
+
+ @Test
+ public void testGetPredicates_RejectsMaxValue() throws IOException {
+ Assert.assertThrows(IllegalArgumentException.class,
readerFor(Integer.MAX_VALUE)::predicates);
+ }
+
+ @Test
+ public void testGetOutcomes_RejectsNegativeCount() throws IOException {
+ Assert.assertThrows(IllegalArgumentException.class,
readerFor(-1)::outcomes);
+ }
+
+ @Test
+ public void testGetOutcomePatterns_RejectsNegativeCount() throws IOException
{
+ Assert.assertThrows(IllegalArgumentException.class,
readerFor(-1)::outcomePatterns);
+ }
+
+ @Test
+ public void testGetPredicates_RejectsNegativeCount() throws IOException {
+ Assert.assertThrows(IllegalArgumentException.class,
readerFor(-1)::predicates);
+ }
+
+ @Test
+ public void testGetOutcomes_ValidCountReturnsLabels() throws IOException {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ DataOutputStream dos = new DataOutputStream(baos);
+ dos.writeInt(2);
+ dos.writeUTF("label-A");
+ dos.writeUTF("label-B");
+ dos.flush();
+
+ TestableReader reader = new TestableReader(
+ new BinaryFileDataReader(new DataInputStream(new
ByteArrayInputStream(baos.toByteArray()))));
+ Assert.assertArrayEquals(new String[]{"label-A", "label-B"},
reader.outcomes());
+ }
+
+ @Test
+ public void testGetPredicates_ValidCountReturnsLabels() throws IOException {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ DataOutputStream dos = new DataOutputStream(baos);
+ dos.writeInt(3);
+ dos.writeUTF("pred-X");
+ dos.writeUTF("pred-Y");
+ dos.writeUTF("pred-Z");
+ dos.flush();
+
+ TestableReader reader = new TestableReader(
+ new BinaryFileDataReader(new DataInputStream(new
ByteArrayInputStream(baos.toByteArray()))));
+ Assert.assertArrayEquals(new String[]{"pred-X", "pred-Y", "pred-Z"},
reader.predicates());
+ }
+}