This is an automated email from the ASF dual-hosted git repository.

mawiesne pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/opennlp.git


The following commit(s) were added to refs/heads/main by this push:
     new 96a073f6 OPENNLP-1821: Prevent OutOfMemory Due To Huge Array 
Allocation  (#1022)
96a073f6 is described below

commit 96a073f693f3a0ded808a475f7d7773c072bb8a1
Author: subbudvk <[email protected]>
AuthorDate: Mon Apr 27 18:03:07 2026 +0530

    OPENNLP-1821: Prevent OutOfMemory Due To Huge Array Allocation  (#1022)
    
    * Fix : Prevent OOM/DoS from Crafted Inputs
    
    * Customizable entry code in OpenNLP
    
    * Use Max_Entries Declared to prevent OOM
    
    * Use correct exception in fix for OOM
---
 .../tools/ml/model/AbstractModelReader.java        |  44 ++++++++
 .../tools/ml/model/ModelParameterChunker.java      |   1 +
 .../tools/ml/model/AbstractModelReaderOomTest.java | 119 +++++++++++++++++++++
 3 files changed, 164 insertions(+)

diff --git 
a/opennlp-core/opennlp-ml/opennlp-ml-commons/src/main/java/opennlp/tools/ml/model/AbstractModelReader.java
 
b/opennlp-core/opennlp-ml/opennlp-ml-commons/src/main/java/opennlp/tools/ml/model/AbstractModelReader.java
index 55164614..d8ae3cb6 100644
--- 
a/opennlp-core/opennlp-ml/opennlp-ml-commons/src/main/java/opennlp/tools/ml/model/AbstractModelReader.java
+++ 
b/opennlp-core/opennlp-ml/opennlp-ml-commons/src/main/java/opennlp/tools/ml/model/AbstractModelReader.java
@@ -29,6 +29,32 @@ import java.util.zip.GZIPInputStream;
  */
 public abstract class AbstractModelReader {
 
+  /**
+   * System property for overriding the maximum number of entries (outcomes, 
predicates,
+   * outcome patterns, chunk counts) that may be read from a model file or 
training data.
+   * Set at JVM startup, e.g. {@code -DOPENNLP_MAX_ENTRIES=5000000}.
+   * Falls back to {@code 10_000_000} if absent or invalid.
+   */
+  public static final String MAX_ENTRIES_PROPERTY = "OPENNLP_MAX_ENTRIES";
+
+  /**
+   * Upper bound on count fields read from a model file.
+   * Prevents OOM on crafted inputs with oversized array size declarations.
+   * Configurable via the {@link #MAX_ENTRIES_PROPERTY} system property.
+   */
+  static final int MAX_ENTRIES = initMaxEntries();
+
+  private static int initMaxEntries() {
+    String prop = System.getProperty(MAX_ENTRIES_PROPERTY, "").trim();
+    if (!prop.isEmpty()) {
+      try {
+        int val = Integer.parseInt(prop);
+        if (val > 0) return val;
+      } catch (NumberFormatException ignore) { }
+    }
+    return 10_000_000;
+  }
+
   /**
    * The number of predicates contained in a model.
    */
@@ -128,9 +154,15 @@ public abstract class AbstractModelReader {
   /**
    * @return Reads and retrieves the {@code outcome labels} from the model.
    * @throws IOException Thrown if IO errors occurred.
+   * @throws IllegalArgumentException Thrown if the outcome count is negative 
or
+   *     exceeds {@link #MAX_ENTRIES}.
    */
   protected String[] getOutcomes() throws IOException {
     int numOutcomes = readInt();
+    if (numOutcomes < 0 || numOutcomes > MAX_ENTRIES) {
+      throw new IllegalArgumentException(
+          "Outcome count " + numOutcomes + " exceeds safe limit of " + 
MAX_ENTRIES);
+    }
     String[] outcomeLabels = new String[numOutcomes];
     for (int i = 0; i < numOutcomes; i++) outcomeLabels[i] = readUTF();
     return outcomeLabels;
@@ -139,9 +171,15 @@ public abstract class AbstractModelReader {
   /**
    * @return Reads and retrieves the {@code outcome patterns} from the model.
    * @throws IOException Thrown if IO errors occurred.
+   * @throws IllegalArgumentException Thrown if the outcome pattern count is 
negative or
+   *     exceeds {@link #MAX_ENTRIES}.
    */
   protected int[][] getOutcomePatterns() throws IOException {
     int numOCTypes = readInt();
+    if (numOCTypes < 0 || numOCTypes > MAX_ENTRIES) {
+      throw new IllegalArgumentException(
+          "Outcome pattern count " + numOCTypes + " exceeds safe limit of " + 
MAX_ENTRIES);
+    }
     int[][] outcomePatterns = new int[numOCTypes][];
     for (int i = 0; i < numOCTypes; i++) {
       StringTokenizer tok = new StringTokenizer(readUTF(), " ");
@@ -157,9 +195,15 @@ public abstract class AbstractModelReader {
   /**
    * @return Reads and retrieves the {@code predicates} from the model.
    * @throws IOException Thrown if IO errors occurred.
+   * @throws IllegalArgumentException Thrown if the predicate count is 
negative or
+   *     exceeds {@link #MAX_ENTRIES}.
    */
   protected String[] getPredicates() throws IOException {
     NUM_PREDS = readInt();
+    if (NUM_PREDS < 0 || NUM_PREDS > MAX_ENTRIES) {
+      throw new IllegalArgumentException(
+          "Predicate count " + NUM_PREDS + " exceeds safe limit of " + 
MAX_ENTRIES);
+    }
     String[] predLabels = new String[NUM_PREDS];
     for (int i = 0; i < NUM_PREDS; i++)
         predLabels[i] = readUTF();
diff --git 
a/opennlp-core/opennlp-ml/opennlp-ml-commons/src/main/java/opennlp/tools/ml/model/ModelParameterChunker.java
 
b/opennlp-core/opennlp-ml/opennlp-ml-commons/src/main/java/opennlp/tools/ml/model/ModelParameterChunker.java
index 98d74be1..eab2b853 100644
--- 
a/opennlp-core/opennlp-ml/opennlp-ml-commons/src/main/java/opennlp/tools/ml/model/ModelParameterChunker.java
+++ 
b/opennlp-core/opennlp-ml/opennlp-ml-commons/src/main/java/opennlp/tools/ml/model/ModelParameterChunker.java
@@ -29,6 +29,7 @@ import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.List;
 
+
 /**
  * A helper class that handles Strings with more than 64k (65535 bytes) in 
length.
  * This is achieved via the signature {@link #SIGNATURE_CHUNKED_PARAMS} at the 
beginning of
diff --git 
a/opennlp-core/opennlp-ml/opennlp-ml-commons/src/test/java/opennlp/tools/ml/model/AbstractModelReaderOomTest.java
 
b/opennlp-core/opennlp-ml/opennlp-ml-commons/src/test/java/opennlp/tools/ml/model/AbstractModelReaderOomTest.java
new file mode 100644
index 00000000..242555c7
--- /dev/null
+++ 
b/opennlp-core/opennlp-ml/opennlp-ml-commons/src/test/java/opennlp/tools/ml/model/AbstractModelReaderOomTest.java
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.ml.model;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+/**
+ * Verifies that crafted model files with oversized count fields are rejected 
before array
+ * allocation occurs, preventing OOM DoS. See OPENNLP-1821.
+ */
+class AbstractModelReaderOomTest {
+
+  /**
+   * Minimal concrete subclass that exposes the three protected methods under 
test.
+   */
+  static class TestableReader extends AbstractModelReader {
+    TestableReader(DataReader dr) { super(dr); }
+
+    @Override public void checkModelType() {}
+    @Override public AbstractModel constructModel() { return null; }
+
+    String[] outcomes() throws IOException { return getOutcomes(); }
+    int[][] outcomePatterns() throws IOException { return 
getOutcomePatterns(); }
+    String[] predicates() throws IOException { return getPredicates(); }
+  }
+
+  /** Reader whose stream starts with a single int (the count field). */
+  private static TestableReader readerFor(int countValue) throws IOException {
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    DataOutputStream dos = new DataOutputStream(baos);
+    dos.writeInt(countValue);
+    dos.flush();
+    DataInputStream dis = new DataInputStream(new 
ByteArrayInputStream(baos.toByteArray()));
+    return new TestableReader(new BinaryFileDataReader(dis));
+  }
+
+  @Test
+  void testGetOutcomes_RejectsMaxValue() throws IOException {
+    assertThrows(IllegalArgumentException.class, 
readerFor(Integer.MAX_VALUE)::outcomes);
+  }
+
+  @Test
+  void testGetOutcomePatterns_RejectsMaxValue() throws IOException {
+    assertThrows(IllegalArgumentException.class, 
readerFor(Integer.MAX_VALUE)::outcomePatterns);
+  }
+
+  @Test
+  void testGetPredicates_RejectsMaxValue() throws IOException {
+    assertThrows(IllegalArgumentException.class, 
readerFor(Integer.MAX_VALUE)::predicates);
+  }
+
+  @Test
+  void testGetOutcomes_RejectsNegativeCount() throws IOException {
+    assertThrows(IllegalArgumentException.class, readerFor(-1)::outcomes);
+  }
+
+  @Test
+  void testGetOutcomePatterns_RejectsNegativeCount() throws IOException {
+    assertThrows(IllegalArgumentException.class, 
readerFor(-1)::outcomePatterns);
+  }
+
+  @Test
+  void testGetPredicates_RejectsNegativeCount() throws IOException {
+    assertThrows(IllegalArgumentException.class, readerFor(-1)::predicates);
+  }
+
+  @Test
+  void testGetOutcomes_ValidCountReturnsLabels() throws IOException {
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    DataOutputStream dos = new DataOutputStream(baos);
+    dos.writeInt(2);
+    dos.writeUTF("label-A");
+    dos.writeUTF("label-B");
+    dos.flush();
+
+    TestableReader reader = new TestableReader(
+        new BinaryFileDataReader(new DataInputStream(new 
ByteArrayInputStream(baos.toByteArray()))));
+    assertArrayEquals(new String[]{"label-A", "label-B"}, reader.outcomes());
+  }
+
+  @Test
+  void testGetPredicates_ValidCountReturnsLabels() throws IOException {
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    DataOutputStream dos = new DataOutputStream(baos);
+    dos.writeInt(3);
+    dos.writeUTF("pred-X");
+    dos.writeUTF("pred-Y");
+    dos.writeUTF("pred-Z");
+    dos.flush();
+
+    TestableReader reader = new TestableReader(
+        new BinaryFileDataReader(new DataInputStream(new 
ByteArrayInputStream(baos.toByteArray()))));
+    assertArrayEquals(new String[]{"pred-X", "pred-Y", "pred-Z"}, 
reader.predicates());
+  }
+}

Reply via email to