This is an automated email from the ASF dual-hosted git repository. mawiesne pushed a commit to branch cleanup/drop-mahout-addon in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git
commit 16d4d991c4278923f9488dbb19035d45e7f16f84 Author: Martin Wiesner <[email protected]> AuthorDate: Fri Mar 20 22:15:17 2026 +0100 Drop mahout-addon component - deletes the mahout-addon component entirely - reasons: Mahout is dead upstream (0.9 from 2014), no tests, no value --- README.md | 1 - mahout-addon/pom.xml | 151 --------------------- mahout-addon/src/main/java/SimpleTest.java | 68 ---------- .../mahout/AbstractOnlineLearnerTrainer.java | 77 ----------- .../mahout/AdaptiveLogisticRegressionTrainer.java | 65 --------- .../addons/mahout/LogisticRegressionTrainer.java | 88 ------------ .../mahout/OnlineLogisticRegressionTrainer.java | 64 --------- .../addons/mahout/PassiveAggressiveTrainer.java | 58 -------- .../addons/mahout/VectorClassifierModel.java | 111 --------------- pom.xml | 1 - 10 files changed, 684 deletions(-) diff --git a/README.md b/README.md index 107fda3..d8e3d32 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,6 @@ Currently, the library has different components: * `caseeditor-corpus-server-plugin`: A set of Java classes for [Apache UIMA](https://uima.apache.org) as Eclipse plugin to integrate corpora. * `caseeditor-opennlp-plugin`: An OpenNLP plugin for [Apache UIMA](https://uima.apache.org). * `corpus-server`: A multi-module component to create, search, remove, and serve multiple corpora. -* `mahout-addon`: An addon for [Apache Mahout](https://mahout.apache.org). * `mallet-addon`: An addon for [Mallet](https://mimno.github.io/Mallet/topics.html) targeting topic modelling techniques. * `modelbuilder-addon`: A set of classes to build models. * `nlp-utils`: A set of OpenNLP util classes. diff --git a/mahout-addon/pom.xml b/mahout-addon/pom.xml deleted file mode 100644 index 34b3bd1..0000000 --- a/mahout-addon/pom.xml +++ /dev/null @@ -1,151 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> - -<!-- - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. ---> - -<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> - <modelVersion>4.0.0</modelVersion> - <parent> - <groupId>org.apache.opennlp</groupId> - <artifactId>opennlp-sandbox</artifactId> - <version>3.0.0-SNAPSHOT</version> - </parent> - - <artifactId>mahout-addon</artifactId> - <name>Apache OpenNLP Mahout Addon</name> - <packaging>jar</packaging> - - <dependencies> - <dependency> - <groupId>org.apache.opennlp</groupId> - <artifactId>opennlp-runtime</artifactId> - </dependency> - - <dependency> - <groupId>org.apache.mahout</groupId> - <artifactId>mahout-core</artifactId> - <version>0.9</version> - <exclusions> - <!-- No need for CLI tooling here --> - <exclusion> - <groupId>org.apache.mahout.commons</groupId> - <artifactId>commons-cli</artifactId> - </exclusion> - <!-- exluding as mahout-core brings ancient version (CVEs) --> - <exclusion> - <groupId>com.google</groupId> - <artifactId>guava</artifactId> - </exclusion> - <!-- exluding as mahout-core brings ancient version (CVEs) --> - <exclusion> - <groupId>com.thoughtworks.xstream</groupId> - <artifactId>xstream</artifactId> - </exclusion> - <!-- This is not needed any way and brings in ancient jersey --> - <exclusion> - <groupId>com.sun.jersey</groupId> - <artifactId>jersey-core</artifactId> - </exclusion> - <exclusion> - <groupId>com.sun.jersey</groupId> - <artifactId>jersey-server</artifactId> - </exclusion> - <exclusion> - <groupId>com.sun.jersey</groupId> - <artifactId>jersey-json</artifactId> - </exclusion> - </exclusions> - </dependency> - - <dependency> - <groupId>com.google.guava</groupId> - <artifactId>guava</artifactId> - <version>33.5.0-jre</version> - <scope>runtime</scope> - </dependency> - - <dependency> - <groupId>com.thoughtworks.xstream</groupId> - <artifactId>xstream</artifactId> - <version>1.4.21</version> - <scope>runtime</scope> - </dependency> - - <dependency> - <groupId>commons-beanutils</groupId> - <artifactId>commons-beanutils</artifactId> - <scope>runtime</scope> - </dependency> - - <dependency> - <groupId>org.junit.jupiter</groupId> - <artifactId>junit-jupiter-api</artifactId> - </dependency> - - <dependency> - <groupId>org.junit.jupiter</groupId> - <artifactId>junit-jupiter-engine</artifactId> - </dependency> - - <dependency> - <groupId>org.junit.jupiter</groupId> - <artifactId>junit-jupiter-params</artifactId> - </dependency> - </dependencies> - - <build> - <plugins> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-compiler-plugin</artifactId> - <configuration> - <source>${maven.compiler.source}</source> - <target>${maven.compiler.target}</target> - <compilerArgument>-Xlint</compilerArgument> - </configuration> - </plugin> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-dependency-plugin</artifactId> - <version>3.10.0</version> - <executions> - <execution> - <id>copy-dependencies</id> - <phase>package</phase> - <goals> - <goal>copy-dependencies</goal> - </goals> - <configuration> - <excludeScope>provided</excludeScope> - <stripVersion>true</stripVersion> - </configuration> - </execution> - </executions> - </plugin> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-surefire-plugin</artifactId> - <configuration> - <skipTests>true</skipTests> - <argLine>-Xmx512m</argLine> - </configuration> - </plugin> - </plugins> - </build> -</project> \ No newline at end of file diff --git a/mahout-addon/src/main/java/SimpleTest.java b/mahout-addon/src/main/java/SimpleTest.java deleted file mode 100644 index 18773cd..0000000 --- a/mahout-addon/src/main/java/SimpleTest.java +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.mahout.classifier.sgd.PassiveAggressive; -import org.apache.mahout.math.RandomAccessSparseVector; -import org.apache.mahout.math.Vector; -import org.apache.mahout.vectorizer.encoders.StaticWordValueEncoder; - -public class SimpleTest { - - public static void main(String[] args) { - - // Prepare data in vector format ... - - // The basic idea is that you create a vector, typically a RandomAccessSparseVector, - // and then you use various feature encoders to progressively add features to that vector. - // The size of the vector should be large enough to avoid feature collisions as features are hashed. - - // NOTE: Looks like we need to store the cardinality of the vector in the model ?! - - StaticWordValueEncoder encoder = new StaticWordValueEncoder("word-encoder"); - - RandomAccessSparseVector vector1 = new RandomAccessSparseVector(3); - vector1.set(0, 1); - vector1.set(1, 0); - vector1.set(2, 1); - -// encoder.addToVector("f1", vector1); -// encoder.addToVector("f", vector1); - - RandomAccessSparseVector vector2 = new RandomAccessSparseVector(3); - - vector2.set(0, 0); - vector2.set(1, 1); - vector2.set(2, 1); - -// encoder.addToVector("f2", vector2); -// encoder.addToVector("f", vector2); - - // do the training - PassiveAggressive pa = new PassiveAggressive(2, 3); - pa.train(0, vector1); - pa.train(1, vector2); - - RandomAccessSparseVector vector = new RandomAccessSparseVector(pa.numFeatures()); - vector.set(0, 1); - vector.set(1, 0); - vector.set(2, 1); - - Vector result = pa.classifyFull(vector); - - System.out.println(result); - } -} diff --git a/mahout-addon/src/main/java/opennlp/addons/mahout/AbstractOnlineLearnerTrainer.java b/mahout-addon/src/main/java/opennlp/addons/mahout/AbstractOnlineLearnerTrainer.java deleted file mode 100644 index c04a7cf..0000000 --- a/mahout-addon/src/main/java/opennlp/addons/mahout/AbstractOnlineLearnerTrainer.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package opennlp.addons.mahout; - -import java.util.HashMap; -import java.util.Map; - -import org.apache.mahout.math.RandomAccessSparseVector; -import org.apache.mahout.math.Vector; - -import opennlp.tools.ml.AbstractEventTrainer; -import opennlp.tools.ml.model.DataIndexer; -import opennlp.tools.util.TrainingParameters; - -abstract class AbstractOnlineLearnerTrainer extends AbstractEventTrainer<TrainingParameters> { - - protected int iterations; - - public AbstractOnlineLearnerTrainer() { - } - - @Override - public void init(TrainingParameters trainParams, Map<String,String> reportMap) { - iterations = trainParams.getIntParameter("Iterations", 20); - } - - protected void trainOnlineLearner(DataIndexer<TrainingParameters> indexer, org.apache.mahout.classifier.OnlineLearner pa) { - int cardinality = indexer.getPredLabels().length; - int[] outcomes = indexer.getOutcomeList(); - - for (int i = 0; i < indexer.getContexts().length; i++) { - - Vector vector = new RandomAccessSparseVector(cardinality); - - int[] features = indexer.getContexts()[i]; - - for (int feature : features) { - vector.set(feature, indexer.getNumTimesEventsSeen()[i]); - } - - pa.train(outcomes[i], vector); - } - } - - protected Map<String, Integer> createPrepMap(DataIndexer<TrainingParameters> indexer) { - Map<String, Integer> predMap = new HashMap<>(); - - String[] predLabels = indexer.getPredLabels(); - for (int i = 0; i < predLabels.length; i++) { - predMap.put(predLabels[i], i); - } - - return predMap; - } - - @Override - public boolean isSortAndMerge() { - return true; - } -} diff --git a/mahout-addon/src/main/java/opennlp/addons/mahout/AdaptiveLogisticRegressionTrainer.java b/mahout-addon/src/main/java/opennlp/addons/mahout/AdaptiveLogisticRegressionTrainer.java deleted file mode 100644 index b567493..0000000 --- a/mahout-addon/src/main/java/opennlp/addons/mahout/AdaptiveLogisticRegressionTrainer.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package opennlp.addons.mahout; - -import java.io.IOException; -import java.util.Map; - -import opennlp.tools.ml.model.DataIndexer; -import opennlp.tools.ml.model.MaxentModel; -import opennlp.tools.util.TrainingParameters; - -import org.apache.mahout.classifier.sgd.AdaptiveLogisticRegression; -import org.apache.mahout.classifier.sgd.L1; - -public class AdaptiveLogisticRegressionTrainer extends AbstractOnlineLearnerTrainer { - - public AdaptiveLogisticRegressionTrainer(Map<String, String> trainParams, - Map<String, String> reportMap) { - } - - @Override - public MaxentModel doTrain(DataIndexer<TrainingParameters> indexer) throws IOException { - - // TODO: Lets use the predMap here as well for encoding - int numberOfOutcomes = indexer.getOutcomeLabels().length; - int numberOfFeatures = indexer.getPredLabels().length; - - AdaptiveLogisticRegression pa = new AdaptiveLogisticRegression(numberOfOutcomes, - numberOfFeatures, new L1()); - - // TODO: Make these parameters configurable ... - // what are good values ?! - pa.setInterval(800); - pa.setAveragingWindow(500); - - for (int k = 0; k < iterations; k++) { - trainOnlineLearner(indexer, pa); - - // What should be reported at the end of every iteration ?! - System.out.println("Iteration " + (k + 1)); - } - - pa.close(); - - return new VectorClassifierModel(pa.getBest().getPayload().getLearner(), - indexer.getOutcomeLabels(), createPrepMap(indexer)); - } -} diff --git a/mahout-addon/src/main/java/opennlp/addons/mahout/LogisticRegressionTrainer.java b/mahout-addon/src/main/java/opennlp/addons/mahout/LogisticRegressionTrainer.java deleted file mode 100644 index 80a1f67..0000000 --- a/mahout-addon/src/main/java/opennlp/addons/mahout/LogisticRegressionTrainer.java +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package opennlp.addons.mahout; - -import java.io.IOException; -import java.util.HashMap; -import java.util.Map; - -import opennlp.tools.ml.model.DataIndexer; -import opennlp.tools.ml.model.MaxentModel; -import opennlp.tools.util.TrainingParameters; - -import org.apache.mahout.classifier.sgd.AdaptiveLogisticRegression; -import org.apache.mahout.classifier.sgd.L1; - -public class LogisticRegressionTrainer extends AbstractOnlineLearnerTrainer { - - public LogisticRegressionTrainer(Map<String, String> trainParams, - Map<String, String> reportMap) { - } - - @Override - public MaxentModel doTrain(DataIndexer<TrainingParameters> indexer) throws IOException { - - // TODO: Lets use the predMap here as well for encoding - - int[] outcomes = indexer.getOutcomeList(); - - int cardinality = indexer.getPredLabels().length; - - - AdaptiveLogisticRegression pa = new AdaptiveLogisticRegression(indexer.getOutcomeLabels().length, - cardinality, new L1()); - - pa.setInterval(800); - pa.setAveragingWindow(500); - -// PassiveAggressive pa = new PassiveAggressive(indexer.getOutcomeLabels().length, cardinality); -// pa.learningRate(10000); - -// OnlineLogisticRegression pa = new OnlineLogisticRegression(indexer.getOutcomeLabels().length, cardinality, -// new L1()); -// -// pa.alpha(1).stepOffset(250) -// .decayExponent(0.9) -// .lambda(3.0e-5) -// .learningRate(3000); - - // TODO: Should we do both ?! AdaptiveLogisticRegression ?! - - for (int k = 0; k < iterations; k++) { - trainOnlineLearner(indexer, pa); - - // What should be reported at the end of every iteration ?! - System.out.println("Iteration " + (k + 1)); - } - - pa.close(); - - Map<String, Integer> predMap = new HashMap<>(); - - String[] predLabels = indexer.getPredLabels(); - for (int i = 0; i < predLabels.length; i++) { - predMap.put(predLabels[i], i); - } - - return new VectorClassifierModel(pa.getBest().getPayload().getLearner(), indexer.getOutcomeLabels(), predMap); - - } - -} diff --git a/mahout-addon/src/main/java/opennlp/addons/mahout/OnlineLogisticRegressionTrainer.java b/mahout-addon/src/main/java/opennlp/addons/mahout/OnlineLogisticRegressionTrainer.java deleted file mode 100644 index 7850027..0000000 --- a/mahout-addon/src/main/java/opennlp/addons/mahout/OnlineLogisticRegressionTrainer.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package opennlp.addons.mahout; - -import java.io.IOException; -import java.util.Map; - -import opennlp.tools.ml.model.DataIndexer; -import opennlp.tools.ml.model.MaxentModel; -import opennlp.tools.util.TrainingParameters; - -import org.apache.mahout.classifier.sgd.L1; -import org.apache.mahout.classifier.sgd.OnlineLogisticRegression; - -public class OnlineLogisticRegressionTrainer extends AbstractOnlineLearnerTrainer { - - public OnlineLogisticRegressionTrainer(Map<String, String> trainParams, - Map<String, String> reportMap) { - } - - @Override - public MaxentModel doTrain(DataIndexer<TrainingParameters> indexer) throws IOException { - - // TODO: Lets use the predMap here as well for encoding - int numberOfOutcomes = indexer.getOutcomeLabels().length; - int numberOfFeatures = indexer.getPredLabels().length; - - // TODO: Make these parameters configurable ... - OnlineLogisticRegression pa = new OnlineLogisticRegression( - numberOfOutcomes, numberOfFeatures, new L1()); - - pa.alpha(1).stepOffset(250).decayExponent(0.9).lambda(3.0e-5) - .learningRate(3000); - - for (int k = 0; k < iterations; k++) { - trainOnlineLearner(indexer, pa); - - // What should be reported at the end of every iteration ?! - System.out.println("Iteration " + (k + 1)); - } - - pa.close(); - - return new VectorClassifierModel(pa, indexer.getOutcomeLabels(), createPrepMap(indexer)); - } - -} diff --git a/mahout-addon/src/main/java/opennlp/addons/mahout/PassiveAggressiveTrainer.java b/mahout-addon/src/main/java/opennlp/addons/mahout/PassiveAggressiveTrainer.java deleted file mode 100644 index 643bb76..0000000 --- a/mahout-addon/src/main/java/opennlp/addons/mahout/PassiveAggressiveTrainer.java +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package opennlp.addons.mahout; - -import java.io.IOException; -import java.util.Map; - -import opennlp.tools.ml.model.DataIndexer; -import opennlp.tools.ml.model.MaxentModel; -import opennlp.tools.util.TrainingParameters; - -import org.apache.mahout.classifier.sgd.PassiveAggressive; - -public class PassiveAggressiveTrainer extends AbstractOnlineLearnerTrainer { - - public PassiveAggressiveTrainer(Map<String, String> trainParams, - Map<String, String> reportMap) { - } - - @Override - public MaxentModel doTrain(DataIndexer<TrainingParameters> indexer) throws IOException { - - // TODO: Lets use the predMap here as well for encoding - int numberOfOutcomes = indexer.getOutcomeLabels().length; - int numberOfFeatures = indexer.getPredLabels().length; - - PassiveAggressive pa = new PassiveAggressive(numberOfOutcomes, numberOfFeatures); - - for (int k = 0; k < iterations; k++) { - trainOnlineLearner(indexer, pa); - - // What should be reported at the end of every iteration ?! - System.out.println("Iteration " + (k + 1)); - } - - pa.close(); - - return new VectorClassifierModel(pa, indexer.getOutcomeLabels(), createPrepMap(indexer)); - } - -} diff --git a/mahout-addon/src/main/java/opennlp/addons/mahout/VectorClassifierModel.java b/mahout-addon/src/main/java/opennlp/addons/mahout/VectorClassifierModel.java deleted file mode 100644 index 6785220..0000000 --- a/mahout-addon/src/main/java/opennlp/addons/mahout/VectorClassifierModel.java +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package opennlp.addons.mahout; - -import java.util.Map; - -import opennlp.tools.ml.model.MaxentModel; - -import org.apache.mahout.classifier.AbstractVectorClassifier; -import org.apache.mahout.math.RandomAccessSparseVector; -import org.apache.mahout.math.Vector; - -// TODO: Would be nice to have an abstract maxent model impl .. - -public class VectorClassifierModel implements MaxentModel { - - private final AbstractVectorClassifier classifier; - private final String[] outcomeLabels; - private final Map<String, Integer> predMap; - - public VectorClassifierModel(AbstractVectorClassifier pa, String[] outcomeLabels, - Map<String, Integer> predMap) { - this.classifier = pa; - // TODO: We should make a copy, so the model is immutable ... - this.outcomeLabels = outcomeLabels; - this.predMap = predMap; - } - - @Override - public double[] eval(String[] features) { - Vector vector = new RandomAccessSparseVector(predMap.size()); - - for (String feature : features) { - Integer featureId = predMap.get(feature); - - if (featureId != null) { - vector.set(featureId, vector.get(featureId) + 1); - } - } - - Vector resultVector = classifier.classifyFull(vector); - - double[] outcomes = new double[classifier.numCategories()]; - - for (int i = 0; i < outcomes.length; i++) { - outcomes[i] = resultVector.get(i); - } - - return outcomes; - } - - @Override - public double[] eval(String[] context, double[] probs) { - return eval(context); - } - - @Override - public double[] eval(String[] context, float[] values) { - return eval(context); - } - - @Override - public String getBestOutcome(double[] ocs) { - int best = 0; - for (int i = 1; i < ocs.length; i++) - if (ocs[i] > ocs[best]) best = i; - return outcomeLabels[best]; - } - - @Override - public String getAllOutcomes(double[] outcomes) { - return null; - } - - @Override - public String getOutcome(int i) { - return outcomeLabels[i]; - } - - @Override - public int getIndex(String outcome) { - for (int i = 0; i < outcomeLabels.length; i++) { - if (outcomeLabels[i].equals(outcome)) { - return i; - } - } - - return -1; - } - - @Override - public int getNumOutcomes() { - return outcomeLabels.length; - } -} diff --git a/pom.xml b/pom.xml index 90f6c33..59514d9 100644 --- a/pom.xml +++ b/pom.xml @@ -98,7 +98,6 @@ <module>caseditor-corpus-server-plugin</module> <module>caseditor-opennlp-plugin</module> <module>corpus-server</module> - <module>mahout-addon</module> <module>mallet-addon</module> <module>modelbuilder-addon</module> <module>nlp-utils</module>
