With spark.serializer.objectStreamReset set to 1, I ran a small Scala test and it still crashes at the same place. If someone could verify this independently, I would greatly appreciate it.
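For reference, this is roughly how the property can be set. The app name, master, and the fact that it is wired up through SparkConf here are just assumptions for illustration (in spark-shell you would pass it with --conf instead); it is not my exact invocation:

import org.apache.spark.{SparkConf, SparkContext}

// Sketch only: app name and master are placeholder assumptions.
// spark.serializer.objectStreamReset controls how often the Java
// serializer resets its object cache; 1 resets after every object.
val conf = new SparkConf()
  .setAppName("RandomForestOOMRepro")   // hypothetical name
  .setMaster("local[*]")                // hypothetical master
  .set("spark.serializer.objectStreamReset", "1")
val sc = new SparkContext(conf)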
Scala Code:
----------------------------------------------------------------------
import scala.util.Random
import scala.collection.mutable.ArrayBuffer
import org.apache.spark.mllib.tree.RandomForest
import org.apache.spark.mllib.tree.model.RandomForestModel
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint

val r = Random
var size = 15000000
var count = 3000
val indptr = (1 to size by size/count).toArray
val data = Seq.fill(count)(r.nextDouble()).toArray
var dset = ArrayBuffer[LabeledPoint]()
for (i <- 1 to 10) {
  dset += LabeledPoint(r.nextInt(2), Vectors.sparse(size, indptr, data))
}

val distData = sc.parallelize(dset)
val splits = distData.randomSplit(Array(0.7, 0.3))
val (trainingData, testData) = (splits(0), splits(1))

// Train a RandomForest model.
// Empty categoricalFeaturesInfo indicates all features are continuous.
val numClasses = 2
val categoricalFeaturesInfo = Map[Int, Int]()
val numTrees = 3 // Use more in practice.
val featureSubsetStrategy = "auto" // Let the algorithm choose.
val impurity = "gini"
val maxDepth = 4
val maxBins = 32

val model = RandomForest.trainClassifier(trainingData, numClasses,
  categoricalFeaturesInfo, numTrees, featureSubsetStrategy, impurity,
  maxDepth, maxBins)
----------------------------------------------------------------------
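In case anyone who reproduces this gets past the crash and wants to sanity-check the model, the usual evaluation step from the MLlib RandomForest example can be appended. This part is only a sketch; my run never reaches it:

// Evaluate the model on the held-out split and compute the test error.
val labelAndPreds = testData.map { point =>
  val prediction = model.predict(point.features)
  (point.label, prediction)
}
val testErr = labelAndPreds.filter(r => r._1 != r._2).count.toDouble / testData.count()
println("Test Error = " + testErr)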