With spark.serializer.objectStreamReset set to 1, I ran the sample Scala test
code below, and it still seems to crash in the same place. If someone could
verify this independently, I would greatly appreciate it.
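
In case it helps with reproducing, here is a minimal sketch of how that
property can be set (the launch command and app name below are only
illustration, not necessarily how my environment is configured):

// At shell launch:
//   spark-shell --conf spark.serializer.objectStreamReset=1
// Or programmatically in a standalone app, before creating the context:
import org.apache.spark.{SparkConf, SparkContext}
val conf = new SparkConf()
  .setAppName("objectStreamReset-repro") // hypothetical app name
  .set("spark.serializer.objectStreamReset", "1")
val sc = new SparkContext(conf)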

Scala Code:
----------------------------------------------------------------------
import scala.util.Random
import scala.collection.mutable.ArrayBuffer

import org.apache.spark.mllib.tree.RandomForest
import org.apache.spark.mllib.tree.model.RandomForestModel
import org.apache.spark.mllib.util.MLUtils

import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint

val r = Random

// 15,000,000-dimensional vectors with 3,000 non-zero entries each
val size = 15000000
val count = 3000
val indptr = (1 to size by size/count).toArray
val data = Seq.fill(count)(r.nextDouble()).toArray

// Build a small dataset of 10 randomly labeled points sharing the same sparse vector
val dset = ArrayBuffer[LabeledPoint]()
for (i <- 1 to 10) {
  dset += LabeledPoint(r.nextInt(2), Vectors.sparse(size, indptr, data))
}

// Distribute the data and split 70/30 into training and test sets
val distData = sc.parallelize(dset)
val splits = distData.randomSplit(Array(0.7, 0.3))
val (trainingData, testData) = (splits(0), splits(1))

// Train a RandomForest model.
//  Empty categoricalFeaturesInfo indicates all features are continuous.
val numClasses = 2
val categoricalFeaturesInfo = Map[Int, Int]()
val numTrees = 3 // Use more in practice.
val featureSubsetStrategy = "auto" // Let the algorithm choose.
val impurity = "gini"
val maxDepth = 4
val maxBins = 32

val model = RandomForest.trainClassifier(trainingData, numClasses, categoricalFeaturesInfo,
  numTrees, featureSubsetStrategy, impurity, maxDepth, maxBins)
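
(testData from the split above goes unused in the repro. If anyone who gets
past the crash wants to sanity-check the model, the usual evaluation step
along the lines of the MLlib documentation example would look roughly like
the sketch below; it is untested here since training never completes for me.)

// Evaluate on the held-out 30% and compute the test error rate.
val labelAndPreds = testData.map { point =>
  val prediction = model.predict(point.features)
  (point.label, prediction)
}
val testErr = labelAndPreds.filter { case (label, pred) => label != pred }.count.toDouble / testData.count()
println("Test Error = " + testErr)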


