Hi, any idea how I can debug this problem? I suspect the problem has to do with how I am converting a JavaRDD<Tuple2<Long, Double>> to a DataFrame.
Is it a boxing problem? I tried to use long and double instead of Long and Double whenever possible. Thanks in advance, Happy Holidays. Andy allData.printSchema() root |-- label: string (nullable = true) |-- text: string (nullable = true) |-- id: long (nullable = true) |-- createdAt: long (nullable = true) |-- binomialLabel: string (nullable = true) |-- words: array (nullable = true) | |-- element: string (containsNull = true) |-- features: vector (nullable = true) |-- labelIndex: double (nullable = true) // // make predictions using all the data // The real out of sample error will be higher // JavaRDD<Tuple2<Long, Double>> predictions = idLabeledPoingRDD.map((Tuple2<Long, LabeledPoint> t2) -> { Long id = t2._1(); LabeledPoint lp = t2._2(); double prediction = naiveBayesModel.predict(lp.features()); return new Tuple2<Long, Double>(id, prediction); }); public class Prediction { double prediction; long id; Public Getters and setters } DataFrame predictionDF = sqlContext.createDataFrame(predictions, Prediction.class); predictionDF.printSchema() root |-- id: long (nullable = false) |-- prediction: double (nullable = false) DataFrame results = allData.join(predictionDF, "id"); results.show() Here is the top of a long stack trace. I do not know how it relates back to my code. 
I do not see any of my class, colNames, function names, java.lang.IllegalArgumentException: object is not an instance of declaring class at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ~[na:1.8.0_66] at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62 ) ~[na:1.8.0_66] at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl .java:43) ~[na:1.8.0_66] at java.lang.reflect.Method.invoke(Method.java:497) ~[na:1.8.0_66] at org.apache.spark.sql.SQLContext$$anonfun$9$$anonfun$apply$1$$anonfun$apply$2 .apply(SQLContext.scala:500) ~[spark-sql_2.10-1.5.2.jar:1.5.2] at org.apache.spark.sql.SQLContext$$anonfun$9$$anonfun$apply$1$$anonfun$apply$2 .apply(SQLContext.scala:500) ~[spark-sql_2.10-1.5.2.jar:1.5.2] at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala: 244) ~[scala-library-2.10.5.jar:na] at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala: 244) ~[scala-library-2.10.5.jar:na] at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala :33) ~[scala-library-2.10.5.jar:na] at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:108) ~[scala-library-2.10.5.jar:na] at scala.collection.TraversableLike$class.map(TraversableLike.scala:244) ~[scala-library-2.10.5.jar:na] at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:108) ~[scala-library-2.10.5.jar:na] at org.apache.spark.sql.SQLContext$$anonfun$9$$anonfun$apply$1.apply(SQLContext .scala:500) ~[spark-sql_2.10-1.5.2.jar:1.5.2] at org.apache.spark.sql.SQLContext$$anonfun$9$$anonfun$apply$1.apply(SQLContext .scala:498) ~[spark-sql_2.10-1.5.2.jar:1.5.2] at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) ~[scala-library-2.10.5.jar:na] at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) ~[scala-library-2.10.5.jar:na] at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) ~[scala-library-2.10.5.jar:na] at 
org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.insertAll(BypassM ergeSortShuffleWriter.java:119) ~[spark-core_2.10-1.5.2.jar:1.5.2] at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scal a:73) ~[spark-core_2.10-1.5.2.jar:1.5.2] at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73) ~[spark-core_2.10-1.5.2.jar:1.5.2] at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41) ~[spark-core_2.10-1.5.2.jar:1.5.2] at org.apache.spark.scheduler.Task.run(Task.scala:88) ~[spark-core_2.10-1.5.2.jar:1.5.2] at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214) ~[spark-core_2.10-1.5.2.jar:1.5.2] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:11 42) [na:1.8.0_66] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:6 17) [na:1.8.0_66] at java.lang.Thread.run(Thread.java:745) [na:1.8.0_66]