Hi All,
I'm getting an error when trying to save a ALS MatrixFactorizationModel.
I'm using following method to save the model.
*model.save(sc, outPath)*
I'm getting the following exception when saving the model. I have attached
the full stack trace. Any help would be appreciated to resolve this issue.
org.apache.spark.SparkException: Job aborted.
at
org.apache.spark.sql.sources.InsertIntoHadoopFsRelation.insert(commands.scala:166)
at
org.apache.spark.sql.sources.InsertIntoHadoopFsRelation.run(commands.scala:139)
at
org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult$lzycompute(commands.scala:57)
at
org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult(commands.scala:57)
at
org.apache.spark.sql.execution.ExecutedCommand.doExecute(commands.scala:68)
at
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:88)
at
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:88)
at
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
at
org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:87)
at
org.apache.spark.sql.SQLContext$QueryExecution.toRdd$lzycompute(SQLContext.scala:950)
at
org.apache.spark.sql.SQLContext$QueryExecution.toRdd(SQLContext.scala:950)
at
org.apache.spark.sql.sources.ResolvedDataSource$.apply(ddl.scala:336)
at
org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:144)
at
org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:135)
at
org.apache.spark.sql.DataFrameWriter.parquet(DataFrameWriter.scala:281)
at
org.apache.spark.mllib.recommendation.MatrixFactorizationModel$SaveLoadV1_0$.save(MatrixFactorizationModel.scala:284)
at
org.apache.spark.mllib.recommendation.MatrixFactorizationModel.save(MatrixFactorizationModel.scala:141)
Thanks,
Madawa
org.apache.spark.SparkException: Job aborted.
at
org.apache.spark.sql.sources.InsertIntoHadoopFsRelation.insert(commands.scala:166)
at
org.apache.spark.sql.sources.InsertIntoHadoopFsRelation.run(commands.scala:139)
at
org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult$lzycompute(commands.scala:57)
at
org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult(commands.scala:57)
at
org.apache.spark.sql.execution.ExecutedCommand.doExecute(commands.scala:68)
at
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:88)
at
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:88)
at
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:87)
at
org.apache.spark.sql.SQLContext$QueryExecution.toRdd$lzycompute(SQLContext.scala:950)
at
org.apache.spark.sql.SQLContext$QueryExecution.toRdd(SQLContext.scala:950)
at org.apache.spark.sql.sources.ResolvedDataSource$.apply(ddl.scala:336)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:144)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:135)
at
org.apache.spark.sql.DataFrameWriter.parquet(DataFrameWriter.scala:281)
at
org.apache.spark.mllib.recommendation.MatrixFactorizationModel$SaveLoadV1_0$.save(MatrixFactorizationModel.scala:284)
at
org.apache.spark.mllib.recommendation.MatrixFactorizationModel.save(MatrixFactorizationModel.scala:141)
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in
stage 234.0 failed 1 times, most recent failure: Lost task 0.0 in stage 234.0
(TID 141, localhost): java.lang.NullPointerException
at
parquet.hadoop.InternalParquetRecordWriter.flushRowGroupToStore(InternalParquetRecordWriter.java:147)
at
parquet.hadoop.InternalParquetRecordWriter.close(InternalParquetRecordWriter.java:113)
at
parquet.hadoop.ParquetRecordWriter.close(ParquetRecordWriter.java:112)
at
org.apache.spark.sql.parquet.ParquetOutputWriter.close(newParquet.scala:88)
at
org.apache.spark.sql.sources.DefaultWriterContainer.abortTask(commands.scala:491)
at
org.apache.spark.sql.sources.InsertIntoHadoopFsRelation.org$apache$spark$sql$sources$InsertIntoHadoopFsRelation$$writeRows$1(commands.scala:190)
at
org.apache.spark.sql.sources.InsertIntoHadoopFsRelation$$anonfun$insert$1.apply(commands.scala:160)
at
org.apache.spark.sql.sources.InsertIntoHadoopFsRelation$$anonfun$insert$1.apply(commands.scala:160)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:63)
at org.apache.spark.scheduler.Task.run(Task.scala:70)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
Driver stacktrace:
at
org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1273)
at
org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1264)
at
org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1263)
at
scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
at
org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1263)
at
org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:730)
at
org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:730)
at scala.Option.foreach(Option.scala:236)
at
org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:730)
at
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1457)
at
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1418)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]