Hi guys, I'm using Spark 1.4 and trying to save a DataFrame as a table. It's a really simple test, but I'm getting a bunch of NullPointerExceptions.
The code I'm running is very simple:

    qc.read.parquet("/user/sparkuser/data/staged/item_sales_basket_id.parquet")
      .write.format("parquet")
      .saveAsTable("is_20150617_test2")

Logs of the lost tasks:

[Stage 0:=================================> (8771 + 450) / 13000]15/06/16 03:42:30 WARN TaskSetManager: Lost task 10681.0 in stage 0.0 (TID 8757, qtausc-pphd0146): java.lang.NullPointerException
        at parquet.hadoop.InternalParquetRecordWriter.flushRowGroupToStore(InternalParquetRecordWriter.java:146)
        at parquet.hadoop.InternalParquetRecordWriter.close(InternalParquetRecordWriter.java:112)
        at parquet.hadoop.ParquetRecordWriter.close(ParquetRecordWriter.java:73)
        at org.apache.spark.sql.parquet.ParquetOutputWriter.close(newParquet.scala:116)
        at org.apache.spark.sql.sources.DefaultWriterContainer.abortTask(commands.scala:404)
        at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation.org$apache$spark$sql$sources$InsertIntoHadoopFsRelation$$writeRows$1(commands.scala:160)
        at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation$$anonfun$insert$1.apply(commands.scala:132)
        at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation$$anonfun$insert$1.apply(commands.scala:132)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:63)
        at org.apache.spark.scheduler.Task.run(Task.scala:70)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
        at java.lang.Thread.run(Thread.java:745)

[Stage 0:==================================> (9006 + 490) / 13000]15/06/16 03:43:22 WARN TaskSetManager: Lost task 10323.0 in stage 0.0 (TID 8896, qtausc-pphd0167): java.lang.NullPointerException
        at parquet.hadoop.InternalParquetRecordWriter.flushRowGroupToStore(InternalParquetRecordWriter.java:146)
        at parquet.hadoop.InternalParquetRecordWriter.close(InternalParquetRecordWriter.java:112)
        at parquet.hadoop.ParquetRecordWriter.close(ParquetRecordWriter.java:73)
        at org.apache.spark.sql.parquet.ParquetOutputWriter.close(newParquet.scala:116)
        at org.apache.spark.sql.sources.DefaultWriterContainer.abortTask(commands.scala:404)
        at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation.org$apache$spark$sql$sources$InsertIntoHadoopFsRelation$$writeRows$1(commands.scala:160)
        at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation$$anonfun$insert$1.apply(commands.scala:132)
        at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation$$anonfun$insert$1.apply(commands.scala:132)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:63)
        at org.apache.spark.scheduler.Task.run(Task.scala:70)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
        at java.lang.Thread.run(Thread.java:745)
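For completeness, here's a self-contained sketch of the job as I'm running it. This assumes qc is a HiveContext (in 1.4, saveAsTable for a persistent, non-temporary table needs Hive support); the object and app names are just placeholders I've added for illustration:

    import org.apache.spark.{SparkConf, SparkContext}
    import org.apache.spark.sql.hive.HiveContext

    object SaveAsTableRepro {
      def main(args: Array[String]): Unit = {
        val sc = new SparkContext(new SparkConf().setAppName("saveAsTable-npe-repro"))
        // saveAsTable on a persistent table requires a HiveContext in Spark 1.4
        val qc = new HiveContext(sc)

        // Read the staged parquet data and write it back out as a metastore table
        qc.read
          .parquet("/user/sparkuser/data/staged/item_sales_basket_id.parquet")
          .write
          .format("parquet")
          .saveAsTable("is_20150617_test2")
      }
    }

One thing I notice in the traces: the NPE is thrown from DefaultWriterContainer.abortTask while closing the Parquet writer, which suggests the task had already failed earlier in writeRows and the NPE in the abort path may be masking the original exception.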