Please refer to the code snippet below. I get the following error:

/tmp/temp/trade.parquet/part-r-00036.parquet is not a Parquet file. expected magic number at tail [80, 65, 82, 49] but found [20, -28, -93, 93]
    at parquet.hadoop.ParquetFileReader.readFooter(ParquetFileReader.java:418)
    at org.apache.spark.sql.parquet.ParquetTypesConverter$$anonfun$readMetaData$3.apply(ParquetTypes.scala:494)
    at org.apache.spark.sql.parquet.ParquetTypesConverter$$anonfun$readMetaData$3.apply(ParquetTypes.scala:494)
    at scala.Option.map(Option.scala:145)
    at org.apache.spark.sql.parquet.ParquetTypesConverter$.readMetaData(ParquetTypes.scala:494)
    at org.apache.spark.sql.parquet.ParquetTypesConverter$.readSchemaFromFile(ParquetTypes.scala:515)
    at org.apache.spark.sql.parquet.ParquetRelation.<init>(ParquetRelation.scala:67)
    at org.apache.spark.sql.SQLContext.parquetFile(SQLContext.scala:542)
    at com.bfm.spark.data.handlers.input.InputFormatRegistry$.registerTable(InputFormatRegistry.scala:42)
    at com.bfm.spark.data.handlers.input.CassandraInputHandler.handleConfiguration(CassandraInputHandler.scala:43)
    at com.bfm.spark.data.handlers.input.CassandraInputHandler.handleConfiguration(CassandraInputHandler.scala:21)
    at com.bfm.spark.data.services.CompositeConfigurationHandler$$anonfun$handleConfiguration$1.apply(CompositeConfigurationHandler.scala:18)
    at com.bfm.spark.data.services.CompositeConfigurationHandler$$anonfun$handleConfiguration$1.apply(CompositeConfigurationHandler.scala:15)
    at scala.collection.Iterator$class.foreach(Iterator.scala:727)
    at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
    at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
    at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
    at com.bfm.spark.data.services.CompositeConfigurationHandler.handleConfiguration(CompositeConfigurationHandler.scala:15)
    at com.bfm.spark.data.services.CompositeConfigurationHandler$$anonfun$handleConfiguration$1.apply(CompositeConfigurationHandler.scala:18)
    at com.bfm.spark.data.services.CompositeConfigurationHandler$$anonfun$handleConfiguration$1.apply(CompositeConfigurationHandler.scala:15)
    at scala.collection.Iterator$class.foreach(Iterator.scala:727)
    at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
    at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
    at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
    at com.bfm.spark.data.services.CompositeConfigurationHandler.handleConfiguration(CompositeConfigurationHandler.scala:15)
    at com.bfm.spark.data.services.ConfigurationHandlerService.execute(ConfigurationHandlerService.scala:43)
    at com.bfm.spark.data.scheduler.DataLoadingScheduler$$anonfun$scheduleJobsByHour$2.apply(DataLoadingScheduler.scala:45)
    at com.bfm.spark.data.scheduler.DataLoadingScheduler$$anonfun$scheduleJobsByHour$2.apply(DataLoadingScheduler.scala:45)
    at scala.collection.immutable.Set$Set1.foreach(Set.scala:74)
    at com.bfm.spark.data.scheduler.DataLoadingScheduler.scheduleJobsByHour(DataLoadingScheduler.scala:45)
    at com.bfm.spark.data.scheduler.DataLoadingScheduler.everyThreeHours(DataLoadingScheduler.scala:20)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:483)
import com.datastax.spark.connector._
import com.google.gson.GsonBuilder
import scala.collection.mutable._
import scala.util._

case class Trade(org_ :String, fund:Int, invnum:Int, touch_count:Int, blob:String)

val rdd = sc.cassandraTable[Trade]( "TEST", "trade")
val filteredRDD = rdd.filter(data => data.org_.equals("DEV"))
val cassandraRDD = rdd.map(data => data.blob)
sqlContext.jsonRDD(cassandraRDD, 0.01).registerTempTable("trade")
sqlContext.sql("select * from trade").repartition(1).saveAsParquetFile("/tmp/temp/trade1.parquet")

If I don't do the repartition, this works fine. Am I missing something here? I am using Spark 1.3.1.

Regards,
Gaurav

--
View this message in context: http://apache-spark-user-list.1001560.n3.nabble.com/Error-while-saving-parquet-tp24770.html
Sent from the Apache Spark User List mailing list archive at Nabble.com.

---------------------------------------------------------------------
To unsubscribe, e-mail: user-unsubscr...@spark.apache.org
For additional commands, e-mail: user-h...@spark.apache.org