Experts on the spark-connector-user mailing list should be able to chime in. My reply to that list bounced since I didn't subscribe to it.
FYI

On Mon, Sep 28, 2015 at 10:07 AM, Priya Ch <learnings.chitt...@gmail.com> wrote:

> Ted,
>
> I am using Spark 1.3.0 and running the code in YARN mode.
>
> Here is the code:
>
> import org.apache.spark.{SparkConf, SparkContext}
> import org.apache.spark.sql.{DataFrame, SaveMode}
> import org.apache.spark.sql.cassandra.CassandraSQLContext
> import org.apache.spark.streaming.kafka.KafkaUtils
>
> object streaming {
>   def main(args: Array[String]) {
>     val conf = new SparkConf()
>     conf.setMaster("yarn-client")
>     conf.setAppName("SimpleApp")
>     conf.set("spark.cassandra.connection.host", "<ipaddress>")
>
>     val sc = new SparkContext(conf)
>     val sqlContext = new CassandraSQLContext(sc)
>
>     val dstream = KafkaUtils.createStream(........)
>     val words = dstream.flatMap(line => line.split(","))
>     val wordPairs = words.map(word => (word, 1))
>     val reducedStream = wordPairs.reduceByKey((a, b) => a + b)
>
>     reducedStream.foreachRDD { rdd =>
>       rdd.foreach {
>         case (key, value) =>
>           val df = getDataFrame(sqlContext, key)
>           df.save("org.apache.spark.sql.cassandra", SaveMode.Overwrite,
>             Map("c_table" -> "table1", "keyspace" -> "test"))
>       }
>     }
>   }
>
>   def getDataFrame(cqlContext: CassandraSQLContext, key: String): DataFrame =
>     cqlContext.sql("select word,count from wordcount where word = '" + key + "'")
> }
>
> In the above code, the method getDataFrame is throwing a NullPointerException
> at the cqlContext.sql line.
>
> On Mon, Sep 28, 2015 at 6:54 PM, Ted Yu <yuzhih...@gmail.com> wrote:
>
>> Which Spark release are you using?
>>
>> Can you show the snippet of your code around CassandraSQLContext#sql()?
>>
>> Thanks
>>
>> On Mon, Sep 28, 2015 at 6:21 AM, Priya Ch <learnings.chitt...@gmail.com> wrote:
>>
>>> Hi All,
>>>
>>> I am trying to use DataFrames (which contain data from Cassandra) inside
>>> rdd.foreach. This is throwing the exception below.
>>>
>>> Is CassandraSQLContext accessible within an executor?
>>>
>>> 15/09/28 17:22:40 ERROR JobScheduler: Error running job streaming job 1443441160000 ms.0
>>> org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 6.0
>>> failed 4 times, most recent failure: Lost task 0.3 in stage 6.0 (TID 83,
>>> blrrndnbudn05.rnd.amadeus.net): java.lang.NullPointerException
>>>     at org.apache.spark.sql.SQLContext.parseSql(SQLContext.scala:134)
>>>     at org.apache.spark.sql.cassandra.CassandraSQLContext.cassandraSql(CassandraSQLContext.scala:74)
>>>     at org.apache.spark.sql.cassandra.CassandraSQLContext.sql(CassandraSQLContext.scala:77)
>>>     at com.amadeus.spark.example.SparkDemo$.withDataFrames(SparkDemo.scala:170)
>>>     at com.amadeus.spark.example.SparkDemo$$anonfun$main$1$$anonfun$apply$1.apply(SparkDemo.scala:158)
>>>     at com.amadeus.spark.example.SparkDemo$$anonfun$main$1$$anonfun$apply$1.apply(SparkDemo.scala:158)
>>>     at scala.collection.Iterator$class.foreach(Iterator.scala:727)
>>>     at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
>>>     at org.apache.spark.rdd.RDD$$anonfun$foreach$1.apply(RDD.scala:798)
>>>     at org.apache.spark.rdd.RDD$$anonfun$foreach$1.apply(RDD.scala:798)
>>>     at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1503)
>>>     at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1503)
>>>     at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61)
>>>     at org.apache.spark.scheduler.Task.run(Task.scala:64)
>>>     at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:203)
>>>     at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
>>>     at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
>>>     at java.lang.Thread.run(Thread.java:745)
>>>
>>> Driver stacktrace:
>>>     at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1203)
>>>     at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1192)
>>>     at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1191)
>>>     at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
>>>     at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
>>>     at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1191)
>>>     at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:693)
>>>     at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:693)
>>>     at scala.Option.foreach(Option.scala:236)
>>>     at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:693)
>>>     at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1393)
>>>     at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1354)
>>>     at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
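
One note on the question itself, for whoever hits this next: a SQLContext (and CassandraSQLContext is one) is a driver-side object. When it is captured by the closure passed to rdd.foreach, it is serialized to the executors with its transient internals (such as the SQL parser) left null, which is exactly the NPE at SQLContext.parseSql in the trace above. Below is a minimal sketch of one way to restructure the code, not the poster's confirmed fix. It reuses reducedStream, getDataFrame, and sqlContext from the code earlier in the thread, and assumes the number of distinct keys per batch is small enough to collect() to the driver.

// Sketch only: keep all SQLContext work on the driver. The body of
// foreachRDD runs on the driver; only operations on the rdd itself
// are shipped to executors.
reducedStream.foreachRDD { rdd =>
  // Bring this batch's keys back to the driver (assumes a small key set).
  val keys = rdd.keys.collect()
  keys.foreach { key =>
    // getDataFrame / cqlContext.sql now execute on the driver, where the
    // CassandraSQLContext is fully initialized.
    val df = getDataFrame(sqlContext, key)
    df.save("org.apache.spark.sql.cassandra", SaveMode.Overwrite,
      Map("c_table" -> "table1", "keyspace" -> "test"))
  }
}

If the key set is too large to collect, the connector's joinWithCassandraTable (via import com.datastax.spark.connector._) can do the per-key Cassandra lookup on the executors without involving a SQLContext at all.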