Experts on the spark-connector-user mailing list should be able to chime in.

My reply to that list bounced since I'm not subscribed to it.

FYI

On Mon, Sep 28, 2015 at 10:07 AM, Priya Ch <learnings.chitt...@gmail.com>
wrote:

> Ted,
>
> I am using Spark 1.3.0 and running the code in YARN mode.
>
> Here is the code:
>
> import org.apache.spark.{SparkConf, SparkContext}
> import org.apache.spark.sql.{DataFrame, SaveMode}
> import org.apache.spark.sql.cassandra.CassandraSQLContext
> import org.apache.spark.streaming.kafka.KafkaUtils
>
> object streaming {
>   def main(args: Array[String]) {
>     val conf = new SparkConf()
>     conf.setMaster("yarn-client")
>     conf.setAppName("SimpleApp")
>     conf.set("spark.cassandra.connection.host", "<ipaddress>")
>
>     val sc = new SparkContext(conf)
>     val sqlContext = new CassandraSQLContext(sc)
>
>     val dstream = KafkaUtils.createStream(........)
>     val words = dstream.flatMap(line => line.split(","))
>     val wordPairs = words.map(word => (word, 1))
>     val reducedStream = wordPairs.reduceByKey((a, b) => a + b)
>
>     reducedStream.foreachRDD { rdd =>
>       rdd.foreach {
>         case (key, value) =>
>           val df = getDataFrame(sqlContext, key)
>           df.save("org.apache.spark.sql.cassandra", SaveMode.Overwrite,
>             Map("c_table" -> "table1", "keyspace" -> "test"))
>       }
>     }
>   }
>
>   def getDataFrame(cqlContext: CassandraSQLContext, key: String): DataFrame =
>     cqlContext.sql("select word, count from wordcount where word = '" + key + "'")
> }
>
> In the above code, the getDataFrame method throws a NullPointerException at
> the cqlContext.sql line.
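>
> If I understand it correctly, the SQLContext state only exists on the driver,
> so calling it inside rdd.foreach (which runs on the executors) would explain
> the null. A rough sketch of a restructuring I could try, keeping the query
> and save on the driver by collecting the keys first (assuming the per-batch
> key set is small enough to collect):
>
> reducedStream.foreachRDD { rdd =>
>   val keys = rdd.keys.collect()              // brings the keys to the driver
>   keys.foreach { key =>
>     val df = getDataFrame(sqlContext, key)   // sqlContext is usable here
>     df.save("org.apache.spark.sql.cassandra", SaveMode.Overwrite,
>       Map("c_table" -> "table1", "keyspace" -> "test"))
>   }
> }
>
> Would something along those lines be the right direction, or is there a
> supported way to use CassandraSQLContext from within an executor?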
>
> On Mon, Sep 28, 2015 at 6:54 PM, Ted Yu <yuzhih...@gmail.com> wrote:
>
>> Which Spark release are you using?
>>
>> Can you show the snippet of your code around CassandraSQLContext#sql()?
>>
>> Thanks
>>
>> On Mon, Sep 28, 2015 at 6:21 AM, Priya Ch <learnings.chitt...@gmail.com>
>> wrote:
>>
>>> Hi All,
>>>
>>> I am trying to use DataFrames (which contain data from Cassandra) inside
>>> rdd.foreach, and it is throwing the exception below.
>>>
>>> Is CassandraSQLContext accessible within an executor?
>>>
>>> 15/09/28 17:22:40 ERROR JobScheduler: Error running job streaming job
>>> 1443441160000 ms.0
>>> org.apache.spark.SparkException: Job aborted due to stage failure: Task
>>> 0 in stage 6.0 failed 4 times, most recent failure: Lost task 0.3 in stage
>>> 6.0 (TID 83, blrrndnbudn05.rnd.amadeus.net):
>>> java.lang.NullPointerException
>>>         at org.apache.spark.sql.SQLContext.parseSql(SQLContext.scala:134)
>>>         at
>>> org.apache.spark.sql.cassandra.CassandraSQLContext.cassandraSql(CassandraSQLContext.scala:74)
>>>         at
>>> org.apache.spark.sql.cassandra.CassandraSQLContext.sql(CassandraSQLContext.scala:77)
>>>         at
>>> com.amadeus.spark.example.SparkDemo$.withDataFrames(SparkDemo.scala:170)
>>>         at
>>> com.amadeus.spark.example.SparkDemo$$anonfun$main$1$$anonfun$apply$1.apply(SparkDemo.scala:158)
>>>         at
>>> com.amadeus.spark.example.SparkDemo$$anonfun$main$1$$anonfun$apply$1.apply(SparkDemo.scala:158)
>>>         at scala.collection.Iterator$class.foreach(Iterator.scala:727)
>>>         at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
>>>         at
>>> org.apache.spark.rdd.RDD$$anonfun$foreach$1.apply(RDD.scala:798)
>>>         at
>>> org.apache.spark.rdd.RDD$$anonfun$foreach$1.apply(RDD.scala:798)
>>>         at
>>> org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1503)
>>>         at
>>> org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1503)
>>>         at
>>> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61)
>>>         at org.apache.spark.scheduler.Task.run(Task.scala:64)
>>>         at
>>> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:203)
>>>         at
>>> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
>>>         at
>>> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
>>>         at java.lang.Thread.run(Thread.java:745)
>>>
>>> Driver stacktrace:
>>>         at org.apache.spark.scheduler.DAGScheduler.org
>>> $apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1203)
>>>         at
>>> org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1192)
>>>         at
>>> org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1191)
>>>         at
>>> scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
>>>         at
>>> scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
>>>         at
>>> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1191)
>>>         at
>>> org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:693)
>>>         at
>>> org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:693)
>>>         at scala.Option.foreach(Option.scala:236)
>>>         at
>>> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:693)
>>>         at
>>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1393)
>>>         at
>>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1354)
>>>         at
>>> org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
>>>
>>>
>>
>
