Hi, I am getting an error when I try to write data to Phoenix.

*Software Configuration:* Spark 1.5.2, Phoenix 4.4, HBase 1.1
*Spark Scala Script:*

import org.apache.spark.sql.SaveMode
import org.apache.spark.sql.functions.lit

val dfLCR = readTable(sqlContext, "", "TEST")
val schemaL = dfLCR.schema
val lcrReportPath = "/TestDivya/Spark/Results/TestData/"
val dfReadReport = sqlContext.read
  .format("com.databricks.spark.csv")
  .option("header", "true")
  .schema(schemaL)
  .load(lcrReportPath)
dfReadReport.show()
val dfWidCol = dfReadReport.withColumn("RPT_DATE", lit("2015-01-01"))
val dfSelect = dfWidCol.select("RPT_DATE")
dfSelect.write
  .format("org.apache.phoenix.spark")
  .mode(SaveMode.Overwrite)
  .options(collection.immutable.Map("zkUrl" -> "localhost", "table" -> "TEST"))
  .save()

(readTable is a small helper that reads a Phoenix table into a DataFrame; a sketch of it is appended at the end of this post.)

*Command Line to run Script:*

spark-shell \
  --conf "spark.driver.extraClassPath=/usr/hdp/2.3.4.0-3485/phoenix/phoenix-client.jar" \
  --conf "spark.executor.extraClassPath=/usr/hdp/2.3.4.0-3485/phoenix/phoenix-client.jar" \
  --properties-file /TestDivya/Spark/Phoenix.properties \
  --jars /usr/hdp/2.3.4.0-3485/phoenix/lib/phoenix-spark-4.4.0.2.3.4.0-3485.jar,/usr/hdp/2.3.4.0-3485/phoenix/phoenix-client.jar \
  --driver-class-path /usr/hdp/2.3.4.0-3485/phoenix/lib/phoenix-spark-4.4.0.2.3.4.0-3485.jar,/usr/hdp/2.3.4.0-3485/hbase/lib/phoenix-client-4.4.0.jar \
  --packages com.databricks:spark-csv_2.10:1.4.0 \
  --master yarn-client \
  -i /TestDivya/Spark/WriteToPheonix.scala

*Error Stack Trace:*

16/04/12 02:53:59 INFO YarnScheduler: Removed TaskSet 3.0, whose tasks have all completed, from pool
org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 3.0 failed 4 times, most recent failure: Lost task 1.3 in stage 3.0 (TID 410, ip-172-31-22-135.ap-southeast-1.compute.internal): java.lang.RuntimeException: java.sql.SQLException: No suitable driver found for jdbc:phoenix:localhost:2181:/hbase-unsecure;
    at org.apache.phoenix.mapreduce.PhoenixOutputFormat.getRecordWriter(PhoenixOutputFormat.java:58)
    at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12.apply(PairRDDFunctions.scala:1030)
    at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12.apply(PairRDDFunctions.scala:1014)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
    at org.apache.spark.scheduler.Task.run(Task.scala:88)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
    at java.lang.Thread.run(Thread.java:745)
Caused by: java.sql.SQLException: No suitable driver found for jdbc:phoenix:localhost:2181:/hbase-unsecure;
    at java.sql.DriverManager.getConnection(DriverManager.java:596)
    at java.sql.DriverManager.getConnection(DriverManager.java:187)
    at org.apache.phoenix.mapreduce.util.ConnectionUtil.getConnection(ConnectionUtil.java:99)
    at org.apache.phoenix.mapreduce.util.ConnectionUtil.getOutputConnection(ConnectionUtil.java:82)
    at org.apache.phoenix.mapreduce.util.ConnectionUtil.getOutputConnection(ConnectionUtil.java:70)
    at org.apache.phoenix.mapreduce.PhoenixRecordWriter.<init>(PhoenixRecordWriter.java:49)
    at org.apache.phoenix.mapreduce.PhoenixOutputFormat.getRecordWriter(PhoenixOutputFormat.java:55)
    ... 8 more

Driver stacktrace:
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1283)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1271)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1270)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1270)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697)
    at scala.Option.foreach(Option.scala:236)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:697)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1496)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1458)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1447)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:567)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1824)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1837)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1914)
    at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply$mcV$sp(PairRDDFunctions.scala:1055)
    at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply(PairRDDFunctions.scala:998)
    at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply(PairRDDFunctions.scala:998)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:310)
    at org.apache.spark.rdd.PairRDDFunctions.saveAsNewAPIHadoopDataset(PairRDDFunctions.scala:998)
    at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopFile$2.apply$mcV$sp(PairRDDFunctions.scala:938)
    at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopFile$2.apply(PairRDDFunctions.scala:930)
    at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopFile$2.apply(PairRDDFunctions.scala:930)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:310)
    at org.apache.spark.rdd.PairRDDFunctions.saveAsNewAPIHadoopFile(PairRDDFunctions.scala:930)
    at org.apache.phoenix.spark.DataFrameFunctions.saveToPhoenix(DataFrameFunctions.scala:43)
    at org.apache.phoenix.spark.DefaultSource.createRelation(DefaultSource.scala:47)
    at org.apache.spark.sql.execution.datasources.ResolvedDataSource$.apply(ResolvedDataSource.scala:170)

Could somebody help me figure out the missing properties/configuration, or point me to a link? I would really appreciate the help.

Thanks,
Divya
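*Note on readTable:* the script above calls readTable without showing its definition. Below is a minimal sketch of such a helper, assuming it loads a Phoenix table into a DataFrame through the org.apache.phoenix.spark data source; the zkUrl value and the treatment of the empty second argument as an optional schema prefix are assumptions, not taken from the original script.

import org.apache.spark.sql.{DataFrame, SQLContext}

// Hypothetical helper (not from the original script): reads a Phoenix table
// into a DataFrame via the phoenix-spark data source.
def readTable(sqlContext: SQLContext, schemaName: String, tableName: String): DataFrame = {
  // Optionally qualify the table with a schema name, e.g. "MYSCHEMA.TEST" (assumption)
  val fullTableName = if (schemaName.isEmpty) tableName else s"$schemaName.$tableName"
  sqlContext.read
    .format("org.apache.phoenix.spark")
    .option("table", fullTableName)
    .option("zkUrl", "localhost:2181") // ZooKeeper quorum; assumed value, adjust for your cluster
    .load()
}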