Resolved the above error by creating a SparkSession:

val spark = SparkSession.builder().appName("HBase - Spark POC").getOrCreate()
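For reference, the end-to-end flow now looks like this (a sketch; rowRDD and stdSchema are built exactly as in the code quoted below):

import org.apache.spark.sql.SparkSession

// Single entry point (Spark 2.x) instead of a separate SQLContext on top of a
// new SparkContext; switching to it is what made the metastore error go away.
val spark = SparkSession.builder().appName("HBase - Spark POC").getOrCreate()

val stdDf = spark.createDataFrame(rowRDD, stdSchema)
stdDf.createOrReplaceTempView("student")
spark.sql("SELECT * FROM student").show()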
Next error: spark.sql("SELECT * FROM student").show(). The query itself is accepted, but the show() action on the DataFrame throws the exception below:

scala> sqlContext.sql("select * from student").show()
16/12/28 21:04:23 ERROR executor.Executor: Exception in task 0.0 in stage 2.0 (TID 2)
java.lang.RuntimeException: Error while encoding: java.lang.RuntimeException: java.lang.Integer is not a valid external type for schema of string
if (assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 0, Rowid), StringType), true) AS Rowid#35
+- if (assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 0, Rowid), StringType), true)
   :- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt
   :  :- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object)
   :  :  +- input[0, org.apache.spark.sql.Row, true]
   :  +- 0
   :- null
   +- staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 0, Rowid), StringType), true)
      +- validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 0, Rowid), StringType)
         +- getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 0, Rowid)
            +- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object)
               +- input[0, org.apache.spark.sql.Row, true]

if (assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 1, maths), StringType), true) AS maths#36
+- if (assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 1, maths), StringType), true)
   :- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt
   :  :- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object)
   :  :  +- input[0, org.apache.spark.sql.Row, true]
   :  +- 1
   :- null
   +- staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 1, maths), StringType), true)
      +- validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 1, maths), StringType)
         +- getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 1, maths)
            +- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object)
               +- input[0, org.apache.spark.sql.Row, true]

if (assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 2, english), StringType), true) AS english#37
+- if (assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 2, english), StringType), true)

Kindly help: I am unable to make out from this error what exactly is wrong.

Thanks.
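Reading the first line of the message, "java.lang.Integer is not a valid external type for schema of string", my guess is that the Row values do not match the declared schema: the hbaseTable call in the quoted code reads the five mark columns as Option[Int], while stdSchema declares every field as StringType. A rough sketch of the two fixes I plan to try (untested; same hBaseRDD and Row import as in the quoted code below):

// Option 1: keep the all-string schema and convert the Int columns to strings.
// getOrElse also avoids the NoSuchElementException that .get throws on empty cells.
val rowRDD = hBaseRDD.map(i =>
  Row(i._1.getOrElse(""),
      i._2.map(_.toString).getOrElse(""),
      i._3.map(_.toString).getOrElse(""),
      i._4.map(_.toString).getOrElse(""),
      i._5.map(_.toString).getOrElse(""),
      i._6.map(_.toString).getOrElse("")))

// Option 2: keep the Int values and declare a schema that matches them.
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
val stdSchema = StructType(
  StructField("Rowid", StringType, nullable = true) +:
    Seq("maths", "english", "science", "history", "computer")
      .map(name => StructField(name, IntegerType, nullable = true)))

Is either of these the right direction?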
On Wed, Dec 28, 2016 at 9:00 PM, Chetan Khatri <chetan.opensou...@gmail.com> wrote:
> Hello Spark Community,
>
> I am reading an HBase table from Spark and getting an RDD, and now I want to convert that RDD of Spark Rows to a DataFrame.
>
> *Source Code:*
>
> bin/spark-shell --packages it.nerdammer.bigdata:spark-hbase-connector_2.10:1.0.3 --conf spark.hbase.host=127.0.0.1
>
> import it.nerdammer.spark.hbase._
> import org.apache.spark.{SparkConf, SparkContext}
> import org.apache.spark.sql.Row
> import org.apache.spark.sql.types.StructType
> import org.apache.spark.sql.types.StructField
> import org.apache.spark.sql.types.StringType
>
> val sparkConf = new SparkConf().setAppName("HBase Spark POC")
>
> val sparkContext = new SparkContext(sparkConf)
>
> val sqlContext = new org.apache.spark.sql.SQLContext(sparkContext)
>
> val hBaseRDD = sc.hbaseTable[(Option[String], Option[Int], Option[Int], Option[Int], Option[Int], Option[Int])]("university").select("maths", "english", "science", "history", "computer").inColumnFamily("school")
>
> val rowRDD = hBaseRDD.map(i => Row(i._1.get, i._2.get, i._3.get, i._4.get, i._5.get, i._6.get))
>
> val stdSchemaString = "Rowid,maths,english,science,history,computer"
>
> val stdSchema = StructType(stdSchemaString.split(",").map(fieldName => StructField(fieldName, StringType, true)))
>
> val stdDf = sqlContext.createDataFrame(rowRDD, stdSchema)
>
> // Getting Error
>
> stdDf.registerTempTable("student")
>
> sqlContext.sql("select * from student").show()
>
> *Error*
>
> scala> val stdDf = sqlContext.createDataFrame(rowRDD, stdSchema)
> 16/12/28 20:50:59 ERROR metastore.RetryingHMSHandler: AlreadyExistsException(message:Database default already exists)
> at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.create_database(HiveMetaStore.java:891)
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> at java.lang.reflect.Method.invoke(Method.java:498)
> at org.apache.hadoop.hive.metastore.RetryingHMSHandler.invoke(RetryingHMSHandler.java:107)
> at com.sun.proxy.$Proxy21.create_database(Unknown Source)
> at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.createDatabase(HiveMetaStoreClient.java:644)
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> at java.lang.reflect.Method.invoke(Method.java:498)
> at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:156)
> at com.sun.proxy.$Proxy22.createDatabase(Unknown Source)
> at org.apache.hadoop.hive.ql.metadata.Hive.createDatabase(Hive.java:306)
> at org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$createDatabase$1.apply$mcV$sp(HiveClientImpl.scala:309)
> at org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$createDatabase$1.apply(HiveClientImpl.scala:309)
> at org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$createDatabase$1.apply(HiveClientImpl.scala:309)
> at org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$withHiveState$1.apply(HiveClientImpl.scala:280)
> at org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:227)
> at org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:226)
> at org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:269)
> at org.apache.spark.sql.hive.client.HiveClientImpl.createDatabase(HiveClientImpl.scala:308)
> at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$createDatabase$1.apply$mcV$sp(HiveExternalCatalog.scala:99)
> at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$createDatabase$1.apply(HiveExternalCatalog.scala:99)
> at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$createDatabase$1.apply(HiveExternalCatalog.scala:99)
> at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:72)
> at org.apache.spark.sql.hive.HiveExternalCatalog.createDatabase(HiveExternalCatalog.scala:98)
> at org.apache.spark.sql.catalyst.catalog.SessionCatalog.createDatabase(SessionCatalog.scala:147)
> at org.apache.spark.sql.catalyst.catalog.SessionCatalog.<init>(SessionCatalog.scala:89)
> at org.apache.spark.sql.hive.HiveSessionCatalog.<init>(HiveSessionCatalog.scala:51)
> at org.apache.spark.sql.hive.HiveSessionState.catalog$lzycompute(HiveSessionState.scala:49)
> at org.apache.spark.sql.hive.HiveSessionState.catalog(HiveSessionState.scala:48)
> at org.apache.spark.sql.hive.HiveSessionState$$anon$1.<init>(HiveSessionState.scala:63)
> at org.apache.spark.sql.hive.HiveSessionState.analyzer$lzycompute(HiveSessionState.scala:63)
> at org.apache.spark.sql.hive.HiveSessionState.analyzer(HiveSessionState.scala:62)
> at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:49)
> at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:64)
> at org.apache.spark.sql.SparkSession.createDataFrame(SparkSession.scala:542)
> at org.apache.spark.sql.SparkSession.createDataFrame(SparkSession.scala:302)
> at org.apache.spark.sql.SQLContext.createDataFrame(SQLContext.scala:337)
> at $line34.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:42)
> at $line34.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:47)
> at $line34.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:49)
> at $line34.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:51)
> at $line34.$read$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:53)
> at $line34.$read$$iw$$iw$$iw$$iw$$iw.<init>(<console>:55)
> at $line34.$read$$iw$$iw$$iw$$iw.<init>(<console>:57)
> at $line34.$read$$iw$$iw$$iw.<init>(<console>:59)
> at $line34.$read$$iw$$iw.<init>(<console>:61)
> at $line34.$read$$iw.<init>(<console>:63)
> at $line34.$read.<init>(<console>:65)
> at $line34.$read$.<init>(<console>:69)
> at $line34.$read$.<clinit>(<console>)
> at $line34.$eval$.$print$lzycompute(<console>:7)
> at $line34.$eval$.$print(<console>:6)
> at $line34.$eval.$print(<console>)
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> at java.lang.reflect.Method.invoke(Method.java:498)
> at scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:786)
> at scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:1047)
> at scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:638)
> at scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:637)
> at scala.reflect.internal.util.ScalaClassLoader$class.asContext(ScalaClassLoader.scala:31)
> at scala.reflect.internal.util.AbstractFileClassLoader.asContext(AbstractFileClassLoader.scala:19)
> at scala.tools.nsc.interpreter.IMain$WrappedRequest.loadAndRunReq(IMain.scala:637)
> at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:569)
> at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:565)
> at scala.tools.nsc.interpreter.ILoop.interpretStartingWith(ILoop.scala:807)
> at scala.tools.nsc.interpreter.ILoop.command(ILoop.scala:681)
> at scala.tools.nsc.interpreter.ILoop.processLine(ILoop.scala:395)
> at scala.tools.nsc.interpreter.ILoop.loop(ILoop.scala:415)
> at scala.tools.nsc.interpreter.ILoop$$anonfun$process$1.apply$mcZ$sp(ILoop.scala:923)
> at scala.tools.nsc.interpreter.ILoop$$anonfun$process$1.apply(ILoop.scala:909)
> at scala.tools.nsc.interpreter.ILoop$$anonfun$process$1.apply(ILoop.scala:909)
> at scala.reflect.internal.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:97)
> at scala.tools.nsc.interpreter.ILoop.process(ILoop.scala:909)
> at org.apache.spark.repl.Main$.doMain(Main.scala:68)
> at org.apache.spark.repl.Main$.main(Main.scala:51)
> at org.apache.spark.repl.Main.main(Main.scala)
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> at java.lang.reflect.Method.invoke(Method.java:498)
> at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:736)
> at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:185)
> at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:210)
> at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:124)
> at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
>
> stdDf: org.apache.spark.sql.DataFrame = [Rowid: string, maths: string ... 4 more fields]
>
> What would the resolution be?
>
> Thanks,
> Chetan
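P.S. For the archives: when building a DataFrame from an RDD[Row], every value in the Row must match its StructField type exactly; Spark does not coerce an Int into a StringType column, and because the encoding is lazy, the mismatch only surfaces when an action such as show() runs. A minimal standalone illustration (spark-shell, Spark 2.x; the schema and values here are made up):

import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

// IntegerType matches the Int value below; declaring StringType for "maths"
// instead would reproduce the "not a valid external type" error at show().
val schema = StructType(Seq(
  StructField("Rowid", StringType, nullable = true),
  StructField("maths", IntegerType, nullable = true)))

val rdd = spark.sparkContext.parallelize(Seq(Row("row1", 95)))
spark.createDataFrame(rdd, schema).show()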