Hi Jenny, How is your metastore configured for both Hive and Spark SQL? Which metastore mode are you using (see https://cwiki.apache.org/confluence/display/Hive/AdminManual+MetastoreAdmin for the available modes)?
Thanks, Yin On Mon, Aug 11, 2014 at 6:15 PM, Jenny Zhao <[email protected]> wrote: > > > you can reproduce this issue with the following steps (assuming you have > Yarn cluster + Hive 12): > > 1) using hive shell, create a database, e.g: create database ttt > > 2) write a simple spark sql program > > import org.apache.spark.{SparkConf, SparkContext} > import org.apache.spark.sql._ > import org.apache.spark.sql.hive.HiveContext > > object HiveSpark { > case class Record(key: Int, value: String) > > def main(args: Array[String]) { > val sparkConf = new SparkConf().setAppName("HiveSpark") > val sc = new SparkContext(sparkConf) > > // A hive context creates an instance of the Hive Metastore in process, > val hiveContext = new HiveContext(sc) > import hiveContext._ > > hql("use ttt") > hql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)") > hql("LOAD DATA INPATH '/user/biadmin/kv1.txt' INTO TABLE src") > > // Queries are expressed in HiveQL > println("Result of 'SELECT *': ") > hql("SELECT * FROM src").collect.foreach(println) > sc.stop() > } > } > 3) run it in yarn-cluster mode. > > > On Mon, Aug 11, 2014 at 9:44 AM, Cheng Lian <[email protected]> wrote: > >> Since you were using hql(...), it’s probably not related to JDBC driver. >> But I failed to reproduce this issue locally with a single node pseudo >> distributed YARN cluster. Would you mind to elaborate more about steps to >> reproduce this bug? Thanks >> >> >> >> On Sun, Aug 10, 2014 at 9:36 PM, Cheng Lian <[email protected]> >> wrote: >> >>> Hi Jenny, does this issue only happen when running Spark SQL with YARN >>> in your environment? >>> >>> >>> On Sat, Aug 9, 2014 at 3:56 AM, Jenny Zhao <[email protected]> >>> wrote: >>> >>>> >>>> Hi, >>>> >>>> I am able to run my hql query on yarn cluster mode when connecting to >>>> the default hive metastore defined in hive-site.xml. 
>>>> >>>> however, if I want to switch to a different database, like: >>>> >>>> hql("use other-database") >>>> >>>> >>>> it only works in yarn client mode, but failed on yarn-cluster mode with >>>> the following stack: >>>> >>>> 14/08/08 12:09:11 INFO HiveMetaStore: 0: get_database: tt >>>> 14/08/08 12:09:11 INFO audit: ugi=biadmin ip=unknown-ip-addr >>>> cmd=get_database: tt >>>> 14/08/08 12:09:11 ERROR RetryingHMSHandler: >>>> NoSuchObjectException(message:There is no database named tt) >>>> at >>>> org.apache.hadoop.hive.metastore.ObjectStore.getMDatabase(ObjectStore.java:431) >>>> at >>>> org.apache.hadoop.hive.metastore.ObjectStore.getDatabase(ObjectStore.java:441) >>>> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) >>>> at >>>> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:60) >>>> at >>>> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:37) >>>> at java.lang.reflect.Method.invoke(Method.java:611) >>>> at >>>> org.apache.hadoop.hive.metastore.RetryingRawStore.invoke(RetryingRawStore.java:124) >>>> at $Proxy15.getDatabase(Unknown Source) >>>> at >>>> org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.get_database(HiveMetaStore.java:628) >>>> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) >>>> at >>>> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:60) >>>> at >>>> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:37) >>>> at java.lang.reflect.Method.invoke(Method.java:611) >>>> at >>>> org.apache.hadoop.hive.metastore.RetryingHMSHandler.invoke(RetryingHMSHandler.java:103) >>>> at $Proxy17.get_database(Unknown Source) >>>> at >>>> org.apache.hadoop.hive.metastore.HiveMetaStoreClient.getDatabase(HiveMetaStoreClient.java:810) >>>> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) >>>> at >>>> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:60) >>>> at >>>> 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:37) >>>> at java.lang.reflect.Method.invoke(Method.java:611) >>>> at >>>> org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:89) >>>> at $Proxy18.getDatabase(Unknown Source) >>>> at org.apache.hadoop.hive.ql.metadata.Hive.getDatabase(Hive.java:1139) >>>> at >>>> org.apache.hadoop.hive.ql.metadata.Hive.databaseExists(Hive.java:1128) >>>> at >>>> org.apache.hadoop.hive.ql.exec.DDLTask.switchDatabase(DDLTask.java:3479) >>>> at org.apache.hadoop.hive.ql.exec.DDLTask.execute(DDLTask.java:237) >>>> at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:151) >>>> at >>>> org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:65) >>>> at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1414) >>>> at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1192) >>>> at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1020) >>>> at org.apache.hadoop.hive.ql.Driver.run(Driver.java:888) >>>> at org.apache.spark.sql.hive.HiveContext.runHive(HiveContext.scala:208) >>>> at >>>> org.apache.spark.sql.hive.HiveContext.runSqlHive(HiveContext.scala:182) >>>> at >>>> org.apache.spark.sql.hive.HiveContext$QueryExecution.toRdd$lzycompute(HiveContext.scala:272) >>>> at >>>> org.apache.spark.sql.hive.HiveContext$QueryExecution.toRdd(HiveContext.scala:269) >>>> at org.apache.spark.sql.hive.HiveContext.hiveql(HiveContext.scala:86) >>>> at org.apache.spark.sql.hive.HiveContext.hql(HiveContext.scala:91) >>>> at >>>> org.apache.spark.examples.sql.hive.HiveSpark$.main(HiveSpark.scala:35) >>>> at org.apache.spark.examples.sql.hive.HiveSpark.main(HiveSpark.scala) >>>> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) >>>> at >>>> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:60) >>>> at >>>> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:37) >>>> at 
java.lang.reflect.Method.invoke(Method.java:611) >>>> at >>>> org.apache.spark.deploy.yarn.ApplicationMaster$$anon$2.run(ApplicationMaster.scala:186) >>>> >>>> 14/08/08 12:09:11 ERROR DDLTask: >>>> org.apache.hadoop.hive.ql.metadata.HiveException: Database does not exist: >>>> tt >>>> at >>>> org.apache.hadoop.hive.ql.exec.DDLTask.switchDatabase(DDLTask.java:3480) >>>> at org.apache.hadoop.hive.ql.exec.DDLTask.execute(DDLTask.java:237) >>>> at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:151) >>>> at >>>> org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:65) >>>> at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1414) >>>> at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1192) >>>> at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1020) >>>> at org.apache.hadoop.hive.ql.Driver.run(Driver.java:888) >>>> at org.apache.spark.sql.hive.HiveContext.runHive(HiveContext.scala:208) >>>> at >>>> org.apache.spark.sql.hive.HiveContext.runSqlHive(HiveContext.scala:182) >>>> at >>>> org.apache.spark.sql.hive.HiveContext$QueryExecution.toRdd$lzycompute(HiveContext.scala:272) >>>> at >>>> org.apache.spark.sql.hive.HiveContext$QueryExecution.toRdd(HiveContext.scala:269) >>>> at org.apache.spark.sql.hive.HiveContext.hiveql(HiveContext.scala:86) >>>> at org.apache.spark.sql.hive.HiveContext.hql(HiveContext.scala:91) >>>> at >>>> org.apache.spark.examples.sql.hive.HiveSpark$.main(HiveSpark.scala:35) >>>> at org.apache.spark.examples.sql.hive.HiveSpark.main(HiveSpark.scala) >>>> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) >>>> at >>>> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:60) >>>> at >>>> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:37) >>>> at java.lang.reflect.Method.invoke(Method.java:611) >>>> at >>>> org.apache.spark.deploy.yarn.ApplicationMaster$$anon$2.run(ApplicationMaster.scala:186) >>>> nono >>>> >>>> why is that? 
not sure if this has something to do with the Hive JDBC >>>> driver? >>>> >>>> Thank you! >>>> >>>> Jenny >>>> >>> >>> >> >
