You can reproduce this issue with the following steps (assuming you have a
YARN cluster + Hive 12):
1) Using the Hive shell, create a database, e.g.: create database ttt
2) Write a simple Spark SQL program:
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql._
import org.apache.spark.sql.hive.HiveContext
/**
 * Minimal Spark SQL program used to reproduce the "use <database>" failure
 * described in this thread: it switches to the `ttt` database, creates and
 * loads a table, then prints every row of that table.
 */
object HiveSpark {
  // Not referenced by the queries below; kept so the object's members are unchanged.
  case class Record(key: Int, value: String)

  /**
   * Entry point. Runs the HiveQL statements in order and prints the result
   * of the final SELECT.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("HiveSpark")
    val sc = new SparkContext(sparkConf)
    try {
      // A Hive context creates an instance of the Hive Metastore in process.
      val hiveContext = new HiveContext(sc)
      import hiveContext._

      hql("use ttt")
      hql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
      hql("LOAD DATA INPATH '/user/biadmin/kv1.txt' INTO TABLE src")

      // Queries are expressed in HiveQL
      println("Result of 'SELECT *': ")
      hql("SELECT * FROM src").collect.foreach(println)
    } finally {
      // Stop the context even when one of the statements above throws,
      // so a failed run does not leave the SparkContext running.
      sc.stop()
    }
  }
}
3) Run it in yarn-cluster mode.
On Mon, Aug 11, 2014 at 9:44 AM, Cheng Lian <[email protected]> wrote:
> Since you were using hql(...), it’s probably not related to the JDBC driver.
> But I failed to reproduce this issue locally with a single-node
> pseudo-distributed YARN cluster. Would you mind elaborating on the steps to
> reproduce this bug? Thanks
>
>
>
> On Sun, Aug 10, 2014 at 9:36 PM, Cheng Lian <[email protected]> wrote:
>
>> Hi Jenny, does this issue only happen when running Spark SQL with YARN in
>> your environment?
>>
>>
>> On Sat, Aug 9, 2014 at 3:56 AM, Jenny Zhao <[email protected]>
>> wrote:
>>
>>>
>>> Hi,
>>>
>>> I am able to run my hql query on yarn cluster mode when connecting to
>>> the default hive metastore defined in hive-site.xml.
>>>
>>> however, if I want to switch to a different database, like:
>>>
>>> hql("use other-database")
>>>
>>>
>>> it only works in yarn-client mode, but fails in yarn-cluster mode with
>>> the following stack trace:
>>>
>>> 14/08/08 12:09:11 INFO HiveMetaStore: 0: get_database: tt
>>> 14/08/08 12:09:11 INFO audit: ugi=biadmin ip=unknown-ip-addr
>>> cmd=get_database: tt
>>> 14/08/08 12:09:11 ERROR RetryingHMSHandler:
>>> NoSuchObjectException(message:There is no database named tt)
>>> at
>>> org.apache.hadoop.hive.metastore.ObjectStore.getMDatabase(ObjectStore.java:431)
>>> at
>>> org.apache.hadoop.hive.metastore.ObjectStore.getDatabase(ObjectStore.java:441)
>>> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>>> at
>>> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:60)
>>> at
>>> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:37)
>>> at java.lang.reflect.Method.invoke(Method.java:611)
>>> at
>>> org.apache.hadoop.hive.metastore.RetryingRawStore.invoke(RetryingRawStore.java:124)
>>> at $Proxy15.getDatabase(Unknown Source)
>>> at
>>> org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.get_database(HiveMetaStore.java:628)
>>> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>>> at
>>> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:60)
>>> at
>>> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:37)
>>> at java.lang.reflect.Method.invoke(Method.java:611)
>>> at
>>> org.apache.hadoop.hive.metastore.RetryingHMSHandler.invoke(RetryingHMSHandler.java:103)
>>> at $Proxy17.get_database(Unknown Source)
>>> at
>>> org.apache.hadoop.hive.metastore.HiveMetaStoreClient.getDatabase(HiveMetaStoreClient.java:810)
>>> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>>> at
>>> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:60)
>>> at
>>> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:37)
>>> at java.lang.reflect.Method.invoke(Method.java:611)
>>> at
>>> org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:89)
>>> at $Proxy18.getDatabase(Unknown Source)
>>> at org.apache.hadoop.hive.ql.metadata.Hive.getDatabase(Hive.java:1139)
>>> at
>>> org.apache.hadoop.hive.ql.metadata.Hive.databaseExists(Hive.java:1128)
>>> at
>>> org.apache.hadoop.hive.ql.exec.DDLTask.switchDatabase(DDLTask.java:3479)
>>> at org.apache.hadoop.hive.ql.exec.DDLTask.execute(DDLTask.java:237)
>>> at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:151)
>>> at
>>> org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:65)
>>> at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1414)
>>> at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1192)
>>> at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1020)
>>> at org.apache.hadoop.hive.ql.Driver.run(Driver.java:888)
>>> at org.apache.spark.sql.hive.HiveContext.runHive(HiveContext.scala:208)
>>> at
>>> org.apache.spark.sql.hive.HiveContext.runSqlHive(HiveContext.scala:182)
>>> at
>>> org.apache.spark.sql.hive.HiveContext$QueryExecution.toRdd$lzycompute(HiveContext.scala:272)
>>> at
>>> org.apache.spark.sql.hive.HiveContext$QueryExecution.toRdd(HiveContext.scala:269)
>>> at org.apache.spark.sql.hive.HiveContext.hiveql(HiveContext.scala:86)
>>> at org.apache.spark.sql.hive.HiveContext.hql(HiveContext.scala:91)
>>> at
>>> org.apache.spark.examples.sql.hive.HiveSpark$.main(HiveSpark.scala:35)
>>> at org.apache.spark.examples.sql.hive.HiveSpark.main(HiveSpark.scala)
>>> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>>> at
>>> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:60)
>>> at
>>> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:37)
>>> at java.lang.reflect.Method.invoke(Method.java:611)
>>> at
>>> org.apache.spark.deploy.yarn.ApplicationMaster$$anon$2.run(ApplicationMaster.scala:186)
>>>
>>> 14/08/08 12:09:11 ERROR DDLTask:
>>> org.apache.hadoop.hive.ql.metadata.HiveException: Database does not exist:
>>> tt
>>> at
>>> org.apache.hadoop.hive.ql.exec.DDLTask.switchDatabase(DDLTask.java:3480)
>>> at org.apache.hadoop.hive.ql.exec.DDLTask.execute(DDLTask.java:237)
>>> at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:151)
>>> at
>>> org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:65)
>>> at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1414)
>>> at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1192)
>>> at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1020)
>>> at org.apache.hadoop.hive.ql.Driver.run(Driver.java:888)
>>> at org.apache.spark.sql.hive.HiveContext.runHive(HiveContext.scala:208)
>>> at
>>> org.apache.spark.sql.hive.HiveContext.runSqlHive(HiveContext.scala:182)
>>> at
>>> org.apache.spark.sql.hive.HiveContext$QueryExecution.toRdd$lzycompute(HiveContext.scala:272)
>>> at
>>> org.apache.spark.sql.hive.HiveContext$QueryExecution.toRdd(HiveContext.scala:269)
>>> at org.apache.spark.sql.hive.HiveContext.hiveql(HiveContext.scala:86)
>>> at org.apache.spark.sql.hive.HiveContext.hql(HiveContext.scala:91)
>>> at
>>> org.apache.spark.examples.sql.hive.HiveSpark$.main(HiveSpark.scala:35)
>>> at org.apache.spark.examples.sql.hive.HiveSpark.main(HiveSpark.scala)
>>> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>>> at
>>> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:60)
>>> at
>>> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:37)
>>> at java.lang.reflect.Method.invoke(Method.java:611)
>>> at
>>> org.apache.spark.deploy.yarn.ApplicationMaster$$anon$2.run(ApplicationMaster.scala:186)
>>> nono
>>>
>>> Why is that? I am not sure whether this has something to do with the
>>> Hive JDBC driver.
>>>
>>> Thank you!
>>>
>>> Jenny
>>>
>>
>>
>