You can reproduce this issue with the following steps (assuming you have a
YARN cluster and Hive 0.12):

1) Using the Hive shell, create a database, e.g.: create database ttt

2) Write a simple Spark SQL program:

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql._
import org.apache.spark.sql.hive.HiveContext

/**
 * Minimal Spark SQL + Hive program: switches to the `ttt` database, creates the
 * `src` table if needed, loads `kv1.txt` into it, and prints every row.
 */
object HiveSpark {
  // Not referenced by main; kept so the object's members stay unchanged.
  case class Record(key: Int, value: String)

  /**
   * Entry point.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("HiveSpark")
    val sc = new SparkContext(sparkConf)

    // Stop the SparkContext even when one of the queries throws, so a failed
    // statement (e.g. "use ttt" on a missing database) does not leave it running.
    try {
      // A HiveContext creates an instance of the Hive metastore in process.
      val hiveContext = new HiveContext(sc)
      import hiveContext._

      hql("use ttt")
      hql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
      hql("LOAD DATA INPATH '/user/biadmin/kv1.txt' INTO TABLE src")

      // Queries are expressed in HiveQL
      println("Result of 'SELECT *': ")
      hql("SELECT * FROM src").collect.foreach(println)
    } finally {
      sc.stop()
    }
  }
}
3) Run it in yarn-cluster mode.


On Mon, Aug 11, 2014 at 9:44 AM, Cheng Lian <[email protected]> wrote:

> Since you were using hql(...), it’s probably not related to the JDBC driver.
> But I failed to reproduce this issue locally with a single-node
> pseudo-distributed YARN cluster. Would you mind elaborating on the steps to
> reproduce this bug? Thanks.
>
>
>
> On Sun, Aug 10, 2014 at 9:36 PM, Cheng Lian <[email protected]> wrote:
>
>> Hi Jenny, does this issue only happen when running Spark SQL with YARN in
>> your environment?
>>
>>
>> On Sat, Aug 9, 2014 at 3:56 AM, Jenny Zhao <[email protected]>
>> wrote:
>>
>>>
>>> Hi,
>>>
>>> I am able to run my hql query on yarn cluster mode when connecting to
>>> the default hive metastore defined in hive-site.xml.
>>>
>>> however, if I want to switch to a different database, like:
>>>
>>>   hql("use other-database")
>>>
>>>
>>> it only works in yarn-client mode, but fails in yarn-cluster mode with
>>> the following stack trace:
>>>
>>> 14/08/08 12:09:11 INFO HiveMetaStore: 0: get_database: tt
>>> 14/08/08 12:09:11 INFO audit: ugi=biadmin   ip=unknown-ip-addr      
>>> cmd=get_database: tt    
>>> 14/08/08 12:09:11 ERROR RetryingHMSHandler: 
>>> NoSuchObjectException(message:There is no database named tt)
>>>     at 
>>> org.apache.hadoop.hive.metastore.ObjectStore.getMDatabase(ObjectStore.java:431)
>>>     at 
>>> org.apache.hadoop.hive.metastore.ObjectStore.getDatabase(ObjectStore.java:441)
>>>     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>>>     at 
>>> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:60)
>>>     at 
>>> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:37)
>>>     at java.lang.reflect.Method.invoke(Method.java:611)
>>>     at 
>>> org.apache.hadoop.hive.metastore.RetryingRawStore.invoke(RetryingRawStore.java:124)
>>>     at $Proxy15.getDatabase(Unknown Source)
>>>     at 
>>> org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.get_database(HiveMetaStore.java:628)
>>>     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>>>     at 
>>> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:60)
>>>     at 
>>> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:37)
>>>     at java.lang.reflect.Method.invoke(Method.java:611)
>>>     at 
>>> org.apache.hadoop.hive.metastore.RetryingHMSHandler.invoke(RetryingHMSHandler.java:103)
>>>     at $Proxy17.get_database(Unknown Source)
>>>     at 
>>> org.apache.hadoop.hive.metastore.HiveMetaStoreClient.getDatabase(HiveMetaStoreClient.java:810)
>>>     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>>>     at 
>>> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:60)
>>>     at 
>>> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:37)
>>>     at java.lang.reflect.Method.invoke(Method.java:611)
>>>     at 
>>> org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:89)
>>>     at $Proxy18.getDatabase(Unknown Source)
>>>     at org.apache.hadoop.hive.ql.metadata.Hive.getDatabase(Hive.java:1139)
>>>     at 
>>> org.apache.hadoop.hive.ql.metadata.Hive.databaseExists(Hive.java:1128)
>>>     at 
>>> org.apache.hadoop.hive.ql.exec.DDLTask.switchDatabase(DDLTask.java:3479)
>>>     at org.apache.hadoop.hive.ql.exec.DDLTask.execute(DDLTask.java:237)
>>>     at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:151)
>>>     at 
>>> org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:65)
>>>     at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1414)
>>>     at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1192)
>>>     at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1020)
>>>     at org.apache.hadoop.hive.ql.Driver.run(Driver.java:888)
>>>     at org.apache.spark.sql.hive.HiveContext.runHive(HiveContext.scala:208)
>>>     at 
>>> org.apache.spark.sql.hive.HiveContext.runSqlHive(HiveContext.scala:182)
>>>     at 
>>> org.apache.spark.sql.hive.HiveContext$QueryExecution.toRdd$lzycompute(HiveContext.scala:272)
>>>     at 
>>> org.apache.spark.sql.hive.HiveContext$QueryExecution.toRdd(HiveContext.scala:269)
>>>     at org.apache.spark.sql.hive.HiveContext.hiveql(HiveContext.scala:86)
>>>     at org.apache.spark.sql.hive.HiveContext.hql(HiveContext.scala:91)
>>>     at 
>>> org.apache.spark.examples.sql.hive.HiveSpark$.main(HiveSpark.scala:35)
>>>     at org.apache.spark.examples.sql.hive.HiveSpark.main(HiveSpark.scala)
>>>     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>>>     at 
>>> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:60)
>>>     at 
>>> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:37)
>>>     at java.lang.reflect.Method.invoke(Method.java:611)
>>>     at 
>>> org.apache.spark.deploy.yarn.ApplicationMaster$$anon$2.run(ApplicationMaster.scala:186)
>>>
>>> 14/08/08 12:09:11 ERROR DDLTask: 
>>> org.apache.hadoop.hive.ql.metadata.HiveException: Database does not exist: 
>>> tt
>>>     at 
>>> org.apache.hadoop.hive.ql.exec.DDLTask.switchDatabase(DDLTask.java:3480)
>>>     at org.apache.hadoop.hive.ql.exec.DDLTask.execute(DDLTask.java:237)
>>>     at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:151)
>>>     at 
>>> org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:65)
>>>     at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1414)
>>>     at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1192)
>>>     at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1020)
>>>     at org.apache.hadoop.hive.ql.Driver.run(Driver.java:888)
>>>     at org.apache.spark.sql.hive.HiveContext.runHive(HiveContext.scala:208)
>>>     at 
>>> org.apache.spark.sql.hive.HiveContext.runSqlHive(HiveContext.scala:182)
>>>     at 
>>> org.apache.spark.sql.hive.HiveContext$QueryExecution.toRdd$lzycompute(HiveContext.scala:272)
>>>     at 
>>> org.apache.spark.sql.hive.HiveContext$QueryExecution.toRdd(HiveContext.scala:269)
>>>     at org.apache.spark.sql.hive.HiveContext.hiveql(HiveContext.scala:86)
>>>     at org.apache.spark.sql.hive.HiveContext.hql(HiveContext.scala:91)
>>>     at 
>>> org.apache.spark.examples.sql.hive.HiveSpark$.main(HiveSpark.scala:35)
>>>     at org.apache.spark.examples.sql.hive.HiveSpark.main(HiveSpark.scala)
>>>     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>>>     at 
>>> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:60)
>>>     at 
>>> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:37)
>>>     at java.lang.reflect.Method.invoke(Method.java:611)
>>>     at 
>>> org.apache.spark.deploy.yarn.ApplicationMaster$$anon$2.run(ApplicationMaster.scala:186)
>>>
>>>
>>>     Why is that? I am not sure whether this has something to do with the
>>> Hive JDBC driver.
>>>
>>> Thank you!
>>>
>>> Jenny
>>>
>>
>>
>

Reply via email to