Hi,

I think you should put hive-site.xml in place before initializing the SparkSession. Spark will then connect to the metastore and log something like this:

======================================
2021-08-12 09:21:21 INFO HiveUtils:54 - Initializing HiveMetastoreConnection version 1.2.1 using Spark classes.
2021-08-12 09:21:22 INFO metastore:376 - Trying to connect to metastore with URI thrift://hadoop001:9083
2021-08-12 09:21:22 WARN UserGroupInformation:1535 - No groups available for user hdfs
2021-08-12 09:21:22 INFO metastore:472 - Connected to metastore.
2021-08-12 09:21:22 INFO SessionState:641 - Created local directory: /tmp/8bc342dd-aa0b-407b-b9ad-ff7ed3cd4076_resources
2021-08-12 09:21:22 INFO SessionState:641 - Created HDFS directory: /tmp/hive/hdfs/8bc342dd-aa0b-407b-b9ad-ff7ed3cd4076
2021-08-12 09:21:22 INFO SessionState:641 - Created local directory: /tmp/tempo/8bc342dd-aa0b-407b-b9ad-ff7ed3cd4076
2021-08-12 09:21:22 INFO SessionState:641 - Created HDFS directory: /tmp/hive/hdfs/8bc342dd-aa0b-407b-b9ad-ff7ed3cd4076/_tmp_space.db
=======================================
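A minimal sketch of that approach, assuming hive-site.xml (together with core-site.xml / hdfs-site.xml) is already on the driver classpath, e.g. under src/main/resources or $SPARK_CONF_DIR, before the session is built; the app name is hypothetical:

import org.apache.spark.sql.SparkSession

// hive-site.xml on the classpath supplies hive.metastore.uris and
// hive.metastore.warehouse.dir; they are read when the session is created,
// so nothing needs to be set on sqlContext afterwards.
val spark = SparkSession.builder()
  .appName("hive-metastore-example")   // hypothetical name
  .enableHiveSupport()                 // talk to the Hive metastore rather than the in-memory catalog
  .getOrCreate()

// If the metastore connection succeeded, log lines like the ones above should
// appear and the Hive databases should be listed here.
spark.sql("show databases").show()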
eab...@163.com

From: igyu
Date: 2021-08-12 11:33
To: user
Subject: How can I config hive.metastore.warehouse.dir

I need to write data to Hive with Spark:

val proper = new Properties
proper.setProperty("fs.defaultFS", "hdfs://nameservice1")
proper.setProperty("dfs.nameservices", "nameservice1")
proper.setProperty("dfs.ha.namenodes.nameservice1", "namenode337,namenode369")
proper.setProperty("dfs.namenode.rpc-address.nameservice1.namenode337", "bigdser1:8020")
proper.setProperty("dfs.namenode.rpc-address.nameservice1.namenode369", "bigdser5:8020")
proper.setProperty("dfs.client.failover.proxy.provider.nameservice1", "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider")
proper.setProperty("hadoop.security.authentication", "Kerberos")
proper.setProperty("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem")
proper.setProperty("spark.sql.warehouse.dir", "/user/hive/warehouse")
proper.setProperty("hive.metastore.warehouse.dir", "/user/hive/warehouse")
proper.setProperty("hive.metastore.uris", "thrift://bigdser1:9083")
sparkSession.sqlContext.setConf(proper)
sparkSession.sqlContext.setConf("hive.exec.dynamic.partition", "true")
sparkSession.sqlContext.setConf("hive.exec.dynamic.partition.mode", "nonstrict")

DF.write.format("jdbc")
  .option("timestampFormat", "yyyy/MM/dd HH:mm:ss ZZ")
  .options(cfg)
//  .partitionBy(partitions:_*)
  .mode(mode)
  .insertInto(table)

but I get an error:

21/08/12 11:25:07 INFO SharedState: Setting hive.metastore.warehouse.dir ('null') to the value of spark.sql.warehouse.dir ('file:/D:/file/code/Java/jztsynctools/spark-warehouse/').
21/08/12 11:25:07 INFO SharedState: Warehouse path is 'file:/D:/file/code/Java/jztsynctools/spark-warehouse/'.
21/08/12 11:25:08 INFO StateStoreCoordinatorRef: Registered StateStoreCoordinator endpoint
21/08/12 11:25:08 INFO Version: Elasticsearch Hadoop v7.10.2 [f53f4b7b2b]
21/08/12 11:25:08 INFO Utils: Supplied authorities: tidb4ser:11000
21/08/12 11:25:08 INFO Utils: Resolved authority: tidb4ser:11000
21/08/12 11:25:16 INFO UserGroupInformation: Login successful for user jztwk/had...@join.com using keytab file D:\file\jztwk.keytab.
Keytab auto renewal enabled : false
login user: jztwk/had...@join.com (auth:KERBEROS)
21/08/12 11:25:25 WARN ProcfsMetricsGetter: Exception when trying to compute pagesize, as a result reporting of ProcessTree metrics is stopped
Exception in thread "main" org.apache.spark.sql.AnalysisException: Table or view not found: hivetest.chinese_part1, the database hivetest doesn't exist.;
    at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:47)
    at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.org$apache$spark$sql$catalyst$analysis$Analyzer$ResolveRelations$$lookupTableFromCatalog(Analyzer.scala:737)
    at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$$anonfun$apply$8.applyOrElse(Analyzer.scala:710)
    at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$$anonfun$apply$8.applyOrElse(Analyzer.scala:708)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1$$anonfun$apply$1.apply(AnalysisHelper.scala:90)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1$$anonfun$apply$1.apply(AnalysisHelper.scala:90)
    at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1.apply(AnalysisHelper.scala:89)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1.apply(AnalysisHelper.scala:86)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:194)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$class.resolveOperatorsUp(AnalysisHelper.scala:86)
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsUp(LogicalPlan.scala:29)
    at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.apply(Analyzer.scala:708)
    at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.apply(Analyzer.scala:654)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:87)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:84)
    at scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:124)
    at scala.collection.immutable.List.foldLeft(List.scala:84)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:84)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:76)
    at scala.collection.immutable.List.foreach(List.scala:392)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:76)
    at org.apache.spark.sql.catalyst.analysis.Analyzer.org$apache$spark$sql$catalyst$analysis$Analyzer$$executeSameContext(Analyzer.scala:127)
    at org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:121)
    at org.apache.spark.sql.catalyst.analysis.Analyzer$$anonfun$executeAndCheck$1.apply(Analyzer.scala:106)
    at org.apache.spark.sql.catalyst.analysis.Analyzer$$anonfun$executeAndCheck$1.apply(Analyzer.scala:105)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:201)
    at org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:105)
    at org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:57)
    at org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:55)
    at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:47)
    at org.apache.spark.sql.execution.QueryExecution.withCachedData$lzycompute(QueryExecution.scala:61)
    at org.apache.spark.sql.execution.QueryExecution.withCachedData(QueryExecution.scala:60)
    at org.apache.spark.sql.execution.QueryExecution.optimizedPlan$lzycompute(QueryExecution.scala:66)
    at org.apache.spark.sql.execution.QueryExecution.optimizedPlan(QueryExecution.scala:66)
    at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:72)
    at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:68)
    at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:77)
    at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:77)
    at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:76)
    at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
    at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:668)
    at org.apache.spark.sql.DataFrameWriter.insertInto(DataFrameWriter.scala:325)
    at org.apache.spark.sql.DataFrameWriter.insertInto(DataFrameWriter.scala:311)
    at com.join.hive.writer.HiveJdbcWriter.saveTo(HiveJdbcWriter.scala:87)
    at com.join.Synctool$.main(Synctool.scala:249)
    at com.join.Synctool.main(Synctool.scala)
Caused by: org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException: Database 'hivetest' not found;
    at org.apache.spark.sql.catalyst.catalog.ExternalCatalog$class.requireDbExists(ExternalCatalog.scala:44)
    at org.apache.spark.sql.catalyst.catalog.InMemoryCatalog.requireDbExists(InMemoryCatalog.scala:46)
    at org.apache.spark.sql.catalyst.catalog.InMemoryCatalog.tableExists(InMemoryCatalog.scala:338)
    at org.apache.spark.sql.catalyst.catalog.ExternalCatalog$class.requireTableExists(ExternalCatalog.scala:49)
    at org.apache.spark.sql.catalyst.catalog.InMemoryCatalog.requireTableExists(InMemoryCatalog.scala:46)
    at org.apache.spark.sql.catalyst.catalog.InMemoryCatalog.getTable(InMemoryCatalog.scala:333)
    at org.apache.spark.sql.catalyst.catalog.ExternalCatalogWithListener.getTable(ExternalCatalogWithListener.scala:146)
    at org.apache.spark.sql.catalyst.catalog.SessionCatalog.lookupRelation(SessionCatalog.scala:701)
    at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.org$apache$spark$sql$catalyst$analysis$Analyzer$ResolveRelations$$lookupTableFromCatalog(Analyzer.scala:730)
    ... 46 more

INFO SharedState: Setting hive.metastore.warehouse.dir ('null') to the value of spark.sql.warehouse.dir ('file:/D:/file/code/Java/jztsynctools/spark-warehouse/').

I think hive.metastore.warehouse.dir is null, so it can't find the hivetest database, but I did set:

proper.setProperty("spark.sql.warehouse.dir", "/user/hive/warehouse")
proper.setProperty("hive.metastore.warehouse.dir", "/user/hive/warehouse")
proper.setProperty("hive.metastore.uris", "thrift://bigdser1:9083")

and I also used

sparkSession.sparkContext.hadoopConfiguration.addResource("D:\\file\\core-site.xml")
sparkSession.sparkContext.hadoopConfiguration.addResource("D:\\file\\hdfs-site.xml")
sparkSession.sparkContext.hadoopConfiguration.addResource("D:\\file\\hive-site.xml")
sparkSession.sparkContext.hadoopConfiguration.addResource("D:\\file\\yarn-site.xml")

and I still get the same error.

igyu
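For reference, a minimal sketch of how the warehouse and metastore settings from the message above could be passed on the SparkSession builder before the session is created, rather than on an already-built session. The property values are the ones quoted above; enableHiveSupport() and the placeholder DataFrame are assumptions (the InMemoryCatalog frames in the stack trace suggest Hive support was not enabled on the failing session):

import org.apache.spark.sql.SparkSession

// Assumption: warehouse/metastore settings have to be in place before the first
// SparkSession is created, because SharedState reads them only once, at session
// construction time (hence the "Setting hive.metastore.warehouse.dir ('null')" line).
val spark = SparkSession.builder()
  .appName("jztsynctools")                                       // hypothetical app name
  .config("spark.sql.warehouse.dir", "/user/hive/warehouse")
  .config("hive.metastore.uris", "thrift://bigdser1:9083")
  .config("hive.exec.dynamic.partition", "true")
  .config("hive.exec.dynamic.partition.mode", "nonstrict")
  .enableHiveSupport()                                           // Hive catalog instead of the in-memory catalog
  .getOrCreate()

// The metastore databases should now be visible to the session.
spark.sql("show databases").show()

// insertInto resolves the table through the session catalog, so hivetest must be
// visible at this point; DF here is only a placeholder with a matching schema,
// standing in for the DataFrame built elsewhere in the original code.
val DF = spark.sql("select * from hivetest.chinese_part1 limit 0")
DF.write
  .mode("append")
  .insertInto("hivetest.chinese_part1")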