Hi,
I am trying to insert data into a partitioned Hive table. The groupByKey
groups together the records that belong to the same partition of the Hive
table. After the groupByKey, I convert each group's Iterable[X] to a
DataFrame with X.toList.toDF(). But the hiveContext.sql call throws a
NullPointerException, see the stack trace below. Any suggestions? What
could be wrong? Thanks!
val varWHeightFlatRDD = varWHeightRDD
  .flatMap(FlatMapUtilClass().flatKeyFromWrf)
  .groupByKey()
  .foreach { x =>
    val zone  = x._1._1
    val z     = x._1._2
    val year  = x._1._3
    val month = x._1._4
    val df_table_4dim = x._2.toList.toDF()
    df_table_4dim.registerTempTable("table_4Dim")
    hiveContext.sql(
      "INSERT OVERWRITE TABLE 4dim PARTITION (zone=" + zone + ", z=" + z +
        ", year=" + year + ", month=" + month + ") " +
        "SELECT date, hh, x, y, height, u, v, w, ph, phb, t, p, pb, " +
        "qvapor, qgraup, qnice, qnrain, tke_pbl, el_pbl FROM table_4Dim")
  }
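
In case it helps to reproduce, here is a stripped-down, self-contained sketch
of the same pattern. The case class, table, column names, and sample values
are simplified placeholders, not my real schema; the structure (groupByKey on
the partition columns, toDF and hiveContext.sql inside foreach) is the same:

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.hive.HiveContext

// Placeholder record type standing in for my real row class.
case class Row4Dim(date: String, hh: Int, height: Double)

object Repro {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("partitioned-hive-insert"))
    val hiveContext = new HiveContext(sc)
    import hiveContext.implicits._

    // Key each row by its partition columns (zone, z, year, month),
    // then gather all rows of one Hive partition into a single group.
    val grouped = sc.parallelize(Seq(
      ((1, 10, 2015, 6), Row4Dim("2015-06-01", 0, 12.5)),
      ((1, 10, 2015, 6), Row4Dim("2015-06-01", 1, 13.0))
    )).groupByKey()

    // Same structure as my real job: build a DataFrame per group and
    // insert it into the matching partition from inside foreach.
    grouped.foreach { case ((zone, z, year, month), rows) =>
      val df = rows.toList.toDF()
      df.registerTempTable("table_4Dim")
      hiveContext.sql(
        "INSERT OVERWRITE TABLE 4dim PARTITION " +
          s"(zone=$zone, z=$z, year=$year, month=$month) " +
          "SELECT date, hh, height FROM table_4Dim")
    }
  }
}

And this is the stack trace I get from the real job: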
java.lang.NullPointerException
  at org.apache.spark.sql.hive.HiveContext.sql(HiveContext.scala:100)
  at no.uni.computing.etl.LoadWrfIntoHiveOptReduce1$$anonfun$7.apply(LoadWrfIntoHiveOptReduce1.scala:113)
  at no.uni.computing.etl.LoadWrfIntoHiveOptReduce1$$anonfun$7.apply(LoadWrfIntoHiveOptReduce1.scala:103)
  at scala.collection.Iterator$class.foreach(Iterator.scala:727)
  at org.apache.spark.InterruptibleIterator.foreach(InterruptibleIterator.scala:28)
  at org.apache.spark.rdd.RDD$$anonfun$foreach$1.apply(RDD.scala:798)
  at org.apache.spark.rdd.RDD$$anonfun$foreach$1.apply(RDD.scala:798)
  at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1511)
  at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1511)
  at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61)
  at org.apache.spark.scheduler.Task.run(Task.scala:64)
  at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:203)
  at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
  at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
  at java.lang.Thread.run(Thread.java:744)