[ https://issues.apache.org/jira/browse/HIVE-16823?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16039995#comment-16039995 ]
Jianguo Tian edited comment on HIVE-16823 at 6/7/17 1:58 AM: ------------------------------------------------------------- Hi, [~mmccline]. This exception was indeed triggered by [-HIVE-16273-|https://issues.apache.org/jira/browse/HIVE-16273]; if I build Hive with code before this patch, this exception won't occur. Any comments and suggestions will be appreciated. Thx! And in my opinion, it would be better to add a more detailed description for [-HIVE-16273-|https://issues.apache.org/jira/browse/HIVE-16273]. was (Author: jonnyr): Hi, [~mmccline]. This exception was indeed triggered by [-HIVE-16273-|https://issues.apache.org/jira/browse/HIVE-16273]; if I build Hive with code before this patch, this exception won't occur. Any comments and suggestions will be appreciated. Thx! > "ArrayIndexOutOfBoundsException" in > spark_vectorized_dynamic_partition_pruning.q > -------------------------------------------------------------------------------- > > Key: HIVE-16823 > URL: https://issues.apache.org/jira/browse/HIVE-16823 > Project: Hive > Issue Type: Bug > Reporter: Jianguo Tian > > script.q > {code} > set hive.optimize.ppd=true; > set hive.ppd.remove.duplicatefilters=true; > set hive.spark.dynamic.partition.pruning=true; > set hive.optimize.metadataonly=false; > -- set hive.optimize.index.filter=true; > set hive.vectorized.execution.enabled=true; > set hive.strict.checks.cartesian.product=false; > -- parent is reduce tasks > select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart > group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'; > {code} > The exceptions are as follows: > {code} > 2017-06-05T09:20:31,468 ERROR [Executor task launch worker-0] > spark.SparkReduceRecordHandler: Fatal error: > org.apache.hadoop.hive.ql.metadata.HiveException: Error while processing > vector batch (tag=0) Column vector types: 0:BYTES, 1:BYTES > ["2008-04-08", "2008-04-08"] > org.apache.hadoop.hive.ql.metadata.HiveException: Error while processing > vector 
batch (tag=0) Column vector types: 0:BYTES, 1:BYTES > ["2008-04-08", "2008-04-08"] > at > org.apache.hadoop.hive.ql.exec.spark.SparkReduceRecordHandler.processVectors(SparkReduceRecordHandler.java:413) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > org.apache.hadoop.hive.ql.exec.spark.SparkReduceRecordHandler.processRow(SparkReduceRecordHandler.java:301) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > org.apache.hadoop.hive.ql.exec.spark.HiveReduceFunctionResultList.processNextRecord(HiveReduceFunctionResultList.java:54) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > org.apache.hadoop.hive.ql.exec.spark.HiveReduceFunctionResultList.processNextRecord(HiveReduceFunctionResultList.java:28) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > org.apache.hadoop.hive.ql.exec.spark.HiveBaseFunctionResultList.hasNext(HiveBaseFunctionResultList.java:85) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > scala.collection.convert.Wrappers$JIteratorWrapper.hasNext(Wrappers.scala:42) > ~[scala-library-2.11.8.jar:?] > at scala.collection.Iterator$class.foreach(Iterator.scala:893) > ~[scala-library-2.11.8.jar:?] > at scala.collection.AbstractIterator.foreach(Iterator.scala:1336) > ~[scala-library-2.11.8.jar:?] 
> at > org.apache.spark.rdd.AsyncRDDActions$$anonfun$foreachAsync$1$$anonfun$apply$12.apply(AsyncRDDActions.scala:127) > ~[spark-core_2.11-2.0.0.jar:2.0.0] > at > org.apache.spark.rdd.AsyncRDDActions$$anonfun$foreachAsync$1$$anonfun$apply$12.apply(AsyncRDDActions.scala:127) > ~[spark-core_2.11-2.0.0.jar:2.0.0] > at > org.apache.spark.SparkContext$$anonfun$33.apply(SparkContext.scala:1974) > ~[spark-core_2.11-2.0.0.jar:2.0.0] > at > org.apache.spark.SparkContext$$anonfun$33.apply(SparkContext.scala:1974) > ~[spark-core_2.11-2.0.0.jar:2.0.0] > at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:70) > ~[spark-core_2.11-2.0.0.jar:2.0.0] > at org.apache.spark.scheduler.Task.run(Task.scala:85) > ~[spark-core_2.11-2.0.0.jar:2.0.0] > at > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274) > ~[spark-core_2.11-2.0.0.jar:2.0.0] > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > [?:1.8.0_112] > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > [?:1.8.0_112] > at java.lang.Thread.run(Thread.java:745) [?:1.8.0_112] > Caused by: java.lang.ArrayIndexOutOfBoundsException: 1 > at > org.apache.hadoop.hive.ql.exec.vector.VectorGroupKeyHelper.copyGroupKey(VectorGroupKeyHelper.java:107) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator$ProcessingModeReduceMergePartial.doProcessBatch(VectorGroupByOperator.java:832) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator$ProcessingModeBase.processBatch(VectorGroupByOperator.java:179) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator.process(VectorGroupByOperator.java:1035) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > org.apache.hadoop.hive.ql.exec.spark.SparkReduceRecordHandler.processVectors(SparkReduceRecordHandler.java:400) > 
~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > ... 17 more > 2017-06-05T09:20:31,472 ERROR [Executor task launch worker-0] > executor.Executor: Exception in task 2.0 in stage 1.0 (TID 8) > java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: > Error while processing vector batch (tag=0) Column vector types: 0:BYTES, > 1:BYTES > ["2008-04-08", "2008-04-08"] > at > org.apache.hadoop.hive.ql.exec.spark.SparkReduceRecordHandler.processRow(SparkReduceRecordHandler.java:315) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > org.apache.hadoop.hive.ql.exec.spark.HiveReduceFunctionResultList.processNextRecord(HiveReduceFunctionResultList.java:54) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > org.apache.hadoop.hive.ql.exec.spark.HiveReduceFunctionResultList.processNextRecord(HiveReduceFunctionResultList.java:28) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > org.apache.hadoop.hive.ql.exec.spark.HiveBaseFunctionResultList.hasNext(HiveBaseFunctionResultList.java:85) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > scala.collection.convert.Wrappers$JIteratorWrapper.hasNext(Wrappers.scala:42) > ~[scala-library-2.11.8.jar:?] > at scala.collection.Iterator$class.foreach(Iterator.scala:893) > ~[scala-library-2.11.8.jar:?] > at scala.collection.AbstractIterator.foreach(Iterator.scala:1336) > ~[scala-library-2.11.8.jar:?] 
> at > org.apache.spark.rdd.AsyncRDDActions$$anonfun$foreachAsync$1$$anonfun$apply$12.apply(AsyncRDDActions.scala:127) > ~[spark-core_2.11-2.0.0.jar:2.0.0] > at > org.apache.spark.rdd.AsyncRDDActions$$anonfun$foreachAsync$1$$anonfun$apply$12.apply(AsyncRDDActions.scala:127) > ~[spark-core_2.11-2.0.0.jar:2.0.0] > at > org.apache.spark.SparkContext$$anonfun$33.apply(SparkContext.scala:1974) > ~[spark-core_2.11-2.0.0.jar:2.0.0] > at > org.apache.spark.SparkContext$$anonfun$33.apply(SparkContext.scala:1974) > ~[spark-core_2.11-2.0.0.jar:2.0.0] > at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:70) > ~[spark-core_2.11-2.0.0.jar:2.0.0] > at org.apache.spark.scheduler.Task.run(Task.scala:85) > ~[spark-core_2.11-2.0.0.jar:2.0.0] > at > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274) > ~[spark-core_2.11-2.0.0.jar:2.0.0] > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > [?:1.8.0_112] > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > [?:1.8.0_112] > at java.lang.Thread.run(Thread.java:745) [?:1.8.0_112] > Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Error while > processing vector batch (tag=0) Column vector types: 0:BYTES, 1:BYTES > ["2008-04-08", "2008-04-08"] > at > org.apache.hadoop.hive.ql.exec.spark.SparkReduceRecordHandler.processVectors(SparkReduceRecordHandler.java:413) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > org.apache.hadoop.hive.ql.exec.spark.SparkReduceRecordHandler.processRow(SparkReduceRecordHandler.java:301) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > ... 
16 more > Caused by: java.lang.ArrayIndexOutOfBoundsException: 1 > at > org.apache.hadoop.hive.ql.exec.vector.VectorGroupKeyHelper.copyGroupKey(VectorGroupKeyHelper.java:107) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator$ProcessingModeReduceMergePartial.doProcessBatch(VectorGroupByOperator.java:832) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator$ProcessingModeBase.processBatch(VectorGroupByOperator.java:179) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator.process(VectorGroupByOperator.java:1035) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > org.apache.hadoop.hive.ql.exec.spark.SparkReduceRecordHandler.processVectors(SparkReduceRecordHandler.java:400) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > org.apache.hadoop.hive.ql.exec.spark.SparkReduceRecordHandler.processRow(SparkReduceRecordHandler.java:301) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > ... 
16 more > 2017-06-05T09:20:31,488 DEBUG [dispatcher-event-loop-2] > scheduler.TaskSchedulerImpl: parentName: , name: TaskSet_1, runningTasks: 0 > 2017-06-05T09:20:31,493 WARN [task-result-getter-0] > scheduler.TaskSetManager: Lost task 2.0 in stage 1.0 (TID 8, localhost): > java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: > Error while processing vector batch (tag=0) Column vector types: 0:BYTES, > 1:BYTES > ["2008-04-08", "2008-04-08"] > at > org.apache.hadoop.hive.ql.exec.spark.SparkReduceRecordHandler.processRow(SparkReduceRecordHandler.java:315) > at > org.apache.hadoop.hive.ql.exec.spark.HiveReduceFunctionResultList.processNextRecord(HiveReduceFunctionResultList.java:54) > at > org.apache.hadoop.hive.ql.exec.spark.HiveReduceFunctionResultList.processNextRecord(HiveReduceFunctionResultList.java:28) > at > org.apache.hadoop.hive.ql.exec.spark.HiveBaseFunctionResultList.hasNext(HiveBaseFunctionResultList.java:85) > at > scala.collection.convert.Wrappers$JIteratorWrapper.hasNext(Wrappers.scala:42) > at scala.collection.Iterator$class.foreach(Iterator.scala:893) > at scala.collection.AbstractIterator.foreach(Iterator.scala:1336) > at > org.apache.spark.rdd.AsyncRDDActions$$anonfun$foreachAsync$1$$anonfun$apply$12.apply(AsyncRDDActions.scala:127) > at > org.apache.spark.rdd.AsyncRDDActions$$anonfun$foreachAsync$1$$anonfun$apply$12.apply(AsyncRDDActions.scala:127) > at > org.apache.spark.SparkContext$$anonfun$33.apply(SparkContext.scala:1974) > at > org.apache.spark.SparkContext$$anonfun$33.apply(SparkContext.scala:1974) > at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:70) > at org.apache.spark.scheduler.Task.run(Task.scala:85) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > 
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Error while > processing vector batch (tag=0) Column vector types: 0:BYTES, 1:BYTES > ["2008-04-08", "2008-04-08"] > at > org.apache.hadoop.hive.ql.exec.spark.SparkReduceRecordHandler.processVectors(SparkReduceRecordHandler.java:413) > at > org.apache.hadoop.hive.ql.exec.spark.SparkReduceRecordHandler.processRow(SparkReduceRecordHandler.java:301) > ... 16 more > Caused by: java.lang.ArrayIndexOutOfBoundsException: 1 > at > org.apache.hadoop.hive.ql.exec.vector.VectorGroupKeyHelper.copyGroupKey(VectorGroupKeyHelper.java:107) > at > org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator$ProcessingModeReduceMergePartial.doProcessBatch(VectorGroupByOperator.java:832) > at > org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator$ProcessingModeBase.processBatch(VectorGroupByOperator.java:179) > at > org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator.process(VectorGroupByOperator.java:1035) > at > org.apache.hadoop.hive.ql.exec.spark.SparkReduceRecordHandler.processVectors(SparkReduceRecordHandler.java:400) > ... 17 more > 2017-06-05T09:20:31,495 ERROR [task-result-getter-0] > scheduler.TaskSetManager: Task 2 in stage 1.0 failed 1 times; aborting job > {code} > This exception happens in this line of VectorGroupKeyHelper.java: > {code} > BytesColumnVector outputColumnVector = (BytesColumnVector) > outputBatch.cols[columnIndex]; > {code} -- This message was sent by Atlassian JIRA (v6.3.15#6346)