[ 
https://issues.apache.org/jira/browse/HIVE-25067?focusedWorklogId=628386&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-628386
 ]

ASF GitHub Bot logged work on HIVE-25067:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 27/Jul/21 12:07
            Start Date: 27/Jul/21 12:07
    Worklog Time Spent: 10m 
      Work Description: pvary commented on a change in pull request #2521:
URL: https://github.com/apache/hive/pull/2521#discussion_r677385177



##########
File path: 
iceberg/iceberg-handler/src/test/queries/positive/dynamic_partition_pruning.q
##########
@@ -0,0 +1,160 @@
+--! qt:dataset:srcpart
+set hive.compute.query.using.stats=false;
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=false;
+set hive.optimize.ppd=true;
+set hive.ppd.remove.duplicatefilters=true;
+set hive.tez.dynamic.partition.pruning=true;
+set hive.optimize.metadataonly=false;
+set hive.optimize.index.filter=true;
+set hive.tez.min.bloom.filter.entries=1;
+set hive.tez.bigtable.minsize.semijoin.reduction=1;
+
+select distinct ds from srcpart;
+select distinct hr from srcpart;
+
+CREATE TABLE srcpart_iceberg (key STRING, value STRING)
+PARTITIONED BY (ds STRING, hr STRING) STORED BY iceberg;
+INSERT INTO srcpart_iceberg select * from srcpart;
+
+EXPLAIN create table srcpart_date_n2 as select ds as ds, ds as `date`  from 
srcpart group by ds;
+create table srcpart_date_n2 as select ds as ds, ds as `date` from srcpart 
group by ds;
+create table srcpart_hour_n0 as select hr as hr, hr as hour from srcpart group 
by hr;
+create table srcpart_date_hour_n0 as select ds as ds, ds as `date`, hr as hr, 
hr as hour from srcpart group by ds, hr;
+create table srcpart_double_hour_n0 as select (hr*2) as hr, hr as hour from 
srcpart group by hr;
+
+-- single column, single key
+EXPLAIN select count(*) from srcpart_iceberg join srcpart_date_n2 on 
(srcpart_iceberg.ds = srcpart_date_n2.ds) where srcpart_date_n2.`date` = 
'2008-04-08';
+select count(*) from srcpart_iceberg join srcpart_date_n2 on 
(srcpart_iceberg.ds = srcpart_date_n2.ds) where srcpart_date_n2.`date` = 
'2008-04-08';
+set hive.tez.dynamic.partition.pruning=false;
+EXPLAIN select count(*) from srcpart_iceberg join srcpart_date_n2 on 
(srcpart_iceberg.ds = srcpart_date_n2.ds) where srcpart_date_n2.`date` = 
'2008-04-08';
+select count(*) from srcpart_iceberg join srcpart_date_n2 on 
(srcpart_iceberg.ds = srcpart_date_n2.ds) where srcpart_date_n2.`date` = 
'2008-04-08';
+set hive.tez.dynamic.partition.pruning=true;
+select count(*) from srcpart where ds = '2008-04-08';
+
+-- multiple sources, single key
+EXPLAIN select count(*) from srcpart_iceberg join srcpart_date_n2 on 
(srcpart_iceberg.ds = srcpart_date_n2.ds) join srcpart_hour_n0 on 
(srcpart_iceberg.hr = srcpart_hour_n0.hr)
+where srcpart_date_n2.`date` = '2008-04-08' and srcpart_hour_n0.hour = 11;
+select count(*) from srcpart_iceberg join srcpart_date_n2 on 
(srcpart_iceberg.ds = srcpart_date_n2.ds) join srcpart_hour_n0 on 
(srcpart_iceberg.hr = srcpart_hour_n0.hr)
+where srcpart_date_n2.`date` = '2008-04-08' and srcpart_hour_n0.hour = 11;
+set hive.tez.dynamic.partition.pruning=false;
+EXPLAIN select count(*) from srcpart_iceberg join srcpart_date_n2 on 
(srcpart_iceberg.ds = srcpart_date_n2.ds) join srcpart_hour_n0 on 
(srcpart_iceberg.hr = srcpart_hour_n0.hr)
+where srcpart_date_n2.`date` = '2008-04-08' and srcpart_hour_n0.hour = 11;
+select count(*) from srcpart_iceberg join srcpart_date_n2 on 
(srcpart_iceberg.ds = srcpart_date_n2.ds) join srcpart_hour_n0 on 
(srcpart_iceberg.hr = srcpart_hour_n0.hr)
+where srcpart_date_n2.`date` = '2008-04-08' and srcpart_hour_n0.hour = 11;
+set hive.tez.dynamic.partition.pruning=true;
+select count(*) from srcpart where hr = 11 and ds = '2008-04-08';
+
+-- multiple columns single source
+EXPLAIN select count(*) from srcpart_iceberg join srcpart_date_hour_n0 on 
(srcpart_iceberg.ds = srcpart_date_hour_n0.ds and srcpart_iceberg.hr = 
srcpart_date_hour_n0.hr) where srcpart_date_hour_n0.`date` = '2008-04-08' and 
srcpart_date_hour_n0.hour = 11;
+select count(*) from srcpart_iceberg join srcpart_date_hour_n0 on 
(srcpart_iceberg.ds = srcpart_date_hour_n0.ds and srcpart_iceberg.hr = 
srcpart_date_hour_n0.hr) where srcpart_date_hour_n0.`date` = '2008-04-08' and 
srcpart_date_hour_n0.hour = 11;
+set hive.tez.dynamic.partition.pruning=false;
+EXPLAIN select count(*) from srcpart_iceberg join srcpart_date_hour_n0 on 
(srcpart_iceberg.ds = srcpart_date_hour_n0.ds and srcpart_iceberg.hr = 
srcpart_date_hour_n0.hr) where srcpart_date_hour_n0.`date` = '2008-04-08' and 
srcpart_date_hour_n0.hour = 11;
+select count(*) from srcpart_iceberg join srcpart_date_hour_n0 on 
(srcpart_iceberg.ds = srcpart_date_hour_n0.ds and srcpart_iceberg.hr = 
srcpart_date_hour_n0.hr) where srcpart_date_hour_n0.`date` = '2008-04-08' and 
srcpart_date_hour_n0.hour = 11;
+set hive.tez.dynamic.partition.pruning=true;
+select count(*) from srcpart where ds = '2008-04-08' and hr = 11;
+
+-- empty set
+EXPLAIN select count(*) from srcpart_iceberg join srcpart_date_n2 on 
(srcpart_iceberg.ds = srcpart_date_n2.ds) where srcpart_date_n2.`date` = 'I 
DONT EXIST';
+select count(*) from srcpart_iceberg join srcpart_date_n2 on 
(srcpart_iceberg.ds = srcpart_date_n2.ds) where srcpart_date_n2.`date` = 'I 
DONT EXIST';
+set hive.tez.dynamic.partition.pruning=false;
+EXPLAIN select count(*) from srcpart_iceberg join srcpart_date_n2 on 
(srcpart_iceberg.ds = srcpart_date_n2.ds) where srcpart_date_n2.`date` = 'I 
DONT EXIST';
+select count(*) from srcpart_iceberg join srcpart_date_n2 on 
(srcpart_iceberg.ds = srcpart_date_n2.ds) where srcpart_date_n2.`date` = 'I 
DONT EXIST';
+set hive.tez.dynamic.partition.pruning=true;
+select count(*) from srcpart where ds = 'I DONT EXIST';
+
+-- expressions
+EXPLAIN select count(*) from srcpart_iceberg join srcpart_double_hour_n0 on 
(srcpart_iceberg.hr = cast(cast(srcpart_double_hour_n0.hr/2 as int) as string)) 
where srcpart_double_hour_n0.hour = 11;
+select count(*) from srcpart_iceberg join srcpart_double_hour_n0 on 
(srcpart_iceberg.hr = cast(cast(srcpart_double_hour_n0.hr/2 as int) as string)) 
where srcpart_double_hour_n0.hour = 11;
+set hive.tez.dynamic.partition.pruning=false;
+EXPLAIN select count(*) from srcpart_iceberg join srcpart_double_hour_n0 on 
(srcpart_iceberg.hr = cast(cast(srcpart_double_hour_n0.hr/2 as int) as string)) 
where srcpart_double_hour_n0.hour = 11;
+select count(*) from srcpart_iceberg join srcpart_double_hour_n0 on 
(srcpart_iceberg.hr = cast(cast(srcpart_double_hour_n0.hr/2 as int) as string)) 
where srcpart_double_hour_n0.hour = 11;
+set hive.tez.dynamic.partition.pruning=true;
+
+-- old style join syntax
+EXPLAIN select count(*) from srcpart_iceberg, srcpart_date_hour_n0 where 
srcpart_date_hour_n0.`date` = '2008-04-08' and srcpart_date_hour_n0.hour = 11 
and srcpart_iceberg.ds = srcpart_date_hour_n0.ds and srcpart_iceberg.hr = 
srcpart_date_hour_n0.hr;
+select count(*) from srcpart_iceberg, srcpart_date_hour_n0 where 
srcpart_date_hour_n0.`date` = '2008-04-08' and srcpart_date_hour_n0.hour = 11 
and srcpart_iceberg.ds = srcpart_date_hour_n0.ds and srcpart_iceberg.hr = 
srcpart_date_hour_n0.hr;
+
+-- with static pruning
+EXPLAIN select count(*) from srcpart_iceberg join srcpart_date_n2 on 
(srcpart_iceberg.ds = srcpart_date_n2.ds) join srcpart_hour_n0 on 
(srcpart_iceberg.hr = srcpart_hour_n0.hr)
+where srcpart_date_n2.`date` = '2008-04-08' and srcpart_hour_n0.hour = 11 and 
srcpart_iceberg.hr = 11;
+select count(*) from srcpart_iceberg join srcpart_date_n2 on 
(srcpart_iceberg.ds = srcpart_date_n2.ds) join srcpart_hour_n0 on 
(srcpart_iceberg.hr = srcpart_hour_n0.hr)
+where srcpart_date_n2.`date` = '2008-04-08' and srcpart_hour_n0.hour = 11 and 
srcpart_iceberg.hr = 11;
+EXPLAIN select count(*) from srcpart_iceberg join srcpart_date_n2 on 
(srcpart_iceberg.ds = srcpart_date_n2.ds) join srcpart_hour_n0 on 
(srcpart_iceberg.hr = srcpart_hour_n0.hr)
+where srcpart_date_n2.`date` = '2008-04-08' and srcpart_iceberg.hr = 13;
+select count(*) from srcpart_iceberg join srcpart_date_n2 on 
(srcpart_iceberg.ds = srcpart_date_n2.ds) join srcpart_hour_n0 on 
(srcpart_iceberg.hr = srcpart_hour_n0.hr)
+where srcpart_date_n2.`date` = '2008-04-08' and srcpart_iceberg.hr = 13;
+
+-- union + subquery
+EXPLAIN select count(*) from srcpart_iceberg where srcpart_iceberg.ds in 
(select max(srcpart_iceberg.ds) from srcpart_iceberg union all select 
min(srcpart_iceberg.ds) from srcpart_iceberg);
+select count(*) from srcpart_iceberg where srcpart_iceberg.ds in (select 
max(srcpart_iceberg.ds) from srcpart_iceberg union all select 
min(srcpart_iceberg.ds) from srcpart_iceberg);
+EXPLAIN select distinct(ds) from srcpart_iceberg where srcpart_iceberg.ds in 
(select max(srcpart_iceberg.ds) from srcpart_iceberg union all select 
min(srcpart_iceberg.ds) from srcpart_iceberg);
+select distinct(ds) from srcpart_iceberg where srcpart_iceberg.ds in (select 
max(srcpart_iceberg.ds) from srcpart_iceberg union all select 
min(srcpart_iceberg.ds) from srcpart_iceberg);
+EXPLAIN select ds from (select distinct(ds) as ds from srcpart_iceberg union 
all select distinct(ds) as ds from srcpart_iceberg) s where s.ds in (select 
max(srcpart_iceberg.ds) from srcpart_iceberg union all select 
min(srcpart_iceberg.ds) from srcpart_iceberg);
+select ds from (select distinct(ds) as ds from srcpart_iceberg union all 
select distinct(ds) as ds from srcpart_iceberg) s where s.ds in (select 
max(srcpart_iceberg.ds) from srcpart_iceberg union all select 
min(srcpart_iceberg.ds) from srcpart_iceberg);
+
+set hive.auto.convert.join=true;
+set hive.auto.convert.join.noconditionaltask = true;
+set hive.auto.convert.join.noconditionaltask.size = 10000000;
+
+-- single column, single key
+EXPLAIN select count(*) from srcpart_iceberg join srcpart_date_n2 on 
(srcpart_iceberg.ds = srcpart_date_n2.ds) where srcpart_date_n2.`date` = 
'2008-04-08';
+select count(*) from srcpart_iceberg join srcpart_date_n2 on 
(srcpart_iceberg.ds = srcpart_date_n2.ds) where srcpart_date_n2.`date` = 
'2008-04-08';
+select count(*) from srcpart where ds = '2008-04-08';
+
+-- multiple sources, single key
+EXPLAIN select count(*) from srcpart_iceberg join srcpart_date_n2 on 
(srcpart_iceberg.ds = srcpart_date_n2.ds) join srcpart_hour_n0 on 
(srcpart_iceberg.hr = srcpart_hour_n0.hr)
+where srcpart_date_n2.`date` = '2008-04-08' and srcpart_hour_n0.hour = 11;
+select count(*) from srcpart_iceberg join srcpart_date_n2 on 
(srcpart_iceberg.ds = srcpart_date_n2.ds) join srcpart_hour_n0 on 
(srcpart_iceberg.hr = srcpart_hour_n0.hr)
+where srcpart_date_n2.`date` = '2008-04-08' and srcpart_hour_n0.hour = 11;
+select count(*) from srcpart where hr = 11 and ds = '2008-04-08';
+
+-- multiple columns single source
+EXPLAIN select count(*) from srcpart_iceberg join srcpart_date_hour_n0 on 
(srcpart_iceberg.ds = srcpart_date_hour_n0.ds and srcpart_iceberg.hr = 
srcpart_date_hour_n0.hr) where srcpart_date_hour_n0.`date` = '2008-04-08' and 
srcpart_date_hour_n0.hour = 11;
+select count(*) from srcpart_iceberg join srcpart_date_hour_n0 on 
(srcpart_iceberg.ds = srcpart_date_hour_n0.ds and srcpart_iceberg.hr = 
srcpart_date_hour_n0.hr) where srcpart_date_hour_n0.`date` = '2008-04-08' and 
srcpart_date_hour_n0.hour = 11;
+select count(*) from srcpart where ds = '2008-04-08' and hr = 11;
+
+-- empty set
+EXPLAIN select count(*) from srcpart_iceberg join srcpart_date_n2 on 
(srcpart_iceberg.ds = srcpart_date_n2.ds) where srcpart_date_n2.`date` = 'I 
DONT EXIST';
+select count(*) from srcpart_iceberg join srcpart_date_n2 on 
(srcpart_iceberg.ds = srcpart_date_n2.ds) where srcpart_date_n2.`date` = 'I 
DONT EXIST';
+
+-- expressions
+EXPLAIN select count(*) from srcpart_iceberg join srcpart_double_hour_n0 on 
(srcpart_iceberg.hr = cast(cast(srcpart_double_hour_n0.hr/2 as int) as string)) 
where srcpart_double_hour_n0.hour = 11;
+select count(*) from srcpart_iceberg join srcpart_double_hour_n0 on 
(srcpart_iceberg.hr = cast(cast(srcpart_double_hour_n0.hr/2 as int) as string)) 
where srcpart_double_hour_n0.hour = 11;
+select count(*) from srcpart where hr = 11;
+
+-- with static pruning
+EXPLAIN select count(*) from srcpart_iceberg join srcpart_date_n2 on 
(srcpart_iceberg.ds = srcpart_date_n2.ds) join srcpart_hour_n0 on 
(srcpart_iceberg.hr = srcpart_hour_n0.hr)
+where srcpart_date_n2.`date` = '2008-04-08' and srcpart_hour_n0.hour = 11 and 
srcpart_iceberg.hr = 11;
+select count(*) from srcpart_iceberg join srcpart_date_n2 on 
(srcpart_iceberg.ds = srcpart_date_n2.ds) join srcpart_hour_n0 on 
(srcpart_iceberg.hr = srcpart_hour_n0.hr)
+where srcpart_date_n2.`date` = '2008-04-08' and srcpart_hour_n0.hour = 11 and 
srcpart_iceberg.hr = 11;
+EXPLAIN select count(*) from srcpart_iceberg join srcpart_date_n2 on 
(srcpart_iceberg.ds = srcpart_date_n2.ds) join srcpart_hour_n0 on 
(srcpart_iceberg.hr = srcpart_hour_n0.hr)
+where srcpart_date_n2.`date` = '2008-04-08' and srcpart_iceberg.hr = 13;
+
+select count(*) from srcpart_iceberg join srcpart_date_n2 on 
(srcpart_iceberg.ds = srcpart_date_n2.ds) join srcpart_hour_n0 on 
(srcpart_iceberg.hr = srcpart_hour_n0.hr)
+where srcpart_date_n2.`date` = '2008-04-08' and srcpart_iceberg.hr = 13;
+
+-- union + subquery
+EXPLAIN select distinct(ds) from srcpart_iceberg where srcpart_iceberg.ds in 
(select max(srcpart_iceberg.ds) from srcpart_iceberg union all select 
min(srcpart_iceberg.ds) from srcpart_iceberg);
+select distinct(ds) from srcpart_iceberg where srcpart_iceberg.ds in (select 
max(srcpart_iceberg.ds) from srcpart_iceberg union all select 
min(srcpart_iceberg.ds) from srcpart_iceberg);
+
+
+-- Two iceberg tables
+
+create table srcpart_date_hour_n0_iceberg (ds string, `date` string, hr 
string, hour string)
+STORED BY 'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler';
+INSERT INTO srcpart_date_hour_n0_iceberg select ds as ds, ds as `date`, hr as 
hr, hr as hour from srcpart group by ds, hr;
+
+set hive.vectorized.execution.enabled=false;

Review comment:
       It turns out that we hit a vectorization issue here, so I would keep 
`set hive.vectorized.execution.enabled=false` until it is fixed. CC: @szlta:
   ```
   See ./ql/target/tmp/log/hive.log or ./itests/qtest/target/tmp/log/hive.log, 
or check ./ql/target/surefire-reports or 
./itests/qtest/target/surefire-reports/ for specific test cases logs.
    org.apache.hadoop.hive.ql.metadata.HiveException: Vertex failed, 
vertexName=Map 1, vertexId=vertex_1627387142352_0001_11_01, diagnostics=[Task 
failed, taskId=task_1627387142352_0001_11_01_000000, diagnostics=[TaskAttempt 0 
failed, info=[Error: Error while running task ( failure ) : 
attempt_1627387142352_0001_11_01_000000_0:java.lang.RuntimeException: 
org.apache.hadoop.hive.ql.metadata.HiveException: java.io.IOException: 
java.lang.NullPointerException
        at 
org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:365)
        at 
org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:277)
        at 
org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:381)
        at 
org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:82)
        at 
org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:69)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:422)
        at 
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1682)
        at 
org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:69)
        at 
org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:39)
        at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
        at 
com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:108)
        at 
com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:41)
        at 
com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:77)
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)
   Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: 
java.io.IOException: java.lang.NullPointerException
        at 
org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.pushRecord(MapRecordSource.java:89)
        at 
org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.run(MapRecordProcessor.java:414)
        at 
org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:311)
        ... 16 more
   Caused by: java.io.IOException: java.lang.NullPointerException
        at 
org.apache.hadoop.hive.io.HiveIOExceptionHandlerChain.handleRecordReaderNextException(HiveIOExceptionHandlerChain.java:121)
        at 
org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil.handleRecordReaderNextException(HiveIOExceptionHandlerUtil.java:77)
        at 
org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader.doNext(HiveContextAwareRecordReader.java:374)
        at 
org.apache.hadoop.hive.ql.io.HiveRecordReader.doNext(HiveRecordReader.java:82)
        at 
org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader.next(HiveContextAwareRecordReader.java:119)
        at 
org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader.next(HiveContextAwareRecordReader.java:59)
        at 
org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.next(TezGroupedSplitsInputFormat.java:145)
        at 
org.apache.tez.mapreduce.lib.MRReaderMapred.next(MRReaderMapred.java:116)
        at 
org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.pushRecord(MapRecordSource.java:75)
        ... 18 more
   Caused by: java.lang.NullPointerException
        at 
org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx.addPartitionColsToBatch(VectorizedRowBatchCtx.java:595)
        at 
org.apache.iceberg.mr.hive.vector.VectorizedRowBatchIterator.advance(VectorizedRowBatchIterator.java:69)
        at 
org.apache.iceberg.mr.hive.vector.VectorizedRowBatchIterator.hasNext(VectorizedRowBatchIterator.java:81)
        at 
org.apache.iceberg.mr.mapreduce.IcebergInputFormat$IcebergRecordReader.nextKeyValue(IcebergInputFormat.java:222)
        at 
org.apache.iceberg.mr.hive.vector.HiveIcebergVectorizedRecordReader.next(HiveIcebergVectorizedRecordReader.java:48)
        at 
org.apache.iceberg.mr.hive.vector.HiveIcebergVectorizedRecordReader.next(HiveIcebergVectorizedRecordReader.java:34)
        at 
org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader.doNext(HiveContextAwareRecordReader.java:369)
        ... 24 more
   ], TaskAttempt 1 failed, info=[Error: Error while running task ( failure ) : 
attempt_1627387142352_0001_11_01_000000_1:java.lang.RuntimeException: 
org.apache.hadoop.hive.ql.metadata.HiveException: java.io.IOException: 
java.lang.NullPointerException
        at 
org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:365)
        at 
org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:277)
        at 
org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:381)
        at 
org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:82)
        at 
org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:69)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:422)
        at 
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1682)
        at 
org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:69)
        at 
org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:39)
        at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
        at 
com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:108)
        at 
com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:41)
        at 
com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:77)
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)
   Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: 
java.io.IOException: java.lang.NullPointerException
        at 
org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.pushRecord(MapRecordSource.java:89)
        at 
org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.run(MapRecordProcessor.java:414)
        at 
org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:311)
        ... 16 more
   Caused by: java.io.IOException: java.lang.NullPointerException
        at 
org.apache.hadoop.hive.io.HiveIOExceptionHandlerChain.handleRecordReaderNextException(HiveIOExceptionHandlerChain.java:121)
        at 
org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil.handleRecordReaderNextException(HiveIOExceptionHandlerUtil.java:77)
        at 
org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader.doNext(HiveContextAwareRecordReader.java:374)
        at 
org.apache.hadoop.hive.ql.io.HiveRecordReader.doNext(HiveRecordReader.java:82)
        at 
org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader.next(HiveContextAwareRecordReader.java:119)
        at 
org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader.next(HiveContextAwareRecordReader.java:59)
        at 
org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.next(TezGroupedSplitsInputFormat.java:145)
        at 
org.apache.tez.mapreduce.lib.MRReaderMapred.next(MRReaderMapred.java:116)
        at 
org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.pushRecord(MapRecordSource.java:75)
        ... 18 more
   Caused by: java.lang.NullPointerException
        at 
org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx.addPartitionColsToBatch(VectorizedRowBatchCtx.java:595)
        at 
org.apache.iceberg.mr.hive.vector.VectorizedRowBatchIterator.advance(VectorizedRowBatchIterator.java:69)
        at 
org.apache.iceberg.mr.hive.vector.VectorizedRowBatchIterator.hasNext(VectorizedRowBatchIterator.java:81)
        at 
org.apache.iceberg.mr.mapreduce.IcebergInputFormat$IcebergRecordReader.nextKeyValue(IcebergInputFormat.java:222)
        at 
org.apache.iceberg.mr.hive.vector.HiveIcebergVectorizedRecordReader.next(HiveIcebergVectorizedRecordReader.java:48)
        at 
org.apache.iceberg.mr.hive.vector.HiveIcebergVectorizedRecordReader.next(HiveIcebergVectorizedRecordReader.java:34)
        at 
org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader.doNext(HiveContextAwareRecordReader.java:369)
        ... 24 more
   ]
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: gitbox-unsubscribe@hive.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 628386)
    Time Spent: 50m  (was: 40m)

> Add more tests to Iceberg partition pruning
> -------------------------------------------
>
>                 Key: HIVE-25067
>                 URL: https://issues.apache.org/jira/browse/HIVE-25067
>             Project: Hive
>          Issue Type: Test
>            Reporter: Peter Vary
>            Assignee: Peter Vary
>            Priority: Major
>              Labels: pull-request-available
>          Time Spent: 50m
>  Remaining Estimate: 0h
>
> As we have qtest for Iceberg now, it would be good to add some partition 
> pruning qtest to have better coverage



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to