[jira] [Updated] (HIVE-18105) Aggregation of an empty set doesn't pass constants to the UDAF

Zoltan Haindrich (JIRA) Tue, 11 Sep 2018 07:40:40 -0700


     [ 
https://issues.apache.org/jira/browse/HIVE-18105?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]


Zoltan Haindrich updated HIVE-18105:
------------------------------------
    Description: 
the groupbyoperator's logic for firstrow passes {{null}} for all parameters.
see 
[here|https://github.com/apache/hive/blob/39d46e8af5a3794f7395060b890f94ddc84516e7/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java#L1116].

this could obstruct {{compute_stats}} operations because it has a constant 
argument.

affected unit test: -Dtest=TestCliDriver -Dqfile=acid_insert_overwrite.q 

exception backtrace:

{code}
java.lang.Exception: java.lang.RuntimeException: 
org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while 
processing row (tag=0) 
{"key":{},"value":{"_col0":{"columntype":"String","maxlength":0,
"sumlength":0,"count":0,"countnulls":1,"bitvector":FM^@^@},"_col1":{"columntype":"Long","min":null,"max":null,"countnulls":1,"bitvector":FM^@^@}}}
        at 
org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:492) 
~[hadoop-mapreduce-client-common-3.1.0.jar:?]
        at 
org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:559) 
[hadoop-mapreduce-client-common-3.1.0.jar:?]
Caused by: java.lang.RuntimeException: 
org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while 
processing row (tag=0) 
{"key":{},"value":{"_col0":{"columntype":"String","maxlength":0,"sumlength
":0,"count":0,"countnulls":1,"bitvector":FM^@^@},"_col1":{"columntype":"Long","min":null,"max":null,"countnulls":1,"bitvector":FM^@^@}}}
        at 
org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:255) 
~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
        at 
org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:445) 
~[hadoop-mapreduce-client-core-3.1.0.jar:?]
        at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:393) 
~[hadoop-mapreduce-client-core-3.1.0.jar:?]
        at 
org.apache.hadoop.mapred.LocalJobRunner$Job$ReduceTaskRunnable.run(LocalJobRunner.java:347)
 ~[hadoop-mapreduce-client-common-3.1.0.jar:?]
        at 
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) 
~[?:1.8.0_181]
        at java.util.concurrent.FutureTask.run(FutureTask.java:266) 
~[?:1.8.0_181]
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) 
~[?:1.8.0_181]
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) 
~[?:1.8.0_181]
        at java.lang.Thread.run(Thread.java:748) ~[?:1.8.0_181]
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error 
while processing row (tag=0) 
{"key":{},"value":{"_col0":{"columntype":"String","maxlength":0,"sumlength":0,"count":0,"countnulls":1
,"bitvector":FM^@^@},"_col1":{"columntype":"Long","min":null,"max":null,"countnulls":1,"bitvector":FM^@^@}}}
        at 
org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:243) 
~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
        at 
org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:445) 
~[hadoop-mapreduce-client-core-3.1.0.jar:?]
        at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:393) 
~[hadoop-mapreduce-client-core-3.1.0.jar:?]
        at 
org.apache.hadoop.mapred.LocalJobRunner$Job$ReduceTaskRunnable.run(LocalJobRunner.java:347)
 ~[hadoop-mapreduce-client-common-3.1.0.jar:?]
        at 
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) 
~[?:1.8.0_181]
        at java.util.concurrent.FutureTask.run(FutureTask.java:266) 
~[?:1.8.0_181]
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) 
~[?:1.8.0_181]
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) 
~[?:1.8.0_181]
        at java.lang.Thread.run(Thread.java:748) ~[?:1.8.0_181]
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: 
java.lang.ClassCastException: org.apache.hadoop.hive.common.ndv.fm.FMSketch 
cannot be cast to org.apache.hadoop.hive.common.ndv.hll.HyperLogLog
        at 
org.apache.hadoop.hive.ql.exec.GroupByOperator.process(GroupByOperator.java:795)
 ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
        at 
org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:234) 
~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
        at 
org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:445) 
~[hadoop-mapreduce-client-core-3.1.0.jar:?]
        at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:393) 
~[hadoop-mapreduce-client-core-3.1.0.jar:?]
        at 
org.apache.hadoop.mapred.LocalJobRunner$Job$ReduceTaskRunnable.run(LocalJobRunner.java:347)
 ~[hadoop-mapreduce-client-common-3.1.0.jar:?]
        at 
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) 
~[?:1.8.0_181]
        at java.util.concurrent.FutureTask.run(FutureTask.java:266) 
~[?:1.8.0_181]
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) 
~[?:1.8.0_181]
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) 
~[?:1.8.0_181]
        at java.lang.Thread.run(Thread.java:748) ~[?:1.8.0_181]
Caused by: java.lang.ClassCastException: 
org.apache.hadoop.hive.common.ndv.fm.FMSketch cannot be cast to 
org.apache.hadoop.hive.common.ndv.hll.HyperLogLog
        at 
org.apache.hadoop.hive.common.ndv.hll.HyperLogLog.mergeEstimators(HyperLogLog.java:650)
 ~[hive-standalone-metastore-server-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
        at 
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFComputeStats$GenericUDAFStringStatsEvaluator.merge(GenericUDAFComputeStats.java:964)
 ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
        at 
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.aggregate(GenericUDAFEvaluator.java:215)
 ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
        at 
org.apache.hadoop.hive.ql.exec.GroupByOperator.updateAggregations(GroupByOperator.java:641)
 ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
        at 
org.apache.hadoop.hive.ql.exec.GroupByOperator.processAggr(GroupByOperator.java:880)
 ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
        at 
org.apache.hadoop.hive.ql.exec.GroupByOperator.processKey(GroupByOperator.java:724)
 ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
        at 
org.apache.hadoop.hive.ql.exec.GroupByOperator.process(GroupByOperator.java:790)
 ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
        at 
org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:234) 
~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
        at 
org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:445) 
~[hadoop-mapreduce-client-core-3.1.0.jar:?]
        at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:393) 
~[hadoop-mapreduce-client-core-3.1.0.jar:?]
        at 
org.apache.hadoop.mapred.LocalJobRunner$Job$ReduceTaskRunnable.run(LocalJobRunner.java:347)
 ~[hadoop-mapreduce-client-common-3.1.0.jar:?]
        at 
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) 
~[?:1.8.0_181]
        at java.util.concurrent.FutureTask.run(FutureTask.java:266) 
~[?:1.8.0_181]
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) 
~[?:1.8.0_181]
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) 
~[?:1.8.0_181]
        at java.lang.Thread.run(Thread.java:748) ~[?:1.8.0_181]
{code}

  was:
the groupbyoperator's logic for firstrow passes {{null}} for all parameters.
see 
[here|https://github.com/apache/hive/blob/39d46e8af5a3794f7395060b890f94ddc84516e7/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java#L1116].

this could obstruct {{compute_stats}} operations because it has a constant 
argument.


> Aggregation of an empty set doesn't pass constants to the UDAF
> --------------------------------------------------------------
>
>                 Key: HIVE-18105
>                 URL: https://issues.apache.org/jira/browse/HIVE-18105
>             Project: Hive
>          Issue Type: Sub-task
>            Reporter: Zoltan Haindrich
>            Assignee: Zoltan Haindrich
>            Priority: Major
>         Attachments: HIVE-18105.01.patch, HIVE-18105.02.patch, 
> HIVE-18105.02.patch
>
>
> the groupbyoperator's logic for firstrow passes {{null}} for all parameters.
> see 
> [here|https://github.com/apache/hive/blob/39d46e8af5a3794f7395060b890f94ddc84516e7/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java#L1116].
> this could obstruct {{compute_stats}} operations because it has a constant 
> argument.
> affected unit test: -Dtest=TestCliDriver -Dqfile=acid_insert_overwrite.q 
> exception backtrace:
> {code}
> java.lang.Exception: java.lang.RuntimeException: 
> org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while 
> processing row (tag=0) 
> {"key":{},"value":{"_col0":{"columntype":"String","maxlength":0,
> "sumlength":0,"count":0,"countnulls":1,"bitvector":FM^@^@},"_col1":{"columntype":"Long","min":null,"max":null,"countnulls":1,"bitvector":FM^@^@}}}
>         at 
> org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:492) 
> ~[hadoop-mapreduce-client-common-3.1.0.jar:?]
>         at 
> org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:559) 
> [hadoop-mapreduce-client-common-3.1.0.jar:?]
> Caused by: java.lang.RuntimeException: 
> org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while 
> processing row (tag=0) 
> {"key":{},"value":{"_col0":{"columntype":"String","maxlength":0,"sumlength
> ":0,"count":0,"countnulls":1,"bitvector":FM^@^@},"_col1":{"columntype":"Long","min":null,"max":null,"countnulls":1,"bitvector":FM^@^@}}}
>         at 
> org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:255) 
> ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
>         at 
> org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:445) 
> ~[hadoop-mapreduce-client-core-3.1.0.jar:?]
>         at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:393) 
> ~[hadoop-mapreduce-client-core-3.1.0.jar:?]
>         at 
> org.apache.hadoop.mapred.LocalJobRunner$Job$ReduceTaskRunnable.run(LocalJobRunner.java:347)
>  ~[hadoop-mapreduce-client-common-3.1.0.jar:?]
>         at 
> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) 
> ~[?:1.8.0_181]
>         at java.util.concurrent.FutureTask.run(FutureTask.java:266) 
> ~[?:1.8.0_181]
>         at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>  ~[?:1.8.0_181]
>         at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>  ~[?:1.8.0_181]
>         at java.lang.Thread.run(Thread.java:748) ~[?:1.8.0_181]
> Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime 
> Error while processing row (tag=0) 
> {"key":{},"value":{"_col0":{"columntype":"String","maxlength":0,"sumlength":0,"count":0,"countnulls":1
> ,"bitvector":FM^@^@},"_col1":{"columntype":"Long","min":null,"max":null,"countnulls":1,"bitvector":FM^@^@}}}
>         at 
> org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:243) 
> ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
>         at 
> org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:445) 
> ~[hadoop-mapreduce-client-core-3.1.0.jar:?]
>         at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:393) 
> ~[hadoop-mapreduce-client-core-3.1.0.jar:?]
>         at 
> org.apache.hadoop.mapred.LocalJobRunner$Job$ReduceTaskRunnable.run(LocalJobRunner.java:347)
>  ~[hadoop-mapreduce-client-common-3.1.0.jar:?]
>         at 
> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) 
> ~[?:1.8.0_181]
>         at java.util.concurrent.FutureTask.run(FutureTask.java:266) 
> ~[?:1.8.0_181]
>         at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>  ~[?:1.8.0_181]
>         at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>  ~[?:1.8.0_181]
>         at java.lang.Thread.run(Thread.java:748) ~[?:1.8.0_181]
> Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: 
> java.lang.ClassCastException: org.apache.hadoop.hive.common.ndv.fm.FMSketch 
> cannot be cast to org.apache.hadoop.hive.common.ndv.hll.HyperLogLog
>         at 
> org.apache.hadoop.hive.ql.exec.GroupByOperator.process(GroupByOperator.java:795)
>  ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
>         at 
> org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:234) 
> ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
>         at 
> org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:445) 
> ~[hadoop-mapreduce-client-core-3.1.0.jar:?]
>         at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:393) 
> ~[hadoop-mapreduce-client-core-3.1.0.jar:?]
>         at 
> org.apache.hadoop.mapred.LocalJobRunner$Job$ReduceTaskRunnable.run(LocalJobRunner.java:347)
>  ~[hadoop-mapreduce-client-common-3.1.0.jar:?]
>         at 
> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) 
> ~[?:1.8.0_181]
>         at java.util.concurrent.FutureTask.run(FutureTask.java:266) 
> ~[?:1.8.0_181]
>         at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>  ~[?:1.8.0_181]
>         at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>  ~[?:1.8.0_181]
>         at java.lang.Thread.run(Thread.java:748) ~[?:1.8.0_181]
> Caused by: java.lang.ClassCastException: 
> org.apache.hadoop.hive.common.ndv.fm.FMSketch cannot be cast to 
> org.apache.hadoop.hive.common.ndv.hll.HyperLogLog
>         at 
> org.apache.hadoop.hive.common.ndv.hll.HyperLogLog.mergeEstimators(HyperLogLog.java:650)
>  ~[hive-standalone-metastore-server-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
>         at 
> org.apache.hadoop.hive.ql.udf.generic.GenericUDAFComputeStats$GenericUDAFStringStatsEvaluator.merge(GenericUDAFComputeStats.java:964)
>  ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
>         at 
> org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.aggregate(GenericUDAFEvaluator.java:215)
>  ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
>         at 
> org.apache.hadoop.hive.ql.exec.GroupByOperator.updateAggregations(GroupByOperator.java:641)
>  ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
>         at 
> org.apache.hadoop.hive.ql.exec.GroupByOperator.processAggr(GroupByOperator.java:880)
>  ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
>         at 
> org.apache.hadoop.hive.ql.exec.GroupByOperator.processKey(GroupByOperator.java:724)
>  ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
>         at 
> org.apache.hadoop.hive.ql.exec.GroupByOperator.process(GroupByOperator.java:790)
>  ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
>         at 
> org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:234) 
> ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
>         at 
> org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:445) 
> ~[hadoop-mapreduce-client-core-3.1.0.jar:?]
>         at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:393) 
> ~[hadoop-mapreduce-client-core-3.1.0.jar:?]
>         at 
> org.apache.hadoop.mapred.LocalJobRunner$Job$ReduceTaskRunnable.run(LocalJobRunner.java:347)
>  ~[hadoop-mapreduce-client-common-3.1.0.jar:?]
>         at 
> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) 
> ~[?:1.8.0_181]
>         at java.util.concurrent.FutureTask.run(FutureTask.java:266) 
> ~[?:1.8.0_181]
>         at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>  ~[?:1.8.0_181]
>         at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>  ~[?:1.8.0_181]
>         at java.lang.Thread.run(Thread.java:748) ~[?:1.8.0_181]
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

[jira] [Updated] (HIVE-18105) Aggregation of an empty set doesn't pass constants to the UDAF

Reply via email to