[ https://issues.apache.org/jira/browse/HIVE-28082?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17837244#comment-17837244 ]
Krisztian Kasa commented on HIVE-28082: --------------------------------------- Some more details about the issue: {code} explain cbo select avg('text'); select avg('text'); {code} {{avg('text')}} is converted to {{sum('text')/count('text')}} {code} HiveProject(_o__c0=[/($0, $1)]) HiveAggregate(group=[{}], agg#0=[sum($0)], agg#1=[count()]) HiveProject($f0=[_UTF-16LE'text':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"]) HiveTableScan(table=[[_dummy_database, _dummy_table]], table:alias=[_dummy_table]) {code} and {{sum('text')}} throws an exception at execution time, which is logged as a warning: {code} 2024-04-15T04:47:57,568 WARN [TezTR-671313_1_1_0_0_0] generic.GenericUDAFSum: GenericUDAFSumDouble java.lang.NumberFormatException: For input string: "text" at sun.misc.FloatingDecimal.readJavaFormatString(FloatingDecimal.java:2043) at sun.misc.FloatingDecimal.parseDouble(FloatingDecimal.java:110) at java.lang.Double.parseDouble(Double.java:538) at org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.getDouble(PrimitiveObjectInspectorUtils.java:867) at org.apache.hadoop.hive.ql.udf.generic.GenericUDAFSum$GenericUDAFSumDouble.iterate(GenericUDAFSum.java:444) at org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.aggregate(GenericUDAFEvaluator.java:215) at org.apache.hadoop.hive.ql.exec.GroupByOperator.updateAggregations(GroupByOperator.java:620) at org.apache.hadoop.hive.ql.exec.GroupByOperator.processHashAggr(GroupByOperator.java:792) at org.apache.hadoop.hive.ql.exec.GroupByOperator.processKey(GroupByOperator.java:701) at org.apache.hadoop.hive.ql.exec.GroupByOperator.process(GroupByOperator.java:766) at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:888) at org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:94) at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:888) at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:173) at 
org.apache.hadoop.hive.ql.exec.MapOperator$MapOpCtx.forward(MapOperator.java:155) at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:555) at org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.processRow(MapRecordSource.java:101) at org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.pushRecord(MapRecordSource.java:83) at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.run(MapRecordProcessor.java:414) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:293) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:276) at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:381) at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:82) at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:69) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1899) at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:69) at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:39) at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) at org.apache.hadoop.hive.llap.daemon.impl.StatsRecordingThreadPool$WrappedCallable.call(StatsRecordingThreadPool.java:118) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) 2024-04-15T04:47:57,568 WARN [TezTR-671313_1_1_0_0_0] generic.GenericUDAFSum: GenericUDAFSumDouble ignoring similar exceptions. 
{code} The behavior is similar when CBO is turned off: {code} set hive.cbo.enable=false; select avg('text'); {code} {code} 2024-04-15T04:55:29,444 WARN [TezTR-126305_1_1_0_0_0] generic.GenericUDAFAverage: Ignoring similar exceptions java.lang.NumberFormatException: For input string: "text" at sun.misc.FloatingDecimal.readJavaFormatString(FloatingDecimal.java:2043) ~[?:1.8.0_301] at sun.misc.FloatingDecimal.parseDouble(FloatingDecimal.java:110) ~[?:1.8.0_301] at java.lang.Double.parseDouble(Double.java:538) ~[?:1.8.0_301] at org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.getDouble(PrimitiveObjectInspectorUtils.java:867) ~[hive-serde-4.1.0-SNAPSHOT.jar:4.1.0-SNAPSHOT] at org.apache.hadoop.hive.ql.udf.generic.GenericUDAFAverage$GenericUDAFAverageEvaluatorDouble.doIterate(GenericUDAFAverage.java:148) ~[hive-exec-4.1.0-SNAPSHOT.jar:4.1.0-SNAPSHOT] at org.apache.hadoop.hive.ql.udf.generic.GenericUDAFAverage$AbstractGenericUDAFAverageEvaluator.iterate(GenericUDAFAverage.java:550) ~[hive-exec-4.1.0-SNAPSHOT.jar:4.1.0-SNAPSHOT] at org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.aggregate(GenericUDAFEvaluator.java:215) ~[hive-exec-4.1.0-SNAPSHOT.jar:4.1.0-SNAPSHOT] at org.apache.hadoop.hive.ql.exec.GroupByOperator.updateAggregations(GroupByOperator.java:620) ~[hive-exec-4.1.0-SNAPSHOT.jar:4.1.0-SNAPSHOT] at org.apache.hadoop.hive.ql.exec.GroupByOperator.processHashAggr(GroupByOperator.java:792) ~[hive-exec-4.1.0-SNAPSHOT.jar:4.1.0-SNAPSHOT] at org.apache.hadoop.hive.ql.exec.GroupByOperator.processKey(GroupByOperator.java:701) ~[hive-exec-4.1.0-SNAPSHOT.jar:4.1.0-SNAPSHOT] at org.apache.hadoop.hive.ql.exec.GroupByOperator.process(GroupByOperator.java:766) ~[hive-exec-4.1.0-SNAPSHOT.jar:4.1.0-SNAPSHOT] at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:888) ~[hive-exec-4.1.0-SNAPSHOT.jar:4.1.0-SNAPSHOT] at org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:94) ~[hive-exec-4.1.0-SNAPSHOT.jar:4.1.0-SNAPSHOT] at 
org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:888) ~[hive-exec-4.1.0-SNAPSHOT.jar:4.1.0-SNAPSHOT] at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:173) ~[hive-exec-4.1.0-SNAPSHOT.jar:4.1.0-SNAPSHOT] at org.apache.hadoop.hive.ql.exec.MapOperator$MapOpCtx.forward(MapOperator.java:155) ~[hive-exec-4.1.0-SNAPSHOT.jar:4.1.0-SNAPSHOT] at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:555) ~[hive-exec-4.1.0-SNAPSHOT.jar:4.1.0-SNAPSHOT] at org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.processRow(MapRecordSource.java:101) ~[hive-exec-4.1.0-SNAPSHOT.jar:4.1.0-SNAPSHOT] at org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.pushRecord(MapRecordSource.java:83) ~[hive-exec-4.1.0-SNAPSHOT.jar:4.1.0-SNAPSHOT] at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.run(MapRecordProcessor.java:414) ~[hive-exec-4.1.0-SNAPSHOT.jar:4.1.0-SNAPSHOT] at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:293) ~[hive-exec-4.1.0-SNAPSHOT.jar:4.1.0-SNAPSHOT] at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:276) ~[hive-exec-4.1.0-SNAPSHOT.jar:4.1.0-SNAPSHOT] at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:381) ~[tez-runtime-internals-0.10.3.jar:0.10.3] at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:82) ~[tez-runtime-internals-0.10.3.jar:0.10.3] at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:69) ~[tez-runtime-internals-0.10.3.jar:0.10.3] at java.security.AccessController.doPrivileged(Native Method) ~[?:1.8.0_301] at javax.security.auth.Subject.doAs(Subject.java:422) ~[?:1.8.0_301] at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1899) ~[hadoop-common-3.3.6.jar:?] 
at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:69) ~[tez-runtime-internals-0.10.3.jar:0.10.3] at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:39) ~[tez-runtime-internals-0.10.3.jar:0.10.3] at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) ~[tez-common-0.10.3.jar:0.10.3] at org.apache.hadoop.hive.llap.daemon.impl.StatsRecordingThreadPool$WrappedCallable.call(StatsRecordingThreadPool.java:118) ~[hive-llap-server-4.1.0-SNAPSHOT.jar:4.1.0-SNAPSHOT] at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[?:1.8.0_301] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) ~[?:1.8.0_301] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ~[?:1.8.0_301] at java.lang.Thread.run(Thread.java:748) ~[?:1.8.0_301] {code} Unfortunately, Hive does not stop the execution or print the error message; the exception is only logged as a warning. > HiveAggregateReduceFunctionsRule could generate an inconsistent result > ---------------------------------------------------------------------- > > Key: HIVE-28082 > URL: https://issues.apache.org/jira/browse/HIVE-28082 > Project: Hive > Issue Type: Bug > Components: CBO > Affects Versions: 4.0.0-beta-1 > Reporter: Shohei Okumiya > Assignee: Shohei Okumiya > Priority: Major > > HiveAggregateReduceFunctionsRule translates AVG, STDDEV_POP, STDDEV_SAMP, > VAR_POP, and VAR_SAMP. Those UDFs accept string types and try to decode them > as floating point values. It is possible that undecodable values exist. > We found that it could cause inconsistent behaviors with or without CBO. > {code:java} > 0: jdbc:hive2://hive-hiveserver2:10000/defaul> SELECT AVG('text'); > ... 
> +------+ > | _c0 | > +------+ > | 0.0 | > +------+ > 1 row selected (18.229 seconds) > 0: jdbc:hive2://hive-hiveserver2:10000/defaul> set hive.cbo.enable=false; > No rows affected (0.013 seconds) > 0: jdbc:hive2://hive-hiveserver2:10000/defaul> SELECT AVG('text'); > ... > +-------+ > | _c0 | > +-------+ > | NULL | > +-------+ {code} -- This message was sent by Atlassian Jira (v8.20.10#820010)