[ https://issues.apache.org/jira/browse/HIVE-23509?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17149128#comment-17149128 ]
Zoltan Haindrich commented on HIVE-23509: ----------------------------------------- okay; but I think if someone still uses it - or wants to backport it to some older version - it might make sense to have it. +1 > MapJoin AssertionError: Capacity must be power of 2 > --------------------------------------------------- > > Key: HIVE-23509 > URL: https://issues.apache.org/jira/browse/HIVE-23509 > Project: Hive > Issue Type: Bug > Environment: Hive-2.3.6 > Reporter: Shashank Pedamallu > Assignee: Shashank Pedamallu > Priority: Major > Labels: pull-request-available > Time Spent: 10m > Remaining Estimate: 0h > > Observed AssertionError errors in Hive query when rowCount for join is issued > as (2^x)+(2^(x+1)). > Following is the stacktrace: > {noformat} > [2020-05-11 05:43:12,135] {base_task_runner.py:95} INFO - Subtask: ERROR : > Vertex failed, vertexName=Map 4, vertexId=vertex_1588729523139_51702_1_06, > diagnostics=[Task failed, taskId=task_1588729523139_51702_1_06_001286, > diagnostics=[TaskAttempt 0 failed, info=[Error: Error while running task ( > failure ) : > attempt_1588729523139_51702_1_06_001286_0:java.lang.RuntimeException: > java.lang.AssertionError: Capacity must be a power of two [2020-05-11 > 05:43:12,136] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:211) > [2020-05-11 05:43:12,136] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:168) > [2020-05-11 05:43:12,136] {base_task_runner.py:95} INFO - Subtask: at > org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:374) > [2020-05-11 05:43:12,136] {base_task_runner.py:95} INFO - Subtask: at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73) > [2020-05-11 05:43:12,136] {base_task_runner.py:95} INFO - Subtask: at > 
org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61) > [2020-05-11 05:43:12,136] {base_task_runner.py:95} INFO - Subtask: at > java.security.AccessController.doPrivileged(Native Method) [2020-05-11 > 05:43:12,136] {base_task_runner.py:95} INFO - Subtask: at > javax.security.auth.Subject.doAs(Subject.java:422) [2020-05-11 05:43:12,136] > {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1893) > [2020-05-11 05:43:12,136] {base_task_runner.py:95} INFO - Subtask: at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61) > [2020-05-11 05:43:12,136] {base_task_runner.py:95} INFO - Subtask: at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37) > [2020-05-11 05:43:12,136] {base_task_runner.py:95} INFO - Subtask: at > org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) > [2020-05-11 05:43:12,136] {base_task_runner.py:95} INFO - Subtask: at > java.util.concurrent.FutureTask.run(FutureTask.java:266) [2020-05-11 > 05:43:12,136] {base_task_runner.py:95} INFO - Subtask: at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > [2020-05-11 05:43:12,136] {base_task_runner.py:95} INFO - Subtask: at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > [2020-05-11 05:43:12,137] {base_task_runner.py:95} INFO - Subtask: at > java.lang.Thread.run(Thread.java:748) [2020-05-11 05:43:12,137] > {base_task_runner.py:95} INFO - Subtask: Caused by: java.lang.AssertionError: > Capacity must be a power of two [2020-05-11 05:43:12,137] > {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.persistence.BytesBytesMultiHashMap.validateCapacity(BytesBytesMultiHashMap.java:552) > [2020-05-11 05:43:12,137] {base_task_runner.py:95} INFO - Subtask: at > 
org.apache.hadoop.hive.ql.exec.persistence.BytesBytesMultiHashMap.expandAndRehashImpl(BytesBytesMultiHashMap.java:731) > [2020-05-11 05:43:12,137] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.persistence.BytesBytesMultiHashMap.expandAndRehashToTarget(BytesBytesMultiHashMap.java:545) > [2020-05-11 05:43:12,137] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer$HashPartition.getHashMapFromDisk(HybridHashTableContainer.java:183) > [2020-05-11 05:43:12,137] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.MapJoinOperator.reloadHashTable(MapJoinOperator.java:641) > [2020-05-11 05:43:12,137] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.MapJoinOperator.continueProcess(MapJoinOperator.java:603) > [2020-05-11 05:43:12,137] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.MapJoinOperator.closeOp(MapJoinOperator.java:539) > [2020-05-11 05:43:12,137] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697) [2020-05-11 > 05:43:12,137] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:711) [2020-05-11 > 05:43:12,137] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.close(MapRecordProcessor.java:477) > [2020-05-11 05:43:12,137] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:199) > [2020-05-11 05:43:12,137] {base_task_runner.py:95} INFO - Subtask: ... 
14 > more [2020-05-11 05:43:12,138] {base_task_runner.py:95} INFO - Subtask: ], > TaskAttempt 1 failed, info=[Error: Error while running task ( failure ) : > attempt_1588729523139_51702_1_06_001286_1:java.lang.RuntimeException: > java.lang.AssertionError: Capacity must be a power of two [2020-05-11 > 05:43:12,138] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:211) > [2020-05-11 05:43:12,138] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:168) > [2020-05-11 05:43:12,138] {base_task_runner.py:95} INFO - Subtask: at > org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:374) > [2020-05-11 05:43:12,138] {base_task_runner.py:95} INFO - Subtask: at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73) > [2020-05-11 05:43:12,138] {base_task_runner.py:95} INFO - Subtask: at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61) > [2020-05-11 05:43:12,138] {base_task_runner.py:95} INFO - Subtask: at > java.security.AccessController.doPrivileged(Native Method) [2020-05-11 > 05:43:12,138] {base_task_runner.py:95} INFO - Subtask: at > javax.security.auth.Subject.doAs(Subject.java:422) [2020-05-11 05:43:12,138] > {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1893) > [2020-05-11 05:43:12,138] {base_task_runner.py:95} INFO - Subtask: at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61) > [2020-05-11 05:43:12,138] {base_task_runner.py:95} INFO - Subtask: at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37) > [2020-05-11 05:43:12,138] {base_task_runner.py:95} INFO - Subtask: at > org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) > [2020-05-11 
05:43:12,138] {base_task_runner.py:95} INFO - Subtask: at > java.util.concurrent.FutureTask.run(FutureTask.java:266) [2020-05-11 > 05:43:12,138] {base_task_runner.py:95} INFO - Subtask: at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > [2020-05-11 05:43:12,139] {base_task_runner.py:95} INFO - Subtask: at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > [2020-05-11 05:43:12,139] {base_task_runner.py:95} INFO - Subtask: at > java.lang.Thread.run(Thread.java:748) [2020-05-11 05:43:12,139] > {base_task_runner.py:95} INFO - Subtask: Caused by: java.lang.AssertionError: > Capacity must be a power of two [2020-05-11 05:43:12,139] > {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.persistence.BytesBytesMultiHashMap.validateCapacity(BytesBytesMultiHashMap.java:552) > [2020-05-11 05:43:12,139] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.persistence.BytesBytesMultiHashMap.expandAndRehashImpl(BytesBytesMultiHashMap.java:731) > [2020-05-11 05:43:12,139] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.persistence.BytesBytesMultiHashMap.expandAndRehashToTarget(BytesBytesMultiHashMap.java:545) > [2020-05-11 05:43:12,139] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer$HashPartition.getHashMapFromDisk(HybridHashTableContainer.java:183) > [2020-05-11 05:43:12,139] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.MapJoinOperator.reloadHashTable(MapJoinOperator.java:641) > [2020-05-11 05:43:12,139] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.MapJoinOperator.continueProcess(MapJoinOperator.java:603) > [2020-05-11 05:43:12,139] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.MapJoinOperator.closeOp(MapJoinOperator.java:539) > [2020-05-11 05:43:12,139] 
{base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697) [2020-05-11 > 05:43:12,139] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:711) [2020-05-11 > 05:43:12,139] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.close(MapRecordProcessor.java:477) > [2020-05-11 05:43:12,140] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:199) > [2020-05-11 05:43:12,140] {base_task_runner.py:95} INFO - Subtask: ... 14 > more [2020-05-11 05:43:12,140] {base_task_runner.py:95} INFO - Subtask: ], > TaskAttempt 2 failed, info=[Error: Error while running task ( failure ) : > attempt_1588729523139_51702_1_06_001286_2:java.lang.RuntimeException: > java.lang.AssertionError: Capacity must be a power of two [2020-05-11 > 05:43:12,140] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:211) > [2020-05-11 05:43:12,140] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:168) > [2020-05-11 05:43:12,140] {base_task_runner.py:95} INFO - Subtask: at > org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:374) > [2020-05-11 05:43:12,140] {base_task_runner.py:95} INFO - Subtask: at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73) > [2020-05-11 05:43:12,140] {base_task_runner.py:95} INFO - Subtask: at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61) > [2020-05-11 05:43:12,140] {base_task_runner.py:95} INFO - Subtask: at > java.security.AccessController.doPrivileged(Native Method) [2020-05-11 > 05:43:12,140] {base_task_runner.py:95} INFO - Subtask: at > 
javax.security.auth.Subject.doAs(Subject.java:422) [2020-05-11 05:43:12,140] > {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1893) > [2020-05-11 05:43:12,140] {base_task_runner.py:95} INFO - Subtask: at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61) > [2020-05-11 05:43:12,140] {base_task_runner.py:95} INFO - Subtask: at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37) > [2020-05-11 05:43:12,140] {base_task_runner.py:95} INFO - Subtask: at > org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) > [2020-05-11 05:43:12,140] {base_task_runner.py:95} INFO - Subtask: at > java.util.concurrent.FutureTask.run(FutureTask.java:266) [2020-05-11 > 05:43:12,141] {base_task_runner.py:95} INFO - Subtask: at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > [2020-05-11 05:43:12,141] {base_task_runner.py:95} INFO - Subtask: at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > [2020-05-11 05:43:12,141] {base_task_runner.py:95} INFO - Subtask: at > java.lang.Thread.run(Thread.java:748) [2020-05-11 05:43:12,141] > {base_task_runner.py:95} INFO - Subtask: Caused by: java.lang.AssertionError: > Capacity must be a power of two [2020-05-11 05:43:12,141] > {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.persistence.BytesBytesMultiHashMap.validateCapacity(BytesBytesMultiHashMap.java:552) > [2020-05-11 05:43:12,141] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.persistence.BytesBytesMultiHashMap.expandAndRehashImpl(BytesBytesMultiHashMap.java:731) > [2020-05-11 05:43:12,141] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.persistence.BytesBytesMultiHashMap.expandAndRehashToTarget(BytesBytesMultiHashMap.java:545) > [2020-05-11 05:43:12,141] 
{base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer$HashPartition.getHashMapFromDisk(HybridHashTableContainer.java:183) > [2020-05-11 05:43:12,141] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.MapJoinOperator.reloadHashTable(MapJoinOperator.java:641) > [2020-05-11 05:43:12,141] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.MapJoinOperator.continueProcess(MapJoinOperator.java:603) > [2020-05-11 05:43:12,141] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.MapJoinOperator.closeOp(MapJoinOperator.java:539) > [2020-05-11 05:43:12,141] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697) [2020-05-11 > 05:43:12,141] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:711) [2020-05-11 > 05:43:12,141] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.close(MapRecordProcessor.java:477) > [2020-05-11 05:43:12,142] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:199) > [2020-05-11 05:43:12,142] {base_task_runner.py:95} INFO - Subtask: ... 
14 > more [2020-05-11 05:43:12,142] {base_task_runner.py:95} INFO - Subtask: ], > TaskAttempt 3 failed, info=[Error: Error while running task ( failure ) : > attempt_1588729523139_51702_1_06_001286_3:java.lang.RuntimeException: > java.lang.AssertionError: Capacity must be a power of two [2020-05-11 > 05:43:12,142] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:211) > [2020-05-11 05:43:12,142] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:168) > [2020-05-11 05:43:12,142] {base_task_runner.py:95} INFO - Subtask: at > org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:374) > [2020-05-11 05:43:12,142] {base_task_runner.py:95} INFO - Subtask: at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73) > [2020-05-11 05:43:12,142] {base_task_runner.py:95} INFO - Subtask: at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61) > [2020-05-11 05:43:12,142] {base_task_runner.py:95} INFO - Subtask: at > java.security.AccessController.doPrivileged(Native Method) [2020-05-11 > 05:43:12,142] {base_task_runner.py:95} INFO - Subtask: at > javax.security.auth.Subject.doAs(Subject.java:422) [2020-05-11 05:43:12,142] > {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1893) > [2020-05-11 05:43:12,142] {base_task_runner.py:95} INFO - Subtask: at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61) > [2020-05-11 05:43:12,142] {base_task_runner.py:95} INFO - Subtask: at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37) > [2020-05-11 05:43:12,142] {base_task_runner.py:95} INFO - Subtask: at > org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) > [2020-05-11 
05:43:12,143] {base_task_runner.py:95} INFO - Subtask: at > java.util.concurrent.FutureTask.run(FutureTask.java:266) [2020-05-11 > 05:43:12,143] {base_task_runner.py:95} INFO - Subtask: at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > [2020-05-11 05:43:12,143] {base_task_runner.py:95} INFO - Subtask: at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > [2020-05-11 05:43:12,143] {base_task_runner.py:95} INFO - Subtask: at > java.lang.Thread.run(Thread.java:748) [2020-05-11 05:43:12,143] > {base_task_runner.py:95} INFO - Subtask: Caused by: java.lang.AssertionError: > Capacity must be a power of two [2020-05-11 05:43:12,143] > {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.persistence.BytesBytesMultiHashMap.validateCapacity(BytesBytesMultiHashMap.java:552) > [2020-05-11 05:43:12,143] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.persistence.BytesBytesMultiHashMap.expandAndRehashImpl(BytesBytesMultiHashMap.java:731) > [2020-05-11 05:43:12,143] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.persistence.BytesBytesMultiHashMap.expandAndRehashToTarget(BytesBytesMultiHashMap.java:545) > [2020-05-11 05:43:12,143] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer$HashPartition.getHashMapFromDisk(HybridHashTableContainer.java:183) > [2020-05-11 05:43:12,143] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.MapJoinOperator.reloadHashTable(MapJoinOperator.java:641) > [2020-05-11 05:43:12,143] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.MapJoinOperator.continueProcess(MapJoinOperator.java:603) > [2020-05-11 05:43:12,143] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.MapJoinOperator.closeOp(MapJoinOperator.java:539) > [2020-05-11 05:43:12,143] 
{base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697) [2020-05-11 > 05:43:12,143] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:711) [2020-05-11 > 05:43:12,144] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.close(MapRecordProcessor.java:477) > [2020-05-11 05:43:12,144] {base_task_runner.py:95} INFO - Subtask: at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:199) > [2020-05-11 05:43:12,144] {base_task_runner.py:95} INFO - Subtask: ... 14 > more{noformat} -- This message was sent by Atlassian Jira (v8.3.4#803005)