[ https://issues.apache.org/jira/browse/HIVE-25794?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
ASF GitHub Bot updated HIVE-25794: ---------------------------------- Labels: pull-request-available (was: ) > CombineHiveRecordReader: log statements in a loop leads to memory pressure > -------------------------------------------------------------------------- > > Key: HIVE-25794 > URL: https://issues.apache.org/jira/browse/HIVE-25794 > Project: Hive > Issue Type: Bug > Components: Logging > Reporter: iBenny > Assignee: László Bodor > Priority: Major > Labels: pull-request-available > Time Spent: 10m > Remaining Estimate: 0h > > Similar to HIVE-16150, a huge string will be built in a loop, even the log > level is INFO. That leads to memory pressure when processing a big number of > split files. > From > [CombineHiveRecordReader.java|https://github.com/apache/hive/blob/master/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java#L116], > the following needs to be fixed. > LOG.debug("Found spec for " + path + " " + otherPart + " from " + > pathToPartInfo); > {code} > "TezChild" #26 daemon prio=5 os_prio=0 tid=0x00007f5fd1716000 nid=0x2118a > runnable [0x00007f5f8c411000] > java.lang.Thread.State: RUNNABLE > at java.lang.String.valueOf(String.java:2994) > at java.lang.StringBuilder.append(StringBuilder.java:131) > at java.util.AbstractMap.toString(AbstractMap.java:557) > at java.lang.String.valueOf(String.java:2994) > at java.lang.StringBuilder.append(StringBuilder.java:131) > at > org.apache.hadoop.hive.ql.io.CombineHiveRecordReader.extractSinglePartSpec(CombineHiveRecordReader.java:119) > at > org.apache.hadoop.hive.ql.io.CombineHiveRecordReader.<init>(CombineHiveRecordReader.java:88) > at sun.reflect.GeneratedConstructorAccessor22.newInstance(Unknown > Source) > at > sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) > at java.lang.reflect.Constructor.newInstance(Constructor.java:423) > at > org.apache.hadoop.hive.shims.HadoopShimsSecure$CombineFileRecordReader.initNextRecordReader(HadoopShimsSecure.java:257) > at > org.apache.hadoop.hive.shims.HadoopShimsSecure$CombineFileRecordReader.next(HadoopShimsSecure.java:144) > at > org.apache.tez.mapreduce.lib.MRReaderMapred.next(MRReaderMapred.java:116) > at > org.apache.hadoop.hive.ql.exec.tez.MergeFileRecordProcessor.run(MergeFileRecordProcessor.java:153) > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:267) > at > org.apache.hadoop.hive.ql.exec.tez.MergeFileTezProcessor.run(MergeFileTezProcessor.java:42) > at > org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:374) > at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73) > at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1730) > at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61) > at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37) > at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) > at > com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:125) > at > com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:69) > at > com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:78) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > {code} -- This message was sent by Atlassian Jira (v8.20.1#820001)