[ https://issues.apache.org/jira/browse/HIVE-27674?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
László Bodor updated HIVE-27674: -------------------------------- Description: when a union job creates files only in specific subdirs, this can happen: {code} ERROR : Job Commit failed with exception 'org.apache.hadoop.hive.ql.metadata.HiveException(java.io.FileNotFoundException: File hdfs://c3857-node3.coelab.cloudera.com:8020/warehouse/tablespace/managed/hive/lbodor_test2/dt=20230817/base_0000001/HIVE_UNION_SUBDIR_1 does not exist.)' org.apache.hadoop.hive.ql.metadata.HiveException: java.io.FileNotFoundException: File hdfs://c3857-node3.coelab.cloudera.com:8020/warehouse/tablespace/managed/hive/lbodor_test2/dt=20230817/base_0000001/HIVE_UNION_SUBDIR_1 does not exist. at org.apache.hadoop.hive.ql.exec.FileSinkOperator.jobCloseOp(FileSinkOperator.java:1528) at org.apache.hadoop.hive.ql.exec.Operator.jobClose(Operator.java:797) at org.apache.hadoop.hive.ql.exec.Operator.jobClose(Operator.java:802) at org.apache.hadoop.hive.ql.exec.Operator.jobClose(Operator.java:802) at org.apache.hadoop.hive.ql.exec.Operator.jobClose(Operator.java:802) at org.apache.hadoop.hive.ql.exec.Operator.jobClose(Operator.java:802) at org.apache.hadoop.hive.ql.exec.tez.TezTask.close(TezTask.java:646) at org.apache.hadoop.hive.ql.exec.tez.TezTask.execute(TezTask.java:344) at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:213) at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:105) at org.apache.hadoop.hive.ql.Executor.launchTask(Executor.java:357) at org.apache.hadoop.hive.ql.Executor.launchTasks(Executor.java:330) at org.apache.hadoop.hive.ql.Executor.runTasks(Executor.java:246) at org.apache.hadoop.hive.ql.Executor.execute(Executor.java:109) at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:770) at org.apache.hadoop.hive.ql.Driver.run(Driver.java:504) at org.apache.hadoop.hive.ql.Driver.run(Driver.java:498) at org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:166) at 
org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:229) at org.apache.hive.service.cli.operation.SQLOperation.access$700(SQLOperation.java:91) at org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork$1.run(SQLOperation.java:329) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1898) at org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork.run(SQLOperation.java:347) at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) Caused by: java.io.FileNotFoundException: File hdfs://c3857-node3.coelab.cloudera.com:8020/warehouse/tablespace/managed/hive/lbodor_test2/dt=20230817/base_0000001/HIVE_UNION_SUBDIR_1 does not exist. 
at org.apache.hadoop.hdfs.DistributedFileSystem.listStatusInternal(DistributedFileSystem.java:1097) at org.apache.hadoop.hdfs.DistributedFileSystem.access$600(DistributedFileSystem.java:145) at org.apache.hadoop.hdfs.DistributedFileSystem$24.doCall(DistributedFileSystem.java:1168) at org.apache.hadoop.hdfs.DistributedFileSystem$24.doCall(DistributedFileSystem.java:1165) at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81) at org.apache.hadoop.hdfs.DistributedFileSystem.listStatus(DistributedFileSystem.java:1175) at org.apache.hadoop.hive.ql.exec.Utilities.removeTempOrDuplicateFiles(Utilities.java:1794) at org.apache.hadoop.hive.ql.exec.Utilities.handleDirectInsertTableFinalPath(Utilities.java:4579) at org.apache.hadoop.hive.ql.exec.FileSinkOperator.jobCloseOp(FileSinkOperator.java:1522) ... 31 more {code} please find repro in PR > Missing union subdir should be ignored in some cases > --------------------------------------------------- > > Key: HIVE-27674 > URL: https://issues.apache.org/jira/browse/HIVE-27674 > Project: Hive > Issue Type: Bug > Reporter: László Bodor > Assignee: László Bodor > Priority: Major > Labels: pull-request-available > > when a union job creates files only in specific subdirs, this can happen: > {code} > ERROR : Job Commit failed with exception > 'org.apache.hadoop.hive.ql.metadata.HiveException(java.io.FileNotFoundException: > File > hdfs://c3857-node3.coelab.cloudera.com:8020/warehouse/tablespace/managed/hive/lbodor_test2/dt=20230817/base_0000001/HIVE_UNION_SUBDIR_1 > does not exist.)' > org.apache.hadoop.hive.ql.metadata.HiveException: > java.io.FileNotFoundException: File > hdfs://c3857-node3.coelab.cloudera.com:8020/warehouse/tablespace/managed/hive/lbodor_test2/dt=20230817/base_0000001/HIVE_UNION_SUBDIR_1 > does not exist. 
> at > org.apache.hadoop.hive.ql.exec.FileSinkOperator.jobCloseOp(FileSinkOperator.java:1528) > at org.apache.hadoop.hive.ql.exec.Operator.jobClose(Operator.java:797) > at org.apache.hadoop.hive.ql.exec.Operator.jobClose(Operator.java:802) > at org.apache.hadoop.hive.ql.exec.Operator.jobClose(Operator.java:802) > at org.apache.hadoop.hive.ql.exec.Operator.jobClose(Operator.java:802) > at org.apache.hadoop.hive.ql.exec.Operator.jobClose(Operator.java:802) > at org.apache.hadoop.hive.ql.exec.tez.TezTask.close(TezTask.java:646) > at org.apache.hadoop.hive.ql.exec.tez.TezTask.execute(TezTask.java:344) > at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:213) > at > org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:105) > at org.apache.hadoop.hive.ql.Executor.launchTask(Executor.java:357) > at org.apache.hadoop.hive.ql.Executor.launchTasks(Executor.java:330) > at org.apache.hadoop.hive.ql.Executor.runTasks(Executor.java:246) > at org.apache.hadoop.hive.ql.Executor.execute(Executor.java:109) > at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:770) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:504) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:498) > at > org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:166) > at > org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:229) > at > org.apache.hive.service.cli.operation.SQLOperation.access$700(SQLOperation.java:91) > at > org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork$1.run(SQLOperation.java:329) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1898) > at > org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork.run(SQLOperation.java:347) > at > java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) > at 
java.util.concurrent.FutureTask.run(FutureTask.java:266) > at > java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) > at java.util.concurrent.FutureTask.run(FutureTask.java:266) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > Caused by: java.io.FileNotFoundException: File > hdfs://c3857-node3.coelab.cloudera.com:8020/warehouse/tablespace/managed/hive/lbodor_test2/dt=20230817/base_0000001/HIVE_UNION_SUBDIR_1 > does not exist. > at > org.apache.hadoop.hdfs.DistributedFileSystem.listStatusInternal(DistributedFileSystem.java:1097) > at > org.apache.hadoop.hdfs.DistributedFileSystem.access$600(DistributedFileSystem.java:145) > at > org.apache.hadoop.hdfs.DistributedFileSystem$24.doCall(DistributedFileSystem.java:1168) > at > org.apache.hadoop.hdfs.DistributedFileSystem$24.doCall(DistributedFileSystem.java:1165) > at > org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81) > at > org.apache.hadoop.hdfs.DistributedFileSystem.listStatus(DistributedFileSystem.java:1175) > at > org.apache.hadoop.hive.ql.exec.Utilities.removeTempOrDuplicateFiles(Utilities.java:1794) > at > org.apache.hadoop.hive.ql.exec.Utilities.handleDirectInsertTableFinalPath(Utilities.java:4579) > at > org.apache.hadoop.hive.ql.exec.FileSinkOperator.jobCloseOp(FileSinkOperator.java:1522) > ... 31 more > {code} > please find repro in PR -- This message was sent by Atlassian Jira (v8.20.10#820010)