[ https://issues.apache.org/jira/browse/HIVE-2150?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Steven Wong updated HIVE-2150: ------------------------------ Description: When using dynamic-partition insert and bucketing together on an s3n table, the insert does not create files for empty buckets. This will result in the following exception when running a sampling query that includes the empty buckets. {noformat} FAILED: Hive Internal Error: java.lang.RuntimeException(Cannot get bucket path for bucket 1) java.lang.RuntimeException: Cannot get bucket path for bucket 1 at org.apache.hadoop.hive.ql.metadata.Partition.getBucketPath(Partition.java:367) at org.apache.hadoop.hive.ql.optimizer.SamplePruner.prune(SamplePruner.java:186) at org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils.setTaskPlan(GenMapRedUtils.java:603) at org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils.setTaskPlan(GenMapRedUtils.java:514) at org.apache.hadoop.hive.ql.optimizer.GenMRFileSink1.processFS(GenMRFileSink1.java:586) at org.apache.hadoop.hive.ql.optimizer.GenMRFileSink1.process(GenMRFileSink1.java:145) at org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:89) at org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatch(DefaultGraphWalker.java:88) at org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:55) at org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:67) at org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:67) at org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:67) at org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:67) at org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.startWalking(DefaultGraphWalker.java:102) at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genMapRedTasks(SemanticAnalyzer.java:6336) at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:6615) at org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:238) at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:332) at org.apache.hadoop.hive.ql.Driver.run(Driver.java:686) at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:149) at org.apache.hadoop.hive.cli.CliDriver.processLineInternal(CliDriver.java:228) at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:209) at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:355) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) at java.lang.reflect.Method.invoke(Method.java:597) at org.apache.hadoop.util.RunJar.main(RunJar.java:156) Caused by: java.lang.ArrayIndexOutOfBoundsException: 1 at org.apache.hadoop.hive.ql.metadata.Partition.getBucketPath(Partition.java:365) ... 27 more {noformat} Here is a repro case: {noformat} CREATE TABLE tab (x string) PARTITIONED BY (p1 string, p2 string) CLUSTERED BY (x) INTO 4 BUCKETS LOCATION 's3n://some/path'; SET hive.exec.dynamic.partition=true; SET hive.enforce.bucketing=true; INSERT OVERWRITE TABLE tab PARTITION (p1='p', p2) SELECT 'v1', 'v2' FROM dual; SELECT * FROM tab TABLESAMPLE (BUCKET 2 OUT OF 4); {noformat} was: When using dynamic-partition insert and bucketing together on an s3n table, the insert does not create files for empty buckets. This will result in the following exception when running a sampling query that includes the empty buckets. FAILED: Hive Internal Error: java.lang.RuntimeException(Cannot get bucket path for bucket 1) java.lang.RuntimeException: Cannot get bucket path for bucket 1 at org.apache.hadoop.hive.ql.metadata.Partition.getBucketPath(Partition.java:367) at org.apache.hadoop.hive.ql.optimizer.SamplePruner.prune(SamplePruner.java:186) at org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils.setTaskPlan(GenMapRedUtils.java:603) at org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils.setTaskPlan(GenMapRedUtils.java:514) at org.apache.hadoop.hive.ql.optimizer.GenMRFileSink1.processFS(GenMRFileSink1.java:586) at org.apache.hadoop.hive.ql.optimizer.GenMRFileSink1.process(GenMRFileSink1.java:145) at org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:89) at org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatch(DefaultGraphWalker.java:88) at org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:55) at org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:67) at org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:67) at org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:67) at org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:67) at org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.startWalking(DefaultGraphWalker.java:102) at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genMapRedTasks(SemanticAnalyzer.java:6336) at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:6615) at org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:238) at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:332) at org.apache.hadoop.hive.ql.Driver.run(Driver.java:686) at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:149) at org.apache.hadoop.hive.cli.CliDriver.processLineInternal(CliDriver.java:228) at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:209) at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:355) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) at java.lang.reflect.Method.invoke(Method.java:597) at org.apache.hadoop.util.RunJar.main(RunJar.java:156) Caused by: java.lang.ArrayIndexOutOfBoundsException: 1 at org.apache.hadoop.hive.ql.metadata.Partition.getBucketPath(Partition.java:365) ... 27 more Here is a repro case: CREATE TABLE tab (x string) PARTITIONED BY (p1 string, p2 string) CLUSTERED BY (x) INTO 4 BUCKETS LOCATION 's3n://some/path'; SET hive.exec.dynamic.partition=true; SET hive.enforce.bucketing=true; INSERT OVERWRITE TABLE tab PARTITION (p1='p', p2) SELECT 'v1', 'v2' FROM dual; SELECT * FROM tab TABLESAMPLE (BUCKET 2 OUT OF 4); > Sampling fails after dynamic-partition insert into a bucketed s3n table > ----------------------------------------------------------------------- > > Key: HIVE-2150 > URL: https://issues.apache.org/jira/browse/HIVE-2150 > Project: Hive > Issue Type: Bug > Affects Versions: 0.7.0 > Reporter: Steven Wong > > When using dynamic-partition insert and bucketing together on an s3n table, > the insert does not create files for empty buckets. This will result in the > following exception when running a sampling query that includes the empty > buckets. > {noformat} > FAILED: Hive Internal Error: java.lang.RuntimeException(Cannot get bucket > path for bucket 1) > java.lang.RuntimeException: Cannot get bucket path for bucket 1 > at > org.apache.hadoop.hive.ql.metadata.Partition.getBucketPath(Partition.java:367) > at > org.apache.hadoop.hive.ql.optimizer.SamplePruner.prune(SamplePruner.java:186) > at > org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils.setTaskPlan(GenMapRedUtils.java:603) > at > org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils.setTaskPlan(GenMapRedUtils.java:514) > at > org.apache.hadoop.hive.ql.optimizer.GenMRFileSink1.processFS(GenMRFileSink1.java:586) > at > org.apache.hadoop.hive.ql.optimizer.GenMRFileSink1.process(GenMRFileSink1.java:145) > at > org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:89) > at > org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatch(DefaultGraphWalker.java:88) > at > org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:55) > at > org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:67) > at > org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:67) > at > org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:67) > at > org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:67) > at > org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.startWalking(DefaultGraphWalker.java:102) > at > org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genMapRedTasks(SemanticAnalyzer.java:6336) > at > org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:6615) > at > org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:238) > at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:332) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:686) > at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:149) > at > org.apache.hadoop.hive.cli.CliDriver.processLineInternal(CliDriver.java:228) > at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:209) > at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:355) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) > at java.lang.reflect.Method.invoke(Method.java:597) > at org.apache.hadoop.util.RunJar.main(RunJar.java:156) > Caused by: java.lang.ArrayIndexOutOfBoundsException: 1 > at > org.apache.hadoop.hive.ql.metadata.Partition.getBucketPath(Partition.java:365) > ... 27 more > {noformat} > Here is a repro case: > {noformat} > CREATE TABLE tab > (x string) > PARTITIONED BY (p1 string, p2 string) > CLUSTERED BY (x) INTO 4 BUCKETS > LOCATION 's3n://some/path'; > SET hive.exec.dynamic.partition=true; > SET hive.enforce.bucketing=true; > INSERT OVERWRITE TABLE tab > PARTITION (p1='p', p2) > SELECT 'v1', 'v2' > FROM dual; > SELECT * > FROM tab TABLESAMPLE (BUCKET 2 OUT OF 4); > {noformat} -- This message is automatically generated by JIRA. For more information on JIRA, see: http://www.atlassian.com/software/jira