[ https://issues.apache.org/jira/browse/HIVE-2150?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Steven Wong updated HIVE-2150:
------------------------------

    Description: 
When a dynamic-partition insert and bucketing are used together on an s3n table,
the insert does not create files for empty buckets. This causes the following
exception when a sampling query includes one of the empty buckets.

{noformat}
FAILED: Hive Internal Error: java.lang.RuntimeException(Cannot get bucket path for bucket 1)
java.lang.RuntimeException: Cannot get bucket path for bucket 1
        at org.apache.hadoop.hive.ql.metadata.Partition.getBucketPath(Partition.java:367)
        at org.apache.hadoop.hive.ql.optimizer.SamplePruner.prune(SamplePruner.java:186)
        at org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils.setTaskPlan(GenMapRedUtils.java:603)
        at org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils.setTaskPlan(GenMapRedUtils.java:514)
        at org.apache.hadoop.hive.ql.optimizer.GenMRFileSink1.processFS(GenMRFileSink1.java:586)
        at org.apache.hadoop.hive.ql.optimizer.GenMRFileSink1.process(GenMRFileSink1.java:145)
        at org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:89)
        at org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatch(DefaultGraphWalker.java:88)
        at org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:55)
        at org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:67)
        at org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:67)
        at org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:67)
        at org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:67)
        at org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.startWalking(DefaultGraphWalker.java:102)
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genMapRedTasks(SemanticAnalyzer.java:6336)
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:6615)
        at org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:238)
        at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:332)
        at org.apache.hadoop.hive.ql.Driver.run(Driver.java:686)
        at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:149)
        at org.apache.hadoop.hive.cli.CliDriver.processLineInternal(CliDriver.java:228)
        at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:209)
        at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:355)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
        at java.lang.reflect.Method.invoke(Method.java:597)
        at org.apache.hadoop.util.RunJar.main(RunJar.java:156)
Caused by: java.lang.ArrayIndexOutOfBoundsException: 1
        at org.apache.hadoop.hive.ql.metadata.Partition.getBucketPath(Partition.java:365)
        ... 27 more
{noformat}
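
Judging from the stack trace, the proximate failure is in Partition.getBucketPath, which appears to index the partition's file list by bucket number; with four declared buckets but data files written only for the non-empty ones, index 1 falls outside the array. Below is a minimal sketch of that presumed lookup, assuming it lists the partition directory and indexes the result. Everything beyond the names appearing in the trace is illustrative, not the actual Hive source.

{noformat}
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class BucketPathSketch {
    // Presumed shape of Partition.getBucketPath (simplified): return the
    // bucketNum-th data file in the partition directory, assuming bucket
    // files sort by name.
    static Path getBucketPath(FileSystem fs, Path partitionDir, int bucketNum)
            throws java.io.IOException {
        FileStatus[] srcs = fs.listStatus(partitionDir); // one entry per file actually written
        // No bounds check: if the s3n insert skipped empty buckets, srcs has
        // fewer entries than the declared bucket count, so srcs[1] throws
        // ArrayIndexOutOfBoundsException: 1, matching the "Caused by" above.
        return srcs[bucketNum].getPath();
    }
}
{noformat}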

Here is a repro case:

{noformat}
CREATE TABLE tab
(x string)
PARTITIONED BY (p1 string, p2 string)
CLUSTERED BY (x) INTO 4 BUCKETS
LOCATION 's3n://some/path';

SET hive.exec.dynamic.partition=true;
SET hive.enforce.bucketing=true;

INSERT OVERWRITE TABLE tab
PARTITION (p1='p', p2)
SELECT 'v1', 'v2'
FROM dual;

SELECT *
FROM tab TABLESAMPLE (BUCKET 2 OUT OF 4);
{noformat}
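
A quick way to confirm the precondition is to list the partition directory after the insert; with hive.enforce.bucketing=true and 4 buckets, four data files (000000_0 through 000003_0) would normally be expected. The following is a rough sketch using the Hadoop FileSystem API; the s3n URI and partition values are the placeholders from the repro above, not a real location.

{noformat}
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListBucketFiles {
    public static void main(String[] args) throws Exception {
        // Partition directory produced by the repro's insert; 's3n://some/path'
        // is the placeholder table location from the CREATE TABLE above.
        Path dir = new Path("s3n://some/path/p1=p/p2=v2");
        FileSystem fs = FileSystem.get(URI.create(dir.toString()), new Configuration());
        FileStatus[] files = fs.listStatus(dir);
        // Fewer than 4 entries here reproduces the condition behind the
        // sampling failure above.
        System.out.println("bucket files found: " + files.length);
        for (FileStatus f : files) {
            System.out.println(f.getPath());
        }
    }
}
{noformat}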


> Sampling fails after dynamic-partition insert into a bucketed s3n table
> -----------------------------------------------------------------------
>
>                 Key: HIVE-2150
>                 URL: https://issues.apache.org/jira/browse/HIVE-2150
>             Project: Hive
>          Issue Type: Bug
>    Affects Versions: 0.7.0
>            Reporter: Steven Wong
>

