[ 
https://issues.apache.org/jira/browse/IMPALA-5225?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Quanlong Huang reassigned IMPALA-5225:
--------------------------------------

    Assignee: Quanlong Huang

> Queries that reference >6Million files and 500K partitions can fail with 
> java.lang.OutOfMemoryError while serializing PlanFragment
> ----------------------------------------------------------------------------------------------------------------------------------
>
>                 Key: IMPALA-5225
>                 URL: https://issues.apache.org/jira/browse/IMPALA-5225
>             Project: IMPALA
>          Issue Type: Bug
>          Components: Distributed Exec
>    Affects Versions: Impala 2.6.0
>            Reporter: Mostafa Mokhtar
>            Assignee: Quanlong Huang
>            Priority: Major
>              Labels: scalability
>
> Exception
> {code}
> I0418 16:35:24.592319 55911 jni-util.cc:176] java.lang.OutOfMemoryError: 
> Requested array size exceeds VM limit
>         at java.util.Arrays.copyOf(Arrays.java:2271)
>         at java.io.ByteArrayOutputStream.grow(ByteArrayOutputStream.java:113)
>         at 
> java.io.ByteArrayOutputStream.ensureCapacity(ByteArrayOutputStream.java:93)
>         at java.io.ByteArrayOutputStream.write(ByteArrayOutputStream.java:140)
>         at 
> org.apache.thrift.transport.TIOStreamTransport.write(TIOStreamTransport.java:145)
>         at 
> org.apache.thrift.protocol.TBinaryProtocol.writeString(TBinaryProtocol.java:187)
>         at 
> org.apache.impala.thrift.THdfsFileSplit$THdfsFileSplitStandardScheme.write(THdfsFileSplit.java:918)
>         at 
> org.apache.impala.thrift.THdfsFileSplit$THdfsFileSplitStandardScheme.write(THdfsFileSplit.java:818)
>         at 
> org.apache.impala.thrift.THdfsFileSplit.write(THdfsFileSplit.java:731)
>         at 
> org.apache.impala.thrift.TScanRange$TScanRangeStandardScheme.write(TScanRange.java:548)
>         at 
> org.apache.impala.thrift.TScanRange$TScanRangeStandardScheme.write(TScanRange.java:492)
>         at org.apache.impala.thrift.TScanRange.write(TScanRange.java:418)
>         at 
> org.apache.impala.thrift.TScanRangeLocationList$TScanRangeLocationListStandardScheme.write(TScanRangeLocationList.java:480)
>         at 
> org.apache.impala.thrift.TScanRangeLocationList$TScanRangeLocationListStandardScheme.write(TScanRangeLocationList.java:423)
>         at 
> org.apache.impala.thrift.TScanRangeLocationList.write(TScanRangeLocationList.java:363)
>         at 
> org.apache.impala.thrift.TPlanExecInfo$TPlanExecInfoStandardScheme.write(TPlanExecInfo.java:551)
>         at 
> org.apache.impala.thrift.TPlanExecInfo$TPlanExecInfoStandardScheme.write(TPlanExecInfo.java:450)
>         at 
> org.apache.impala.thrift.TPlanExecInfo.write(TPlanExecInfo.java:391)
>         at 
> org.apache.impala.thrift.TQueryExecRequest$TQueryExecRequestStandardScheme.write(TQueryExecRequest.java:1366)
>         at 
> org.apache.impala.thrift.TQueryExecRequest$TQueryExecRequestStandardScheme.write(TQueryExecRequest.java:1210)
>         at 
> org.apache.impala.thrift.TQueryExecRequest.write(TQueryExecRequest.java:1050)
>         at 
> org.apache.impala.thrift.TExecRequest$TExecRequestStandardScheme.write(TExecRequest.java:1382)
>         at 
> org.apache.impala.thrift.TExecRequest$TExecRequestStandardScheme.write(TExecRequest.java:1225)
>         at org.apache.impala.thrift.TExecRequest.write(TExecRequest.java:1050)
>         at org.apache.thrift.TSerializer.serialize(TSerializer.java:79)
>         at 
> org.apache.impala.service.JniFrontend.createExecRequest(JniFrontend.java:163)
> {code}
> Plan
> {code}
> | Estimated Per-Host Requirements: Memory=23.50GB VCores=12                   
>                 |
> | WARNING: The following tables are missing relevant table and/or column 
> statistics.          |
> | tpcds_100_parquet.store_returns, tpcds_100_parquet.store_sales              
>                 |
> |                                                                             
>                 |
> | PLAN-ROOT SINK                                                              
>                 |
> | |                                                                           
>                 |
> | 118:EXCHANGE [UNPARTITIONED]                                                
>                 |
> | |                                                                           
>                 |
> | 58:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]                                 
>                 |
> | |                                                                           
>                 |
> | |--117:EXCHANGE [BROADCAST]                                                 
>                 |
> | |  |                                                                        
>                 |
> | |  116:AGGREGATE [FINALIZE]                                                 
>                 |
> | |  |  output: count:merge(*)                                                
>                 |
> | |  |  group by: a.ss_ticket_number                                          
>                 |
> | |  |  having: count(*) > 9999999999                                         
>                 |
> | |  |                                                                        
>                 |
> | |  115:EXCHANGE [HASH(a.ss_ticket_number)]                                  
>                 |
> | |  |                                                                        
>                 |
> | |  47:AGGREGATE [STREAMING]                                                 
>                 |
> | |  |  output: count(*)                                                      
>                 |
> | |  |  group by: a.ss_ticket_number                                          
>                 |
> | |  |                                                                        
>                 |
> | |  46:HASH JOIN [INNER JOIN, PARTITIONED]                                   
>                 |
> | |  |  hash predicates: a.ss_item_sk = b.sr_item_sk, a.ss_ticket_number = 
> b.sr_ticket_number |
> | |  |  runtime filters: RF023 <- b.sr_ticket_number, RF022 <- b.sr_item_sk   
>                 |
> | |  |                                                                        
>                 |
> | |  |--114:EXCHANGE [HASH(b.sr_item_sk,b.sr_ticket_number)]                  
>                 |
> | |  |  |                                                                     
>                 |
> | |  |  45:SCAN HDFS [tpcds_100_parquet.store_returns b]                      
>                 |
> | |  |     partitions=681/2004 files=9532 size=700.40MB                       
>                 |
> | |  |                                                                        
>                 |
> | |  113:EXCHANGE [HASH(a.ss_item_sk,a.ss_ticket_number)]                     
>                 |
> | |  |                                                                        
>                 |
> | |  44:SCAN HDFS [tpcds_100_parquet.store_sales a]                           
>                 |
> | |     partitions=37168/37168 files=485278 size=2.80GB                       
>                 |
> | |     runtime filters: RF023 -> a.ss_ticket_number, RF022 -> a.ss_item_sk   
>                 |
> | |                                                                           
>                 |
> | 57:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]                                 
>                 |
> | |                                                                           
>                 |
> | |--112:EXCHANGE [BROADCAST]                                                 
>                 |
> | |  |                                                                        
>                 |
> | |  111:AGGREGATE [FINALIZE]                                                 
>                 |
> | |  |  output: count:merge(*)                                                
>                 |
> | |  |  group by: a.ss_ticket_number                                          
>                 |
> | |  |  having: count(*) > 9999999999                                         
>                 |
> | |  |                                                                        
>                 |
> | |  110:EXCHANGE [HASH(a.ss_ticket_number)]                                  
>                 |
> | |  |                                                                        
>                 |
> | |  43:AGGREGATE [STREAMING]                                                 
>                 |
> | |  |  output: count(*)                                                      
>                 |
> | |  |  group by: a.ss_ticket_number                                          
>                 |
> | |  |                                                                        
>                 |
> | |  42:HASH JOIN [INNER JOIN, PARTITIONED]                                   
>                 |
> | |  |  hash predicates: a.ss_item_sk = b.sr_item_sk, a.ss_ticket_number = 
> b.sr_ticket_number |
> | |  |  runtime filters: RF021 <- b.sr_ticket_number, RF020 <- b.sr_item_sk   
>                 |
> | |  |                                                                        
>                 |
> | |  |--109:EXCHANGE [HASH(b.sr_item_sk,b.sr_ticket_number)]                  
>                 |
> | |  |  |                                                                     
>                 |
> | |  |  41:SCAN HDFS [tpcds_100_parquet.store_returns b]                      
>                 |
> | |  |     partitions=681/2004 files=9532 size=700.40MB                       
>                 |
> | |  |                                                                        
>                 |
> | |  108:EXCHANGE [HASH(a.ss_item_sk,a.ss_ticket_number)]                     
>                 |
> | |  |                                                                        
>                 |
> | |  40:SCAN HDFS [tpcds_100_parquet.store_sales a]                           
>                 |
> | |     partitions=37168/37168 files=485278 size=2.80GB                       
>                 |
> | |     runtime filters: RF021 -> a.ss_ticket_number, RF020 -> a.ss_item_sk   
>                 |
> | |                                                                           
>                 |
> | 56:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]                                 
>                 |
> | |                                                                           
>                 |
> | |--107:EXCHANGE [BROADCAST]                                                 
>                 |
> | |  |                                                                        
>                 |
> | |  106:AGGREGATE [FINALIZE]                                                 
>                 |
> | |  |  output: count:merge(*)                                                
>                 |
> | |  |  group by: a.ss_ticket_number                                          
>                 |
> | |  |  having: count(*) > 9999999999                                         
>                 |
> | |  |                                                                        
>                 |
> | |  105:EXCHANGE [HASH(a.ss_ticket_number)]                                  
>                 |
> | |  |                                                                        
>                 |
> | |  39:AGGREGATE [STREAMING]                                                 
>                 |
> | |  |  output: count(*)                                                      
>                 |
> | |  |  group by: a.ss_ticket_number                                          
>                 |
> | |  |                                                                        
>                 |
> | |  38:HASH JOIN [INNER JOIN, PARTITIONED]                                   
>                 |
> | |  |  hash predicates: a.ss_item_sk = b.sr_item_sk, a.ss_ticket_number = 
> b.sr_ticket_number |
> | |  |  runtime filters: RF019 <- b.sr_ticket_number, RF018 <- b.sr_item_sk   
>                 |
> | |  |                                                                        
>                 |
> | |  |--104:EXCHANGE [HASH(b.sr_item_sk,b.sr_ticket_number)]                  
>                 |
> | |  |  |                                                                     
>                 |
> | |  |  37:SCAN HDFS [tpcds_100_parquet.store_returns b]                      
>                 |
> | |  |     partitions=681/2004 files=9532 size=700.40MB                       
>                 |
> | |  |                                                                        
>                 |
> | |  103:EXCHANGE [HASH(a.ss_item_sk,a.ss_ticket_number)]                     
>                 |
> | |  |                                                                        
>                 |
> | |  36:SCAN HDFS [tpcds_100_parquet.store_sales a]                           
>                 |
> | |     partitions=37168/37168 files=485278 size=2.80GB                       
>                 |
> | |     runtime filters: RF019 -> a.ss_ticket_number, RF018 -> a.ss_item_sk   
>                 |
> | |                                                                           
>                 |
> | 55:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]                                 
>                 |
> | |                                                                           
>                 |
> | |--102:EXCHANGE [BROADCAST]                                                 
>                 |
> | |  |                                                                        
>                 |
> | |  101:AGGREGATE [FINALIZE]                                                 
>                 |
> | |  |  output: count:merge(*)                                                
>                 |
> | |  |  group by: a.ss_ticket_number                                          
>                 |
> | |  |  having: count(*) > 9999999999                                         
>                 |
> | |  |                                                                        
>                 |
> | |  100:EXCHANGE [HASH(a.ss_ticket_number)]                                  
>                 |
> | |  |                                                                        
>                 |
> | |  35:AGGREGATE [STREAMING]                                                 
>                 |
> | |  |  output: count(*)                                                      
>                 |
> | |  |  group by: a.ss_ticket_number                                          
>                 |
> | |  |                                                                        
>                 |
> | |  34:HASH JOIN [INNER JOIN, PARTITIONED]                                   
>                 |
> | |  |  hash predicates: a.ss_item_sk = b.sr_item_sk, a.ss_ticket_number = 
> b.sr_ticket_number |
> | |  |  runtime filters: RF017 <- b.sr_ticket_number, RF016 <- b.sr_item_sk   
>                 |
> | |  |                                                                        
>                 |
> | |  |--99:EXCHANGE [HASH(b.sr_item_sk,b.sr_ticket_number)]                   
>                 |
> | |  |  |                                                                     
>                 |
> | |  |  33:SCAN HDFS [tpcds_100_parquet.store_returns b]                      
>                 |
> | |  |     partitions=681/2004 files=9532 size=700.40MB                       
>                 |
> | |  |                                                                        
>                 |
> | |  98:EXCHANGE [HASH(a.ss_item_sk,a.ss_ticket_number)]                      
>                 |
> | |  |                                                                        
>                 |
> | |  32:SCAN HDFS [tpcds_100_parquet.store_sales a]                           
>                 |
> | |     partitions=37168/37168 files=485278 size=2.80GB                       
>                 |
> | |     runtime filters: RF017 -> a.ss_ticket_number, RF016 -> a.ss_item_sk   
>                 |
> | |                                                                           
>                 |
> | 54:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]                                 
>                 |
> | |                                                                           
>                 |
> | |--97:EXCHANGE [BROADCAST]                                                  
>                 |
> | |  |                                                                        
>                 |
> | |  96:AGGREGATE [FINALIZE]                                                  
>                 |
> | |  |  output: count:merge(*)                                                
>                 |
> | |  |  group by: a.ss_ticket_number                                          
>                 |
> | |  |  having: count(*) > 9999999999                                         
>                 |
> | |  |                                                                        
>                 |
> | |  95:EXCHANGE [HASH(a.ss_ticket_number)]                                   
>                 |
> | |  |                                                                        
>                 |
> | |  31:AGGREGATE [STREAMING]                                                 
>                 |
> | |  |  output: count(*)                                                      
>                 |
> | |  |  group by: a.ss_ticket_number                                          
>                 |
> | |  |                                                                        
>                 |
> | |  30:HASH JOIN [INNER JOIN, PARTITIONED]                                   
>                 |
> | |  |  hash predicates: a.ss_item_sk = b.sr_item_sk, a.ss_ticket_number = 
> b.sr_ticket_number |
> | |  |  runtime filters: RF014 <- b.sr_item_sk, RF015 <- b.sr_ticket_number   
>                 |
> | |  |                                                                        
>                 |
> | |  |--94:EXCHANGE [HASH(b.sr_item_sk,b.sr_ticket_number)]                   
>                 |
> | |  |  |                                                                     
>                 |
> | |  |  29:SCAN HDFS [tpcds_100_parquet.store_returns b]                      
>                 |
> | |  |     partitions=681/2004 files=9532 size=700.40MB                       
>                 |
> | |  |                                                                        
>                 |
> | |  93:EXCHANGE [HASH(a.ss_item_sk,a.ss_ticket_number)]                      
>                 |
> | |  |                                                                        
>                 |
> | |  28:SCAN HDFS [tpcds_100_parquet.store_sales a]                           
>                 |
> | |     partitions=37168/37168 files=485278 size=2.80GB                       
>                 |
> | |     runtime filters: RF014 -> a.ss_item_sk, RF015 -> a.ss_ticket_number   
>                 |
> | |                                                                           
>                 |
> | 53:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]                                 
>                 |
> | |                                                                           
>                 |
> | |--92:EXCHANGE [BROADCAST]                                                  
>                 |
> | |  |                                                                        
>                 |
> | |  91:AGGREGATE [FINALIZE]                                                  
>                 |
> | |  |  output: count:merge(*)                                                
>                 |
> | |  |  group by: a.ss_ticket_number                                          
>                 |
> | |  |  having: count(*) > 9999999999                                         
>                 |
> | |  |                                                                        
>                 |
> | |  90:EXCHANGE [HASH(a.ss_ticket_number)]                                   
>                 |
> | |  |                                                                        
>                 |
> | |  27:AGGREGATE [STREAMING]                                                 
>                 |
> | |  |  output: count(*)                                                      
>                 |
> | |  |  group by: a.ss_ticket_number                                          
>                 |
> | |  |                                                                        
>                 |
> | |  26:HASH JOIN [INNER JOIN, PARTITIONED]                                   
>                 |
> | |  |  hash predicates: a.ss_item_sk = b.sr_item_sk, a.ss_ticket_number = 
> b.sr_ticket_number |
> | |  |  runtime filters: RF012 <- b.sr_item_sk, RF013 <- b.sr_ticket_number   
>                 |
> | |  |                                                                        
>                 |
> | |  |--89:EXCHANGE [HASH(b.sr_item_sk,b.sr_ticket_number)]                   
>                 |
> | |  |  |                                                                     
>                 |
> | |  |  25:SCAN HDFS [tpcds_100_parquet.store_returns b]                      
>                 |
> | |  |     partitions=681/2004 files=9532 size=700.40MB                       
>                 |
> | |  |                                                                        
>                 |
> | |  88:EXCHANGE [HASH(a.ss_item_sk,a.ss_ticket_number)]                      
>                 |
> | |  |                                                                        
>                 |
> | |  24:SCAN HDFS [tpcds_100_parquet.store_sales a]                           
>                 |
> | |     partitions=37168/37168 files=485278 size=2.80GB                       
>                 |
> | |     runtime filters: RF012 -> a.ss_item_sk, RF013 -> a.ss_ticket_number   
>                 |
> | |                                                                           
>                 |
> | 52:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]                                 
>                 |
> | |                                                                           
>                 |
> | |--87:EXCHANGE [BROADCAST]                                                  
>                 |
> | |  |                                                                        
>                 |
> | |  86:AGGREGATE [FINALIZE]                                                  
>                 |
> | |  |  output: count:merge(*)                                                
>                 |
> | |  |  group by: a.ss_ticket_number                                          
>                 |
> | |  |  having: count(*) > 9999999999                                         
>                 |
> | |  |                                                                        
>                 |
> | |  85:EXCHANGE [HASH(a.ss_ticket_number)]                                   
>                 |
> | |  |                                                                        
>                 |
> | |  23:AGGREGATE [STREAMING]                                                 
>                 |
> | |  |  output: count(*)                                                      
>                 |
> | |  |  group by: a.ss_ticket_number                                          
>                 |
> | |  |                                                                        
>                 |
> | |  22:HASH JOIN [INNER JOIN, PARTITIONED]                                   
>                 |
> | |  |  hash predicates: a.ss_item_sk = b.sr_item_sk, a.ss_ticket_number = 
> b.sr_ticket_number |
> | |  |  runtime filters: RF010 <- b.sr_item_sk, RF011 <- b.sr_ticket_number   
>                 |
> | |  |                                                                        
>                 |
> | |  |--84:EXCHANGE [HASH(b.sr_item_sk,b.sr_ticket_number)]                   
>                 |
> | |  |  |                                                                     
>                 |
> | |  |  21:SCAN HDFS [tpcds_100_parquet.store_returns b]                      
>                 |
> | |  |     partitions=681/2004 files=9532 size=700.40MB                       
>                 |
> | |  |                                                                        
>                 |
> | |  83:EXCHANGE [HASH(a.ss_item_sk,a.ss_ticket_number)]                      
>                 |
> | |  |                                                                        
>                 |
> | |  20:SCAN HDFS [tpcds_100_parquet.store_sales a]                           
>                 |
> | |     partitions=37168/37168 files=485278 size=2.80GB                       
>                 |
> | |     runtime filters: RF010 -> a.ss_item_sk, RF011 -> a.ss_ticket_number   
>                 |
> | |                                                                           
>                 |
> | 51:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]                                 
>                 |
> | |                                                                           
>                 |
> | |--82:EXCHANGE [BROADCAST]                                                  
>                 |
> | |  |                                                                        
>                 |
> | |  81:AGGREGATE [FINALIZE]                                                  
>                 |
> | |  |  output: count:merge(*)                                                
>                 |
> | |  |  group by: a.ss_ticket_number                                          
>                 |
> | |  |  having: count(*) > 9999999999                                         
>                 |
> | |  |                                                                        
>                 |
> | |  80:EXCHANGE [HASH(a.ss_ticket_number)]                                   
>                 |
> | |  |                                                                        
>                 |
> | |  19:AGGREGATE [STREAMING]                                                 
>                 |
> | |  |  output: count(*)                                                      
>                 |
> | |  |  group by: a.ss_ticket_number                                          
>                 |
> | |  |                                                                        
>                 |
> | |  18:HASH JOIN [INNER JOIN, PARTITIONED]                                   
>                 |
> | |  |  hash predicates: a.ss_item_sk = b.sr_item_sk, a.ss_ticket_number = 
> b.sr_ticket_number |
> | |  |  runtime filters: RF008 <- b.sr_item_sk, RF009 <- b.sr_ticket_number   
>                 |
> | |  |                                                                        
>                 |
> | |  |--79:EXCHANGE [HASH(b.sr_item_sk,b.sr_ticket_number)]                   
>                 |
> | |  |  |                                                                     
>                 |
> | |  |  17:SCAN HDFS [tpcds_100_parquet.store_returns b]                      
>                 |
> | |  |     partitions=681/2004 files=9532 size=700.40MB                       
>                 |
> | |  |                                                                        
>                 |
> | |  78:EXCHANGE [HASH(a.ss_item_sk,a.ss_ticket_number)]                      
>                 |
> | |  |                                                                        
>                 |
> | |  16:SCAN HDFS [tpcds_100_parquet.store_sales a]                           
>                 |
> | |     partitions=37168/37168 files=485278 size=2.80GB                       
>                 |
> | |     runtime filters: RF008 -> a.ss_item_sk, RF009 -> a.ss_ticket_number   
>                 |
> | |                                                                           
>                 |
> | 50:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]                                 
>                 |
> | |                                                                           
>                 |
> | |--77:EXCHANGE [BROADCAST]                                                  
>                 |
> | |  |                                                                        
>                 |
> | |  76:AGGREGATE [FINALIZE]                                                  
>                 |
> | |  |  output: count:merge(*)                                                
>                 |
> | |  |  group by: a.ss_ticket_number                                          
>                 |
> | |  |  having: count(*) > 9999999999                                         
>                 |
> | |  |                                                                        
>                 |
> | |  75:EXCHANGE [HASH(a.ss_ticket_number)]                                   
>                 |
> | |  |                                                                        
>                 |
> | |  15:AGGREGATE [STREAMING]                                                 
>                 |
> | |  |  output: count(*)                                                      
>                 |
> | |  |  group by: a.ss_ticket_number                                          
>                 |
> | |  |                                                                        
>                 |
> | |  14:HASH JOIN [INNER JOIN, PARTITIONED]                                   
>                 |
> | |  |  hash predicates: a.ss_item_sk = b.sr_item_sk, a.ss_ticket_number = 
> b.sr_ticket_number |
> | |  |  runtime filters: RF006 <- b.sr_item_sk, RF007 <- b.sr_ticket_number   
>                 |
> | |  |                                                                        
>                 |
> | |  |--74:EXCHANGE [HASH(b.sr_item_sk,b.sr_ticket_number)]                   
>                 |
> | |  |  |                                                                     
>                 |
> | |  |  13:SCAN HDFS [tpcds_100_parquet.store_returns b]                      
>                 |
> | |  |     partitions=681/2004 files=9532 size=700.40MB                       
>                 |
> | |  |                                                                        
>                 |
> | |  73:EXCHANGE [HASH(a.ss_item_sk,a.ss_ticket_number)]                      
>                 |
> | |  |                                                                        
>                 |
> | |  12:SCAN HDFS [tpcds_100_parquet.store_sales a]                           
>                 |
> | |     partitions=37168/37168 files=485278 size=2.80GB                       
>                 |
> | |     runtime filters: RF006 -> a.ss_item_sk, RF007 -> a.ss_ticket_number   
>                 |
> | |                                                                           
>                 |
> | 49:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]                                 
>                 |
> | |                                                                           
>                 |
> | |--72:EXCHANGE [BROADCAST]                                                  
>                 |
> | |  |                                                                        
>                 |
> | |  71:AGGREGATE [FINALIZE]                                                  
>                 |
> | |  |  output: count:merge(*)                                                
>                 |
> | |  |  group by: a.ss_ticket_number                                          
>                 |
> | |  |  having: count(*) > 9999999999                                         
>                 |
> | |  |                                                                        
>                 |
> | |  70:EXCHANGE [HASH(a.ss_ticket_number)]                                   
>                 |
> | |  |                                                                        
>                 |
> | |  11:AGGREGATE [STREAMING]                                                 
>                 |
> | |  |  output: count(*)                                                      
>                 |
> | |  |  group by: a.ss_ticket_number                                          
>                 |
> | |  |                                                                        
>                 |
> | |  10:HASH JOIN [INNER JOIN, PARTITIONED]                                   
>                 |
> | |  |  hash predicates: a.ss_item_sk = b.sr_item_sk, a.ss_ticket_number = 
> b.sr_ticket_number |
> | |  |  runtime filters: RF004 <- b.sr_item_sk, RF005 <- b.sr_ticket_number   
>                 |
> | |  |                                                                        
>                 |
> | |  |--69:EXCHANGE [HASH(b.sr_item_sk,b.sr_ticket_number)]                   
>                 |
> | |  |  |                                                                     
>                 |
> | |  |  09:SCAN HDFS [tpcds_100_parquet.store_returns b]                      
>                 |
> | |  |     partitions=681/2004 files=9532 size=700.40MB                       
>                 |
> | |  |                                                                        
>                 |
> | |  68:EXCHANGE [HASH(a.ss_item_sk,a.ss_ticket_number)]                      
>                 |
> | |  |                                                                        
>                 |
> | |  08:SCAN HDFS [tpcds_100_parquet.store_sales a]                           
>                 |
> | |     partitions=37168/37168 files=485278 size=2.80GB                       
>                 |
> | |     runtime filters: RF004 -> a.ss_item_sk, RF005 -> a.ss_ticket_number   
>                 |
> | |                                                                           
>                 |
> | 48:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]                                 
>                 |
> | |                                                                           
>                 |
> | |--67:EXCHANGE [BROADCAST]                                                  
>                 |
> | |  |                                                                        
>                 |
> | |  66:AGGREGATE [FINALIZE]                                                  
>                 |
> | |  |  output: count:merge(*)                                                
>                 |
> | |  |  group by: a.ss_ticket_number                                          
>                 |
> | |  |  having: count(*) > 9999999999                                         
>                 |
> | |  |                                                                        
>                 |
> | |  65:EXCHANGE [HASH(a.ss_ticket_number)]                                   
>                 |
> | |  |                                                                        
>                 |
> | |  07:AGGREGATE [STREAMING]                                                 
>                 |
> | |  |  output: count(*)                                                      
>                 |
> | |  |  group by: a.ss_ticket_number                                          
>                 |
> | |  |                                                                        
>                 |
> | |  06:HASH JOIN [INNER JOIN, PARTITIONED]                                   
>                 |
> | |  |  hash predicates: a.ss_item_sk = b.sr_item_sk, a.ss_ticket_number = 
> b.sr_ticket_number |
> | |  |  runtime filters: RF002 <- b.sr_item_sk, RF003 <- b.sr_ticket_number   
>                 |
> | |  |                                                                        
>                 |
> | |  |--64:EXCHANGE [HASH(b.sr_item_sk,b.sr_ticket_number)]                   
>                 |
> | |  |  |                                                                     
>                 |
> | |  |  05:SCAN HDFS [tpcds_100_parquet.store_returns b]                      
>                 |
> | |  |     partitions=681/2004 files=9532 size=700.40MB                       
>                 |
> | |  |                                                                        
>                 |
> | |  63:EXCHANGE [HASH(a.ss_item_sk,a.ss_ticket_number)]                      
>                 |
> | |  |                                                                        
>                 |
> | |  04:SCAN HDFS [tpcds_100_parquet.store_sales a]                           
>                 |
> | |     partitions=37168/37168 files=485278 size=2.80GB                       
>                 |
> | |     runtime filters: RF002 -> a.ss_item_sk, RF003 -> a.ss_ticket_number   
>                 |
> | |                                                                           
>                 |
> | 62:AGGREGATE [FINALIZE]                                                     
>                 |
> | |  output: count:merge(*)                                                   
>                 |
> | |  group by: a.ss_ticket_number                                             
>                 |
> | |  having: count(*) > 9999999999                                            
>                 |
> | |                                                                           
>                 |
> | 61:EXCHANGE [HASH(a.ss_ticket_number)]                                      
>                 |
> | |                                                                           
>                 |
> | 03:AGGREGATE [STREAMING]                                                    
>                 |
> | |  output: count(*)                                                         
>                 |
> | |  group by: a.ss_ticket_number                                             
>                 |
> | |                                                                           
>                 |
> | 02:HASH JOIN [INNER JOIN, PARTITIONED]                                      
>                 |
> | |  hash predicates: a.ss_item_sk = b.sr_item_sk, a.ss_ticket_number = 
> b.sr_ticket_number    |
> | |  runtime filters: RF000 <- b.sr_item_sk, RF001 <- b.sr_ticket_number      
>                 |
> | |                                                                           
>                 |
> | |--60:EXCHANGE [HASH(b.sr_item_sk,b.sr_ticket_number)]                      
>                 |
> | |  |                                                                        
>                 |
> | |  01:SCAN HDFS [tpcds_100_parquet.store_returns b]                         
>                 |
> | |     partitions=681/2004 files=9532 size=700.40MB                          
>                 |
> | |                                                                           
>                 |
> | 59:EXCHANGE [HASH(a.ss_item_sk,a.ss_ticket_number)]                         
>                 |
> | |                                                                           
>                 |
> | 00:SCAN HDFS [tpcds_100_parquet.store_sales a]                              
>                 |
> |    partitions=37168/37168 files=485278 size=2.80GB                          
>                 |
> |    runtime filters: RF000 -> a.ss_item_sk, RF001 -> a.ss_ticket_number      
>                 
> {code}
> Query
> {code}
> select * from 
> (select /* +straight_join */  count(*) 
> from store_sales a join   /* +shuffle */ 
>      store_returns b on 
> a.ss_item_sk = b.sr_item_sk 
>    where a.ss_ticket_number = b.sr_ticket_number and ss_sold_date_sk between 
> 2450816 and 2451500  and sr_returned_date_sk between 2450816 and 2451500
>    group by a.ss_ticket_number 
>    having count(*) > 9999999999) a1,
> (select /* +straight_join */  count(*) 
> from store_sales a join   /* +shuffle */ 
>      store_returns b on 
> a.ss_item_sk = b.sr_item_sk 
>    where a.ss_ticket_number = b.sr_ticket_number and ss_sold_date_sk between 
> 2450816 and 2451500  and sr_returned_date_sk between 2450816 and 2451500
>    group by a.ss_ticket_number 
>    having count(*) > 9999999999) a2,
>    (select /* +straight_join */  count(*) 
> from store_sales a join   /* +shuffle */ 
>      store_returns b on 
> a.ss_item_sk = b.sr_item_sk 
>    where a.ss_ticket_number = b.sr_ticket_number and ss_sold_date_sk between 
> 2450816 and 2451500  and sr_returned_date_sk between 2450816 and 2451500
>    group by a.ss_ticket_number 
>    having count(*) > 9999999999) a3,
>    (select /* +straight_join */  count(*) 
> from store_sales a join   /* +shuffle */ 
>      store_returns b on 
> a.ss_item_sk = b.sr_item_sk 
>    where a.ss_ticket_number = b.sr_ticket_number and ss_sold_date_sk between 
> 2450816 and 2451500  and sr_returned_date_sk between 2450816 and 2451500
>    group by a.ss_ticket_number 
>    having count(*) > 9999999999) a4,
>    (select /* +straight_join */  count(*) 
> from store_sales a join   /* +shuffle */ 
>      store_returns b on 
> a.ss_item_sk = b.sr_item_sk 
>    where a.ss_ticket_number = b.sr_ticket_number and ss_sold_date_sk between 
> 2450816 and 2451500  and sr_returned_date_sk between 2450816 and 2451500
>    group by a.ss_ticket_number 
>    having count(*) > 9999999999) a5,
>    (select /* +straight_join */  count(*) 
> from store_sales a join   /* +shuffle */ 
>      store_returns b on 
> a.ss_item_sk = b.sr_item_sk 
>    where a.ss_ticket_number = b.sr_ticket_number and ss_sold_date_sk between 
> 2450816 and 2451500  and sr_returned_date_sk between 2450816 and 2451500
>    group by a.ss_ticket_number 
>    having count(*) > 9999999999) a6,
>    (select /* +straight_join */  count(*) 
> from store_sales a join   /* +shuffle */ 
>      store_returns b on 
> a.ss_item_sk = b.sr_item_sk 
>    where a.ss_ticket_number = b.sr_ticket_number and ss_sold_date_sk between 
> 2450816 and 2451500  and sr_returned_date_sk between 2450816 and 2451500
>    group by a.ss_ticket_number 
>    having count(*) > 9999999999) a7,
>    (select /* +straight_join */  count(*) 
> from store_sales a join   /* +shuffle */ 
>      store_returns b on 
> a.ss_item_sk = b.sr_item_sk 
>    where a.ss_ticket_number = b.sr_ticket_number and ss_sold_date_sk between 
> 2450816 and 2451500  and sr_returned_date_sk between 2450816 and 2451500
>    group by a.ss_ticket_number 
>    having count(*) > 9999999999) a8,
>    (select /* +straight_join */  count(*) 
> from store_sales a join   /* +shuffle */ 
>      store_returns b on 
> a.ss_item_sk = b.sr_item_sk 
>    where a.ss_ticket_number = b.sr_ticket_number and ss_sold_date_sk between 
> 2450816 and 2451500  and sr_returned_date_sk between 2450816 and 2451500
>    group by a.ss_ticket_number 
>    having count(*) > 9999999999) a9,
>    (select /* +straight_join */  count(*) 
> from store_sales a join   /* +shuffle */ 
>      store_returns b on 
> a.ss_item_sk = b.sr_item_sk 
>    where a.ss_ticket_number = b.sr_ticket_number and ss_sold_date_sk between 
> 2450816 and 2451500  and sr_returned_date_sk between 2450816 and 2451500
>    group by a.ss_ticket_number 
>    having count(*) > 9999999999) a10,
>    (select /* +straight_join */  count(*) 
> from store_sales a join   /* +shuffle */ 
>      store_returns b on 
> a.ss_item_sk = b.sr_item_sk 
>    where a.ss_ticket_number = b.sr_ticket_number and ss_sold_date_sk between 
> 2450816 and 2451500  and sr_returned_date_sk between 2450816 and 2451500
>    group by a.ss_ticket_number 
>    having count(*) > 9999999999) a11,
>    (select /* +straight_join */  count(*) 
> from store_sales a join   /* +shuffle */ 
>      store_returns b on 
> a.ss_item_sk = b.sr_item_sk 
>    where a.ss_ticket_number = b.sr_ticket_number and ss_sold_date_sk between 
> 2450816 and 2451500  and sr_returned_date_sk between 2450816 and 2451500
>    group by a.ss_ticket_number 
>    having count(*) > 9999999999) a12
> {code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to