[ https://issues.apache.org/jira/browse/HIVE-24357?focusedWorklogId=511223&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-511223 ]
ASF GitHub Bot logged work on HIVE-24357: ----------------------------------------- Author: ASF GitHub Bot Created on: 13/Nov/20 06:39 Start Date: 13/Nov/20 06:39 Worklog Time Spent: 10m Work Description: jcamachor commented on a change in pull request #1653: URL: https://github.com/apache/hive/pull/1653#discussion_r522678529 ########## File path: ql/src/test/results/clientpositive/perf/tez/query61.q.out ########## @@ -165,70 +167,81 @@ Stage-0 SHUFFLE [RS_38] PartitionCols:_col2 Merge Join Operator [MERGEJOIN_256] (rows=2526982 width=0) - Conds:RS_30._col4=RS_290._col0(Inner),Output:["_col2","_col5"] - <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_290] + Conds:RS_30._col4=RS_295._col0(Inner),Output:["_col2","_col5"] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_295] PartitionCols:_col0 - Select Operator [SEL_289] (rows=2300 width=4) + Select Operator [SEL_294] (rows=2300 width=4) Output:["_col0"] - Filter Operator [FIL_288] (rows=2300 width=259) + Filter Operator [FIL_293] (rows=2300 width=259) predicate:(((p_channel_dmail = 'Y') or (p_channel_email = 'Y') or (p_channel_tv = 'Y')) and p_promo_sk is not null) TableScan [TS_18] (rows=2300 width=259) default@promotion,promotion,Tbl:COMPLETE,Col:COMPLETE,Output:["p_promo_sk","p_channel_dmail","p_channel_email","p_channel_tv"] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col4 Merge Join Operator [MERGEJOIN_255] (rows=2526982 width=0) - Conds:RS_27._col3=RS_286._col0(Inner),Output:["_col2","_col4","_col5"] - <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_286] + Conds:RS_27._col3=RS_291._col0(Inner),Output:["_col2","_col4","_col5"] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_291] PartitionCols:_col0 - Select Operator [SEL_285] (rows=341 width=4) + Select Operator [SEL_290] (rows=341 width=4) Output:["_col0"] - Filter Operator [FIL_284] (rows=341 width=115) + Filter Operator [FIL_289] (rows=341 width=115) predicate:((s_gmt_offset = -7) and s_store_sk is not null) TableScan [TS_15] (rows=1704 
width=115) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_gmt_offset"] <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_27] PartitionCols:_col3 Merge Join Operator [MERGEJOIN_254] (rows=12627499 width=0) - Conds:RS_24._col1=RS_282._col0(Inner),Output:["_col2","_col3","_col4","_col5"] - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_282] + Conds:RS_24._col1=RS_287._col0(Inner),Output:["_col2","_col3","_col4","_col5"] + <-Map 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_287] PartitionCols:_col0 - Select Operator [SEL_281] (rows=46200 width=4) + Select Operator [SEL_286] (rows=46200 width=4) Output:["_col0"] - Filter Operator [FIL_280] (rows=46200 width=94) + Filter Operator [FIL_285] (rows=46200 width=94) predicate:((i_category = 'Electronics') and i_item_sk is not null) TableScan [TS_12] (rows=462000 width=94) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_category"] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_253] (rows=13119234 width=4) - Conds:RS_274._col0=RS_278._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + Conds:RS_283._col0=RS_272._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_278] + PARTITION_ONLY_SHUFFLE [RS_272] PartitionCols:_col0 - Select Operator [SEL_277] (rows=50 width=4) + Select Operator [SEL_271] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_276] (rows=50 width=12) + Filter Operator [FIL_270] (rows=50 width=12) predicate:((d_year = 1999) and (d_moy = 11) and d_date_sk is not null) TableScan [TS_9] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_274] + SHUFFLE [RS_283] PartitionCols:_col0 - Select Operator [SEL_272] (rows=479120969 width=126) + Select Operator [SEL_281] (rows=479120969 width=126) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator 
[FIL_270] (rows=479120969 width=126) - predicate:(ss_sold_date_sk is not null and ss_promo_sk is not null and ss_customer_sk is not null and ss_store_sk is not null and ss_item_sk is not null) + Filter Operator [FIL_279] (rows=479120969 width=126) + predicate:(ss_sold_date_sk is not null and ss_promo_sk is not null and ss_customer_sk is not null and ss_store_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_22_date_dim_d_date_sk_min) AND DynamicValue(RS_22_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_22_date_dim_d_date_sk_bloom_filter)) and ((ss_sold_date_sk is not null and ss_promo_sk is not null and ss_customer_sk is not null and ss_store_sk is not null and ss_item_sk is not null) or (ss_sold_date_sk is not null and ss_customer_sk is not null and ss_store_sk is not null and ss_item_sk is not null))) TableScan [TS_6] (rows=575995635 width=126) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_promo_sk","ss_ext_sales_price"] + <-Reducer 18 [BROADCAST_EDGE] vectorized Review comment: Maybe it was because one TS was targeted by a SJ and the other one was not? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking ------------------- Worklog Id: (was: 511223) Time Spent: 40m (was: 0.5h) > Exchange SWO table/algorithm strategy > ------------------------------------- > > Key: HIVE-24357 > URL: https://issues.apache.org/jira/browse/HIVE-24357 > Project: Hive > Issue Type: Improvement > Reporter: Zoltan Haindrich > Assignee: Zoltan Haindrich > Priority: Major > Labels: pull-request-available > Attachments: swo.before.jointree.dot.png > > Time Spent: 40m > Remaining Estimate: 0h > > SWO right now runs like: > {code} > for every strategy s: for every table t: try s for t > {code} > as a result, an earlier strategy may leave a more entangled operator > tree behind - in case it's able to merge for a less prioritized table > it would probably make more sense to do: > {code} > for every table t: for every strategy s: try s for t > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005)