[ https://issues.apache.org/jira/browse/HIVE-25614?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
zengxl updated HIVE-25614: -------------------------- Description: Currently I join 3 tables, find the result of left join is *{color:#de350b}null{color}* Here is my SQL,The result of this SQL is NULL {code:java} //代码占位符 CREATE TABLE `pdwd.pdwd_pf_hive_ah3_metastore_tbl_partitions_d_test_0831_3_tbl`( `tbl_id` bigint COMMENT 'TBL_ID', `tbl_create_time` bigint COMMENT 'TBL_CREATE_TIME', `db_id` bigint COMMENT 'DB_ID', `tbl_last_access_time` bigint COMMENT 'TBL_LAST_ACCESS_TIME', `owner` string COMMENT 'OWNER', `retention` bigint COMMENT 'RETENTION', `sd_id` bigint COMMENT 'SD_ID', `tbl_name` string COMMENT 'TBL_NAME', `tbl_type` string COMMENT 'TBL_TYPE', `view_expanded_text` string COMMENT 'VIEW_EXPANDED_TEXT', `view_original_text` string COMMENT 'VIEW_ORIGINAL_TEXT', `is_rewrite_enabled` bigint COMMENT 'IS_REWRITE_ENABLED', `tbl_owner_type` string COMMENT 'TBL_OWNER_TYPE', `cd_id` bigint COMMENT 'CD_ID', `input_format` string COMMENT 'INPUT_FORMAT', `is_compressed` bigint COMMENT 'IS_COMPRESSED', `is_storedassubdirectories` bigint COMMENT 'IS_STOREDASSUBDIRECTORIES', `tbl_or_part_location` string COMMENT 'tbl_or_part_location', `num_buckets` bigint COMMENT 'NUM_BUCKETS', `output_format` string COMMENT 'OUTPUT_FORMAT', `serde_id` bigint COMMENT 'SERDE_ID', `part_id` bigint COMMENT 'PART_ID', `part_create_time` bigint COMMENT 'PART_CREATE_TIME', `part_last_access_time` bigint COMMENT 'PART_LAST_ACCESS_TIME', `part_name` string COMMENT 'PART_NAME') PARTITIONED BY ( `pt` string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' with tmp1 as ( select part_id, create_time, last_access_time, part_name, sd_id, tbl_id from pods.pods_pf_hive_ah3_metastore_partitions_d where pt='2021-08-12' ), tmp2 as ( select tbl_id, create_time, db_id, last_access_time, owner, retention, sd_id, tbl_name, tbl_type, view_expanded_text, view_original_text, is_rewrite_enabled, owner_type from pods.pods_pf_hive_ah3_metastore_tbls_d where pt='2021-08-12' ), tmp3 as ( select sd_id, cd_id, input_format, is_compressed, is_storedassubdirectories, location, num_buckets, output_format, serde_id from pods.pods_pf_hive_ah3_metastore_sds_d where pt='2021-08-12' )insert overwrite table pdwd.pdwd_pf_hive_ah3_metastore_tbl_partitions_d_test_0831_3_tbl PARTITION(pt='2021-08-14') select a.tbl_id, b.create_time as tbl_create_time, b.db_id, b.last_access_time as tbl_last_access_time, b.owner, b.retention, a.sd_id, b.tbl_name, b.tbl_type, b.view_expanded_text, b.view_original_text, b.is_rewrite_enabled, b.owner_type as tbl_owner_type, d.cd_id, d.input_format, d.is_compressed, d.is_storedassubdirectories, d.location as tbl_location, d.num_buckets, d.output_format, d.serde_id, a.part_id, a.create_time as part_create_time, a.last_access_time as part_last_access_time, a.part_name from tmp1 a left join tmp2 b on a.tbl_id=b.tbl_id left join tmp3 d on a.sd_id=d.sd_id; {code} pods.pods_pf_hive_ah3_metastore_partitions_d、pods.pods_pf_hive_ah3_metastore_tbls_d、pods.pods_pf_hive_ah3_metastore_sds_d from {color:#de350b}*Metastore*{color} partitions、tbls、sds The sizes of the three tables are as follows: 80.3 M 240.9 M hdfs://ctyunns4/apps/hive/warehouse/pods/pods_pf_hive_ah3_metastore_partitions_d/pt=2021-10-09/exchangis_hive_w__2585cbd4_8bf8_4fbb_8a90_f5a7939b62b3.snappy 179.8 K 539.5 K hdfs://ctyunns4/apps/hive/warehouse/pods/pods_pf_hive_ah3_metastore_tbls_d/pt=2021-10-09/exchangis_hive_w__8a62acaa_6f82_442e_97db_ce960833612f.snappy 94.3 M 282.9 M hdfs://ctyunns4/apps/hive/warehouse/pods/pods_pf_hive_ah3_metastore_sds_d/pt=2021-10-09/exchangis_hive_w__d25536e8_7018_4262_a00e_5af3b1f88925.snappy The result is as follows,select from pdwd.pdwd_pf_hive_ah3_metastore_tbl_partitions_d_test_0831_3_tbl table *{color:#de350b}is null{color}* {code:java} hive> select * from pdwd.pdwd_pf_hive_ah3_metastore_tbl_partitions_d_test_0831_3_tbl where pt='2021-08-14' and sd_id=21229815; OK 721213 1619403652 528181 0 op 0 21229815 pods_pf_yarn_logic_res_i_d MANAGED_TABLE NULL NULL 0 USER NULL NULL NULL NULL NULL NULL NULL NULL 20302818 1628697610 0 pt=2021-08-11 2021-08-14 {code} The reality pods.pods_pf_hive_ah3_metastore_sds_d table is that the data in this table is {color:#de350b}*not null*{color} {code:java} > select * from pods.pods_pf_hive_ah3_metastore_sds_d where pt='2021-08-12' and > sd_id=21229815; OK Interrupting... Be patient, this might take some time. Press Ctrl+C again to kill JVM 21229815 726105 org.apache.hadoop.hive.ql.io.orc.OrcInputFormat 0 0 hdfs://ctyunns4/apps/hive/warehouse/pods/pods_pf_yarn_logic_res_i_d/pt=2021-08-11 -org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat 21229815 2021-08-12 {code} When I change the order of join,The following SQL: {code:java} with tmp1 as ( select part_id, create_time, last_access_time, part_name, sd_id, tbl_id from pods.pods_pf_hive_ah3_metastore_partitions_d where pt='2021-08-12' ), tmp2 as ( select tbl_id, create_time, db_id, last_access_time, owner, retention, sd_id, tbl_name, tbl_type, view_expanded_text, view_original_text, is_rewrite_enabled, owner_type from pods.pods_pf_hive_ah3_metastore_tbls_d where pt='2021-08-12' ), tmp3 as ( select sd_id, cd_id, input_format, is_compressed, is_storedassubdirectories, location, num_buckets, output_format, serde_id from pods.pods_pf_hive_ah3_metastore_sds_d where pt='2021-08-12' )insert overwrite table pdwd.pdwd_pf_hive_ah3_metastore_tbl_partitions_d_test_0831_3_tbl PARTITION(pt='2021-08-14') select a.tbl_id, b.create_time as tbl_create_time, b.db_id, b.last_access_time as tbl_last_access_time, b.owner, b.retention, a.sd_id, b.tbl_name, b.tbl_type, b.view_expanded_text, b.view_original_text, b.is_rewrite_enabled, b.owner_type as tbl_owner_type, d.cd_id, d.input_format, d.is_compressed, d.is_storedassubdirectories, d.location as tbl_location, d.num_buckets, d.output_format, d.serde_id, a.part_id, a.create_time as part_create_time, a.last_access_time as part_last_access_time, a.part_name from tmp1 a left join tmp3 d on a.sd_id=d.sd_id left join tmp2 b on a.tbl_id=b.tbl_id; {code} The result of this SQL execution is as follows,this result is true ,select from pdwd.pdwd_pf_hive_ah3_metastore_tbl_partitions_d_test_0831_3_tbl table is *{color:#de350b}not null{color}* {code:java} hive> select * from pdwd.pdwd_pf_hive_ah3_metastore_tbl_partitions_d_test_0831_3_tbl where pt='2021-08-14' and sd_id=21229815; OK Interrupting... Be patient, this might take some time. Press Ctrl+C again to kill JVM 721213 1619403652 528181 0 op 0 21229815 pods_pf_yarn_logic_res_i_d MANAGED_TABLE NULL NULL 0 USER 726105 org.apache.hadoop.hive.ql.io.orc.OrcInputFormat 0 0 hdfs://ctyunns4/apps/hive/warehouse/pods/pods_pf_yarn_logic_res_i_d/pt=2021-08-11 -1 org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat 21229815 20302818 1628697610 0 pt=2021-08-11 2021-08-14 {code} The query result is *{color:#de350b}null{color}* first mapjoin,then left join.Query results that are not *{color:#de350b}null{color}* are first left join ,then mapjoin I tested it again first left join,after mapjoin ,last left join .The result of the left join is *{color:#de350b}null{color}* Test SQL as follows: {code:java} CREATE TABLE `pdwd.pdwd_pf_hive_ah3_metastore_tbl_partitions_d_test_0831_4_tbl`( `tbl_id` bigint COMMENT 'TBL_ID', `tbl_create_time` bigint COMMENT 'TBL_CREATE_TIME', `db_id` bigint COMMENT 'DB_ID', `tbl_last_access_time` bigint COMMENT 'TBL_LAST_ACCESS_TIME', `owner` string COMMENT 'OWNER', `retention` bigint COMMENT 'RETENTION', `sd_id` bigint COMMENT 'SD_ID', `tbl_name` string COMMENT 'TBL_NAME', `tbl_type` string COMMENT 'TBL_TYPE', `view_expanded_text` string COMMENT 'VIEW_EXPANDED_TEXT', `view_original_text` string COMMENT 'VIEW_ORIGINAL_TEXT', `is_rewrite_enabled` bigint COMMENT 'IS_REWRITE_ENABLED', `tbl_owner_type` string COMMENT 'TBL_OWNER_TYPE', `cd_id` bigint COMMENT 'CD_ID', `input_format` string COMMENT 'INPUT_FORMAT', `is_compressed` bigint COMMENT 'IS_COMPRESSED', `is_storedassubdirectories` bigint COMMENT 'IS_STOREDASSUBDIRECTORIES', `tbl_or_part_location` string COMMENT 'tbl_or_part_location', `num_buckets` bigint COMMENT 'NUM_BUCKETS', `output_format` string COMMENT 'OUTPUT_FORMAT', `serde_id` bigint COMMENT 'SERDE_ID', `part_id` bigint COMMENT 'PART_ID', `part_create_time` bigint COMMENT 'PART_CREATE_TIME', `part_last_access_time` bigint COMMENT 'PART_LAST_ACCESS_TIME', `part_name` string COMMENT 'PART_NAME', `cd_id_1` bigint COMMENT 'CD_ID_1', `input_format_1` string COMMENT 'INPUT_FORMAT_1', `is_compressed_1` bigint COMMENT 'IS_COMPRESSED_1', `is_storedassubdirectories_1` bigint COMMENT 'IS_STOREDASSUBDIRECTORIES_1', `tbl_or_part_location_1` string COMMENT 'tbl_or_part_location_1', `num_buckets_1` bigint COMMENT 'NUM_BUCKETS_1', `output_format_1` string COMMENT 'OUTPUT_FORMAT_1', `serde_id_1` bigint COMMENT 'SERDE_ID_1' ) PARTITIONED BY ( `pt` string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'; with tmp1 as ( select part_id, create_time, last_access_time, part_name, sd_id, tbl_id from pods.pods_pf_hive_ah3_metastore_partitions_d where pt='2021-08-12' ), tmp2 as ( select tbl_id, create_time, db_id, last_access_time, owner, retention, sd_id, tbl_name, tbl_type, view_expanded_text, view_original_text, is_rewrite_enabled, owner_type from pods.pods_pf_hive_ah3_metastore_tbls_d where pt='2021-08-12' ), tmp3 as ( select sd_id, cd_id, input_format, is_compressed, is_storedassubdirectories, location, num_buckets, output_format, serde_id from pods.pods_pf_hive_ah3_metastore_sds_d where pt='2021-08-12' )insert overwrite table pdwd.pdwd_pf_hive_ah3_metastore_tbl_partitions_d_test_0831_4_tbl PARTITION(pt='2021-08-14') select a.tbl_id, b.create_time as tbl_create_time, b.db_id, b.last_access_time as tbl_last_access_time, b.owner, b.retention, a.sd_id, b.tbl_name, b.tbl_type, b.view_expanded_text, b.view_original_text, b.is_rewrite_enabled, b.owner_type as tbl_owner_type, d.cd_id, d.input_format, d.is_compressed, d.is_storedassubdirectories, d.location as tbl_location, d.num_buckets, d.output_format, d.serde_id, a.part_id, a.create_time as part_create_time, a.last_access_time as part_last_access_time, a.part_name, e.cd_id as cd_id_1, e.input_format as input_format_1, e.is_compressed as is_compressed_1, e.is_storedassubdirectories as is_storedassubdirectories_1, e.location as tbl_location_1, e.num_buckets as num_buckets_1, e.output_format as output_format_1, e.serde_id as serde_id_1 from tmp1 a left join tmp3 d on a.sd_id=d.sd_id left join tmp2 b on a.tbl_id=b.tbl_id left join tmp3 e on a.sd_id=e.sd_id {code} The result of this SQL execution is as follows: {code:java} > select * from > pdwd.pdwd_pf_hive_ah3_metastore_tbl_partitions_d_test_0831_4_tbl where > pt='2021-08-14' and sd_id=21229815; OK 721213 1619403652 528181 0 op 0 21229815 pods_pf_yarn_logic_res_i_d MANAGED_TABLE NULL NULL 0 USER 726105 org.apache.hadoop.hive.ql.io.orc.OrcInputFormat 0 0 hdfs://ctyunns4/apps/hive/warehouse/pods/pods_pf_yarn_logic_res_i_d/pt=2021-08-11 -1 org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat 21229815 20302818 1628697610 0 pt=2021-08-11 NULL NULL NULL NULL NULL NULL NULL NULL 2021-08-14 {code} fields cd_id, input_format, is_compressed, is_storedassubdirectories, tbl_or_part_location, num_buckets, output_format, serde_id result {color:#de350b}*is not null*{color} ,but fields cd_id_1, input_format_1, is_compressed_1, is_storedassubdirectories_1, tbl_or_part_location_1, num_buckets_1, output_format_1, serde_id_1 result{color:#de350b} is null{color} was: Currently I join 3 tables, find the result of left join is *{color:#de350b}null{color}* Here is my SQL,The result of this SQL is NULL {code:java} //代码占位符 CREATE TABLE `pdwd.pdwd_pf_hive_ah3_metastore_tbl_partitions_d_test_0831_3_tbl`( `tbl_id` bigint COMMENT 'TBL_ID', `tbl_create_time` bigint COMMENT 'TBL_CREATE_TIME', `db_id` bigint COMMENT 'DB_ID', `tbl_last_access_time` bigint COMMENT 'TBL_LAST_ACCESS_TIME', `owner` string COMMENT 'OWNER', `retention` bigint COMMENT 'RETENTION', `sd_id` bigint COMMENT 'SD_ID', `tbl_name` string COMMENT 'TBL_NAME', `tbl_type` string COMMENT 'TBL_TYPE', `view_expanded_text` string COMMENT 'VIEW_EXPANDED_TEXT', `view_original_text` string COMMENT 'VIEW_ORIGINAL_TEXT', `is_rewrite_enabled` bigint COMMENT 'IS_REWRITE_ENABLED', `tbl_owner_type` string COMMENT 'TBL_OWNER_TYPE', `cd_id` bigint COMMENT 'CD_ID', `input_format` string COMMENT 'INPUT_FORMAT', `is_compressed` bigint COMMENT 'IS_COMPRESSED', `is_storedassubdirectories` bigint COMMENT 'IS_STOREDASSUBDIRECTORIES', `tbl_or_part_location` string COMMENT 'tbl_or_part_location', `num_buckets` bigint COMMENT 'NUM_BUCKETS', `output_format` string COMMENT 'OUTPUT_FORMAT', `serde_id` bigint COMMENT 'SERDE_ID', `part_id` bigint COMMENT 'PART_ID', `part_create_time` bigint COMMENT 'PART_CREATE_TIME', `part_last_access_time` bigint COMMENT 'PART_LAST_ACCESS_TIME', `part_name` string COMMENT 'PART_NAME') PARTITIONED BY ( `pt` string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' with tmp1 as ( select part_id, create_time, last_access_time, part_name, sd_id, tbl_id from pods.pods_pf_hive_ah3_metastore_partitions_d where pt='2021-08-12' ), tmp2 as ( select tbl_id, create_time, db_id, last_access_time, owner, retention, sd_id, tbl_name, tbl_type, view_expanded_text, view_original_text, is_rewrite_enabled, owner_type from pods.pods_pf_hive_ah3_metastore_tbls_d where pt='2021-08-12' ), tmp3 as ( select sd_id, cd_id, input_format, is_compressed, is_storedassubdirectories, location, num_buckets, output_format, serde_id from pods.pods_pf_hive_ah3_metastore_sds_d where pt='2021-08-12' )insert overwrite table pdwd.pdwd_pf_hive_ah3_metastore_tbl_partitions_d_test_0831_3_tbl PARTITION(pt='2021-08-14') select a.tbl_id, b.create_time as tbl_create_time, b.db_id, b.last_access_time as tbl_last_access_time, b.owner, b.retention, a.sd_id, b.tbl_name, b.tbl_type, b.view_expanded_text, b.view_original_text, b.is_rewrite_enabled, b.owner_type as tbl_owner_type, d.cd_id, d.input_format, d.is_compressed, d.is_storedassubdirectories, d.location as tbl_location, d.num_buckets, d.output_format, d.serde_id, a.part_id, a.create_time as part_create_time, a.last_access_time as part_last_access_time, a.part_name from tmp1 a left join tmp2 b on a.tbl_id=b.tbl_id left join tmp3 d on a.sd_id=d.sd_id; {code} pods.pods_pf_hive_ah3_metastore_partitions_d、pods.pods_pf_hive_ah3_metastore_tbls_d、pods.pods_pf_hive_ah3_metastore_sds_d from {color:#de350b}*Metastore*{color} partitions、tbls、sds The sizes of the three tables are as follows: 80.3 M 240.9 M hdfs://ctyunns4/apps/hive/warehouse/pods/pods_pf_hive_ah3_metastore_partitions_d/pt=2021-10-09/exchangis_hive_w__2585cbd4_8bf8_4fbb_8a90_f5a7939b62b3.snappy 179.8 K 539.5 K hdfs://ctyunns4/apps/hive/warehouse/pods/pods_pf_hive_ah3_metastore_tbls_d/pt=2021-10-09/exchangis_hive_w__8a62acaa_6f82_442e_97db_ce960833612f.snappy 94.3 M 282.9 M hdfs://ctyunns4/apps/hive/warehouse/pods/pods_pf_hive_ah3_metastore_sds_d/pt=2021-10-09/exchangis_hive_w__d25536e8_7018_4262_a00e_5af3b1f88925.snappy The result is as follows,select from pods.pods_pf_hive_ah3_metastore_sds_d table *{color:#de350b}is null{color}* {code:java} hive> select * from pdwd.pdwd_pf_hive_ah3_metastore_tbl_partitions_d_test_0831_3_tbl where pt='2021-08-14' and sd_id=21229815; OK 721213 1619403652 528181 0 op 0 21229815 pods_pf_yarn_logic_res_i_d MANAGED_TABLE NULL NULL 0 USER NULL NULL NULL NULL NULL NULL NULL NULL 20302818 1628697610 0 pt=2021-08-11 2021-08-14 {code} The reality pods.pods_pf_hive_ah3_metastore_sds_d table is that the data in this table is {color:#de350b}*not null*{color} {code:java} > select * from pods.pods_pf_hive_ah3_metastore_sds_d where pt='2021-08-12' and > sd_id=21229815; OK Interrupting... Be patient, this might take some time. Press Ctrl+C again to kill JVM 21229815 726105 org.apache.hadoop.hive.ql.io.orc.OrcInputFormat 0 0 hdfs://ctyunns4/apps/hive/warehouse/pods/pods_pf_yarn_logic_res_i_d/pt=2021-08-11 -org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat 21229815 2021-08-12 {code} When I change the order of join,The following SQL: {code:java} with tmp1 as ( select part_id, create_time, last_access_time, part_name, sd_id, tbl_id from pods.pods_pf_hive_ah3_metastore_partitions_d where pt='2021-08-12' ), tmp2 as ( select tbl_id, create_time, db_id, last_access_time, owner, retention, sd_id, tbl_name, tbl_type, view_expanded_text, view_original_text, is_rewrite_enabled, owner_type from pods.pods_pf_hive_ah3_metastore_tbls_d where pt='2021-08-12' ), tmp3 as ( select sd_id, cd_id, input_format, is_compressed, is_storedassubdirectories, location, num_buckets, output_format, serde_id from pods.pods_pf_hive_ah3_metastore_sds_d where pt='2021-08-12' )insert overwrite table pdwd.pdwd_pf_hive_ah3_metastore_tbl_partitions_d_test_0831_3_tbl PARTITION(pt='2021-08-14') select a.tbl_id, b.create_time as tbl_create_time, b.db_id, b.last_access_time as tbl_last_access_time, b.owner, b.retention, a.sd_id, b.tbl_name, b.tbl_type, b.view_expanded_text, b.view_original_text, b.is_rewrite_enabled, b.owner_type as tbl_owner_type, d.cd_id, d.input_format, d.is_compressed, d.is_storedassubdirectories, d.location as tbl_location, d.num_buckets, d.output_format, d.serde_id, a.part_id, a.create_time as part_create_time, a.last_access_time as part_last_access_time, a.part_name from tmp1 a left join tmp3 d on a.sd_id=d.sd_id left join tmp2 b on a.tbl_id=b.tbl_id; {code} The result of this SQL execution is as follows,this result is true ,select from pdwd.pdwd_pf_hive_ah3_metastore_tbl_partitions_d_test_0831_3_tbl table is *{color:#de350b}not null{color}* {code:java} hive> select * from pdwd.pdwd_pf_hive_ah3_metastore_tbl_partitions_d_test_0831_3_tbl where pt='2021-08-14' and sd_id=21229815; OK Interrupting... Be patient, this might take some time. Press Ctrl+C again to kill JVM 721213 1619403652 528181 0 op 0 21229815 pods_pf_yarn_logic_res_i_d MANAGED_TABLE NULL NULL 0 USER 726105 org.apache.hadoop.hive.ql.io.orc.OrcInputFormat 0 0 hdfs://ctyunns4/apps/hive/warehouse/pods/pods_pf_yarn_logic_res_i_d/pt=2021-08-11 -1 org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat 21229815 20302818 1628697610 0 pt=2021-08-11 2021-08-14 {code} The query result is *{color:#de350b}null{color}* first mapjoin,then left join.Query results that are not *{color:#de350b}null{color}* are first left join ,then mapjoin I tested it again first left join,after mapjoin ,last left join .The result of the left join is *{color:#de350b}null{color}* Test SQL as follows: {code:java} CREATE TABLE `pdwd.pdwd_pf_hive_ah3_metastore_tbl_partitions_d_test_0831_4_tbl`( `tbl_id` bigint COMMENT 'TBL_ID', `tbl_create_time` bigint COMMENT 'TBL_CREATE_TIME', `db_id` bigint COMMENT 'DB_ID', `tbl_last_access_time` bigint COMMENT 'TBL_LAST_ACCESS_TIME', `owner` string COMMENT 'OWNER', `retention` bigint COMMENT 'RETENTION', `sd_id` bigint COMMENT 'SD_ID', `tbl_name` string COMMENT 'TBL_NAME', `tbl_type` string COMMENT 'TBL_TYPE', `view_expanded_text` string COMMENT 'VIEW_EXPANDED_TEXT', `view_original_text` string COMMENT 'VIEW_ORIGINAL_TEXT', `is_rewrite_enabled` bigint COMMENT 'IS_REWRITE_ENABLED', `tbl_owner_type` string COMMENT 'TBL_OWNER_TYPE', `cd_id` bigint COMMENT 'CD_ID', `input_format` string COMMENT 'INPUT_FORMAT', `is_compressed` bigint COMMENT 'IS_COMPRESSED', `is_storedassubdirectories` bigint COMMENT 'IS_STOREDASSUBDIRECTORIES', `tbl_or_part_location` string COMMENT 'tbl_or_part_location', `num_buckets` bigint COMMENT 'NUM_BUCKETS', `output_format` string COMMENT 'OUTPUT_FORMAT', `serde_id` bigint COMMENT 'SERDE_ID', `part_id` bigint COMMENT 'PART_ID', `part_create_time` bigint COMMENT 'PART_CREATE_TIME', `part_last_access_time` bigint COMMENT 'PART_LAST_ACCESS_TIME', `part_name` string COMMENT 'PART_NAME', `cd_id_1` bigint COMMENT 'CD_ID_1', `input_format_1` string COMMENT 'INPUT_FORMAT_1', `is_compressed_1` bigint COMMENT 'IS_COMPRESSED_1', `is_storedassubdirectories_1` bigint COMMENT 'IS_STOREDASSUBDIRECTORIES_1', `tbl_or_part_location_1` string COMMENT 'tbl_or_part_location_1', `num_buckets_1` bigint COMMENT 'NUM_BUCKETS_1', `output_format_1` string COMMENT 'OUTPUT_FORMAT_1', `serde_id_1` bigint COMMENT 'SERDE_ID_1' ) PARTITIONED BY ( `pt` string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'; with tmp1 as ( select part_id, create_time, last_access_time, part_name, sd_id, tbl_id from pods.pods_pf_hive_ah3_metastore_partitions_d where pt='2021-08-12' ), tmp2 as ( select tbl_id, create_time, db_id, last_access_time, owner, retention, sd_id, tbl_name, tbl_type, view_expanded_text, view_original_text, is_rewrite_enabled, owner_type from pods.pods_pf_hive_ah3_metastore_tbls_d where pt='2021-08-12' ), tmp3 as ( select sd_id, cd_id, input_format, is_compressed, is_storedassubdirectories, location, num_buckets, output_format, serde_id from pods.pods_pf_hive_ah3_metastore_sds_d where pt='2021-08-12' )insert overwrite table pdwd.pdwd_pf_hive_ah3_metastore_tbl_partitions_d_test_0831_4_tbl PARTITION(pt='2021-08-14') select a.tbl_id, b.create_time as tbl_create_time, b.db_id, b.last_access_time as tbl_last_access_time, b.owner, b.retention, a.sd_id, b.tbl_name, b.tbl_type, b.view_expanded_text, b.view_original_text, b.is_rewrite_enabled, b.owner_type as tbl_owner_type, d.cd_id, d.input_format, d.is_compressed, d.is_storedassubdirectories, d.location as tbl_location, d.num_buckets, d.output_format, d.serde_id, a.part_id, a.create_time as part_create_time, a.last_access_time as part_last_access_time, a.part_name, e.cd_id as cd_id_1, e.input_format as input_format_1, e.is_compressed as is_compressed_1, e.is_storedassubdirectories as is_storedassubdirectories_1, e.location as tbl_location_1, e.num_buckets as num_buckets_1, e.output_format as output_format_1, e.serde_id as serde_id_1 from tmp1 a left join tmp3 d on a.sd_id=d.sd_id left join tmp2 b on a.tbl_id=b.tbl_id left join tmp3 e on a.sd_id=e.sd_id {code} The result of this SQL execution is as follows: {code:java} > select * from > pdwd.pdwd_pf_hive_ah3_metastore_tbl_partitions_d_test_0831_4_tbl where > pt='2021-08-14' and sd_id=21229815; OK 721213 1619403652 528181 0 op 0 21229815 pods_pf_yarn_logic_res_i_d MANAGED_TABLE NULL NULL 0 USER 726105 org.apache.hadoop.hive.ql.io.orc.OrcInputFormat 0 0 hdfs://ctyunns4/apps/hive/warehouse/pods/pods_pf_yarn_logic_res_i_d/pt=2021-08-11 -1 org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat 21229815 20302818 1628697610 0 pt=2021-08-11 NULL NULL NULL NULL NULL NULL NULL NULL 2021-08-14 {code} fields cd_id, input_format, is_compressed, is_storedassubdirectories, tbl_or_part_location, num_buckets, output_format, serde_id result {color:#de350b}*is not null*{color} ,but fields cd_id_1, input_format_1, is_compressed_1, is_storedassubdirectories_1, tbl_or_part_location_1, num_buckets_1, output_format_1, serde_id_1 result{color:#de350b} is null{color} > Mapjoin then join left,the result is incorrect > ---------------------------------------------- > > Key: HIVE-25614 > URL: https://issues.apache.org/jira/browse/HIVE-25614 > Project: Hive > Issue Type: Bug > Components: Hive > Affects Versions: 3.1.2 > Reporter: zengxl > Priority: Critical > > Currently I join 3 tables, find the result of left join is > *{color:#de350b}null{color}* > Here is my SQL,The result of this SQL is NULL > {code:java} > //代码占位符 > CREATE TABLE > `pdwd.pdwd_pf_hive_ah3_metastore_tbl_partitions_d_test_0831_3_tbl`( > `tbl_id` bigint COMMENT 'TBL_ID', > `tbl_create_time` bigint COMMENT 'TBL_CREATE_TIME', > `db_id` bigint COMMENT 'DB_ID', > `tbl_last_access_time` bigint COMMENT 'TBL_LAST_ACCESS_TIME', > `owner` string COMMENT 'OWNER', > `retention` bigint COMMENT 'RETENTION', > `sd_id` bigint COMMENT 'SD_ID', > `tbl_name` string COMMENT 'TBL_NAME', > `tbl_type` string COMMENT 'TBL_TYPE', > `view_expanded_text` string COMMENT 'VIEW_EXPANDED_TEXT', > `view_original_text` string COMMENT 'VIEW_ORIGINAL_TEXT', > `is_rewrite_enabled` bigint COMMENT 'IS_REWRITE_ENABLED', > `tbl_owner_type` string COMMENT 'TBL_OWNER_TYPE', > `cd_id` bigint COMMENT 'CD_ID', > `input_format` string COMMENT 'INPUT_FORMAT', > `is_compressed` bigint COMMENT 'IS_COMPRESSED', > `is_storedassubdirectories` bigint COMMENT 'IS_STOREDASSUBDIRECTORIES', > `tbl_or_part_location` string COMMENT 'tbl_or_part_location', > `num_buckets` bigint COMMENT 'NUM_BUCKETS', > `output_format` string COMMENT 'OUTPUT_FORMAT', > `serde_id` bigint COMMENT 'SERDE_ID', > `part_id` bigint COMMENT 'PART_ID', > `part_create_time` bigint COMMENT 'PART_CREATE_TIME', > `part_last_access_time` bigint COMMENT 'PART_LAST_ACCESS_TIME', > `part_name` string COMMENT 'PART_NAME') > PARTITIONED BY ( > `pt` string) > ROW FORMAT SERDE > 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' > STORED AS INPUTFORMAT > 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' > OUTPUTFORMAT > 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' > > with tmp1 as ( > select > part_id, > create_time, > last_access_time, > part_name, > sd_id, > tbl_id > from > pods.pods_pf_hive_ah3_metastore_partitions_d > where > pt='2021-08-12' > ), > tmp2 as ( > select > tbl_id, > create_time, > db_id, > last_access_time, > owner, > retention, > sd_id, > tbl_name, > tbl_type, > view_expanded_text, > view_original_text, > is_rewrite_enabled, > owner_type > from > pods.pods_pf_hive_ah3_metastore_tbls_d > where > pt='2021-08-12' > ), > tmp3 as ( > select > sd_id, > cd_id, > input_format, > is_compressed, > is_storedassubdirectories, > location, > num_buckets, > output_format, > serde_id > from > pods.pods_pf_hive_ah3_metastore_sds_d > where > pt='2021-08-12' > )insert overwrite table > pdwd.pdwd_pf_hive_ah3_metastore_tbl_partitions_d_test_0831_3_tbl > PARTITION(pt='2021-08-14') > select > a.tbl_id, > b.create_time as tbl_create_time, > b.db_id, > b.last_access_time as tbl_last_access_time, > b.owner, > b.retention, > a.sd_id, > b.tbl_name, > b.tbl_type, > b.view_expanded_text, > b.view_original_text, > b.is_rewrite_enabled, > b.owner_type as tbl_owner_type, > d.cd_id, > d.input_format, > d.is_compressed, > d.is_storedassubdirectories, > d.location as tbl_location, > d.num_buckets, > d.output_format, > d.serde_id, > a.part_id, > a.create_time as part_create_time, > a.last_access_time as part_last_access_time, > a.part_name > from tmp1 a > left join tmp2 b on a.tbl_id=b.tbl_id > left join tmp3 d on a.sd_id=d.sd_id; > {code} > pods.pods_pf_hive_ah3_metastore_partitions_d、pods.pods_pf_hive_ah3_metastore_tbls_d、pods.pods_pf_hive_ah3_metastore_sds_d > from {color:#de350b}*Metastore*{color} partitions、tbls、sds > The sizes of the three tables are as follows: > 80.3 M 240.9 M > hdfs://ctyunns4/apps/hive/warehouse/pods/pods_pf_hive_ah3_metastore_partitions_d/pt=2021-10-09/exchangis_hive_w__2585cbd4_8bf8_4fbb_8a90_f5a7939b62b3.snappy > 179.8 K 539.5 K > hdfs://ctyunns4/apps/hive/warehouse/pods/pods_pf_hive_ah3_metastore_tbls_d/pt=2021-10-09/exchangis_hive_w__8a62acaa_6f82_442e_97db_ce960833612f.snappy > 94.3 M 282.9 M > hdfs://ctyunns4/apps/hive/warehouse/pods/pods_pf_hive_ah3_metastore_sds_d/pt=2021-10-09/exchangis_hive_w__d25536e8_7018_4262_a00e_5af3b1f88925.snappy > The result is as follows,select from > pdwd.pdwd_pf_hive_ah3_metastore_tbl_partitions_d_test_0831_3_tbl table > *{color:#de350b}is null{color}* > {code:java} > hive> select * from > pdwd.pdwd_pf_hive_ah3_metastore_tbl_partitions_d_test_0831_3_tbl where > pt='2021-08-14' and sd_id=21229815; > OK > 721213 1619403652 528181 0 op 0 21229815 pods_pf_yarn_logic_res_i_d > MANAGED_TABLE NULL NULL 0 USER NULL NULL NULL NULL NULL NULL NULL NULL > 20302818 1628697610 0 pt=2021-08-11 2021-08-14 > {code} > The reality pods.pods_pf_hive_ah3_metastore_sds_d table is that the data in > this table is {color:#de350b}*not null*{color} > {code:java} > > select * from pods.pods_pf_hive_ah3_metastore_sds_d where pt='2021-08-12' > > and sd_id=21229815; > OK > Interrupting... > Be patient, this might take some time. > Press Ctrl+C again to kill JVM > 21229815 726105 org.apache.hadoop.hive.ql.io.orc.OrcInputFormat 0 0 > hdfs://ctyunns4/apps/hive/warehouse/pods/pods_pf_yarn_logic_res_i_d/pt=2021-08-11 > -org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat 21229815 2021-08-12 > {code} > When I change the order of join,The following SQL: > {code:java} > with tmp1 as ( > select > part_id, > create_time, > last_access_time, > part_name, > sd_id, > tbl_id > from > pods.pods_pf_hive_ah3_metastore_partitions_d > where > pt='2021-08-12' > ), > tmp2 as ( > select > tbl_id, > create_time, > db_id, > last_access_time, > owner, > retention, > sd_id, > tbl_name, > tbl_type, > view_expanded_text, > view_original_text, > is_rewrite_enabled, > owner_type > from > pods.pods_pf_hive_ah3_metastore_tbls_d > where > pt='2021-08-12' > ), > tmp3 as ( > select > sd_id, > cd_id, > input_format, > is_compressed, > is_storedassubdirectories, > location, > num_buckets, > output_format, > serde_id > from > pods.pods_pf_hive_ah3_metastore_sds_d > where > pt='2021-08-12' > )insert overwrite table > pdwd.pdwd_pf_hive_ah3_metastore_tbl_partitions_d_test_0831_3_tbl > PARTITION(pt='2021-08-14') > select > a.tbl_id, > b.create_time as tbl_create_time, > b.db_id, > b.last_access_time as tbl_last_access_time, > b.owner, > b.retention, > a.sd_id, > b.tbl_name, > b.tbl_type, > b.view_expanded_text, > b.view_original_text, > b.is_rewrite_enabled, > b.owner_type as tbl_owner_type, > d.cd_id, > d.input_format, > d.is_compressed, > d.is_storedassubdirectories, > d.location as tbl_location, > d.num_buckets, > d.output_format, > d.serde_id, > a.part_id, > a.create_time as part_create_time, > a.last_access_time as part_last_access_time, > a.part_name > from tmp1 a > left join tmp3 d on a.sd_id=d.sd_id > left join tmp2 b on a.tbl_id=b.tbl_id; > {code} > The result of this SQL execution is as follows,this result is true ,select > from pdwd.pdwd_pf_hive_ah3_metastore_tbl_partitions_d_test_0831_3_tbl table > is *{color:#de350b}not null{color}* > > {code:java} > hive> select * from > pdwd.pdwd_pf_hive_ah3_metastore_tbl_partitions_d_test_0831_3_tbl where > pt='2021-08-14' and sd_id=21229815; > OK > Interrupting... > Be patient, this might take some time. > Press Ctrl+C again to kill JVM > 721213 1619403652 528181 0 op 0 21229815 pods_pf_yarn_logic_res_i_d > MANAGED_TABLE NULL NULL 0 USER 726105 > org.apache.hadoop.hive.ql.io.orc.OrcInputFormat 0 0 > hdfs://ctyunns4/apps/hive/warehouse/pods/pods_pf_yarn_logic_res_i_d/pt=2021-08-11 > -1 org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat 21229815 20302818 > 1628697610 0 pt=2021-08-11 2021-08-14 > {code} > The query result is *{color:#de350b}null{color}* first mapjoin,then left > join.Query results that are not *{color:#de350b}null{color}* are first left > join ,then mapjoin > I tested it again first left join,after mapjoin ,last left join .The result > of the left join is *{color:#de350b}null{color}* > Test SQL as follows: > {code:java} > CREATE TABLE > `pdwd.pdwd_pf_hive_ah3_metastore_tbl_partitions_d_test_0831_4_tbl`( > `tbl_id` bigint COMMENT 'TBL_ID', > `tbl_create_time` bigint COMMENT 'TBL_CREATE_TIME', > `db_id` bigint COMMENT 'DB_ID', > `tbl_last_access_time` bigint COMMENT 'TBL_LAST_ACCESS_TIME', > `owner` string COMMENT 'OWNER', > `retention` bigint COMMENT 'RETENTION', > `sd_id` bigint COMMENT 'SD_ID', > `tbl_name` string COMMENT 'TBL_NAME', > `tbl_type` string COMMENT 'TBL_TYPE', > `view_expanded_text` string COMMENT 'VIEW_EXPANDED_TEXT', > `view_original_text` string COMMENT 'VIEW_ORIGINAL_TEXT', > `is_rewrite_enabled` bigint COMMENT 'IS_REWRITE_ENABLED', > `tbl_owner_type` string COMMENT 'TBL_OWNER_TYPE', > `cd_id` bigint COMMENT 'CD_ID', > `input_format` string COMMENT 'INPUT_FORMAT', > `is_compressed` bigint COMMENT 'IS_COMPRESSED', > `is_storedassubdirectories` bigint COMMENT 'IS_STOREDASSUBDIRECTORIES', > `tbl_or_part_location` string COMMENT 'tbl_or_part_location', > `num_buckets` bigint COMMENT 'NUM_BUCKETS', > `output_format` string COMMENT 'OUTPUT_FORMAT', > `serde_id` bigint COMMENT 'SERDE_ID', > `part_id` bigint COMMENT 'PART_ID', > `part_create_time` bigint COMMENT 'PART_CREATE_TIME', > `part_last_access_time` bigint COMMENT 'PART_LAST_ACCESS_TIME', > `part_name` string COMMENT 'PART_NAME', > `cd_id_1` bigint COMMENT 'CD_ID_1', > `input_format_1` string COMMENT 'INPUT_FORMAT_1', > `is_compressed_1` bigint COMMENT 'IS_COMPRESSED_1', > `is_storedassubdirectories_1` bigint COMMENT 'IS_STOREDASSUBDIRECTORIES_1', > `tbl_or_part_location_1` string COMMENT 'tbl_or_part_location_1', > `num_buckets_1` bigint COMMENT 'NUM_BUCKETS_1', > `output_format_1` string COMMENT 'OUTPUT_FORMAT_1', > `serde_id_1` bigint COMMENT 'SERDE_ID_1' > ) > PARTITIONED BY ( > `pt` string) > ROW FORMAT SERDE > 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' > STORED AS INPUTFORMAT > 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' > OUTPUTFORMAT > 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'; > > with tmp1 as ( > select > part_id, > create_time, > last_access_time, > part_name, > sd_id, > tbl_id > from > pods.pods_pf_hive_ah3_metastore_partitions_d > where > pt='2021-08-12' > ), > tmp2 as ( > select > tbl_id, > create_time, > db_id, > last_access_time, > owner, > retention, > sd_id, > tbl_name, > tbl_type, > view_expanded_text, > view_original_text, > is_rewrite_enabled, > owner_type > from > pods.pods_pf_hive_ah3_metastore_tbls_d > where > pt='2021-08-12' > ), > tmp3 as ( > select > sd_id, > cd_id, > input_format, > is_compressed, > is_storedassubdirectories, > location, > num_buckets, > output_format, > serde_id > from > pods.pods_pf_hive_ah3_metastore_sds_d > where > pt='2021-08-12' > )insert overwrite table > pdwd.pdwd_pf_hive_ah3_metastore_tbl_partitions_d_test_0831_4_tbl > PARTITION(pt='2021-08-14') > select > a.tbl_id, > b.create_time as tbl_create_time, > b.db_id, > b.last_access_time as tbl_last_access_time, > b.owner, > b.retention, > a.sd_id, > b.tbl_name, > b.tbl_type, > b.view_expanded_text, > b.view_original_text, > b.is_rewrite_enabled, > b.owner_type as tbl_owner_type, > d.cd_id, > d.input_format, > d.is_compressed, > d.is_storedassubdirectories, > d.location as tbl_location, > d.num_buckets, > d.output_format, > d.serde_id, > a.part_id, > a.create_time as part_create_time, > a.last_access_time as part_last_access_time, > a.part_name, > e.cd_id as cd_id_1, > e.input_format as input_format_1, > e.is_compressed as is_compressed_1, > e.is_storedassubdirectories as is_storedassubdirectories_1, > e.location as tbl_location_1, > e.num_buckets as num_buckets_1, > e.output_format as output_format_1, > e.serde_id as serde_id_1 > from tmp1 a > left join tmp3 d on a.sd_id=d.sd_id > left join tmp2 b on a.tbl_id=b.tbl_id > left join tmp3 e on a.sd_id=e.sd_id > {code} > The result of this SQL execution is as follows: > {code:java} > > select * from > > pdwd.pdwd_pf_hive_ah3_metastore_tbl_partitions_d_test_0831_4_tbl where > > pt='2021-08-14' and sd_id=21229815; > OK > 721213 1619403652 528181 0 op 0 21229815 pods_pf_yarn_logic_res_i_d > MANAGED_TABLE NULL NULL 0 USER 726105 > org.apache.hadoop.hive.ql.io.orc.OrcInputFormat 0 0 > hdfs://ctyunns4/apps/hive/warehouse/pods/pods_pf_yarn_logic_res_i_d/pt=2021-08-11 > -1 org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat 21229815 20302818 > 1628697610 0 pt=2021-08-11 NULL NULL NULL NULL NULL NULL NULL NULL 2021-08-14 > {code} > fields cd_id, input_format, is_compressed, is_storedassubdirectories, > tbl_or_part_location, num_buckets, output_format, serde_id result > {color:#de350b}*is not null*{color} ,but fields cd_id_1, input_format_1, > is_compressed_1, is_storedassubdirectories_1, tbl_or_part_location_1, > num_buckets_1, output_format_1, serde_id_1 result{color:#de350b} is > null{color} > > > > > -- This message was sent by Atlassian Jira (v8.3.4#803005)