[ https://issues.apache.org/jira/browse/KUDU-3564?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Alexey Serbin updated KUDU-3564: -------------------------------- Description: Reproduction steps copied from the Slack channel: {code:sql} // create the table and data in Impala: CREATE TABLE age_table ( id BIGINT, name STRING, age INT, PRIMARY KEY(id,name,age) ) PARTITION BY HASH (id) PARTITIONS 4, HASH (name) PARTITIONS 4, range (age) ( PARTITION 30 <= VALUES < 60, PARTITION 60 <= VALUES < 90 ) STORED AS KUDU TBLPROPERTIES ('kudu.num_tablet_replicas' = '1'); ALTER TABLE age_table ADD RANGE PARTITION 90<= VALUES <120 HASH(id) PARTITIONS 3 HASH(name) PARTITIONS 3; INSERT INTO age_table VALUES (3, 'alex', 50); INSERT INTO age_table VALUES (12, 'bob', 100); // This query produces wrong results: the expected row for 'bob' isn't returned. // Note that the troublesome row is in the range partition with custom (per-range) hash schema. sudo -u kudu kudu table scan <master.url> default.age_table -columns=id,age -predicates='["AND", ["IN", "id", [12,20]]]' Total count 0 cost 0.0224966 seconds // This query produces correct results: the expected row for 'alex' is returned. sudo -u kudu kudu table scan <master.url> default.age_table -columns=id,age -predicates='["AND", ["IN", "id", [3,20]]]' (int64 id=3, int32 age=50) Total count 1 cost 0.0178102 seconds // However, equality predicates on the primary key columns seem to work as expected, even for the rows in the range with custom hash schema. 
sudo -u kudu kudu table scan <master.url> default.age_table -columns=id,age -predicates='["AND", ["=", "id", 12]]' (int64 id=12, int32 age=100) Total count 1 cost 0.0137217 seconds {code} was: Reproduction steps copied from the Slack channel: {code:sql} // create the table and data in Impala: CREATE TABLE age_table ( id BIGINT, name STRING, age INT, PRIMARY KEY(id,name,age) ) PARTITION BY HASH (id) PARTITIONS 4, HASH (name) PARTITIONS 4, range (age) ( PARTITION 30 <= VALUES < 60, PARTITION 60 <= VALUES < 90 ) STORED AS KUDU TBLPROPERTIES ('kudu.num_tablet_replicas' = '1'); ALTER TABLE age_table ADD RANGE PARTITION 90<= VALUES <120 HASH(id) PARTITIONS 3 HASH(name) PARTITIONS 3; insert into age_table values (3, 'alex', 50); insert into age_table values (12, 'bob', 100); // only predicate "in" for data in custom hash cannot be found, sudo -u kudu kudu table scan <master.url> default.age_table -columns=id,age -predicates='["AND", ["IN", "id", [3,20]]]' (int64 id=3, int32 age=50) Total count 1 cost 0.0178102 seconds {code} > Range specific hashing table when queried with InList predicate may lead to > incorrect results > --------------------------------------------------------------------------------------------- > > Key: KUDU-3564 > URL: https://issues.apache.org/jira/browse/KUDU-3564 > Project: Kudu > Issue Type: Bug > Affects Versions: 1.17.0 > Reporter: YifanZhang > Priority: Major > > Reproduction steps copied from the Slack channel: > > {code:sql} > // create the table and data in Impala: > CREATE TABLE age_table > ( > id BIGINT, > name STRING, > age INT, > PRIMARY KEY(id,name,age) > ) > PARTITION BY HASH (id) PARTITIONS 4, > HASH (name) PARTITIONS 4, > range (age) > ( > PARTITION 30 <= VALUES < 60, > PARTITION 60 <= VALUES < 90 > ) > STORED AS KUDU > TBLPROPERTIES ('kudu.num_tablet_replicas' = '1'); > ALTER TABLE age_table ADD RANGE PARTITION 90<= VALUES <120 > HASH(id) PARTITIONS 3 HASH(name) PARTITIONS 3; > INSERT INTO age_table VALUES (3, 'alex', 50); > INSERT 
INTO age_table VALUES (12, 'bob', 100); > // This query produces wrong results: the expected row for 'bob' isn't > returned. > // Note that the troublesome row is in the range partition with custom > (per-range) hash schema. > sudo -u kudu kudu table scan <master.url> default.age_table -columns=id,age > -predicates='["AND", ["IN", "id", [12,20]]]' > Total count 0 cost 0.0224966 seconds > // This query produces correct results: the expected row for 'alex' is > returned. > sudo -u kudu kudu table scan <master.url> default.age_table -columns=id,age > -predicates='["AND", ["IN", "id", [3,20]]]' > (int64 id=3, int32 age=50) > Total count 1 cost 0.0178102 seconds > // However, equality predicates on the primary key columns seem to work as expected, > even for the rows in the range with custom hash schema. > sudo -u kudu kudu table scan <master.url> default.age_table -columns=id,age > -predicates='["AND", ["=", "id", 12]]' > (int64 id=12, int32 age=100) > Total count 1 cost 0.0137217 seconds > {code} -- This message was sent by Atlassian Jira (v8.20.10#820010)