[
https://issues.apache.org/jira/browse/KUDU-2463?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Tomas Farkas updated KUDU-2463:
-------------------------------
Description:
Hi,
I have a static table in Kudu, no inserts/updates or deletes are running on the
cluster. The query returns DIFFERENT result when I change the where condition
on one of the primary key columns, which is in the group_by list.
The created_date is part of the PK and is type of int.
PK contains subscriber, time, date, identifier and created_date.
I tried to check if the inserted count is equal to the HDFS table, and noticed
on one day, that the count differs based on the where criteria!!
{quote}
{{[10.197.0.164:21000] > select created_date, count(*) from base.usage_kudu
where created_date >= 20180601 group by created_date;}}
{{Query: select created_date, count(*) from base.usage_kudu where created_date
>= 20180601 group by created_date}}
{{Query submitted at: 2018-06-04 21:06:30 (Coordinator:
[http://ip-10-197-0-164.eu-west-1.compute.internal:25000|http://ip-10-197-0-164.eu-west-1.compute.internal:25000/])}}
{{Query progress can be monitored at:
[http://ip-10-197-0-164.eu-west-1.compute.internal:25000/query_plan?query_id=ce4e92eda5aaa02f:ea07aa4600000000]}}
{{+---------------+---------+}}
{{| created_date | count(*) |}}
{{+---------------+---------+}}
{{| 20180603 | 12145131 |}}
{{| 20180601 | 18076448 | -> 195k MORE!!!}}
{{| 20180602 | 13325080 |}}
{{| 20180604 | 3788161 |}}
{{+---------------+---------+}}
{{Fetched 4 row(s) in 0.37s}}
{{[10.197.0.164:21000] >}}
{{[10.197.0.164:21000] >}}
{{[10.197.0.164:21000] > select created_date, count(*) from base.usage_kudu
where created_date >= 20180601 group by created_date order by 1;}}
{{Query: select created_date, count(*) from base.usage_kudu where created_date
>= 20180601 group by created_date order by 1}}
{{Query submitted at: 2018-06-04 21:06:55 (Coordinator:
[http://ip-10-197-0-164.eu-west-1.compute.internal:25000|http://ip-10-197-0-164.eu-west-1.compute.internal:25000/])}}
{{Query progress can be monitored at:
[http://ip-10-197-0-164.eu-west-1.compute.internal:25000/query_plan?query_id=d541a9dda19e28e4:be4a2ca000000000]}}
{{+---------------+---------+}}
{{| created_date | count(*) |}}
{{+---------------+---------+}}
{{| 20180601 | 18076448 | -> 195k MORE!!!}}
{{| 20180602 | 13325080 |}}
{{| 20180603 | 12145131 |}}
{{| 20180604 | 3788161 |}}
{{+---------------+---------+}}
{{Fetched 4 row(s) in 1.14s}}
{{[10.197.0.164:21000] > select created_date, count(*) from base.usage_kudu
where created_date >= 20180528 group by created_date order by 1;}}
{{Query: select created_date, count(*) from base.usage_kudu where created_date
>= 20180528 group by created_date order by 1}}
{{Query submitted at: 2018-06-04 21:07:12 (Coordinator:
[http://ip-10-197-0-164.eu-west-1.compute.internal:25000|http://ip-10-197-0-164.eu-west-1.compute.internal:25000/])}}
{{Query progress can be monitored at:
[http://ip-10-197-0-164.eu-west-1.compute.internal:25000/query_plan?query_id=774a261fb94ad2bb:aab28b8b00000000]}}
{{+---------------+---------+}}
{{| created_date | count(*) |}}
{{+---------------+---------+}}
{{| 20180528 | 17607943 |}}
{{| 20180529 | 20741097 |}}
{{| 20180530 | 17362364 |}}
{{| 20180531 | 16877228 |}}
\{{| 20180601 | 17925671 | -> 44k MORE!! }}
{{| 20180602 | 13325080 |}}
{{| 20180603 | 12145131 |}}
{{| 20180604 | 3788161 |}}
{{+---------------+---------+}}
{{Fetched 8 row(s) in 0.67s}}
{{[10.197.0.164:21000] > select created_date, count(*) from base.usage_kudu
where created_date >= 20180525 group by created_date order by 1;}}
{{Query: select created_date, count(*) from base.usage_kudu where created_date
>= 20180525 group by created_date order by 1}}
{{Query submitted at: 2018-06-04 21:07:25 (Coordinator:
[http://ip-10-197-0-164.eu-west-1.compute.internal:25000|http://ip-10-197-0-164.eu-west-1.compute.internal:25000/])}}
{{Query progress can be monitored at:
[http://ip-10-197-0-164.eu-west-1.compute.internal:25000/query_plan?query_id=38483ad3ae5c8eb9:a538cb6300000000]}}
{{+---------------+---------+}}
{{| created_date | count(*) |}}
{{+---------------+---------+}}
{{| 20180525 | 22309857 |}}
{{| 20180526 | 15268520 |}}
{{| 20180527 | 14939691 |}}
{{| 20180528 | 17607943 |}}
{{| 20180529 | 20741097 |}}
{{| 20180530 | 17362364 |}}
{{| 20180531 | 16903829 |}}
{{| 20180601 | 18047010 | -> 165k MORE!!!}}
{{| 20180602 | 13325080 |}}
{{| 20180603 | 12145131 |}}
{{| 20180604 | 3788161 |}}
{{+---------------+---------+}}
{{Fetched 11 row(s) in 0.85s}}
{{[10.197.0.164:21000] > select created_date, count(*) from base.usage_kudu
where created_date = 20180601 group by created_date;}}
{{Query: select created_date, count(*) from base.usage_kudu where created_date
= 20180601 group by created_date}}
{{Query submitted at: 2018-06-04 21:07:42 (Coordinator:
[http://ip-10-197-0-164.eu-west-1.compute.internal:25000|http://ip-10-197-0-164.eu-west-1.compute.internal:25000/])}}
{{Query progress can be monitored at:
[http://ip-10-197-0-164.eu-west-1.compute.internal:25000/query_plan?query_id=7343ba31f6b4c86f:621a7b8c00000000]}}
{{+---------------+---------+}}
{{| created_date | count(*) |}}
{{+---------------+---------+}}
{{| 20180601 | 17881253 | -> CORRECT ONE}}
{{+---------------+---------+}}
{{Fetched 1 row(s) in 0.27s}}
{{[10.197.0.164:21000] > select created_date, count(*) from base.usage_kudu
where created_date >= 20180525 group by created_date order by 1;}}
{{Query: select created_date, count(*) from base.usage_kudu where created_date
>= 20180525 group by created_date order by 1}}
{{Query submitted at: 2018-06-04 21:12:02 (Coordinator:
[http://ip-10-197-0-164.eu-west-1.compute.internal:25000|http://ip-10-197-0-164.eu-west-1.compute.internal:25000/])}}
{{Query progress can be monitored at:
[http://ip-10-197-0-164.eu-west-1.compute.internal:25000/query_plan?query_id=4141df26117f35c3:9ab2f0700000000]}}
{{+---------------+---------+}}
{{| created_date | count(*) |}}
{{+---------------+---------+}}
{{| 20180525 | 22309857 |}}
{{| 20180526 | 15268520 |}}
{{| 20180527 | 14939691 |}}
{{| 20180528 | 17607943 |}}
{{| 20180529 | 20741097 |}}
{{| 20180530 | 17362364 |}}
{{| 20180531 | 16903829 |}}
{{| 20180601 | 18047010 | -> AGAIN WRONG RESULT!!}}
{{| 20180602 | 13325080 |}}
{{| 20180603 | 12145131 |}}
{{| 20180604 | 3788161 |}}
{{+---------------+---------+}}
{{Fetched 11 row(s) in 1.04s}}{{}}{{}}
Again, no other inserts/selects/updates or deletes were running between these
statements on the cluster.
I checked the explain, if there is a difference,but it looks ok. But the result
is different!
{{[10.197.0.164:21000] > explain select created_date, count(*) from
base.usage_kudu where created_date = 20180601 group by created_date;}}
{{Query: explain select created_date, count(*) from base.usage_kudu where
created_date = 20180601 group by created_date}}
{{+--------------------------------------------------+}}
{{| Explain String |}}
{{+--------------------------------------------------+}}
{{| Max Per-Host Resource Reservation: Memory=3.94MB |}}
{{| Per-Host Resource Estimates: Memory=20.00MB |}}
{{| |}}
{{| PLAN-ROOT SINK |}}
{{| | |}}
{{| 04:EXCHANGE [UNPARTITIONED] |}}
{{| | |}}
{{| 03:AGGREGATE [FINALIZE] |}}
{{| | output: count:merge(*) |}}
{{| | group by: created_date |}}
{{| | |}}
{{| 02:EXCHANGE [HASH(created_date)] |}}
{{| | |}}
{{| 01:AGGREGATE [STREAMING] |}}
{{| | output: count(*) |}}
{{| | group by: created_date |}}
{{| | |}}
{{| 00:SCAN KUDU [base.usage_kudu] |}}
{{| kudu predicates: created_date = 20180601 |}}
{{+--------------------------------------------------+}}
{{Fetched 19 row(s) in 0.06s}}
{{[10.197.0.164:21000] > select created_date, count(*) from base.usage_kudu
where created_date = 20180601 group by created_date;}}
{{Query: select created_date, count(*) from base.usage_kudu where created_date
= 20180601 group by created_date}}
{{Query submitted at: 2018-06-04 21:17:21 (Coordinator:
[http://ip-10-197-0-164.eu-west-1.compute.internal:25000|http://ip-10-197-0-164.eu-west-1.compute.internal:25000/])}}
{{Query progress can be monitored at:
[http://ip-10-197-0-164.eu-west-1.compute.internal:25000/query_plan?query_id=c449aabea51e7456:612f096400000000]}}
{{+---------------+---------+}}
{{| created_date | count(*) |}}
{{+---------------+---------+}}
{{| 20180601 | 17881253 |}}
{{+---------------+---------+}}
{{Fetched 1 row(s) in 0.38s}}
{{[10.197.0.164:21000] > explain select created_date, count(*) from
base.usage_kudu where created_date >= 20180525 group by created_date order by
1;}}
{{Query: explain select created_date, count(*) from base.usage_kudu where
created_date >= 20180525 group by created_date order by 1}}
{{+--------------------------------------------------+}}
{{| Explain String |}}
{{+--------------------------------------------------+}}
{{| Max Per-Host Resource Reservation: Memory=9.94MB |}}
{{| Per-Host Resource Estimates: Memory=26.00MB |}}
{{| |}}
{{| PLAN-ROOT SINK |}}
{{| | |}}
{{| 05:MERGING-EXCHANGE [UNPARTITIONED] |}}
{{| | order by: created_date ASC |}}
{{| | |}}
{{| 02:SORT |}}
{{| | order by: created_date ASC |}}
{{| | |}}
{{| 04:AGGREGATE [FINALIZE] |}}
{{| | output: count:merge(*) |}}
{{| | group by: created_date |}}
{{| | |}}
{{| 03:EXCHANGE [HASH(created_date)] |}}
{{| | |}}
{{| 01:AGGREGATE [STREAMING] |}}
{{| | output: count(*) |}}
{{| | group by: created_date |}}
{{| | |}}
{{| 00:SCAN KUDU [base.usage_kudu] |}}
{{| kudu predicates: created_date >= 20180525 |}}
{{+--------------------------------------------------+}}
{{Fetched 23 row(s) in 0.05s}}
{{[10.197.0.164:21000] > select created_date, count(*) from base.usage_kudu
where created_date >= 20180525 group by created_date order by 1;}}
{{Query: select created_date, count(*) from base.usage_kudu where created_date
>= 20180525 group by created_date order by 1}}
{{Query submitted at: 2018-06-04 21:17:32 (Coordinator:
[http://ip-10-197-0-164.eu-west-1.compute.internal:25000|http://ip-10-197-0-164.eu-west-1.compute.internal:25000/])}}
{{Query progress can be monitored at:
[http://ip-10-197-0-164.eu-west-1.compute.internal:25000/query_plan?query_id=bc4a36f2a7ad3280:c7b09a5100000000]}}
{{+---------------+---------+}}
{{| created_date | count(*) |}}
{{+---------------+---------+}}
{{| 20180525 | 22309857 |}}
{{| 20180526 | 15268520 |}}
{{| 20180527 | 14939691 |}}
{{| 20180528 | 17607943 |}}
{{| 20180529 | 20741097 |}}
{{| 20180530 | 17362364 |}}
{{| 20180531 | 16903829 |}}
{{| 20180601 | 18047010 |}}
{{| 20180602 | 13325080 |}}
{{| 20180603 | 12145131 |}}
{{| 20180604 | 3788161 |}}
{{+---------------+---------+}}
{{Fetched 11 row(s) in 0.88s}}
{quote}
was:
Hi,
I have a static table in Kudu, no inserts/updates or deletes are running on the
cluster. The query returns DIFFERENT result when I change the where condition
on one of the primary key columns, which is in the group_by list.
The created_date is part of the PK and is type of int.
PK contains subscriber, time, date, identifier and created_date.
I tried to check if the inserted count is equal to the HDFS table, and noticed
on one day, that the count differs based on the where criteria!!
{{[10.197.0.164:21000] > select created_date, count(*) from base.usage_kudu
where created_date >= 20180601 group by created_date;}}
{{Query: select created_date, count(*) from base.usage_kudu where created_date
>= 20180601 group by created_date}}
{{Query submitted at: 2018-06-04 21:06:30 (Coordinator:
http://ip-10-197-0-164.eu-west-1.compute.internal:25000)}}
{{Query progress can be monitored at:
http://ip-10-197-0-164.eu-west-1.compute.internal:25000/query_plan?query_id=ce4e92eda5aaa02f:ea07aa4600000000}}
{{+--------------+----------+}}
{{| created_date | count(*) |}}
{{+--------------+----------+}}
{{| 20180603 | 12145131 |}}
{{| 20180601 | 18076448 | -> 195k MORE!!!}}
{{| 20180602 | 13325080 |}}
{{| 20180604 | 3788161 |}}
{{+--------------+----------+}}
{{Fetched 4 row(s) in 0.37s}}
{{[10.197.0.164:21000] >}}
{{[10.197.0.164:21000] >}}
{{[10.197.0.164:21000] > select created_date, count(*) from base.usage_kudu
where created_date >= 20180601 group by created_date order by 1;}}
{{Query: select created_date, count(*) from base.usage_kudu where created_date
>= 20180601 group by created_date order by 1}}
{{Query submitted at: 2018-06-04 21:06:55 (Coordinator:
http://ip-10-197-0-164.eu-west-1.compute.internal:25000)}}
{{Query progress can be monitored at:
http://ip-10-197-0-164.eu-west-1.compute.internal:25000/query_plan?query_id=d541a9dda19e28e4:be4a2ca000000000}}
{{+--------------+----------+}}
{{| created_date | count(*) |}}
{{+--------------+----------+}}
{{| 20180601 | 18076448 | -> 195k MORE!!!}}
{{| 20180602 | 13325080 |}}
{{| 20180603 | 12145131 |}}
{{| 20180604 | 3788161 |}}
{{+--------------+----------+}}
{{Fetched 4 row(s) in 1.14s}}
{{[10.197.0.164:21000] > select created_date, count(*) from base.usage_kudu
where created_date >= 20180528 group by created_date order by 1;}}
{{Query: select created_date, count(*) from base.usage_kudu where created_date
>= 20180528 group by created_date order by 1}}
{{Query submitted at: 2018-06-04 21:07:12 (Coordinator:
http://ip-10-197-0-164.eu-west-1.compute.internal:25000)}}
{{Query progress can be monitored at:
http://ip-10-197-0-164.eu-west-1.compute.internal:25000/query_plan?query_id=774a261fb94ad2bb:aab28b8b00000000}}
{{+--------------+----------+}}
{{| created_date | count(*) |}}
{{+--------------+----------+}}
{{| 20180528 | 17607943 |}}
{{| 20180529 | 20741097 |}}
{{| 20180530 | 17362364 |}}
{{| 20180531 | 16877228 |}}
{{| 20180601 | 17925671 | -> 44k MORE!! }}
{{| 20180602 | 13325080 |}}
{{| 20180603 | 12145131 |}}
{{| 20180604 | 3788161 |}}
{{+--------------+----------+}}
{{Fetched 8 row(s) in 0.67s}}
{{[10.197.0.164:21000] > select created_date, count(*) from base.usage_kudu
where created_date >= 20180525 group by created_date order by 1;}}
{{Query: select created_date, count(*) from base.usage_kudu where created_date
>= 20180525 group by created_date order by 1}}
{{Query submitted at: 2018-06-04 21:07:25 (Coordinator:
http://ip-10-197-0-164.eu-west-1.compute.internal:25000)}}
{{Query progress can be monitored at:
http://ip-10-197-0-164.eu-west-1.compute.internal:25000/query_plan?query_id=38483ad3ae5c8eb9:a538cb6300000000}}
{{+--------------+----------+}}
{{| created_date | count(*) |}}
{{+--------------+----------+}}
{{| 20180525 | 22309857 |}}
{{| 20180526 | 15268520 |}}
{{| 20180527 | 14939691 |}}
{{| 20180528 | 17607943 |}}
{{| 20180529 | 20741097 |}}
{{| 20180530 | 17362364 |}}
{{| 20180531 | 16903829 |}}
{{| 20180601 | 18047010 | -> 165k MORE!!!}}
{{| 20180602 | 13325080 |}}
{{| 20180603 | 12145131 |}}
{{| 20180604 | 3788161 |}}
{{+--------------+----------+}}
{{Fetched 11 row(s) in 0.85s}}
{{[10.197.0.164:21000] > select created_date, count(*) from base.usage_kudu
where created_date = 20180601 group by created_date;}}
{{Query: select created_date, count(*) from base.usage_kudu where created_date
= 20180601 group by created_date}}
{{Query submitted at: 2018-06-04 21:07:42 (Coordinator:
http://ip-10-197-0-164.eu-west-1.compute.internal:25000)}}
{{Query progress can be monitored at:
http://ip-10-197-0-164.eu-west-1.compute.internal:25000/query_plan?query_id=7343ba31f6b4c86f:621a7b8c00000000}}
{{+--------------+----------+}}
{{| created_date | count(*) |}}
{{+--------------+----------+}}
{{| 20180601 | 17881253 | -> CORRECT ONE}}
{{+--------------+----------+}}
{{Fetched 1 row(s) in 0.27s}}
{{[10.197.0.164:21000] > select created_date, count(*) from base.usage_kudu
where created_date >= 20180525 group by created_date order by 1;}}
{{Query: select created_date, count(*) from base.usage_kudu where created_date
>= 20180525 group by created_date order by 1}}
{{Query submitted at: 2018-06-04 21:12:02 (Coordinator:
http://ip-10-197-0-164.eu-west-1.compute.internal:25000)}}
{{Query progress can be monitored at:
http://ip-10-197-0-164.eu-west-1.compute.internal:25000/query_plan?query_id=4141df26117f35c3:9ab2f0700000000}}
{{+--------------+----------+}}
{{| created_date | count(*) |}}
{{+--------------+----------+}}
{{| 20180525 | 22309857 |}}
{{| 20180526 | 15268520 |}}
{{| 20180527 | 14939691 |}}
{{| 20180528 | 17607943 |}}
{{| 20180529 | 20741097 |}}
{{| 20180530 | 17362364 |}}
{{| 20180531 | 16903829 |}}
{{| 20180601 | 18047010 | -> AGAIN WRONG RESULT!!}}
{{| 20180602 | 13325080 |}}
{{| 20180603 | 12145131 |}}
{{| 20180604 | 3788161 |}}
{{+--------------+----------+}}
{{Fetched 11 row(s) in 1.04s}}{{}}{{}}
Again, no other inserts/selects/updates or deletes were running between these
statements on the cluster.
I checked the explain, if there is a difference,but it looks ok. But the result
is different!
{{[10.197.0.164:21000] > explain select created_date, count(*) from
base.usage_kudu where created_date = 20180601 group by created_date;}}
{{Query: explain select created_date, count(*) from base.usage_kudu where
created_date = 20180601 group by created_date}}
{{+--------------------------------------------------+}}
{{| Explain String |}}
{{+--------------------------------------------------+}}
{{| Max Per-Host Resource Reservation: Memory=3.94MB |}}
{{| Per-Host Resource Estimates: Memory=20.00MB |}}
{{| |}}
{{| PLAN-ROOT SINK |}}
{{| | |}}
{{| 04:EXCHANGE [UNPARTITIONED] |}}
{{| | |}}
{{| 03:AGGREGATE [FINALIZE] |}}
{{| | output: count:merge(*) |}}
{{| | group by: created_date |}}
{{| | |}}
{{| 02:EXCHANGE [HASH(created_date)] |}}
{{| | |}}
{{| 01:AGGREGATE [STREAMING] |}}
{{| | output: count(*) |}}
{{| | group by: created_date |}}
{{| | |}}
{{| 00:SCAN KUDU [base.usage_kudu] |}}
{{| kudu predicates: created_date = 20180601 |}}
{{+--------------------------------------------------+}}
{{Fetched 19 row(s) in 0.06s}}
{{[10.197.0.164:21000] > select created_date, count(*) from base.usage_kudu
where created_date = 20180601 group by created_date;}}
{{Query: select created_date, count(*) from base.usage_kudu where created_date
= 20180601 group by created_date}}
{{Query submitted at: 2018-06-04 21:17:21 (Coordinator:
http://ip-10-197-0-164.eu-west-1.compute.internal:25000)}}
{{Query progress can be monitored at:
http://ip-10-197-0-164.eu-west-1.compute.internal:25000/query_plan?query_id=c449aabea51e7456:612f096400000000}}
{{+--------------+----------+}}
{{| created_date | count(*) |}}
{{+--------------+----------+}}
{{| 20180601 | 17881253 |}}
{{+--------------+----------+}}
{{Fetched 1 row(s) in 0.38s}}
{{[10.197.0.164:21000] > explain select created_date, count(*) from
base.usage_kudu where created_date >= 20180525 group by created_date order by
1;}}
{{Query: explain select created_date, count(*) from base.usage_kudu where
created_date >= 20180525 group by created_date order by 1}}
{{+--------------------------------------------------+}}
{{| Explain String |}}
{{+--------------------------------------------------+}}
{{| Max Per-Host Resource Reservation: Memory=9.94MB |}}
{{| Per-Host Resource Estimates: Memory=26.00MB |}}
{{| |}}
{{| PLAN-ROOT SINK |}}
{{| | |}}
{{| 05:MERGING-EXCHANGE [UNPARTITIONED] |}}
{{| | order by: created_date ASC |}}
{{| | |}}
{{| 02:SORT |}}
{{| | order by: created_date ASC |}}
{{| | |}}
{{| 04:AGGREGATE [FINALIZE] |}}
{{| | output: count:merge(*) |}}
{{| | group by: created_date |}}
{{| | |}}
{{| 03:EXCHANGE [HASH(created_date)] |}}
{{| | |}}
{{| 01:AGGREGATE [STREAMING] |}}
{{| | output: count(*) |}}
{{| | group by: created_date |}}
{{| | |}}
{{| 00:SCAN KUDU [base.usage_kudu] |}}
{{| kudu predicates: created_date >= 20180525 |}}
{{+--------------------------------------------------+}}
{{Fetched 23 row(s) in 0.05s}}
{{[10.197.0.164:21000] > select created_date, count(*) from base.usage_kudu
where created_date >= 20180525 group by created_date order by 1;}}
{{Query: select created_date, count(*) from base.usage_kudu where created_date
>= 20180525 group by created_date order by 1}}
{{Query submitted at: 2018-06-04 21:17:32 (Coordinator:
http://ip-10-197-0-164.eu-west-1.compute.internal:25000)}}
{{Query progress can be monitored at:
http://ip-10-197-0-164.eu-west-1.compute.internal:25000/query_plan?query_id=bc4a36f2a7ad3280:c7b09a5100000000}}
{{+--------------+----------+}}
{{| created_date | count(*) |}}
{{+--------------+----------+}}
{{| 20180525 | 22309857 |}}
{{| 20180526 | 15268520 |}}
{{| 20180527 | 14939691 |}}
{{| 20180528 | 17607943 |}}
{{| 20180529 | 20741097 |}}
{{| 20180530 | 17362364 |}}
{{| 20180531 | 16903829 |}}
{{| 20180601 | 18047010 |}}
{{| 20180602 | 13325080 |}}
{{| 20180603 | 12145131 |}}
{{| 20180604 | 3788161 |}}
{{+--------------+----------+}}
{{Fetched 11 row(s) in 0.88s}}
> Different results returned by group by on count() metric
> --------------------------------------------------------
>
> Key: KUDU-2463
> URL: https://issues.apache.org/jira/browse/KUDU-2463
> Project: Kudu
> Issue Type: Bug
> Components: impala
> Affects Versions: 1.5.0
> Reporter: Tomas Farkas
> Priority: Critical
>
> Hi,
> I have a static table in Kudu, no inserts/updates or deletes are running on
> the cluster. The query returns DIFFERENT result when I change the where
> condition on one of the primary key columns, which is in the group_by list.
> The created_date is part of the PK and is type of int.
> PK contains subscriber, time, date, identifier and created_date.
> I tried to check if the inserted count is equal to the HDFS table, and
> noticed on one day, that the count differs based on the where criteria!!
> {quote}
> {{[10.197.0.164:21000] > select created_date, count(*) from base.usage_kudu
> where created_date >= 20180601 group by created_date;}}
> {{Query: select created_date, count(*) from base.usage_kudu where
> created_date >= 20180601 group by created_date}}
> {{Query submitted at: 2018-06-04 21:06:30 (Coordinator:
> [http://ip-10-197-0-164.eu-west-1.compute.internal:25000|http://ip-10-197-0-164.eu-west-1.compute.internal:25000/])}}
> {{Query progress can be monitored at:
> [http://ip-10-197-0-164.eu-west-1.compute.internal:25000/query_plan?query_id=ce4e92eda5aaa02f:ea07aa4600000000]}}
> {{+---------------+---------+}}
> {{| created_date | count(*) |}}
> {{+---------------+---------+}}
> {{| 20180603 | 12145131 |}}
> {{| 20180601 | 18076448 | -> 195k MORE!!!}}
> {{| 20180602 | 13325080 |}}
> {{| 20180604 | 3788161 |}}
> {{+---------------+---------+}}
> {{Fetched 4 row(s) in 0.37s}}
> {{[10.197.0.164:21000] >}}
> {{[10.197.0.164:21000] >}}
> {{[10.197.0.164:21000] > select created_date, count(*) from base.usage_kudu
> where created_date >= 20180601 group by created_date order by 1;}}
> {{Query: select created_date, count(*) from base.usage_kudu where
> created_date >= 20180601 group by created_date order by 1}}
> {{Query submitted at: 2018-06-04 21:06:55 (Coordinator:
> [http://ip-10-197-0-164.eu-west-1.compute.internal:25000|http://ip-10-197-0-164.eu-west-1.compute.internal:25000/])}}
> {{Query progress can be monitored at:
> [http://ip-10-197-0-164.eu-west-1.compute.internal:25000/query_plan?query_id=d541a9dda19e28e4:be4a2ca000000000]}}
> {{+---------------+---------+}}
> {{| created_date | count(*) |}}
> {{+---------------+---------+}}
> {{| 20180601 | 18076448 | -> 195k MORE!!!}}
> {{| 20180602 | 13325080 |}}
> {{| 20180603 | 12145131 |}}
> {{| 20180604 | 3788161 |}}
> {{+---------------+---------+}}
> {{Fetched 4 row(s) in 1.14s}}
> {{[10.197.0.164:21000] > select created_date, count(*) from base.usage_kudu
> where created_date >= 20180528 group by created_date order by 1;}}
> {{Query: select created_date, count(*) from base.usage_kudu where
> created_date >= 20180528 group by created_date order by 1}}
> {{Query submitted at: 2018-06-04 21:07:12 (Coordinator:
> [http://ip-10-197-0-164.eu-west-1.compute.internal:25000|http://ip-10-197-0-164.eu-west-1.compute.internal:25000/])}}
> {{Query progress can be monitored at:
> [http://ip-10-197-0-164.eu-west-1.compute.internal:25000/query_plan?query_id=774a261fb94ad2bb:aab28b8b00000000]}}
> {{+---------------+---------+}}
> {{| created_date | count(*) |}}
> {{+---------------+---------+}}
> {{| 20180528 | 17607943 |}}
> {{| 20180529 | 20741097 |}}
> {{| 20180530 | 17362364 |}}
> {{| 20180531 | 16877228 |}}
> \{{| 20180601 | 17925671 | -> 44k MORE!! }}
> {{| 20180602 | 13325080 |}}
> {{| 20180603 | 12145131 |}}
> {{| 20180604 | 3788161 |}}
> {{+---------------+---------+}}
> {{Fetched 8 row(s) in 0.67s}}
> {{[10.197.0.164:21000] > select created_date, count(*) from base.usage_kudu
> where created_date >= 20180525 group by created_date order by 1;}}
> {{Query: select created_date, count(*) from base.usage_kudu where
> created_date >= 20180525 group by created_date order by 1}}
> {{Query submitted at: 2018-06-04 21:07:25 (Coordinator:
> [http://ip-10-197-0-164.eu-west-1.compute.internal:25000|http://ip-10-197-0-164.eu-west-1.compute.internal:25000/])}}
> {{Query progress can be monitored at:
> [http://ip-10-197-0-164.eu-west-1.compute.internal:25000/query_plan?query_id=38483ad3ae5c8eb9:a538cb6300000000]}}
> {{+---------------+---------+}}
> {{| created_date | count(*) |}}
> {{+---------------+---------+}}
> {{| 20180525 | 22309857 |}}
> {{| 20180526 | 15268520 |}}
> {{| 20180527 | 14939691 |}}
> {{| 20180528 | 17607943 |}}
> {{| 20180529 | 20741097 |}}
> {{| 20180530 | 17362364 |}}
> {{| 20180531 | 16903829 |}}
> {{| 20180601 | 18047010 | -> 165k MORE!!!}}
> {{| 20180602 | 13325080 |}}
> {{| 20180603 | 12145131 |}}
> {{| 20180604 | 3788161 |}}
> {{+---------------+---------+}}
> {{Fetched 11 row(s) in 0.85s}}
> {{[10.197.0.164:21000] > select created_date, count(*) from base.usage_kudu
> where created_date = 20180601 group by created_date;}}
> {{Query: select created_date, count(*) from base.usage_kudu where
> created_date = 20180601 group by created_date}}
> {{Query submitted at: 2018-06-04 21:07:42 (Coordinator:
> [http://ip-10-197-0-164.eu-west-1.compute.internal:25000|http://ip-10-197-0-164.eu-west-1.compute.internal:25000/])}}
> {{Query progress can be monitored at:
> [http://ip-10-197-0-164.eu-west-1.compute.internal:25000/query_plan?query_id=7343ba31f6b4c86f:621a7b8c00000000]}}
> {{+---------------+---------+}}
> {{| created_date | count(*) |}}
> {{+---------------+---------+}}
> {{| 20180601 | 17881253 | -> CORRECT ONE}}
> {{+---------------+---------+}}
> {{Fetched 1 row(s) in 0.27s}}
> {{[10.197.0.164:21000] > select created_date, count(*) from base.usage_kudu
> where created_date >= 20180525 group by created_date order by 1;}}
> {{Query: select created_date, count(*) from base.usage_kudu where
> created_date >= 20180525 group by created_date order by 1}}
> {{Query submitted at: 2018-06-04 21:12:02 (Coordinator:
> [http://ip-10-197-0-164.eu-west-1.compute.internal:25000|http://ip-10-197-0-164.eu-west-1.compute.internal:25000/])}}
> {{Query progress can be monitored at:
> [http://ip-10-197-0-164.eu-west-1.compute.internal:25000/query_plan?query_id=4141df26117f35c3:9ab2f0700000000]}}
> {{+---------------+---------+}}
> {{| created_date | count(*) |}}
> {{+---------------+---------+}}
> {{| 20180525 | 22309857 |}}
> {{| 20180526 | 15268520 |}}
> {{| 20180527 | 14939691 |}}
> {{| 20180528 | 17607943 |}}
> {{| 20180529 | 20741097 |}}
> {{| 20180530 | 17362364 |}}
> {{| 20180531 | 16903829 |}}
> {{| 20180601 | 18047010 | -> AGAIN WRONG RESULT!!}}
> {{| 20180602 | 13325080 |}}
> {{| 20180603 | 12145131 |}}
> {{| 20180604 | 3788161 |}}
> {{+---------------+---------+}}
> {{Fetched 11 row(s) in 1.04s}}{{}}{{}}
> Again, no other inserts/selects/updates or deletes were running between these
> statements on the cluster.
>
> I checked the explain, if there is a difference,but it looks ok. But the
> result is different!
>
> {{[10.197.0.164:21000] > explain select created_date, count(*) from
> base.usage_kudu where created_date = 20180601 group by created_date;}}
> {{Query: explain select created_date, count(*) from base.usage_kudu where
> created_date = 20180601 group by created_date}}
> {{+--------------------------------------------------+}}
> {{| Explain String |}}
> {{+--------------------------------------------------+}}
> {{| Max Per-Host Resource Reservation: Memory=3.94MB |}}
> {{| Per-Host Resource Estimates: Memory=20.00MB |}}
> {{| |}}
> {{| PLAN-ROOT SINK |}}
> {{| | |}}
> {{| 04:EXCHANGE [UNPARTITIONED] |}}
> {{| | |}}
> {{| 03:AGGREGATE [FINALIZE] |}}
> {{| | output: count:merge(*) |}}
> {{| | group by: created_date |}}
> {{| | |}}
> {{| 02:EXCHANGE [HASH(created_date)] |}}
> {{| | |}}
> {{| 01:AGGREGATE [STREAMING] |}}
> {{| | output: count(*) |}}
> {{| | group by: created_date |}}
> {{| | |}}
> {{| 00:SCAN KUDU [base.usage_kudu] |}}
> {{| kudu predicates: created_date = 20180601 |}}
> {{+--------------------------------------------------+}}
> {{Fetched 19 row(s) in 0.06s}}
> {{[10.197.0.164:21000] > select created_date, count(*) from base.usage_kudu
> where created_date = 20180601 group by created_date;}}
> {{Query: select created_date, count(*) from base.usage_kudu where
> created_date = 20180601 group by created_date}}
> {{Query submitted at: 2018-06-04 21:17:21 (Coordinator:
> [http://ip-10-197-0-164.eu-west-1.compute.internal:25000|http://ip-10-197-0-164.eu-west-1.compute.internal:25000/])}}
> {{Query progress can be monitored at:
> [http://ip-10-197-0-164.eu-west-1.compute.internal:25000/query_plan?query_id=c449aabea51e7456:612f096400000000]}}
> {{+---------------+---------+}}
> {{| created_date | count(*) |}}
> {{+---------------+---------+}}
> {{| 20180601 | 17881253 |}}
> {{+---------------+---------+}}
> {{Fetched 1 row(s) in 0.38s}}
> {{[10.197.0.164:21000] > explain select created_date, count(*) from
> base.usage_kudu where created_date >= 20180525 group by created_date order by
> 1;}}
> {{Query: explain select created_date, count(*) from base.usage_kudu where
> created_date >= 20180525 group by created_date order by 1}}
> {{+--------------------------------------------------+}}
> {{| Explain String |}}
> {{+--------------------------------------------------+}}
> {{| Max Per-Host Resource Reservation: Memory=9.94MB |}}
> {{| Per-Host Resource Estimates: Memory=26.00MB |}}
> {{| |}}
> {{| PLAN-ROOT SINK |}}
> {{| | |}}
> {{| 05:MERGING-EXCHANGE [UNPARTITIONED] |}}
> {{| | order by: created_date ASC |}}
> {{| | |}}
> {{| 02:SORT |}}
> {{| | order by: created_date ASC |}}
> {{| | |}}
> {{| 04:AGGREGATE [FINALIZE] |}}
> {{| | output: count:merge(*) |}}
> {{| | group by: created_date |}}
> {{| | |}}
> {{| 03:EXCHANGE [HASH(created_date)] |}}
> {{| | |}}
> {{| 01:AGGREGATE [STREAMING] |}}
> {{| | output: count(*) |}}
> {{| | group by: created_date |}}
> {{| | |}}
> {{| 00:SCAN KUDU [base.usage_kudu] |}}
> {{| kudu predicates: created_date >= 20180525 |}}
> {{+--------------------------------------------------+}}
> {{Fetched 23 row(s) in 0.05s}}
> {{[10.197.0.164:21000] > select created_date, count(*) from base.usage_kudu
> where created_date >= 20180525 group by created_date order by 1;}}
> {{Query: select created_date, count(*) from base.usage_kudu where
> created_date >= 20180525 group by created_date order by 1}}
> {{Query submitted at: 2018-06-04 21:17:32 (Coordinator:
> [http://ip-10-197-0-164.eu-west-1.compute.internal:25000|http://ip-10-197-0-164.eu-west-1.compute.internal:25000/])}}
> {{Query progress can be monitored at:
> [http://ip-10-197-0-164.eu-west-1.compute.internal:25000/query_plan?query_id=bc4a36f2a7ad3280:c7b09a5100000000]}}
> {{+---------------+---------+}}
> {{| created_date | count(*) |}}
> {{+---------------+---------+}}
> {{| 20180525 | 22309857 |}}
> {{| 20180526 | 15268520 |}}
> {{| 20180527 | 14939691 |}}
> {{| 20180528 | 17607943 |}}
> {{| 20180529 | 20741097 |}}
> {{| 20180530 | 17362364 |}}
> {{| 20180531 | 16903829 |}}
> {{| 20180601 | 18047010 |}}
> {{| 20180602 | 13325080 |}}
> {{| 20180603 | 12145131 |}}
> {{| 20180604 | 3788161 |}}
> {{+---------------+---------+}}
> {{Fetched 11 row(s) in 0.88s}}
>
> {quote}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)