This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 92d0a5c527 Add explain plans for ClickBench queries (#20666)
92d0a5c527 is described below
commit 92d0a5c527ed38b6ce3bad673f6cf5a372c71b0c
Author: Andrew Lamb <[email protected]>
AuthorDate: Wed Mar 4 09:06:15 2026 -0500
Add explain plans for ClickBench queries (#20666)
## Which issue does this PR close?
- Part of #18489
- Related to https://github.com/apache/datafusion/pull/20180
## Rationale for this change
This looks like a monster PR but I think it will be quite easy to review
(it just adds some new `EXPLAIN` tests). If it would be helpful I can
break it into smaller pieces
I want to improve the plans for ClickBench Query 29
However, the plans for the ClickBench queries are not in our tests
anywhere (so when I make the improvements in
https://github.com/apache/datafusion/pull/20665 no explain plan tests
change)
So to start, let's start with adding the explain plans for all the
queries in clickbench.slt to so it is clear what our current plans look
like as well as to make it clear what the change of plans are
## What changes are included in this PR?
Add explain plans to some .slt tests
## Are these changes tested?
Only tests
## Are there any user-facing changes?
No, this only adds tests
---
datafusion/sqllogictest/test_files/clickbench.slt | 936 +++++++++++++++++++++-
1 file changed, 931 insertions(+), 5 deletions(-)
diff --git a/datafusion/sqllogictest/test_files/clickbench.slt
b/datafusion/sqllogictest/test_files/clickbench.slt
index 42b7cfafda..10059664ad 100644
--- a/datafusion/sqllogictest/test_files/clickbench.slt
+++ b/datafusion/sqllogictest/test_files/clickbench.slt
@@ -15,10 +15,9 @@
# specific language governing permissions and limitations
# under the License.
-
-# This file contains the clickbench schema and queries
-# and the first 10 rows of data. Since ClickBench contains case sensitive
queries
-# this is also a good test of that usecase too
+## Notes: This file contains the ClickBench schema and queries and the first 10
+## rows of data. Since ClickBench contains case sensitive identifiers (e.g.
+## "EventDate") this is also a good test of that usecase too
# create.sql came from
#
https://github.com/ClickHouse/ClickBench/blob/8b9e3aa05ea18afa427f14909ddc678b8ef0d5e6/datafusion/create.sql
@@ -31,6 +30,8 @@ STORED AS PARQUET
LOCATION '../core/tests/data/clickbench_hits_10.parquet';
# ClickBench encodes EventDate as UInt16 days since epoch.
+# So we define this view to convert it to the correct DATE type (this is done
+# in the ClickBench runner as well, see
https://github.com/ClickHouse/ClickBench/pull/803
statement ok
CREATE VIEW hits AS
SELECT * EXCEPT ("EventDate"),
@@ -38,6 +39,7 @@ SELECT * EXCEPT ("EventDate"),
FROM hits_raw;
# Verify EventDate transformation from UInt16 to DATE
+
query D
SELECT "EventDate" FROM hits LIMIT 1;
----
@@ -52,45 +54,203 @@ SELECT "EventDate" FROM hits_raw LIMIT 1;
# queries.sql came from
#
https://github.com/ClickHouse/ClickBench/blob/8b9e3aa05ea18afa427f14909ddc678b8ef0d5e6/datafusion/queries.sql
+## Q0
+query TT
+EXPLAIN SELECT COUNT(*) FROM hits;
+----
+logical_plan
+01)Projection: count(Int64(1)) AS count(*)
+02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]]
+03)----SubqueryAlias: hits
+04)------TableScan: hits_raw projection=[]
+physical_plan
+01)ProjectionExec: expr=[10 as count(*)]
+02)--PlaceholderRowExec
+
query I
SELECT COUNT(*) FROM hits;
----
10
+## Q1
+query TT
+EXPLAIN SELECT COUNT(*) FROM hits WHERE "AdvEngineID" <> 0;
+----
+logical_plan
+01)Projection: count(Int64(1)) AS count(*)
+02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]]
+03)----SubqueryAlias: hits
+04)------Projection:
+05)--------Filter: hits_raw.AdvEngineID != Int16(0)
+06)----------TableScan: hits_raw projection=[AdvEngineID],
partial_filters=[hits_raw.AdvEngineID != Int16(0)]
+physical_plan
+01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)]
+02)--AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))]
+03)----CoalescePartitionsExec
+04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))]
+05)--------FilterExec: AdvEngineID@0 != 0, projection=[]
+06)----------RepartitionExec: partitioning=RoundRobinBatch(4),
input_partitions=1
+07)------------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[AdvEngineID], file_type=parquet, predicate=AdvEngineID@40 != 0,
pruning_predicate=AdvEngineID_null_count@2 != row_count@3 AND
(AdvEngineID_min@0 != 0 OR 0 != AdvEngineID_max@1),
required_guarantees=[AdvEngineID not in (0)]
+
query I
SELECT COUNT(*) FROM hits WHERE "AdvEngineID" <> 0;
----
0
+query TT
+EXPLAIN SELECT SUM("AdvEngineID"), COUNT(*), AVG("ResolutionWidth") FROM hits;
+----
+logical_plan
+01)Projection: sum(hits.AdvEngineID), count(Int64(1)) AS count(*),
avg(hits.ResolutionWidth)
+02)--Aggregate: groupBy=[[]], aggr=[[sum(CAST(hits.AdvEngineID AS Int64)),
count(Int64(1)), avg(CAST(hits.ResolutionWidth AS Float64))]]
+03)----SubqueryAlias: hits
+04)------TableScan: hits_raw projection=[ResolutionWidth, AdvEngineID]
+physical_plan
+01)ProjectionExec: expr=[sum(hits.AdvEngineID)@0 as sum(hits.AdvEngineID),
count(Int64(1))@1 as count(*), avg(hits.ResolutionWidth)@2 as
avg(hits.ResolutionWidth)]
+02)--AggregateExec: mode=Single, gby=[], aggr=[sum(hits.AdvEngineID),
count(Int64(1)), avg(hits.ResolutionWidth)]
+03)----DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[ResolutionWidth, AdvEngineID], file_type=parquet
+
query IIR
SELECT SUM("AdvEngineID"), COUNT(*), AVG("ResolutionWidth") FROM hits;
----
0 10 0
+## Q3
+query TT
+EXPLAIN SELECT AVG("UserID") FROM hits;
+----
+logical_plan
+01)Aggregate: groupBy=[[]], aggr=[[avg(CAST(hits.UserID AS Float64))]]
+02)--SubqueryAlias: hits
+03)----TableScan: hits_raw projection=[UserID]
+physical_plan
+01)AggregateExec: mode=Single, gby=[], aggr=[avg(hits.UserID)]
+02)--DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[UserID], file_type=parquet
+
query R
SELECT AVG("UserID") FROM hits;
----
-304548765855551740
+## Q4
+query TT
+EXPLAIN SELECT COUNT(DISTINCT "UserID") FROM hits;
+----
+logical_plan
+01)Projection: count(alias1) AS count(DISTINCT hits.UserID)
+02)--Aggregate: groupBy=[[]], aggr=[[count(alias1)]]
+03)----Aggregate: groupBy=[[hits.UserID AS alias1]], aggr=[[]]
+04)------SubqueryAlias: hits
+05)--------TableScan: hits_raw projection=[UserID]
+physical_plan
+01)ProjectionExec: expr=[count(alias1)@0 as count(DISTINCT hits.UserID)]
+02)--AggregateExec: mode=Final, gby=[], aggr=[count(alias1)]
+03)----CoalescePartitionsExec
+04)------AggregateExec: mode=Partial, gby=[], aggr=[count(alias1)]
+05)--------AggregateExec: mode=FinalPartitioned, gby=[alias1@0 as alias1],
aggr=[]
+06)----------RepartitionExec: partitioning=Hash([alias1@0], 4),
input_partitions=1
+07)------------AggregateExec: mode=Partial, gby=[UserID@0 as alias1], aggr=[]
+08)--------------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[UserID], file_type=parquet
+
query I
SELECT COUNT(DISTINCT "UserID") FROM hits;
----
5
+## Q5
+query TT
+EXPLAIN SELECT COUNT(DISTINCT "SearchPhrase") FROM hits;
+----
+logical_plan
+01)Projection: count(alias1) AS count(DISTINCT hits.SearchPhrase)
+02)--Aggregate: groupBy=[[]], aggr=[[count(alias1)]]
+03)----Aggregate: groupBy=[[hits.SearchPhrase AS alias1]], aggr=[[]]
+04)------SubqueryAlias: hits
+05)--------TableScan: hits_raw projection=[SearchPhrase]
+physical_plan
+01)ProjectionExec: expr=[count(alias1)@0 as count(DISTINCT hits.SearchPhrase)]
+02)--AggregateExec: mode=Final, gby=[], aggr=[count(alias1)]
+03)----CoalescePartitionsExec
+04)------AggregateExec: mode=Partial, gby=[], aggr=[count(alias1)]
+05)--------AggregateExec: mode=FinalPartitioned, gby=[alias1@0 as alias1],
aggr=[]
+06)----------RepartitionExec: partitioning=Hash([alias1@0], 4),
input_partitions=1
+07)------------AggregateExec: mode=Partial, gby=[SearchPhrase@0 as alias1],
aggr=[]
+08)--------------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[SearchPhrase], file_type=parquet
+
query I
SELECT COUNT(DISTINCT "SearchPhrase") FROM hits;
----
1
+## Q6
+query TT
+EXPLAIN SELECT MIN("EventDate"), MAX("EventDate") FROM hits;
+----
+logical_plan
+01)Aggregate: groupBy=[[]], aggr=[[min(hits.EventDate), max(hits.EventDate)]]
+02)--SubqueryAlias: hits
+03)----Projection: CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) AS
EventDate
+04)------TableScan: hits_raw projection=[EventDate]
+physical_plan
+01)AggregateExec: mode=Single, gby=[], aggr=[min(hits.EventDate),
max(hits.EventDate)]
+02)--DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[CAST(CAST(EventDate@5 AS Int32) AS Date32) as EventDate],
file_type=parquet
+
query DD
SELECT MIN("EventDate"), MAX("EventDate") FROM hits;
----
2013-07-15 2013-07-15
+## Q7
+query TT
+EXPLAIN SELECT "AdvEngineID", COUNT(*) FROM hits WHERE "AdvEngineID" <> 0
GROUP BY "AdvEngineID" ORDER BY COUNT(*) DESC;
+----
+logical_plan
+01)Projection: hits.AdvEngineID, count(*)
+02)--Sort: count(Int64(1)) AS count(*) AS count(*) DESC NULLS FIRST
+03)----Projection: hits.AdvEngineID, count(Int64(1)) AS count(*),
count(Int64(1))
+04)------Aggregate: groupBy=[[hits.AdvEngineID]], aggr=[[count(Int64(1))]]
+05)--------SubqueryAlias: hits
+06)----------Filter: hits_raw.AdvEngineID != Int16(0)
+07)------------TableScan: hits_raw projection=[AdvEngineID],
partial_filters=[hits_raw.AdvEngineID != Int16(0)]
+physical_plan
+01)ProjectionExec: expr=[AdvEngineID@0 as AdvEngineID, count(*)@1 as count(*)]
+02)--SortPreservingMergeExec: [count(Int64(1))@2 DESC]
+03)----SortExec: expr=[count(*)@1 DESC], preserve_partitioning=[true]
+04)------ProjectionExec: expr=[AdvEngineID@0 as AdvEngineID, count(Int64(1))@1
as count(*), count(Int64(1))@1 as count(Int64(1))]
+05)--------AggregateExec: mode=FinalPartitioned, gby=[AdvEngineID@0 as
AdvEngineID], aggr=[count(Int64(1))]
+06)----------RepartitionExec: partitioning=Hash([AdvEngineID@0], 4),
input_partitions=4
+07)------------AggregateExec: mode=Partial, gby=[AdvEngineID@0 as
AdvEngineID], aggr=[count(Int64(1))]
+08)--------------FilterExec: AdvEngineID@0 != 0
+09)----------------RepartitionExec: partitioning=RoundRobinBatch(4),
input_partitions=1
+10)------------------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[AdvEngineID], file_type=parquet, predicate=AdvEngineID@40 != 0,
pruning_predicate=AdvEngineID_null_count@2 != row_count@3 AND
(AdvEngineID_min@0 != 0 OR 0 != AdvEngineID_max@1),
required_guarantees=[AdvEngineID not in (0)]
+
query II
SELECT "AdvEngineID", COUNT(*) FROM hits WHERE "AdvEngineID" <> 0 GROUP BY
"AdvEngineID" ORDER BY COUNT(*) DESC;
----
+## Q8
+query TT
+EXPLAIN SELECT "RegionID", COUNT(DISTINCT "UserID") AS u FROM hits GROUP BY
"RegionID" ORDER BY u DESC LIMIT 10;
+----
+logical_plan
+01)Sort: u DESC NULLS FIRST, fetch=10
+02)--Projection: hits.RegionID, count(alias1) AS u
+03)----Aggregate: groupBy=[[hits.RegionID]], aggr=[[count(alias1)]]
+04)------Aggregate: groupBy=[[hits.RegionID, hits.UserID AS alias1]], aggr=[[]]
+05)--------SubqueryAlias: hits
+06)----------TableScan: hits_raw projection=[RegionID, UserID]
+physical_plan
+01)SortPreservingMergeExec: [u@1 DESC], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[u@1 DESC], preserve_partitioning=[true]
+03)----ProjectionExec: expr=[RegionID@0 as RegionID, count(alias1)@1 as u]
+04)------AggregateExec: mode=FinalPartitioned, gby=[RegionID@0 as RegionID],
aggr=[count(alias1)]
+05)--------RepartitionExec: partitioning=Hash([RegionID@0], 4),
input_partitions=4
+06)----------AggregateExec: mode=Partial, gby=[RegionID@0 as RegionID],
aggr=[count(alias1)]
+07)------------AggregateExec: mode=FinalPartitioned, gby=[RegionID@0 as
RegionID, alias1@1 as alias1], aggr=[]
+08)--------------RepartitionExec: partitioning=Hash([RegionID@0, alias1@1],
4), input_partitions=1
+09)----------------AggregateExec: mode=Partial, gby=[RegionID@0 as RegionID,
UserID@1 as alias1], aggr=[]
+10)------------------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[RegionID, UserID], file_type=parquet
+
query II rowsort
SELECT "RegionID", COUNT(DISTINCT "UserID") AS u FROM hits GROUP BY "RegionID"
ORDER BY u DESC LIMIT 10;
----
@@ -99,6 +259,25 @@ SELECT "RegionID", COUNT(DISTINCT "UserID") AS u FROM hits
GROUP BY "RegionID" O
39 1
839 2
+## Q9
+query TT
+EXPLAIN SELECT "RegionID", SUM("AdvEngineID"), COUNT(*) AS c,
AVG("ResolutionWidth"), COUNT(DISTINCT "UserID") FROM hits GROUP BY "RegionID"
ORDER BY c DESC LIMIT 10;
+----
+logical_plan
+01)Sort: c DESC NULLS FIRST, fetch=10
+02)--Projection: hits.RegionID, sum(hits.AdvEngineID), count(Int64(1)) AS
count(*) AS c, avg(hits.ResolutionWidth), count(DISTINCT hits.UserID)
+03)----Aggregate: groupBy=[[hits.RegionID]], aggr=[[sum(CAST(hits.AdvEngineID
AS Int64)), count(Int64(1)), avg(CAST(hits.ResolutionWidth AS Float64)),
count(DISTINCT hits.UserID)]]
+04)------SubqueryAlias: hits
+05)--------TableScan: hits_raw projection=[RegionID, UserID, ResolutionWidth,
AdvEngineID]
+physical_plan
+01)SortPreservingMergeExec: [c@2 DESC], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[c@2 DESC], preserve_partitioning=[true]
+03)----ProjectionExec: expr=[RegionID@0 as RegionID, sum(hits.AdvEngineID)@1
as sum(hits.AdvEngineID), count(Int64(1))@2 as c, avg(hits.ResolutionWidth)@3
as avg(hits.ResolutionWidth), count(DISTINCT hits.UserID)@4 as count(DISTINCT
hits.UserID)]
+04)------AggregateExec: mode=FinalPartitioned, gby=[RegionID@0 as RegionID],
aggr=[sum(hits.AdvEngineID), count(Int64(1)), avg(hits.ResolutionWidth),
count(DISTINCT hits.UserID)]
+05)--------RepartitionExec: partitioning=Hash([RegionID@0], 4),
input_partitions=1
+06)----------AggregateExec: mode=Partial, gby=[RegionID@0 as RegionID],
aggr=[sum(hits.AdvEngineID), count(Int64(1)), avg(hits.ResolutionWidth),
count(DISTINCT hits.UserID)]
+07)------------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[RegionID, UserID, ResolutionWidth, AdvEngineID], file_type=parquet
+
query IIIRI rowsort
SELECT "RegionID", SUM("AdvEngineID"), COUNT(*) AS c, AVG("ResolutionWidth"),
COUNT(DISTINCT "UserID") FROM hits GROUP BY "RegionID" ORDER BY c DESC LIMIT 10;
----
@@ -107,26 +286,169 @@ SELECT "RegionID", SUM("AdvEngineID"), COUNT(*) AS c,
AVG("ResolutionWidth"), CO
39 0 1 0 1
839 0 6 0 2
+## Q10
+query TT
+EXPLAIN SELECT "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM hits
WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhoneModel" ORDER BY u DESC
LIMIT 10;
+----
+logical_plan
+01)Sort: u DESC NULLS FIRST, fetch=10
+02)--Projection: hits.MobilePhoneModel, count(alias1) AS u
+03)----Aggregate: groupBy=[[hits.MobilePhoneModel]], aggr=[[count(alias1)]]
+04)------Aggregate: groupBy=[[hits.MobilePhoneModel, hits.UserID AS alias1]],
aggr=[[]]
+05)--------SubqueryAlias: hits
+06)----------Filter: hits_raw.MobilePhoneModel != Utf8View("")
+07)------------TableScan: hits_raw projection=[UserID, MobilePhoneModel],
partial_filters=[hits_raw.MobilePhoneModel != Utf8View("")]
+physical_plan
+01)SortPreservingMergeExec: [u@1 DESC], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[u@1 DESC], preserve_partitioning=[true]
+03)----ProjectionExec: expr=[MobilePhoneModel@0 as MobilePhoneModel,
count(alias1)@1 as u]
+04)------AggregateExec: mode=FinalPartitioned, gby=[MobilePhoneModel@0 as
MobilePhoneModel], aggr=[count(alias1)]
+05)--------RepartitionExec: partitioning=Hash([MobilePhoneModel@0], 4),
input_partitions=4
+06)----------AggregateExec: mode=Partial, gby=[MobilePhoneModel@0 as
MobilePhoneModel], aggr=[count(alias1)]
+07)------------AggregateExec: mode=FinalPartitioned, gby=[MobilePhoneModel@0
as MobilePhoneModel, alias1@1 as alias1], aggr=[]
+08)--------------RepartitionExec: partitioning=Hash([MobilePhoneModel@0,
alias1@1], 4), input_partitions=4
+09)----------------AggregateExec: mode=Partial, gby=[MobilePhoneModel@1 as
MobilePhoneModel, UserID@0 as alias1], aggr=[]
+10)------------------FilterExec: MobilePhoneModel@1 !=
+11)--------------------RepartitionExec: partitioning=RoundRobinBatch(4),
input_partitions=1
+12)----------------------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[UserID, MobilePhoneModel], file_type=parquet,
predicate=MobilePhoneModel@34 != ,
pruning_predicate=MobilePhoneModel_null_count@2 != row_count@3 AND
(MobilePhoneModel_min@0 != OR != MobilePhoneModel_max@1),
required_guarantees=[MobilePhoneModel not in ()]
+
query TI
SELECT "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM hits WHERE
"MobilePhoneModel" <> '' GROUP BY "MobilePhoneModel" ORDER BY u DESC LIMIT 10;
----
+## Q11
+query TT
+EXPLAIN SELECT "MobilePhone", "MobilePhoneModel", COUNT(DISTINCT "UserID") AS
u FROM hits WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhone",
"MobilePhoneModel" ORDER BY u DESC LIMIT 10;
+----
+logical_plan
+01)Sort: u DESC NULLS FIRST, fetch=10
+02)--Projection: hits.MobilePhone, hits.MobilePhoneModel, count(alias1) AS u
+03)----Aggregate: groupBy=[[hits.MobilePhone, hits.MobilePhoneModel]],
aggr=[[count(alias1)]]
+04)------Aggregate: groupBy=[[hits.MobilePhone, hits.MobilePhoneModel,
hits.UserID AS alias1]], aggr=[[]]
+05)--------SubqueryAlias: hits
+06)----------Filter: hits_raw.MobilePhoneModel != Utf8View("")
+07)------------TableScan: hits_raw projection=[UserID, MobilePhone,
MobilePhoneModel], partial_filters=[hits_raw.MobilePhoneModel != Utf8View("")]
+physical_plan
+01)SortPreservingMergeExec: [u@2 DESC], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[u@2 DESC], preserve_partitioning=[true]
+03)----ProjectionExec: expr=[MobilePhone@0 as MobilePhone, MobilePhoneModel@1
as MobilePhoneModel, count(alias1)@2 as u]
+04)------AggregateExec: mode=FinalPartitioned, gby=[MobilePhone@0 as
MobilePhone, MobilePhoneModel@1 as MobilePhoneModel], aggr=[count(alias1)]
+05)--------RepartitionExec: partitioning=Hash([MobilePhone@0,
MobilePhoneModel@1], 4), input_partitions=4
+06)----------AggregateExec: mode=Partial, gby=[MobilePhone@0 as MobilePhone,
MobilePhoneModel@1 as MobilePhoneModel], aggr=[count(alias1)]
+07)------------AggregateExec: mode=FinalPartitioned, gby=[MobilePhone@0 as
MobilePhone, MobilePhoneModel@1 as MobilePhoneModel, alias1@2 as alias1],
aggr=[]
+08)--------------RepartitionExec: partitioning=Hash([MobilePhone@0,
MobilePhoneModel@1, alias1@2], 4), input_partitions=4
+09)----------------AggregateExec: mode=Partial, gby=[MobilePhone@1 as
MobilePhone, MobilePhoneModel@2 as MobilePhoneModel, UserID@0 as alias1],
aggr=[]
+10)------------------FilterExec: MobilePhoneModel@2 !=
+11)--------------------RepartitionExec: partitioning=RoundRobinBatch(4),
input_partitions=1
+12)----------------------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[UserID, MobilePhone, MobilePhoneModel], file_type=parquet,
predicate=MobilePhoneModel@34 != ,
pruning_predicate=MobilePhoneModel_null_count@2 != row_count@3 AND
(MobilePhoneModel_min@0 != OR != MobilePhoneModel_max@1),
required_guarantees=[MobilePhoneModel not in ()]
+
query ITI
SELECT "MobilePhone", "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM
hits WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhone", "MobilePhoneModel"
ORDER BY u DESC LIMIT 10;
----
+## Q12
+query TT
+EXPLAIN SELECT "SearchPhrase", COUNT(*) AS c FROM hits WHERE "SearchPhrase" <>
'' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10;
+----
+logical_plan
+01)Sort: c DESC NULLS FIRST, fetch=10
+02)--Projection: hits.SearchPhrase, count(Int64(1)) AS count(*) AS c
+03)----Aggregate: groupBy=[[hits.SearchPhrase]], aggr=[[count(Int64(1))]]
+04)------SubqueryAlias: hits
+05)--------Filter: hits_raw.SearchPhrase != Utf8View("")
+06)----------TableScan: hits_raw projection=[SearchPhrase],
partial_filters=[hits_raw.SearchPhrase != Utf8View("")]
+physical_plan
+01)SortPreservingMergeExec: [c@1 DESC], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[c@1 DESC], preserve_partitioning=[true]
+03)----ProjectionExec: expr=[SearchPhrase@0 as SearchPhrase, count(Int64(1))@1
as c]
+04)------AggregateExec: mode=FinalPartitioned, gby=[SearchPhrase@0 as
SearchPhrase], aggr=[count(Int64(1))]
+05)--------RepartitionExec: partitioning=Hash([SearchPhrase@0], 4),
input_partitions=4
+06)----------AggregateExec: mode=Partial, gby=[SearchPhrase@0 as
SearchPhrase], aggr=[count(Int64(1))]
+07)------------FilterExec: SearchPhrase@0 !=
+08)--------------RepartitionExec: partitioning=RoundRobinBatch(4),
input_partitions=1
+09)----------------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[SearchPhrase], file_type=parquet, predicate=SearchPhrase@39 != ,
pruning_predicate=SearchPhrase_null_count@2 != row_count@3 AND
(SearchPhrase_min@0 != OR != SearchPhrase_max@1),
required_guarantees=[SearchPhrase not in ()]
+
query TI
SELECT "SearchPhrase", COUNT(*) AS c FROM hits WHERE "SearchPhrase" <> ''
GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10;
----
+## Q13
+query TT
+EXPLAIN SELECT "SearchPhrase", COUNT(DISTINCT "UserID") AS u FROM hits WHERE
"SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY u DESC LIMIT 10;
+----
+logical_plan
+01)Sort: u DESC NULLS FIRST, fetch=10
+02)--Projection: hits.SearchPhrase, count(alias1) AS u
+03)----Aggregate: groupBy=[[hits.SearchPhrase]], aggr=[[count(alias1)]]
+04)------Aggregate: groupBy=[[hits.SearchPhrase, hits.UserID AS alias1]],
aggr=[[]]
+05)--------SubqueryAlias: hits
+06)----------Filter: hits_raw.SearchPhrase != Utf8View("")
+07)------------TableScan: hits_raw projection=[UserID, SearchPhrase],
partial_filters=[hits_raw.SearchPhrase != Utf8View("")]
+physical_plan
+01)SortPreservingMergeExec: [u@1 DESC], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[u@1 DESC], preserve_partitioning=[true]
+03)----ProjectionExec: expr=[SearchPhrase@0 as SearchPhrase, count(alias1)@1
as u]
+04)------AggregateExec: mode=FinalPartitioned, gby=[SearchPhrase@0 as
SearchPhrase], aggr=[count(alias1)]
+05)--------RepartitionExec: partitioning=Hash([SearchPhrase@0], 4),
input_partitions=4
+06)----------AggregateExec: mode=Partial, gby=[SearchPhrase@0 as
SearchPhrase], aggr=[count(alias1)]
+07)------------AggregateExec: mode=FinalPartitioned, gby=[SearchPhrase@0 as
SearchPhrase, alias1@1 as alias1], aggr=[]
+08)--------------RepartitionExec: partitioning=Hash([SearchPhrase@0,
alias1@1], 4), input_partitions=4
+09)----------------AggregateExec: mode=Partial, gby=[SearchPhrase@1 as
SearchPhrase, UserID@0 as alias1], aggr=[]
+10)------------------FilterExec: SearchPhrase@1 !=
+11)--------------------RepartitionExec: partitioning=RoundRobinBatch(4),
input_partitions=1
+12)----------------------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[UserID, SearchPhrase], file_type=parquet, predicate=SearchPhrase@39
!= , pruning_predicate=SearchPhrase_null_count@2 != row_count@3 AND
(SearchPhrase_min@0 != OR != SearchPhrase_max@1),
required_guarantees=[SearchPhrase not in ()]
+
query TI
SELECT "SearchPhrase", COUNT(DISTINCT "UserID") AS u FROM hits WHERE
"SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY u DESC LIMIT 10;
----
+## Q14
+query TT
+EXPLAIN SELECT "SearchEngineID", "SearchPhrase", COUNT(*) AS c FROM hits WHERE
"SearchPhrase" <> '' GROUP BY "SearchEngineID", "SearchPhrase" ORDER BY c DESC
LIMIT 10;
+----
+logical_plan
+01)Sort: c DESC NULLS FIRST, fetch=10
+02)--Projection: hits.SearchEngineID, hits.SearchPhrase, count(Int64(1)) AS
count(*) AS c
+03)----Aggregate: groupBy=[[hits.SearchEngineID, hits.SearchPhrase]],
aggr=[[count(Int64(1))]]
+04)------SubqueryAlias: hits
+05)--------Filter: hits_raw.SearchPhrase != Utf8View("")
+06)----------TableScan: hits_raw projection=[SearchEngineID, SearchPhrase],
partial_filters=[hits_raw.SearchPhrase != Utf8View("")]
+physical_plan
+01)SortPreservingMergeExec: [c@2 DESC], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[c@2 DESC], preserve_partitioning=[true]
+03)----ProjectionExec: expr=[SearchEngineID@0 as SearchEngineID,
SearchPhrase@1 as SearchPhrase, count(Int64(1))@2 as c]
+04)------AggregateExec: mode=FinalPartitioned, gby=[SearchEngineID@0 as
SearchEngineID, SearchPhrase@1 as SearchPhrase], aggr=[count(Int64(1))]
+05)--------RepartitionExec: partitioning=Hash([SearchEngineID@0,
SearchPhrase@1], 4), input_partitions=4
+06)----------AggregateExec: mode=Partial, gby=[SearchEngineID@0 as
SearchEngineID, SearchPhrase@1 as SearchPhrase], aggr=[count(Int64(1))]
+07)------------FilterExec: SearchPhrase@1 !=
+08)--------------RepartitionExec: partitioning=RoundRobinBatch(4),
input_partitions=1
+09)----------------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[SearchEngineID, SearchPhrase], file_type=parquet,
predicate=SearchPhrase@39 != , pruning_predicate=SearchPhrase_null_count@2 !=
row_count@3 AND (SearchPhrase_min@0 != OR != SearchPhrase_max@1),
required_guarantees=[SearchPhrase not in ()]
+
query ITI
SELECT "SearchEngineID", "SearchPhrase", COUNT(*) AS c FROM hits WHERE
"SearchPhrase" <> '' GROUP BY "SearchEngineID", "SearchPhrase" ORDER BY c DESC
LIMIT 10;
----
+## Q15
+query TT
+EXPLAIN SELECT "UserID", COUNT(*) FROM hits GROUP BY "UserID" ORDER BY
COUNT(*) DESC LIMIT 10;
+----
+logical_plan
+01)Projection: hits.UserID, count(*)
+02)--Sort: count(Int64(1)) AS count(*) AS count(*) DESC NULLS FIRST, fetch=10
+03)----Projection: hits.UserID, count(Int64(1)) AS count(*), count(Int64(1))
+04)------Aggregate: groupBy=[[hits.UserID]], aggr=[[count(Int64(1))]]
+05)--------SubqueryAlias: hits
+06)----------TableScan: hits_raw projection=[UserID]
+physical_plan
+01)ProjectionExec: expr=[UserID@0 as UserID, count(*)@1 as count(*)]
+02)--SortPreservingMergeExec: [count(Int64(1))@2 DESC], fetch=10
+03)----SortExec: TopK(fetch=10), expr=[count(*)@1 DESC],
preserve_partitioning=[true]
+04)------ProjectionExec: expr=[UserID@0 as UserID, count(Int64(1))@1 as
count(*), count(Int64(1))@1 as count(Int64(1))]
+05)--------AggregateExec: mode=FinalPartitioned, gby=[UserID@0 as UserID],
aggr=[count(Int64(1))]
+06)----------RepartitionExec: partitioning=Hash([UserID@0], 4),
input_partitions=1
+07)------------AggregateExec: mode=Partial, gby=[UserID@0 as UserID],
aggr=[count(Int64(1))]
+08)--------------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[UserID], file_type=parquet
+
query II rowsort
SELECT "UserID", COUNT(*) FROM hits GROUP BY "UserID" ORDER BY COUNT(*) DESC
LIMIT 10;
----
@@ -136,6 +458,27 @@ SELECT "UserID", COUNT(*) FROM hits GROUP BY "UserID"
ORDER BY COUNT(*) DESC LIM
519640690937130534 2
7418527520126366595 1
+## Q16
+query TT
+EXPLAIN SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID",
"SearchPhrase" ORDER BY COUNT(*) DESC LIMIT 10;
+----
+logical_plan
+01)Projection: hits.UserID, hits.SearchPhrase, count(*)
+02)--Sort: count(Int64(1)) AS count(*) AS count(*) DESC NULLS FIRST, fetch=10
+03)----Projection: hits.UserID, hits.SearchPhrase, count(Int64(1)) AS
count(*), count(Int64(1))
+04)------Aggregate: groupBy=[[hits.UserID, hits.SearchPhrase]],
aggr=[[count(Int64(1))]]
+05)--------SubqueryAlias: hits
+06)----------TableScan: hits_raw projection=[UserID, SearchPhrase]
+physical_plan
+01)ProjectionExec: expr=[UserID@0 as UserID, SearchPhrase@1 as SearchPhrase,
count(*)@2 as count(*)]
+02)--SortPreservingMergeExec: [count(Int64(1))@3 DESC], fetch=10
+03)----SortExec: TopK(fetch=10), expr=[count(*)@2 DESC],
preserve_partitioning=[true]
+04)------ProjectionExec: expr=[UserID@0 as UserID, SearchPhrase@1 as
SearchPhrase, count(Int64(1))@2 as count(*), count(Int64(1))@2 as
count(Int64(1))]
+05)--------AggregateExec: mode=FinalPartitioned, gby=[UserID@0 as UserID,
SearchPhrase@1 as SearchPhrase], aggr=[count(Int64(1))]
+06)----------RepartitionExec: partitioning=Hash([UserID@0, SearchPhrase@1],
4), input_partitions=1
+07)------------AggregateExec: mode=Partial, gby=[UserID@0 as UserID,
SearchPhrase@1 as SearchPhrase], aggr=[count(Int64(1))]
+08)--------------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[UserID, SearchPhrase], file_type=parquet
+
query ITI rowsort
SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID",
"SearchPhrase" ORDER BY COUNT(*) DESC LIMIT 10;
----
@@ -145,6 +488,24 @@ SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP
BY "UserID", "SearchPh
519640690937130534 (empty) 2
7418527520126366595 (empty) 1
+## Q17
+query TT
+EXPLAIN SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID",
"SearchPhrase" LIMIT 10;
+----
+logical_plan
+01)Projection: hits.UserID, hits.SearchPhrase, count(Int64(1)) AS count(*)
+02)--Limit: skip=0, fetch=10
+03)----Aggregate: groupBy=[[hits.UserID, hits.SearchPhrase]],
aggr=[[count(Int64(1))]]
+04)------SubqueryAlias: hits
+05)--------TableScan: hits_raw projection=[UserID, SearchPhrase]
+physical_plan
+01)ProjectionExec: expr=[UserID@0 as UserID, SearchPhrase@1 as SearchPhrase,
count(Int64(1))@2 as count(*)]
+02)--CoalescePartitionsExec: fetch=10
+03)----AggregateExec: mode=FinalPartitioned, gby=[UserID@0 as UserID,
SearchPhrase@1 as SearchPhrase], aggr=[count(Int64(1))]
+04)------RepartitionExec: partitioning=Hash([UserID@0, SearchPhrase@1], 4),
input_partitions=1
+05)--------AggregateExec: mode=Partial, gby=[UserID@0 as UserID,
SearchPhrase@1 as SearchPhrase], aggr=[count(Int64(1))]
+06)----------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[UserID, SearchPhrase], file_type=parquet
+
query ITI rowsort
SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID",
"SearchPhrase" LIMIT 10;
----
@@ -154,6 +515,27 @@ SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP
BY "UserID", "SearchPh
519640690937130534 (empty) 2
7418527520126366595 (empty) 1
+## Q18
+query TT
+EXPLAIN SELECT "UserID", extract(minute FROM
to_timestamp_seconds("EventTime")) AS m, "SearchPhrase", COUNT(*) FROM hits
GROUP BY "UserID", m, "SearchPhrase" ORDER BY COUNT(*) DESC LIMIT 10;
+----
+logical_plan
+01)Projection: hits.UserID, m, hits.SearchPhrase, count(*)
+02)--Sort: count(Int64(1)) AS count(*) AS count(*) DESC NULLS FIRST, fetch=10
+03)----Projection: hits.UserID,
date_part(Utf8("MINUTE"),to_timestamp_seconds(hits.EventTime)) AS m,
hits.SearchPhrase, count(Int64(1)) AS count(*), count(Int64(1))
+04)------Aggregate: groupBy=[[hits.UserID, date_part(Utf8("MINUTE"),
to_timestamp_seconds(hits.EventTime)), hits.SearchPhrase]],
aggr=[[count(Int64(1))]]
+05)--------SubqueryAlias: hits
+06)----------TableScan: hits_raw projection=[EventTime, UserID, SearchPhrase]
+physical_plan
+01)ProjectionExec: expr=[UserID@0 as UserID, m@1 as m, SearchPhrase@2 as
SearchPhrase, count(*)@3 as count(*)]
+02)--SortPreservingMergeExec: [count(Int64(1))@4 DESC], fetch=10
+03)----SortExec: TopK(fetch=10), expr=[count(*)@3 DESC],
preserve_partitioning=[true]
+04)------ProjectionExec: expr=[UserID@0 as UserID,
date_part(Utf8("MINUTE"),to_timestamp_seconds(hits.EventTime))@1 as m,
SearchPhrase@2 as SearchPhrase, count(Int64(1))@3 as count(*),
count(Int64(1))@3 as count(Int64(1))]
+05)--------AggregateExec: mode=FinalPartitioned, gby=[UserID@0 as UserID,
date_part(Utf8("MINUTE"),to_timestamp_seconds(hits.EventTime))@1 as
date_part(Utf8("MINUTE"),to_timestamp_seconds(hits.EventTime)), SearchPhrase@2
as SearchPhrase], aggr=[count(Int64(1))]
+06)----------RepartitionExec: partitioning=Hash([UserID@0,
date_part(Utf8("MINUTE"),to_timestamp_seconds(hits.EventTime))@1,
SearchPhrase@2], 4), input_partitions=1
+07)------------AggregateExec: mode=Partial, gby=[UserID@1 as UserID,
date_part(MINUTE, to_timestamp_seconds(EventTime@0)) as
date_part(Utf8("MINUTE"),to_timestamp_seconds(hits.EventTime)), SearchPhrase@2
as SearchPhrase], aggr=[count(Int64(1))]
+08)--------------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[EventTime, UserID, SearchPhrase], file_type=parquet
+
query IITI rowsort
SELECT "UserID", extract(minute FROM to_timestamp_seconds("EventTime")) AS m,
"SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", m, "SearchPhrase" ORDER
BY COUNT(*) DESC LIMIT 10;
----
@@ -168,61 +550,337 @@ SELECT "UserID", extract(minute FROM
to_timestamp_seconds("EventTime")) AS m, "S
519640690937130534 36 (empty) 1
7418527520126366595 18 (empty) 1
+## Q19
+query TT
+EXPLAIN SELECT "UserID" FROM hits WHERE "UserID" = 435090932899640449;
+----
+logical_plan
+01)SubqueryAlias: hits
+02)--Filter: hits_raw.UserID = Int64(435090932899640449)
+03)----TableScan: hits_raw projection=[UserID],
partial_filters=[hits_raw.UserID = Int64(435090932899640449)]
+physical_plan
+01)FilterExec: UserID@0 = 435090932899640449
+02)--RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+03)----DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[UserID], file_type=parquet, predicate=UserID@9 =
435090932899640449, pruning_predicate=UserID_null_count@2 != row_count@3 AND
UserID_min@0 <= 435090932899640449 AND 435090932899640449 <= UserID_max@1,
required_guarantees=[UserID in (435090932899640449)]
+
query I
SELECT "UserID" FROM hits WHERE "UserID" = 435090932899640449;
----
+## Q20
+query TT
+EXPLAIN SELECT COUNT(*) FROM hits WHERE "URL" LIKE '%google%';
+----
+logical_plan
+01)Projection: count(Int64(1)) AS count(*)
+02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]]
+03)----SubqueryAlias: hits
+04)------Projection:
+05)--------Filter: hits_raw.URL LIKE Utf8View("%google%")
+06)----------TableScan: hits_raw projection=[URL],
partial_filters=[hits_raw.URL LIKE Utf8View("%google%")]
+physical_plan
+01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)]
+02)--AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))]
+03)----CoalescePartitionsExec
+04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))]
+05)--------FilterExec: URL@0 LIKE %google%, projection=[]
+06)----------RepartitionExec: partitioning=RoundRobinBatch(4),
input_partitions=1
+07)------------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[URL], file_type=parquet, predicate=URL@13 LIKE %google%
+
query I
SELECT COUNT(*) FROM hits WHERE "URL" LIKE '%google%';
----
0
+## Q21
+query TT
+EXPLAIN SELECT "SearchPhrase", MIN("URL"), COUNT(*) AS c FROM hits WHERE "URL"
LIKE '%google%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c
DESC LIMIT 10;
+----
+logical_plan
+01)Sort: c DESC NULLS FIRST, fetch=10
+02)--Projection: hits.SearchPhrase, min(hits.URL), count(Int64(1)) AS count(*)
AS c
+03)----Aggregate: groupBy=[[hits.SearchPhrase]], aggr=[[min(hits.URL),
count(Int64(1))]]
+04)------SubqueryAlias: hits
+05)--------Filter: hits_raw.URL LIKE Utf8View("%google%") AND
hits_raw.SearchPhrase != Utf8View("")
+06)----------TableScan: hits_raw projection=[URL, SearchPhrase],
partial_filters=[hits_raw.URL LIKE Utf8View("%google%"), hits_raw.SearchPhrase
!= Utf8View("")]
+physical_plan
+01)SortPreservingMergeExec: [c@2 DESC], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[c@2 DESC], preserve_partitioning=[true]
+03)----ProjectionExec: expr=[SearchPhrase@0 as SearchPhrase, min(hits.URL)@1
as min(hits.URL), count(Int64(1))@2 as c]
+04)------AggregateExec: mode=FinalPartitioned, gby=[SearchPhrase@0 as
SearchPhrase], aggr=[min(hits.URL), count(Int64(1))]
+05)--------RepartitionExec: partitioning=Hash([SearchPhrase@0], 4),
input_partitions=4
+06)----------AggregateExec: mode=Partial, gby=[SearchPhrase@1 as
SearchPhrase], aggr=[min(hits.URL), count(Int64(1))]
+07)------------FilterExec: URL@0 LIKE %google% AND SearchPhrase@1 !=
+08)--------------RepartitionExec: partitioning=RoundRobinBatch(4),
input_partitions=1
+09)----------------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[URL, SearchPhrase], file_type=parquet, predicate=URL@13 LIKE
%google% AND SearchPhrase@39 != , pruning_predicate=SearchPhrase_null_count@4
!= row_count@5 AND (SearchPhrase_min@2 != OR != SearchPhrase_max@3),
required_guarantees=[SearchPhrase not in ()]
+
query TTI
SELECT "SearchPhrase", MIN("URL"), COUNT(*) AS c FROM hits WHERE "URL" LIKE
'%google%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC
LIMIT 10;
----
+## Q22
+query TT
+EXPLAIN SELECT "SearchPhrase", MIN("URL"), MIN("Title"), COUNT(*) AS c,
COUNT(DISTINCT "UserID") FROM hits WHERE "Title" LIKE '%Google%' AND "URL" NOT
LIKE '%.google.%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c
DESC LIMIT 10;
+----
+logical_plan
+01)Sort: c DESC NULLS FIRST, fetch=10
+02)--Projection: hits.SearchPhrase, min(hits.URL), min(hits.Title),
count(Int64(1)) AS count(*) AS c, count(DISTINCT hits.UserID)
+03)----Aggregate: groupBy=[[hits.SearchPhrase]], aggr=[[min(hits.URL),
min(hits.Title), count(Int64(1)), count(DISTINCT hits.UserID)]]
+04)------SubqueryAlias: hits
+05)--------Filter: hits_raw.Title LIKE Utf8View("%Google%") AND hits_raw.URL
NOT LIKE Utf8View("%.google.%") AND hits_raw.SearchPhrase != Utf8View("")
+06)----------TableScan: hits_raw projection=[Title, UserID, URL,
SearchPhrase], partial_filters=[hits_raw.Title LIKE Utf8View("%Google%"),
hits_raw.URL NOT LIKE Utf8View("%.google.%"), hits_raw.SearchPhrase !=
Utf8View("")]
+physical_plan
+01)SortPreservingMergeExec: [c@3 DESC], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[c@3 DESC], preserve_partitioning=[true]
+03)----ProjectionExec: expr=[SearchPhrase@0 as SearchPhrase, min(hits.URL)@1
as min(hits.URL), min(hits.Title)@2 as min(hits.Title), count(Int64(1))@3 as c,
count(DISTINCT hits.UserID)@4 as count(DISTINCT hits.UserID)]
+04)------AggregateExec: mode=FinalPartitioned, gby=[SearchPhrase@0 as
SearchPhrase], aggr=[min(hits.URL), min(hits.Title), count(Int64(1)),
count(DISTINCT hits.UserID)]
+05)--------RepartitionExec: partitioning=Hash([SearchPhrase@0], 4),
input_partitions=4
+06)----------AggregateExec: mode=Partial, gby=[SearchPhrase@3 as
SearchPhrase], aggr=[min(hits.URL), min(hits.Title), count(Int64(1)),
count(DISTINCT hits.UserID)]
+07)------------FilterExec: Title@0 LIKE %Google% AND URL@2 NOT LIKE %.google.%
AND SearchPhrase@3 !=
+08)--------------RepartitionExec: partitioning=RoundRobinBatch(4),
input_partitions=1
+09)----------------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[Title, UserID, URL, SearchPhrase], file_type=parquet,
predicate=Title@2 LIKE %Google% AND URL@13 NOT LIKE %.google.% AND
SearchPhrase@39 != , pruning_predicate=SearchPhrase_null_count@6 != row_count@7
AND (SearchPhrase_min@4 != OR != SearchPhrase_max@5),
required_guarantees=[SearchPhrase not in ()]
+
query TTTII
SELECT "SearchPhrase", MIN("URL"), MIN("Title"), COUNT(*) AS c, COUNT(DISTINCT
"UserID") FROM hits WHERE "Title" LIKE '%Google%' AND "URL" NOT LIKE
'%.google.%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC
LIMIT 10;
----
-query
IITIIIIIIIIITTIIIIIIIIIITIIITIIIITTIIITIIIIIIIIIITIIIIITIIIIIITIIIIIIIIIITTTTIIIIIIIITITTITTTTTTTTTTIIIID
+## Q23
+query TT
+EXPLAIN SELECT * FROM hits WHERE "URL" LIKE '%google%' ORDER BY "EventTime"
LIMIT 10;
+----
+logical_plan
+01)Sort: hits.EventTime ASC NULLS LAST, fetch=10
+02)--SubqueryAlias: hits
+03)----Projection: hits_raw.WatchID, hits_raw.JavaEnable, hits_raw.Title,
hits_raw.GoodEvent, hits_raw.EventTime, hits_raw.CounterID, hits_raw.ClientIP,
hits_raw.RegionID, hits_raw.UserID, hits_raw.CounterClass, hits_raw.OS,
hits_raw.UserAgent, hits_raw.URL, hits_raw.Referer, hits_raw.IsRefresh,
hits_raw.RefererCategoryID, hits_raw.RefererRegionID, hits_raw.URLCategoryID,
hits_raw.URLRegionID, hits_raw.ResolutionWidth, hits_raw.ResolutionHeight,
hits_raw.ResolutionDepth, hits_raw.FlashMa [...]
+04)------Filter: hits_raw.URL LIKE Utf8View("%google%")
+05)--------TableScan: hits_raw projection=[WatchID, JavaEnable, Title,
GoodEvent, EventTime, EventDate, CounterID, ClientIP, RegionID, UserID,
CounterClass, OS, UserAgent, URL, Referer, IsRefresh, RefererCategoryID,
RefererRegionID, URLCategoryID, URLRegionID, ResolutionWidth, ResolutionHeight,
ResolutionDepth, FlashMajor, FlashMinor, FlashMinor2, NetMajor, NetMinor,
UserAgentMajor, UserAgentMinor, CookieEnable, JavascriptEnable, IsMobile,
MobilePhone, MobilePhoneModel, Params, IPNetwork [...]
+physical_plan
+01)SortPreservingMergeExec: [EventTime@4 ASC NULLS LAST], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[EventTime@4 ASC NULLS LAST],
preserve_partitioning=[true]
+03)----ProjectionExec: expr=[WatchID@0 as WatchID, JavaEnable@1 as JavaEnable,
Title@2 as Title, GoodEvent@3 as GoodEvent, EventTime@4 as EventTime,
CounterID@6 as CounterID, ClientIP@7 as ClientIP, RegionID@8 as RegionID,
UserID@9 as UserID, CounterClass@10 as CounterClass, OS@11 as OS, UserAgent@12
as UserAgent, URL@13 as URL, Referer@14 as Referer, IsRefresh@15 as IsRefresh,
RefererCategoryID@16 as RefererCategoryID, RefererRegionID@17 as
RefererRegionID, URLCategoryID@18 as URLCatego [...]
+04)------FilterExec: URL@13 LIKE %google%
+05)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+06)----------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[WatchID, JavaEnable, Title, GoodEvent, EventTime, EventDate,
CounterID, ClientIP, RegionID, UserID, CounterClass, OS, UserAgent, URL,
Referer, IsRefresh, RefererCategoryID, RefererRegionID, URLCategoryID,
URLRegionID, ResolutionWidth, ResolutionHeight, ResolutionDepth, FlashMajor,
FlashMinor, FlashMinor2, NetMajor, NetMinor, UserAgentMajor, UserAgentMi [...]
+query
IITIIIIIIIIITTIIIIIIIIIITIIITIIIITTIIITIIIIIIIIIITIIIIITIIIIIITIIIIIIIIIITTTTIIIIIIIITITTITTTTTTTTTTIIIID
SELECT * FROM hits WHERE "URL" LIKE '%google%' ORDER BY "EventTime" LIMIT 10;
----
+## Q24
+query TT
+EXPLAIN SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY
"EventTime" LIMIT 10;
+----
+logical_plan
+01)Projection: hits.SearchPhrase
+02)--Sort: hits.EventTime ASC NULLS LAST, fetch=10
+03)----Projection: hits.SearchPhrase, hits.EventTime
+04)------SubqueryAlias: hits
+05)--------Filter: hits_raw.SearchPhrase != Utf8View("")
+06)----------TableScan: hits_raw projection=[EventTime, SearchPhrase],
partial_filters=[hits_raw.SearchPhrase != Utf8View("")]
+physical_plan
+01)ProjectionExec: expr=[SearchPhrase@0 as SearchPhrase]
+02)--SortPreservingMergeExec: [EventTime@1 ASC NULLS LAST], fetch=10
+03)----SortExec: TopK(fetch=10), expr=[EventTime@1 ASC NULLS LAST],
preserve_partitioning=[true]
+04)------ProjectionExec: expr=[SearchPhrase@1 as SearchPhrase, EventTime@0 as
EventTime]
+05)--------FilterExec: SearchPhrase@1 !=
+06)----------RepartitionExec: partitioning=RoundRobinBatch(4),
input_partitions=1
+07)------------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[EventTime, SearchPhrase], file_type=parquet,
predicate=SearchPhrase@39 != AND DynamicFilter [ empty ],
pruning_predicate=SearchPhrase_null_count@2 != row_count@3 AND
(SearchPhrase_min@0 != OR != SearchPhrase_max@1),
required_guarantees=[SearchPhrase not in ()]
+
query T
SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY
"EventTime" LIMIT 10;
----
+## Q25
+query TT
+EXPLAIN SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY
"SearchPhrase" LIMIT 10;
+----
+logical_plan
+01)Sort: hits.SearchPhrase ASC NULLS LAST, fetch=10
+02)--SubqueryAlias: hits
+03)----Filter: hits_raw.SearchPhrase != Utf8View("")
+04)------TableScan: hits_raw projection=[SearchPhrase],
partial_filters=[hits_raw.SearchPhrase != Utf8View("")]
+physical_plan
+01)SortPreservingMergeExec: [SearchPhrase@0 ASC NULLS LAST], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[SearchPhrase@0 ASC NULLS LAST],
preserve_partitioning=[true]
+03)----FilterExec: SearchPhrase@0 !=
+04)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+05)--------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[SearchPhrase], file_type=parquet, predicate=SearchPhrase@39 != AND
DynamicFilter [ empty ], pruning_predicate=SearchPhrase_null_count@2 !=
row_count@3 AND (SearchPhrase_min@0 != OR != SearchPhrase_max@1),
required_guarantees=[SearchPhrase not in ()]
+
query T
SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY
"SearchPhrase" LIMIT 10;
----
+## Q26
+query TT
+EXPLAIN SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY
"EventTime", "SearchPhrase" LIMIT 10;
+----
+logical_plan
+01)Projection: hits.SearchPhrase
+02)--Sort: hits.EventTime ASC NULLS LAST, hits.SearchPhrase ASC NULLS LAST,
fetch=10
+03)----Projection: hits.SearchPhrase, hits.EventTime
+04)------SubqueryAlias: hits
+05)--------Filter: hits_raw.SearchPhrase != Utf8View("")
+06)----------TableScan: hits_raw projection=[EventTime, SearchPhrase],
partial_filters=[hits_raw.SearchPhrase != Utf8View("")]
+physical_plan
+01)ProjectionExec: expr=[SearchPhrase@0 as SearchPhrase]
+02)--SortPreservingMergeExec: [EventTime@1 ASC NULLS LAST, SearchPhrase@0 ASC
NULLS LAST], fetch=10
+03)----SortExec: TopK(fetch=10), expr=[EventTime@1 ASC NULLS LAST,
SearchPhrase@0 ASC NULLS LAST], preserve_partitioning=[true]
+04)------ProjectionExec: expr=[SearchPhrase@1 as SearchPhrase, EventTime@0 as
EventTime]
+05)--------FilterExec: SearchPhrase@1 !=
+06)----------RepartitionExec: partitioning=RoundRobinBatch(4),
input_partitions=1
+07)------------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[EventTime, SearchPhrase], file_type=parquet,
predicate=SearchPhrase@39 != AND DynamicFilter [ empty ],
pruning_predicate=SearchPhrase_null_count@2 != row_count@3 AND
(SearchPhrase_min@0 != OR != SearchPhrase_max@1),
required_guarantees=[SearchPhrase not in ()]
+
query T
SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY
"EventTime", "SearchPhrase" LIMIT 10;
----
+## Q27
+query TT
+EXPLAIN SELECT "CounterID", AVG(length("URL")) AS l, COUNT(*) AS c FROM hits
WHERE "URL" <> '' GROUP BY "CounterID" HAVING COUNT(*) > 100000 ORDER BY l DESC
LIMIT 25;
+----
+logical_plan
+01)Sort: l DESC NULLS FIRST, fetch=25
+02)--Projection: hits.CounterID, avg(length(hits.URL)) AS l, count(Int64(1))
AS count(*) AS c
+03)----Filter: count(Int64(1)) > Int64(100000)
+04)------Aggregate: groupBy=[[hits.CounterID]],
aggr=[[avg(CAST(character_length(hits.URL) AS length(hits.URL) AS Float64)),
count(Int64(1))]]
+05)--------SubqueryAlias: hits
+06)----------Filter: hits_raw.URL != Utf8View("")
+07)------------TableScan: hits_raw projection=[CounterID, URL],
partial_filters=[hits_raw.URL != Utf8View("")]
+physical_plan
+01)SortPreservingMergeExec: [l@1 DESC], fetch=25
+02)--SortExec: TopK(fetch=25), expr=[l@1 DESC], preserve_partitioning=[true]
+03)----ProjectionExec: expr=[CounterID@0 as CounterID, avg(length(hits.URL))@1
as l, count(Int64(1))@2 as c]
+04)------FilterExec: count(Int64(1))@2 > 100000
+05)--------AggregateExec: mode=FinalPartitioned, gby=[CounterID@0 as
CounterID], aggr=[avg(length(hits.URL)), count(Int64(1))]
+06)----------RepartitionExec: partitioning=Hash([CounterID@0], 4),
input_partitions=4
+07)------------AggregateExec: mode=Partial, gby=[CounterID@0 as CounterID],
aggr=[avg(length(hits.URL)), count(Int64(1))]
+08)--------------FilterExec: URL@1 !=
+09)----------------RepartitionExec: partitioning=RoundRobinBatch(4),
input_partitions=1
+10)------------------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[CounterID, URL], file_type=parquet, predicate=URL@13 != ,
pruning_predicate=URL_null_count@2 != row_count@3 AND (URL_min@0 != OR !=
URL_max@1), required_guarantees=[URL not in ()]
+
query IRI
SELECT "CounterID", AVG(length("URL")) AS l, COUNT(*) AS c FROM hits WHERE
"URL" <> '' GROUP BY "CounterID" HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT
25;
----
+## Q28
+query TT
+EXPLAIN SELECT REGEXP_REPLACE("Referer", '^https?://(?:www\.)?([^/]+)/.*$',
'\1') AS k, AVG(length("Referer")) AS l, COUNT(*) AS c, MIN("Referer") FROM
hits WHERE "Referer" <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC
LIMIT 25;
+----
+logical_plan
+01)Sort: l DESC NULLS FIRST, fetch=25
+02)--Projection:
regexp_replace(hits.Referer,Utf8("^https?://(?:www\.)?([^/]+)/.*$"),Utf8("\1"))
AS k, avg(length(hits.Referer)) AS l, count(Int64(1)) AS count(*) AS c,
min(hits.Referer)
+03)----Filter: count(Int64(1)) > Int64(100000)
+04)------Aggregate: groupBy=[[regexp_replace(hits.Referer,
Utf8View("^https?://(?:www\.)?([^/]+)/.*$"), Utf8View("\1")) AS
regexp_replace(hits.Referer,Utf8("^https?://(?:www\.)?([^/]+)/.*$"),Utf8("\1"))]],
aggr=[[avg(CAST(character_length(hits.Referer) AS length(hits.Referer) AS
Float64)), count(Int64(1)), min(hits.Referer)]]
+05)--------SubqueryAlias: hits
+06)----------Filter: hits_raw.Referer != Utf8View("")
+07)------------TableScan: hits_raw projection=[Referer],
partial_filters=[hits_raw.Referer != Utf8View("")]
+physical_plan
+01)SortPreservingMergeExec: [l@1 DESC], fetch=25
+02)--SortExec: TopK(fetch=25), expr=[l@1 DESC], preserve_partitioning=[true]
+03)----ProjectionExec:
expr=[regexp_replace(hits.Referer,Utf8("^https?://(?:www\.)?([^/]+)/.*$"),Utf8("\1"))@0
as k, avg(length(hits.Referer))@1 as l, count(Int64(1))@2 as c,
min(hits.Referer)@3 as min(hits.Referer)]
+04)------FilterExec: count(Int64(1))@2 > 100000
+05)--------AggregateExec: mode=FinalPartitioned,
gby=[regexp_replace(hits.Referer,Utf8("^https?://(?:www\.)?([^/]+)/.*$"),Utf8("\1"))@0
as
regexp_replace(hits.Referer,Utf8("^https?://(?:www\.)?([^/]+)/.*$"),Utf8("\1"))],
aggr=[avg(length(hits.Referer)), count(Int64(1)), min(hits.Referer)]
+06)----------RepartitionExec:
partitioning=Hash([regexp_replace(hits.Referer,Utf8("^https?://(?:www\.)?([^/]+)/.*$"),Utf8("\1"))@0],
4), input_partitions=4
+07)------------AggregateExec: mode=Partial, gby=[regexp_replace(Referer@0,
^https?://(?:www\.)?([^/]+)/.*$, \1) as
regexp_replace(hits.Referer,Utf8("^https?://(?:www\.)?([^/]+)/.*$"),Utf8("\1"))],
aggr=[avg(length(hits.Referer)), count(Int64(1)), min(hits.Referer)]
+08)--------------FilterExec: Referer@0 !=
+09)----------------RepartitionExec: partitioning=RoundRobinBatch(4),
input_partitions=1
+10)------------------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[Referer], file_type=parquet, predicate=Referer@14 != ,
pruning_predicate=Referer_null_count@2 != row_count@3 AND (Referer_min@0 != OR
!= Referer_max@1), required_guarantees=[Referer not in ()]
+
query TRIT
SELECT REGEXP_REPLACE("Referer", '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS
k, AVG(length("Referer")) AS l, COUNT(*) AS c, MIN("Referer") FROM hits WHERE
"Referer" <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
----
+## Q29
+query TT
+EXPLAIN SELECT SUM("ResolutionWidth"), SUM("ResolutionWidth" + 1),
SUM("ResolutionWidth" + 2), SUM("ResolutionWidth" + 3), SUM("ResolutionWidth" +
4), SUM("ResolutionWidth" + 5), SUM("ResolutionWidth" + 6),
SUM("ResolutionWidth" + 7), SUM("ResolutionWidth" + 8), SUM("ResolutionWidth" +
9), SUM("ResolutionWidth" + 10), SUM("ResolutionWidth" + 11),
SUM("ResolutionWidth" + 12), SUM("ResolutionWidth" + 13), SUM("ResolutionWidth"
+ 14), SUM("ResolutionWidth" + 15), SUM("ResolutionWidth" + 16) [...]
+----
+logical_plan
+01)Aggregate: groupBy=[[]], aggr=[[sum(__common_expr_1 AS
hits.ResolutionWidth), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(1)),
sum(__common_expr_1 AS hits.ResolutionWidth + Int64(2)), sum(__common_expr_1 AS
hits.ResolutionWidth + Int64(3)), sum(__common_expr_1 AS hits.ResolutionWidth +
Int64(4)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(5)),
sum(__common_expr_1 AS hits.ResolutionWidth + Int64(6)), sum(__common_expr_1 AS
hits.ResolutionWidth + Int64(7)), sum(__common [...]
+02)--Projection: CAST(hits.ResolutionWidth AS Int64) AS __common_expr_1
+03)----SubqueryAlias: hits
+04)------TableScan: hits_raw projection=[ResolutionWidth]
+physical_plan
+01)AggregateExec: mode=Single, gby=[], aggr=[sum(hits.ResolutionWidth),
sum(hits.ResolutionWidth + Int64(1)), sum(hits.ResolutionWidth + Int64(2)),
sum(hits.ResolutionWidth + Int64(3)), sum(hits.ResolutionWidth + Int64(4)),
sum(hits.ResolutionWidth + Int64(5)), sum(hits.ResolutionWidth + Int64(6)),
sum(hits.ResolutionWidth + Int64(7)), sum(hits.ResolutionWidth + Int64(8)),
sum(hits.ResolutionWidth + Int64(9)), sum(hits.ResolutionWidth + Int64(10)),
sum(hits.ResolutionWidth + Int64(11)), [...]
+02)--DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[CAST(ResolutionWidth@20 AS Int64) as __common_expr_1],
file_type=parquet
+
query
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
SELECT SUM("ResolutionWidth"), SUM("ResolutionWidth" + 1),
SUM("ResolutionWidth" + 2), SUM("ResolutionWidth" + 3), SUM("ResolutionWidth" +
4), SUM("ResolutionWidth" + 5), SUM("ResolutionWidth" + 6),
SUM("ResolutionWidth" + 7), SUM("ResolutionWidth" + 8), SUM("ResolutionWidth" +
9), SUM("ResolutionWidth" + 10), SUM("ResolutionWidth" + 11),
SUM("ResolutionWidth" + 12), SUM("ResolutionWidth" + 13), SUM("ResolutionWidth"
+ 14), SUM("ResolutionWidth" + 15), SUM("ResolutionWidth" + 16), SUM("R [...]
----
0 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170 180 190 200 210
220 230 240 250 260 270 280 290 300 310 320 330 340 350 360 370 380 390 400 410
420 430 440 450 460 470 480 490 500 510 520 530 540 550 560 570 580 590 600 610
620 630 640 650 660 670 680 690 700 710 720 730 740 750 760 770 780 790 800 810
820 830 840 850 860 870 880 890
+## Q30
+query TT
+EXPLAIN SELECT "SearchEngineID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"),
AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY
"SearchEngineID", "ClientIP" ORDER BY c DESC LIMIT 10;
+----
+logical_plan
+01)Sort: c DESC NULLS FIRST, fetch=10
+02)--Projection: hits.SearchEngineID, hits.ClientIP, count(Int64(1)) AS
count(*) AS c, sum(hits.IsRefresh), avg(hits.ResolutionWidth)
+03)----Aggregate: groupBy=[[hits.SearchEngineID, hits.ClientIP]],
aggr=[[count(Int64(1)), sum(CAST(hits.IsRefresh AS Int64)),
avg(CAST(hits.ResolutionWidth AS Float64))]]
+04)------SubqueryAlias: hits
+05)--------Projection: hits_raw.ClientIP, hits_raw.IsRefresh,
hits_raw.ResolutionWidth, hits_raw.SearchEngineID
+06)----------Filter: hits_raw.SearchPhrase != Utf8View("")
+07)------------TableScan: hits_raw projection=[ClientIP, IsRefresh,
ResolutionWidth, SearchEngineID, SearchPhrase],
partial_filters=[hits_raw.SearchPhrase != Utf8View("")]
+physical_plan
+01)SortPreservingMergeExec: [c@2 DESC], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[c@2 DESC], preserve_partitioning=[true]
+03)----ProjectionExec: expr=[SearchEngineID@0 as SearchEngineID, ClientIP@1 as
ClientIP, count(Int64(1))@2 as c, sum(hits.IsRefresh)@3 as sum(hits.IsRefresh),
avg(hits.ResolutionWidth)@4 as avg(hits.ResolutionWidth)]
+04)------AggregateExec: mode=FinalPartitioned, gby=[SearchEngineID@0 as
SearchEngineID, ClientIP@1 as ClientIP], aggr=[count(Int64(1)),
sum(hits.IsRefresh), avg(hits.ResolutionWidth)]
+05)--------RepartitionExec: partitioning=Hash([SearchEngineID@0, ClientIP@1],
4), input_partitions=4
+06)----------AggregateExec: mode=Partial, gby=[SearchEngineID@3 as
SearchEngineID, ClientIP@0 as ClientIP], aggr=[count(Int64(1)),
sum(hits.IsRefresh), avg(hits.ResolutionWidth)]
+07)------------FilterExec: SearchPhrase@4 != , projection=[ClientIP@0,
IsRefresh@1, ResolutionWidth@2, SearchEngineID@3]
+08)--------------RepartitionExec: partitioning=RoundRobinBatch(4),
input_partitions=1
+09)----------------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[ClientIP, IsRefresh, ResolutionWidth, SearchEngineID,
SearchPhrase], file_type=parquet, predicate=SearchPhrase@39 != ,
pruning_predicate=SearchPhrase_null_count@2 != row_count@3 AND
(SearchPhrase_min@0 != OR != SearchPhrase_max@1),
required_guarantees=[SearchPhrase not in ()]
+
query IIIIR
SELECT "SearchEngineID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"),
AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY
"SearchEngineID", "ClientIP" ORDER BY c DESC LIMIT 10;
----
+## Q31
+query TT
+EXPLAIN SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"),
AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY "WatchID",
"ClientIP" ORDER BY c DESC LIMIT 10;
+----
+logical_plan
+01)Sort: c DESC NULLS FIRST, fetch=10
+02)--Projection: hits.WatchID, hits.ClientIP, count(Int64(1)) AS count(*) AS
c, sum(hits.IsRefresh), avg(hits.ResolutionWidth)
+03)----Aggregate: groupBy=[[hits.WatchID, hits.ClientIP]],
aggr=[[count(Int64(1)), sum(CAST(hits.IsRefresh AS Int64)),
avg(CAST(hits.ResolutionWidth AS Float64))]]
+04)------SubqueryAlias: hits
+05)--------Projection: hits_raw.WatchID, hits_raw.ClientIP,
hits_raw.IsRefresh, hits_raw.ResolutionWidth
+06)----------Filter: hits_raw.SearchPhrase != Utf8View("")
+07)------------TableScan: hits_raw projection=[WatchID, ClientIP, IsRefresh,
ResolutionWidth, SearchPhrase], partial_filters=[hits_raw.SearchPhrase !=
Utf8View("")]
+physical_plan
+01)SortPreservingMergeExec: [c@2 DESC], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[c@2 DESC], preserve_partitioning=[true]
+03)----ProjectionExec: expr=[WatchID@0 as WatchID, ClientIP@1 as ClientIP,
count(Int64(1))@2 as c, sum(hits.IsRefresh)@3 as sum(hits.IsRefresh),
avg(hits.ResolutionWidth)@4 as avg(hits.ResolutionWidth)]
+04)------AggregateExec: mode=FinalPartitioned, gby=[WatchID@0 as WatchID,
ClientIP@1 as ClientIP], aggr=[count(Int64(1)), sum(hits.IsRefresh),
avg(hits.ResolutionWidth)]
+05)--------RepartitionExec: partitioning=Hash([WatchID@0, ClientIP@1], 4),
input_partitions=4
+06)----------AggregateExec: mode=Partial, gby=[WatchID@0 as WatchID,
ClientIP@1 as ClientIP], aggr=[count(Int64(1)), sum(hits.IsRefresh),
avg(hits.ResolutionWidth)]
+07)------------FilterExec: SearchPhrase@4 != , projection=[WatchID@0,
ClientIP@1, IsRefresh@2, ResolutionWidth@3]
+08)--------------RepartitionExec: partitioning=RoundRobinBatch(4),
input_partitions=1
+09)----------------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[WatchID, ClientIP, IsRefresh, ResolutionWidth, SearchPhrase],
file_type=parquet, predicate=SearchPhrase@39 != ,
pruning_predicate=SearchPhrase_null_count@2 != row_count@3 AND
(SearchPhrase_min@0 != OR != SearchPhrase_max@1),
required_guarantees=[SearchPhrase not in ()]
+
query IIIIR
SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"),
AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY "WatchID",
"ClientIP" ORDER BY c DESC LIMIT 10;
----
+## Q32
+query TT
+EXPLAIN SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"),
AVG("ResolutionWidth") FROM hits GROUP BY "WatchID", "ClientIP" ORDER BY c DESC
LIMIT 10;
+----
+logical_plan
+01)Sort: c DESC NULLS FIRST, fetch=10
+02)--Projection: hits.WatchID, hits.ClientIP, count(Int64(1)) AS count(*) AS
c, sum(hits.IsRefresh), avg(hits.ResolutionWidth)
+03)----Aggregate: groupBy=[[hits.WatchID, hits.ClientIP]],
aggr=[[count(Int64(1)), sum(CAST(hits.IsRefresh AS Int64)),
avg(CAST(hits.ResolutionWidth AS Float64))]]
+04)------SubqueryAlias: hits
+05)--------TableScan: hits_raw projection=[WatchID, ClientIP, IsRefresh,
ResolutionWidth]
+physical_plan
+01)SortPreservingMergeExec: [c@2 DESC], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[c@2 DESC], preserve_partitioning=[true]
+03)----ProjectionExec: expr=[WatchID@0 as WatchID, ClientIP@1 as ClientIP,
count(Int64(1))@2 as c, sum(hits.IsRefresh)@3 as sum(hits.IsRefresh),
avg(hits.ResolutionWidth)@4 as avg(hits.ResolutionWidth)]
+04)------AggregateExec: mode=FinalPartitioned, gby=[WatchID@0 as WatchID,
ClientIP@1 as ClientIP], aggr=[count(Int64(1)), sum(hits.IsRefresh),
avg(hits.ResolutionWidth)]
+05)--------RepartitionExec: partitioning=Hash([WatchID@0, ClientIP@1], 4),
input_partitions=1
+06)----------AggregateExec: mode=Partial, gby=[WatchID@0 as WatchID,
ClientIP@1 as ClientIP], aggr=[count(Int64(1)), sum(hits.IsRefresh),
avg(hits.ResolutionWidth)]
+07)------------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[WatchID, ClientIP, IsRefresh, ResolutionWidth], file_type=parquet
+
query IIIIR rowsort
SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"),
AVG("ResolutionWidth") FROM hits GROUP BY "WatchID", "ClientIP" ORDER BY c DESC
LIMIT 10;
----
@@ -237,6 +895,25 @@ SELECT "WatchID", "ClientIP", COUNT(*) AS c,
SUM("IsRefresh"), AVG("ResolutionWi
8924809397503602651 -1216690514 1 0 0
9110818468285196899 -1216690514 1 0 0
+## Q33
+query TT
+EXPLAIN SELECT "URL", COUNT(*) AS c FROM hits GROUP BY "URL" ORDER BY c DESC
LIMIT 10;
+----
+logical_plan
+01)Sort: c DESC NULLS FIRST, fetch=10
+02)--Projection: hits.URL, count(Int64(1)) AS count(*) AS c
+03)----Aggregate: groupBy=[[hits.URL]], aggr=[[count(Int64(1))]]
+04)------SubqueryAlias: hits
+05)--------TableScan: hits_raw projection=[URL]
+physical_plan
+01)SortPreservingMergeExec: [c@1 DESC], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[c@1 DESC], preserve_partitioning=[true]
+03)----ProjectionExec: expr=[URL@0 as URL, count(Int64(1))@1 as c]
+04)------AggregateExec: mode=FinalPartitioned, gby=[URL@0 as URL],
aggr=[count(Int64(1))]
+05)--------RepartitionExec: partitioning=Hash([URL@0], 4), input_partitions=1
+06)----------AggregateExec: mode=Partial, gby=[URL@0 as URL],
aggr=[count(Int64(1))]
+07)------------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[URL], file_type=parquet
+
query TI rowsort
SELECT "URL", COUNT(*) AS c FROM hits GROUP BY "URL" ORDER BY c DESC LIMIT 10;
----
@@ -247,6 +924,25 @@ http://bonprix.ru/index.ru/cinema/art/A00387,3797); ru)&bL
1
http://holodilnik.ru/russia/05jul2013&model=0 1
http://tours/Ekategoriya%2F&sr=http://slovareniye 1
+## Q34
+query TT
+EXPLAIN SELECT 1, "URL", COUNT(*) AS c FROM hits GROUP BY 1, "URL" ORDER BY c
DESC LIMIT 10;
+----
+logical_plan
+01)Sort: c DESC NULLS FIRST, fetch=10
+02)--Projection: Int64(1), hits.URL, count(Int64(1)) AS c
+03)----Aggregate: groupBy=[[hits.URL]], aggr=[[count(Int64(1))]]
+04)------SubqueryAlias: hits
+05)--------TableScan: hits_raw projection=[URL]
+physical_plan
+01)SortPreservingMergeExec: [c@2 DESC], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[c@2 DESC], preserve_partitioning=[true]
+03)----ProjectionExec: expr=[1 as Int64(1), URL@0 as URL, count(Int64(1))@1 as
c]
+04)------AggregateExec: mode=FinalPartitioned, gby=[URL@0 as URL],
aggr=[count(Int64(1))]
+05)--------RepartitionExec: partitioning=Hash([URL@0], 4), input_partitions=1
+06)----------AggregateExec: mode=Partial, gby=[URL@0 as URL],
aggr=[count(Int64(1))]
+07)------------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[URL], file_type=parquet
+
query ITI rowsort
SELECT 1, "URL", COUNT(*) AS c FROM hits GROUP BY 1, "URL" ORDER BY c DESC
LIMIT 10;
----
@@ -257,6 +953,26 @@ SELECT 1, "URL", COUNT(*) AS c FROM hits GROUP BY 1, "URL"
ORDER BY c DESC LIMIT
1 http://holodilnik.ru/russia/05jul2013&model=0 1
1 http://tours/Ekategoriya%2F&sr=http://slovareniye 1
+## Q35
+query TT
+EXPLAIN SELECT "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3,
COUNT(*) AS c FROM hits GROUP BY "ClientIP", "ClientIP" - 1, "ClientIP" - 2,
"ClientIP" - 3 ORDER BY c DESC LIMIT 10;
+----
+logical_plan
+01)Sort: c DESC NULLS FIRST, fetch=10
+02)--Projection: hits.ClientIP, hits.ClientIP - Int64(1), hits.ClientIP -
Int64(2), hits.ClientIP - Int64(3), count(Int64(1)) AS count(*) AS c
+03)----Aggregate: groupBy=[[hits.ClientIP, __common_expr_1 AS hits.ClientIP -
Int64(1), __common_expr_1 AS hits.ClientIP - Int64(2), __common_expr_1 AS
hits.ClientIP - Int64(3)]], aggr=[[count(Int64(1))]]
+04)------Projection: CAST(hits.ClientIP AS Int64) AS __common_expr_1,
hits.ClientIP
+05)--------SubqueryAlias: hits
+06)----------TableScan: hits_raw projection=[ClientIP]
+physical_plan
+01)SortPreservingMergeExec: [c@4 DESC], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[c@4 DESC], preserve_partitioning=[true]
+03)----ProjectionExec: expr=[ClientIP@0 as ClientIP, hits.ClientIP -
Int64(1)@1 as hits.ClientIP - Int64(1), hits.ClientIP - Int64(2)@2 as
hits.ClientIP - Int64(2), hits.ClientIP - Int64(3)@3 as hits.ClientIP -
Int64(3), count(Int64(1))@4 as c]
+04)------AggregateExec: mode=FinalPartitioned, gby=[ClientIP@0 as ClientIP,
hits.ClientIP - Int64(1)@1 as hits.ClientIP - Int64(1), hits.ClientIP -
Int64(2)@2 as hits.ClientIP - Int64(2), hits.ClientIP - Int64(3)@3 as
hits.ClientIP - Int64(3)], aggr=[count(Int64(1))]
+05)--------RepartitionExec: partitioning=Hash([ClientIP@0, hits.ClientIP -
Int64(1)@1, hits.ClientIP - Int64(2)@2, hits.ClientIP - Int64(3)@3], 4),
input_partitions=1
+06)----------AggregateExec: mode=Partial, gby=[ClientIP@1 as ClientIP,
__common_expr_1@0 - 1 as hits.ClientIP - Int64(1), __common_expr_1@0 - 2 as
hits.ClientIP - Int64(2), __common_expr_1@0 - 3 as hits.ClientIP - Int64(3)],
aggr=[count(Int64(1))]
+07)------------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[CAST(ClientIP@7 AS Int64) as __common_expr_1, ClientIP],
file_type=parquet
+
query IIIII rowsort
SELECT "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3, COUNT(*) AS
c FROM hits GROUP BY "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3
ORDER BY c DESC LIMIT 10;
----
@@ -265,46 +981,256 @@ SELECT "ClientIP", "ClientIP" - 1, "ClientIP" - 2,
"ClientIP" - 3, COUNT(*) AS c
1568366281 1568366280 1568366279 1568366278 2
1615432634 1615432633 1615432632 1615432631 1
+## Q36
+query TT
+EXPLAIN SELECT "URL", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62
AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND
"DontCountHits" = 0 AND "IsRefresh" = 0 AND "URL" <> '' GROUP BY "URL" ORDER BY
PageViews DESC LIMIT 10;
+----
+logical_plan
+01)Sort: pageviews DESC NULLS FIRST, fetch=10
+02)--Projection: hits.URL, count(Int64(1)) AS count(*) AS pageviews
+03)----Aggregate: groupBy=[[hits.URL]], aggr=[[count(Int64(1))]]
+04)------SubqueryAlias: hits
+05)--------Projection: hits_raw.URL
+06)----------Filter: hits_raw.CounterID = Int32(62) AND
CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) >= Date32("2013-07-01") AND
CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) <= Date32("2013-07-31") AND
hits_raw.DontCountHits = Int16(0) AND hits_raw.IsRefresh = Int16(0) AND
hits_raw.URL != Utf8View("")
+07)------------TableScan: hits_raw projection=[EventDate, CounterID, URL,
IsRefresh, DontCountHits], partial_filters=[hits_raw.CounterID = Int32(62),
CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) >= Date32("2013-07-01"),
CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) <= Date32("2013-07-31"),
hits_raw.DontCountHits = Int16(0), hits_raw.IsRefresh = Int16(0), hits_raw.URL
!= Utf8View("")]
+physical_plan
+01)SortPreservingMergeExec: [pageviews@1 DESC], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[pageviews@1 DESC],
preserve_partitioning=[true]
+03)----ProjectionExec: expr=[URL@0 as URL, count(Int64(1))@1 as pageviews]
+04)------AggregateExec: mode=FinalPartitioned, gby=[URL@0 as URL],
aggr=[count(Int64(1))]
+05)--------RepartitionExec: partitioning=Hash([URL@0], 4), input_partitions=4
+06)----------AggregateExec: mode=Partial, gby=[URL@0 as URL],
aggr=[count(Int64(1))]
+07)------------FilterExec: CounterID@1 = 62 AND CAST(CAST(EventDate@0 AS
Int32) AS Date32) >= 2013-07-01 AND CAST(CAST(EventDate@0 AS Int32) AS Date32)
<= 2013-07-31 AND DontCountHits@4 = 0 AND IsRefresh@3 = 0 AND URL@2 != ,
projection=[URL@2]
+08)--------------RepartitionExec: partitioning=RoundRobinBatch(4),
input_partitions=1
+09)----------------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[EventDate, CounterID, URL, IsRefresh, DontCountHits],
file_type=parquet, predicate=CounterID@6 = 62 AND CAST(CAST(EventDate@5 AS
Int32) AS Date32) >= 2013-07-01 AND CAST(CAST(EventDate@5 AS Int32) AS Date32)
<= 2013-07-31 AND DontCountHits@61 = 0 AND IsRefresh@15 = 0 AND URL@13 != ,
pruning_predicate=CounterID_null_count@2 != row_count@3 AND Coun [...]
+
query TI
SELECT "URL", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND
"EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "DontCountHits"
= 0 AND "IsRefresh" = 0 AND "URL" <> '' GROUP BY "URL" ORDER BY PageViews DESC
LIMIT 10;
----
+## Q37
+query TT
+EXPLAIN SELECT "Title", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62
AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND
"DontCountHits" = 0 AND "IsRefresh" = 0 AND "Title" <> '' GROUP BY "Title"
ORDER BY PageViews DESC LIMIT 10;
+----
+logical_plan
+01)Sort: pageviews DESC NULLS FIRST, fetch=10
+02)--Projection: hits.Title, count(Int64(1)) AS count(*) AS pageviews
+03)----Aggregate: groupBy=[[hits.Title]], aggr=[[count(Int64(1))]]
+04)------SubqueryAlias: hits
+05)--------Projection: hits_raw.Title
+06)----------Filter: hits_raw.CounterID = Int32(62) AND
CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) >= Date32("2013-07-01") AND
CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) <= Date32("2013-07-31") AND
hits_raw.DontCountHits = Int16(0) AND hits_raw.IsRefresh = Int16(0) AND
hits_raw.Title != Utf8View("")
+07)------------TableScan: hits_raw projection=[Title, EventDate, CounterID,
IsRefresh, DontCountHits], partial_filters=[hits_raw.CounterID = Int32(62),
CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) >= Date32("2013-07-01"),
CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) <= Date32("2013-07-31"),
hits_raw.DontCountHits = Int16(0), hits_raw.IsRefresh = Int16(0),
hits_raw.Title != Utf8View("")]
+physical_plan
+01)SortPreservingMergeExec: [pageviews@1 DESC], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[pageviews@1 DESC],
preserve_partitioning=[true]
+03)----ProjectionExec: expr=[Title@0 as Title, count(Int64(1))@1 as pageviews]
+04)------AggregateExec: mode=FinalPartitioned, gby=[Title@0 as Title],
aggr=[count(Int64(1))]
+05)--------RepartitionExec: partitioning=Hash([Title@0], 4), input_partitions=4
+06)----------AggregateExec: mode=Partial, gby=[Title@0 as Title],
aggr=[count(Int64(1))]
+07)------------FilterExec: CounterID@2 = 62 AND CAST(CAST(EventDate@1 AS
Int32) AS Date32) >= 2013-07-01 AND CAST(CAST(EventDate@1 AS Int32) AS Date32)
<= 2013-07-31 AND DontCountHits@4 = 0 AND IsRefresh@3 = 0 AND Title@0 != ,
projection=[Title@0]
+08)--------------RepartitionExec: partitioning=RoundRobinBatch(4),
input_partitions=1
+09)----------------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[Title, EventDate, CounterID, IsRefresh, DontCountHits],
file_type=parquet, predicate=CounterID@6 = 62 AND CAST(CAST(EventDate@5 AS
Int32) AS Date32) >= 2013-07-01 AND CAST(CAST(EventDate@5 AS Int32) AS Date32)
<= 2013-07-31 AND DontCountHits@61 = 0 AND IsRefresh@15 = 0 AND Title@2 != ,
pruning_predicate=CounterID_null_count@2 != row_count@3 AND C [...]
+
query TI
SELECT "Title", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND
"EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "DontCountHits"
= 0 AND "IsRefresh" = 0 AND "Title" <> '' GROUP BY "Title" ORDER BY PageViews
DESC LIMIT 10;
----
+## Q38
+query TT
+EXPLAIN SELECT "URL", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62
AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh"
= 0 AND "IsLink" <> 0 AND "IsDownload" = 0 GROUP BY "URL" ORDER BY PageViews
DESC LIMIT 10 OFFSET 1000;
+----
+logical_plan
+01)Limit: skip=1000, fetch=10
+02)--Sort: pageviews DESC NULLS FIRST, fetch=1010
+03)----Projection: hits.URL, count(Int64(1)) AS count(*) AS pageviews
+04)------Aggregate: groupBy=[[hits.URL]], aggr=[[count(Int64(1))]]
+05)--------SubqueryAlias: hits
+06)----------Projection: hits_raw.URL
+07)------------Filter: hits_raw.CounterID = Int32(62) AND
CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) >= Date32("2013-07-01") AND
CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) <= Date32("2013-07-31") AND
hits_raw.IsRefresh = Int16(0) AND hits_raw.IsLink != Int16(0) AND
hits_raw.IsDownload = Int16(0)
+08)--------------TableScan: hits_raw projection=[EventDate, CounterID, URL,
IsRefresh, IsLink, IsDownload], partial_filters=[hits_raw.CounterID =
Int32(62), CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) >=
Date32("2013-07-01"), CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) <=
Date32("2013-07-31"), hits_raw.IsRefresh = Int16(0), hits_raw.IsLink !=
Int16(0), hits_raw.IsDownload = Int16(0)]
+physical_plan
+01)GlobalLimitExec: skip=1000, fetch=10
+02)--SortPreservingMergeExec: [pageviews@1 DESC], fetch=1010
+03)----SortExec: TopK(fetch=1010), expr=[pageviews@1 DESC],
preserve_partitioning=[true]
+04)------ProjectionExec: expr=[URL@0 as URL, count(Int64(1))@1 as pageviews]
+05)--------AggregateExec: mode=FinalPartitioned, gby=[URL@0 as URL],
aggr=[count(Int64(1))]
+06)----------RepartitionExec: partitioning=Hash([URL@0], 4), input_partitions=4
+07)------------AggregateExec: mode=Partial, gby=[URL@0 as URL],
aggr=[count(Int64(1))]
+08)--------------FilterExec: CounterID@1 = 62 AND CAST(CAST(EventDate@0 AS
Int32) AS Date32) >= 2013-07-01 AND CAST(CAST(EventDate@0 AS Int32) AS Date32)
<= 2013-07-31 AND IsRefresh@3 = 0 AND IsLink@4 != 0 AND IsDownload@5 = 0,
projection=[URL@2]
+09)----------------RepartitionExec: partitioning=RoundRobinBatch(4),
input_partitions=1
+10)------------------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[EventDate, CounterID, URL, IsRefresh, IsLink, IsDownload],
file_type=parquet, predicate=CounterID@6 = 62 AND CAST(CAST(EventDate@5 AS
Int32) AS Date32) >= 2013-07-01 AND CAST(CAST(EventDate@5 AS Int32) AS Date32)
<= 2013-07-31 AND IsRefresh@15 = 0 AND IsLink@52 != 0 AND IsDownload@53 = 0,
pruning_predicate=CounterID_null_count@2 != row_count@3 [...]
+
query TI
SELECT "URL", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND
"EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0
AND "IsLink" <> 0 AND "IsDownload" = 0 GROUP BY "URL" ORDER BY PageViews DESC
LIMIT 10 OFFSET 1000;
----
+## Q39
+query TT
+EXPLAIN SELECT "TraficSourceID", "SearchEngineID", "AdvEngineID", CASE WHEN
("SearchEngineID" = 0 AND "AdvEngineID" = 0) THEN "Referer" ELSE '' END AS Src,
"URL" AS Dst, COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND
"EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0
GROUP BY "TraficSourceID", "SearchEngineID", "AdvEngineID", Src, Dst ORDER BY
PageViews DESC LIMIT 10 OFFSET 1000;
+----
+logical_plan
+01)Limit: skip=1000, fetch=10
+02)--Sort: pageviews DESC NULLS FIRST, fetch=1010
+03)----Projection: hits.TraficSourceID, hits.SearchEngineID, hits.AdvEngineID,
CASE WHEN hits.SearchEngineID = Int64(0) AND hits.AdvEngineID = Int64(0) THEN
hits.Referer ELSE Utf8("") END AS src, hits.URL AS dst, count(Int64(1)) AS
count(*) AS pageviews
+04)------Aggregate: groupBy=[[hits.TraficSourceID, hits.SearchEngineID,
hits.AdvEngineID, CASE WHEN hits.SearchEngineID = Int16(0) AND hits.AdvEngineID
= Int16(0) THEN hits.Referer ELSE Utf8View("") END AS CASE WHEN
hits.SearchEngineID = Int64(0) AND hits.AdvEngineID = Int64(0) THEN
hits.Referer ELSE Utf8("") END, hits.URL]], aggr=[[count(Int64(1))]]
+05)--------SubqueryAlias: hits
+06)----------Projection: hits_raw.URL, hits_raw.Referer,
hits_raw.TraficSourceID, hits_raw.SearchEngineID, hits_raw.AdvEngineID
+07)------------Filter: hits_raw.CounterID = Int32(62) AND
CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) >= Date32("2013-07-01") AND
CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) <= Date32("2013-07-31") AND
hits_raw.IsRefresh = Int16(0)
+08)--------------TableScan: hits_raw projection=[EventDate, CounterID, URL,
Referer, IsRefresh, TraficSourceID, SearchEngineID, AdvEngineID],
partial_filters=[hits_raw.CounterID = Int32(62), CAST(CAST(hits_raw.EventDate
AS Int32) AS Date32) >= Date32("2013-07-01"), CAST(CAST(hits_raw.EventDate AS
Int32) AS Date32) <= Date32("2013-07-31"), hits_raw.IsRefresh = Int16(0)]
+physical_plan
+01)GlobalLimitExec: skip=1000, fetch=10
+02)--SortPreservingMergeExec: [pageviews@5 DESC], fetch=1010
+03)----SortExec: TopK(fetch=1010), expr=[pageviews@5 DESC],
preserve_partitioning=[true]
+04)------ProjectionExec: expr=[TraficSourceID@0 as TraficSourceID,
SearchEngineID@1 as SearchEngineID, AdvEngineID@2 as AdvEngineID, CASE WHEN
hits.SearchEngineID = Int64(0) AND hits.AdvEngineID = Int64(0) THEN
hits.Referer ELSE Utf8("") END@3 as src, URL@4 as dst, count(Int64(1))@5 as
pageviews]
+05)--------AggregateExec: mode=FinalPartitioned, gby=[TraficSourceID@0 as
TraficSourceID, SearchEngineID@1 as SearchEngineID, AdvEngineID@2 as
AdvEngineID, CASE WHEN hits.SearchEngineID = Int64(0) AND hits.AdvEngineID =
Int64(0) THEN hits.Referer ELSE Utf8("") END@3 as CASE WHEN hits.SearchEngineID
= Int64(0) AND hits.AdvEngineID = Int64(0) THEN hits.Referer ELSE Utf8("") END,
URL@4 as URL], aggr=[count(Int64(1))]
+06)----------RepartitionExec: partitioning=Hash([TraficSourceID@0,
SearchEngineID@1, AdvEngineID@2, CASE WHEN hits.SearchEngineID = Int64(0) AND
hits.AdvEngineID = Int64(0) THEN hits.Referer ELSE Utf8("") END@3, URL@4], 4),
input_partitions=4
+07)------------AggregateExec: mode=Partial, gby=[TraficSourceID@2 as
TraficSourceID, SearchEngineID@3 as SearchEngineID, AdvEngineID@4 as
AdvEngineID, CASE WHEN SearchEngineID@3 = 0 AND AdvEngineID@4 = 0 THEN
Referer@1 ELSE END as CASE WHEN hits.SearchEngineID = Int64(0) AND
hits.AdvEngineID = Int64(0) THEN hits.Referer ELSE Utf8("") END, URL@0 as URL],
aggr=[count(Int64(1))]
+08)--------------FilterExec: CounterID@1 = 62 AND CAST(CAST(EventDate@0 AS
Int32) AS Date32) >= 2013-07-01 AND CAST(CAST(EventDate@0 AS Int32) AS Date32)
<= 2013-07-31 AND IsRefresh@4 = 0, projection=[URL@2, Referer@3,
TraficSourceID@5, SearchEngineID@6, AdvEngineID@7]
+09)----------------RepartitionExec: partitioning=RoundRobinBatch(4),
input_partitions=1
+10)------------------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[EventDate, CounterID, URL, Referer, IsRefresh, TraficSourceID,
SearchEngineID, AdvEngineID], file_type=parquet, predicate=CounterID@6 = 62 AND
CAST(CAST(EventDate@5 AS Int32) AS Date32) >= 2013-07-01 AND
CAST(CAST(EventDate@5 AS Int32) AS Date32) <= 2013-07-31 AND IsRefresh@15 = 0,
pruning_predicate=CounterID_null_count@2 != row_count@3 AND Cou [...]
+
query IIITTI
SELECT "TraficSourceID", "SearchEngineID", "AdvEngineID", CASE WHEN
("SearchEngineID" = 0 AND "AdvEngineID" = 0) THEN "Referer" ELSE '' END AS Src,
"URL" AS Dst, COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND
"EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0
GROUP BY "TraficSourceID", "SearchEngineID", "AdvEngineID", Src, Dst ORDER BY
PageViews DESC LIMIT 10 OFFSET 1000;
----
+## Q40
+query TT
+EXPLAIN SELECT "URLHash", "EventDate", COUNT(*) AS PageViews FROM hits WHERE
"CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <=
'2013-07-31' AND "IsRefresh" = 0 AND "TraficSourceID" IN (-1, 6) AND
"RefererHash" = 3594120000172545465 GROUP BY "URLHash", "EventDate" ORDER BY
PageViews DESC LIMIT 10 OFFSET 100;
+----
+logical_plan
+01)Limit: skip=100, fetch=10
+02)--Sort: pageviews DESC NULLS FIRST, fetch=110
+03)----Projection: hits.URLHash, hits.EventDate, count(Int64(1)) AS count(*)
AS pageviews
+04)------Aggregate: groupBy=[[hits.URLHash, hits.EventDate]],
aggr=[[count(Int64(1))]]
+05)--------SubqueryAlias: hits
+06)----------Projection: hits_raw.URLHash, CAST(CAST(hits_raw.EventDate AS
Int32) AS Date32) AS EventDate
+07)------------Filter: hits_raw.CounterID = Int32(62) AND
CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) >= Date32("2013-07-01") AND
CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) <= Date32("2013-07-31") AND
hits_raw.IsRefresh = Int16(0) AND (hits_raw.TraficSourceID = Int16(-1) OR
hits_raw.TraficSourceID = Int16(6)) AND hits_raw.RefererHash =
Int64(3594120000172545465)
+08)--------------TableScan: hits_raw projection=[EventDate, CounterID,
IsRefresh, TraficSourceID, RefererHash, URLHash],
partial_filters=[hits_raw.CounterID = Int32(62), CAST(CAST(hits_raw.EventDate
AS Int32) AS Date32) >= Date32("2013-07-01"), CAST(CAST(hits_raw.EventDate AS
Int32) AS Date32) <= Date32("2013-07-31"), hits_raw.IsRefresh = Int16(0),
hits_raw.TraficSourceID = Int16(-1) OR hits_raw.TraficSourceID = Int16(6),
hits_raw.RefererHash = Int64(3594120000172545465)]
+physical_plan
+01)GlobalLimitExec: skip=100, fetch=10
+02)--SortPreservingMergeExec: [pageviews@2 DESC], fetch=110
+03)----SortExec: TopK(fetch=110), expr=[pageviews@2 DESC],
preserve_partitioning=[true]
+04)------ProjectionExec: expr=[URLHash@0 as URLHash, EventDate@1 as EventDate,
count(Int64(1))@2 as pageviews]
+05)--------AggregateExec: mode=FinalPartitioned, gby=[URLHash@0 as URLHash,
EventDate@1 as EventDate], aggr=[count(Int64(1))]
+06)----------RepartitionExec: partitioning=Hash([URLHash@0, EventDate@1], 4),
input_partitions=4
+07)------------AggregateExec: mode=Partial, gby=[URLHash@0 as URLHash,
EventDate@1 as EventDate], aggr=[count(Int64(1))]
+08)--------------ProjectionExec: expr=[URLHash@1 as URLHash,
CAST(CAST(EventDate@0 AS Int32) AS Date32) as EventDate]
+09)----------------FilterExec: CounterID@1 = 62 AND CAST(CAST(EventDate@0 AS
Int32) AS Date32) >= 2013-07-01 AND CAST(CAST(EventDate@0 AS Int32) AS Date32)
<= 2013-07-31 AND IsRefresh@2 = 0 AND (TraficSourceID@3 = -1 OR
TraficSourceID@3 = 6) AND RefererHash@4 = 3594120000172545465,
projection=[EventDate@0, URLHash@5]
+10)------------------RepartitionExec: partitioning=RoundRobinBatch(4),
input_partitions=1
+11)--------------------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[EventDate, CounterID, IsRefresh, TraficSourceID, RefererHash,
URLHash], file_type=parquet, predicate=CounterID@6 = 62 AND
CAST(CAST(EventDate@5 AS Int32) AS Date32) >= 2013-07-01 AND
CAST(CAST(EventDate@5 AS Int32) AS Date32) <= 2013-07-31 AND IsRefresh@15 = 0
AND (TraficSourceID@37 = -1 OR TraficSourceID@37 = 6) AND RefererHash@102 =
3594120 [...]
+
query IDI
SELECT "URLHash", "EventDate", COUNT(*) AS PageViews FROM hits WHERE
"CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <=
'2013-07-31' AND "IsRefresh" = 0 AND "TraficSourceID" IN (-1, 6) AND
"RefererHash" = 3594120000172545465 GROUP BY "URLHash", "EventDate" ORDER BY
PageViews DESC LIMIT 10 OFFSET 100;
----
+## Q41
+query TT
+EXPLAIN SELECT "WindowClientWidth", "WindowClientHeight", COUNT(*) AS
PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND
"EventDate" <= '2013-07-31' AND "IsRefresh" = 0 AND "DontCountHits" = 0 AND
"URLHash" = 2868770270353813622 GROUP BY "WindowClientWidth",
"WindowClientHeight" ORDER BY PageViews DESC LIMIT 10 OFFSET 10000;
+----
+logical_plan
+01)Limit: skip=10000, fetch=10
+02)--Sort: pageviews DESC NULLS FIRST, fetch=10010
+03)----Projection: hits.WindowClientWidth, hits.WindowClientHeight,
count(Int64(1)) AS count(*) AS pageviews
+04)------Aggregate: groupBy=[[hits.WindowClientWidth,
hits.WindowClientHeight]], aggr=[[count(Int64(1))]]
+05)--------SubqueryAlias: hits
+06)----------Projection: hits_raw.WindowClientWidth,
hits_raw.WindowClientHeight
+07)------------Filter: hits_raw.CounterID = Int32(62) AND
CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) >= Date32("2013-07-01") AND
CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) <= Date32("2013-07-31") AND
hits_raw.IsRefresh = Int16(0) AND hits_raw.DontCountHits = Int16(0) AND
hits_raw.URLHash = Int64(2868770270353813622)
+08)--------------TableScan: hits_raw projection=[EventDate, CounterID,
IsRefresh, WindowClientWidth, WindowClientHeight, DontCountHits, URLHash],
partial_filters=[hits_raw.CounterID = Int32(62), CAST(CAST(hits_raw.EventDate
AS Int32) AS Date32) >= Date32("2013-07-01"), CAST(CAST(hits_raw.EventDate AS
Int32) AS Date32) <= Date32("2013-07-31"), hits_raw.IsRefresh = Int16(0),
hits_raw.DontCountHits = Int16(0), hits_raw.URLHash =
Int64(2868770270353813622)]
+physical_plan
+01)GlobalLimitExec: skip=10000, fetch=10
+02)--SortPreservingMergeExec: [pageviews@2 DESC], fetch=10010
+03)----SortExec: TopK(fetch=10010), expr=[pageviews@2 DESC],
preserve_partitioning=[true]
+04)------ProjectionExec: expr=[WindowClientWidth@0 as WindowClientWidth,
WindowClientHeight@1 as WindowClientHeight, count(Int64(1))@2 as pageviews]
+05)--------AggregateExec: mode=FinalPartitioned, gby=[WindowClientWidth@0 as
WindowClientWidth, WindowClientHeight@1 as WindowClientHeight],
aggr=[count(Int64(1))]
+06)----------RepartitionExec: partitioning=Hash([WindowClientWidth@0,
WindowClientHeight@1], 4), input_partitions=4
+07)------------AggregateExec: mode=Partial, gby=[WindowClientWidth@0 as
WindowClientWidth, WindowClientHeight@1 as WindowClientHeight],
aggr=[count(Int64(1))]
+08)--------------FilterExec: CounterID@1 = 62 AND CAST(CAST(EventDate@0 AS
Int32) AS Date32) >= 2013-07-01 AND CAST(CAST(EventDate@0 AS Int32) AS Date32)
<= 2013-07-31 AND IsRefresh@2 = 0 AND DontCountHits@5 = 0 AND URLHash@6 =
2868770270353813622, projection=[WindowClientWidth@3, WindowClientHeight@4]
+09)----------------RepartitionExec: partitioning=RoundRobinBatch(4),
input_partitions=1
+10)------------------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[EventDate, CounterID, IsRefresh, WindowClientWidth,
WindowClientHeight, DontCountHits, URLHash], file_type=parquet,
predicate=CounterID@6 = 62 AND CAST(CAST(EventDate@5 AS Int32) AS Date32) >=
2013-07-01 AND CAST(CAST(EventDate@5 AS Int32) AS Date32) <= 2013-07-31 AND
IsRefresh@15 = 0 AND DontCountHits@61 = 0 AND URLHash@103 = 28687702703538136
[...]
+
query III
SELECT "WindowClientWidth", "WindowClientHeight", COUNT(*) AS PageViews FROM
hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <=
'2013-07-31' AND "IsRefresh" = 0 AND "DontCountHits" = 0 AND "URLHash" =
2868770270353813622 GROUP BY "WindowClientWidth", "WindowClientHeight" ORDER BY
PageViews DESC LIMIT 10 OFFSET 10000;
----
+## Q42
+query TT
+EXPLAIN SELECT DATE_TRUNC('minute', to_timestamp_seconds("EventTime")) AS M,
COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >=
'2013-07-14' AND "EventDate" <= '2013-07-15' AND "IsRefresh" = 0 AND
"DontCountHits" = 0 GROUP BY DATE_TRUNC('minute',
to_timestamp_seconds("EventTime")) ORDER BY DATE_TRUNC('minute', M) LIMIT 10
OFFSET 1000;
+----
+logical_plan
+01)Limit: skip=1000, fetch=10
+02)--Sort: date_trunc(Utf8("minute"), m) ASC NULLS LAST, fetch=1010
+03)----Projection:
date_trunc(Utf8("minute"),to_timestamp_seconds(hits.EventTime)) AS m,
count(Int64(1)) AS count(*) AS pageviews
+04)------Aggregate: groupBy=[[date_trunc(Utf8("minute"),
to_timestamp_seconds(hits.EventTime))]], aggr=[[count(Int64(1))]]
+05)--------SubqueryAlias: hits
+06)----------Projection: hits_raw.EventTime
+07)------------Filter: hits_raw.CounterID = Int32(62) AND
CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) >= Date32("2013-07-14") AND
CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) <= Date32("2013-07-15") AND
hits_raw.IsRefresh = Int16(0) AND hits_raw.DontCountHits = Int16(0)
+08)--------------TableScan: hits_raw projection=[EventTime, EventDate,
CounterID, IsRefresh, DontCountHits], partial_filters=[hits_raw.CounterID =
Int32(62), CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) >=
Date32("2013-07-14"), CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) <=
Date32("2013-07-15"), hits_raw.IsRefresh = Int16(0), hits_raw.DontCountHits =
Int16(0)]
+physical_plan
+01)GlobalLimitExec: skip=1000, fetch=10
+02)--SortPreservingMergeExec: [date_trunc(minute, m@0) ASC NULLS LAST],
fetch=1010
+03)----SortExec: TopK(fetch=1010), expr=[date_trunc(minute, m@0) ASC NULLS
LAST], preserve_partitioning=[true]
+04)------ProjectionExec:
expr=[date_trunc(Utf8("minute"),to_timestamp_seconds(hits.EventTime))@0 as m,
count(Int64(1))@1 as pageviews]
+05)--------AggregateExec: mode=FinalPartitioned,
gby=[date_trunc(Utf8("minute"),to_timestamp_seconds(hits.EventTime))@0 as
date_trunc(Utf8("minute"),to_timestamp_seconds(hits.EventTime))],
aggr=[count(Int64(1))]
+06)----------RepartitionExec:
partitioning=Hash([date_trunc(Utf8("minute"),to_timestamp_seconds(hits.EventTime))@0],
4), input_partitions=4
+07)------------AggregateExec: mode=Partial, gby=[date_trunc(minute,
to_timestamp_seconds(EventTime@0)) as
date_trunc(Utf8("minute"),to_timestamp_seconds(hits.EventTime))],
aggr=[count(Int64(1))]
+08)--------------FilterExec: CounterID@2 = 62 AND CAST(CAST(EventDate@1 AS
Int32) AS Date32) >= 2013-07-14 AND CAST(CAST(EventDate@1 AS Int32) AS Date32)
<= 2013-07-15 AND IsRefresh@3 = 0 AND DontCountHits@4 = 0,
projection=[EventTime@0]
+09)----------------RepartitionExec: partitioning=RoundRobinBatch(4),
input_partitions=1
+10)------------------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[EventTime, EventDate, CounterID, IsRefresh, DontCountHits],
file_type=parquet, predicate=CounterID@6 = 62 AND CAST(CAST(EventDate@5 AS
Int32) AS Date32) >= 2013-07-14 AND CAST(CAST(EventDate@5 AS Int32) AS Date32)
<= 2013-07-15 AND IsRefresh@15 = 0 AND DontCountHits@61 = 0,
pruning_predicate=CounterID_null_count@2 != row_count@3 AND CounterID_m [...]
+
query PI
SELECT DATE_TRUNC('minute', to_timestamp_seconds("EventTime")) AS M, COUNT(*)
AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-14'
AND "EventDate" <= '2013-07-15' AND "IsRefresh" = 0 AND "DontCountHits" = 0
GROUP BY DATE_TRUNC('minute', to_timestamp_seconds("EventTime")) ORDER BY
DATE_TRUNC('minute', M) LIMIT 10 OFFSET 1000;
----
# Clickbench "Extended" queries that test count distinct
+query TT
+EXPLAIN SELECT COUNT(DISTINCT "SearchPhrase"), COUNT(DISTINCT "MobilePhone"),
COUNT(DISTINCT "MobilePhoneModel") FROM hits;
+----
+logical_plan
+01)Aggregate: groupBy=[[]], aggr=[[count(DISTINCT hits.SearchPhrase),
count(DISTINCT hits.MobilePhone), count(DISTINCT hits.MobilePhoneModel)]]
+02)--SubqueryAlias: hits
+03)----TableScan: hits_raw projection=[MobilePhone, MobilePhoneModel,
SearchPhrase]
+physical_plan
+01)AggregateExec: mode=Single, gby=[], aggr=[count(DISTINCT
hits.SearchPhrase), count(DISTINCT hits.MobilePhone), count(DISTINCT
hits.MobilePhoneModel)]
+02)--DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[MobilePhone, MobilePhoneModel, SearchPhrase], file_type=parquet
+
query III
SELECT COUNT(DISTINCT "SearchPhrase"), COUNT(DISTINCT "MobilePhone"),
COUNT(DISTINCT "MobilePhoneModel") FROM hits;
----
1 1 1
+query TT
+EXPLAIN SELECT COUNT(DISTINCT "HitColor"), COUNT(DISTINCT "BrowserCountry"),
COUNT(DISTINCT "BrowserLanguage") FROM hits;
+----
+logical_plan
+01)Aggregate: groupBy=[[]], aggr=[[count(DISTINCT hits.HitColor),
count(DISTINCT hits.BrowserCountry), count(DISTINCT hits.BrowserLanguage)]]
+02)--SubqueryAlias: hits
+03)----TableScan: hits_raw projection=[HitColor, BrowserLanguage,
BrowserCountry]
+physical_plan
+01)AggregateExec: mode=Single, gby=[], aggr=[count(DISTINCT hits.HitColor),
count(DISTINCT hits.BrowserCountry), count(DISTINCT hits.BrowserLanguage)]
+02)--DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[HitColor, BrowserLanguage, BrowserCountry], file_type=parquet
+
query III
SELECT COUNT(DISTINCT "HitColor"), COUNT(DISTINCT "BrowserCountry"),
COUNT(DISTINCT "BrowserLanguage") FROM hits;
----
1 1 1
+query TT
+EXPLAIN SELECT "BrowserCountry", COUNT(DISTINCT "SocialNetwork"),
COUNT(DISTINCT "HitColor"), COUNT(DISTINCT "BrowserLanguage"), COUNT(DISTINCT
"SocialAction") FROM hits GROUP BY 1 ORDER BY 2 DESC LIMIT 10;
+----
+logical_plan
+01)Sort: count(DISTINCT hits.SocialNetwork) DESC NULLS FIRST, fetch=10
+02)--Aggregate: groupBy=[[hits.BrowserCountry]], aggr=[[count(DISTINCT
hits.SocialNetwork), count(DISTINCT hits.HitColor), count(DISTINCT
hits.BrowserLanguage), count(DISTINCT hits.SocialAction)]]
+03)----SubqueryAlias: hits
+04)------TableScan: hits_raw projection=[HitColor, BrowserLanguage,
BrowserCountry, SocialNetwork, SocialAction]
+physical_plan
+01)SortPreservingMergeExec: [count(DISTINCT hits.SocialNetwork)@1 DESC],
fetch=10
+02)--SortExec: TopK(fetch=10), expr=[count(DISTINCT hits.SocialNetwork)@1
DESC], preserve_partitioning=[true]
+03)----AggregateExec: mode=FinalPartitioned, gby=[BrowserCountry@0 as
BrowserCountry], aggr=[count(DISTINCT hits.SocialNetwork), count(DISTINCT
hits.HitColor), count(DISTINCT hits.BrowserLanguage), count(DISTINCT
hits.SocialAction)]
+04)------RepartitionExec: partitioning=Hash([BrowserCountry@0], 4),
input_partitions=1
+05)--------AggregateExec: mode=Partial, gby=[BrowserCountry@2 as
BrowserCountry], aggr=[count(DISTINCT hits.SocialNetwork), count(DISTINCT
hits.HitColor), count(DISTINCT hits.BrowserLanguage), count(DISTINCT
hits.SocialAction)]
+06)----------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]},
projection=[HitColor, BrowserLanguage, BrowserCountry, SocialNetwork,
SocialAction], file_type=parquet
+
query TIIII
SELECT "BrowserCountry", COUNT(DISTINCT "SocialNetwork"), COUNT(DISTINCT
"HitColor"), COUNT(DISTINCT "BrowserLanguage"), COUNT(DISTINCT "SocialAction")
FROM hits GROUP BY 1 ORDER BY 2 DESC LIMIT 10;
----
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]