This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 92d0a5c527 Add explain plans for ClickBench queries (#20666)
92d0a5c527 is described below

commit 92d0a5c527ed38b6ce3bad673f6cf5a372c71b0c
Author: Andrew Lamb <[email protected]>
AuthorDate: Wed Mar 4 09:06:15 2026 -0500

    Add explain plans for ClickBench queries (#20666)
    
    ## Which issue does this PR close?
    
    - Part of #18489
    - Related to https://github.com/apache/datafusion/pull/20180
    
    ## Rationale for this change
    
    This looks like a monster PR but I think it will be quite easy to review
    (it just adds some new `EXPLAIN` tests). If it would be helpful I can
    break it into smaller pieces
    
    I want to improve the plans for ClickBench Query 29
    
    However, the plans for the ClickBench queries are not in our tests
    anywhere (so when I make the improvements in
    https://github.com/apache/datafusion/pull/20665 no explain plan tests
    change)
    
    So to start, let's start with adding the explain plans for all the
    queries in clickbench.slt to so it is clear what our current plans look
    like as well as to make it clear what the change of plans are
    
    ## What changes are included in this PR?
    
    Add explain plans to some .slt tests
    
    ## Are these changes tested?
    
    Only tests
    
    ## Are there any user-facing changes?
    No, this only adds tests
---
 datafusion/sqllogictest/test_files/clickbench.slt | 936 +++++++++++++++++++++-
 1 file changed, 931 insertions(+), 5 deletions(-)

diff --git a/datafusion/sqllogictest/test_files/clickbench.slt 
b/datafusion/sqllogictest/test_files/clickbench.slt
index 42b7cfafda..10059664ad 100644
--- a/datafusion/sqllogictest/test_files/clickbench.slt
+++ b/datafusion/sqllogictest/test_files/clickbench.slt
@@ -15,10 +15,9 @@
 # specific language governing permissions and limitations
 # under the License.
 
-
-# This file contains the clickbench schema and queries
-# and the first 10 rows of data. Since ClickBench contains case sensitive 
queries
-# this is also a good test of that usecase too
+## Notes: This file contains the ClickBench schema and queries and the first 10
+## rows of data. Since ClickBench contains case sensitive identifiers (e.g.
+## "EventDate") this is also a good test of that usecase too
 
 # create.sql came from
 # 
https://github.com/ClickHouse/ClickBench/blob/8b9e3aa05ea18afa427f14909ddc678b8ef0d5e6/datafusion/create.sql
@@ -31,6 +30,8 @@ STORED AS PARQUET
 LOCATION '../core/tests/data/clickbench_hits_10.parquet';
 
 # ClickBench encodes EventDate as UInt16 days since epoch.
+# So we define this view to convert it to the correct DATE type (this is done
+# in the ClickBench runner as well, see 
https://github.com/ClickHouse/ClickBench/pull/803
 statement ok
 CREATE VIEW hits AS
 SELECT * EXCEPT ("EventDate"),
@@ -38,6 +39,7 @@ SELECT * EXCEPT ("EventDate"),
 FROM hits_raw;
 
 # Verify EventDate transformation from UInt16 to DATE
+
 query D
 SELECT "EventDate" FROM hits LIMIT 1;
 ----
@@ -52,45 +54,203 @@ SELECT "EventDate" FROM hits_raw LIMIT 1;
 # queries.sql came from
 # 
https://github.com/ClickHouse/ClickBench/blob/8b9e3aa05ea18afa427f14909ddc678b8ef0d5e6/datafusion/queries.sql
 
+## Q0
+query TT
+EXPLAIN SELECT COUNT(*) FROM hits;
+----
+logical_plan
+01)Projection: count(Int64(1)) AS count(*)
+02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]]
+03)----SubqueryAlias: hits
+04)------TableScan: hits_raw projection=[]
+physical_plan
+01)ProjectionExec: expr=[10 as count(*)]
+02)--PlaceholderRowExec
+
 query I
 SELECT COUNT(*) FROM hits;
 ----
 10
 
+## Q1
+query TT
+EXPLAIN SELECT COUNT(*) FROM hits WHERE "AdvEngineID" <> 0;
+----
+logical_plan
+01)Projection: count(Int64(1)) AS count(*)
+02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]]
+03)----SubqueryAlias: hits
+04)------Projection:
+05)--------Filter: hits_raw.AdvEngineID != Int16(0)
+06)----------TableScan: hits_raw projection=[AdvEngineID], 
partial_filters=[hits_raw.AdvEngineID != Int16(0)]
+physical_plan
+01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)]
+02)--AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))]
+03)----CoalescePartitionsExec
+04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))]
+05)--------FilterExec: AdvEngineID@0 != 0, projection=[]
+06)----------RepartitionExec: partitioning=RoundRobinBatch(4), 
input_partitions=1
+07)------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[AdvEngineID], file_type=parquet, predicate=AdvEngineID@40 != 0, 
pruning_predicate=AdvEngineID_null_count@2 != row_count@3 AND 
(AdvEngineID_min@0 != 0 OR 0 != AdvEngineID_max@1), 
required_guarantees=[AdvEngineID not in (0)]
+
 query I
 SELECT COUNT(*) FROM hits WHERE "AdvEngineID" <> 0;
 ----
 0
 
+query TT
+EXPLAIN SELECT SUM("AdvEngineID"), COUNT(*), AVG("ResolutionWidth") FROM hits;
+----
+logical_plan
+01)Projection: sum(hits.AdvEngineID), count(Int64(1)) AS count(*), 
avg(hits.ResolutionWidth)
+02)--Aggregate: groupBy=[[]], aggr=[[sum(CAST(hits.AdvEngineID AS Int64)), 
count(Int64(1)), avg(CAST(hits.ResolutionWidth AS Float64))]]
+03)----SubqueryAlias: hits
+04)------TableScan: hits_raw projection=[ResolutionWidth, AdvEngineID]
+physical_plan
+01)ProjectionExec: expr=[sum(hits.AdvEngineID)@0 as sum(hits.AdvEngineID), 
count(Int64(1))@1 as count(*), avg(hits.ResolutionWidth)@2 as 
avg(hits.ResolutionWidth)]
+02)--AggregateExec: mode=Single, gby=[], aggr=[sum(hits.AdvEngineID), 
count(Int64(1)), avg(hits.ResolutionWidth)]
+03)----DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[ResolutionWidth, AdvEngineID], file_type=parquet
+
 query IIR
 SELECT SUM("AdvEngineID"), COUNT(*), AVG("ResolutionWidth") FROM hits;
 ----
 0 10 0
 
+## Q3
+query TT
+EXPLAIN SELECT AVG("UserID") FROM hits;
+----
+logical_plan
+01)Aggregate: groupBy=[[]], aggr=[[avg(CAST(hits.UserID AS Float64))]]
+02)--SubqueryAlias: hits
+03)----TableScan: hits_raw projection=[UserID]
+physical_plan
+01)AggregateExec: mode=Single, gby=[], aggr=[avg(hits.UserID)]
+02)--DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[UserID], file_type=parquet
+
 query R
 SELECT AVG("UserID") FROM hits;
 ----
 -304548765855551740
 
+## Q4
+query TT
+EXPLAIN SELECT COUNT(DISTINCT "UserID") FROM hits;
+----
+logical_plan
+01)Projection: count(alias1) AS count(DISTINCT hits.UserID)
+02)--Aggregate: groupBy=[[]], aggr=[[count(alias1)]]
+03)----Aggregate: groupBy=[[hits.UserID AS alias1]], aggr=[[]]
+04)------SubqueryAlias: hits
+05)--------TableScan: hits_raw projection=[UserID]
+physical_plan
+01)ProjectionExec: expr=[count(alias1)@0 as count(DISTINCT hits.UserID)]
+02)--AggregateExec: mode=Final, gby=[], aggr=[count(alias1)]
+03)----CoalescePartitionsExec
+04)------AggregateExec: mode=Partial, gby=[], aggr=[count(alias1)]
+05)--------AggregateExec: mode=FinalPartitioned, gby=[alias1@0 as alias1], 
aggr=[]
+06)----------RepartitionExec: partitioning=Hash([alias1@0], 4), 
input_partitions=1
+07)------------AggregateExec: mode=Partial, gby=[UserID@0 as alias1], aggr=[]
+08)--------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[UserID], file_type=parquet
+
 query I
 SELECT COUNT(DISTINCT "UserID") FROM hits;
 ----
 5
 
+## Q5
+query TT
+EXPLAIN SELECT COUNT(DISTINCT "SearchPhrase") FROM hits;
+----
+logical_plan
+01)Projection: count(alias1) AS count(DISTINCT hits.SearchPhrase)
+02)--Aggregate: groupBy=[[]], aggr=[[count(alias1)]]
+03)----Aggregate: groupBy=[[hits.SearchPhrase AS alias1]], aggr=[[]]
+04)------SubqueryAlias: hits
+05)--------TableScan: hits_raw projection=[SearchPhrase]
+physical_plan
+01)ProjectionExec: expr=[count(alias1)@0 as count(DISTINCT hits.SearchPhrase)]
+02)--AggregateExec: mode=Final, gby=[], aggr=[count(alias1)]
+03)----CoalescePartitionsExec
+04)------AggregateExec: mode=Partial, gby=[], aggr=[count(alias1)]
+05)--------AggregateExec: mode=FinalPartitioned, gby=[alias1@0 as alias1], 
aggr=[]
+06)----------RepartitionExec: partitioning=Hash([alias1@0], 4), 
input_partitions=1
+07)------------AggregateExec: mode=Partial, gby=[SearchPhrase@0 as alias1], 
aggr=[]
+08)--------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[SearchPhrase], file_type=parquet
+
 query I
 SELECT COUNT(DISTINCT "SearchPhrase") FROM hits;
 ----
 1
 
+## Q6
+query TT
+EXPLAIN SELECT MIN("EventDate"), MAX("EventDate") FROM hits;
+----
+logical_plan
+01)Aggregate: groupBy=[[]], aggr=[[min(hits.EventDate), max(hits.EventDate)]]
+02)--SubqueryAlias: hits
+03)----Projection: CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) AS 
EventDate
+04)------TableScan: hits_raw projection=[EventDate]
+physical_plan
+01)AggregateExec: mode=Single, gby=[], aggr=[min(hits.EventDate), 
max(hits.EventDate)]
+02)--DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[CAST(CAST(EventDate@5 AS Int32) AS Date32) as EventDate], 
file_type=parquet
+
 query DD
 SELECT MIN("EventDate"), MAX("EventDate") FROM hits;
 ----
 2013-07-15 2013-07-15
 
+## Q7
+query TT
+EXPLAIN SELECT "AdvEngineID", COUNT(*) FROM hits WHERE "AdvEngineID" <> 0 
GROUP BY "AdvEngineID" ORDER BY COUNT(*) DESC;
+----
+logical_plan
+01)Projection: hits.AdvEngineID, count(*)
+02)--Sort: count(Int64(1)) AS count(*) AS count(*) DESC NULLS FIRST
+03)----Projection: hits.AdvEngineID, count(Int64(1)) AS count(*), 
count(Int64(1))
+04)------Aggregate: groupBy=[[hits.AdvEngineID]], aggr=[[count(Int64(1))]]
+05)--------SubqueryAlias: hits
+06)----------Filter: hits_raw.AdvEngineID != Int16(0)
+07)------------TableScan: hits_raw projection=[AdvEngineID], 
partial_filters=[hits_raw.AdvEngineID != Int16(0)]
+physical_plan
+01)ProjectionExec: expr=[AdvEngineID@0 as AdvEngineID, count(*)@1 as count(*)]
+02)--SortPreservingMergeExec: [count(Int64(1))@2 DESC]
+03)----SortExec: expr=[count(*)@1 DESC], preserve_partitioning=[true]
+04)------ProjectionExec: expr=[AdvEngineID@0 as AdvEngineID, count(Int64(1))@1 
as count(*), count(Int64(1))@1 as count(Int64(1))]
+05)--------AggregateExec: mode=FinalPartitioned, gby=[AdvEngineID@0 as 
AdvEngineID], aggr=[count(Int64(1))]
+06)----------RepartitionExec: partitioning=Hash([AdvEngineID@0], 4), 
input_partitions=4
+07)------------AggregateExec: mode=Partial, gby=[AdvEngineID@0 as 
AdvEngineID], aggr=[count(Int64(1))]
+08)--------------FilterExec: AdvEngineID@0 != 0
+09)----------------RepartitionExec: partitioning=RoundRobinBatch(4), 
input_partitions=1
+10)------------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[AdvEngineID], file_type=parquet, predicate=AdvEngineID@40 != 0, 
pruning_predicate=AdvEngineID_null_count@2 != row_count@3 AND 
(AdvEngineID_min@0 != 0 OR 0 != AdvEngineID_max@1), 
required_guarantees=[AdvEngineID not in (0)]
+
 query II
 SELECT "AdvEngineID", COUNT(*) FROM hits WHERE "AdvEngineID" <> 0 GROUP BY 
"AdvEngineID" ORDER BY COUNT(*) DESC;
 ----
 
+## Q8
+query TT
+EXPLAIN SELECT "RegionID", COUNT(DISTINCT "UserID") AS u FROM hits GROUP BY 
"RegionID" ORDER BY u DESC LIMIT 10;
+----
+logical_plan
+01)Sort: u DESC NULLS FIRST, fetch=10
+02)--Projection: hits.RegionID, count(alias1) AS u
+03)----Aggregate: groupBy=[[hits.RegionID]], aggr=[[count(alias1)]]
+04)------Aggregate: groupBy=[[hits.RegionID, hits.UserID AS alias1]], aggr=[[]]
+05)--------SubqueryAlias: hits
+06)----------TableScan: hits_raw projection=[RegionID, UserID]
+physical_plan
+01)SortPreservingMergeExec: [u@1 DESC], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[u@1 DESC], preserve_partitioning=[true]
+03)----ProjectionExec: expr=[RegionID@0 as RegionID, count(alias1)@1 as u]
+04)------AggregateExec: mode=FinalPartitioned, gby=[RegionID@0 as RegionID], 
aggr=[count(alias1)]
+05)--------RepartitionExec: partitioning=Hash([RegionID@0], 4), 
input_partitions=4
+06)----------AggregateExec: mode=Partial, gby=[RegionID@0 as RegionID], 
aggr=[count(alias1)]
+07)------------AggregateExec: mode=FinalPartitioned, gby=[RegionID@0 as 
RegionID, alias1@1 as alias1], aggr=[]
+08)--------------RepartitionExec: partitioning=Hash([RegionID@0, alias1@1], 
4), input_partitions=1
+09)----------------AggregateExec: mode=Partial, gby=[RegionID@0 as RegionID, 
UserID@1 as alias1], aggr=[]
+10)------------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[RegionID, UserID], file_type=parquet
+
 query II rowsort
 SELECT "RegionID", COUNT(DISTINCT "UserID") AS u FROM hits GROUP BY "RegionID" 
ORDER BY u DESC LIMIT 10;
 ----
@@ -99,6 +259,25 @@ SELECT "RegionID", COUNT(DISTINCT "UserID") AS u FROM hits 
GROUP BY "RegionID" O
 39 1
 839 2
 
+## Q9
+query TT
+EXPLAIN SELECT "RegionID", SUM("AdvEngineID"), COUNT(*) AS c, 
AVG("ResolutionWidth"), COUNT(DISTINCT "UserID") FROM hits GROUP BY "RegionID" 
ORDER BY c DESC LIMIT 10;
+----
+logical_plan
+01)Sort: c DESC NULLS FIRST, fetch=10
+02)--Projection: hits.RegionID, sum(hits.AdvEngineID), count(Int64(1)) AS 
count(*) AS c, avg(hits.ResolutionWidth), count(DISTINCT hits.UserID)
+03)----Aggregate: groupBy=[[hits.RegionID]], aggr=[[sum(CAST(hits.AdvEngineID 
AS Int64)), count(Int64(1)), avg(CAST(hits.ResolutionWidth AS Float64)), 
count(DISTINCT hits.UserID)]]
+04)------SubqueryAlias: hits
+05)--------TableScan: hits_raw projection=[RegionID, UserID, ResolutionWidth, 
AdvEngineID]
+physical_plan
+01)SortPreservingMergeExec: [c@2 DESC], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[c@2 DESC], preserve_partitioning=[true]
+03)----ProjectionExec: expr=[RegionID@0 as RegionID, sum(hits.AdvEngineID)@1 
as sum(hits.AdvEngineID), count(Int64(1))@2 as c, avg(hits.ResolutionWidth)@3 
as avg(hits.ResolutionWidth), count(DISTINCT hits.UserID)@4 as count(DISTINCT 
hits.UserID)]
+04)------AggregateExec: mode=FinalPartitioned, gby=[RegionID@0 as RegionID], 
aggr=[sum(hits.AdvEngineID), count(Int64(1)), avg(hits.ResolutionWidth), 
count(DISTINCT hits.UserID)]
+05)--------RepartitionExec: partitioning=Hash([RegionID@0], 4), 
input_partitions=1
+06)----------AggregateExec: mode=Partial, gby=[RegionID@0 as RegionID], 
aggr=[sum(hits.AdvEngineID), count(Int64(1)), avg(hits.ResolutionWidth), 
count(DISTINCT hits.UserID)]
+07)------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[RegionID, UserID, ResolutionWidth, AdvEngineID], file_type=parquet
+
 query IIIRI rowsort
 SELECT "RegionID", SUM("AdvEngineID"), COUNT(*) AS c, AVG("ResolutionWidth"), 
COUNT(DISTINCT "UserID") FROM hits GROUP BY "RegionID" ORDER BY c DESC LIMIT 10;
 ----
@@ -107,26 +286,169 @@ SELECT "RegionID", SUM("AdvEngineID"), COUNT(*) AS c, 
AVG("ResolutionWidth"), CO
 39 0 1 0 1
 839 0 6 0 2
 
+## Q10
+query TT
+EXPLAIN SELECT "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM hits 
WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhoneModel" ORDER BY u DESC 
LIMIT 10;
+----
+logical_plan
+01)Sort: u DESC NULLS FIRST, fetch=10
+02)--Projection: hits.MobilePhoneModel, count(alias1) AS u
+03)----Aggregate: groupBy=[[hits.MobilePhoneModel]], aggr=[[count(alias1)]]
+04)------Aggregate: groupBy=[[hits.MobilePhoneModel, hits.UserID AS alias1]], 
aggr=[[]]
+05)--------SubqueryAlias: hits
+06)----------Filter: hits_raw.MobilePhoneModel != Utf8View("")
+07)------------TableScan: hits_raw projection=[UserID, MobilePhoneModel], 
partial_filters=[hits_raw.MobilePhoneModel != Utf8View("")]
+physical_plan
+01)SortPreservingMergeExec: [u@1 DESC], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[u@1 DESC], preserve_partitioning=[true]
+03)----ProjectionExec: expr=[MobilePhoneModel@0 as MobilePhoneModel, 
count(alias1)@1 as u]
+04)------AggregateExec: mode=FinalPartitioned, gby=[MobilePhoneModel@0 as 
MobilePhoneModel], aggr=[count(alias1)]
+05)--------RepartitionExec: partitioning=Hash([MobilePhoneModel@0], 4), 
input_partitions=4
+06)----------AggregateExec: mode=Partial, gby=[MobilePhoneModel@0 as 
MobilePhoneModel], aggr=[count(alias1)]
+07)------------AggregateExec: mode=FinalPartitioned, gby=[MobilePhoneModel@0 
as MobilePhoneModel, alias1@1 as alias1], aggr=[]
+08)--------------RepartitionExec: partitioning=Hash([MobilePhoneModel@0, 
alias1@1], 4), input_partitions=4
+09)----------------AggregateExec: mode=Partial, gby=[MobilePhoneModel@1 as 
MobilePhoneModel, UserID@0 as alias1], aggr=[]
+10)------------------FilterExec: MobilePhoneModel@1 != 
+11)--------------------RepartitionExec: partitioning=RoundRobinBatch(4), 
input_partitions=1
+12)----------------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[UserID, MobilePhoneModel], file_type=parquet, 
predicate=MobilePhoneModel@34 != , 
pruning_predicate=MobilePhoneModel_null_count@2 != row_count@3 AND 
(MobilePhoneModel_min@0 !=  OR  != MobilePhoneModel_max@1), 
required_guarantees=[MobilePhoneModel not in ()]
+
 query TI
 SELECT "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM hits WHERE 
"MobilePhoneModel" <> '' GROUP BY "MobilePhoneModel" ORDER BY u DESC LIMIT 10;
 ----
 
+## Q11
+query TT
+EXPLAIN SELECT "MobilePhone", "MobilePhoneModel", COUNT(DISTINCT "UserID") AS 
u FROM hits WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhone", 
"MobilePhoneModel" ORDER BY u DESC LIMIT 10;
+----
+logical_plan
+01)Sort: u DESC NULLS FIRST, fetch=10
+02)--Projection: hits.MobilePhone, hits.MobilePhoneModel, count(alias1) AS u
+03)----Aggregate: groupBy=[[hits.MobilePhone, hits.MobilePhoneModel]], 
aggr=[[count(alias1)]]
+04)------Aggregate: groupBy=[[hits.MobilePhone, hits.MobilePhoneModel, 
hits.UserID AS alias1]], aggr=[[]]
+05)--------SubqueryAlias: hits
+06)----------Filter: hits_raw.MobilePhoneModel != Utf8View("")
+07)------------TableScan: hits_raw projection=[UserID, MobilePhone, 
MobilePhoneModel], partial_filters=[hits_raw.MobilePhoneModel != Utf8View("")]
+physical_plan
+01)SortPreservingMergeExec: [u@2 DESC], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[u@2 DESC], preserve_partitioning=[true]
+03)----ProjectionExec: expr=[MobilePhone@0 as MobilePhone, MobilePhoneModel@1 
as MobilePhoneModel, count(alias1)@2 as u]
+04)------AggregateExec: mode=FinalPartitioned, gby=[MobilePhone@0 as 
MobilePhone, MobilePhoneModel@1 as MobilePhoneModel], aggr=[count(alias1)]
+05)--------RepartitionExec: partitioning=Hash([MobilePhone@0, 
MobilePhoneModel@1], 4), input_partitions=4
+06)----------AggregateExec: mode=Partial, gby=[MobilePhone@0 as MobilePhone, 
MobilePhoneModel@1 as MobilePhoneModel], aggr=[count(alias1)]
+07)------------AggregateExec: mode=FinalPartitioned, gby=[MobilePhone@0 as 
MobilePhone, MobilePhoneModel@1 as MobilePhoneModel, alias1@2 as alias1], 
aggr=[]
+08)--------------RepartitionExec: partitioning=Hash([MobilePhone@0, 
MobilePhoneModel@1, alias1@2], 4), input_partitions=4
+09)----------------AggregateExec: mode=Partial, gby=[MobilePhone@1 as 
MobilePhone, MobilePhoneModel@2 as MobilePhoneModel, UserID@0 as alias1], 
aggr=[]
+10)------------------FilterExec: MobilePhoneModel@2 != 
+11)--------------------RepartitionExec: partitioning=RoundRobinBatch(4), 
input_partitions=1
+12)----------------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[UserID, MobilePhone, MobilePhoneModel], file_type=parquet, 
predicate=MobilePhoneModel@34 != , 
pruning_predicate=MobilePhoneModel_null_count@2 != row_count@3 AND 
(MobilePhoneModel_min@0 !=  OR  != MobilePhoneModel_max@1), 
required_guarantees=[MobilePhoneModel not in ()]
+
 query ITI
 SELECT "MobilePhone", "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM 
hits WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhone", "MobilePhoneModel" 
ORDER BY u DESC LIMIT 10;
 ----
 
+## Q12
+query TT
+EXPLAIN SELECT "SearchPhrase", COUNT(*) AS c FROM hits WHERE "SearchPhrase" <> 
'' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10;
+----
+logical_plan
+01)Sort: c DESC NULLS FIRST, fetch=10
+02)--Projection: hits.SearchPhrase, count(Int64(1)) AS count(*) AS c
+03)----Aggregate: groupBy=[[hits.SearchPhrase]], aggr=[[count(Int64(1))]]
+04)------SubqueryAlias: hits
+05)--------Filter: hits_raw.SearchPhrase != Utf8View("")
+06)----------TableScan: hits_raw projection=[SearchPhrase], 
partial_filters=[hits_raw.SearchPhrase != Utf8View("")]
+physical_plan
+01)SortPreservingMergeExec: [c@1 DESC], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[c@1 DESC], preserve_partitioning=[true]
+03)----ProjectionExec: expr=[SearchPhrase@0 as SearchPhrase, count(Int64(1))@1 
as c]
+04)------AggregateExec: mode=FinalPartitioned, gby=[SearchPhrase@0 as 
SearchPhrase], aggr=[count(Int64(1))]
+05)--------RepartitionExec: partitioning=Hash([SearchPhrase@0], 4), 
input_partitions=4
+06)----------AggregateExec: mode=Partial, gby=[SearchPhrase@0 as 
SearchPhrase], aggr=[count(Int64(1))]
+07)------------FilterExec: SearchPhrase@0 != 
+08)--------------RepartitionExec: partitioning=RoundRobinBatch(4), 
input_partitions=1
+09)----------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[SearchPhrase], file_type=parquet, predicate=SearchPhrase@39 != , 
pruning_predicate=SearchPhrase_null_count@2 != row_count@3 AND 
(SearchPhrase_min@0 !=  OR  != SearchPhrase_max@1), 
required_guarantees=[SearchPhrase not in ()]
+
 query TI
 SELECT "SearchPhrase", COUNT(*) AS c FROM hits WHERE "SearchPhrase" <> '' 
GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10;
 ----
 
+## Q13
+query TT
+EXPLAIN SELECT "SearchPhrase", COUNT(DISTINCT "UserID") AS u FROM hits WHERE 
"SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY u DESC LIMIT 10;
+----
+logical_plan
+01)Sort: u DESC NULLS FIRST, fetch=10
+02)--Projection: hits.SearchPhrase, count(alias1) AS u
+03)----Aggregate: groupBy=[[hits.SearchPhrase]], aggr=[[count(alias1)]]
+04)------Aggregate: groupBy=[[hits.SearchPhrase, hits.UserID AS alias1]], 
aggr=[[]]
+05)--------SubqueryAlias: hits
+06)----------Filter: hits_raw.SearchPhrase != Utf8View("")
+07)------------TableScan: hits_raw projection=[UserID, SearchPhrase], 
partial_filters=[hits_raw.SearchPhrase != Utf8View("")]
+physical_plan
+01)SortPreservingMergeExec: [u@1 DESC], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[u@1 DESC], preserve_partitioning=[true]
+03)----ProjectionExec: expr=[SearchPhrase@0 as SearchPhrase, count(alias1)@1 
as u]
+04)------AggregateExec: mode=FinalPartitioned, gby=[SearchPhrase@0 as 
SearchPhrase], aggr=[count(alias1)]
+05)--------RepartitionExec: partitioning=Hash([SearchPhrase@0], 4), 
input_partitions=4
+06)----------AggregateExec: mode=Partial, gby=[SearchPhrase@0 as 
SearchPhrase], aggr=[count(alias1)]
+07)------------AggregateExec: mode=FinalPartitioned, gby=[SearchPhrase@0 as 
SearchPhrase, alias1@1 as alias1], aggr=[]
+08)--------------RepartitionExec: partitioning=Hash([SearchPhrase@0, 
alias1@1], 4), input_partitions=4
+09)----------------AggregateExec: mode=Partial, gby=[SearchPhrase@1 as 
SearchPhrase, UserID@0 as alias1], aggr=[]
+10)------------------FilterExec: SearchPhrase@1 != 
+11)--------------------RepartitionExec: partitioning=RoundRobinBatch(4), 
input_partitions=1
+12)----------------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[UserID, SearchPhrase], file_type=parquet, predicate=SearchPhrase@39 
!= , pruning_predicate=SearchPhrase_null_count@2 != row_count@3 AND 
(SearchPhrase_min@0 !=  OR  != SearchPhrase_max@1), 
required_guarantees=[SearchPhrase not in ()]
+
 query TI
 SELECT "SearchPhrase", COUNT(DISTINCT "UserID") AS u FROM hits WHERE 
"SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY u DESC LIMIT 10;
 ----
 
+## Q14
+query TT
+EXPLAIN SELECT "SearchEngineID", "SearchPhrase", COUNT(*) AS c FROM hits WHERE 
"SearchPhrase" <> '' GROUP BY "SearchEngineID", "SearchPhrase" ORDER BY c DESC 
LIMIT 10;
+----
+logical_plan
+01)Sort: c DESC NULLS FIRST, fetch=10
+02)--Projection: hits.SearchEngineID, hits.SearchPhrase, count(Int64(1)) AS 
count(*) AS c
+03)----Aggregate: groupBy=[[hits.SearchEngineID, hits.SearchPhrase]], 
aggr=[[count(Int64(1))]]
+04)------SubqueryAlias: hits
+05)--------Filter: hits_raw.SearchPhrase != Utf8View("")
+06)----------TableScan: hits_raw projection=[SearchEngineID, SearchPhrase], 
partial_filters=[hits_raw.SearchPhrase != Utf8View("")]
+physical_plan
+01)SortPreservingMergeExec: [c@2 DESC], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[c@2 DESC], preserve_partitioning=[true]
+03)----ProjectionExec: expr=[SearchEngineID@0 as SearchEngineID, 
SearchPhrase@1 as SearchPhrase, count(Int64(1))@2 as c]
+04)------AggregateExec: mode=FinalPartitioned, gby=[SearchEngineID@0 as 
SearchEngineID, SearchPhrase@1 as SearchPhrase], aggr=[count(Int64(1))]
+05)--------RepartitionExec: partitioning=Hash([SearchEngineID@0, 
SearchPhrase@1], 4), input_partitions=4
+06)----------AggregateExec: mode=Partial, gby=[SearchEngineID@0 as 
SearchEngineID, SearchPhrase@1 as SearchPhrase], aggr=[count(Int64(1))]
+07)------------FilterExec: SearchPhrase@1 != 
+08)--------------RepartitionExec: partitioning=RoundRobinBatch(4), 
input_partitions=1
+09)----------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[SearchEngineID, SearchPhrase], file_type=parquet, 
predicate=SearchPhrase@39 != , pruning_predicate=SearchPhrase_null_count@2 != 
row_count@3 AND (SearchPhrase_min@0 !=  OR  != SearchPhrase_max@1), 
required_guarantees=[SearchPhrase not in ()]
+
 query ITI
 SELECT "SearchEngineID", "SearchPhrase", COUNT(*) AS c FROM hits WHERE 
"SearchPhrase" <> '' GROUP BY "SearchEngineID", "SearchPhrase" ORDER BY c DESC 
LIMIT 10;
 ----
 
+## Q15
+query TT
+EXPLAIN SELECT "UserID", COUNT(*) FROM hits GROUP BY "UserID" ORDER BY 
COUNT(*) DESC LIMIT 10;
+----
+logical_plan
+01)Projection: hits.UserID, count(*)
+02)--Sort: count(Int64(1)) AS count(*) AS count(*) DESC NULLS FIRST, fetch=10
+03)----Projection: hits.UserID, count(Int64(1)) AS count(*), count(Int64(1))
+04)------Aggregate: groupBy=[[hits.UserID]], aggr=[[count(Int64(1))]]
+05)--------SubqueryAlias: hits
+06)----------TableScan: hits_raw projection=[UserID]
+physical_plan
+01)ProjectionExec: expr=[UserID@0 as UserID, count(*)@1 as count(*)]
+02)--SortPreservingMergeExec: [count(Int64(1))@2 DESC], fetch=10
+03)----SortExec: TopK(fetch=10), expr=[count(*)@1 DESC], 
preserve_partitioning=[true]
+04)------ProjectionExec: expr=[UserID@0 as UserID, count(Int64(1))@1 as 
count(*), count(Int64(1))@1 as count(Int64(1))]
+05)--------AggregateExec: mode=FinalPartitioned, gby=[UserID@0 as UserID], 
aggr=[count(Int64(1))]
+06)----------RepartitionExec: partitioning=Hash([UserID@0], 4), 
input_partitions=1
+07)------------AggregateExec: mode=Partial, gby=[UserID@0 as UserID], 
aggr=[count(Int64(1))]
+08)--------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[UserID], file_type=parquet
+
 query II rowsort
 SELECT "UserID", COUNT(*) FROM hits GROUP BY "UserID" ORDER BY COUNT(*) DESC 
LIMIT 10;
 ----
@@ -136,6 +458,27 @@ SELECT "UserID", COUNT(*) FROM hits GROUP BY "UserID" 
ORDER BY COUNT(*) DESC LIM
 519640690937130534 2
 7418527520126366595 1
 
+## Q16
+query TT
+EXPLAIN SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", 
"SearchPhrase" ORDER BY COUNT(*) DESC LIMIT 10;
+----
+logical_plan
+01)Projection: hits.UserID, hits.SearchPhrase, count(*)
+02)--Sort: count(Int64(1)) AS count(*) AS count(*) DESC NULLS FIRST, fetch=10
+03)----Projection: hits.UserID, hits.SearchPhrase, count(Int64(1)) AS 
count(*), count(Int64(1))
+04)------Aggregate: groupBy=[[hits.UserID, hits.SearchPhrase]], 
aggr=[[count(Int64(1))]]
+05)--------SubqueryAlias: hits
+06)----------TableScan: hits_raw projection=[UserID, SearchPhrase]
+physical_plan
+01)ProjectionExec: expr=[UserID@0 as UserID, SearchPhrase@1 as SearchPhrase, 
count(*)@2 as count(*)]
+02)--SortPreservingMergeExec: [count(Int64(1))@3 DESC], fetch=10
+03)----SortExec: TopK(fetch=10), expr=[count(*)@2 DESC], 
preserve_partitioning=[true]
+04)------ProjectionExec: expr=[UserID@0 as UserID, SearchPhrase@1 as 
SearchPhrase, count(Int64(1))@2 as count(*), count(Int64(1))@2 as 
count(Int64(1))]
+05)--------AggregateExec: mode=FinalPartitioned, gby=[UserID@0 as UserID, 
SearchPhrase@1 as SearchPhrase], aggr=[count(Int64(1))]
+06)----------RepartitionExec: partitioning=Hash([UserID@0, SearchPhrase@1], 
4), input_partitions=1
+07)------------AggregateExec: mode=Partial, gby=[UserID@0 as UserID, 
SearchPhrase@1 as SearchPhrase], aggr=[count(Int64(1))]
+08)--------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[UserID, SearchPhrase], file_type=parquet
+
 query ITI rowsort
 SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", 
"SearchPhrase" ORDER BY COUNT(*) DESC LIMIT 10;
 ----
@@ -145,6 +488,24 @@ SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP 
BY "UserID", "SearchPh
 519640690937130534 (empty) 2
 7418527520126366595 (empty) 1
 
+## Q17
+query TT
+EXPLAIN SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", 
"SearchPhrase" LIMIT 10;
+----
+logical_plan
+01)Projection: hits.UserID, hits.SearchPhrase, count(Int64(1)) AS count(*)
+02)--Limit: skip=0, fetch=10
+03)----Aggregate: groupBy=[[hits.UserID, hits.SearchPhrase]], 
aggr=[[count(Int64(1))]]
+04)------SubqueryAlias: hits
+05)--------TableScan: hits_raw projection=[UserID, SearchPhrase]
+physical_plan
+01)ProjectionExec: expr=[UserID@0 as UserID, SearchPhrase@1 as SearchPhrase, 
count(Int64(1))@2 as count(*)]
+02)--CoalescePartitionsExec: fetch=10
+03)----AggregateExec: mode=FinalPartitioned, gby=[UserID@0 as UserID, 
SearchPhrase@1 as SearchPhrase], aggr=[count(Int64(1))]
+04)------RepartitionExec: partitioning=Hash([UserID@0, SearchPhrase@1], 4), 
input_partitions=1
+05)--------AggregateExec: mode=Partial, gby=[UserID@0 as UserID, 
SearchPhrase@1 as SearchPhrase], aggr=[count(Int64(1))]
+06)----------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[UserID, SearchPhrase], file_type=parquet
+
 query ITI rowsort
 SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", 
"SearchPhrase" LIMIT 10;
 ----
@@ -154,6 +515,27 @@ SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP 
BY "UserID", "SearchPh
 519640690937130534 (empty) 2
 7418527520126366595 (empty) 1
 
+## Q18
+query TT
+EXPLAIN SELECT "UserID", extract(minute FROM 
to_timestamp_seconds("EventTime")) AS m, "SearchPhrase", COUNT(*) FROM hits 
GROUP BY "UserID", m, "SearchPhrase" ORDER BY COUNT(*) DESC LIMIT 10;
+----
+logical_plan
+01)Projection: hits.UserID, m, hits.SearchPhrase, count(*)
+02)--Sort: count(Int64(1)) AS count(*) AS count(*) DESC NULLS FIRST, fetch=10
+03)----Projection: hits.UserID, 
date_part(Utf8("MINUTE"),to_timestamp_seconds(hits.EventTime)) AS m, 
hits.SearchPhrase, count(Int64(1)) AS count(*), count(Int64(1))
+04)------Aggregate: groupBy=[[hits.UserID, date_part(Utf8("MINUTE"), 
to_timestamp_seconds(hits.EventTime)), hits.SearchPhrase]], 
aggr=[[count(Int64(1))]]
+05)--------SubqueryAlias: hits
+06)----------TableScan: hits_raw projection=[EventTime, UserID, SearchPhrase]
+physical_plan
+01)ProjectionExec: expr=[UserID@0 as UserID, m@1 as m, SearchPhrase@2 as 
SearchPhrase, count(*)@3 as count(*)]
+02)--SortPreservingMergeExec: [count(Int64(1))@4 DESC], fetch=10
+03)----SortExec: TopK(fetch=10), expr=[count(*)@3 DESC], 
preserve_partitioning=[true]
+04)------ProjectionExec: expr=[UserID@0 as UserID, 
date_part(Utf8("MINUTE"),to_timestamp_seconds(hits.EventTime))@1 as m, 
SearchPhrase@2 as SearchPhrase, count(Int64(1))@3 as count(*), 
count(Int64(1))@3 as count(Int64(1))]
+05)--------AggregateExec: mode=FinalPartitioned, gby=[UserID@0 as UserID, 
date_part(Utf8("MINUTE"),to_timestamp_seconds(hits.EventTime))@1 as 
date_part(Utf8("MINUTE"),to_timestamp_seconds(hits.EventTime)), SearchPhrase@2 
as SearchPhrase], aggr=[count(Int64(1))]
+06)----------RepartitionExec: partitioning=Hash([UserID@0, 
date_part(Utf8("MINUTE"),to_timestamp_seconds(hits.EventTime))@1, 
SearchPhrase@2], 4), input_partitions=1
+07)------------AggregateExec: mode=Partial, gby=[UserID@1 as UserID, 
date_part(MINUTE, to_timestamp_seconds(EventTime@0)) as 
date_part(Utf8("MINUTE"),to_timestamp_seconds(hits.EventTime)), SearchPhrase@2 
as SearchPhrase], aggr=[count(Int64(1))]
+08)--------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[EventTime, UserID, SearchPhrase], file_type=parquet
+
 query IITI rowsort
 SELECT "UserID", extract(minute FROM to_timestamp_seconds("EventTime")) AS m, 
"SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", m, "SearchPhrase" ORDER 
BY COUNT(*) DESC LIMIT 10;
 ----
@@ -168,61 +550,337 @@ SELECT "UserID", extract(minute FROM 
to_timestamp_seconds("EventTime")) AS m, "S
 519640690937130534 36 (empty) 1
 7418527520126366595 18 (empty) 1
 
+## Q19
+query TT
+EXPLAIN SELECT "UserID" FROM hits WHERE "UserID" = 435090932899640449;
+----
+logical_plan
+01)SubqueryAlias: hits
+02)--Filter: hits_raw.UserID = Int64(435090932899640449)
+03)----TableScan: hits_raw projection=[UserID], 
partial_filters=[hits_raw.UserID = Int64(435090932899640449)]
+physical_plan
+01)FilterExec: UserID@0 = 435090932899640449
+02)--RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+03)----DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[UserID], file_type=parquet, predicate=UserID@9 = 
435090932899640449, pruning_predicate=UserID_null_count@2 != row_count@3 AND 
UserID_min@0 <= 435090932899640449 AND 435090932899640449 <= UserID_max@1, 
required_guarantees=[UserID in (435090932899640449)]
+
 query I
 SELECT "UserID" FROM hits WHERE "UserID" = 435090932899640449;
 ----
 
+## Q20
+query TT
+EXPLAIN SELECT COUNT(*) FROM hits WHERE "URL" LIKE '%google%';
+----
+logical_plan
+01)Projection: count(Int64(1)) AS count(*)
+02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]]
+03)----SubqueryAlias: hits
+04)------Projection:
+05)--------Filter: hits_raw.URL LIKE Utf8View("%google%")
+06)----------TableScan: hits_raw projection=[URL], 
partial_filters=[hits_raw.URL LIKE Utf8View("%google%")]
+physical_plan
+01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)]
+02)--AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))]
+03)----CoalescePartitionsExec
+04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))]
+05)--------FilterExec: URL@0 LIKE %google%, projection=[]
+06)----------RepartitionExec: partitioning=RoundRobinBatch(4), 
input_partitions=1
+07)------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[URL], file_type=parquet, predicate=URL@13 LIKE %google%
+
 query I
 SELECT COUNT(*) FROM hits WHERE "URL" LIKE '%google%';
 ----
 0
 
+## Q21
+query TT
+EXPLAIN SELECT "SearchPhrase", MIN("URL"), COUNT(*) AS c FROM hits WHERE "URL" 
LIKE '%google%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c 
DESC LIMIT 10;
+----
+logical_plan
+01)Sort: c DESC NULLS FIRST, fetch=10
+02)--Projection: hits.SearchPhrase, min(hits.URL), count(Int64(1)) AS count(*) 
AS c
+03)----Aggregate: groupBy=[[hits.SearchPhrase]], aggr=[[min(hits.URL), 
count(Int64(1))]]
+04)------SubqueryAlias: hits
+05)--------Filter: hits_raw.URL LIKE Utf8View("%google%") AND 
hits_raw.SearchPhrase != Utf8View("")
+06)----------TableScan: hits_raw projection=[URL, SearchPhrase], 
partial_filters=[hits_raw.URL LIKE Utf8View("%google%"), hits_raw.SearchPhrase 
!= Utf8View("")]
+physical_plan
+01)SortPreservingMergeExec: [c@2 DESC], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[c@2 DESC], preserve_partitioning=[true]
+03)----ProjectionExec: expr=[SearchPhrase@0 as SearchPhrase, min(hits.URL)@1 
as min(hits.URL), count(Int64(1))@2 as c]
+04)------AggregateExec: mode=FinalPartitioned, gby=[SearchPhrase@0 as 
SearchPhrase], aggr=[min(hits.URL), count(Int64(1))]
+05)--------RepartitionExec: partitioning=Hash([SearchPhrase@0], 4), 
input_partitions=4
+06)----------AggregateExec: mode=Partial, gby=[SearchPhrase@1 as 
SearchPhrase], aggr=[min(hits.URL), count(Int64(1))]
+07)------------FilterExec: URL@0 LIKE %google% AND SearchPhrase@1 != 
+08)--------------RepartitionExec: partitioning=RoundRobinBatch(4), 
input_partitions=1
+09)----------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[URL, SearchPhrase], file_type=parquet, predicate=URL@13 LIKE 
%google% AND SearchPhrase@39 != , pruning_predicate=SearchPhrase_null_count@4 
!= row_count@5 AND (SearchPhrase_min@2 !=  OR  != SearchPhrase_max@3), 
required_guarantees=[SearchPhrase not in ()]
+
 query TTI
 SELECT "SearchPhrase", MIN("URL"), COUNT(*) AS c FROM hits WHERE "URL" LIKE 
'%google%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC 
LIMIT 10;
 ----
 
+## Q22
+query TT
+EXPLAIN SELECT "SearchPhrase", MIN("URL"), MIN("Title"), COUNT(*) AS c, 
COUNT(DISTINCT "UserID") FROM hits WHERE "Title" LIKE '%Google%' AND "URL" NOT 
LIKE '%.google.%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c 
DESC LIMIT 10;
+----
+logical_plan
+01)Sort: c DESC NULLS FIRST, fetch=10
+02)--Projection: hits.SearchPhrase, min(hits.URL), min(hits.Title), 
count(Int64(1)) AS count(*) AS c, count(DISTINCT hits.UserID)
+03)----Aggregate: groupBy=[[hits.SearchPhrase]], aggr=[[min(hits.URL), 
min(hits.Title), count(Int64(1)), count(DISTINCT hits.UserID)]]
+04)------SubqueryAlias: hits
+05)--------Filter: hits_raw.Title LIKE Utf8View("%Google%") AND hits_raw.URL 
NOT LIKE Utf8View("%.google.%") AND hits_raw.SearchPhrase != Utf8View("")
+06)----------TableScan: hits_raw projection=[Title, UserID, URL, 
SearchPhrase], partial_filters=[hits_raw.Title LIKE Utf8View("%Google%"), 
hits_raw.URL NOT LIKE Utf8View("%.google.%"), hits_raw.SearchPhrase != 
Utf8View("")]
+physical_plan
+01)SortPreservingMergeExec: [c@3 DESC], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[c@3 DESC], preserve_partitioning=[true]
+03)----ProjectionExec: expr=[SearchPhrase@0 as SearchPhrase, min(hits.URL)@1 
as min(hits.URL), min(hits.Title)@2 as min(hits.Title), count(Int64(1))@3 as c, 
count(DISTINCT hits.UserID)@4 as count(DISTINCT hits.UserID)]
+04)------AggregateExec: mode=FinalPartitioned, gby=[SearchPhrase@0 as 
SearchPhrase], aggr=[min(hits.URL), min(hits.Title), count(Int64(1)), 
count(DISTINCT hits.UserID)]
+05)--------RepartitionExec: partitioning=Hash([SearchPhrase@0], 4), 
input_partitions=4
+06)----------AggregateExec: mode=Partial, gby=[SearchPhrase@3 as 
SearchPhrase], aggr=[min(hits.URL), min(hits.Title), count(Int64(1)), 
count(DISTINCT hits.UserID)]
+07)------------FilterExec: Title@0 LIKE %Google% AND URL@2 NOT LIKE %.google.% 
AND SearchPhrase@3 != 
+08)--------------RepartitionExec: partitioning=RoundRobinBatch(4), 
input_partitions=1
+09)----------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[Title, UserID, URL, SearchPhrase], file_type=parquet, 
predicate=Title@2 LIKE %Google% AND URL@13 NOT LIKE %.google.% AND 
SearchPhrase@39 != , pruning_predicate=SearchPhrase_null_count@6 != row_count@7 
AND (SearchPhrase_min@4 !=  OR  != SearchPhrase_max@5), 
required_guarantees=[SearchPhrase not in ()]
+
 query TTTII
 SELECT "SearchPhrase", MIN("URL"), MIN("Title"), COUNT(*) AS c, COUNT(DISTINCT 
"UserID") FROM hits WHERE "Title" LIKE '%Google%' AND "URL" NOT LIKE 
'%.google.%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC 
LIMIT 10;
 ----
 
-query 
IITIIIIIIIIITTIIIIIIIIIITIIITIIIITTIIITIIIIIIIIIITIIIIITIIIIIITIIIIIIIIIITTTTIIIIIIIITITTITTTTTTTTTTIIIID
+## Q23
+query TT
+EXPLAIN SELECT * FROM hits WHERE "URL" LIKE '%google%' ORDER BY "EventTime" 
LIMIT 10;
+----
+logical_plan
+01)Sort: hits.EventTime ASC NULLS LAST, fetch=10
+02)--SubqueryAlias: hits
+03)----Projection: hits_raw.WatchID, hits_raw.JavaEnable, hits_raw.Title, 
hits_raw.GoodEvent, hits_raw.EventTime, hits_raw.CounterID, hits_raw.ClientIP, 
hits_raw.RegionID, hits_raw.UserID, hits_raw.CounterClass, hits_raw.OS, 
hits_raw.UserAgent, hits_raw.URL, hits_raw.Referer, hits_raw.IsRefresh, 
hits_raw.RefererCategoryID, hits_raw.RefererRegionID, hits_raw.URLCategoryID, 
hits_raw.URLRegionID, hits_raw.ResolutionWidth, hits_raw.ResolutionHeight, 
hits_raw.ResolutionDepth, hits_raw.FlashMa [...]
+04)------Filter: hits_raw.URL LIKE Utf8View("%google%")
+05)--------TableScan: hits_raw projection=[WatchID, JavaEnable, Title, 
GoodEvent, EventTime, EventDate, CounterID, ClientIP, RegionID, UserID, 
CounterClass, OS, UserAgent, URL, Referer, IsRefresh, RefererCategoryID, 
RefererRegionID, URLCategoryID, URLRegionID, ResolutionWidth, ResolutionHeight, 
ResolutionDepth, FlashMajor, FlashMinor, FlashMinor2, NetMajor, NetMinor, 
UserAgentMajor, UserAgentMinor, CookieEnable, JavascriptEnable, IsMobile, 
MobilePhone, MobilePhoneModel, Params, IPNetwork [...]
+physical_plan
+01)SortPreservingMergeExec: [EventTime@4 ASC NULLS LAST], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[EventTime@4 ASC NULLS LAST], 
preserve_partitioning=[true]
+03)----ProjectionExec: expr=[WatchID@0 as WatchID, JavaEnable@1 as JavaEnable, 
Title@2 as Title, GoodEvent@3 as GoodEvent, EventTime@4 as EventTime, 
CounterID@6 as CounterID, ClientIP@7 as ClientIP, RegionID@8 as RegionID, 
UserID@9 as UserID, CounterClass@10 as CounterClass, OS@11 as OS, UserAgent@12 
as UserAgent, URL@13 as URL, Referer@14 as Referer, IsRefresh@15 as IsRefresh, 
RefererCategoryID@16 as RefererCategoryID, RefererRegionID@17 as 
RefererRegionID, URLCategoryID@18 as URLCatego [...]
+04)------FilterExec: URL@13 LIKE %google%
+05)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+06)----------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[WatchID, JavaEnable, Title, GoodEvent, EventTime, EventDate, 
CounterID, ClientIP, RegionID, UserID, CounterClass, OS, UserAgent, URL, 
Referer, IsRefresh, RefererCategoryID, RefererRegionID, URLCategoryID, 
URLRegionID, ResolutionWidth, ResolutionHeight, ResolutionDepth, FlashMajor, 
FlashMinor, FlashMinor2, NetMajor, NetMinor, UserAgentMajor, UserAgentMi [...]
 
+query 
IITIIIIIIIIITTIIIIIIIIIITIIITIIIITTIIITIIIIIIIIIITIIIIITIIIIIITIIIIIIIIIITTTTIIIIIIIITITTITTTTTTTTTTIIIID
 SELECT * FROM hits WHERE "URL" LIKE '%google%' ORDER BY "EventTime" LIMIT 10;
 ----
 
+## Q24
+query TT
+EXPLAIN SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY 
"EventTime" LIMIT 10;
+----
+logical_plan
+01)Projection: hits.SearchPhrase
+02)--Sort: hits.EventTime ASC NULLS LAST, fetch=10
+03)----Projection: hits.SearchPhrase, hits.EventTime
+04)------SubqueryAlias: hits
+05)--------Filter: hits_raw.SearchPhrase != Utf8View("")
+06)----------TableScan: hits_raw projection=[EventTime, SearchPhrase], 
partial_filters=[hits_raw.SearchPhrase != Utf8View("")]
+physical_plan
+01)ProjectionExec: expr=[SearchPhrase@0 as SearchPhrase]
+02)--SortPreservingMergeExec: [EventTime@1 ASC NULLS LAST], fetch=10
+03)----SortExec: TopK(fetch=10), expr=[EventTime@1 ASC NULLS LAST], 
preserve_partitioning=[true]
+04)------ProjectionExec: expr=[SearchPhrase@1 as SearchPhrase, EventTime@0 as 
EventTime]
+05)--------FilterExec: SearchPhrase@1 != 
+06)----------RepartitionExec: partitioning=RoundRobinBatch(4), 
input_partitions=1
+07)------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[EventTime, SearchPhrase], file_type=parquet, 
predicate=SearchPhrase@39 !=  AND DynamicFilter [ empty ], 
pruning_predicate=SearchPhrase_null_count@2 != row_count@3 AND 
(SearchPhrase_min@0 !=  OR  != SearchPhrase_max@1), 
required_guarantees=[SearchPhrase not in ()]
+
 query T
 SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY 
"EventTime" LIMIT 10;
 ----
 
+## Q25
+query TT
+EXPLAIN SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY 
"SearchPhrase" LIMIT 10;
+----
+logical_plan
+01)Sort: hits.SearchPhrase ASC NULLS LAST, fetch=10
+02)--SubqueryAlias: hits
+03)----Filter: hits_raw.SearchPhrase != Utf8View("")
+04)------TableScan: hits_raw projection=[SearchPhrase], 
partial_filters=[hits_raw.SearchPhrase != Utf8View("")]
+physical_plan
+01)SortPreservingMergeExec: [SearchPhrase@0 ASC NULLS LAST], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[SearchPhrase@0 ASC NULLS LAST], 
preserve_partitioning=[true]
+03)----FilterExec: SearchPhrase@0 != 
+04)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+05)--------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[SearchPhrase], file_type=parquet, predicate=SearchPhrase@39 !=  AND 
DynamicFilter [ empty ], pruning_predicate=SearchPhrase_null_count@2 != 
row_count@3 AND (SearchPhrase_min@0 !=  OR  != SearchPhrase_max@1), 
required_guarantees=[SearchPhrase not in ()]
+
 query T
 SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY 
"SearchPhrase" LIMIT 10;
 ----
 
+## Q26
+query TT
+EXPLAIN SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY 
"EventTime", "SearchPhrase" LIMIT 10;
+----
+logical_plan
+01)Projection: hits.SearchPhrase
+02)--Sort: hits.EventTime ASC NULLS LAST, hits.SearchPhrase ASC NULLS LAST, 
fetch=10
+03)----Projection: hits.SearchPhrase, hits.EventTime
+04)------SubqueryAlias: hits
+05)--------Filter: hits_raw.SearchPhrase != Utf8View("")
+06)----------TableScan: hits_raw projection=[EventTime, SearchPhrase], 
partial_filters=[hits_raw.SearchPhrase != Utf8View("")]
+physical_plan
+01)ProjectionExec: expr=[SearchPhrase@0 as SearchPhrase]
+02)--SortPreservingMergeExec: [EventTime@1 ASC NULLS LAST, SearchPhrase@0 ASC 
NULLS LAST], fetch=10
+03)----SortExec: TopK(fetch=10), expr=[EventTime@1 ASC NULLS LAST, 
SearchPhrase@0 ASC NULLS LAST], preserve_partitioning=[true]
+04)------ProjectionExec: expr=[SearchPhrase@1 as SearchPhrase, EventTime@0 as 
EventTime]
+05)--------FilterExec: SearchPhrase@1 != 
+06)----------RepartitionExec: partitioning=RoundRobinBatch(4), 
input_partitions=1
+07)------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[EventTime, SearchPhrase], file_type=parquet, 
predicate=SearchPhrase@39 !=  AND DynamicFilter [ empty ], 
pruning_predicate=SearchPhrase_null_count@2 != row_count@3 AND 
(SearchPhrase_min@0 !=  OR  != SearchPhrase_max@1), 
required_guarantees=[SearchPhrase not in ()]
+
 query T
 SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY 
"EventTime", "SearchPhrase" LIMIT 10;
 ----
 
+## Q27
+query TT
+EXPLAIN SELECT "CounterID", AVG(length("URL")) AS l, COUNT(*) AS c FROM hits 
WHERE "URL" <> '' GROUP BY "CounterID" HAVING COUNT(*) > 100000 ORDER BY l DESC 
LIMIT 25;
+----
+logical_plan
+01)Sort: l DESC NULLS FIRST, fetch=25
+02)--Projection: hits.CounterID, avg(length(hits.URL)) AS l, count(Int64(1)) 
AS count(*) AS c
+03)----Filter: count(Int64(1)) > Int64(100000)
+04)------Aggregate: groupBy=[[hits.CounterID]], 
aggr=[[avg(CAST(character_length(hits.URL) AS length(hits.URL) AS Float64)), 
count(Int64(1))]]
+05)--------SubqueryAlias: hits
+06)----------Filter: hits_raw.URL != Utf8View("")
+07)------------TableScan: hits_raw projection=[CounterID, URL], 
partial_filters=[hits_raw.URL != Utf8View("")]
+physical_plan
+01)SortPreservingMergeExec: [l@1 DESC], fetch=25
+02)--SortExec: TopK(fetch=25), expr=[l@1 DESC], preserve_partitioning=[true]
+03)----ProjectionExec: expr=[CounterID@0 as CounterID, avg(length(hits.URL))@1 
as l, count(Int64(1))@2 as c]
+04)------FilterExec: count(Int64(1))@2 > 100000
+05)--------AggregateExec: mode=FinalPartitioned, gby=[CounterID@0 as 
CounterID], aggr=[avg(length(hits.URL)), count(Int64(1))]
+06)----------RepartitionExec: partitioning=Hash([CounterID@0], 4), 
input_partitions=4
+07)------------AggregateExec: mode=Partial, gby=[CounterID@0 as CounterID], 
aggr=[avg(length(hits.URL)), count(Int64(1))]
+08)--------------FilterExec: URL@1 != 
+09)----------------RepartitionExec: partitioning=RoundRobinBatch(4), 
input_partitions=1
+10)------------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[CounterID, URL], file_type=parquet, predicate=URL@13 != , 
pruning_predicate=URL_null_count@2 != row_count@3 AND (URL_min@0 !=  OR  != 
URL_max@1), required_guarantees=[URL not in ()]
+
 query IRI
 SELECT "CounterID", AVG(length("URL")) AS l, COUNT(*) AS c FROM hits WHERE 
"URL" <> '' GROUP BY "CounterID" HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 
25;
 ----
 
+## Q28
+query TT
+EXPLAIN SELECT REGEXP_REPLACE("Referer", '^https?://(?:www\.)?([^/]+)/.*$', 
'\1') AS k, AVG(length("Referer")) AS l, COUNT(*) AS c, MIN("Referer") FROM 
hits WHERE "Referer" <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC 
LIMIT 25;
+----
+logical_plan
+01)Sort: l DESC NULLS FIRST, fetch=25
+02)--Projection: 
regexp_replace(hits.Referer,Utf8("^https?://(?:www\.)?([^/]+)/.*$"),Utf8("\1")) 
AS k, avg(length(hits.Referer)) AS l, count(Int64(1)) AS count(*) AS c, 
min(hits.Referer)
+03)----Filter: count(Int64(1)) > Int64(100000)
+04)------Aggregate: groupBy=[[regexp_replace(hits.Referer, 
Utf8View("^https?://(?:www\.)?([^/]+)/.*$"), Utf8View("\1")) AS 
regexp_replace(hits.Referer,Utf8("^https?://(?:www\.)?([^/]+)/.*$"),Utf8("\1"))]],
 aggr=[[avg(CAST(character_length(hits.Referer) AS length(hits.Referer) AS 
Float64)), count(Int64(1)), min(hits.Referer)]]
+05)--------SubqueryAlias: hits
+06)----------Filter: hits_raw.Referer != Utf8View("")
+07)------------TableScan: hits_raw projection=[Referer], 
partial_filters=[hits_raw.Referer != Utf8View("")]
+physical_plan
+01)SortPreservingMergeExec: [l@1 DESC], fetch=25
+02)--SortExec: TopK(fetch=25), expr=[l@1 DESC], preserve_partitioning=[true]
+03)----ProjectionExec: 
expr=[regexp_replace(hits.Referer,Utf8("^https?://(?:www\.)?([^/]+)/.*$"),Utf8("\1"))@0
 as k, avg(length(hits.Referer))@1 as l, count(Int64(1))@2 as c, 
min(hits.Referer)@3 as min(hits.Referer)]
+04)------FilterExec: count(Int64(1))@2 > 100000
+05)--------AggregateExec: mode=FinalPartitioned, 
gby=[regexp_replace(hits.Referer,Utf8("^https?://(?:www\.)?([^/]+)/.*$"),Utf8("\1"))@0
 as 
regexp_replace(hits.Referer,Utf8("^https?://(?:www\.)?([^/]+)/.*$"),Utf8("\1"))],
 aggr=[avg(length(hits.Referer)), count(Int64(1)), min(hits.Referer)]
+06)----------RepartitionExec: 
partitioning=Hash([regexp_replace(hits.Referer,Utf8("^https?://(?:www\.)?([^/]+)/.*$"),Utf8("\1"))@0],
 4), input_partitions=4
+07)------------AggregateExec: mode=Partial, gby=[regexp_replace(Referer@0, 
^https?://(?:www\.)?([^/]+)/.*$, \1) as 
regexp_replace(hits.Referer,Utf8("^https?://(?:www\.)?([^/]+)/.*$"),Utf8("\1"))],
 aggr=[avg(length(hits.Referer)), count(Int64(1)), min(hits.Referer)]
+08)--------------FilterExec: Referer@0 != 
+09)----------------RepartitionExec: partitioning=RoundRobinBatch(4), 
input_partitions=1
+10)------------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[Referer], file_type=parquet, predicate=Referer@14 != , 
pruning_predicate=Referer_null_count@2 != row_count@3 AND (Referer_min@0 !=  OR 
 != Referer_max@1), required_guarantees=[Referer not in ()]
+
 query TRIT
 SELECT REGEXP_REPLACE("Referer", '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS 
k, AVG(length("Referer")) AS l, COUNT(*) AS c, MIN("Referer") FROM hits WHERE 
"Referer" <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
 ----
 
+## Q29
+query TT
+EXPLAIN SELECT SUM("ResolutionWidth"), SUM("ResolutionWidth" + 1), 
SUM("ResolutionWidth" + 2), SUM("ResolutionWidth" + 3), SUM("ResolutionWidth" + 
4), SUM("ResolutionWidth" + 5), SUM("ResolutionWidth" + 6), 
SUM("ResolutionWidth" + 7), SUM("ResolutionWidth" + 8), SUM("ResolutionWidth" + 
9), SUM("ResolutionWidth" + 10), SUM("ResolutionWidth" + 11), 
SUM("ResolutionWidth" + 12), SUM("ResolutionWidth" + 13), SUM("ResolutionWidth" 
+ 14), SUM("ResolutionWidth" + 15), SUM("ResolutionWidth" + 16) [...]
+----
+logical_plan
+01)Aggregate: groupBy=[[]], aggr=[[sum(__common_expr_1 AS 
hits.ResolutionWidth), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(1)), 
sum(__common_expr_1 AS hits.ResolutionWidth + Int64(2)), sum(__common_expr_1 AS 
hits.ResolutionWidth + Int64(3)), sum(__common_expr_1 AS hits.ResolutionWidth + 
Int64(4)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(5)), 
sum(__common_expr_1 AS hits.ResolutionWidth + Int64(6)), sum(__common_expr_1 AS 
hits.ResolutionWidth + Int64(7)), sum(__common [...]
+02)--Projection: CAST(hits.ResolutionWidth AS Int64) AS __common_expr_1
+03)----SubqueryAlias: hits
+04)------TableScan: hits_raw projection=[ResolutionWidth]
+physical_plan
+01)AggregateExec: mode=Single, gby=[], aggr=[sum(hits.ResolutionWidth), 
sum(hits.ResolutionWidth + Int64(1)), sum(hits.ResolutionWidth + Int64(2)), 
sum(hits.ResolutionWidth + Int64(3)), sum(hits.ResolutionWidth + Int64(4)), 
sum(hits.ResolutionWidth + Int64(5)), sum(hits.ResolutionWidth + Int64(6)), 
sum(hits.ResolutionWidth + Int64(7)), sum(hits.ResolutionWidth + Int64(8)), 
sum(hits.ResolutionWidth + Int64(9)), sum(hits.ResolutionWidth + Int64(10)), 
sum(hits.ResolutionWidth + Int64(11)),  [...]
+02)--DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[CAST(ResolutionWidth@20 AS Int64) as __common_expr_1], 
file_type=parquet
+
 query 
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
 SELECT SUM("ResolutionWidth"), SUM("ResolutionWidth" + 1), 
SUM("ResolutionWidth" + 2), SUM("ResolutionWidth" + 3), SUM("ResolutionWidth" + 
4), SUM("ResolutionWidth" + 5), SUM("ResolutionWidth" + 6), 
SUM("ResolutionWidth" + 7), SUM("ResolutionWidth" + 8), SUM("ResolutionWidth" + 
9), SUM("ResolutionWidth" + 10), SUM("ResolutionWidth" + 11), 
SUM("ResolutionWidth" + 12), SUM("ResolutionWidth" + 13), SUM("ResolutionWidth" 
+ 14), SUM("ResolutionWidth" + 15), SUM("ResolutionWidth" + 16), SUM("R [...]
 ----
 0 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170 180 190 200 210 
220 230 240 250 260 270 280 290 300 310 320 330 340 350 360 370 380 390 400 410 
420 430 440 450 460 470 480 490 500 510 520 530 540 550 560 570 580 590 600 610 
620 630 640 650 660 670 680 690 700 710 720 730 740 750 760 770 780 790 800 810 
820 830 840 850 860 870 880 890
 
+## Q30
+query TT
+EXPLAIN SELECT "SearchEngineID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), 
AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY 
"SearchEngineID", "ClientIP" ORDER BY c DESC LIMIT 10;
+----
+logical_plan
+01)Sort: c DESC NULLS FIRST, fetch=10
+02)--Projection: hits.SearchEngineID, hits.ClientIP, count(Int64(1)) AS 
count(*) AS c, sum(hits.IsRefresh), avg(hits.ResolutionWidth)
+03)----Aggregate: groupBy=[[hits.SearchEngineID, hits.ClientIP]], 
aggr=[[count(Int64(1)), sum(CAST(hits.IsRefresh AS Int64)), 
avg(CAST(hits.ResolutionWidth AS Float64))]]
+04)------SubqueryAlias: hits
+05)--------Projection: hits_raw.ClientIP, hits_raw.IsRefresh, 
hits_raw.ResolutionWidth, hits_raw.SearchEngineID
+06)----------Filter: hits_raw.SearchPhrase != Utf8View("")
+07)------------TableScan: hits_raw projection=[ClientIP, IsRefresh, 
ResolutionWidth, SearchEngineID, SearchPhrase], 
partial_filters=[hits_raw.SearchPhrase != Utf8View("")]
+physical_plan
+01)SortPreservingMergeExec: [c@2 DESC], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[c@2 DESC], preserve_partitioning=[true]
+03)----ProjectionExec: expr=[SearchEngineID@0 as SearchEngineID, ClientIP@1 as 
ClientIP, count(Int64(1))@2 as c, sum(hits.IsRefresh)@3 as sum(hits.IsRefresh), 
avg(hits.ResolutionWidth)@4 as avg(hits.ResolutionWidth)]
+04)------AggregateExec: mode=FinalPartitioned, gby=[SearchEngineID@0 as 
SearchEngineID, ClientIP@1 as ClientIP], aggr=[count(Int64(1)), 
sum(hits.IsRefresh), avg(hits.ResolutionWidth)]
+05)--------RepartitionExec: partitioning=Hash([SearchEngineID@0, ClientIP@1], 
4), input_partitions=4
+06)----------AggregateExec: mode=Partial, gby=[SearchEngineID@3 as 
SearchEngineID, ClientIP@0 as ClientIP], aggr=[count(Int64(1)), 
sum(hits.IsRefresh), avg(hits.ResolutionWidth)]
+07)------------FilterExec: SearchPhrase@4 != , projection=[ClientIP@0, 
IsRefresh@1, ResolutionWidth@2, SearchEngineID@3]
+08)--------------RepartitionExec: partitioning=RoundRobinBatch(4), 
input_partitions=1
+09)----------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[ClientIP, IsRefresh, ResolutionWidth, SearchEngineID, 
SearchPhrase], file_type=parquet, predicate=SearchPhrase@39 != , 
pruning_predicate=SearchPhrase_null_count@2 != row_count@3 AND 
(SearchPhrase_min@0 !=  OR  != SearchPhrase_max@1), 
required_guarantees=[SearchPhrase not in ()]
+
 query IIIIR
 SELECT "SearchEngineID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), 
AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY 
"SearchEngineID", "ClientIP" ORDER BY c DESC LIMIT 10;
 ----
 
+## Q31
+query TT
+EXPLAIN SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), 
AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY "WatchID", 
"ClientIP" ORDER BY c DESC LIMIT 10;
+----
+logical_plan
+01)Sort: c DESC NULLS FIRST, fetch=10
+02)--Projection: hits.WatchID, hits.ClientIP, count(Int64(1)) AS count(*) AS 
c, sum(hits.IsRefresh), avg(hits.ResolutionWidth)
+03)----Aggregate: groupBy=[[hits.WatchID, hits.ClientIP]], 
aggr=[[count(Int64(1)), sum(CAST(hits.IsRefresh AS Int64)), 
avg(CAST(hits.ResolutionWidth AS Float64))]]
+04)------SubqueryAlias: hits
+05)--------Projection: hits_raw.WatchID, hits_raw.ClientIP, 
hits_raw.IsRefresh, hits_raw.ResolutionWidth
+06)----------Filter: hits_raw.SearchPhrase != Utf8View("")
+07)------------TableScan: hits_raw projection=[WatchID, ClientIP, IsRefresh, 
ResolutionWidth, SearchPhrase], partial_filters=[hits_raw.SearchPhrase != 
Utf8View("")]
+physical_plan
+01)SortPreservingMergeExec: [c@2 DESC], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[c@2 DESC], preserve_partitioning=[true]
+03)----ProjectionExec: expr=[WatchID@0 as WatchID, ClientIP@1 as ClientIP, 
count(Int64(1))@2 as c, sum(hits.IsRefresh)@3 as sum(hits.IsRefresh), 
avg(hits.ResolutionWidth)@4 as avg(hits.ResolutionWidth)]
+04)------AggregateExec: mode=FinalPartitioned, gby=[WatchID@0 as WatchID, 
ClientIP@1 as ClientIP], aggr=[count(Int64(1)), sum(hits.IsRefresh), 
avg(hits.ResolutionWidth)]
+05)--------RepartitionExec: partitioning=Hash([WatchID@0, ClientIP@1], 4), 
input_partitions=4
+06)----------AggregateExec: mode=Partial, gby=[WatchID@0 as WatchID, 
ClientIP@1 as ClientIP], aggr=[count(Int64(1)), sum(hits.IsRefresh), 
avg(hits.ResolutionWidth)]
+07)------------FilterExec: SearchPhrase@4 != , projection=[WatchID@0, 
ClientIP@1, IsRefresh@2, ResolutionWidth@3]
+08)--------------RepartitionExec: partitioning=RoundRobinBatch(4), 
input_partitions=1
+09)----------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[WatchID, ClientIP, IsRefresh, ResolutionWidth, SearchPhrase], 
file_type=parquet, predicate=SearchPhrase@39 != , 
pruning_predicate=SearchPhrase_null_count@2 != row_count@3 AND 
(SearchPhrase_min@0 !=  OR  != SearchPhrase_max@1), 
required_guarantees=[SearchPhrase not in ()]
+
 query IIIIR
 SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), 
AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY "WatchID", 
"ClientIP" ORDER BY c DESC LIMIT 10;
 ----
 
+## Q32
+query TT
+EXPLAIN SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), 
AVG("ResolutionWidth") FROM hits GROUP BY "WatchID", "ClientIP" ORDER BY c DESC 
LIMIT 10;
+----
+logical_plan
+01)Sort: c DESC NULLS FIRST, fetch=10
+02)--Projection: hits.WatchID, hits.ClientIP, count(Int64(1)) AS count(*) AS 
c, sum(hits.IsRefresh), avg(hits.ResolutionWidth)
+03)----Aggregate: groupBy=[[hits.WatchID, hits.ClientIP]], 
aggr=[[count(Int64(1)), sum(CAST(hits.IsRefresh AS Int64)), 
avg(CAST(hits.ResolutionWidth AS Float64))]]
+04)------SubqueryAlias: hits
+05)--------TableScan: hits_raw projection=[WatchID, ClientIP, IsRefresh, 
ResolutionWidth]
+physical_plan
+01)SortPreservingMergeExec: [c@2 DESC], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[c@2 DESC], preserve_partitioning=[true]
+03)----ProjectionExec: expr=[WatchID@0 as WatchID, ClientIP@1 as ClientIP, 
count(Int64(1))@2 as c, sum(hits.IsRefresh)@3 as sum(hits.IsRefresh), 
avg(hits.ResolutionWidth)@4 as avg(hits.ResolutionWidth)]
+04)------AggregateExec: mode=FinalPartitioned, gby=[WatchID@0 as WatchID, 
ClientIP@1 as ClientIP], aggr=[count(Int64(1)), sum(hits.IsRefresh), 
avg(hits.ResolutionWidth)]
+05)--------RepartitionExec: partitioning=Hash([WatchID@0, ClientIP@1], 4), 
input_partitions=1
+06)----------AggregateExec: mode=Partial, gby=[WatchID@0 as WatchID, 
ClientIP@1 as ClientIP], aggr=[count(Int64(1)), sum(hits.IsRefresh), 
avg(hits.ResolutionWidth)]
+07)------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[WatchID, ClientIP, IsRefresh, ResolutionWidth], file_type=parquet
+
 query IIIIR rowsort
 SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), 
AVG("ResolutionWidth") FROM hits GROUP BY "WatchID", "ClientIP" ORDER BY c DESC 
LIMIT 10;
 ----
@@ -237,6 +895,25 @@ SELECT "WatchID", "ClientIP", COUNT(*) AS c, 
SUM("IsRefresh"), AVG("ResolutionWi
 8924809397503602651 -1216690514 1 0 0
 9110818468285196899 -1216690514 1 0 0
 
+## Q33
+query TT
+EXPLAIN SELECT "URL", COUNT(*) AS c FROM hits GROUP BY "URL" ORDER BY c DESC 
LIMIT 10;
+----
+logical_plan
+01)Sort: c DESC NULLS FIRST, fetch=10
+02)--Projection: hits.URL, count(Int64(1)) AS count(*) AS c
+03)----Aggregate: groupBy=[[hits.URL]], aggr=[[count(Int64(1))]]
+04)------SubqueryAlias: hits
+05)--------TableScan: hits_raw projection=[URL]
+physical_plan
+01)SortPreservingMergeExec: [c@1 DESC], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[c@1 DESC], preserve_partitioning=[true]
+03)----ProjectionExec: expr=[URL@0 as URL, count(Int64(1))@1 as c]
+04)------AggregateExec: mode=FinalPartitioned, gby=[URL@0 as URL], 
aggr=[count(Int64(1))]
+05)--------RepartitionExec: partitioning=Hash([URL@0], 4), input_partitions=1
+06)----------AggregateExec: mode=Partial, gby=[URL@0 as URL], 
aggr=[count(Int64(1))]
+07)------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[URL], file_type=parquet
+
 query TI rowsort
 SELECT "URL", COUNT(*) AS c FROM hits GROUP BY "URL" ORDER BY c DESC LIMIT 10;
 ----
@@ -247,6 +924,25 @@ http://bonprix.ru/index.ru/cinema/art/A00387,3797); ru)&bL 
1
 http://holodilnik.ru/russia/05jul2013&model=0 1
 http://tours/Ekategoriya%2F&sr=http://slovareniye 1
 
+## Q34
+query TT
+EXPLAIN SELECT 1, "URL", COUNT(*) AS c FROM hits GROUP BY 1, "URL" ORDER BY c 
DESC LIMIT 10;
+----
+logical_plan
+01)Sort: c DESC NULLS FIRST, fetch=10
+02)--Projection: Int64(1), hits.URL, count(Int64(1)) AS c
+03)----Aggregate: groupBy=[[hits.URL]], aggr=[[count(Int64(1))]]
+04)------SubqueryAlias: hits
+05)--------TableScan: hits_raw projection=[URL]
+physical_plan
+01)SortPreservingMergeExec: [c@2 DESC], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[c@2 DESC], preserve_partitioning=[true]
+03)----ProjectionExec: expr=[1 as Int64(1), URL@0 as URL, count(Int64(1))@1 as 
c]
+04)------AggregateExec: mode=FinalPartitioned, gby=[URL@0 as URL], 
aggr=[count(Int64(1))]
+05)--------RepartitionExec: partitioning=Hash([URL@0], 4), input_partitions=1
+06)----------AggregateExec: mode=Partial, gby=[URL@0 as URL], 
aggr=[count(Int64(1))]
+07)------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[URL], file_type=parquet
+
 query ITI rowsort
 SELECT 1, "URL", COUNT(*) AS c FROM hits GROUP BY 1, "URL" ORDER BY c DESC 
LIMIT 10;
 ----
@@ -257,6 +953,26 @@ SELECT 1, "URL", COUNT(*) AS c FROM hits GROUP BY 1, "URL" 
ORDER BY c DESC LIMIT
 1 http://holodilnik.ru/russia/05jul2013&model=0 1
 1 http://tours/Ekategoriya%2F&sr=http://slovareniye 1
 
+## Q35
+query TT
+EXPLAIN SELECT "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3, 
COUNT(*) AS c FROM hits GROUP BY "ClientIP", "ClientIP" - 1, "ClientIP" - 2, 
"ClientIP" - 3 ORDER BY c DESC LIMIT 10;
+----
+logical_plan
+01)Sort: c DESC NULLS FIRST, fetch=10
+02)--Projection: hits.ClientIP, hits.ClientIP - Int64(1), hits.ClientIP - 
Int64(2), hits.ClientIP - Int64(3), count(Int64(1)) AS count(*) AS c
+03)----Aggregate: groupBy=[[hits.ClientIP, __common_expr_1 AS hits.ClientIP - 
Int64(1), __common_expr_1 AS hits.ClientIP - Int64(2), __common_expr_1 AS 
hits.ClientIP - Int64(3)]], aggr=[[count(Int64(1))]]
+04)------Projection: CAST(hits.ClientIP AS Int64) AS __common_expr_1, 
hits.ClientIP
+05)--------SubqueryAlias: hits
+06)----------TableScan: hits_raw projection=[ClientIP]
+physical_plan
+01)SortPreservingMergeExec: [c@4 DESC], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[c@4 DESC], preserve_partitioning=[true]
+03)----ProjectionExec: expr=[ClientIP@0 as ClientIP, hits.ClientIP - 
Int64(1)@1 as hits.ClientIP - Int64(1), hits.ClientIP - Int64(2)@2 as 
hits.ClientIP - Int64(2), hits.ClientIP - Int64(3)@3 as hits.ClientIP - 
Int64(3), count(Int64(1))@4 as c]
+04)------AggregateExec: mode=FinalPartitioned, gby=[ClientIP@0 as ClientIP, 
hits.ClientIP - Int64(1)@1 as hits.ClientIP - Int64(1), hits.ClientIP - 
Int64(2)@2 as hits.ClientIP - Int64(2), hits.ClientIP - Int64(3)@3 as 
hits.ClientIP - Int64(3)], aggr=[count(Int64(1))]
+05)--------RepartitionExec: partitioning=Hash([ClientIP@0, hits.ClientIP - 
Int64(1)@1, hits.ClientIP - Int64(2)@2, hits.ClientIP - Int64(3)@3], 4), 
input_partitions=1
+06)----------AggregateExec: mode=Partial, gby=[ClientIP@1 as ClientIP, 
__common_expr_1@0 - 1 as hits.ClientIP - Int64(1), __common_expr_1@0 - 2 as 
hits.ClientIP - Int64(2), __common_expr_1@0 - 3 as hits.ClientIP - Int64(3)], 
aggr=[count(Int64(1))]
+07)------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[CAST(ClientIP@7 AS Int64) as __common_expr_1, ClientIP], 
file_type=parquet
+
 query IIIII rowsort
 SELECT "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3, COUNT(*) AS 
c FROM hits GROUP BY "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3 
ORDER BY c DESC LIMIT 10;
 ----
@@ -265,46 +981,256 @@ SELECT "ClientIP", "ClientIP" - 1, "ClientIP" - 2, 
"ClientIP" - 3, COUNT(*) AS c
 1568366281 1568366280 1568366279 1568366278 2
 1615432634 1615432633 1615432632 1615432631 1
 
+## Q36
+query TT
+EXPLAIN SELECT "URL", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 
AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND 
"DontCountHits" = 0 AND "IsRefresh" = 0 AND "URL" <> '' GROUP BY "URL" ORDER BY 
PageViews DESC LIMIT 10;
+----
+logical_plan
+01)Sort: pageviews DESC NULLS FIRST, fetch=10
+02)--Projection: hits.URL, count(Int64(1)) AS count(*) AS pageviews
+03)----Aggregate: groupBy=[[hits.URL]], aggr=[[count(Int64(1))]]
+04)------SubqueryAlias: hits
+05)--------Projection: hits_raw.URL
+06)----------Filter: hits_raw.CounterID = Int32(62) AND 
CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) >= Date32("2013-07-01") AND 
CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) <= Date32("2013-07-31") AND 
hits_raw.DontCountHits = Int16(0) AND hits_raw.IsRefresh = Int16(0) AND 
hits_raw.URL != Utf8View("")
+07)------------TableScan: hits_raw projection=[EventDate, CounterID, URL, 
IsRefresh, DontCountHits], partial_filters=[hits_raw.CounterID = Int32(62), 
CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) >= Date32("2013-07-01"), 
CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) <= Date32("2013-07-31"), 
hits_raw.DontCountHits = Int16(0), hits_raw.IsRefresh = Int16(0), hits_raw.URL 
!= Utf8View("")]
+physical_plan
+01)SortPreservingMergeExec: [pageviews@1 DESC], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[pageviews@1 DESC], 
preserve_partitioning=[true]
+03)----ProjectionExec: expr=[URL@0 as URL, count(Int64(1))@1 as pageviews]
+04)------AggregateExec: mode=FinalPartitioned, gby=[URL@0 as URL], 
aggr=[count(Int64(1))]
+05)--------RepartitionExec: partitioning=Hash([URL@0], 4), input_partitions=4
+06)----------AggregateExec: mode=Partial, gby=[URL@0 as URL], 
aggr=[count(Int64(1))]
+07)------------FilterExec: CounterID@1 = 62 AND CAST(CAST(EventDate@0 AS 
Int32) AS Date32) >= 2013-07-01 AND CAST(CAST(EventDate@0 AS Int32) AS Date32) 
<= 2013-07-31 AND DontCountHits@4 = 0 AND IsRefresh@3 = 0 AND URL@2 != , 
projection=[URL@2]
+08)--------------RepartitionExec: partitioning=RoundRobinBatch(4), 
input_partitions=1
+09)----------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[EventDate, CounterID, URL, IsRefresh, DontCountHits], 
file_type=parquet, predicate=CounterID@6 = 62 AND CAST(CAST(EventDate@5 AS 
Int32) AS Date32) >= 2013-07-01 AND CAST(CAST(EventDate@5 AS Int32) AS Date32) 
<= 2013-07-31 AND DontCountHits@61 = 0 AND IsRefresh@15 = 0 AND URL@13 != , 
pruning_predicate=CounterID_null_count@2 != row_count@3 AND Coun [...]
+
 query TI
 SELECT "URL", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND 
"EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "DontCountHits" 
= 0 AND "IsRefresh" = 0 AND "URL" <> '' GROUP BY "URL" ORDER BY PageViews DESC 
LIMIT 10;
 ----
 
+## Q37
+query TT
+EXPLAIN SELECT "Title", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 
AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND 
"DontCountHits" = 0 AND "IsRefresh" = 0 AND "Title" <> '' GROUP BY "Title" 
ORDER BY PageViews DESC LIMIT 10;
+----
+logical_plan
+01)Sort: pageviews DESC NULLS FIRST, fetch=10
+02)--Projection: hits.Title, count(Int64(1)) AS count(*) AS pageviews
+03)----Aggregate: groupBy=[[hits.Title]], aggr=[[count(Int64(1))]]
+04)------SubqueryAlias: hits
+05)--------Projection: hits_raw.Title
+06)----------Filter: hits_raw.CounterID = Int32(62) AND 
CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) >= Date32("2013-07-01") AND 
CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) <= Date32("2013-07-31") AND 
hits_raw.DontCountHits = Int16(0) AND hits_raw.IsRefresh = Int16(0) AND 
hits_raw.Title != Utf8View("")
+07)------------TableScan: hits_raw projection=[Title, EventDate, CounterID, 
IsRefresh, DontCountHits], partial_filters=[hits_raw.CounterID = Int32(62), 
CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) >= Date32("2013-07-01"), 
CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) <= Date32("2013-07-31"), 
hits_raw.DontCountHits = Int16(0), hits_raw.IsRefresh = Int16(0), 
hits_raw.Title != Utf8View("")]
+physical_plan
+01)SortPreservingMergeExec: [pageviews@1 DESC], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[pageviews@1 DESC], 
preserve_partitioning=[true]
+03)----ProjectionExec: expr=[Title@0 as Title, count(Int64(1))@1 as pageviews]
+04)------AggregateExec: mode=FinalPartitioned, gby=[Title@0 as Title], 
aggr=[count(Int64(1))]
+05)--------RepartitionExec: partitioning=Hash([Title@0], 4), input_partitions=4
+06)----------AggregateExec: mode=Partial, gby=[Title@0 as Title], 
aggr=[count(Int64(1))]
+07)------------FilterExec: CounterID@2 = 62 AND CAST(CAST(EventDate@1 AS 
Int32) AS Date32) >= 2013-07-01 AND CAST(CAST(EventDate@1 AS Int32) AS Date32) 
<= 2013-07-31 AND DontCountHits@4 = 0 AND IsRefresh@3 = 0 AND Title@0 != , 
projection=[Title@0]
+08)--------------RepartitionExec: partitioning=RoundRobinBatch(4), 
input_partitions=1
+09)----------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[Title, EventDate, CounterID, IsRefresh, DontCountHits], 
file_type=parquet, predicate=CounterID@6 = 62 AND CAST(CAST(EventDate@5 AS 
Int32) AS Date32) >= 2013-07-01 AND CAST(CAST(EventDate@5 AS Int32) AS Date32) 
<= 2013-07-31 AND DontCountHits@61 = 0 AND IsRefresh@15 = 0 AND Title@2 != , 
pruning_predicate=CounterID_null_count@2 != row_count@3 AND C [...]
+
 query TI
 SELECT "Title", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND 
"EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "DontCountHits" 
= 0 AND "IsRefresh" = 0 AND "Title" <> '' GROUP BY "Title" ORDER BY PageViews 
DESC LIMIT 10;
 ----
 
+## Q38
+query TT
+EXPLAIN SELECT "URL", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 
AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" 
= 0 AND "IsLink" <> 0 AND "IsDownload" = 0 GROUP BY "URL" ORDER BY PageViews 
DESC LIMIT 10 OFFSET 1000;
+----
+logical_plan
+01)Limit: skip=1000, fetch=10
+02)--Sort: pageviews DESC NULLS FIRST, fetch=1010
+03)----Projection: hits.URL, count(Int64(1)) AS count(*) AS pageviews
+04)------Aggregate: groupBy=[[hits.URL]], aggr=[[count(Int64(1))]]
+05)--------SubqueryAlias: hits
+06)----------Projection: hits_raw.URL
+07)------------Filter: hits_raw.CounterID = Int32(62) AND 
CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) >= Date32("2013-07-01") AND 
CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) <= Date32("2013-07-31") AND 
hits_raw.IsRefresh = Int16(0) AND hits_raw.IsLink != Int16(0) AND 
hits_raw.IsDownload = Int16(0)
+08)--------------TableScan: hits_raw projection=[EventDate, CounterID, URL, 
IsRefresh, IsLink, IsDownload], partial_filters=[hits_raw.CounterID = 
Int32(62), CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) >= 
Date32("2013-07-01"), CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) <= 
Date32("2013-07-31"), hits_raw.IsRefresh = Int16(0), hits_raw.IsLink != 
Int16(0), hits_raw.IsDownload = Int16(0)]
+physical_plan
+01)GlobalLimitExec: skip=1000, fetch=10
+02)--SortPreservingMergeExec: [pageviews@1 DESC], fetch=1010
+03)----SortExec: TopK(fetch=1010), expr=[pageviews@1 DESC], 
preserve_partitioning=[true]
+04)------ProjectionExec: expr=[URL@0 as URL, count(Int64(1))@1 as pageviews]
+05)--------AggregateExec: mode=FinalPartitioned, gby=[URL@0 as URL], 
aggr=[count(Int64(1))]
+06)----------RepartitionExec: partitioning=Hash([URL@0], 4), input_partitions=4
+07)------------AggregateExec: mode=Partial, gby=[URL@0 as URL], 
aggr=[count(Int64(1))]
+08)--------------FilterExec: CounterID@1 = 62 AND CAST(CAST(EventDate@0 AS 
Int32) AS Date32) >= 2013-07-01 AND CAST(CAST(EventDate@0 AS Int32) AS Date32) 
<= 2013-07-31 AND IsRefresh@3 = 0 AND IsLink@4 != 0 AND IsDownload@5 = 0, 
projection=[URL@2]
+09)----------------RepartitionExec: partitioning=RoundRobinBatch(4), 
input_partitions=1
+10)------------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[EventDate, CounterID, URL, IsRefresh, IsLink, IsDownload], 
file_type=parquet, predicate=CounterID@6 = 62 AND CAST(CAST(EventDate@5 AS 
Int32) AS Date32) >= 2013-07-01 AND CAST(CAST(EventDate@5 AS Int32) AS Date32) 
<= 2013-07-31 AND IsRefresh@15 = 0 AND IsLink@52 != 0 AND IsDownload@53 = 0, 
pruning_predicate=CounterID_null_count@2 != row_count@3  [...]
+
 query TI
 SELECT "URL", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND 
"EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 
AND "IsLink" <> 0 AND "IsDownload" = 0 GROUP BY "URL" ORDER BY PageViews DESC 
LIMIT 10 OFFSET 1000;
 ----
 
+## Q39
+query TT
+EXPLAIN SELECT "TraficSourceID", "SearchEngineID", "AdvEngineID", CASE WHEN 
("SearchEngineID" = 0 AND "AdvEngineID" = 0) THEN "Referer" ELSE '' END AS Src, 
"URL" AS Dst, COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND 
"EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 
GROUP BY "TraficSourceID", "SearchEngineID", "AdvEngineID", Src, Dst ORDER BY 
PageViews DESC LIMIT 10 OFFSET 1000;
+----
+logical_plan
+01)Limit: skip=1000, fetch=10
+02)--Sort: pageviews DESC NULLS FIRST, fetch=1010
+03)----Projection: hits.TraficSourceID, hits.SearchEngineID, hits.AdvEngineID, 
CASE WHEN hits.SearchEngineID = Int64(0) AND hits.AdvEngineID = Int64(0) THEN 
hits.Referer ELSE Utf8("") END AS src, hits.URL AS dst, count(Int64(1)) AS 
count(*) AS pageviews
+04)------Aggregate: groupBy=[[hits.TraficSourceID, hits.SearchEngineID, 
hits.AdvEngineID, CASE WHEN hits.SearchEngineID = Int16(0) AND hits.AdvEngineID 
= Int16(0) THEN hits.Referer ELSE Utf8View("") END AS CASE WHEN 
hits.SearchEngineID = Int64(0) AND hits.AdvEngineID = Int64(0) THEN 
hits.Referer ELSE Utf8("") END, hits.URL]], aggr=[[count(Int64(1))]]
+05)--------SubqueryAlias: hits
+06)----------Projection: hits_raw.URL, hits_raw.Referer, 
hits_raw.TraficSourceID, hits_raw.SearchEngineID, hits_raw.AdvEngineID
+07)------------Filter: hits_raw.CounterID = Int32(62) AND 
CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) >= Date32("2013-07-01") AND 
CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) <= Date32("2013-07-31") AND 
hits_raw.IsRefresh = Int16(0)
+08)--------------TableScan: hits_raw projection=[EventDate, CounterID, URL, 
Referer, IsRefresh, TraficSourceID, SearchEngineID, AdvEngineID], 
partial_filters=[hits_raw.CounterID = Int32(62), CAST(CAST(hits_raw.EventDate 
AS Int32) AS Date32) >= Date32("2013-07-01"), CAST(CAST(hits_raw.EventDate AS 
Int32) AS Date32) <= Date32("2013-07-31"), hits_raw.IsRefresh = Int16(0)]
+physical_plan
+01)GlobalLimitExec: skip=1000, fetch=10
+02)--SortPreservingMergeExec: [pageviews@5 DESC], fetch=1010
+03)----SortExec: TopK(fetch=1010), expr=[pageviews@5 DESC], 
preserve_partitioning=[true]
+04)------ProjectionExec: expr=[TraficSourceID@0 as TraficSourceID, 
SearchEngineID@1 as SearchEngineID, AdvEngineID@2 as AdvEngineID, CASE WHEN 
hits.SearchEngineID = Int64(0) AND hits.AdvEngineID = Int64(0) THEN 
hits.Referer ELSE Utf8("") END@3 as src, URL@4 as dst, count(Int64(1))@5 as 
pageviews]
+05)--------AggregateExec: mode=FinalPartitioned, gby=[TraficSourceID@0 as 
TraficSourceID, SearchEngineID@1 as SearchEngineID, AdvEngineID@2 as 
AdvEngineID, CASE WHEN hits.SearchEngineID = Int64(0) AND hits.AdvEngineID = 
Int64(0) THEN hits.Referer ELSE Utf8("") END@3 as CASE WHEN hits.SearchEngineID 
= Int64(0) AND hits.AdvEngineID = Int64(0) THEN hits.Referer ELSE Utf8("") END, 
URL@4 as URL], aggr=[count(Int64(1))]
+06)----------RepartitionExec: partitioning=Hash([TraficSourceID@0, 
SearchEngineID@1, AdvEngineID@2, CASE WHEN hits.SearchEngineID = Int64(0) AND 
hits.AdvEngineID = Int64(0) THEN hits.Referer ELSE Utf8("") END@3, URL@4], 4), 
input_partitions=4
+07)------------AggregateExec: mode=Partial, gby=[TraficSourceID@2 as 
TraficSourceID, SearchEngineID@3 as SearchEngineID, AdvEngineID@4 as 
AdvEngineID, CASE WHEN SearchEngineID@3 = 0 AND AdvEngineID@4 = 0 THEN 
Referer@1 ELSE  END as CASE WHEN hits.SearchEngineID = Int64(0) AND 
hits.AdvEngineID = Int64(0) THEN hits.Referer ELSE Utf8("") END, URL@0 as URL], 
aggr=[count(Int64(1))]
+08)--------------FilterExec: CounterID@1 = 62 AND CAST(CAST(EventDate@0 AS 
Int32) AS Date32) >= 2013-07-01 AND CAST(CAST(EventDate@0 AS Int32) AS Date32) 
<= 2013-07-31 AND IsRefresh@4 = 0, projection=[URL@2, Referer@3, 
TraficSourceID@5, SearchEngineID@6, AdvEngineID@7]
+09)----------------RepartitionExec: partitioning=RoundRobinBatch(4), 
input_partitions=1
+10)------------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[EventDate, CounterID, URL, Referer, IsRefresh, TraficSourceID, 
SearchEngineID, AdvEngineID], file_type=parquet, predicate=CounterID@6 = 62 AND 
CAST(CAST(EventDate@5 AS Int32) AS Date32) >= 2013-07-01 AND 
CAST(CAST(EventDate@5 AS Int32) AS Date32) <= 2013-07-31 AND IsRefresh@15 = 0, 
pruning_predicate=CounterID_null_count@2 != row_count@3 AND Cou [...]
+
 query IIITTI
 SELECT "TraficSourceID", "SearchEngineID", "AdvEngineID", CASE WHEN 
("SearchEngineID" = 0 AND "AdvEngineID" = 0) THEN "Referer" ELSE '' END AS Src, 
"URL" AS Dst, COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND 
"EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 
GROUP BY "TraficSourceID", "SearchEngineID", "AdvEngineID", Src, Dst ORDER BY 
PageViews DESC LIMIT 10 OFFSET 1000;
 ----
 
+## Q40
+query TT
+EXPLAIN SELECT "URLHash", "EventDate", COUNT(*) AS PageViews FROM hits WHERE 
"CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= 
'2013-07-31' AND "IsRefresh" = 0 AND "TraficSourceID" IN (-1, 6) AND 
"RefererHash" = 3594120000172545465 GROUP BY "URLHash", "EventDate" ORDER BY 
PageViews DESC LIMIT 10 OFFSET 100;
+----
+logical_plan
+01)Limit: skip=100, fetch=10
+02)--Sort: pageviews DESC NULLS FIRST, fetch=110
+03)----Projection: hits.URLHash, hits.EventDate, count(Int64(1)) AS count(*) 
AS pageviews
+04)------Aggregate: groupBy=[[hits.URLHash, hits.EventDate]], 
aggr=[[count(Int64(1))]]
+05)--------SubqueryAlias: hits
+06)----------Projection: hits_raw.URLHash, CAST(CAST(hits_raw.EventDate AS 
Int32) AS Date32) AS EventDate
+07)------------Filter: hits_raw.CounterID = Int32(62) AND 
CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) >= Date32("2013-07-01") AND 
CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) <= Date32("2013-07-31") AND 
hits_raw.IsRefresh = Int16(0) AND (hits_raw.TraficSourceID = Int16(-1) OR 
hits_raw.TraficSourceID = Int16(6)) AND hits_raw.RefererHash = 
Int64(3594120000172545465)
+08)--------------TableScan: hits_raw projection=[EventDate, CounterID, 
IsRefresh, TraficSourceID, RefererHash, URLHash], 
partial_filters=[hits_raw.CounterID = Int32(62), CAST(CAST(hits_raw.EventDate 
AS Int32) AS Date32) >= Date32("2013-07-01"), CAST(CAST(hits_raw.EventDate AS 
Int32) AS Date32) <= Date32("2013-07-31"), hits_raw.IsRefresh = Int16(0), 
hits_raw.TraficSourceID = Int16(-1) OR hits_raw.TraficSourceID = Int16(6), 
hits_raw.RefererHash = Int64(3594120000172545465)]
+physical_plan
+01)GlobalLimitExec: skip=100, fetch=10
+02)--SortPreservingMergeExec: [pageviews@2 DESC], fetch=110
+03)----SortExec: TopK(fetch=110), expr=[pageviews@2 DESC], 
preserve_partitioning=[true]
+04)------ProjectionExec: expr=[URLHash@0 as URLHash, EventDate@1 as EventDate, 
count(Int64(1))@2 as pageviews]
+05)--------AggregateExec: mode=FinalPartitioned, gby=[URLHash@0 as URLHash, 
EventDate@1 as EventDate], aggr=[count(Int64(1))]
+06)----------RepartitionExec: partitioning=Hash([URLHash@0, EventDate@1], 4), 
input_partitions=4
+07)------------AggregateExec: mode=Partial, gby=[URLHash@0 as URLHash, 
EventDate@1 as EventDate], aggr=[count(Int64(1))]
+08)--------------ProjectionExec: expr=[URLHash@1 as URLHash, 
CAST(CAST(EventDate@0 AS Int32) AS Date32) as EventDate]
+09)----------------FilterExec: CounterID@1 = 62 AND CAST(CAST(EventDate@0 AS 
Int32) AS Date32) >= 2013-07-01 AND CAST(CAST(EventDate@0 AS Int32) AS Date32) 
<= 2013-07-31 AND IsRefresh@2 = 0 AND (TraficSourceID@3 = -1 OR 
TraficSourceID@3 = 6) AND RefererHash@4 = 3594120000172545465, 
projection=[EventDate@0, URLHash@5]
+10)------------------RepartitionExec: partitioning=RoundRobinBatch(4), 
input_partitions=1
+11)--------------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[EventDate, CounterID, IsRefresh, TraficSourceID, RefererHash, 
URLHash], file_type=parquet, predicate=CounterID@6 = 62 AND 
CAST(CAST(EventDate@5 AS Int32) AS Date32) >= 2013-07-01 AND 
CAST(CAST(EventDate@5 AS Int32) AS Date32) <= 2013-07-31 AND IsRefresh@15 = 0 
AND (TraficSourceID@37 = -1 OR TraficSourceID@37 = 6) AND RefererHash@102 = 
3594120 [...]
+
 query IDI
 SELECT "URLHash", "EventDate", COUNT(*) AS PageViews FROM hits WHERE 
"CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= 
'2013-07-31' AND "IsRefresh" = 0 AND "TraficSourceID" IN (-1, 6) AND 
"RefererHash" = 3594120000172545465 GROUP BY "URLHash", "EventDate" ORDER BY 
PageViews DESC LIMIT 10 OFFSET 100;
 ----
 
+## Q41
+query TT
+EXPLAIN SELECT "WindowClientWidth", "WindowClientHeight", COUNT(*) AS 
PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND 
"EventDate" <= '2013-07-31' AND "IsRefresh" = 0 AND "DontCountHits" = 0 AND 
"URLHash" = 2868770270353813622 GROUP BY "WindowClientWidth", 
"WindowClientHeight" ORDER BY PageViews DESC LIMIT 10 OFFSET 10000;
+----
+logical_plan
+01)Limit: skip=10000, fetch=10
+02)--Sort: pageviews DESC NULLS FIRST, fetch=10010
+03)----Projection: hits.WindowClientWidth, hits.WindowClientHeight, 
count(Int64(1)) AS count(*) AS pageviews
+04)------Aggregate: groupBy=[[hits.WindowClientWidth, 
hits.WindowClientHeight]], aggr=[[count(Int64(1))]]
+05)--------SubqueryAlias: hits
+06)----------Projection: hits_raw.WindowClientWidth, 
hits_raw.WindowClientHeight
+07)------------Filter: hits_raw.CounterID = Int32(62) AND 
CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) >= Date32("2013-07-01") AND 
CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) <= Date32("2013-07-31") AND 
hits_raw.IsRefresh = Int16(0) AND hits_raw.DontCountHits = Int16(0) AND 
hits_raw.URLHash = Int64(2868770270353813622)
+08)--------------TableScan: hits_raw projection=[EventDate, CounterID, 
IsRefresh, WindowClientWidth, WindowClientHeight, DontCountHits, URLHash], 
partial_filters=[hits_raw.CounterID = Int32(62), CAST(CAST(hits_raw.EventDate 
AS Int32) AS Date32) >= Date32("2013-07-01"), CAST(CAST(hits_raw.EventDate AS 
Int32) AS Date32) <= Date32("2013-07-31"), hits_raw.IsRefresh = Int16(0), 
hits_raw.DontCountHits = Int16(0), hits_raw.URLHash = 
Int64(2868770270353813622)]
+physical_plan
+01)GlobalLimitExec: skip=10000, fetch=10
+02)--SortPreservingMergeExec: [pageviews@2 DESC], fetch=10010
+03)----SortExec: TopK(fetch=10010), expr=[pageviews@2 DESC], 
preserve_partitioning=[true]
+04)------ProjectionExec: expr=[WindowClientWidth@0 as WindowClientWidth, 
WindowClientHeight@1 as WindowClientHeight, count(Int64(1))@2 as pageviews]
+05)--------AggregateExec: mode=FinalPartitioned, gby=[WindowClientWidth@0 as 
WindowClientWidth, WindowClientHeight@1 as WindowClientHeight], 
aggr=[count(Int64(1))]
+06)----------RepartitionExec: partitioning=Hash([WindowClientWidth@0, 
WindowClientHeight@1], 4), input_partitions=4
+07)------------AggregateExec: mode=Partial, gby=[WindowClientWidth@0 as 
WindowClientWidth, WindowClientHeight@1 as WindowClientHeight], 
aggr=[count(Int64(1))]
+08)--------------FilterExec: CounterID@1 = 62 AND CAST(CAST(EventDate@0 AS 
Int32) AS Date32) >= 2013-07-01 AND CAST(CAST(EventDate@0 AS Int32) AS Date32) 
<= 2013-07-31 AND IsRefresh@2 = 0 AND DontCountHits@5 = 0 AND URLHash@6 = 
2868770270353813622, projection=[WindowClientWidth@3, WindowClientHeight@4]
+09)----------------RepartitionExec: partitioning=RoundRobinBatch(4), 
input_partitions=1
+10)------------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[EventDate, CounterID, IsRefresh, WindowClientWidth, 
WindowClientHeight, DontCountHits, URLHash], file_type=parquet, 
predicate=CounterID@6 = 62 AND CAST(CAST(EventDate@5 AS Int32) AS Date32) >= 
2013-07-01 AND CAST(CAST(EventDate@5 AS Int32) AS Date32) <= 2013-07-31 AND 
IsRefresh@15 = 0 AND DontCountHits@61 = 0 AND URLHash@103 = 28687702703538136 
[...]
+
 query III
 SELECT "WindowClientWidth", "WindowClientHeight", COUNT(*) AS PageViews FROM 
hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= 
'2013-07-31' AND "IsRefresh" = 0 AND "DontCountHits" = 0 AND "URLHash" = 
2868770270353813622 GROUP BY "WindowClientWidth", "WindowClientHeight" ORDER BY 
PageViews DESC LIMIT 10 OFFSET 10000;
 ----
 
+## Q42
+query TT
+EXPLAIN SELECT DATE_TRUNC('minute', to_timestamp_seconds("EventTime")) AS M, 
COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= 
'2013-07-14' AND "EventDate" <= '2013-07-15' AND "IsRefresh" = 0 AND 
"DontCountHits" = 0 GROUP BY DATE_TRUNC('minute', 
to_timestamp_seconds("EventTime")) ORDER BY DATE_TRUNC('minute', M) LIMIT 10 
OFFSET 1000;
+----
+logical_plan
+01)Limit: skip=1000, fetch=10
+02)--Sort: date_trunc(Utf8("minute"), m) ASC NULLS LAST, fetch=1010
+03)----Projection: 
date_trunc(Utf8("minute"),to_timestamp_seconds(hits.EventTime)) AS m, 
count(Int64(1)) AS count(*) AS pageviews
+04)------Aggregate: groupBy=[[date_trunc(Utf8("minute"), 
to_timestamp_seconds(hits.EventTime))]], aggr=[[count(Int64(1))]]
+05)--------SubqueryAlias: hits
+06)----------Projection: hits_raw.EventTime
+07)------------Filter: hits_raw.CounterID = Int32(62) AND 
CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) >= Date32("2013-07-14") AND 
CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) <= Date32("2013-07-15") AND 
hits_raw.IsRefresh = Int16(0) AND hits_raw.DontCountHits = Int16(0)
+08)--------------TableScan: hits_raw projection=[EventTime, EventDate, 
CounterID, IsRefresh, DontCountHits], partial_filters=[hits_raw.CounterID = 
Int32(62), CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) >= 
Date32("2013-07-14"), CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) <= 
Date32("2013-07-15"), hits_raw.IsRefresh = Int16(0), hits_raw.DontCountHits = 
Int16(0)]
+physical_plan
+01)GlobalLimitExec: skip=1000, fetch=10
+02)--SortPreservingMergeExec: [date_trunc(minute, m@0) ASC NULLS LAST], 
fetch=1010
+03)----SortExec: TopK(fetch=1010), expr=[date_trunc(minute, m@0) ASC NULLS 
LAST], preserve_partitioning=[true]
+04)------ProjectionExec: 
expr=[date_trunc(Utf8("minute"),to_timestamp_seconds(hits.EventTime))@0 as m, 
count(Int64(1))@1 as pageviews]
+05)--------AggregateExec: mode=FinalPartitioned, 
gby=[date_trunc(Utf8("minute"),to_timestamp_seconds(hits.EventTime))@0 as 
date_trunc(Utf8("minute"),to_timestamp_seconds(hits.EventTime))], 
aggr=[count(Int64(1))]
+06)----------RepartitionExec: 
partitioning=Hash([date_trunc(Utf8("minute"),to_timestamp_seconds(hits.EventTime))@0],
 4), input_partitions=4
+07)------------AggregateExec: mode=Partial, gby=[date_trunc(minute, 
to_timestamp_seconds(EventTime@0)) as 
date_trunc(Utf8("minute"),to_timestamp_seconds(hits.EventTime))], 
aggr=[count(Int64(1))]
+08)--------------FilterExec: CounterID@2 = 62 AND CAST(CAST(EventDate@1 AS 
Int32) AS Date32) >= 2013-07-14 AND CAST(CAST(EventDate@1 AS Int32) AS Date32) 
<= 2013-07-15 AND IsRefresh@3 = 0 AND DontCountHits@4 = 0, 
projection=[EventTime@0]
+09)----------------RepartitionExec: partitioning=RoundRobinBatch(4), 
input_partitions=1
+10)------------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[EventTime, EventDate, CounterID, IsRefresh, DontCountHits], 
file_type=parquet, predicate=CounterID@6 = 62 AND CAST(CAST(EventDate@5 AS 
Int32) AS Date32) >= 2013-07-14 AND CAST(CAST(EventDate@5 AS Int32) AS Date32) 
<= 2013-07-15 AND IsRefresh@15 = 0 AND DontCountHits@61 = 0, 
pruning_predicate=CounterID_null_count@2 != row_count@3 AND CounterID_m [...]
+
 query PI
 SELECT DATE_TRUNC('minute', to_timestamp_seconds("EventTime")) AS M, COUNT(*) 
AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-14' 
AND "EventDate" <= '2013-07-15' AND "IsRefresh" = 0 AND "DontCountHits" = 0 
GROUP BY DATE_TRUNC('minute', to_timestamp_seconds("EventTime")) ORDER BY 
DATE_TRUNC('minute', M) LIMIT 10 OFFSET 1000;
 ----
 
 # Clickbench "Extended" queries that test count distinct
 
+query TT
+EXPLAIN SELECT COUNT(DISTINCT "SearchPhrase"), COUNT(DISTINCT "MobilePhone"), 
COUNT(DISTINCT "MobilePhoneModel") FROM hits;
+----
+logical_plan
+01)Aggregate: groupBy=[[]], aggr=[[count(DISTINCT hits.SearchPhrase), 
count(DISTINCT hits.MobilePhone), count(DISTINCT hits.MobilePhoneModel)]]
+02)--SubqueryAlias: hits
+03)----TableScan: hits_raw projection=[MobilePhone, MobilePhoneModel, 
SearchPhrase]
+physical_plan
+01)AggregateExec: mode=Single, gby=[], aggr=[count(DISTINCT 
hits.SearchPhrase), count(DISTINCT hits.MobilePhone), count(DISTINCT 
hits.MobilePhoneModel)]
+02)--DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[MobilePhone, MobilePhoneModel, SearchPhrase], file_type=parquet
+
 query III
 SELECT COUNT(DISTINCT "SearchPhrase"), COUNT(DISTINCT "MobilePhone"), 
COUNT(DISTINCT "MobilePhoneModel") FROM hits;
 ----
 1 1 1
 
+query TT
+EXPLAIN SELECT COUNT(DISTINCT "HitColor"), COUNT(DISTINCT "BrowserCountry"), 
COUNT(DISTINCT "BrowserLanguage")  FROM hits;
+----
+logical_plan
+01)Aggregate: groupBy=[[]], aggr=[[count(DISTINCT hits.HitColor), 
count(DISTINCT hits.BrowserCountry), count(DISTINCT hits.BrowserLanguage)]]
+02)--SubqueryAlias: hits
+03)----TableScan: hits_raw projection=[HitColor, BrowserLanguage, 
BrowserCountry]
+physical_plan
+01)AggregateExec: mode=Single, gby=[], aggr=[count(DISTINCT hits.HitColor), 
count(DISTINCT hits.BrowserCountry), count(DISTINCT hits.BrowserLanguage)]
+02)--DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[HitColor, BrowserLanguage, BrowserCountry], file_type=parquet
+
 query III
 SELECT COUNT(DISTINCT "HitColor"), COUNT(DISTINCT "BrowserCountry"), 
COUNT(DISTINCT "BrowserLanguage")  FROM hits;
 ----
 1 1 1
 
+query TT
+EXPLAIN SELECT "BrowserCountry",  COUNT(DISTINCT "SocialNetwork"), 
COUNT(DISTINCT "HitColor"), COUNT(DISTINCT "BrowserLanguage"), COUNT(DISTINCT 
"SocialAction") FROM hits GROUP BY 1 ORDER BY 2 DESC LIMIT 10;
+----
+logical_plan
+01)Sort: count(DISTINCT hits.SocialNetwork) DESC NULLS FIRST, fetch=10
+02)--Aggregate: groupBy=[[hits.BrowserCountry]], aggr=[[count(DISTINCT 
hits.SocialNetwork), count(DISTINCT hits.HitColor), count(DISTINCT 
hits.BrowserLanguage), count(DISTINCT hits.SocialAction)]]
+03)----SubqueryAlias: hits
+04)------TableScan: hits_raw projection=[HitColor, BrowserLanguage, 
BrowserCountry, SocialNetwork, SocialAction]
+physical_plan
+01)SortPreservingMergeExec: [count(DISTINCT hits.SocialNetwork)@1 DESC], 
fetch=10
+02)--SortExec: TopK(fetch=10), expr=[count(DISTINCT hits.SocialNetwork)@1 
DESC], preserve_partitioning=[true]
+03)----AggregateExec: mode=FinalPartitioned, gby=[BrowserCountry@0 as 
BrowserCountry], aggr=[count(DISTINCT hits.SocialNetwork), count(DISTINCT 
hits.HitColor), count(DISTINCT hits.BrowserLanguage), count(DISTINCT 
hits.SocialAction)]
+04)------RepartitionExec: partitioning=Hash([BrowserCountry@0], 4), 
input_partitions=1
+05)--------AggregateExec: mode=Partial, gby=[BrowserCountry@2 as 
BrowserCountry], aggr=[count(DISTINCT hits.SocialNetwork), count(DISTINCT 
hits.HitColor), count(DISTINCT hits.BrowserLanguage), count(DISTINCT 
hits.SocialAction)]
+06)----------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, 
projection=[HitColor, BrowserLanguage, BrowserCountry, SocialNetwork, 
SocialAction], file_type=parquet
+
 query TIIII
 SELECT "BrowserCountry",  COUNT(DISTINCT "SocialNetwork"), COUNT(DISTINCT 
"HitColor"), COUNT(DISTINCT "BrowserLanguage"), COUNT(DISTINCT "SocialAction") 
FROM hits GROUP BY 1 ORDER BY 2 DESC LIMIT 10;
 ----


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to