andygrove commented on issue #2008: URL: https://github.com/apache/datafusion-comet/issues/2008#issuecomment-3057914312
# Comet 0.9.0 Plan ``` == Physical Plan == AdaptiveSparkPlan isFinalPlan=false +- !CometHashAggregate [sum#146, isEmpty#147], Final, [sum(l_extendedprice#21)] +- CometExchange SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=225] +- !CometHashAggregate [l_extendedprice#21], Partial, [partial_sum(l_extendedprice#21)] +- CometProject [l_extendedprice#21], [l_extendedprice#21] +- CometHashJoin [p_partkey#74L], [l_partkey#127L], Inner, BuildRight, (cast(l_quantity#20 as decimal(17,7)) < (0.2 * avg(l_quantity))#125) :- CometProject [l_quantity#20, l_extendedprice#21, p_partkey#74L], [l_quantity#20, l_extendedprice#21, p_partkey#74L] : +- CometHashJoin [l_partkey#17L], [p_partkey#74L], Inner, BuildRight : :- CometExchange hashpartitioning(l_partkey#17L, 200), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=212] : : +- CometFilter [l_partkey#17L, l_quantity#20, l_extendedprice#21], (isnotnull(l_partkey#17L) AND isnotnull(l_quantity#20)) : : +- CometScan parquet [l_partkey#17L,l_quantity#20,l_extendedprice#21] Batched: true, DataFilters: [isnotnull(l_partkey#17L), isnotnull(l_quantity#20)], Format: CometParquet, Location: InMemoryFileIndex(1 paths)[file:/mnt/bigdata/tpch/sf100/lineitem.parquet], PartitionFilters: [], PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_quantity)], ReadSchema: struct<l_partkey:bigint,l_quantity:decimal(11,2),l_extendedprice:decimal(11,2)> : +- CometExchange hashpartitioning(p_partkey#74L, 200), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=214] : +- CometProject [p_partkey#74L], [p_partkey#74L] : +- CometFilter [p_partkey#74L, p_brand#77, p_container#80], ((((isnotnull(p_brand#77) AND isnotnull(p_container#80)) AND (p_brand#77 = Brand#42)) AND (p_container#80 = LG BAG)) AND isnotnull(p_partkey#74L)) : +- CometScan parquet [p_partkey#74L,p_brand#77,p_container#80] Batched: true, DataFilters: [isnotnull(p_brand#77), isnotnull(p_container#80), (p_brand#77 = Brand#42), (p_container#80 = LG ..., Format: CometParquet, Location: InMemoryFileIndex(1 paths)[file:/mnt/bigdata/tpch/sf100/part.parquet], PartitionFilters: [], PushedFilters: [IsNotNull(p_brand), IsNotNull(p_container), EqualTo(p_brand,Brand#42), EqualTo(p_container,LG BA..., ReadSchema: struct<p_partkey:bigint,p_brand:string,p_container:string> +- CometFilter [(0.2 * avg(l_quantity))#125, l_partkey#127L], isnotnull((0.2 * avg(l_quantity))#125) +- !CometHashAggregate [l_partkey#127L, sum#150, count#151L], Final, [l_partkey#127L], [avg(UnscaledValue(l_quantity#130))] +- CometExchange hashpartitioning(l_partkey#127L, 200), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=219] +- !CometHashAggregate [l_partkey#127L, l_quantity#130], Partial, [l_partkey#127L], [partial_avg(UnscaledValue(l_quantity#130))] +- CometFilter [l_partkey#127L, l_quantity#130], isnotnull(l_partkey#127L) +- CometScan parquet [l_partkey#127L,l_quantity#130] Batched: true, DataFilters: [isnotnull(l_partkey#127L)], Format: CometParquet, Location: InMemoryFileIndex(1 paths)[file:/mnt/bigdata/tpch/sf100/lineitem.parquet], PartitionFilters: [], PushedFilters: [IsNotNull(l_partkey)], ReadSchema: struct<l_partkey:bigint,l_quantity:decimal(11,2)> ``` # Gluten 1.4.0 Plan ``` == Physical Plan == AdaptiveSparkPlan isFinalPlan=false +- HashAggregate(keys=[], functions=[sum(l_extendedprice#21)]) +- Exchange SinglePartition, ENSURE_REQUIREMENTS, [plan_id=116] +- HashAggregate(keys=[], functions=[partial_sum(l_extendedprice#21)]) +- Project [l_extendedprice#21] +- SortMergeJoin [p_partkey#74L], [l_partkey#127L], Inner, (cast(l_quantity#20 as decimal(17,7)) < (0.2 * avg(l_quantity))#125) :- Project [l_quantity#20, l_extendedprice#21, p_partkey#74L] : +- SortMergeJoin [l_partkey#17L], [p_partkey#74L], Inner : :- Sort [l_partkey#17L ASC NULLS FIRST], false, 0 : : +- Exchange hashpartitioning(l_partkey#17L, 200), ENSURE_REQUIREMENTS, [plan_id=100] : : +- Filter (isnotnull(l_partkey#17L) AND isnotnull(l_quantity#20)) : : +- FileScan parquet [l_partkey#17L,l_quantity#20,l_extendedprice#21] Batched: true, DataFilters: [isnotnull(l_partkey#17L), isnotnull(l_quantity#20)], Format: Parquet, Location: InMemoryFileIndex(1 paths)[file:/mnt/bigdata/tpch/sf100/lineitem.parquet], PartitionFilters: [], PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_quantity)], ReadSchema: struct<l_partkey:bigint,l_quantity:decimal(11,2),l_extendedprice:decimal(11,2)> : +- Sort [p_partkey#74L ASC NULLS FIRST], false, 0 : +- Exchange hashpartitioning(p_partkey#74L, 200), ENSURE_REQUIREMENTS, [plan_id=101] : +- Project [p_partkey#74L] : +- Filter ((((isnotnull(p_brand#77) AND isnotnull(p_container#80)) AND (p_brand#77 = Brand#42)) AND (p_container#80 = LG BAG)) AND isnotnull(p_partkey#74L)) : +- FileScan parquet [p_partkey#74L,p_brand#77,p_container#80] Batched: true, DataFilters: [isnotnull(p_brand#77), isnotnull(p_container#80), (p_brand#77 = Brand#42), (p_container#80 = LG ..., Format: Parquet, Location: InMemoryFileIndex(1 paths)[file:/mnt/bigdata/tpch/sf100/part.parquet], PartitionFilters: [], PushedFilters: [IsNotNull(p_brand), IsNotNull(p_container), EqualTo(p_brand,Brand#42), EqualTo(p_container,LG BA..., ReadSchema: struct<p_partkey:bigint,p_brand:string,p_container:string> +- Sort [l_partkey#127L ASC NULLS FIRST], false, 0 +- Filter isnotnull((0.2 * avg(l_quantity))#125) +- HashAggregate(keys=[l_partkey#127L], functions=[avg(UnscaledValue(l_quantity#130))]) +- Exchange hashpartitioning(l_partkey#127L, 200), ENSURE_REQUIREMENTS, [plan_id=106] +- HashAggregate(keys=[l_partkey#127L], functions=[partial_avg(UnscaledValue(l_quantity#130))]) +- Filter isnotnull(l_partkey#127L) +- FileScan parquet [l_partkey#127L,l_quantity#130] Batched: true, DataFilters: [isnotnull(l_partkey#127L)], Format: Parquet, Location: InMemoryFileIndex(1 paths)[file:/mnt/bigdata/tpch/sf100/lineitem.parquet], PartitionFilters: [], PushedFilters: [IsNotNull(l_partkey)], ReadSchema: struct<l_partkey:bigint,l_quantity:decimal(11,2)> ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org