This is an automated email from the ASF dual-hosted git repository. blaginin pushed a commit to branch db/dorny-md in repository https://gitbox.apache.org/repos/asf/datafusion.git
commit c4011e61fcb4ad0e98f6e095cef0b14836290a41 Merge: e5c4c9702e 617700d1b3 Author: blaginin <[email protected]> AuthorDate: Tue Jan 13 19:06:56 2026 +0000 Merge branch 'main' into sandbox-main # Conflicts: # .asf.yaml .github/dependabot.yml | 1 + .github/workflows/audit.yml | 4 +- .github/workflows/rust.yml | 21 +- Cargo.lock | 1659 ++++++++------------ Cargo.toml | 103 +- NOTICE.txt | 2 +- benchmarks/Cargo.toml | 2 +- benchmarks/README.md | 19 +- benchmarks/bench.sh | 6 +- benchmarks/compare.py | 20 +- benchmarks/src/bin/dfbench.rs | 15 +- benchmarks/src/bin/external_aggr.rs | 27 +- benchmarks/src/bin/imdb.rs | 24 +- benchmarks/src/bin/mem_profile.rs | 22 +- benchmarks/src/cancellation.rs | 18 +- benchmarks/src/clickbench.rs | 30 +- benchmarks/src/h2o.rs | 26 +- benchmarks/src/hj.rs | 422 +++-- benchmarks/src/imdb/convert.rs | 12 +- benchmarks/src/imdb/run.rs | 22 +- benchmarks/src/nlj.rs | 12 +- benchmarks/src/smj.rs | 12 +- benchmarks/src/sort_tpch.rs | 18 +- benchmarks/src/tpcds/run.rs | 28 +- benchmarks/src/tpch/run.rs | 28 +- benchmarks/src/util/options.rs | 18 +- ci/scripts/check_examples_docs.sh | 64 + datafusion-cli/Cargo.toml | 3 +- datafusion-cli/src/functions.rs | 185 ++- datafusion-cli/src/main.rs | 113 +- datafusion-cli/tests/cli_integration.rs | 8 +- datafusion-examples/Cargo.toml | 15 +- datafusion-examples/README.md | 21 +- datafusion-examples/data/README.md | 25 + datafusion-examples/data/csv/cars.csv | 26 + datafusion-examples/data/csv/regex.csv | 12 + .../examples/builtin_functions/function_factory.rs | 4 +- .../examples/builtin_functions/main.rs | 2 +- .../examples/builtin_functions/regexp.rs | 31 +- .../examples/custom_data_source/csv_json_opener.rs | 32 +- .../custom_data_source/csv_sql_streaming.rs | 19 +- .../custom_data_source/default_column_values.rs | 7 +- .../examples/custom_data_source/main.rs | 2 +- datafusion-examples/examples/data_io/main.rs | 2 +- .../examples/data_io/parquet_encrypted.rs | 32 +- .../examples/data_io/parquet_exec_visitor.rs | 24 +- .../examples/data_io/parquet_index.rs | 2 +- .../examples/dataframe/cache_factory.rs | 229 +++ .../examples/dataframe/dataframe.rs | 84 +- .../examples/dataframe/deserialize_to_struct.rs | 321 +++- datafusion-examples/examples/dataframe/main.rs | 9 +- .../examples/execution_monitoring/main.rs | 2 +- .../examples/execution_monitoring/tracing.rs | 34 +- .../examples/external_dependency/main.rs | 2 +- .../ffi/ffi_example_table_provider/src/lib.rs | 7 +- .../examples/ffi/ffi_module_interface/src/lib.rs | 4 +- .../examples/ffi/ffi_module_loader/Cargo.toml | 1 + .../examples/ffi/ffi_module_loader/src/main.rs | 11 +- datafusion-examples/examples/flight/client.rs | 17 +- datafusion-examples/examples/flight/main.rs | 2 +- datafusion-examples/examples/flight/server.rs | 38 +- datafusion-examples/examples/flight/sql_server.rs | 25 +- datafusion-examples/examples/proto/main.rs | 2 +- .../examples/query_planning/expr_api.rs | 13 +- .../examples/query_planning/main.rs | 2 +- .../examples/query_planning/parse_sql_expr.rs | 70 +- .../examples/query_planning/plan_to_sql.rs | 80 +- .../examples/query_planning/planner_api.rs | 23 +- .../examples/query_planning/thread_pools.rs | 14 +- .../examples/relation_planner/main.rs | 2 +- .../examples/relation_planner/match_recognize.rs | 6 +- .../examples/relation_planner/pivot_unpivot.rs | 10 +- .../examples/relation_planner/table_sample.rs | 63 +- .../examples/sql_ops/custom_sql_parser.rs | 420 +++++ datafusion-examples/examples/sql_ops/dialect.rs | 135 -- datafusion-examples/examples/sql_ops/main.rs | 14 +- datafusion-examples/examples/sql_ops/query.rs | 64 +- datafusion-examples/examples/udf/advanced_udaf.rs | 15 +- datafusion-examples/examples/udf/advanced_udwf.rs | 49 +- datafusion-examples/examples/udf/async_udf.rs | 3 +- datafusion-examples/examples/udf/main.rs | 2 +- datafusion-examples/examples/udf/simple_udtf.rs | 27 +- .../mod.rs => datafusion-examples/src/lib.rs | 4 +- datafusion-examples/src/utils/csv_to_parquet.rs | 245 +++ .../src/utils/datasets/cars.rs | 16 +- datafusion-examples/src/utils/datasets/mod.rs | 139 ++ .../src/utils/datasets/regex.rs | 13 +- .../src/utils}/mod.rs | 5 +- datafusion/catalog-listing/src/config.rs | 86 +- datafusion/catalog-listing/src/table.rs | 410 ++++- datafusion/catalog/src/cte_worktable.rs | 38 +- datafusion/catalog/src/memory/table.rs | 349 +++- datafusion/catalog/src/table.rs | 25 + datafusion/common/Cargo.toml | 2 +- datafusion/common/src/config.rs | 87 +- datafusion/common/src/dfschema.rs | 6 + .../common/src/file_options/parquet_writer.rs | 34 +- datafusion/common/src/hash_utils.rs | 136 +- datafusion/common/src/lib.rs | 4 +- datafusion/common/src/parquet_config.rs | 108 ++ datafusion/common/src/scalar/consts.rs | 12 + datafusion/common/src/scalar/mod.rs | 238 ++- datafusion/common/src/utils/mod.rs | 36 +- datafusion/core/Cargo.toml | 5 + .../core/benches/range_and_generate_series.rs | 90 ++ datafusion/core/benches/topk_aggregate.rs | 211 ++- datafusion/core/src/dataframe/mod.rs | 2 + .../core/src/datasource/file_format/arrow.rs | 93 ++ datafusion/core/src/datasource/file_format/csv.rs | 90 ++ datafusion/core/src/datasource/file_format/json.rs | 42 + datafusion/core/src/datasource/file_format/mod.rs | 12 +- .../core/src/datasource/file_format/parquet.rs | 22 + datafusion/core/src/datasource/listing/table.rs | 22 +- .../core/src/datasource/listing_table_factory.rs | 7 +- datafusion/core/src/datasource/mod.rs | 66 +- .../core/src/datasource/physical_plan/mod.rs | 135 -- .../core/src/datasource/physical_plan/parquet.rs | 94 +- datafusion/core/src/execution/context/mod.rs | 35 +- datafusion/core/src/execution/session_state.rs | 57 +- datafusion/core/src/physical_planner.rs | 167 +- datafusion/core/src/test_util/parquet.rs | 15 +- datafusion/core/tests/core_integration.rs | 3 - .../tests/custom_sources_cases/dml_planning.rs | 297 ++++ datafusion/core/tests/custom_sources_cases/mod.rs | 1 + datafusion/core/tests/dataframe/mod.rs | 2 +- .../core/tests/datasource/object_store_access.rs | 76 +- datafusion/core/tests/execution/coop.rs | 59 +- datafusion/core/tests/expr_api/mod.rs | 5 +- datafusion/core/tests/expr_api/simplification.rs | 156 +- datafusion/core/tests/fuzz_cases/join_fuzz.rs | 1 + datafusion/core/tests/fuzz_cases/window_fuzz.rs | 4 +- datafusion/core/tests/parquet/custom_reader.rs | 10 +- .../parquet/{schema_adapter.rs => expr_adapter.rs} | 152 +- datafusion/core/tests/parquet/mod.rs | 3 +- datafusion/core/tests/parquet/ordering.rs | 103 ++ datafusion/core/tests/parquet/page_pruning.rs | 9 +- .../physical_optimizer/aggregate_statistics.rs | 86 + .../physical_optimizer/enforce_distribution.rs | 59 +- .../tests/physical_optimizer/enforce_sorting.rs | 57 +- .../physical_optimizer/filter_pushdown/mod.rs | 409 +++-- .../physical_optimizer/filter_pushdown/util.rs | 28 +- .../tests/physical_optimizer/join_selection.rs | 10 + .../tests/physical_optimizer/limit_pushdown.rs | 84 +- .../physical_optimizer/partition_statistics.rs | 162 +- .../physical_optimizer/projection_pushdown.rs | 46 + .../core/tests/physical_optimizer/pushdown_sort.rs | 434 ++++- .../replace_with_order_preserving_variants.rs | 229 ++- .../tests/physical_optimizer/sanity_checker.rs | 6 +- .../core/tests/physical_optimizer/test_utils.rs | 216 ++- .../schema_adapter_integration_tests.rs | 752 --------- datafusion/core/tests/sql/explain_analyze.rs | 1 - datafusion/core/tests/sql/mod.rs | 1 + datafusion/core/tests/sql/unparser.rs | 462 ++++++ .../core/tests/user_defined/relation_planner.rs | 16 +- .../user_defined_async_scalar_functions.rs | 40 +- .../user_defined/user_defined_scalar_functions.rs | 19 +- datafusion/datasource-arrow/NOTICE.txt | 2 +- datafusion/datasource-arrow/src/source.rs | 118 +- .../datasource-avro/src/avro_to_arrow/schema.rs | 4 +- datafusion/datasource-avro/src/source.rs | 17 - datafusion/datasource-csv/src/file_format.rs | 50 +- datafusion/datasource-csv/src/source.rs | 17 - datafusion/datasource-json/src/source.rs | 17 - datafusion/datasource-parquet/Cargo.toml | 7 + .../benches/parquet_nested_filter_pushdown.rs | 238 +++ datafusion/datasource-parquet/src/file_format.rs | 92 +- datafusion/datasource-parquet/src/metadata.rs | 142 +- datafusion/datasource-parquet/src/metrics.rs | 10 +- datafusion/datasource-parquet/src/mod.rs | 1 + datafusion/datasource-parquet/src/opener.rs | 617 +++++--- datafusion/datasource-parquet/src/row_filter.rs | 487 +++++- .../datasource-parquet/src/row_group_filter.rs | 9 +- datafusion/datasource-parquet/src/sort.rs | 865 ++++++++-- datafusion/datasource-parquet/src/source.rs | 88 +- .../datasource-parquet/src/supported_predicates.rs | 144 ++ datafusion/datasource/Cargo.toml | 2 +- datafusion/datasource/src/display.rs | 9 +- datafusion/datasource/src/file.rs | 87 +- datafusion/datasource/src/file_format.rs | 76 + datafusion/datasource/src/file_scan_config.rs | 96 +- datafusion/datasource/src/mod.rs | 77 +- datafusion/datasource/src/schema_adapter.rs | 1065 ++----------- datafusion/datasource/src/test_util.rs | 18 - datafusion/datasource/src/url.rs | 84 +- datafusion/datasource/src/write/demux.rs | 17 + datafusion/execution/Cargo.toml | 1 + datafusion/execution/src/cache/cache_manager.rs | 284 +++- datafusion/execution/src/cache/cache_unit.rs | 454 ++++-- .../execution/src/cache/file_metadata_cache.rs | 429 ++--- datafusion/execution/src/cache/list_files_cache.rs | 917 ++++++----- datafusion/execution/src/cache/mod.rs | 48 +- datafusion/execution/src/lib.rs | 1 - datafusion/expr-common/src/accumulator.rs | 23 +- datafusion/expr-common/src/signature.rs | 74 +- datafusion/expr-common/src/type_coercion/binary.rs | 121 +- .../src/type_coercion/binary/tests/arithmetic.rs | 8 +- .../src/type_coercion/binary/tests/comparison.rs | 58 +- datafusion/expr/src/arguments.rs | 433 ++++- datafusion/expr/src/execution_props.rs | 16 +- datafusion/expr/src/expr.rs | 3 +- datafusion/expr/src/expr_schema.rs | 55 +- datafusion/expr/src/function.rs | 8 +- datafusion/expr/src/lib.rs | 4 + datafusion/expr/src/logical_plan/builder.rs | 23 + datafusion/expr/src/logical_plan/dml.rs | 8 +- datafusion/expr/src/logical_plan/plan.rs | 24 + datafusion/expr/src/logical_plan/tree_node.rs | 4 + datafusion/expr/src/partition_evaluator.rs | 4 +- datafusion/expr/src/planner.rs | 4 +- datafusion/expr/src/simplify.rs | 120 +- datafusion/expr/src/type_coercion/functions.rs | 304 ++-- datafusion/expr/src/udaf.rs | 2 +- datafusion/expr/src/udf.rs | 14 +- datafusion/expr/src/udwf.rs | 2 +- datafusion/ffi/Cargo.toml | 15 +- datafusion/ffi/src/arrow_wrappers.rs | 10 +- datafusion/ffi/src/catalog_provider.rs | 95 +- datafusion/ffi/src/catalog_provider_list.rs | 81 +- datafusion/ffi/src/execution/task_ctx.rs | 1 - datafusion/ffi/src/execution/task_ctx_provider.rs | 1 - datafusion/ffi/src/execution_plan.rs | 98 +- datafusion/ffi/src/expr/columnar_value.rs | 1 - datafusion/ffi/src/expr/distribution.rs | 11 +- datafusion/ffi/src/expr/expr_properties.rs | 3 - datafusion/ffi/src/expr/interval.rs | 4 +- datafusion/ffi/src/insert_op.rs | 3 +- datafusion/ffi/src/lib.rs | 1 + datafusion/ffi/src/physical_expr/mod.rs | 69 +- datafusion/ffi/src/physical_expr/partitioning.rs | 7 +- datafusion/ffi/src/physical_expr/sort.rs | 7 +- datafusion/ffi/src/plan_properties.rs | 143 +- .../ffi/src/proto/logical_extension_codec.rs | 39 +- .../ffi/src/proto/physical_extension_codec.rs | 11 +- datafusion/ffi/src/record_batch_stream.rs | 85 +- datafusion/ffi/src/schema_provider.rs | 103 +- datafusion/ffi/src/session/config.rs | 1 - datafusion/ffi/src/session/mod.rs | 10 +- datafusion/ffi/src/table_provider.rs | 259 +-- datafusion/ffi/src/table_source.rs | 7 +- datafusion/ffi/src/tests/async_provider.rs | 64 +- datafusion/ffi/src/tests/catalog.rs | 41 +- datafusion/ffi/src/tests/mod.rs | 60 +- datafusion/ffi/src/tests/sync_provider.rs | 11 +- datafusion/ffi/src/tests/udf_udaf_udwf.rs | 70 +- datafusion/ffi/src/tests/utils.rs | 8 +- datafusion/ffi/src/udaf/accumulator.rs | 40 +- datafusion/ffi/src/udaf/accumulator_args.rs | 122 +- datafusion/ffi/src/udaf/groups_accumulator.rs | 47 +- datafusion/ffi/src/udaf/mod.rs | 67 +- datafusion/ffi/src/udf/mod.rs | 63 +- datafusion/ffi/src/udf/return_type_args.rs | 16 +- datafusion/ffi/src/udtf.rs | 158 +- datafusion/ffi/src/udwf/mod.rs | 17 +- datafusion/ffi/src/udwf/partition_evaluator.rs | 31 +- .../ffi/src/udwf/partition_evaluator_args.rs | 108 +- datafusion/ffi/src/udwf/range.rs | 1 - datafusion/ffi/src/volatility.rs | 3 +- datafusion/ffi/tests/ffi_catalog.rs | 14 +- datafusion/ffi/tests/ffi_integration.rs | 11 +- datafusion/ffi/tests/ffi_udaf.rs | 71 +- datafusion/ffi/tests/ffi_udf.rs | 4 +- datafusion/ffi/tests/ffi_udtf.rs | 8 +- datafusion/ffi/tests/ffi_udwf.rs | 3 +- datafusion/ffi/tests/utils/mod.rs | 43 + .../src/aggregate/groups_accumulator/accumulate.rs | 214 ++- .../src/aggregate/groups_accumulator/bool_op.rs | 2 +- .../src/aggregate/groups_accumulator/prim_op.rs | 5 +- datafusion/functions-aggregate/src/array_agg.rs | 4 +- datafusion/functions-aggregate/src/average.rs | 21 +- datafusion/functions-aggregate/src/correlation.rs | 62 +- datafusion/functions-aggregate/src/count.rs | 4 +- datafusion/functions-aggregate/src/median.rs | 22 +- .../functions-aggregate/src/percentile_cont.rs | 505 +++--- datafusion/functions-aggregate/src/string_agg.rs | 13 +- datafusion/functions-aggregate/src/variance.rs | 144 +- datafusion/functions-nested/src/array_has.rs | 21 +- datafusion/functions-nested/src/planner.rs | 3 + datafusion/functions-nested/src/set_ops.rs | 19 +- datafusion/functions-table/src/generate_series.rs | 47 + datafusion/functions-window/Cargo.toml | 8 + datafusion/functions-window/benches/nth_value.rs | 263 ++++ datafusion/functions-window/src/nth_value.rs | 182 ++- datafusion/functions/Cargo.toml | 70 +- datafusion/functions/benches/concat.rs | 100 +- .../functions/benches/{concat.rs => concat_ws.rs} | 64 +- datafusion/functions/benches/contains.rs | 185 +++ .../functions/benches/{to_hex.rs => crypto.rs} | 74 +- datafusion/functions/benches/ends_with.rs | 185 +++ datafusion/functions/benches/factorial.rs | 67 + datafusion/functions/benches/floor_ceil.rs | 135 ++ datafusion/functions/benches/left.rs | 111 ++ datafusion/functions/benches/levenshtein.rs | 87 + datafusion/functions/benches/pad.rs | 314 +++- datafusion/functions/benches/regexp_count.rs | 118 ++ datafusion/functions/benches/replace.rs | 193 +++ datafusion/functions/benches/split_part.rs | 382 +++++ datafusion/functions/benches/starts_with.rs | 185 +++ datafusion/functions/benches/substr_index.rs | 124 +- datafusion/functions/benches/to_hex.rs | 120 +- datafusion/functions/benches/to_timestamp.rs | 24 +- datafusion/functions/benches/translate.rs | 90 ++ datafusion/functions/benches/{ltrim.rs => trim.rs} | 200 ++- datafusion/functions/src/core/arrow_cast.rs | 17 +- datafusion/functions/src/core/arrow_metadata.rs | 160 ++ datafusion/functions/src/core/coalesce.rs | 4 +- datafusion/functions/src/core/getfield.rs | 645 +++++--- datafusion/functions/src/core/mod.rs | 14 + datafusion/functions/src/core/nvl.rs | 4 +- datafusion/functions/src/core/nvl2.rs | 4 +- datafusion/functions/src/core/union_extract.rs | 5 +- datafusion/functions/src/crypto/basic.rs | 34 +- datafusion/functions/src/datetime/common.rs | 153 +- datafusion/functions/src/datetime/current_date.rs | 23 +- datafusion/functions/src/datetime/current_time.rs | 65 +- datafusion/functions/src/datetime/date_bin.rs | 314 +++- datafusion/functions/src/datetime/date_trunc.rs | 284 +++- datafusion/functions/src/datetime/mod.rs | 55 +- datafusion/functions/src/datetime/now.rs | 18 +- datafusion/functions/src/datetime/to_date.rs | 6 +- datafusion/functions/src/datetime/to_time.rs | 252 +++ datafusion/functions/src/datetime/to_timestamp.rs | 1007 +++++++++--- datafusion/functions/src/datetime/to_unixtime.rs | 38 +- datafusion/functions/src/encoding/inner.rs | 745 ++++----- datafusion/functions/src/macros.rs | 29 + datafusion/functions/src/math/ceil.rs | 206 +++ datafusion/functions/src/math/decimal.rs | 111 ++ datafusion/functions/src/math/factorial.rs | 78 +- datafusion/functions/src/math/floor.rs | 206 +++ datafusion/functions/src/math/iszero.rs | 32 +- datafusion/functions/src/math/log.rs | 230 ++- datafusion/functions/src/math/mod.rs | 21 +- datafusion/functions/src/math/monotonicity.rs | 48 - datafusion/functions/src/math/nans.rs | 32 +- datafusion/functions/src/math/nanvl.rs | 27 +- datafusion/functions/src/math/power.rs | 368 ++++- datafusion/functions/src/math/round.rs | 367 +++-- datafusion/functions/src/regex/regexpcount.rs | 12 +- datafusion/functions/src/regex/regexplike.rs | 4 +- datafusion/functions/src/string/btrim.rs | 2 +- datafusion/functions/src/string/common.rs | 234 ++- datafusion/functions/src/string/concat.rs | 11 +- datafusion/functions/src/string/concat_ws.rs | 35 +- datafusion/functions/src/string/contains.rs | 89 +- datafusion/functions/src/string/ends_with.rs | 294 +++- datafusion/functions/src/string/levenshtein.rs | 24 +- datafusion/functions/src/string/ltrim.rs | 10 +- datafusion/functions/src/string/repeat.rs | 33 +- datafusion/functions/src/string/replace.rs | 85 +- datafusion/functions/src/string/rtrim.rs | 10 +- datafusion/functions/src/string/split_part.rs | 62 +- datafusion/functions/src/string/starts_with.rs | 272 +++- datafusion/functions/src/string/to_hex.rs | 217 ++- datafusion/functions/src/string/uuid.rs | 2 +- datafusion/functions/src/unicode/left.rs | 15 +- datafusion/functions/src/unicode/lpad.rs | 33 +- datafusion/functions/src/unicode/rpad.rs | 38 +- datafusion/functions/src/unicode/strpos.rs | 39 +- datafusion/functions/src/unicode/substrindex.rs | 70 +- datafusion/functions/src/unicode/translate.rs | 69 +- datafusion/functions/src/utils.rs | 153 +- datafusion/macros/Cargo.toml | 2 +- datafusion/optimizer/src/analyzer/type_coercion.rs | 185 ++- datafusion/optimizer/src/decorrelate.rs | 10 +- .../src/decorrelate_predicate_subquery.rs | 72 +- datafusion/optimizer/src/eliminate_cross_join.rs | 3 + datafusion/optimizer/src/eliminate_outer_join.rs | 1 + .../optimizer/src/extract_equijoin_predicate.rs | 4 + datafusion/optimizer/src/optimizer.rs | 28 +- .../src/simplify_expressions/expr_simplifier.rs | 213 +-- .../optimizer/src/simplify_expressions/mod.rs | 3 +- .../src/simplify_expressions/simplify_exprs.rs | 13 +- .../src/simplify_expressions/simplify_literal.rs | 148 ++ .../src/simplify_expressions/unwrap_cast.rs | 22 +- .../optimizer/src/simplify_expressions/utils.rs | 48 + .../physical-expr-adapter/src/schema_rewriter.rs | 4 +- datafusion/physical-expr-common/Cargo.toml | 3 + datafusion/physical-expr-common/src/lib.rs | 1 + .../src/metrics/baseline.rs | 4 +- .../src/metrics/builder.rs | 2 +- .../src/metrics/custom.rs | 2 +- .../physical-expr-common/src/metrics/expression.rs | 88 ++ .../src/metrics/mod.rs | 4 +- .../src/metrics/value.rs | 0 datafusion/physical-expr-common/src/sort_expr.rs | 21 +- datafusion/physical-expr-common/src/utils.rs | 17 +- datafusion/physical-expr/Cargo.toml | 4 + datafusion/physical-expr/benches/in_list.rs | 77 + datafusion/physical-expr/src/equivalence/class.rs | 8 +- datafusion/physical-expr/src/expressions/binary.rs | 94 ++ datafusion/physical-expr/src/expressions/case.rs | 279 +--- .../src/expressions/dynamic_filters.rs | 189 ++- .../physical-expr/src/expressions/in_list.rs | 302 +++- datafusion/physical-expr/src/partitioning.rs | 586 ++++++- datafusion/physical-expr/src/planner.rs | 33 +- datafusion/physical-expr/src/projection.rs | 317 +++- datafusion/physical-expr/src/scalar_function.rs | 8 +- datafusion/physical-expr/src/utils/mod.rs | 2 +- .../physical-optimizer/src/coalesce_batches.rs | 87 - .../physical-optimizer/src/enforce_distribution.rs | 86 +- .../physical-optimizer/src/join_selection.rs | 28 +- datafusion/physical-optimizer/src/lib.rs | 1 - datafusion/physical-optimizer/src/optimizer.rs | 4 - .../physical-optimizer/src/sanity_checker.rs | 3 +- .../physical-optimizer/src/topk_aggregation.rs | 21 +- datafusion/physical-plan/Cargo.toml | 1 + .../src/aggregates/group_values/mod.rs | 4 +- .../group_values/multi_group_by/bytes_view.rs | 159 +- .../aggregates/group_values/multi_group_by/mod.rs | 24 +- .../src/aggregates/group_values/row.rs | 9 +- .../group_values/single_group_by/boolean.rs | 3 +- .../group_values/single_group_by/bytes.rs | 4 +- .../group_values/single_group_by/bytes_view.rs | 4 +- .../group_values/single_group_by/primitive.rs | 8 +- datafusion/physical-plan/src/aggregates/mod.rs | 284 +++- .../physical-plan/src/aggregates/row_hash.rs | 353 +++-- .../src/aggregates/topk/hash_table.rs | 416 +++-- .../physical-plan/src/aggregates/topk/heap.rs | 230 ++- .../src/aggregates/topk/priority_map.rs | 129 +- .../physical-plan/src/aggregates/topk_stream.rs | 14 + datafusion/physical-plan/src/async_func.rs | 118 +- datafusion/physical-plan/src/coalesce_batches.rs | 20 + datafusion/physical-plan/src/coop.rs | 39 +- datafusion/physical-plan/src/filter.rs | 5 +- datafusion/physical-plan/src/joins/array_map.rs | 547 +++++++ datafusion/physical-plan/src/joins/chain.rs | 69 + .../physical-plan/src/joins/hash_join/exec.rs | 1338 +++++++++++++--- .../physical-plan/src/joins/hash_join/mod.rs | 2 +- .../src/joins/hash_join/partitioned_hash_eval.rs | 458 +++++- .../src/joins/hash_join/shared_bounds.rs | 41 +- .../physical-plan/src/joins/hash_join/stream.rs | 163 +- .../physical-plan/src/joins/join_hash_map.rs | 117 +- datafusion/physical-plan/src/joins/mod.rs | 29 +- .../physical-plan/src/joins/nested_loop_join.rs | 33 +- .../src/joins/sort_merge_join/exec.rs | 18 +- .../src/joins/sort_merge_join/tests.rs | 74 + .../physical-plan/src/joins/stream_join_utils.rs | 14 +- datafusion/physical-plan/src/joins/test_utils.rs | 1 + datafusion/physical-plan/src/joins/utils.rs | 30 +- datafusion/physical-plan/src/memory.rs | 54 + datafusion/physical-plan/src/metrics.rs | 6 +- datafusion/physical-plan/src/projection.rs | 77 +- datafusion/physical-plan/src/recursive_query.rs | 4 +- datafusion/physical-plan/src/repartition/mod.rs | 128 +- .../physical-plan/src/sorts/multi_level_merge.rs | 12 +- datafusion/physical-plan/src/sorts/sort.rs | 465 +++++- .../src/sorts/sort_preserving_merge.rs | 18 +- .../physical-plan/src/spill/spill_manager.rs | 5 +- datafusion/physical-plan/src/stream.rs | 187 ++- datafusion/physical-plan/src/test.rs | 1 + .../physical-plan/src/windows/window_agg_exec.rs | 38 +- datafusion/physical-plan/src/work_table.rs | 103 +- datafusion/proto-common/src/from_proto/mod.rs | 25 +- datafusion/proto-common/src/to_proto/mod.rs | 2 +- datafusion/proto/Cargo.toml | 3 - datafusion/proto/proto/datafusion.proto | 12 + datafusion/proto/src/generated/pbjson.rs | 225 +++ datafusion/proto/src/generated/prost.rs | 21 +- datafusion/proto/src/logical_plan/file_formats.rs | 7 +- datafusion/proto/src/physical_plan/from_proto.rs | 76 +- datafusion/proto/src/physical_plan/mod.rs | 6 + datafusion/proto/src/physical_plan/to_proto.rs | 16 +- .../proto/tests/cases/roundtrip_logical_plan.rs | 15 +- .../proto/tests/cases/roundtrip_physical_plan.rs | 53 +- datafusion/spark/Cargo.toml | 5 + datafusion/spark/benches/space.rs | 73 + datafusion/spark/src/function/aggregate/collect.rs | 200 +++ datafusion/spark/src/function/aggregate/mod.rs | 19 +- datafusion/spark/src/function/array/mod.rs | 9 +- datafusion/spark/src/function/array/repeat.rs | 128 ++ datafusion/spark/src/function/collection/mod.rs | 13 +- datafusion/spark/src/function/collection/size.rs | 162 ++ datafusion/spark/src/function/conditional/if.rs | 2 +- datafusion/spark/src/function/datetime/date_add.rs | 25 +- datafusion/spark/src/function/datetime/date_sub.rs | 25 +- datafusion/spark/src/function/datetime/extract.rs | 268 ++++ datafusion/spark/src/function/datetime/mod.rs | 18 + datafusion/spark/src/function/hash/crc32.rs | 43 +- datafusion/spark/src/function/hash/sha1.rs | 27 +- datafusion/spark/src/function/math/abs.rs | 78 +- datafusion/spark/src/function/math/hex.rs | 27 +- datafusion/spark/src/function/mod.rs | 1 + datafusion/spark/src/function/null_utils.rs | 122 ++ datafusion/spark/src/function/string/ascii.rs | 77 +- datafusion/spark/src/function/string/concat.rs | 110 +- datafusion/spark/src/function/string/elt.rs | 67 +- .../spark/src/function/string/format_string.rs | 63 +- datafusion/spark/src/function/string/like.rs | 92 +- datafusion/spark/src/function/string/mod.rs | 4 + datafusion/spark/src/function/string/space.rs | 232 +++ datafusion/sql/src/expr/function.rs | 37 +- datafusion/sql/src/expr/identifier.rs | 4 +- datafusion/sql/src/expr/value.rs | 130 +- datafusion/sql/src/planner.rs | 2 +- datafusion/sql/src/query.rs | 1 + datafusion/sql/src/relation/mod.rs | 8 +- datafusion/sql/src/resolve.rs | 162 +- datafusion/sql/src/select.rs | 8 +- datafusion/sql/src/statement.rs | 192 ++- datafusion/sql/src/unparser/ast.rs | 46 +- datafusion/sql/src/unparser/dialect.rs | 45 +- datafusion/sql/src/unparser/expr.rs | 44 +- datafusion/sql/src/unparser/plan.rs | 36 +- datafusion/sql/src/utils.rs | 25 +- datafusion/sql/src/values.rs | 8 +- datafusion/sql/tests/cases/plan_to_sql.rs | 80 +- datafusion/sql/tests/common/mod.rs | 12 +- datafusion/sql/tests/sql_integration.rs | 42 +- datafusion/sqllogictest/Cargo.toml | 7 +- datafusion/sqllogictest/bin/postgres_container.rs | 6 +- datafusion/sqllogictest/src/engines/conversion.rs | 6 +- .../src/engines/postgres_engine/mod.rs | 131 +- .../src/engines/postgres_engine/types.rs | 45 - datafusion/sqllogictest/src/test_context.rs | 165 +- datafusion/sqllogictest/test_files/aggregate.slt | 501 +++++- .../test_files/aggregate_skip_partial.slt | 17 +- .../sqllogictest/test_files/aggregates_topk.slt | 88 ++ datafusion/sqllogictest/test_files/array.slt | 32 + .../sqllogictest/test_files/arrow_typeof.slt | 5 +- datafusion/sqllogictest/test_files/async_udf.slt | 12 +- datafusion/sqllogictest/test_files/case.slt | 5 +- datafusion/sqllogictest/test_files/cte.slt | 69 +- .../test_files/cte_quoted_reference.slt | 70 + .../test_files/datetime/arith_date_date.slt | 15 + .../test_files/datetime/arith_date_integer.slt | 89 ++ .../test_files/datetime/arith_date_interval.slt | 37 + .../test_files/datetime/arith_date_time.slt | 116 ++ .../test_files/datetime/arith_interval_double.slt | 41 + .../datetime/arith_interval_interval.slt | 27 + .../test_files/datetime/arith_negate_interval.slt | 13 + .../test_files/datetime/arith_time_interval.slt | 70 + .../test_files/datetime/arith_time_time.slt | 47 + .../datetime/arith_timestamp_duration.slt | 147 ++ .../datetime/arith_timestamp_interval.slt | 36 + .../datetime/arith_timestamp_timestamp.slt | 13 + .../{ => datetime}/current_date_timezone.slt | 0 .../{ => datetime}/current_time_timezone.slt | 0 .../test_files/{expr => datetime}/date_part.slt | 0 .../test_files/{ => datetime}/dates.slt | 14 +- .../test_files/{ => datetime}/interval.slt | 0 .../test_files/{ => datetime}/interval_mysql.slt | 0 .../test_files/{ => datetime}/timestamps.slt | 1245 ++++++++++++++- datafusion/sqllogictest/test_files/decimal.slt | 173 +- datafusion/sqllogictest/test_files/delete.slt | 16 +- datafusion/sqllogictest/test_files/dml_delete.slt | 202 +++ datafusion/sqllogictest/test_files/dml_update.slt | 286 ++++ .../test_files/dynamic_filter_pushdown_config.slt | 326 ---- datafusion/sqllogictest/test_files/encoding.slt | 143 +- datafusion/sqllogictest/test_files/errors.slt | 4 +- datafusion/sqllogictest/test_files/explain.slt | 4 - .../sqllogictest/test_files/explain_analyze.slt | 41 + .../sqllogictest/test_files/information_schema.slt | 63 +- datafusion/sqllogictest/test_files/join.slt.part | 8 +- datafusion/sqllogictest/test_files/joins.slt | 13 +- datafusion/sqllogictest/test_files/math.slt | 2 +- datafusion/sqllogictest/test_files/metadata.slt | 54 +- .../sqllogictest/test_files/named_arguments.slt | 3 +- .../test_files/null_aware_anti_join.slt | 453 ++++++ datafusion/sqllogictest/test_files/order.slt | 11 + datafusion/sqllogictest/test_files/parquet.slt | 4 + .../test_files/parquet_filter_pushdown.slt | 111 ++ .../sqllogictest/test_files/repartition_scan.slt | 4 + .../test_files/repartition_subset_satisfaction.slt | 526 +++++++ .../sqllogictest/test_files/run_end_encoded.slt | 57 + datafusion/sqllogictest/test_files/scalar.slt | 130 +- .../sqllogictest/test_files/schema_evolution.slt | 144 ++ .../sqllogictest/test_files/set_variable.slt | 18 + .../sqllogictest/test_files/simplify_expr.slt | 18 + .../sqllogictest/test_files/sort_merge_join.slt | 2 +- .../sqllogictest/test_files/sort_pushdown.slt | 886 +++++++++++ .../test_files/spark/aggregate/collect.slt | 93 ++ .../test_files/spark/array/array_repeat.slt | 77 +- .../test_files/spark/collection/size.slt | 132 ++ .../test_files/spark/datetime/date_add.slt | 12 +- .../test_files/spark/datetime/hour.slt | 23 +- .../test_files/spark/datetime/minute.slt | 23 +- .../test_files/spark/datetime/second.slt | 23 +- .../sqllogictest/test_files/spark/hash/crc32.slt | 6 +- .../sqllogictest/test_files/spark/string/space.slt | 35 +- .../test_files/string/string_query.slt.part | 10 +- datafusion/sqllogictest/test_files/struct.slt | 210 ++- .../sqllogictest/test_files/table_functions.slt | 24 + .../test_files/to_timestamp_timezone.slt | 204 +++ .../test_files/tpch/plans/q16.slt.part | 11 +- .../test_files/tpch/plans/q18.slt.part | 34 +- .../sqllogictest/test_files/tpch/plans/q3.slt.part | 30 +- datafusion/sqllogictest/test_files/unnest.slt | 30 +- datafusion/sqllogictest/test_files/update.slt | 20 +- datafusion/substrait/src/physical_plan/consumer.rs | 12 +- datafusion/substrait/src/serializer.rs | 1 + datafusion/wasmtest/src/lib.rs | 11 +- dev/changelog/52.0.0.md | 745 +++++++++ docs/requirements.txt | 4 +- docs/source/_static/theme_overrides.css | 18 + docs/source/contributor-guide/architecture.md | 2 +- docs/source/contributor-guide/communication.md | 68 +- docs/source/index.rst | 3 +- docs/source/library-user-guide/upgrading.md | 348 ++-- docs/source/user-guide/cli/functions.md | 50 + docs/source/user-guide/configs.md | 253 +-- docs/source/user-guide/introduction.md | 5 +- docs/source/user-guide/sql/format_options.md | 102 +- docs/source/user-guide/sql/scalar_functions.md | 245 ++- 602 files changed, 41458 insertions(+), 14279 deletions(-) --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
