This is an automated email from the ASF dual-hosted git repository. blaginin pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/datafusion-sandbox.git
commit 6b83141a35ba7cdeae6944c5fd9f749fdcde23af Merge: b9b756e0d 545c37fbc Author: blaginin <[email protected]> AuthorDate: Mon Feb 2 09:54:58 2026 +0000 Merge branch 'main' into sanbox-main # Conflicts: # .github/workflows/rust.yml .github/dependabot.yml | 2 +- .github/workflows/audit.yml | 2 +- .github/workflows/rust.yml | 19 +- Cargo.lock | 176 ++-- Cargo.toml | 80 +- benchmarks/src/clickbench.rs | 36 +- ci/scripts/check_examples_docs.sh | 85 +- datafusion-cli/src/exec.rs | 2 +- datafusion-examples/Cargo.toml | 7 +- datafusion-examples/README.md | 28 +- .../examples/builtin_functions/main.rs | 12 +- .../custom_data_source/adapter_serialization.rs | 519 +++++++++ .../custom_data_source/custom_file_casts.rs | 8 +- .../custom_data_source/default_column_values.rs | 8 +- .../custom_data_source/file_stream_provider.rs | 3 +- .../examples/custom_data_source/main.rs | 36 +- datafusion-examples/examples/data_io/catalog.rs | 8 - .../examples/data_io/json_shredding.rs | 8 +- datafusion-examples/examples/data_io/main.rs | 40 +- .../examples/data_io/parquet_encrypted.rs | 2 +- datafusion-examples/examples/dataframe/main.rs | 11 +- .../examples/execution_monitoring/main.rs | 12 +- .../examples/external_dependency/main.rs | 8 +- datafusion-examples/examples/flight/main.rs | 12 +- datafusion-examples/examples/flight/sql_server.rs | 5 - .../examples/proto/expression_deduplication.rs | 275 +++++ datafusion-examples/examples/proto/main.rs | 14 +- .../examples/query_planning/main.rs | 32 +- .../examples/relation_planner/main.rs | 12 +- datafusion-examples/examples/sql_ops/main.rs | 16 +- datafusion-examples/examples/udf/main.rs | 32 +- datafusion-examples/src/bin/examples-docs.rs | 45 + datafusion-examples/src/utils/examples_docs.rs | 684 ++++++++++++ datafusion-examples/src/utils/mod.rs | 1 + datafusion/catalog-listing/src/mod.rs | 1 - datafusion/catalog-listing/src/table.rs | 3 +- datafusion/catalog/src/information_schema.rs | 63 +- datafusion/catalog/src/lib.rs | 1 - datafusion/common-runtime/src/lib.rs | 1 - datafusion/common/src/config.rs | 2 +- datafusion/common/src/lib.rs | 1 - datafusion/core/Cargo.toml | 6 +- datafusion/core/benches/data_utils/mod.rs | 2 + .../core/benches/preserve_file_partitioning.rs | 2 +- datafusion/core/src/bin/print_functions_docs.rs | 27 +- datafusion/core/src/dataframe/mod.rs | 35 +- datafusion/core/src/dataframe/parquet.rs | 156 ++- .../core/src/datasource/file_format/parquet.rs | 7 +- datafusion/core/src/datasource/mod.rs | 6 +- .../core/src/datasource/physical_plan/parquet.rs | 96 +- datafusion/core/src/execution/context/mod.rs | 2 +- datafusion/core/src/lib.rs | 1 - datafusion/core/src/physical_planner.rs | 27 +- .../tests/custom_sources_cases/dml_planning.rs | 2 +- datafusion/core/tests/execution/coop.rs | 2 +- datafusion/core/tests/fifo/mod.rs | 4 +- .../aggregation_fuzzer/context_generator.rs | 2 +- .../fuzz_cases/aggregation_fuzzer/query_builder.rs | 6 +- datafusion/core/tests/fuzz_cases/join_fuzz.rs | 2 +- .../fuzz_cases/sort_preserving_repartition_fuzz.rs | 2 +- datafusion/core/tests/macro_hygiene/mod.rs | 4 +- datafusion/core/tests/parquet/expr_adapter.rs | 6 +- datafusion/core/tests/parquet/filter_pushdown.rs | 1 - datafusion/core/tests/parquet/page_pruning.rs | 60 +- .../{filter_pushdown/mod.rs => filter_pushdown.rs} | 8 +- .../tests/physical_optimizer/limit_pushdown.rs | 398 +++++-- datafusion/core/tests/physical_optimizer/mod.rs | 3 +- .../{filter_pushdown/util.rs => pushdown_utils.rs} | 37 +- datafusion/core/tests/sql/explain_analyze.rs | 5 +- .../user_defined/user_defined_window_functions.rs | 2 +- datafusion/datasource-arrow/src/mod.rs | 1 - datafusion/datasource-avro/src/mod.rs | 1 - datafusion/datasource-csv/src/mod.rs | 1 - datafusion/datasource-json/src/mod.rs | 1 - datafusion/datasource-parquet/src/file_format.rs | 10 +- datafusion/datasource-parquet/src/metadata.rs | 53 +- datafusion/datasource-parquet/src/metrics.rs | 11 +- datafusion/datasource-parquet/src/mod.rs | 1 - datafusion/datasource-parquet/src/opener.rs | 10 +- datafusion/datasource-parquet/src/page_filter.rs | 25 +- datafusion/datasource-parquet/src/row_filter.rs | 2 + datafusion/datasource/src/file_sink_config.rs | 48 + datafusion/datasource/src/mod.rs | 1 - datafusion/datasource/src/write/demux.rs | 5 +- datafusion/doc/src/lib.rs | 1 - datafusion/execution/src/config.rs | 6 + datafusion/execution/src/lib.rs | 1 - datafusion/execution/src/memory_pool/mod.rs | 92 +- datafusion/execution/src/memory_pool/pool.rs | 38 +- datafusion/expr-common/src/lib.rs | 1 - datafusion/expr/src/expr.rs | 2 +- datafusion/expr/src/expr_schema.rs | 254 ++--- datafusion/expr/src/lib.rs | 1 - datafusion/expr/src/type_coercion/functions.rs | 282 +++-- datafusion/expr/src/udf.rs | 101 +- datafusion/expr/src/utils.rs | 27 +- datafusion/ffi/src/lib.rs | 1 - datafusion/functions-aggregate-common/src/lib.rs | 2 - .../functions-aggregate-common/src/tdigest.rs | 54 +- .../functions-aggregate/benches/array_agg.rs | 2 +- datafusion/functions-aggregate/benches/count.rs | 2 +- .../functions-aggregate/src/approx_median.rs | 2 +- .../src/approx_percentile_cont.rs | 8 +- datafusion/functions-aggregate/src/lib.rs | 2 - datafusion/functions-nested/Cargo.toml | 8 + .../functions-nested/benches/array_remove.rs | 573 ++++++++++ .../functions-nested/benches/array_repeat.rs | 477 +++++++++ datafusion/functions-nested/src/lib.rs | 2 - datafusion/functions-nested/src/make_array.rs | 12 +- datafusion/functions-nested/src/remove.rs | 127 ++- datafusion/functions-nested/src/repeat.rs | 175 ++-- datafusion/functions-nested/src/sort.rs | 36 +- datafusion/functions-table/src/generate_series.rs | 37 +- datafusion/functions-table/src/lib.rs | 2 - datafusion/functions-window-common/src/lib.rs | 2 - datafusion/functions-window/src/lib.rs | 1 - datafusion/functions-window/src/nth_value.rs | 2 +- datafusion/functions/Cargo.toml | 9 +- datafusion/functions/benches/chr.rs | 31 +- datafusion/functions/benches/date_trunc.rs | 11 +- datafusion/functions/benches/encoding.rs | 20 +- datafusion/functions/benches/left.rs | 141 ++- datafusion/functions/benches/repeat.rs | 39 + datafusion/functions/benches/right.rs | 150 +++ datafusion/functions/benches/trim.rs | 4 +- datafusion/functions/src/datetime/date_trunc.rs | 26 +- datafusion/functions/src/datetime/to_local_time.rs | 2 +- datafusion/functions/src/datetime/to_timestamp.rs | 15 +- datafusion/functions/src/datetime/to_unixtime.rs | 7 +- datafusion/functions/src/encoding/inner.rs | 37 +- datafusion/functions/src/lib.rs | 2 - datafusion/functions/src/math/abs.rs | 4 +- datafusion/functions/src/math/floor.rs | 264 ++++- datafusion/functions/src/math/iszero.rs | 124 ++- datafusion/functions/src/math/nans.rs | 173 ++- datafusion/functions/src/string/chr.rs | 126 ++- datafusion/functions/src/string/repeat.rs | 164 ++- datafusion/functions/src/unicode/left.rs | 249 ++++- datafusion/functions/src/unicode/right.rs | 245 ++++- datafusion/functions/src/utils.rs | 22 +- datafusion/macros/Cargo.toml | 2 +- datafusion/macros/src/user_doc.rs | 1 - datafusion/optimizer/src/analyzer/type_coercion.rs | 66 +- datafusion/optimizer/src/lib.rs | 1 - datafusion/optimizer/src/push_down_filter.rs | 22 +- .../src/simplify_expressions/udf_preimage.rs | 1 - datafusion/physical-expr-adapter/src/lib.rs | 2 - .../physical-expr-adapter/src/schema_rewriter.rs | 65 +- .../physical-expr-common/src/binary_view_map.rs | 195 +++- datafusion/physical-expr-common/src/lib.rs | 2 - .../physical-expr-common/src/metrics/value.rs | 27 +- datafusion/physical-expr/benches/case_when.rs | 109 ++ .../src/expressions/dynamic_filters.rs | 17 +- datafusion/physical-expr/src/lib.rs | 2 - .../src/simplifier/const_evaluator.rs | 19 +- datafusion/physical-expr/src/simplifier/mod.rs | 4 +- .../physical-optimizer/src/aggregate_statistics.rs | 6 +- datafusion/physical-optimizer/src/lib.rs | 2 - .../physical-optimizer/src/limit_pushdown.rs | 3 +- .../physical-optimizer/src/projection_pushdown.rs | 17 +- .../physical-optimizer/src/update_aggr_exprs.rs | 6 +- datafusion/physical-plan/src/aggregates/mod.rs | 294 +++++- .../physical-plan/src/aggregates/no_grouping.rs | 25 +- .../physical-plan/src/aggregates/row_hash.rs | 72 +- datafusion/physical-plan/src/joins/cross_join.rs | 2 +- .../physical-plan/src/joins/hash_join/exec.rs | 10 - .../physical-plan/src/joins/nested_loop_join.rs | 4 +- .../src/joins/piecewise_merge_join/exec.rs | 2 +- datafusion/physical-plan/src/lib.rs | 2 - datafusion/physical-plan/src/sorts/sort.rs | 4 +- datafusion/physical-plan/src/sorts/stream.rs | 4 +- datafusion/physical-plan/src/stream.rs | 4 +- datafusion/physical-plan/src/work_table.rs | 2 +- datafusion/proto-common/src/generated/mod.rs | 1 + datafusion/proto-common/src/lib.rs | 1 - datafusion/proto/proto/datafusion.proto | 13 + datafusion/proto/src/bytes/mod.rs | 56 +- datafusion/proto/src/generated/mod.rs | 4 +- datafusion/proto/src/generated/pbjson.rs | 97 ++ datafusion/proto/src/generated/prost.rs | 39 + datafusion/proto/src/lib.rs | 1 - datafusion/proto/src/physical_plan/from_proto.rs | 169 ++- datafusion/proto/src/physical_plan/mod.rs | 1105 ++++++++++++-------- datafusion/proto/src/physical_plan/to_proto.rs | 174 ++- .../proto/tests/cases/roundtrip_physical_plan.rs | 237 ++++- datafusion/pruning/src/lib.rs | 1 - datafusion/session/src/lib.rs | 1 - datafusion/spark/Cargo.toml | 9 + datafusion/spark/src/function/array/spark_array.rs | 17 +- .../src/function/datetime/from_utc_timestamp.rs | 195 ++++ datafusion/spark/src/function/datetime/mod.rs | 60 ++ .../src/function/datetime/to_utc_timestamp.rs | 225 ++++ datafusion/spark/src/function/datetime/unix.rs | 174 +++ datafusion/spark/src/function/hash/sha2.rs | 249 ++--- datafusion/spark/src/function/math/abs.rs | 356 +++++-- datafusion/spark/src/function/string/base64.rs | 183 ++++ datafusion/spark/src/function/string/concat.rs | 46 +- datafusion/spark/src/function/string/mod.rs | 15 + datafusion/spark/src/lib.rs | 22 +- datafusion/spark/src/session_state.rs | 111 ++ datafusion/sql/src/lib.rs | 1 - datafusion/sqllogictest/Cargo.toml | 2 +- datafusion/sqllogictest/src/test_context.rs | 16 +- datafusion/sqllogictest/src/util.rs | 2 +- datafusion/sqllogictest/test_files/aggregate.slt | 21 +- datafusion/sqllogictest/test_files/array.slt | 43 + datafusion/sqllogictest/test_files/clickbench.slt | 34 +- .../test_files/dynamic_filter_pushdown_config.slt | 2 +- datafusion/sqllogictest/test_files/encoding.slt | 77 +- datafusion/sqllogictest/test_files/expr.slt | 10 + .../sqllogictest/test_files/information_schema.slt | 2 +- .../test_files/join_disable_repartition_joins.slt | 20 +- datafusion/sqllogictest/test_files/joins.slt | 7 +- datafusion/sqllogictest/test_files/limit.slt | 4 +- .../sqllogictest/test_files/limit_pruning.slt | 4 +- datafusion/sqllogictest/test_files/math.slt | 32 + .../test_files/projection_pushdown.slt | 331 +++++- .../sqllogictest/test_files/push_down_filter.slt | 254 +++++ datafusion/sqllogictest/test_files/scalar.slt | 4 +- datafusion/sqllogictest/test_files/spark/README.md | 12 + .../spark/datetime/from_utc_timestamp.slt | 156 +++ .../test_files/spark/datetime/to_utc_timestamp.slt | 150 ++- .../test_files/spark/datetime/unix.slt | 134 +++ .../sqllogictest/test_files/spark/hash/sha2.slt | 55 + .../sqllogictest/test_files/spark/math/abs.slt | 202 +++- .../test_files/spark/string/base64.slt | 113 +- .../test_files/spark/string/concat.slt | 24 + .../test_files/spark/string/unbase64.slt | 27 - .../sqllogictest/test_files/table_functions.slt | 30 +- datafusion/sqllogictest/test_files/union.slt | 35 +- datafusion/substrait/Cargo.toml | 2 +- datafusion/substrait/src/lib.rs | 1 - datafusion/wasmtest/src/lib.rs | 7 +- dev/changelog/52.1.0.md | 46 + dev/update_datafusion_versions.py | 8 +- docs/requirements.txt | 2 +- docs/source/_static/favicon.svg | 10 + docs/source/conf.py | 2 + docs/source/contributor-guide/howtos.md | 2 +- docs/source/contributor-guide/testing.md | 1 + docs/source/download.md | 2 +- docs/source/library-user-guide/extending-sql.md | 2 +- .../library-user-guide/functions/adding-udfs.md | 1 - docs/source/library-user-guide/query-optimizer.md | 2 + .../source/library-user-guide/table-constraints.md | 6 +- docs/source/library-user-guide/upgrading.md | 133 ++- docs/source/user-guide/arrow-introduction.md | 9 +- docs/source/user-guide/concepts-readings-events.md | 8 +- docs/source/user-guide/configs.md | 4 +- docs/source/user-guide/crate-configuration.md | 3 +- docs/source/user-guide/example-usage.md | 6 +- docs/source/user-guide/explain-usage.md | 1 + docs/source/user-guide/sql/data_types.md | 80 +- docs/source/user-guide/sql/format_options.md | 2 +- docs/source/user-guide/sql/scalar_functions.md | 23 +- test-utils/src/data_gen.rs | 2 +- 256 files changed, 11408 insertions(+), 2907 deletions(-) diff --cc .github/workflows/rust.yml index e9a6a3857,393625361..cae4ed09d --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@@ -747,10 -709,14 +747,15 @@@ jobs ./dev/update_function_docs.sh git diff --exit-code + # This job ensures `datafusion-examples/README.md` stays in sync with the source code: + # 1. Generates README automatically using the Rust examples docs generator + # (parsing documentation from `examples/<group>/main.rs`) + # 2. Formats the generated Markdown using DataFusion's standard Prettier setup + # 3. Compares the result against the committed README.md and fails if out-of-date examples-docs-check: name: check example README is up-to-date - needs: linux-build-lib + needs: [linux-build-lib, check-files] + if: needs.check-files.outputs.should_skip != 'true' runs-on: ubuntu-latest container: image: amd64/rust @@@ -760,7 -726,17 +765,17 @@@ with: submodules: true fetch-depth: 1 - + + - name: Mark repository as safe for git + # Required for git commands inside container (avoids "dubious ownership" error) + run: git config --global --add safe.directory "$GITHUB_WORKSPACE" + + - name: Set up Node.js (required for prettier) + # doc_prettier_check.sh uses npx to run prettier for Markdown formatting + uses: actions/setup-node@v4 + with: + node-version: '18' + - name: Run examples docs check script run: | bash ci/scripts/check_examples_docs.sh --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
