This is an automated email from the ASF dual-hosted git repository.
comphead pushed a commit to branch df52
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/df52 by this push:
new e35812e00 chore: [Df52] migration - fix failing tests (#3507)
e35812e00 is described below
commit e35812e00e5e9e0198bc2b641c569b64add57111
Author: Oleks V <[email protected]>
AuthorDate: Sat Feb 14 15:05:13 2026 -0800
chore: [Df52] migration - fix failing tests (#3507)
---
dev/diffs/3.5.8.diff | 2 +-
native/Cargo.lock | 241 +++++++++++----------
native/core/src/execution/planner.rs | 7 +-
native/core/src/parquet/parquet_exec.rs | 4 +-
native/core/src/parquet/schema_adapter.rs | 130 ++++++-----
.../org/apache/comet/shims/CometExprShim.scala | 2 +-
6 files changed, 192 insertions(+), 194 deletions(-)
diff --git a/dev/diffs/3.5.8.diff b/dev/diffs/3.5.8.diff
index beef44549..3c81e01a3 100644
--- a/dev/diffs/3.5.8.diff
+++ b/dev/diffs/3.5.8.diff
@@ -2832,7 +2832,7 @@ index d675503a8ba..f220892396e 100644
+ }
assert(bucketedScan.length == expectedNumBucketedScan)
}
-
+
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala
index 7f6fa2a123e..c778b4e2c48 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala
diff --git a/native/Cargo.lock b/native/Cargo.lock
index d1c8acf52..4d828a79e 100644
--- a/native/Cargo.lock
+++ b/native/Cargo.lock
@@ -420,9 +420,9 @@ dependencies = [
[[package]]
name = "async-compression"
-version = "0.4.37"
+version = "0.4.39"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d10e4f991a553474232bc0a31799f6d24b034a84c0971d80d2e2f78b2e576e40"
+checksum = "68650b7df54f0293fd061972a0fb05aaf4fc0879d3b3d21a638a182c5c543b9f"
dependencies = [
"compression-codecs",
"compression-core",
@@ -528,7 +528,7 @@ checksum =
"9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.114",
+ "syn 2.0.115",
]
[[package]]
@@ -554,9 +554,9 @@ checksum =
"c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
[[package]]
name = "aws-config"
-version = "1.8.13"
+version = "1.8.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c456581cb3c77fafcc8c67204a70680d40b61112d6da78c77bd31d945b65f1b5"
+checksum = "8a8fc176d53d6fe85017f230405e3255cedb4a02221cb55ed6d76dccbbb099b2"
dependencies = [
"aws-credential-types",
"aws-runtime",
@@ -584,9 +584,9 @@ dependencies = [
[[package]]
name = "aws-credential-types"
-version = "1.2.11"
+version = "1.2.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3cd362783681b15d136480ad555a099e82ecd8e2d10a841e14dfd0078d67fee3"
+checksum = "e26bbf46abc608f2dc61fd6cb3b7b0665497cc259a21520151ed98f8b37d2c79"
dependencies = [
"aws-smithy-async",
"aws-smithy-runtime-api",
@@ -606,9 +606,9 @@ dependencies = [
[[package]]
name = "aws-lc-sys"
-version = "0.37.0"
+version = "0.37.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5c34dda4df7017c8db52132f0f8a2e0f8161649d15723ed63fc00c82d0f2081a"
+checksum = "b092fe214090261288111db7a2b2c2118e5a7f30dc2569f1732c4069a6840549"
dependencies = [
"cc",
"cmake",
@@ -618,9 +618,9 @@ dependencies = [
[[package]]
name = "aws-runtime"
-version = "1.6.0"
+version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c635c2dc792cb4a11ce1a4f392a925340d1bdf499289b5ec1ec6810954eb43f5"
+checksum = "b0f92058d22a46adf53ec57a6a96f34447daf02bff52e8fb956c66bcd5c6ac12"
dependencies = [
"aws-credential-types",
"aws-sigv4",
@@ -631,6 +631,7 @@ dependencies = [
"aws-smithy-types",
"aws-types",
"bytes",
+ "bytes-utils",
"fastrand",
"http 1.4.0",
"http-body 1.0.1",
@@ -642,9 +643,9 @@ dependencies = [
[[package]]
name = "aws-sdk-sso"
-version = "1.93.0"
+version = "1.94.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9dcb38bb33fc0a11f1ffc3e3e85669e0a11a37690b86f77e75306d8f369146a0"
+checksum = "699da1961a289b23842d88fe2984c6ff68735fdf9bdcbc69ceaeb2491c9bf434"
dependencies = [
"aws-credential-types",
"aws-runtime",
@@ -666,9 +667,9 @@ dependencies = [
[[package]]
name = "aws-sdk-ssooidc"
-version = "1.95.0"
+version = "1.96.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2ada8ffbea7bd1be1f53df1dadb0f8fdb04badb13185b3321b929d1ee3caad09"
+checksum = "e3e3a4cb3b124833eafea9afd1a6cc5f8ddf3efefffc6651ef76a03cbc6b4981"
dependencies = [
"aws-credential-types",
"aws-runtime",
@@ -690,9 +691,9 @@ dependencies = [
[[package]]
name = "aws-sdk-sts"
-version = "1.97.0"
+version = "1.98.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e6443ccadc777095d5ed13e21f5c364878c9f5bad4e35187a6cdbd863b0afcad"
+checksum = "89c4f19655ab0856375e169865c91264de965bd74c407c7f1e403184b1049409"
dependencies = [
"aws-credential-types",
"aws-runtime",
@@ -715,9 +716,9 @@ dependencies = [
[[package]]
name = "aws-sigv4"
-version = "1.3.8"
+version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "efa49f3c607b92daae0c078d48a4571f599f966dce3caee5f1ea55c4d9073f99"
+checksum = "68f6ae9b71597dc5fd115d52849d7a5556ad9265885ad3492ea8d73b93bbc46e"
dependencies = [
"aws-credential-types",
"aws-smithy-http",
@@ -737,9 +738,9 @@ dependencies = [
[[package]]
name = "aws-smithy-async"
-version = "1.2.11"
+version = "1.2.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "52eec3db979d18cb807fc1070961cc51d87d069abe9ab57917769687368a8c6c"
+checksum = "3cba48474f1d6807384d06fec085b909f5807e16653c5af5c45dfe89539f0b70"
dependencies = [
"futures-util",
"pin-project-lite",
@@ -748,9 +749,9 @@ dependencies = [
[[package]]
name = "aws-smithy-http"
-version = "0.63.3"
+version = "0.63.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "630e67f2a31094ffa51b210ae030855cb8f3b7ee1329bdd8d085aaf61e8b97fc"
+checksum = "af4a8a5fe3e4ac7ee871237c340bbce13e982d37543b65700f4419e039f5d78e"
dependencies = [
"aws-smithy-runtime-api",
"aws-smithy-types",
@@ -769,9 +770,9 @@ dependencies = [
[[package]]
name = "aws-smithy-http-client"
-version = "1.1.9"
+version = "1.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "12fb0abf49ff0cab20fd31ac1215ed7ce0ea92286ba09e2854b42ba5cabe7525"
+checksum = "0709f0083aa19b704132684bc26d3c868e06bd428ccc4373b0b55c3e8748a58b"
dependencies = [
"aws-smithy-async",
"aws-smithy-runtime-api",
@@ -793,27 +794,27 @@ dependencies = [
[[package]]
name = "aws-smithy-json"
-version = "0.62.3"
+version = "0.62.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3cb96aa208d62ee94104645f7b2ecaf77bf27edf161590b6224bfbac2832f979"
+checksum = "27b3a779093e18cad88bbae08dc4261e1d95018c4c5b9356a52bcae7c0b6e9bb"
dependencies = [
"aws-smithy-types",
]
[[package]]
name = "aws-smithy-observability"
-version = "0.2.4"
+version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c0a46543fbc94621080b3cf553eb4cbbdc41dd9780a30c4756400f0139440a1d"
+checksum = "4d3f39d5bb871aaf461d59144557f16d5927a5248a983a40654d9cf3b9ba183b"
dependencies = [
"aws-smithy-runtime-api",
]
[[package]]
name = "aws-smithy-query"
-version = "0.60.13"
+version = "0.60.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0cebbddb6f3a5bd81553643e9c7daf3cc3dc5b0b5f398ac668630e8a84e6fff0"
+checksum = "05f76a580e3d8f8961e5d48763214025a2af65c2fa4cd1fb7f270a0e107a71b0"
dependencies = [
"aws-smithy-types",
"urlencoding",
@@ -821,9 +822,9 @@ dependencies = [
[[package]]
name = "aws-smithy-runtime"
-version = "1.10.0"
+version = "1.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f3df87c14f0127a0d77eb261c3bc45d5b4833e2a1f63583ebfb728e4852134ee"
+checksum = "8fd3dfc18c1ce097cf81fced7192731e63809829c6cbf933c1ec47452d08e1aa"
dependencies = [
"aws-smithy-async",
"aws-smithy-http",
@@ -846,9 +847,9 @@ dependencies = [
[[package]]
name = "aws-smithy-runtime-api"
-version = "1.11.3"
+version = "1.11.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "49952c52f7eebb72ce2a754d3866cc0f87b97d2a46146b79f80f3a93fb2b3716"
+checksum = "8c55e0837e9b8526f49e0b9bfa9ee18ddee70e853f5bc09c5d11ebceddcb0fec"
dependencies = [
"aws-smithy-async",
"aws-smithy-types",
@@ -863,9 +864,9 @@ dependencies = [
[[package]]
name = "aws-smithy-types"
-version = "1.4.3"
+version = "1.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3b3a26048eeab0ddeba4b4f9d51654c79af8c3b32357dc5f336cee85ab331c33"
+checksum = "576b0d6991c9c32bc14fc340582ef148311f924d41815f641a308b5d11e8e7cd"
dependencies = [
"base64-simd",
"bytes",
@@ -886,18 +887,18 @@ dependencies = [
[[package]]
name = "aws-smithy-xml"
-version = "0.60.13"
+version = "0.60.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "11b2f670422ff42bf7065031e72b45bc52a3508bd089f743ea90731ca2b6ea57"
+checksum = "b53543b4b86ed43f051644f704a98c7291b3618b67adf057ee77a366fa52fcaa"
dependencies = [
"xmlparser",
]
[[package]]
name = "aws-types"
-version = "1.3.11"
+version = "1.3.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1d980627d2dd7bfc32a3c025685a033eeab8d365cc840c631ef59d1b8f428164"
+checksum = "6c50f3cdf47caa8d01f2be4a6663ea02418e892f9bbfd82c7b9a3a37eaccdd3a"
dependencies = [
"aws-credential-types",
"aws-smithy-async",
@@ -1008,7 +1009,7 @@ dependencies = [
"regex",
"rustc-hash 2.1.1",
"shlex",
- "syn 2.0.114",
+ "syn 2.0.115",
]
[[package]]
@@ -1102,7 +1103,7 @@ dependencies = [
"proc-macro2",
"quote",
"rustversion",
- "syn 2.0.114",
+ "syn 2.0.115",
]
[[package]]
@@ -1125,7 +1126,7 @@ dependencies = [
"proc-macro-crate",
"proc-macro2",
"quote",
- "syn 2.0.114",
+ "syn 2.0.115",
]
[[package]]
@@ -1334,18 +1335,18 @@ dependencies = [
[[package]]
name = "clap"
-version = "4.5.57"
+version = "4.5.58"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6899ea499e3fb9305a65d5ebf6e3d2248c5fab291f300ad0a704fbe142eae31a"
+checksum = "63be97961acde393029492ce0be7a1af7e323e6bae9511ebfac33751be5e6806"
dependencies = [
"clap_builder",
]
[[package]]
name = "clap_builder"
-version = "4.5.57"
+version = "4.5.58"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7b12c8b680195a62a8364d16b8447b01b6c2c8f9aaf68bee653be34d4245e238"
+checksum = "7f13174bda5dfd69d7e947827e5af4b0f2f94a4a3ee92912fba07a66150f21e2"
dependencies = [
"anstyle",
"clap_lex",
@@ -1353,9 +1354,9 @@ dependencies = [
[[package]]
name = "clap_lex"
-version = "0.7.7"
+version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32"
+checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831"
[[package]]
name = "cmake"
@@ -1656,7 +1657,7 @@ dependencies = [
"proc-macro2",
"quote",
"strsim",
- "syn 2.0.114",
+ "syn 2.0.115",
]
[[package]]
@@ -1670,7 +1671,7 @@ dependencies = [
"proc-macro2",
"quote",
"strsim",
- "syn 2.0.114",
+ "syn 2.0.115",
]
[[package]]
@@ -1683,7 +1684,7 @@ dependencies = [
"proc-macro2",
"quote",
"strsim",
- "syn 2.0.114",
+ "syn 2.0.115",
]
[[package]]
@@ -1694,7 +1695,7 @@ checksum =
"fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead"
dependencies = [
"darling_core 0.20.11",
"quote",
- "syn 2.0.114",
+ "syn 2.0.115",
]
[[package]]
@@ -1705,7 +1706,7 @@ checksum =
"d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81"
dependencies = [
"darling_core 0.21.3",
"quote",
- "syn 2.0.114",
+ "syn 2.0.115",
]
[[package]]
@@ -1716,7 +1717,7 @@ checksum =
"ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d"
dependencies = [
"darling_core 0.23.0",
"quote",
- "syn 2.0.114",
+ "syn 2.0.115",
]
[[package]]
@@ -2321,7 +2322,7 @@ checksum =
"c4fe888aeb6a095c4bcbe8ac1874c4b9a4c7ffa2ba849db7922683ba20875aaf"
dependencies = [
"datafusion-doc",
"quote",
- "syn 2.0.114",
+ "syn 2.0.115",
]
[[package]]
@@ -2529,9 +2530,9 @@ dependencies = [
[[package]]
name = "deranged"
-version = "0.5.5"
+version = "0.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ececcb659e7ba858fb4f10388c250a7252eb0a27373f1a72b8748afdd248e587"
+checksum = "cc3dc5ad92c2e2d1c193bbbbdf2ea477cb81331de4f3103f267ca18368b988c4"
dependencies = [
"powerfmt",
"serde_core",
@@ -2555,7 +2556,7 @@ dependencies = [
"darling 0.20.11",
"proc-macro2",
"quote",
- "syn 2.0.114",
+ "syn 2.0.115",
]
[[package]]
@@ -2565,7 +2566,7 @@ source =
"registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c"
dependencies = [
"derive_builder_core",
- "syn 2.0.114",
+ "syn 2.0.115",
]
[[package]]
@@ -2586,7 +2587,7 @@ dependencies = [
"proc-macro2",
"quote",
"rustc_version",
- "syn 2.0.114",
+ "syn 2.0.115",
"unicode-xid",
]
@@ -2616,7 +2617,7 @@ checksum =
"97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.114",
+ "syn 2.0.115",
]
[[package]]
@@ -2669,7 +2670,7 @@ checksum =
"44f23cf4b44bfce11a86ace86f8a73ffdec849c9fd00a386a53d278bd9e81fb3"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.114",
+ "syn 2.0.115",
]
[[package]]
@@ -2898,7 +2899,7 @@ checksum =
"162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.114",
+ "syn 2.0.115",
]
[[package]]
@@ -3564,9 +3565,9 @@ dependencies = [
[[package]]
name = "jiff"
-version = "0.2.19"
+version = "0.2.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d89a5b5e10d5a9ad6e5d1f4bd58225f655d6fe9767575a5e8ac5a6fe64e04495"
+checksum = "c867c356cc096b33f4981825ab281ecba3db0acefe60329f044c1789d94c6543"
dependencies = [
"jiff-static",
"jiff-tzdb-platform",
@@ -3579,13 +3580,13 @@ dependencies = [
[[package]]
name = "jiff-static"
-version = "0.2.19"
+version = "0.2.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ff7a39c8862fc1369215ccf0a8f12dd4598c7f6484704359f0351bd617034dbf"
+checksum = "f7946b4325269738f270bb55b3c19ab5c5040525f83fd625259422a9d25d9be5"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.114",
+ "syn 2.0.115",
]
[[package]]
@@ -3739,9 +3740,9 @@ checksum =
"2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7"
[[package]]
name = "libc"
-version = "0.2.180"
+version = "0.2.181"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc"
+checksum = "459427e2af2b9c839b132acb702a1c654d95e10f8c326bfc2ad11310e458b1c5"
[[package]]
name = "libloading"
@@ -3898,9 +3899,9 @@ dependencies = [
[[package]]
name = "memchr"
-version = "2.7.6"
+version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
+checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
[[package]]
name = "memmap2"
@@ -4273,9 +4274,9 @@ dependencies = [
[[package]]
name = "parquet"
-version = "57.2.0"
+version = "57.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5f6a2926a30477c0b95fea6c28c3072712b139337a242c2cc64817bdc20a8854"
+checksum = "6ee96b29972a257b855ff2341b37e61af5f12d6af1158b6dcdb5b31ea07bb3cb"
dependencies = [
"ahash 0.8.12",
"arrow-array",
@@ -4314,9 +4315,9 @@ dependencies = [
[[package]]
name = "parquet-variant"
-version = "57.2.0"
+version = "57.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c254fac16af78ad96aa442290cb6504951c4d484fdfcfe58f4588033d30e4c8f"
+checksum = "a6c31f8f9bfefb9dbf67b0807e00fd918676954a7477c889be971ac904103184"
dependencies = [
"arrow-schema",
"chrono",
@@ -4328,9 +4329,9 @@ dependencies = [
[[package]]
name = "parquet-variant-compute"
-version = "57.2.0"
+version = "57.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2178772f1c5ad7e5da8b569d986d3f5cbb4a4cee915925f28fdc700dbb2e80cf"
+checksum = "196cd9f7178fed3ac8d5e6d2b51193818e896bbc3640aea3fde3440114a8f39c"
dependencies = [
"arrow",
"arrow-schema",
@@ -4344,9 +4345,9 @@ dependencies = [
[[package]]
name = "parquet-variant-json"
-version = "57.2.0"
+version = "57.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7a1510daa121c04848368f9c38d0be425b9418c70be610ecc0aa8071738c0ef3"
+checksum = "ed23d7acc90ef60f7fdbcc473fa2fdaefa33542ed15b84388959346d52c839be"
dependencies = [
"arrow-schema",
"base64",
@@ -4421,7 +4422,7 @@ checksum =
"6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.114",
+ "syn 2.0.115",
]
[[package]]
@@ -4563,7 +4564,7 @@ source =
"registry+https://github.com/rust-lang/crates.io-index"
checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b"
dependencies = [
"proc-macro2",
- "syn 2.0.114",
+ "syn 2.0.115",
]
[[package]]
@@ -4633,7 +4634,7 @@ dependencies = [
"prost",
"prost-types",
"regex",
- "syn 2.0.114",
+ "syn 2.0.115",
"tempfile",
]
@@ -4647,7 +4648,7 @@ dependencies = [
"itertools 0.14.0",
"proc-macro2",
"quote",
- "syn 2.0.114",
+ "syn 2.0.115",
]
[[package]]
@@ -4912,7 +4913,7 @@ checksum =
"b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.114",
+ "syn 2.0.115",
]
[[package]]
@@ -5238,9 +5239,9 @@ checksum =
"b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
[[package]]
name = "ryu"
-version = "1.0.22"
+version = "1.0.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a50f4cf475b65d88e057964e0e9bb1f0aa9bbb2036dc65c64596b42932536984"
+checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f"
[[package]]
name = "same-file"
@@ -5378,7 +5379,7 @@ checksum =
"d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.114",
+ "syn 2.0.115",
]
[[package]]
@@ -5402,7 +5403,7 @@ checksum =
"175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.114",
+ "syn 2.0.115",
]
[[package]]
@@ -5445,7 +5446,7 @@ dependencies = [
"darling 0.21.3",
"proc-macro2",
"quote",
- "syn 2.0.114",
+ "syn 2.0.115",
]
[[package]]
@@ -5572,7 +5573,7 @@ checksum =
"da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.114",
+ "syn 2.0.115",
]
[[package]]
@@ -5611,7 +5612,7 @@ dependencies = [
"heck",
"proc-macro2",
"quote",
- "syn 2.0.114",
+ "syn 2.0.115",
]
[[package]]
@@ -5656,9 +5657,9 @@ dependencies = [
[[package]]
name = "syn"
-version = "2.0.114"
+version = "2.0.115"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a"
+checksum = "6e614ed320ac28113fa64972c4262d5dbc89deacdfd00c34a3e4cea073243c12"
dependencies = [
"proc-macro2",
"quote",
@@ -5682,7 +5683,7 @@ checksum =
"728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.114",
+ "syn 2.0.115",
]
[[package]]
@@ -5699,12 +5700,12 @@ checksum =
"55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
[[package]]
name = "tempfile"
-version = "3.24.0"
+version = "3.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c"
+checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1"
dependencies = [
"fastrand",
- "getrandom 0.3.4",
+ "getrandom 0.4.1",
"once_cell",
"rustix 1.1.3",
"windows-sys 0.61.2",
@@ -5736,7 +5737,7 @@ checksum =
"4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.114",
+ "syn 2.0.115",
]
[[package]]
@@ -5747,7 +5748,7 @@ checksum =
"ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.114",
+ "syn 2.0.115",
]
[[package]]
@@ -5902,7 +5903,7 @@ checksum =
"af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.114",
+ "syn 2.0.115",
]
[[package]]
@@ -5951,9 +5952,9 @@ dependencies = [
[[package]]
name = "toml_parser"
-version = "1.0.6+spec-1.1.0"
+version = "1.0.8+spec-1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a3198b4b0a8e11f09dd03e133c0280504d0801269e9afa46362ffde1cbeebf44"
+checksum = "0742ff5ff03ea7e67c8ae6c93cac239e0d9784833362da3f9a9c1da8dfefcbdc"
dependencies = [
"winnow",
]
@@ -6022,7 +6023,7 @@ checksum =
"7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.114",
+ "syn 2.0.115",
]
[[package]]
@@ -6066,7 +6067,7 @@ checksum =
"3c36781cc0e46a83726d9879608e4cf6c2505237e263a8eb8c24502989cfdb28"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.114",
+ "syn 2.0.115",
]
[[package]]
@@ -6086,9 +6087,9 @@ checksum =
"562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb"
[[package]]
name = "unicode-ident"
-version = "1.0.22"
+version = "1.0.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"
+checksum = "537dd038a89878be9b64dd4bd1b260315c1bb94f4d784956b81e27a088d9a09e"
[[package]]
name = "unicode-segmentation"
@@ -6272,7 +6273,7 @@ dependencies = [
"bumpalo",
"proc-macro2",
"quote",
- "syn 2.0.114",
+ "syn 2.0.115",
"wasm-bindgen-shared",
]
@@ -6425,7 +6426,7 @@ checksum =
"053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.114",
+ "syn 2.0.115",
]
[[package]]
@@ -6436,7 +6437,7 @@ checksum =
"3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.114",
+ "syn 2.0.115",
]
[[package]]
@@ -6733,7 +6734,7 @@ dependencies = [
"heck",
"indexmap 2.13.0",
"prettyplease",
- "syn 2.0.114",
+ "syn 2.0.115",
"wasm-metadata",
"wit-bindgen-core",
"wit-component",
@@ -6749,7 +6750,7 @@ dependencies = [
"prettyplease",
"proc-macro2",
"quote",
- "syn 2.0.114",
+ "syn 2.0.115",
"wit-bindgen-core",
"wit-bindgen-rust",
]
@@ -6831,7 +6832,7 @@ checksum =
"b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.114",
+ "syn 2.0.115",
"synstructure",
]
@@ -6852,7 +6853,7 @@ checksum =
"4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.114",
+ "syn 2.0.115",
]
[[package]]
@@ -6872,7 +6873,7 @@ checksum =
"d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.114",
+ "syn 2.0.115",
"synstructure",
]
@@ -6912,7 +6913,7 @@ checksum =
"eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.114",
+ "syn 2.0.115",
]
[[package]]
@@ -6923,9 +6924,9 @@ checksum =
"a7948af682ccbc3342b6e9420e8c51c1fe5d7bf7756002b4a3c6cabfe96a7e3c"
[[package]]
name = "zmij"
-version = "1.0.19"
+version = "1.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3ff05f8caa9038894637571ae6b9e29466c1f4f829d26c9b28f869a29cbe3445"
+checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"
[[package]]
name = "zstd"
diff --git a/native/core/src/execution/planner.rs
b/native/core/src/execution/planner.rs
index b4a538aae..aeb3db716 100644
--- a/native/core/src/execution/planner.rs
+++ b/native/core/src/execution/planner.rs
@@ -1030,7 +1030,7 @@ impl PhysicalPlanner {
.map(|expr| self.create_expr(expr,
Arc::clone(&required_schema)))
.collect();
- let default_values: Option<HashMap<usize, ScalarValue>> = if
!scan
+ let default_values: Option<HashMap<Column, ScalarValue>> = if
!scan
.default_values
.is_empty()
{
@@ -1060,6 +1060,11 @@ impl PhysicalPlanner {
default_values_indexes
.into_iter()
.zip(default_values)
+ .map(|(idx, scalar_value)| {
+ let field = required_schema.field(idx);
+ let column =
Column::new(field.name().as_str(), idx);
+ (column, scalar_value)
+ })
.collect(),
)
} else {
diff --git a/native/core/src/parquet/parquet_exec.rs
b/native/core/src/parquet/parquet_exec.rs
index f4cc7bf9f..2d970734b 100644
--- a/native/core/src/parquet/parquet_exec.rs
+++ b/native/core/src/parquet/parquet_exec.rs
@@ -28,7 +28,7 @@ use datafusion::datasource::physical_plan::{
use datafusion::datasource::source::DataSourceExec;
use datafusion::execution::object_store::ObjectStoreUrl;
use datafusion::execution::SendableRecordBatchStream;
-use datafusion::physical_expr::expressions::BinaryExpr;
+use datafusion::physical_expr::expressions::{BinaryExpr, Column};
use datafusion::physical_expr::PhysicalExpr;
use datafusion::physical_expr_adapter::PhysicalExprAdapterFactory;
use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
@@ -67,7 +67,7 @@ pub(crate) fn init_datasource_exec(
file_groups: Vec<Vec<PartitionedFile>>,
projection_vector: Option<Vec<usize>>,
data_filters: Option<Vec<Arc<dyn PhysicalExpr>>>,
- default_values: Option<HashMap<usize, ScalarValue>>,
+ default_values: Option<HashMap<Column, ScalarValue>>,
session_timezone: &str,
case_sensitive: bool,
session_ctx: &Arc<SessionContext>,
diff --git a/native/core/src/parquet/schema_adapter.rs
b/native/core/src/parquet/schema_adapter.rs
index 2874b6cbf..3402377b5 100644
--- a/native/core/src/parquet/schema_adapter.rs
+++ b/native/core/src/parquet/schema_adapter.rs
@@ -41,9 +41,6 @@ use datafusion_physical_expr_adapter::{
};
use std::collections::HashMap;
use std::sync::Arc;
-// ============================================================================
-// New PhysicalExprAdapter Implementation (Recommended)
-// ============================================================================
/// Factory for creating Spark-compatible physical expression adapters.
///
@@ -54,15 +51,15 @@ pub struct SparkPhysicalExprAdapterFactory {
/// Spark-specific parquet options for type conversions
parquet_options: SparkParquetOptions,
/// Default values for columns that may be missing from the physical
schema.
- /// The key is the column index in the logical schema.
- default_values: Option<HashMap<usize, ScalarValue>>,
+ /// The key is the Column (containing name and index).
+ default_values: Option<HashMap<Column, ScalarValue>>,
}
impl SparkPhysicalExprAdapterFactory {
/// Create a new factory with the given options.
pub fn new(
parquet_options: SparkParquetOptions,
- default_values: Option<HashMap<usize, ScalarValue>>,
+ default_values: Option<HashMap<Column, ScalarValue>>,
) -> Self {
Self {
parquet_options,
@@ -186,8 +183,8 @@ struct SparkPhysicalExprAdapter {
physical_file_schema: SchemaRef,
/// Spark-specific options for type conversions
parquet_options: SparkParquetOptions,
- /// Default values for missing columns (keyed by logical schema index)
- default_values: Option<HashMap<usize, ScalarValue>>,
+ /// Default values for missing columns (keyed by Column)
+ default_values: Option<HashMap<Column, ScalarValue>>,
/// The default DataFusion adapter to delegate standard handling to
default_adapter: Arc<dyn PhysicalExprAdapter>,
/// Mapping from logical column names to original physical column names,
@@ -207,8 +204,10 @@ impl PhysicalExprAdapter for SparkPhysicalExprAdapter {
//
// The default adapter may fail for complex nested type casts (List,
Map).
// In that case, fall back to wrapping everything ourselves.
+ let expr = self.replace_missing_with_defaults(expr)?;
let expr = match self.default_adapter.rewrite(Arc::clone(&expr)) {
Ok(rewritten) => {
+ // Replace references to missing columns with default values
// Replace DataFusion's CastColumnExpr with either:
// - CometCastColumnExpr (for Struct/List/Map, uses
spark_parquet_convert)
// - Spark Cast (for simple scalar types)
@@ -216,9 +215,10 @@ impl PhysicalExprAdapter for SparkPhysicalExprAdapter {
.transform(|e| self.replace_with_spark_cast(e))
.data()?
}
- Err(_) => {
+ Err(e) => {
// Default adapter failed (likely complex nested type cast).
// Handle all type mismatches ourselves using
spark_parquet_convert.
+ log::info!("Default schema adapter error: {}", e);
self.wrap_all_type_mismatches(expr)?
}
};
@@ -249,7 +249,6 @@ impl PhysicalExprAdapter for SparkPhysicalExprAdapter {
}
}
-#[allow(dead_code)]
impl SparkPhysicalExprAdapter {
/// Wrap ALL Column expressions that have type mismatches with
CometCastColumnExpr.
/// This is the fallback path when the default adapter fails (e.g., for
complex
@@ -369,53 +368,6 @@ impl SparkPhysicalExprAdapter {
Ok(Transformed::no(expr))
}
- /// Cast Column expressions where the physical and logical datatypes
differ.
- ///
- /// This function traverses the expression tree and for each Column
expression,
- /// checks if the physical file schema datatype differs from the logical
file schema
- /// datatype. If they differ, it wraps the Column with a CastColumnExpr to
perform
- /// the necessary type conversion.
- fn cast_datafusion_unsupported_expr(
- &self,
- expr: Arc<dyn PhysicalExpr>,
- ) -> DataFusionResult<Arc<dyn PhysicalExpr>> {
- expr.transform(|e| {
- // Check if this is a Column expression
- if let Some(column) = e.as_any().downcast_ref::<Column>() {
- let col_idx = column.index();
-
- // dbg!(&self.logical_file_schema, &self.physical_file_schema);
-
- // Get the logical datatype (expected by the query)
- let logical_field =
self.logical_file_schema.fields().get(col_idx);
- // Get the physical datatype (actual file schema)
- let physical_field =
self.physical_file_schema.fields().get(col_idx);
-
- // dbg!(&logical_field, &physical_field);
-
- if let (Some(logical_field), Some(physical_field)) =
(logical_field, physical_field)
- {
- let logical_type = logical_field.data_type();
- let physical_type = physical_field.data_type();
-
- // If datatypes differ, insert a CastColumnExpr
- if logical_type != physical_type {
- let cast_expr: Arc<dyn PhysicalExpr> =
Arc::new(CometCastColumnExpr::new(
- Arc::clone(&e),
- Arc::clone(physical_field),
- Arc::clone(logical_field),
- None,
- ));
- // dbg!(&cast_expr);
- return Ok(Transformed::yes(cast_expr));
- }
- }
- }
- Ok(Transformed::no(e))
- })
- .data()
- }
-
/// Replace references to missing columns with default values.
fn replace_missing_with_defaults(
&self,
@@ -429,17 +381,55 @@ impl SparkPhysicalExprAdapter {
return Ok(expr);
}
- // Convert index-based defaults to name-based for
replace_columns_with_literals
- let name_based: HashMap<&str, &ScalarValue> = defaults
+ // dbg!(&self.logical_file_schema, &self.physical_file_schema);
+
+ // Convert Column-based defaults to name-based for
replace_columns_with_literals.
+ // Only include columns that are MISSING from the physical file schema.
+ // If the default value's type doesn't match the logical schema, cast
it using Spark cast.
+ let owned_values: Vec<(String, ScalarValue)> = defaults
.iter()
- .filter_map(|(idx, val)| {
- self.logical_file_schema
- .fields()
- .get(*idx)
- .map(|f| (f.name().as_str(), val))
+ .filter(|(col, _)| {
+ // Only include defaults for columns missing from the physical
file schema
+ let col_name = col.name();
+ if self.parquet_options.case_sensitive {
+
self.physical_file_schema.field_with_name(col_name).is_err()
+ } else {
+ !self
+ .physical_file_schema
+ .fields()
+ .iter()
+ .any(|f| f.name().eq_ignore_ascii_case(col_name))
+ }
+ })
+ .map(|(col, val)| {
+ let col_name = col.name();
+ let value = self
+ .logical_file_schema
+ .field_with_name(col_name)
+ .ok()
+ .filter(|field| val.data_type() != *field.data_type())
+ .and_then(|field| {
+ spark_parquet_convert(
+ ColumnarValue::Scalar(val.clone()),
+ field.data_type(),
+ &self.parquet_options,
+ )
+ .ok()
+ .and_then(|cv| match cv {
+ ColumnarValue::Scalar(s) => Some(s),
+ _ => None,
+ })
+ })
+ .unwrap_or_else(|| val.clone());
+ (col_name.to_string(), value)
})
.collect();
+ let name_based: HashMap<&str, &ScalarValue> =
+ owned_values.iter().map(|(k, v)| (k.as_str(), v)).collect();
+
+ dbg!(&name_based, &expr);
+
if name_based.is_empty() {
return Ok(expr);
}
@@ -510,13 +500,13 @@ pub fn adapt_batch_with_expressions(
pub struct SparkSchemaAdapterFactory {
/// Spark cast options
parquet_options: SparkParquetOptions,
- default_values: Option<HashMap<usize, ScalarValue>>,
+ default_values: Option<HashMap<Column, ScalarValue>>,
}
impl SparkSchemaAdapterFactory {
pub fn new(
options: SparkParquetOptions,
- default_values: Option<HashMap<usize, ScalarValue>>,
+ default_values: Option<HashMap<Column, ScalarValue>>,
) -> Self {
Self {
parquet_options: options,
@@ -554,7 +544,7 @@ pub struct SparkSchemaAdapter {
required_schema: SchemaRef,
/// Spark cast options
parquet_options: SparkParquetOptions,
- default_values: Option<HashMap<usize, ScalarValue>>,
+ default_values: Option<HashMap<Column, ScalarValue>>,
}
impl SchemaAdapter for SparkSchemaAdapter {
@@ -659,7 +649,7 @@ pub struct SchemaMapping {
field_mappings: Vec<Option<usize>>,
/// Spark cast options
parquet_options: SparkParquetOptions,
- default_values: Option<HashMap<usize, ScalarValue>>,
+ default_values: Option<HashMap<Column, ScalarValue>>,
}
impl SchemaMapper for SchemaMapping {
@@ -688,7 +678,9 @@ impl SchemaMapper for SchemaMapping {
|| {
if let Some(default_values) = &self.default_values {
// We have a map of default values, see if this
field is in there.
- if let Some(value) = default_values.get(&field_idx)
+ // Create a Column from the field name and index
to look up the default value
+ let column = Column::new(field.name().as_str(),
field_idx);
+ if let Some(value) = default_values.get(&column)
// Default value exists, construct a column from
it.
{
let cv = if field.data_type() ==
&value.data_type() {
diff --git
a/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala
b/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala
index 12ea91d42..9fe53b9a8 100644
--- a/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala
+++ b/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala
@@ -29,7 +29,7 @@ import org.apache.comet.CometSparkSessionExtensions.withInfo
import org.apache.comet.expressions.{CometCast, CometEvalMode}
import org.apache.comet.serde.{CommonStringExprs, Compatible, ExprOuterClass,
Incompatible}
import org.apache.comet.serde.ExprOuterClass.{BinaryOutputStyle, Expr}
-import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal,
optExprWithInfo, scalarFunctionExprToProto}
+import org.apache.comet.serde.QueryPlanSerde.exprToProtoInternal
/**
* `CometExprShim` acts as a shim for parsing expressions from different Spark
versions.
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]