This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 617700d1b3 Upgrade DataFusion to arrow-rs/parquet 57.2.0 (#19355)
617700d1b3 is described below

commit 617700d1b36b41cf6fcdad8fccbb2d5841028420
Author: Andrew Lamb <[email protected]>
AuthorDate: Tue Jan 13 11:14:33 2026 -0500

    Upgrade DataFusion to arrow-rs/parquet 57.2.0 (#19355)
    
    ## Which issue does this PR close?
    
    
    - Related to https://github.com/apache/arrow-rs/issues/8465
    - Closes https://github.com/apache/datafusion/issues/19290
    
    ## Rationale for this change
    
    Upgrade to the latest arrow-rs/parquet release (57.2.0).
    
    I made this PR early to test the arrow release with DataFusion
    
    ## What changes are included in this PR?
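    1. Update the arrow and parquet dependencies from 57.1.0 to 57.2.0
    2. Update call sites for the changed `UnionFields` API (replace `UnionFields::new` with `UnionFields::try_new` / `UnionFields::from_fields`)
    3. Update sqllogictest expectations (`case.slt`, `struct.slt`, `spark/hash/crc32.slt`) for behavior that changed with the upgrade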
    ## Are these changes tested?
    
    Yes, by CI
    
    ## Are there any user-facing changes?
    No
---
 Cargo.lock                                         | 109 ++++++++-------------
 Cargo.toml                                         |  14 +--
 datafusion/common/src/scalar/mod.rs                |   7 +-
 .../datasource-avro/src/avro_to_arrow/schema.rs    |   4 +-
 datafusion/functions/src/core/union_extract.rs     |   5 +-
 datafusion/physical-plan/src/filter.rs             |   5 +-
 datafusion/proto-common/src/from_proto/mod.rs      |  21 ++--
 .../proto/tests/cases/roundtrip_logical_plan.rs    |  12 ++-
 datafusion/sqllogictest/src/test_context.rs        |   5 +-
 datafusion/sqllogictest/test_files/case.slt        |   5 +-
 .../sqllogictest/test_files/spark/hash/crc32.slt   |   6 +-
 datafusion/sqllogictest/test_files/struct.slt      |  39 ++++++--
 12 files changed, 116 insertions(+), 116 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 9921dc63a5..9c26b085b7 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -232,9 +232,9 @@ checksum = 
"7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
 
 [[package]]
 name = "arrow"
-version = "57.1.0"
+version = "57.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "cb372a7cbcac02a35d3fb7b3fc1f969ec078e871f9bb899bf00a2e1809bec8a3"
+checksum = "2a2b10dcb159faf30d3f81f6d56c1211a5bea2ca424eabe477648a44b993320e"
 dependencies = [
  "arrow-arith",
  "arrow-array",
@@ -255,9 +255,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-arith"
-version = "57.1.0"
+version = "57.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "0f377dcd19e440174596d83deb49cd724886d91060c07fec4f67014ef9d54049"
+checksum = "288015089e7931843c80ed4032c5274f02b37bcb720c4a42096d50b390e70372"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -269,9 +269,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-array"
-version = "57.1.0"
+version = "57.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "a23eaff85a44e9fa914660fb0d0bb00b79c4a3d888b5334adb3ea4330c84f002"
+checksum = "65ca404ea6191e06bf30956394173337fa9c35f445bd447fe6c21ab944e1a23c"
 dependencies = [
  "ahash",
  "arrow-buffer",
@@ -288,9 +288,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-buffer"
-version = "57.1.0"
+version = "57.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "a2819d893750cb3380ab31ebdc8c68874dd4429f90fd09180f3c93538bd21626"
+checksum = "36356383099be0151dacc4245309895f16ba7917d79bdb71a7148659c9206c56"
 dependencies = [
  "bytes",
  "half",
@@ -300,9 +300,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-cast"
-version = "57.1.0"
+version = "57.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "e3d131abb183f80c450d4591dc784f8d7750c50c6e2bc3fcaad148afc8361271"
+checksum = "9c8e372ed52bd4ee88cc1e6c3859aa7ecea204158ac640b10e187936e7e87074"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -322,9 +322,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-csv"
-version = "57.1.0"
+version = "57.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "2275877a0e5e7e7c76954669366c2aa1a829e340ab1f612e647507860906fb6b"
+checksum = "8e4100b729fe656f2e4fb32bc5884f14acf9118d4ad532b7b33c1132e4dce896"
 dependencies = [
  "arrow-array",
  "arrow-cast",
@@ -337,9 +337,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-data"
-version = "57.1.0"
+version = "57.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "05738f3d42cb922b9096f7786f606fcb8669260c2640df8490533bb2fa38c9d3"
+checksum = "bf87f4ff5fc13290aa47e499a8b669a82c5977c6a1fedce22c7f542c1fd5a597"
 dependencies = [
  "arrow-buffer",
  "arrow-schema",
@@ -350,9 +350,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-flight"
-version = "57.1.0"
+version = "57.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "8b5f57c3d39d1b1b7c1376a772ea86a131e7da310aed54ebea9363124bb885e3"
+checksum = "f63654f21676be802d446c6c4bc54f6a47e18d55f9ae6f7195a6f6faf2ecdbeb"
 dependencies = [
  "arrow-arith",
  "arrow-array",
@@ -378,9 +378,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-ipc"
-version = "57.1.0"
+version = "57.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "3d09446e8076c4b3f235603d9ea7c5494e73d441b01cd61fb33d7254c11964b3"
+checksum = "eb3ca63edd2073fcb42ba112f8ae165df1de935627ead6e203d07c99445f2081"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -394,9 +394,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-json"
-version = "57.1.0"
+version = "57.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "371ffd66fa77f71d7628c63f209c9ca5341081051aa32f9c8020feb0def787c0"
+checksum = "a36b2332559d3310ebe3e173f75b29989b4412df4029a26a30cc3f7da0869297"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -418,9 +418,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-ord"
-version = "57.1.0"
+version = "57.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "cbc94fc7adec5d1ba9e8cd1b1e8d6f72423b33fe978bf1f46d970fafab787521"
+checksum = "13c4e0530272ca755d6814218dffd04425c5b7854b87fa741d5ff848bf50aa39"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -431,9 +431,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-row"
-version = "57.1.0"
+version = "57.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "169676f317157dc079cc5def6354d16db63d8861d61046d2f3883268ced6f99f"
+checksum = "b07f52788744cc71c4628567ad834cadbaeb9f09026ff1d7a4120f69edf7abd3"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -444,9 +444,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-schema"
-version = "57.1.0"
+version = "57.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "d27609cd7dd45f006abae27995c2729ef6f4b9361cde1ddd019dc31a5aa017e0"
+checksum = "6bb63203e8e0e54b288d0d8043ca8fa1013820822a27692ef1b78a977d879f2c"
 dependencies = [
  "bitflags",
  "serde",
@@ -456,9 +456,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-select"
-version = "57.1.0"
+version = "57.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "ae980d021879ea119dd6e2a13912d81e64abed372d53163e804dfe84639d8010"
+checksum = "c96d8a1c180b44ecf2e66c9a2f2bbcb8b1b6f14e165ce46ac8bde211a363411b"
 dependencies = [
  "ahash",
  "arrow-array",
@@ -470,9 +470,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-string"
-version = "57.1.0"
+version = "57.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "cf35e8ef49dcf0c5f6d175edee6b8af7b45611805333129c541a8b89a0fc0534"
+checksum = "a8ad6a81add9d3ea30bf8374ee8329992c7fd246ffd8b7e2f48a3cea5aa0cc9a"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -3506,7 +3506,7 @@ dependencies = [
  "js-sys",
  "log",
  "wasm-bindgen",
- "windows-core 0.62.2",
+ "windows-core",
 ]
 
 [[package]]
@@ -4317,9 +4317,9 @@ dependencies = [
 
 [[package]]
 name = "parquet"
-version = "57.1.0"
+version = "57.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "be3e4f6d320dd92bfa7d612e265d7d08bba0a240bab86af3425e1d255a511d89"
+checksum = "5f6a2926a30477c0b95fea6c28c3072712b139337a242c2cc64817bdc20a8854"
 dependencies = [
  "ahash",
  "arrow-array",
@@ -6780,7 +6780,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index";
 checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893"
 dependencies = [
  "windows-collections",
- "windows-core 0.61.2",
+ "windows-core",
  "windows-future",
  "windows-link 0.1.3",
  "windows-numerics",
@@ -6792,7 +6792,7 @@ version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
 checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8"
 dependencies = [
- "windows-core 0.61.2",
+ "windows-core",
 ]
 
 [[package]]
@@ -6804,21 +6804,8 @@ dependencies = [
  "windows-implement",
  "windows-interface",
  "windows-link 0.1.3",
- "windows-result 0.3.4",
- "windows-strings 0.4.2",
-]
-
-[[package]]
-name = "windows-core"
-version = "0.62.2"
-source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb"
-dependencies = [
- "windows-implement",
- "windows-interface",
- "windows-link 0.2.1",
- "windows-result 0.4.1",
- "windows-strings 0.5.1",
+ "windows-result",
+ "windows-strings",
 ]
 
 [[package]]
@@ -6827,7 +6814,7 @@ version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index";
 checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e"
 dependencies = [
- "windows-core 0.61.2",
+ "windows-core",
  "windows-link 0.1.3",
  "windows-threading",
 ]
@@ -6872,7 +6859,7 @@ version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
 checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1"
 dependencies = [
- "windows-core 0.61.2",
+ "windows-core",
  "windows-link 0.1.3",
 ]
 
@@ -6885,15 +6872,6 @@ dependencies = [
  "windows-link 0.1.3",
 ]
 
-[[package]]
-name = "windows-result"
-version = "0.4.1"
-source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5"
-dependencies = [
- "windows-link 0.2.1",
-]
-
 [[package]]
 name = "windows-strings"
 version = "0.4.2"
@@ -6903,15 +6881,6 @@ dependencies = [
  "windows-link 0.1.3",
 ]
 
-[[package]]
-name = "windows-strings"
-version = "0.5.1"
-source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091"
-dependencies = [
- "windows-link 0.2.1",
-]
-
 [[package]]
 name = "windows-sys"
 version = "0.52.0"
diff --git a/Cargo.toml b/Cargo.toml
index 29c11fa10c..e2bbf2ea98 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -91,19 +91,19 @@ ahash = { version = "0.8", default-features = false, 
features = [
     "runtime-rng",
 ] }
 apache-avro = { version = "0.21", default-features = false }
-arrow = { version = "57.1.0", features = [
+arrow = { version = "57.2.0", features = [
     "prettyprint",
     "chrono-tz",
 ] }
-arrow-buffer = { version = "57.1.0", default-features = false }
-arrow-flight = { version = "57.1.0", features = [
+arrow-buffer = { version = "57.2.0", default-features = false }
+arrow-flight = { version = "57.2.0", features = [
     "flight-sql-experimental",
 ] }
-arrow-ipc = { version = "57.1.0", default-features = false, features = [
+arrow-ipc = { version = "57.2.0", default-features = false, features = [
     "lz4",
 ] }
-arrow-ord = { version = "57.1.0", default-features = false }
-arrow-schema = { version = "57.1.0", default-features = false }
+arrow-ord = { version = "57.2.0", default-features = false }
+arrow-schema = { version = "57.2.0", default-features = false }
 async-trait = "0.1.89"
 bigdecimal = "0.4.8"
 bytes = "1.11"
@@ -166,7 +166,7 @@ log = "^0.4"
 num-traits = { version = "0.2" }
 object_store = { version = "0.12.4", default-features = false }
 parking_lot = "0.12"
-parquet = { version = "57.1.0", default-features = false, features = [
+parquet = { version = "57.2.0", default-features = false, features = [
     "arrow",
     "async",
     "object_store",
diff --git a/datafusion/common/src/scalar/mod.rs 
b/datafusion/common/src/scalar/mod.rs
index e4e048ad3c..eda4952cf5 100644
--- a/datafusion/common/src/scalar/mod.rs
+++ b/datafusion/common/src/scalar/mod.rs
@@ -8868,7 +8868,7 @@ mod tests {
             .unwrap(),
             ScalarValue::try_new_null(&DataType::Map(map_field_ref, 
false)).unwrap(),
             ScalarValue::try_new_null(&DataType::Union(
-                UnionFields::new(vec![42], vec![field_ref]),
+                UnionFields::try_new(vec![42], vec![field_ref]).unwrap(),
                 UnionMode::Dense,
             ))
             .unwrap(),
@@ -8971,13 +8971,14 @@ mod tests {
         }
 
         // Test union type
-        let union_fields = UnionFields::new(
+        let union_fields = UnionFields::try_new(
             vec![0, 1],
             vec![
                 Field::new("i32", DataType::Int32, false),
                 Field::new("f64", DataType::Float64, false),
             ],
-        );
+        )
+        .unwrap();
         let union_result = ScalarValue::new_default(&DataType::Union(
             union_fields.clone(),
             UnionMode::Sparse,
diff --git a/datafusion/datasource-avro/src/avro_to_arrow/schema.rs 
b/datafusion/datasource-avro/src/avro_to_arrow/schema.rs
index 0e8f2a4d56..053be3c9af 100644
--- a/datafusion/datasource-avro/src/avro_to_arrow/schema.rs
+++ b/datafusion/datasource-avro/src/avro_to_arrow/schema.rs
@@ -117,8 +117,8 @@ fn schema_to_field_with_props(
                     .iter()
                     .map(|s| schema_to_field_with_props(s, None, has_nullable, 
None))
                     .collect::<Result<Vec<Field>>>()?;
-                let type_ids = 0_i8..fields.len() as i8;
-                DataType::Union(UnionFields::new(type_ids, fields), 
UnionMode::Dense)
+                // Assign type_ids based on the order in which they appear
+                DataType::Union(UnionFields::from_fields(fields), 
UnionMode::Dense)
             }
         }
         AvroSchema::Record(RecordSchema { fields, .. }) => {
diff --git a/datafusion/functions/src/core/union_extract.rs 
b/datafusion/functions/src/core/union_extract.rs
index 56d4f23cc4..8d915fb2e2 100644
--- a/datafusion/functions/src/core/union_extract.rs
+++ b/datafusion/functions/src/core/union_extract.rs
@@ -189,13 +189,14 @@ mod tests {
     fn test_scalar_value() -> Result<()> {
         let fun = UnionExtractFun::new();
 
-        let fields = UnionFields::new(
+        let fields = UnionFields::try_new(
             vec![1, 3],
             vec![
                 Field::new("str", DataType::Utf8, false),
                 Field::new("int", DataType::Int32, false),
             ],
-        );
+        )
+        .unwrap();
 
         let args = vec![
             ColumnarValue::Scalar(ScalarValue::Union(
diff --git a/datafusion/physical-plan/src/filter.rs 
b/datafusion/physical-plan/src/filter.rs
index 674fe6692a..42adb84397 100644
--- a/datafusion/physical-plan/src/filter.rs
+++ b/datafusion/physical-plan/src/filter.rs
@@ -1557,13 +1557,14 @@ mod tests {
     #[test]
     fn test_equivalence_properties_union_type() -> Result<()> {
         let union_type = DataType::Union(
-            UnionFields::new(
+            UnionFields::try_new(
                 vec![0, 1],
                 vec![
                     Field::new("f1", DataType::Int32, true),
                     Field::new("f2", DataType::Utf8, true),
                 ],
-            ),
+            )
+            .unwrap(),
             UnionMode::Sparse,
         );
 
diff --git a/datafusion/proto-common/src/from_proto/mod.rs 
b/datafusion/proto-common/src/from_proto/mod.rs
index e8e71c3884..3c41b8cad9 100644
--- a/datafusion/proto-common/src/from_proto/mod.rs
+++ b/datafusion/proto-common/src/from_proto/mod.rs
@@ -304,13 +304,16 @@ impl TryFrom<&protobuf::arrow_type::ArrowTypeEnum> for 
DataType {
                 };
                 let union_fields = 
parse_proto_fields_to_fields(&union.union_types)?;
 
-                // Default to index based type ids if not provided
-                let type_ids: Vec<_> = match union.type_ids.is_empty() {
-                    true => (0..union_fields.len() as i8).collect(),
-                    false => union.type_ids.iter().map(|i| *i as i8).collect(),
+                // Default to index based type ids if not explicitly provided
+                let union_fields = if union.type_ids.is_empty() {
+                    UnionFields::from_fields(union_fields)
+                } else {
+                    let type_ids = union.type_ids.iter().map(|i| *i as i8);
+                    UnionFields::try_new(type_ids, union_fields).map_err(|e| {
+                        DataFusionError::from(e).context("Deserializing Union 
DataType")
+                    })?
                 };
-
-                DataType::Union(UnionFields::new(type_ids, union_fields), 
union_mode)
+                DataType::Union(union_fields, union_mode)
             }
             arrow_type::ArrowTypeEnum::Dictionary(dict) => {
                 let key_datatype = 
dict.as_ref().key.as_deref().required("key")?;
@@ -602,7 +605,9 @@ impl TryFrom<&protobuf::ScalarValue> for ScalarValue {
                     .collect::<Option<Vec<_>>>();
                 let fields = fields.ok_or_else(|| 
Error::required("UnionField"))?;
                 let fields = parse_proto_fields_to_fields(&fields)?;
-                let fields = UnionFields::new(ids, fields);
+                let union_fields = UnionFields::try_new(ids, 
fields).map_err(|e| {
+                    DataFusionError::from(e).context("Deserializing Union 
ScalarValue")
+                })?;
                 let v_id = val.value_id as i8;
                 let val = match &val.value {
                     None => None,
@@ -614,7 +619,7 @@ impl TryFrom<&protobuf::ScalarValue> for ScalarValue {
                         Some((v_id, Box::new(val)))
                     }
                 };
-                Self::Union(val, fields, mode)
+                Self::Union(val, union_fields, mode)
             }
             Value::FixedSizeBinaryValue(v) => {
                 Self::FixedSizeBinary(v.length, Some(v.clone().values))
diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs 
b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
index bcfda648b5..b9af9fc935 100644
--- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
+++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
@@ -1780,19 +1780,20 @@ fn round_trip_datatype() {
             ),
         ])),
         DataType::Union(
-            UnionFields::new(
+            UnionFields::try_new(
                 vec![7, 5, 3],
                 vec![
                     Field::new("nullable", DataType::Boolean, false),
                     Field::new("name", DataType::Utf8, false),
                     Field::new("datatype", DataType::Binary, false),
                 ],
-            ),
+            )
+            .unwrap(),
             UnionMode::Sparse,
         ),
         DataType::Union(
-            UnionFields::new(
-                vec![5, 8, 1],
+            UnionFields::try_new(
+                vec![5, 8, 1, 100],
                 vec![
                     Field::new("nullable", DataType::Boolean, false),
                     Field::new("name", DataType::Utf8, false),
@@ -1807,7 +1808,8 @@ fn round_trip_datatype() {
                         true,
                     ),
                 ],
-            ),
+            )
+            .unwrap(),
             UnionMode::Dense,
         ),
         DataType::Dictionary(
diff --git a/datafusion/sqllogictest/src/test_context.rs 
b/datafusion/sqllogictest/src/test_context.rs
index a9aa3baa24..d416dc1bcf 100644
--- a/datafusion/sqllogictest/src/test_context.rs
+++ b/datafusion/sqllogictest/src/test_context.rs
@@ -538,14 +538,15 @@ fn create_example_udf() -> ScalarUDF {
 
 fn register_union_table(ctx: &SessionContext) {
     let union = UnionArray::try_new(
-        UnionFields::new(
+        UnionFields::try_new(
             // typeids: 3 for int, 1 for string
             vec![3, 1],
             vec![
                 Field::new("int", DataType::Int32, false),
                 Field::new("string", DataType::Utf8, false),
             ],
-        ),
+        )
+        .unwrap(),
         ScalarBuffer::from(vec![3, 1, 3]),
         None,
         vec![
diff --git a/datafusion/sqllogictest/test_files/case.slt 
b/datafusion/sqllogictest/test_files/case.slt
index 074d216ac7..481dde5be9 100644
--- a/datafusion/sqllogictest/test_files/case.slt
+++ b/datafusion/sqllogictest/test_files/case.slt
@@ -384,8 +384,7 @@ SELECT column2, column3, column4  FROM t;
 {foo: a, xxx: b} {xxx: c, foo: d} {xxx: e}
 
 # coerce structs with different field orders,
-# (note the *value*s are from column2 but the field name is 'xxx', as the 
coerced
-# type takes the field name from the last argument (column3)
+# should keep the same field values
 query ?
 SELECT
   case
@@ -394,7 +393,7 @@ SELECT
   end
 FROM t;
 ----
-{xxx: a, foo: b}
+{xxx: b, foo: a}
 
 # coerce structs with different field orders
 query ?
diff --git a/datafusion/sqllogictest/test_files/spark/hash/crc32.slt 
b/datafusion/sqllogictest/test_files/spark/hash/crc32.slt
index 6fbeb11fb9..df5588c758 100644
--- a/datafusion/sqllogictest/test_files/spark/hash/crc32.slt
+++ b/datafusion/sqllogictest/test_files/spark/hash/crc32.slt
@@ -81,7 +81,7 @@ SELECT crc32(arrow_cast('Spark', 'BinaryView'));
 ----
 1557323817
 
-# Upstream arrow-rs issue: https://github.com/apache/arrow-rs/issues/8841
-# This should succeed after we receive the fix
-query error Arrow error: Compute error: Internal Error: Cannot cast BinaryView 
to BinaryArray of expected type
+query I
 select crc32(arrow_cast(null, 'Dictionary(Int32, Utf8)'))
+----
+NULL
diff --git a/datafusion/sqllogictest/test_files/struct.slt 
b/datafusion/sqllogictest/test_files/struct.slt
index d985af1104..a91a5e7f87 100644
--- a/datafusion/sqllogictest/test_files/struct.slt
+++ b/datafusion/sqllogictest/test_files/struct.slt
@@ -492,9 +492,18 @@ Struct("r": Utf8, "c": Float64)
 statement ok
 drop table t;
 
-query error DataFusion error: Optimizer rule 'simplify_expressions' 
failed[\s\S]*Arrow error: Cast error: Cannot cast string 'a' to value of 
Float64 type
+statement ok
 create table t as values({r: 'a', c: 1}), ({c: 2.3, r: 'b'});
 
+query ?
+select * from t;
+----
+{c: 1.0, r: a}
+{c: 2.3, r: b}
+
+statement ok
+drop table t;
+
 ##################################
 ## Test Coalesce with Struct
 ##################################
@@ -560,10 +569,18 @@ create table t(a struct(r varchar, c int), b struct(r 
varchar, c float)) as valu
     (row('purple', 1), row('green', 2.3));
 
 # out of order struct literal
-# TODO: This query should not fail
-statement error DataFusion error: Optimizer rule 'simplify_expressions' 
failed[\s\S]*Arrow error: Cast error: Cannot cast string 'b' to value of Int32 
type
+statement ok
 create table t(a struct(r varchar, c int)) as values ({r: 'a', c: 1}), ({c: 2, 
r: 'b'});
 
+query ?
+select * from t;
+----
+{r: a, c: 1}
+{r: b, c: 2}
+
+statement ok
+drop table t;
+
 ##################################
 ## Test Array of Struct
 ##################################
@@ -573,9 +590,11 @@ select [{r: 'a', c: 1}, {r: 'b', c: 2}];
 ----
 [{r: a, c: 1}, {r: b, c: 2}]
 
-# Can't create a list of struct with different field types
-query error
+# Create a list of struct with different field types
+query ?
 select [{r: 'a', c: 1}, {c: 2, r: 'b'}];
+----
+[{c: 1, r: a}, {c: 2, r: b}]
 
 statement ok
 create table t(a struct(r varchar, c int), b struct(r varchar, c float)) as 
values (row('a', 1), row('b', 2.3));
@@ -592,9 +611,11 @@ drop table t;
 statement ok
 create table t(a struct(r varchar, c int), b struct(c float, r varchar)) as 
values (row('a', 1), row(2.3, 'b'));
 
-# create array with different struct type is not valid
-query error
+# create array with different struct type should be cast
+query T
 select arrow_typeof([a, b]) from t;
+----
+List(Struct("c": Float32, "r": Utf8View))
 
 statement ok
 drop table t;
@@ -602,13 +623,13 @@ drop table t;
 statement ok
 create table t(a struct(r varchar, c int, g float), b struct(r varchar, c 
float, g int)) as values (row('a', 1, 2.3), row('b', 2.3, 2));
 
-# type of each column should not coerced but perserve as it is
+# type of each column should not coerced but preserve as it is
 query T
 select arrow_typeof(a) from t;
 ----
 Struct("r": Utf8View, "c": Int32, "g": Float32)
 
-# type of each column should not coerced but perserve as it is
+# type of each column should not coerced but preserve as it is
 query T
 select arrow_typeof(b) from t;
 ----


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to