alamb commented on code in PR #14384: URL: https://github.com/apache/datafusion/pull/14384#discussion_r1937460456
########## datafusion/sqllogictest/test_files/case.slt: ########## @@ -308,3 +308,113 @@ NULL NULL false statement ok drop table foo + + +# Test coercion of inner struct field names +# Reproducer for https://github.com/apache/datafusion/issues/14383 +statement ok +create table t as values +( + 100, -- column1 int (so the case isn't constant folded) + { 'foo': 'bar' }, -- column2 has List of Struct w/ Utf8 + { 'foo': arrow_cast('baz', 'Utf8View') }, -- column3 has List of Struct w/ Utf8View + { 'xxx': arrow_cast('blarg', 'Utf8View') } -- column4 has List of Struct w/ Utf8View and a different field name +); + + +# Note field names are foo/foo/xxx +query ??? +SELECT column2, column3, column4 FROM t; +---- +{foo: bar} {foo: baz} {xxx: blarg} + +# Coerce fields, expect the field name to be the name of the first arg to case +# the field should not be named 'c0' +query ? +SELECT + case + when column1 > 0 then column2 + when column1 < 0 then column3 + else column4 + end +FROM t; +---- +{xxx: bar} + +query ? +SELECT + case + when column1 > 0 then column3 -- different arg order affects field name + when column1 < 0 then column4 + else column2 + end +FROM t; +---- +{foo: baz} + +query ? 
+SELECT + case + when column1 > 0 then column4 -- different arg order affects field name + when column1 < 0 then column2 + else column3 + end +FROM t; +---- +{foo: blarg} + +statement ok +drop table t + + +# Test coercion of inner struct field names with different orders / missing fields +statement ok +create table t as values +( + 100, -- column1 int (so the case isn't constant folded) + { 'foo': 'a', 'xxx': 'b' }, -- column2: Struct with fields foo, xxx + { 'xxx': 'c', 'foo': 'd' }, -- column3: Struct with fields xxx, foo + { 'xxx': 'e' } -- column4: Struct with field xxx (no second field) +); + +# Note field names are in different orders Review Comment: - While working on https://github.com/apache/datafusion/pull/14385 I found that this behavior (using struct position rather than field name) is inconsistent. Specifically the basic case comparison goes through "comparison_coercion" (fixed in this PR) which uses the list field order. But when coercing list elements, that goes through `type_union_resolution` ([link](https://github.com/apache/datafusion/blob/8d006a287c653e8305827ad909c407baa7ed59fd/datafusion/expr-common/src/type_coercion/binary.rs#L383-L382)) which matches fields by name ([link](https://github.com/apache/datafusion/blob/8d006a287c653e8305827ad909c407baa7ed59fd/datafusion/expr-common/src/type_coercion/binary.rs#L484-L509)) @jayzhan211 is the eventual plan to unify these code paths? Or maybe the case coercion should use `type_union_resolution` 🤔 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org