alamb commented on code in PR #14384: URL: https://github.com/apache/datafusion/pull/14384#discussion_r1937410660
########## datafusion/sqllogictest/test_files/case.slt: ########## @@ -308,3 +308,113 @@ NULL NULL false statement ok drop table foo + + +# Test coercion of inner struct field names +# Reproducer for https://github.com/apache/datafusion/issues/14383 +statement ok +create table t as values +( + 100, -- column1 int (so the case isn't constant folded) + { 'foo': 'bar' }, -- column2 has List of Struct w/ Utf8 + { 'foo': arrow_cast('baz', 'Utf8View') }, -- column3 has List of Struct w/ Utf8View + { 'xxx': arrow_cast('blarg', 'Utf8View') } -- column4 has List of Struct w/ Utf8View and a different field name +); + + +# Note field names are foo/foo/xxx +query ??? +SELECT column2, column3, column4 FROM t; +---- +{foo: bar} {foo: baz} {xxx: blarg} + +# Coerce fields, expect the field name to be the name of the first arg to case +# the field should not be named 'c0' +query ? +SELECT + case + when column1 > 0 then column2 + when column1 < 0 then column3 + else column4 + end +FROM t; +---- +{xxx: bar} + +query ? +SELECT + case + when column1 > 0 then column3 -- different arg order affects field name + when column1 < 0 then column4 + else column2 + end +FROM t; +---- +{foo: baz} + +query ? 
+SELECT + case + when column1 > 0 then column4 -- different arg order affects field name + when column1 < 0 then column2 + else column3 + end +FROM t; +---- +{foo: blarg} + +statement ok +drop table t + + +# Test coercion of inner struct field names with different orders / missing fields +statement ok +create table t as values +( + 100, -- column1 int (so the case isn't constant folded) + { 'foo': 'a', 'xxx': 'b' }, -- column2: Struct with fields foo, xxx + { 'xxx': 'c', 'foo': 'd' }, -- column3: Struct with fields xxx, foo + { 'xxx': 'e' } -- column4: Struct with field xxx (no second field) +); + +# Note field names are in different orders Review Comment: This test documents some strange behavior, and I don't really know whether it is a bug or not. DataFusion treats the field order as important, and when coercing two structs, it does so in field order (not by name). So in this case, `{ 'foo': 'a', 'xxx': 'b' }` and `{ 'xxx': 'c', 'foo': 'd' }` are coerced to compare the values `a`/`c` and `b`/`d` (not the values `a`/`d` and `b`/`c`). This PR doesn't change this behavior, but I felt that it was an important behavior to document in tests, so I did so. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org