alamb commented on code in PR #10949:
URL: https://github.com/apache/datafusion/pull/10949#discussion_r1643398963
##########
datafusion/substrait/src/physical_plan/producer.rs:
##########
@@ -55,15 +60,56 @@ pub fn to_substrait_rel(
}
}
+ let mut names = vec![];
+ let mut types = vec![];
+
+ for field in base_config.file_schema.fields.iter() {
+ match to_substrait_type(field.data_type(), field.is_nullable()) {
+ Ok(t) => {
+ names.push(field.name().clone());
+ types.push(t);
+ }
+ Err(e) => return Err(e),
+ }
+ }
+
+ let type_info = Struct {
+ types,
+ // FIXME: duckdb doesn't set this field, keep it as default variant 0.
+ // https://github.com/duckdb/substrait/blob/b6f56643cb11d52de0e32c24a01dfd5947df62be/src/to_substrait.cpp#L1106-L1127
+ type_variation_reference: 0,
+ nullability: Nullability::Required.into(),
+ };
+
+ let mut select_struct = None;
+ if let Some(projection) = base_config.projection.as_ref() {
+ let struct_items = projection
+ .iter()
+ .map(|index| StructItem {
+ field: *index as i32,
+ // FIXME: duckdb sets this to None, but it's not clear why.
+ // https://github.com/duckdb/substrait/blob/b6f56643cb11d52de0e32c24a01dfd5947df62be/src/to_substrait.cpp#L1191
+ child: None,
+ })
+ .collect();
+
+ select_struct = Some(StructSelect { struct_items });
+ }
+
Ok(Box::new(Rel {
rel_type: Some(RelType::Read(Box::new(ReadRel {
common: None,
- base_schema: None,
+ base_schema: Some(NamedStruct {
+ names,
+ r#struct: Some(type_info),
+ }),
filter: None,
best_effort_filter: None,
projection: Some(MaskExpression {
- select: None,
- maintain_singular_struct: false,
+ select: select_struct,
+ // FIXME: duckdb set this to true, but it's not clear why.
Review Comment:
> // fixme: whatever this means
😆
##########
datafusion/substrait/tests/testdata/data.parquet:
##########
Review Comment:
Can you please put a README in that directory with this information? It is
important context but would likely be hard to find after this PR.
Perhaps:
https://github.com/apache/datafusion/tree/main/datafusion/substrait/tests/testdata/README.md
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]