alamb commented on code in PR #12864:
URL: https://github.com/apache/datafusion/pull/12864#discussion_r1811193956
##########
datafusion/expr/src/logical_plan/builder.rs:
##########
@@ -177,7 +179,7 @@ impl LogicalPlanBuilder {
/// so it's usually better to override the default names with a table
alias list.
///
/// If the values include params/binders such as $1, $2, $3, etc, then the
`param_data_types` should be provided.
- pub fn values(mut values: Vec<Vec<Expr>>) -> Result<Self> {
+ pub fn values(values: Vec<Vec<Expr>>, schema: Option<&DFSchemaRef>) ->
Result<Self> {
Review Comment:
👍 This makes a lot of sense to me to pass in the schema too if knon.
I think it might be nicer on users if we didn't make an API change and
instead added a new API like
```rust
pub fn values_with_schema(values: Vec<Vec<Expr>>, schema:
Option<&DFSchemaRef>) -> Result<Self> {
...
}
```
Even if we also deprecated `values` it would help users prepare for upgrade
##########
datafusion/expr/src/logical_plan/builder.rs:
##########
@@ -196,16 +198,55 @@ impl LogicalPlanBuilder {
}
}
- let empty_schema = DFSchema::empty();
+ // Check the type of value against the schema
+ if let Some(schema) = schema {
+ Self::infer_from_schema(values, schema)
+ } else {
+ // Infer from data itself
+ Self::infer_data(values)
+ }
+ }
+
+ fn infer_from_schema(values: Vec<Vec<Expr>>, schema: &DFSchema) ->
Result<Self> {
Review Comment:
Maybe we can give this a name to make it clear it is related to `VALUES`
processing
```suggestion
fn infer_values_from_schema(values: Vec<Vec<Expr>>, schema: &DFSchema)
-> Result<Self> {
```
##########
datafusion/sqllogictest/test_files/group_by.slt:
##########
@@ -3360,7 +3360,8 @@ physical_plan
05)--------CoalesceBatchesExec: target_batch_size=4
06)----------RepartitionExec: partitioning=Hash([sn@0, amount@1], 8),
input_partitions=8
07)------------AggregateExec: mode=Partial, gby=[sn@0 as sn, amount@1 as
amount], aggr=[]
-08)--------------MemoryExec: partitions=8, partition_sizes=[1, 0, 0, 0, 0, 0,
0, 0]
+08)--------------RepartitionExec: partitioning=RoundRobinBatch(8),
input_partitions=1
+09)----------------MemoryExec: partitions=1, partition_sizes=[1]
Review Comment:
(for other readers the explanation of why this changed is below at
https://github.com/apache/datafusion/pull/12864/files#r1797538174)
##########
datafusion/sql/src/planner.rs:
##########
@@ -154,6 +156,7 @@ impl PlannerContext {
ctes: HashMap::new(),
outer_query_schema: None,
outer_from_schema: None,
+ table_schema: None,
Review Comment:
Maybe we can name it `create_table_schema` to reflect that it is used for
`CREATE TABLE` processing?
##########
datafusion/sqllogictest/test_files/struct.slt:
##########
@@ -392,12 +392,12 @@ create table t(a struct<r varchar, c int>, b struct<r
varchar, c float>) as valu
query T
select arrow_typeof([a, b]) from t;
----
-List(Field { name: "item", data_type: Struct([Field { name: "r", data_type:
Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field
{ name: "c", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered:
false, metadata: {} }]), nullable: true, dict_id: 0, dict_is_ordered: false,
metadata: {} })
+List(Field { name: "item", data_type: Struct([Field { name: "r", data_type:
Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field
{ name: "c", data_type: Float32, nullable: true, dict_id: 0, dict_is_ordered:
false, metadata: {} }]), nullable: true, dict_id: 0, dict_is_ordered: false,
metadata: {} })
Review Comment:
this seems much more correct to me -- "c" is `float` not `Int32` 👍
##########
datafusion/sqllogictest/test_files/struct.slt:
##########
@@ -453,6 +453,27 @@ Struct([Field { name: "r", data_type: Utf8, nullable:
true, dict_id: 0, dict_is_
statement ok
drop table t;
+statement ok
+create table t as values({r: 'a', c: 1}), ({r: 'b', c: 2.3});
+
+query ?
+select * from t;
+----
+{c0: a, c1: 1.0}
+{c0: b, c1: 2.3}
Review Comment:
Nice
##########
datafusion/expr-common/src/type_coercion/binary.rs:
##########
@@ -529,6 +535,89 @@ fn type_union_resolution_coercion(
}
}
+pub fn try_type_union_resolution(data_types: &[DataType]) ->
Result<Vec<DataType>> {
Review Comment:
As this is a public function we perhaps add some comments here (maybe
highlighting how it is different than `type_union_resolution`)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]