neilconway commented on code in PR #20819:
URL: https://github.com/apache/datafusion/pull/20819#discussion_r2936977959


##########
datafusion/sql/src/set_expr.rs:
##########
@@ -160,3 +182,56 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
         }
     }
 }
+
+// Adds aliases to SELECT items in a SetExpr using the provided schema.
+// This ensures that unnamed expressions on the right side of a UNION/INTERSECT/EXCEPT
+// get aliased with the column names from the left side, allowing queries like
+// `SELECT 1 AS a, 0 AS b, 0 AS c UNION ALL SELECT 2, 0, 0` to work correctly.
+fn alias_set_expr(set_expr: &mut SetExpr, schema: &DFSchemaRef) {
+    match set_expr {
+        SetExpr::Select(select) => alias_select_items(&mut select.projection, 
schema),
+        // For nested set operations, only alias the leftmost branch
+        SetExpr::SetOperation { left, .. } => alias_set_expr(left, schema),
+        // Handle parenthesized queries like (SELECT ... UNION ALL SELECT ...)
+        SetExpr::Query(query) => alias_set_expr(&mut query.body, schema),
+        // For other cases (Values, etc.), return as-is
+        _other => (),
+    }
+}
+
+// Adds aliases to literal value expressions where missing, based on the input schema, as these are
+// the ones that can cause duplicate name issues (e.g. `SELECT 0, 0` has two columns named `Int64(0)`).
+// Other expressions typically have unique names.
+fn alias_select_items(items: &mut [SelectItem], schema: &DFSchemaRef) {
+    let mut col_idx = 0;
+    for item in items.iter_mut() {
+        match item {
+            SelectItem::UnnamedExpr(expr) if is_literal_value(expr) => {
+                if let Some(field) = schema.fields().get(col_idx) {
+                    *item = SelectItem::ExprWithAlias {
+                        expr: expr.clone(),
+                        alias: Ident::new(field.name()),
+                    };
+                }
+                col_idx += 1;
+            }
+            SelectItem::UnnamedExpr(_) | SelectItem::ExprWithAlias { .. } => {
+                col_idx += 1;
+            }
+            SelectItem::Wildcard(_) | SelectItem::QualifiedWildcard(_, _) => {
+                // Wildcards expand to multiple columns - skip position tracking

Review Comment:
   Skipping wildcards seems like it won't produce the right behavior. Consider:
   
   ```sql
   CREATE TABLE t_left (c1 INT, c2 INT, c3 INT) AS VALUES (1, 2, 3);
   CREATE TABLE t_right (c1 INT, c2 INT) AS VALUES (10, 20);
   SELECT c1, c2, c3 FROM t_left UNION ALL SELECT *, 0 FROM t_right;
   ```
   yields
   ```
   Schema error: Schema contains qualified field name t_right.c1 and unqualified field name c1 which would be ambiguous
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to