jayzhan211 commented on code in PR #14268:
URL: https://github.com/apache/datafusion/pull/14268#discussion_r1929710139


##########
datafusion/sqllogictest/test_files/expr.slt:
##########
@@ -344,6 +344,16 @@ SELECT ascii('💯a')
 ----
 128175
 
+query I
+SELECT ascii('222')
+----
+50
+
+query I
+SELECT ascii(2)

Review Comment:
   In DuckDB, this doesn't work
   ```
   D select ascii(2);
   Binder Error: No function matches the given name and argument types 
'ascii(INTEGER_LITERAL)'. You might need to add explicit type casts.
        Candidate functions:
        ascii(VARCHAR) -> INTEGER
   
   LINE 1: select ascii(2);
                  ^
   D select ascii('2');
   ┌────────────┐
   │ ascii('2') │
   │   int32    │
   ├────────────┤
   │         50 │
   └────────────┘
   ```
   
   So does Postgres
   
   ```
   postgres=# select ascii(2);
   ERROR:  function ascii(integer) does not exist
   LINE 1: select ascii(2);
                  ^
   HINT:  No function matches the given name and argument types. You might need 
to add explicit type casts.
   postgres=# select ascii('2');
    ascii 
   -------
       50
   (1 row)
   ```
   
   This used to work in DataFusion, but it is not consistent with other 
systems. I'm not sure whether we should introduce this behaviour back



##########
datafusion/optimizer/src/analyzer/type_coercion.rs:
##########
@@ -2133,4 +2133,77 @@ mod test {
         assert_analyzed_plan_eq(Arc::new(TypeCoercion::new()), plan, 
expected)?;
         Ok(())
     }
+
+    /// Create a [`PhysicalExpr`] from an [`Expr`] after applying type 
coercion.
+    fn create_physical_expr_with_type_coercion(
+        expr: Expr,
+        df_schema: &DFSchema,
+    ) -> Result<Arc<dyn PhysicalExpr>> {
+        let props = ExecutionProps::default();
+        let coerced_expr = expr
+            .rewrite(&mut TypeCoercionRewriter::new(df_schema))?
+            .data;
+        let physical_expr = datafusion_physical_expr::create_physical_expr(
+            &coerced_expr,
+            df_schema,
+            &props,
+        )?;
+        Ok(physical_expr)
+    }
+
+    fn evaluate_expr_with_array(
+        expr: Expr,
+        batch: RecordBatch,
+        df_schema: &DFSchema,
+    ) -> Result<ArrayRef> {
+        let physical_expr = create_physical_expr_with_type_coercion(expr, 
df_schema)?;
+        match physical_expr.evaluate(&batch)? {
+            ColumnarValue::Array(result) => Ok(result),
+            _ => datafusion_common::internal_err!(
+                "Expected array result in evaluate_expr_with_array"
+            ),
+        }
+    }
+
+    fn evaluate_expr_with_scalar(expr: Expr) -> Result<ScalarValue> {
+        let df_schema = DFSchema::empty();
+        let physical_expr = create_physical_expr_with_type_coercion(expr, 
&df_schema)?;
+        match physical_expr
+            .evaluate(&RecordBatch::new_empty(Arc::clone(df_schema.inner())))?
+        {
+            ColumnarValue::Scalar(result) => Ok(result),
+            _ => datafusion_common::internal_err!(
+                "Expected scalar result in evaluate_expr_with_scalar"
+            ),
+        }
+    }
+
+    #[test]
+    fn test_ascii_expr() -> Result<()> {

Review Comment:
   We can remove this test if the purpose of the test is covered already in slt



##########
datafusion/expr/src/type_coercion/functions.rs:
##########
@@ -744,6 +706,57 @@ fn get_valid_types(
     Ok(valid_types)
 }
 
+/// Validates that all data types are either [`NativeType::String`] or 
[`NativeType::Null`].
+/// For [`NativeType::Null`], returns [`DataType::Utf8`] as the default string 
type.
+/// Returns error if any type is neither [`NativeType::String`] nor 
[`NativeType::Null`].
+fn validate_and_collect_string_types(data_types: &[DataType]) -> 
Result<Vec<DataType>> {
+    data_types
+        .iter()
+        .map(|data_type| {
+            let logical_type: NativeType = data_type.into();
+            match logical_type {
+                NativeType::String => Ok(data_type.to_owned()),
+                // TODO: Switch to Utf8View if all the string functions 
supports Utf8View
+                NativeType::Null => Ok(DataType::Utf8),
+                _ => plan_err!("The signature expected NativeType::String but 
received {logical_type}"),
+            }
+        })
+        .collect()
+}
+
+/// Returns a common string [`DataType`] that both input types can be coerced 
to.
+/// Handles [`DataType::Dictionary`] by recursively finding common type of 
their value [`DataType`].
+/// Returns error if types cannot be coerced to a common string [`DataType`].
+fn find_common_string_type(lhs_type: &DataType, rhs_type: &DataType) -> 
Result<DataType> {
+    match (lhs_type, rhs_type) {
+        (DataType::Dictionary(_, lhs), DataType::Dictionary(_, rhs)) => {
+            find_common_string_type(lhs, rhs)
+        }
+        (DataType::Dictionary(_, v), other) | (other, DataType::Dictionary(_, 
v)) => {
+            find_common_string_type(v, other)
+        }
+        _ => {
+            if let Some(coerced_type) = string_coercion(lhs_type, rhs_type) {
+                Ok(coerced_type)
+            } else {
+                plan_err!(
+                    "{lhs_type} and {rhs_type} are not coercible to a common 
string type"
+                )
+            }
+        }
+    }
+}
+
+/// Recursively traverses [`DataType::Dictionary`] to get the underlying value 
[`DataType`].
+/// For non-dictionary types, returns the default [`DataType`].
+fn base_dictionary_type_or_default_type(data_type: &DataType) -> DataType {

Review Comment:
   We can add dictionary support to `datafusion/common/src/utils/mod.rs` 
`base_type`



##########
datafusion/expr/src/type_coercion/functions.rs:
##########
@@ -584,23 +544,7 @@ fn get_valid_types(
                 match target_type_class {
                     TypeSignatureClass::Native(native_type) => {
                         let target_type = native_type.native();
-                        if &logical_type == target_type {

Review Comment:
   Does that mean others function that used Coercible String now also cast 
integer to string?



##########
datafusion/sqllogictest/test_files/expr.slt:
##########
@@ -571,8 +601,10 @@ select repeat('-1.2', arrow_cast(3, 'Int32'));
 ----
 -1.2-1.2-1.2
 
-query error DataFusion error: Error during planning: Internal error: Expect 
TypeSignatureClass::Native\(LogicalType\(Native\(Int64\), Int64\)\) but 
received Float64
+query T
 select repeat('-1.2', 3.2);

Review Comment:
   This should be error, the 2nd argument should be integer



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to