Jefffrey commented on code in PR #20032:
URL: https://github.com/apache/datafusion/pull/20032#discussion_r2736679294
##########
datafusion/functions/src/string/ltrim.rs:
##########
@@ -119,18 +119,87 @@ impl ScalarUDFImpl for LtrimFunc {
}
fn invoke_with_args(&self, args: ScalarFunctionArgs) ->
Result<ColumnarValue> {
- match args.args[0].data_type() {
+ let return_type = args.return_field.data_type();
+ let number_rows = args.number_rows;
+ let args = args.args;
+
+ // If any argument is a scalar NULL, the output is NULL for all rows
+ if args
+ .iter()
+ .any(|v| matches!(v, ColumnarValue::Scalar(s) if s.is_null()))
+ {
+ if args.iter().any(|v| matches!(v, ColumnarValue::Array(_))) {
Review Comment:
We shouldn't return a columnar array here; it should just be columnar scalar
only
##########
datafusion/functions/src/string/ltrim.rs:
##########
@@ -119,18 +119,87 @@ impl ScalarUDFImpl for LtrimFunc {
}
fn invoke_with_args(&self, args: ScalarFunctionArgs) ->
Result<ColumnarValue> {
- match args.args[0].data_type() {
+ let return_type = args.return_field.data_type();
+ let number_rows = args.number_rows;
+ let args = args.args;
+
+ // If any argument is a scalar NULL, the output is NULL for all rows
+ if args
+ .iter()
+ .any(|v| matches!(v, ColumnarValue::Scalar(s) if s.is_null()))
+ {
+ if args.iter().any(|v| matches!(v, ColumnarValue::Array(_))) {
+ return Ok(ColumnarValue::Array(arrow::array::new_null_array(
+ return_type,
+ number_rows,
+ )));
+ }
+ return
Ok(ColumnarValue::Scalar(ScalarValue::try_from(return_type)?));
+ }
+
+ // Scalar fast path
+ if args.iter().all(|v| matches!(v, ColumnarValue::Scalar(_))) {
+ let arg0 = &args[0];
+ let arg1 = args.get(1);
+
+ let (value, pattern) = match (arg0, arg1) {
+ (ColumnarValue::Scalar(s0), None) => (s0, None),
+ (ColumnarValue::Scalar(s0), Some(ColumnarValue::Scalar(s1)))
=> {
+ (s0, Some(s1))
+ }
+ _ => {
+ return internal_err!(
+ "Unexpected argument combination in ltrim scalar fast
path"
+ );
+ }
+ };
+
+ let trim_chars: Vec<char> = match pattern {
+ None => vec![' '],
+ Some(ScalarValue::Utf8(Some(p)))
+ | Some(ScalarValue::LargeUtf8(Some(p)))
+ | Some(ScalarValue::Utf8View(Some(p))) => p.chars().collect(),
+ Some(other) => {
+ return internal_err!(
+ "Unexpected data type {:?} for ltrim pattern",
+ other.data_type()
+ );
+ }
+ };
+
+ let trimmed = match value {
+ ScalarValue::Utf8(Some(s)) => ScalarValue::Utf8(Some(
+ s.trim_start_matches(&trim_chars[..]).to_string(),
+ )),
+ ScalarValue::Utf8View(Some(s)) => ScalarValue::Utf8View(Some(
+ s.trim_start_matches(&trim_chars[..]).to_string(),
+ )),
+ ScalarValue::LargeUtf8(Some(s)) => ScalarValue::LargeUtf8(Some(
+ s.trim_start_matches(&trim_chars[..]).to_string(),
+ )),
+ other => {
+ return internal_err!(
+ "Unexpected data type {:?} for function ltrim",
+ other.data_type()
+ );
+ }
+ };
+
+ return Ok(ColumnarValue::Scalar(trimmed));
+ }
+
+ // Array path
+ match args[0].data_type() {
DataType::Utf8 | DataType::Utf8View => make_scalar_function(
ltrim::<i32>,
vec![Hint::Pad, Hint::AcceptsSingular],
Review Comment:
It seems a bit surprising we get such a speedup when `make_scalar_function`
with hints should already ensure we don't expand the arrays 🤔
##########
datafusion/functions/src/string/ltrim.rs:
##########
@@ -119,18 +119,87 @@ impl ScalarUDFImpl for LtrimFunc {
}
fn invoke_with_args(&self, args: ScalarFunctionArgs) ->
Result<ColumnarValue> {
- match args.args[0].data_type() {
+ let return_type = args.return_field.data_type();
+ let number_rows = args.number_rows;
+ let args = args.args;
+
+ // If any argument is a scalar NULL, the output is NULL for all rows
+ if args
+ .iter()
+ .any(|v| matches!(v, ColumnarValue::Scalar(s) if s.is_null()))
+ {
+ if args.iter().any(|v| matches!(v, ColumnarValue::Array(_))) {
+ return Ok(ColumnarValue::Array(arrow::array::new_null_array(
+ return_type,
+ number_rows,
+ )));
+ }
+ return
Ok(ColumnarValue::Scalar(ScalarValue::try_from(return_type)?));
+ }
+
+ // Scalar fast path
+ if args.iter().all(|v| matches!(v, ColumnarValue::Scalar(_))) {
Review Comment:
Using an iter check on this when we have at most 2 arguments is overkill;
just use a match
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]