adriangb commented on code in PR #12978:
URL: https://github.com/apache/datafusion/pull/12978#discussion_r1819457742
##########
datafusion/core/src/physical_optimizer/pruning.rs:
##########
@@ -1610,6 +1624,74 @@ fn build_statistics_expr(
Ok(statistics_expr)
}
+fn build_like_match(
+ expr_builder: &mut PruningExpressionBuilder,
+) -> Result<Arc<dyn PhysicalExpr>> {
+ // column LIKE literal => (min, max) LIKE literal split at % => min <=
split literal && split literal <= max
+ // column LIKE 'foo%' => min <= 'foo' && 'foo' <= max
+ // column LIKE '%foo' => min <= '' && '' <= max => true
+ // column LIKE '%foo%' => min <= '' && '' <= max => true
+ // column LIKE 'foo' => min <= 'foo' && 'foo' <= max
+
+ fn unpack_string(s: &ScalarValue) -> Result<&String> {
+ match s {
+ ScalarValue::Utf8(Some(s)) => Ok(s),
+ ScalarValue::LargeUtf8(Some(s)) => Ok(s),
+ ScalarValue::Utf8View(Some(s)) => Ok(s),
+ ScalarValue::Dictionary(_, value) => unpack_string(value),
+ _ => plan_err!("LIKE expression must be a string literal"),
+ }
+ }
+
+ fn extract_string_literal(expr: &Arc<dyn PhysicalExpr>) -> Result<&String>
{
+ if let Some(lit) = expr.as_any().downcast_ref::<phys_expr::Literal>() {
+ let s = unpack_string(lit.value())?;
+ return Ok(s);
+ }
+ plan_err!("LIKE expression must be a string literal")
Review Comment:
Changed to return `Option`s instead of `Result`s in
https://github.com/apache/datafusion/pull/12978/commits/5bcca3ef98def407822d23318ac8b6584f9f8c24
##########
datafusion/core/src/physical_optimizer/pruning.rs:
##########
@@ -1648,22 +1649,46 @@ fn build_like_match(
let s = unpack_string(lit.value())?;
return Ok(s);
}
- plan_err!("LIKE expression must be a string literal")
+ plan_err!("Unexpected LIKE expression: {expr:?}")
}
- // I *think* that ILIKE could be handled by making the min lowercase and
max uppercase
- // but that requires building the physical expressions that call lower()
and upper()
+ /// Try and increment the string's bytes from right to left, returning
when the result
+ /// is a valid UTF8 string. Returns `None` when it can't increment any
byte.
+ /// Vendored from
https://github.com/apache/arrow-rs/blob/56525efbd5f37b89d1b56aa51709cab9f81bc89e/parquet/src/column/writer/mod.rs#L1432-L1448
+ fn increment_utf8(data: &str) -> Result<String> {
+ let mut data = data.as_bytes().to_vec();
+ for idx in (0..data.len()).rev() {
+ let original = data[idx];
+ let (byte, overflow) = original.overflowing_add(1);
Review Comment:
Changed to operate on code points in
https://github.com/apache/datafusion/pull/12978/commits/5bcca3ef98def407822d23318ac8b6584f9f8c24
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]