UBarney commented on code in PR #14567:
URL: https://github.com/apache/datafusion/pull/14567#discussion_r1952625400


##########
datafusion/physical-optimizer/src/pruning.rs:
##########
@@ -1710,6 +1711,66 @@ fn build_like_match(
     Some(combined)
 }
 
+// For predicate `col NOT LIKE 'foo%'`, we rewrite it as `(col_min NOT LIKE 
'foo%' OR col_max NOT LIKE 'foo%')`. If both col_min and col_max have the 
prefix foo, we skip the entire row group (as we can be certain that all data in 
this row group has the prefix foo).
+fn build_not_like_match(
+    expr_builder: &mut PruningExpressionBuilder<'_>,
+) -> Result<Arc<dyn PhysicalExpr>> {
+    // col NOT LIKE 'prefix%' ->  !(col_min LIKE 'prefix%' && col_max LIKE 
'prefix%') -> (col_min NOT LIKE 'prefix%' || col_max NOT LIKE 'prefix%')
+
+    let min_column_expr = expr_builder.min_column_expr()?;
+    let max_column_expr = expr_builder.max_column_expr()?;
+
+    let scalar_expr = expr_builder.scalar_expr();
+
+    let pattern = extract_string_literal(scalar_expr).ok_or_else(|| {
+        plan_datafusion_err!("cannot extract literal from NOT LIKE expression")
+    })?;
+
+    let chars: Vec<char> = pattern.chars().collect();
+    for i in 0..chars.len() - 1 {
+        // Check if current char is a wildcard and is not escaped with 
backslash
+        if (chars[i] == '%' || chars[i] == '_') && (i == 0 || chars[i - 1] != 
'\\') {
+            // Example: For pattern "foo%bar", the row group might include 
values like
+            // ["foobar", "food", "foodbar"], making it unsafe to prune.
+            // Even if the min/max values in the group (e.g., "foobar" and 
"foodbar")
+            // match the pattern, intermediate values like "food" may not
+            // match the full pattern "foo%bar", making pruning unsafe.
+            // (truncate foo%bar to foo% have same problem)
+            return Err(plan_datafusion_err!(
+                "NOT LIKE expressions with unescaped wildcards ('%' or '_') at 
the beginning or middle of the pattern are not supported"
+            ));
+        }
+    }
+
+    if chars.last() == Some(&'_') && (chars.len() > 1 && chars[chars.len() - 
2] != '\\') {

Review Comment:
   Now it only handles `constant-prefix%`. Adding `split_constant_prefix`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to