comphead commented on code in PR #16332:
URL: https://github.com/apache/datafusion/pull/16332#discussion_r2134794387


##########
datafusion-cli/src/functions.rs:
##########
@@ -460,3 +473,94 @@ impl TableFunctionImpl for ParquetMetadataFunc {
         Ok(Arc::new(parquet_metadata))
     }
 }
+
+/// A table function that allows users to query files using glob patterns
+/// for example: SELECT * FROM glob('path/to/*/file.parquet')
+pub struct GlobFunc {
+    // we need the ctx here to get the schema from the listing table later
+    ctx: SessionContext,
+}
+
+impl std::fmt::Debug for GlobFunc {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("GlobFunc")
+            .field("ctx", &"<SessionContext>")
+            .finish()
+    }
+}
+
+impl GlobFunc {
+    /// Create a new GlobFunc
+    pub fn new(ctx: SessionContext) -> Self {
+        Self { ctx }
+    }
+}
+
+fn as_utf8_literal<'a>(expr: &'a Expr, arg_name: &str) -> Result<&'a str> {
+    match expr {
+        Expr::Literal(ScalarValue::Utf8(Some(s)), _) => Ok(s),
+        Expr::Column(Column { name, .. }) => Ok(name),
+        _ => plan_err!("glob() requires a string literal for the '{arg_name}' argument"),
+    }
+}
+
+impl TableFunctionImpl for GlobFunc {
+    fn call(&self, exprs: &[Expr]) -> Result<Arc<dyn TableProvider>> {
+        // Parse arguments
+        if exprs.is_empty() {
+            return plan_err!("glob() requires a glob pattern");
+        }
+        let pattern = as_utf8_literal(&exprs[0], "pattern")?;
+        let format = if exprs.len() > 1 {
+            Some(as_utf8_literal(&exprs[1], "format")?)
+        } else {
+            None
+        };
+
+        // Create ListingTableUrl - distinguish between URLs with schemes and local paths
+        let url = if pattern.contains("://") && pattern.contains(['*', '?', '[']) {
+            // URL with scheme and glob - split manually to avoid URL encoding of glob chars
+            let glob_pos = pattern.find(['*', '?', '[']).unwrap(); // we already checked it exists
+            let split_pos = pattern[..glob_pos].rfind('/').unwrap() + 1; // find last '/' before glob
+            let (base_path, glob_part) = pattern.split_at(split_pos);
+
+            let base_url = Url::parse(&format!("{}/", base_path.trim_end_matches('/')))
+                .map_err(|e| {
+                DataFusionError::Plan(format!("Invalid base URL: {}", e))

Review Comment:
   Use the `plan_datafusion_err!` macro here instead of constructing `DataFusionError::Plan` by hand.
   ```suggestion
                   plan_datafusion_err!("Invalid base URL: {}", e)
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to