ethan-tyler commented on code in PR #17702:
URL: https://github.com/apache/datafusion/pull/17702#discussion_r2608286043


##########
datafusion/core/src/datasource/listing_table_factory.rs:
##########
@@ -63,137 +65,190 @@ impl TableProviderFactory for ListingTableFactory {
             ))?
             .create(session_state, &cmd.options)?;
 
-        let mut table_path = ListingTableUrl::parse(&cmd.location)?;
-        let file_extension = match table_path.is_collection() {
-            // Setting the extension to be empty instead of allowing the 
default extension seems
-            // odd, but was done to ensure existing behavior isn't modified. 
It seems like this
-            // could be refactored to either use the default extension or set 
the fully expected
-            // extension when compression is included (e.g. ".csv.gz")
-            true => "",
-            false => &get_extension(cmd.location.as_str()),
+        let file_extension = match cmd.locations.len() {
+            1 => {
+                let table_path = ListingTableUrl::parse(&cmd.locations[0])?;
+                match table_path.is_collection() {
+                    // Setting the extension to be empty instead of allowing 
the default extension seems
+                    // odd, but was done to ensure existing behavior isn't 
modified. It seems like this
+                    // could be refactored to either use the default extension 
or set the fully expected
+                    // extension when compression is included (e.g. ".csv.gz").
+                    // We do the same if there are multiple locations provided 
for the table.
+                    true => "",
+                    false => &get_extension(cmd.locations[0].as_str()),
+                }
+            }
+            _ => "",
         };
+
         let mut options = ListingOptions::new(file_format)
             .with_session_config_options(session_state.config())
             .with_file_extension(file_extension);
 
-        let (provided_schema, table_partition_cols) = if 
cmd.schema.fields().is_empty() {
-            let infer_parts = session_state
-                .config_options()
-                .execution
-                .listing_table_factory_infer_partitions;
-            let part_cols = if cmd.table_partition_cols.is_empty() && 
infer_parts {
-                options
-                    .infer_partitions(session_state, &table_path)
-                    .await?
-                    .into_iter()
-            } else {
-                cmd.table_partition_cols.clone().into_iter()
-            };
-
-            (
-                None,
-                part_cols
-                    .map(|p| {
-                        (
-                            p,
-                            DataType::Dictionary(
-                                Box::new(DataType::UInt16),
-                                Box::new(DataType::Utf8),
-                            ),
-                        )
-                    })
-                    .collect::<Vec<_>>(),
+        let table_paths: Vec<ListingTableUrl> = cmd
+            .locations
+            .iter()
+            .map(|loc| ListingTableUrl::parse(loc))
+            .collect::<Result<Vec<_>>>()?;
+
+        // We use the first location to infer the partition columns,
+        // primarily for performance and simplicity reasons.
+        let partition_columns = infer_partition_columns(
+            &options,
+            session_state,
+            &table_paths[0],

Review Comment:
   Yup, should guard this:
   
   ```rust
   let first_path = table_paths.first().ok_or_else(|| {
       DataFusionError::Plan("At least one location required".into())
   })?;
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to