Lordworms commented on issue #11150:
URL: https://github.com/apache/datafusion/issues/11150#issuecomment-2240904646

   When I was doing this one, I added a test like 
   ``` rust
   #[tokio::test]
   async fn roundtrip_physical_plan_copy_to_sql_options() -> Result<()> {
       // Create a new SessionContext
       let ctx = SessionContext::new();
       let file_type = format_as_file_type(Arc::new(CsvFormatFactory::new()));
       // Create a CSV scan as input
       let input = create_csv_scan(&ctx).await?;
       let plan = LogicalPlan::Copy(CopyTo {
           input: Arc::new(input),
           output_url: "test.csv".to_string(),
           partition_by: vec!["a".to_string(), "b".to_string(), 
"c".to_string()],
           file_type,
           options: Default::default(),
       });
   
       // Convert the logical plan to a physical plan
       let planner = DefaultPhysicalPlanner::default();
       let physical_plan = planner.create_physical_plan(&plan, 
&ctx.state()).await?;
       roundtrip_test(physical_plan)
   }
   ```
   
   I find that it seems like the roundtrip in physical plan test should be ok 
since the fileformatfactory is translated into Fileformat automaticly here
   
   ```Rust
   LogicalPlan::Copy(CopyTo {
                   input,
                   output_url,
                   file_type,
                   partition_by,
                   options: source_option_tuples,
               }) => {
                   let input_exec = children.one()?;
                   let parsed_url = ListingTableUrl::parse(output_url)?;
                   let object_store_url = parsed_url.object_store();
   
                   let schema: Schema = (**input.schema()).clone().into();
   
                   // Note: the DataType passed here is ignored for the 
purposes of writing and inferred instead
                   // from the schema of the RecordBatch being written. This 
allows COPY statements to specify only
                   // the column name rather than column name + explicit data 
type.
                   let table_partition_cols = partition_by
                       .iter()
                       .map(|s| (s.to_string(), arrow_schema::DataType::Null))
                       .collect::<Vec<_>>();
   
                   let keep_partition_by_columns = match source_option_tuples
                       .get("execution.keep_partition_by_columns")
                       .map(|v| v.trim()) {
                       None => 
session_state.config().options().execution.keep_partition_by_columns,
                       Some("true") => true,
                       Some("false") => false,
                       Some(value) =>
                           return 
Err(DataFusionError::Configuration(format!("provided value for 
'execution.keep_partition_by_columns' was not recognized: \"{}\"", value))),
                   };
   
                   // Set file sink related options
                   let config = FileSinkConfig {
                       object_store_url,
                       table_paths: vec![parsed_url],
                       file_groups: vec![],
                       output_schema: Arc::new(schema),
                       table_partition_cols,
                       overwrite: false,
                       keep_partition_by_columns,
                   };
   
                   let sink_format = file_type_to_format(file_type)?
                       .create(session_state, source_option_tuples)?;
   
                   sink_format
                       .create_writer_physical_plan(input_exec, session_state, 
config, None)
                       .await?
               }
   ```
   
   So it causes my confusion here how to actually add a test here, appreciate 
your guidence @alamb 


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to