jeffreyssmith2nd commented on code in PR #10716:
URL: https://github.com/apache/datafusion/pull/10716#discussion_r1624964108
##########
datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs:
##########
@@ -473,6 +495,74 @@ mod test {
);
}
+ #[test]
+ fn test_filter_type_coercion() {
+ let testdata = crate::test_util::parquet_test_data();
+ let file =
std::fs::File::open(format!("{testdata}/alltypes_plain.parquet"))
+ .expect("opening file");
+
+ let reader = SerializedFileReader::new(file).expect("creating reader");
+ let metadata = reader.metadata();
+ let file_schema =
+ parquet_to_arrow_schema(metadata.file_metadata().schema_descr(),
None)
+ .expect("parsing schema");
+
+ // This is the schema we would like to coerce to,
+ // which is different from the physical schema of the file.
+ let table_schema = Schema::new(vec![Field::new(
+ "timestamp_col",
+ DataType::Timestamp(Nanosecond, Some(Arc::from("UTC"))),
+ false,
+ )]);
+
+ let expr = col("timestamp_col").eq(Expr::Literal(
+ ScalarValue::TimestampNanosecond(Some(1), Some(Arc::from("UTC"))),
+ ));
+ let expr = logical2physical(&expr, &table_schema);
+ let candidate = FilterCandidateBuilder::new(expr, &file_schema,
&table_schema)
+ .build(metadata)
+ .expect("building candidate")
+ .expect("candidate expected");
+
+ let schema_adapter =
+ DefaultSchemaAdapterFactory {}.create(Arc::new(table_schema));
+ let (schema_mapping, _) = schema_adapter
+ .map_schema(&file_schema)
+ .expect("creating schema mapping");
+
+ let mut row_filter = DatafusionArrowPredicate::try_new(
+ candidate,
+ &file_schema,
+ metadata,
+ Count::new(),
+ Time::new(),
+ schema_mapping,
+ )
+ .expect("creating filter predicate");
+
+ // Create some fake data as if it was from the parquet file
+ let ts_array = TimestampNanosecondArray::new(
+ vec![TimestampNanosecondType::parse("2020-01-01T00:00:00")
+ .expect("should parse")]
+ .into(),
+ None,
+ );
+ // We need a matching schema to create a record batch
+ let batch_schema = Schema::new(vec![Field::new(
+ "timestamp_col",
+ DataType::Timestamp(Nanosecond, None),
+ false,
+ )]);
+
+ let record_batch =
+ RecordBatch::try_new(Arc::new(batch_schema),
vec![Arc::new(ts_array)])
+ .expect("creating record batch");
+
+ let filtered = row_filter.evaluate(record_batch);
+
+ assert!(filtered.is_ok());
Review Comment:
The latest commit checks the test results, includes the inverse case where the filter
returns true, and uses the actual data from the parquet file.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]