alamb commented on code in PR #17022:
URL: https://github.com/apache/datafusion/pull/17022#discussion_r2261307237


##########
datafusion/core/src/datasource/file_format/parquet.rs:
##########
@@ -376,41 +391,123 @@ mod tests {
         )));
         let (meta, _files) = store_parquet(vec![batch1, batch2], false).await?;
 
+        let session = SessionContext::new();
+        let ctx = session.state();
+
         // Use a size hint larger than the parquet footer but smaller than the 
actual metadata, requiring a second fetch
         // for the remaining metadata
         fetch_parquet_metadata(
             store.as_ref() as &dyn ObjectStore,
             &meta[0],
             Some(9),
             None,
+            false,
+            None,
         )
         .await
         .expect("error reading metadata with hint");
-
         assert_eq!(store.request_count(), 2);
 
-        let session = SessionContext::new();
-        let ctx = session.state();
+        // Increases by 2 because cache has no entries yet
+        fetch_parquet_metadata(
+            store.as_ref() as &dyn ObjectStore,
+            &meta[0],
+            Some(9),
+            None,
+            true,
+            ctx.runtime_env().cache_manager.get_file_metadata_cache(),
+        )
+        .await
+        .expect("error reading metadata with hint");
+        assert_eq!(store.request_count(), 4);
+
+        // No increase because cache has an entry
+        fetch_parquet_metadata(
+            store.as_ref() as &dyn ObjectStore,
+            &meta[0],
+            Some(9),
+            None,
+            true,
+            ctx.runtime_env().cache_manager.get_file_metadata_cache(),
+        )
+        .await
+        .expect("error reading metadata with hint");
+        assert_eq!(store.request_count(), 4);
+
+        // Increase by 2  because `cache_metadata` is false

Review Comment:
   👍 



##########
datafusion/datasource-parquet/src/reader.rs:
##########
@@ -212,7 +212,7 @@ impl ParquetFileReaderFactory for 
CachedParquetFileReaderFactory {
 /// Implements [`AsyncFileReader`] for a Parquet file in object storage. Reads 
the file metadata
 /// from the [`FileMetadataCache`], if available, otherwise reads it directly 
from the file and then
 /// updates the cache.
-pub(crate) struct CachedParquetFileReader {
+pub struct CachedParquetFileReader {

Review Comment:
   Likewise, I locally reverted these changes to visibility and everything 
seems to have compiled just fine



##########
datafusion/datasource-parquet/src/mod.rs:
##########
@@ -24,7 +24,7 @@ pub mod file_format;
 mod metrics;
 mod opener;
 mod page_filter;
-mod reader;
+pub mod reader;

Review Comment:
   why does this need to be made pub? I reverted the change and things still 
seem to compile just fine



##########
datafusion/datasource-parquet/src/file_format.rs:
##########
@@ -285,6 +286,11 @@ impl ParquetFormat {
         self.options.global.coerce_int96 = time_unit;
         self
     }
+
+    pub fn with_cache_metadata(mut self, cache_metadata: bool) -> Self {

Review Comment:
   I think this option was removed in 
https://github.com/apache/datafusion/pull/17062 so it is no longer needed



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to