Rachelint commented on code in PR #11319:
URL: https://github.com/apache/datafusion/pull/11319#discussion_r1667817843


##########
datafusion/core/src/datasource/physical_plan/parquet/statistics.rs:
##########
@@ -392,51 +393,73 @@ macro_rules! get_statistics {
                 })
             },
             DataType::Binary => Ok(Arc::new(BinaryArray::from_iter(
-                [<$stat_type_prefix 
ByteArrayStatsIterator>]::new($iterator).map(|x| x.map(|x| x.to_vec())),
+                [<$stat_type_prefix ByteArrayStatsIterator>]::new($iterator)
             ))),
             DataType::LargeBinary => Ok(Arc::new(LargeBinaryArray::from_iter(
-                [<$stat_type_prefix 
ByteArrayStatsIterator>]::new($iterator).map(|x| x.map(|x|x.to_vec())),
-            ))),
-            DataType::Utf8 => Ok(Arc::new(StringArray::from_iter(
-                [<$stat_type_prefix 
ByteArrayStatsIterator>]::new($iterator).map(|x| {
-                    x.and_then(|x| {
-                        let res = std::str::from_utf8(x).map(|s| 
s.to_string()).ok();
-                        if res.is_none() {
-                            log::debug!("Utf8 statistics is a non-UTF8 value, 
ignoring it.");
-                        }
-                        res
-                    })
-                }),
+                [<$stat_type_prefix ByteArrayStatsIterator>]::new($iterator)
             ))),
+            DataType::Utf8 => {
+                let iterator = [<$stat_type_prefix 
ByteArrayStatsIterator>]::new($iterator);
+                let mut builder = StringBuilder::new();
+                for x in iterator {
+                    let Some(x) = x else {
+                        builder.append_null(); // no statistics value
+                        continue;
+                    };
+
+                    let Ok(x) = std::str::from_utf8(x) else {
+                        log::debug!("Utf8 statistics is a non-UTF8 value, 
ignoring it.");
+                        builder.append_null();
+                        continue;
+                    };
+
+                    builder.append_value(x);
+                }
+                Ok(Arc::new(builder.finish()))
+            },
             DataType::LargeUtf8 => {
-                Ok(Arc::new(LargeStringArray::from_iter(
-                    [<$stat_type_prefix 
ByteArrayStatsIterator>]::new($iterator).map(|x| {
-                        x.and_then(|x| {
-                            let res = std::str::from_utf8(x).map(|s| 
s.to_string()).ok();
-                            if res.is_none() {
-                                log::debug!("LargeUtf8 statistics is a 
non-UTF8 value, ignoring it.");
-                            }
-                            res
-                        })
-                    }),
-                )))
-            }
-            DataType::FixedSizeBinary(size) => 
Ok(Arc::new(FixedSizeBinaryArray::from(
-                [<$stat_type_prefix 
FixedLenByteArrayStatsIterator>]::new($iterator).map(|x| {
-                    x.and_then(|x| {
-                        if x.len().try_into() == Ok(*size) {
-                            Some(x)
-                        } else {
-                            log::debug!(
-                                "FixedSizeBinary({}) statistics is a binary of 
size {}, ignoring it.",
-                                size,
-                                x.len(),
-                            );
-                            None
-                        }
-                    })
-                }).collect::<Vec<_>>(),
-            ))),
+                let iterator = [<$stat_type_prefix 
ByteArrayStatsIterator>]::new($iterator);
+                let mut builder = LargeStringBuilder::new();
+                for x in iterator {
+                    let Some(x) = x else {
+                        builder.append_null(); // no statistics value
+                        continue;
+                    };
+
+                    let Ok(x) = std::str::from_utf8(x) else {
+                        log::debug!("Utf8 statistics is a non-UTF8 value, 
ignoring it.");
+                        builder.append_null();
+                        continue;
+                    };
+
+                    builder.append_value(x);
+                }
+                Ok(Arc::new(builder.finish()))
+            },
+            DataType::FixedSizeBinary(size) => {
+                let iterator = [<$stat_type_prefix 
FixedLenByteArrayStatsIterator>]::new($iterator);
+                let mut builder = FixedSizeBinaryBuilder::new(*size);
+                for x in iterator {
+                    let Some(x) = x else {
+                        builder.append_null(); // no statistics value
+                        continue;
+                    };
+
+                    // ignore invalid values
+                    if x.len().try_into() != Ok(*size){
+                        log::debug!(
+                            "FixedSizeBinary({}) statistics is a binary of 
size {}, ignoring it.",
+                            size,
+                            x.len(),
+                        );
+                        builder.append_null();
+                        continue;
+                    }
+
+                    builder.append_value(x).expect("ensure to append 
successfully here, because size have been checked before");

Review Comment:
   > Rather than using `expect` it is likely possible to check the value of 
`builder.append_value` and if it is an error append null. But perhaps that is 
overly confusing. This formulation is quite clear and explicit
   
   :thinking: Yes, I `append_null` before. And I read the codes of 
`append_value` after, I found the only reason to return Err is to append a 
value with different width that has been checked and ensured actually.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to