notashes commented on code in PR #20182:
URL: https://github.com/apache/datafusion/pull/20182#discussion_r2779152576


##########
datafusion/common/benches/with_hashes.rs:
##########
@@ -122,16 +151,51 @@ where
     builder.finish().expect("should be nulls in buffer")
 }
 
-// Returns an new array that is the same as array, but with nulls
+// Returns a new array that is the same as array, but with nulls
+// Handles the special case of RunArray where nulls must be in the values array
 fn add_nulls(array: &ArrayRef) -> ArrayRef {
-    let array_data = array
-        .clone()
-        .into_data()
-        .into_builder()
-        .nulls(Some(create_null_mask(array.len())))
-        .build()
-        .unwrap();
-    make_array(array_data)
+    use arrow::datatypes::DataType;
+
+    match array.data_type() {
+        DataType::RunEndEncoded(_, _) => {
+            // RunArray can't have top-level nulls, so apply nulls to the 
values array
+            let run_array = array
+                .as_any()
+                .downcast_ref::<RunArray<Int32Type>>()
+                .expect("Expected RunArray");
+
+            let run_ends_buffer = run_array.run_ends().inner().clone();
+            let run_ends_array = 
PrimitiveArray::<Int32Type>::new(run_ends_buffer, None);
+            let values = run_array.values().clone();
+
+            // Add nulls to the values array
+            let values_with_nulls = {
+                let array_data = values
+                    .clone()
+                    .into_data()
+                    .into_builder()
+                    .nulls(Some(create_null_mask(values.len())))

Review Comment:
   I was thinking about it for a while. It should probably come out to around 
the same 3% zone, even though the variance could be a bit high.
   
   I've set the run_length to be within 1..50. 
   Let's say we have ~300 runs on average, with each one carrying ~25 
elements. 3% of those runs will roughly translate to `10 * 25 = 250` elements. 
But yes, that is probably our ideal scenario.
   
   Let me know what you think. I'll try to do some testing regarding this.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to