kosiew commented on code in PR #1036:
URL: 
https://github.com/apache/datafusion-python/pull/1036#discussion_r2002243419


##########
python/tests/test_dataframe.py:
##########
@@ -1191,13 +1192,17 @@ def add_with_parameter(df_internal, value: Any) -> 
DataFrame:
 def test_dataframe_repr_html(df) -> None:
     output = df._repr_html_()

Review Comment:
   Would it be a good idea to test for other df fixtures too?
   
   not specific to this PR, maybe add an empty_df fixture for tests too
   eg
   ```
   @pytest.fixture
   def empty_df():
       ctx = SessionContext()
   
       # Create an empty RecordBatch with the same schema as df
       batch = pa.RecordBatch.from_arrays(
           [
               pa.array([], type=pa.int64()),
               pa.array([], type=pa.int64()),
               pa.array([], type=pa.int64()),
           ],
           names=["a", "b", "c"],
       )
   
       return ctx.from_arrow(batch)
   
   
   @pytest.mark.parametrize(
       "dataframe_fixture",
       ["empty_df", "df", "nested_df", "struct_df", "partitioned_df", 
"aggregate_df"],
   )
   def test_dataframe_repr_html(request, dataframe_fixture) -> None:
       df = request.getfixturevalue(dataframe_fixture)
       output = df._repr_html_()
   
   ```



##########
src/dataframe.rs:
##########
@@ -111,56 +116,151 @@ impl PyDataFrame {
     }
 
     fn __repr__(&self, py: Python) -> PyDataFusionResult<String> {
-        let df = self.df.as_ref().clone().limit(0, Some(10))?;
-        let batches = wait_for_future(py, df.collect())?;
-        let batches_as_string = pretty::pretty_format_batches(&batches);
-        match batches_as_string {
-            Ok(batch) => Ok(format!("DataFrame()\n{batch}")),
-            Err(err) => Ok(format!("Error: {:?}", err.to_string())),
+        let (batches, has_more) = wait_for_future(
+            py,
+            collect_record_batches_to_display(self.df.as_ref().clone(), 10, 
10),
+        )?;
+        if batches.is_empty() {
+            // This should not be reached, but do it for safety since we index 
into the vector below
+            return Ok("No data to display".to_string());
         }
-    }
 
-    fn _repr_html_(&self, py: Python) -> PyDataFusionResult<String> {
-        let mut html_str = "<table border='1'>\n".to_string();
+        let batches_as_displ =
+            
pretty::pretty_format_batches(&batches).map_err(py_datafusion_err)?;
+
+        let additional_str = match has_more {
+            true => "\nData truncated.",
+            false => "",
+        };
 
-        let df = self.df.as_ref().clone().limit(0, Some(10))?;
-        let batches = wait_for_future(py, df.collect())?;
+        Ok(format!("DataFrame()\n{batches_as_displ}{additional_str}"))
+    }
 
+    fn _repr_html_(&self, py: Python) -> PyDataFusionResult<String> {
+        let (batches, has_more) = wait_for_future(
+            py,
+            collect_record_batches_to_display(
+                self.df.as_ref().clone(),
+                MIN_TABLE_ROWS_TO_DISPLAY,
+                usize::MAX,
+            ),
+        )?;

Review Comment:
   Extracting some variables into helper functions could make this more 
readable and easier to maintain eg:
   
   ```
       fn _repr_html_(&self, py: Python) -> PyDataFusionResult<String> {
           let (batches, has_more) = wait_for_future(
               py,
               collect_record_batches_to_display(
                   self.df.as_ref().clone(),
                   MIN_TABLE_ROWS_TO_DISPLAY,
                   usize::MAX,
               ),
           )?;
           if batches.is_empty() {
               // This should not be reached, but do it for safety since we 
index into the vector below
               return Ok("No data to display".to_string());
           }
   
           let table_uuid = uuid::Uuid::new_v4().to_string();
           let schema = batches[0].schema();
           
           // Get table formatters for displaying cell values
           let batch_formatters = get_batch_formatters(&batches)?;
           let rows_per_batch = batches.iter().map(|batch| batch.num_rows());
   
           // Generate HTML components
           let mut html_str = generate_html_table_header(&schema);
           html_str.push_str(&generate_table_rows(
               &batch_formatters, 
               rows_per_batch, 
               &table_uuid
           )?);
           html_str.push_str("</tbody></table></div>\n");
           html_str.push_str(&generate_javascript());
   
           if has_more {
               html_str.push_str("Data truncated due to size.");
           }
   
           Ok(html_str)
       }
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to