kevinjqliu commented on code in PR #1036: URL: https://github.com/apache/datafusion-python/pull/1036#discussion_r1979772203
########## src/dataframe.rs: ########## @@ -100,46 +106,153 @@ impl PyDataFrame { } fn _repr_html_(&self, py: Python) -> PyDataFusionResult<String> { - let mut html_str = "<table border='1'>\n".to_string(); - - let df = self.df.as_ref().clone().limit(0, Some(10))?; - let batches = wait_for_future(py, df.collect())?; - + let (batches, mut has_more) = + wait_for_future(py, get_first_few_record_batches(self.df.as_ref().clone()))?; + let Some(batches) = batches else { + return Ok("No data to display".to_string()); + }; if batches.is_empty() { - html_str.push_str("</table>\n"); - return Ok(html_str); + // This should not be reached, but do it for safety since we index into the vector below + return Ok("No data to display".to_string()); } + let table_uuid = uuid::Uuid::new_v4().to_string(); + + let mut html_str = " + <style> + .expandable-container { + display: inline-block; + max-width: 200px; + } + .expandable { + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + display: block; + } + .full-text { + display: none; + white-space: normal; + } + .expand-btn { + cursor: pointer; + color: blue; + text-decoration: underline; + border: none; + background: none; + font-size: inherit; + display: block; + margin-top: 5px; + } + </style> + + <div style=\"width: 100%; max-width: 1000px; max-height: 300px; overflow: auto; border: 1px solid #ccc;\"> + <table style=\"border-collapse: collapse; min-width: 100%\"> + <thead>\n".to_string(); + let schema = batches[0].schema(); let mut header = Vec::new(); for field in schema.fields() { - header.push(format!("<th>{}</td>", field.name())); + header.push(format!("<th style='border: 1px solid black; padding: 8px; text-align: left; background-color: #f2f2f2; white-space: nowrap; min-width: fit-content; max-width: fit-content;'>{}</th>", field.name())); } let header_str = header.join(""); - html_str.push_str(&format!("<tr>{}</tr>\n", header_str)); + html_str.push_str(&format!("<tr>{}</tr></thead><tbody>\n", header_str)); + 
+ let batch_formatters = batches + .iter() + .map(|batch| { + batch + .columns() + .iter() + .map(|c| ArrayFormatter::try_new(c.as_ref(), &FormatOptions::default())) + .map(|c| { + c.map_err(|e| PyValueError::new_err(format!("Error: {:?}", e.to_string()))) + }) + .collect::<Result<Vec<_>, _>>() + }) + .collect::<Result<Vec<_>, _>>()?; + + let total_memory: usize = batches + .iter() + .map(|batch| batch.get_array_memory_size()) + .sum(); + let rows_per_batch = batches.iter().map(|batch| batch.num_rows()); + let total_rows = rows_per_batch.clone().sum(); + + // let (total_memory, total_rows) = batches.iter().fold((0, 0), |acc, batch| { + // (acc.0 + batch.get_array_memory_size(), acc.1 + batch.num_rows()) + // }); Review Comment: nit: remove commented-out code. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org