alamb commented on code in PR #19433:
URL: https://github.com/apache/datafusion/pull/19433#discussion_r2656399423


##########
datafusion/core/src/test_util/parquet.rs:
##########
@@ -229,3 +221,91 @@ impl TestParquetFile {
         self.path.as_path()
     }
 }
+
+/// Specification for a sorting column in a Parquet file.
+///
+/// This is used by [`create_sorted_parquet_file`] to define the sort order
+/// when creating test Parquet files with sorting metadata.
+#[derive(Debug, Clone)]
+pub struct SortColumnSpec {
+    /// The column index in the schema (0-based)
+    pub column_idx: usize,
+    /// If true, the column is sorted in descending order
+    pub descending: bool,
+    /// If true, nulls come before non-null values
+    pub nulls_first: bool,
+}
+
+impl SortColumnSpec {
+    /// Create a new sort column specification
+    pub fn new(column_idx: usize, descending: bool, nulls_first: bool) -> Self {
+        Self {
+            column_idx,
+            descending,
+            nulls_first,
+        }
+    }
+
+    /// Create an ascending, nulls-first sort column
+    pub fn asc_nulls_first(column_idx: usize) -> Self {
+        Self::new(column_idx, false, true)
+    }
+
+    /// Create an ascending, nulls-last sort column
+    pub fn asc_nulls_last(column_idx: usize) -> Self {
+        Self::new(column_idx, false, false)
+    }
+
+    /// Create a descending, nulls-first sort column
+    pub fn desc_nulls_first(column_idx: usize) -> Self {
+        Self::new(column_idx, true, true)
+    }
+
+    /// Create a descending, nulls-last sort column
+    pub fn desc_nulls_last(column_idx: usize) -> Self {
+        Self::new(column_idx, true, false)
+    }
+}
+
+/// Creates a test Parquet file with sorting_columns metadata.

Review Comment:
   TIL: 
https://github.com/apache/parquet-format/blob/4b1c72c837bec5b792b2514f0057533030fcedf8/src/main/thrift/parquet.thrift#L1018-L1021
   
   I didn't realize that `sorting_columns` was part of the Parquet file metadata, but it appears to have been that way for 13 years 👍



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to