gene-bordegaray commented on code in PR #22207:
URL: https://github.com/apache/datafusion/pull/22207#discussion_r3281783677


##########
datafusion/physical-expr/src/partitioning.rs:
##########
@@ -133,13 +137,197 @@ impl Display for Partitioning {
                     .join(", ");
                 write!(f, "Hash([{phy_exprs_str}], {size})")
             }
+            Partitioning::Range(range) => write!(f, "{range}"),
             Partitioning::UnknownPartitioning(size) => {
                 write!(f, "UnknownPartitioning({size})")
             }
         }
     }
 }
 
+/// Physical range partitioning.
+///
+/// [`RangePartitioning`] describes an ordered key space with split points.
+///
+/// - `ordering` defines the partitioning key and ordering.
+/// - `split_points` define the boundaries between adjacent partitions.
+///
+/// Comparisons use the lexicographic order defined by `ordering`, including
+/// `ASC`/`DESC` and null ordering. Split points must be strictly ordered
+/// according to that ordering, and each split point must have one value per
+/// ordering expression.
+///
+/// `N` split points define `N + 1` partitions:
+///
+/// ```text
+/// partition 0: key < split_points[0]
+/// partition 1: split_points[0] <= key < split_points[1]
+/// ...
+/// partition N - 1: split_points[N - 2] <= key < split_points[N - 1]
+/// partition N: split_points[N - 1] <= key
+/// ```
+///
+/// Values equal to split point `i` belong to partition `i + 1`, so interior
+/// partitions are lower-inclusive and upper-exclusive.
+///
+/// For a single range key:
+///
+/// ```text
+/// ordering = [date ASC NULLS LAST]
+/// split_points = [
+///   (2022-01-01),
+///   (2023-01-01),
+/// ]
+///
+/// partition 0: date before 2022-01-01
+/// partition 1: date between 2022-01-01 (inclusive) and 2023-01-01 (exclusive)
+/// partition 2: date at/after 2023-01-01
+/// ```
+///
+/// The same model extends to compound keys.
+/// For `ordering = [time ASC, city ASC]`, split points are ordered
+/// lexicographically by `(time, city)`:
+///
+/// ```text
+/// ordering = [time ASC NULLS LAST, city ASC NULLS LAST]
+/// split_points = [
+///   (2022, Allston),
+///   (2023, Allston),
+/// ]
+///
+/// partition 0: keys before  (2022, Allston)
+/// partition 1: keys between (2022, Allston) and (2023, Allston)
+/// partition 2: keys at/after (2023, Allston)
+/// ```
+///
+/// NOTE: Optimizer and execution behavior for this partitioning is intentionally
+/// not implemented and will be introduced incrementally.
+#[derive(Debug, Clone)]
+pub struct RangePartitioning {
+    /// Ordered partitioning key.
+    ordering: LexOrdering,
+    /// Boundaries between adjacent partitions.
+    split_points: Vec<SplitPoint>,
+}
+
+/// A boundary between adjacent range partitions.
+///
+/// A split point is a tuple with one [`ScalarValue`] per sort expression in the
+/// parent [`RangePartitioning`] ordering.
+#[derive(Debug, Clone, PartialEq)]
+pub struct SplitPoint {
+    values: Vec<ScalarValue>,
+}
+
+impl SplitPoint {
+    /// Creates a new split point from its tuple values.
+    pub fn new(values: Vec<ScalarValue>) -> Self {
+        Self { values }
+    }
+
+    /// Returns the tuple values for this split point.
+    pub fn values(&self) -> &[ScalarValue] {
+        &self.values
+    }
+}
+
+impl Display for SplitPoint {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let values = self
+            .values
+            .iter()
+            .map(ToString::to_string)
+            .collect::<Vec<_>>()
+            .join(", ");
+        write!(f, "({values})")
+    }
+}
+
+impl RangePartitioning {
+    /// Creates range partitioning metadata.
+    ///
+    /// The caller is responsible for satisfying the contract documented on
+    /// [`RangePartitioning`].
+    pub fn new(ordering: LexOrdering, split_points: Vec<SplitPoint>) -> Self {
+        Self {

Review Comment:
   Good point. I added this and now use it in the proto.
   
   I also added some unit tests covering this 👍



##########
datafusion/physical-expr/src/partitioning.rs:
##########
@@ -133,13 +137,197 @@ impl Display for Partitioning {
                     .join(", ");
                 write!(f, "Hash([{phy_exprs_str}], {size})")
             }
+            Partitioning::Range(range) => write!(f, "{range}"),
             Partitioning::UnknownPartitioning(size) => {
                 write!(f, "UnknownPartitioning({size})")
             }
         }
     }
 }
 
+/// Physical range partitioning.
+///
+/// [`RangePartitioning`] describes an ordered key space with split points.
+///
+/// - `ordering` defines the partitioning key and ordering.
+/// - `split_points` define the boundaries between adjacent partitions.
+///
+/// Comparisons use the lexicographic order defined by `ordering`, including
+/// `ASC`/`DESC` and null ordering. Split points must be strictly ordered
+/// according to that ordering, and each split point must have one value per
+/// ordering expression.
+///
+/// `N` split points define `N + 1` partitions:
+///
+/// ```text
+/// partition 0: key < split_points[0]
+/// partition 1: split_points[0] <= key < split_points[1]
+/// ...
+/// partition N - 1: split_points[N - 2] <= key < split_points[N - 1]
+/// partition N: split_points[N - 1] <= key
+/// ```
+///
+/// Values equal to split point `i` belong to partition `i + 1`, so interior
+/// partitions are lower-inclusive and upper-exclusive.
+///
+/// For a single range key:
+///
+/// ```text
+/// ordering = [date ASC NULLS LAST]
+/// split_points = [
+///   (2022-01-01),
+///   (2023-01-01),
+/// ]
+///
+/// partition 0: date before 2022-01-01
+/// partition 1: date between 2022-01-01 (inclusive) and 2023-01-01 (exclusive)
+/// partition 2: date at/after 2023-01-01
+/// ```
+///
+/// The same model extends to compound keys.
+/// For `ordering = [time ASC, city ASC]`, split points are ordered
+/// lexicographically by `(time, city)`:
+///
+/// ```text
+/// ordering = [time ASC NULLS LAST, city ASC NULLS LAST]
+/// split_points = [
+///   (2022, Allston),
+///   (2023, Allston),
+/// ]
+///
+/// partition 0: keys before  (2022, Allston)
+/// partition 1: keys between (2022, Allston) and (2023, Allston)
+/// partition 2: keys at/after (2023, Allston)
+/// ```
+///
+/// NOTE: Optimizer and execution behavior for this partitioning is intentionally
+/// not implemented and will be introduced incrementally.
+#[derive(Debug, Clone)]
+pub struct RangePartitioning {
+    /// Ordered partitioning key.
+    ordering: LexOrdering,
+    /// Boundaries between adjacent partitions.
+    split_points: Vec<SplitPoint>,
+}
+
+/// A boundary between adjacent range partitions.
+///
+/// A split point is a tuple with one [`ScalarValue`] per sort expression in the
+/// parent [`RangePartitioning`] ordering.
+#[derive(Debug, Clone, PartialEq)]
+pub struct SplitPoint {
+    values: Vec<ScalarValue>,
+}
+
+impl SplitPoint {
+    /// Creates a new split point from its tuple values.
+    pub fn new(values: Vec<ScalarValue>) -> Self {
+        Self { values }
+    }
+
+    /// Returns the tuple values for this split point.
+    pub fn values(&self) -> &[ScalarValue] {
+        &self.values
+    }
+}
+
+impl Display for SplitPoint {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let values = self
+            .values
+            .iter()
+            .map(ToString::to_string)
+            .collect::<Vec<_>>()
+            .join(", ");
+        write!(f, "({values})")
+    }
+}
+
+impl RangePartitioning {
+    /// Creates range partitioning metadata.
+    ///
+    /// The caller is responsible for satisfying the contract documented on
+    /// [`RangePartitioning`].
+    pub fn new(ordering: LexOrdering, split_points: Vec<SplitPoint>) -> Self {
+        Self {

Review Comment:
   Good point. I added this and now use it in the proto.
   
   I also added some unit tests covering this 👍



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to