ozankabak commented on code in PR #16217:
URL: https://github.com/apache/datafusion/pull/16217#discussion_r2126827741


##########
datafusion/physical-expr-common/src/sort_expr.rs:
##########
@@ -516,162 +460,240 @@ impl Display for LexOrdering {
     }
 }
 
-impl FromIterator<PhysicalSortExpr> for LexOrdering {
-    fn from_iter<T: IntoIterator<Item = PhysicalSortExpr>>(iter: T) -> Self {
-        let mut lex_ordering = LexOrdering::default();
-
-        for i in iter {
-            lex_ordering.push(i);
-        }
+impl IntoIterator for LexOrdering {
+    type Item = PhysicalSortExpr;
+    type IntoIter = IntoIter<Self::Item>;
 
-        lex_ordering
+    fn into_iter(self) -> Self::IntoIter {
+        self.exprs.into_iter()
     }
 }
 
-impl Index<usize> for LexOrdering {
-    type Output = PhysicalSortExpr;
+impl<'a> IntoIterator for &'a LexOrdering {
+    type Item = &'a PhysicalSortExpr;
+    type IntoIter = std::slice::Iter<'a, PhysicalSortExpr>;
 
-    fn index(&self, index: usize) -> &Self::Output {
-        &self.inner[index]
+    fn into_iter(self) -> Self::IntoIter {
+        self.exprs.iter()
     }
 }
 
-impl Index<Range<usize>> for LexOrdering {
-    type Output = [PhysicalSortExpr];
-
-    fn index(&self, range: Range<usize>) -> &Self::Output {
-        &self.inner[range]
+impl From<LexOrdering> for Vec<PhysicalSortExpr> {
+    fn from(ordering: LexOrdering) -> Self {
+        ordering.exprs
     }
 }
 
-impl Index<RangeFrom<usize>> for LexOrdering {
-    type Output = [PhysicalSortExpr];
+/// This object represents a lexicographical ordering requirement and contains
+/// a vector of `PhysicalSortRequirement` objects.
+///
+/// For example, a `vec![a Some(ASC), b None]` represents a lexicographical
+/// requirement that firsts imposes an ordering by column `a` in ascending
+/// order, then by column `b` in *any* (ascending or descending) order. The
+/// ordering is non-degenerate, meaning it contains at least one element, and
+/// it is duplicate-free, meaning it does not contain multiple entries for the
+/// same column.
+///
+/// Note that a `LexRequirement` need not enforce the uniqueness of its sort
+/// expressions after construction like a `LexOrdering` does, because it 
provides
+/// no mutation methods. If such methods become necessary, we will need to
+/// enforce uniqueness like the latter object.
+#[derive(Debug, Clone, PartialEq)]
+pub struct LexRequirement {
+    reqs: Vec<PhysicalSortRequirement>,
+}
+
+impl LexRequirement {
+    /// Creates a new [`LexRequirement`] from the given vector of sort 
expressions.
+    /// If the vector is empty, returns `None`.
+    pub fn new(reqs: impl IntoIterator<Item = PhysicalSortRequirement>) -> 
Option<Self> {
+        let (non_empty, requirements) = Self::construct(reqs);
+        non_empty.then_some(requirements)
+    }
+
+    /// Returns the leading `PhysicalSortRequirement` of the `LexRequirement`.
+    /// Note that this function does not return an `Option`, as a 
`LexRequirement`
+    /// is always non-degenerate (i.e. it contains at least one element).
+    pub fn first(&self) -> &PhysicalSortRequirement {
+        // Can safely `unwrap` because `LexRequirement` is non-degenerate:
+        self.reqs.first().unwrap()
+    }
+
+    /// Constructs a new `LexRequirement` from the given sort requirements w/o
+    /// enforcing non-degeneracy. This function is used internally and is not
+    /// meant (or safe) for external use.
+    fn construct(
+        reqs: impl IntoIterator<Item = PhysicalSortRequirement>,
+    ) -> (bool, Self) {
+        let mut set = HashSet::new();
+        let reqs = reqs
+            .into_iter()
+            .filter_map(|r| set.insert(Arc::clone(&r.expr)).then_some(r))
+            .collect();
+        (!set.is_empty(), Self { reqs })
+    }
+}
 
-    fn index(&self, range_from: RangeFrom<usize>) -> &Self::Output {
-        &self.inner[range_from]
+impl<const N: usize> From<[PhysicalSortRequirement; N]> for LexRequirement {
+    fn from(value: [PhysicalSortRequirement; N]) -> Self {
+        // TODO: Replace this assertion with a condition on the generic 
parameter
+        //       when Rust supports it.
+        assert!(N > 0);
+        let (non_empty, requirement) = Self::construct(value);
+        debug_assert!(non_empty);
+        requirement
     }
 }
 
-impl Index<RangeTo<usize>> for LexOrdering {
-    type Output = [PhysicalSortExpr];
+impl Deref for LexRequirement {
+    type Target = [PhysicalSortRequirement];
 
-    fn index(&self, range_to: RangeTo<usize>) -> &Self::Output {
-        &self.inner[range_to]
+    fn deref(&self) -> &Self::Target {
+        self.reqs.as_slice()
     }
 }
 
-impl IntoIterator for LexOrdering {
-    type Item = PhysicalSortExpr;
-    type IntoIter = IntoIter<PhysicalSortExpr>;
+impl IntoIterator for LexRequirement {
+    type Item = PhysicalSortRequirement;
+    type IntoIter = IntoIter<Self::Item>;
 
     fn into_iter(self) -> Self::IntoIter {
-        self.inner.into_iter()
+        self.reqs.into_iter()
     }
 }
 
-///`LexOrderingRef` is an alias for the type &`[PhysicalSortExpr]`, which 
represents
-/// a reference to a lexicographical ordering.
-#[deprecated(since = "43.0.0", note = "use &LexOrdering instead")]
-pub type LexOrderingRef<'a> = &'a [PhysicalSortExpr];
+impl<'a> IntoIterator for &'a LexRequirement {
+    type Item = &'a PhysicalSortRequirement;
+    type IntoIter = std::slice::Iter<'a, PhysicalSortRequirement>;
 
-///`LexRequirement` is an struct containing a `Vec<PhysicalSortRequirement>`, 
which
-/// represents a lexicographical ordering requirement.
-#[derive(Debug, Default, Clone, PartialEq)]
-pub struct LexRequirement {
-    pub inner: Vec<PhysicalSortRequirement>,
+    fn into_iter(self) -> Self::IntoIter {
+        self.reqs.iter()
+    }
 }
 
-impl LexRequirement {
-    pub fn new(inner: Vec<PhysicalSortRequirement>) -> Self {
-        Self { inner }
+impl From<LexRequirement> for Vec<PhysicalSortRequirement> {
+    fn from(requirement: LexRequirement) -> Self {
+        requirement.reqs
     }
+}
 
-    pub fn is_empty(&self) -> bool {
-        self.inner.is_empty()
+// Cross-conversion utilities between `LexOrdering` and `LexRequirement`
+impl From<LexOrdering> for LexRequirement {
+    fn from(value: LexOrdering) -> Self {
+        // Can construct directly as `value` is non-degenerate:
+        let (non_empty, requirements) =
+            Self::construct(value.into_iter().map(Into::into));
+        debug_assert!(non_empty);
+        requirements
     }
+}
 
-    pub fn iter(&self) -> impl Iterator<Item = &PhysicalSortRequirement> {
-        self.inner.iter()
+impl From<LexRequirement> for LexOrdering {
+    fn from(value: LexRequirement) -> Self {
+        // Can construct directly as `value` is non-degenerate:
+        let (non_empty, ordering) = 
Self::construct(value.into_iter().map(Into::into));
+        debug_assert!(non_empty);
+        ordering
+    }
+}
+
+/// Represents a plan's input ordering requirements. Vector elements represent
+/// alternative ordering requirements in the order of preference.
+#[derive(Debug, Clone, PartialEq)]
+pub enum OrderingRequirements {
+    /// The operator is not able to work without one of these requirements.
+    Hard(Vec<LexRequirement>),
+    /// The operator can benefit from these input orderings when available,
+    /// but can still work in the absence of any input ordering.
+    Soft(Vec<LexRequirement>),
+}
+
+impl OrderingRequirements {
+    /// Creates a new instance from the given alternatives. If an empty list of
+    /// alternatives are given, returns `None`.
+    pub fn new_alternatives(
+        alternatives: impl IntoIterator<Item = LexRequirement>,
+        soft: bool,
+    ) -> Option<Self> {
+        let alternatives = alternatives.into_iter().collect::<Vec<_>>();
+        (!alternatives.is_empty()).then(|| {
+            if soft {
+                Self::Soft(alternatives)
+            } else {
+                Self::Hard(alternatives)
+            }
+        })
     }
 
-    pub fn push(&mut self, physical_sort_requirement: PhysicalSortRequirement) 
{
-        self.inner.push(physical_sort_requirement)
+    /// Creates a new instance with a single hard requirement.
+    pub fn new(requirement: LexRequirement) -> Self {
+        Self::Hard(vec![requirement])
     }
 
-    /// Create a new [`LexRequirement`] from a [`LexOrdering`]
-    ///
-    /// Returns [`LexRequirement`] that requires the exact
-    /// sort of the [`PhysicalSortExpr`]s in `ordering`
-    pub fn from_lex_ordering(ordering: LexOrdering) -> Self {
-        Self::new(
-            ordering
-                .into_iter()
-                .map(PhysicalSortRequirement::from)
-                .collect(),
-        )
+    /// Creates a new instance with a single soft requirement.
+    pub fn new_soft(requirement: LexRequirement) -> Self {
+        Self::Soft(vec![requirement])
     }
 
-    /// Constructs a duplicate-free `LexOrderingReq` by filtering out
-    /// duplicate entries that have same physical expression inside.
-    ///
-    /// For example, `vec![a Some(ASC), a Some(DESC)]` collapses to `vec![a
-    /// Some(ASC)]`.
-    pub fn collapse(self) -> Self {
-        let mut output = Vec::<PhysicalSortRequirement>::new();
-        for item in self {
-            if !output.iter().any(|req| req.expr.eq(&item.expr)) {
-                output.push(item);
-            }
+    /// Adds an alternative requirement to the list of alternatives.
+    pub fn add_alternative(&mut self, requirement: LexRequirement) {
+        match self {
+            Self::Hard(alts) | Self::Soft(alts) => alts.push(requirement),
         }
-        LexRequirement::new(output)
     }
-}
 
-impl From<LexOrdering> for LexRequirement {
-    fn from(value: LexOrdering) -> Self {
-        Self::from_lex_ordering(value)
+    /// Returns the first (i.e. most preferred) `LexRequirement` among
+    /// alternative requirements.
+    pub fn into_single(self) -> LexRequirement {
+        match self {
+            Self::Hard(mut alts) | Self::Soft(mut alts) => alts.swap_remove(0),
+        }
     }
-}
 
-impl Deref for LexRequirement {
-    type Target = [PhysicalSortRequirement];
-
-    fn deref(&self) -> &Self::Target {
-        self.inner.as_slice()
+    /// Returns a reference to the first (i.e. most preferred) `LexRequirement`
+    /// among alternative requirements.
+    pub fn first(&self) -> &LexRequirement {
+        match self {
+            Self::Hard(alts) | Self::Soft(alts) => &alts[0],
+        }
     }
-}
-
-impl FromIterator<PhysicalSortRequirement> for LexRequirement {
-    fn from_iter<T: IntoIterator<Item = PhysicalSortRequirement>>(iter: T) -> 
Self {
-        let mut lex_requirement = LexRequirement::new(vec![]);
 
-        for i in iter {
-            lex_requirement.inner.push(i);
+    /// Returns all alternatives as a vector of `LexRequirement` objects and a
+    /// boolean value indicating softness/hardness of the requirements.
+    pub fn get_alternatives(self) -> (Vec<LexRequirement>, bool) {

Review Comment:
   Agreed, will follow your suggestion.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to