ozankabak commented on code in PR #16217: URL: https://github.com/apache/datafusion/pull/16217#discussion_r2126826372
########## datafusion/physical-expr-common/src/sort_expr.rs: ########## @@ -516,162 +460,240 @@ impl Display for LexOrdering { } } -impl FromIterator<PhysicalSortExpr> for LexOrdering { - fn from_iter<T: IntoIterator<Item = PhysicalSortExpr>>(iter: T) -> Self { - let mut lex_ordering = LexOrdering::default(); - - for i in iter { - lex_ordering.push(i); - } +impl IntoIterator for LexOrdering { + type Item = PhysicalSortExpr; + type IntoIter = IntoIter<Self::Item>; - lex_ordering + fn into_iter(self) -> Self::IntoIter { + self.exprs.into_iter() } } -impl Index<usize> for LexOrdering { - type Output = PhysicalSortExpr; +impl<'a> IntoIterator for &'a LexOrdering { + type Item = &'a PhysicalSortExpr; + type IntoIter = std::slice::Iter<'a, PhysicalSortExpr>; - fn index(&self, index: usize) -> &Self::Output { - &self.inner[index] + fn into_iter(self) -> Self::IntoIter { + self.exprs.iter() } } -impl Index<Range<usize>> for LexOrdering { - type Output = [PhysicalSortExpr]; - - fn index(&self, range: Range<usize>) -> &Self::Output { - &self.inner[range] +impl From<LexOrdering> for Vec<PhysicalSortExpr> { + fn from(ordering: LexOrdering) -> Self { + ordering.exprs } } -impl Index<RangeFrom<usize>> for LexOrdering { - type Output = [PhysicalSortExpr]; +/// This object represents a lexicographical ordering requirement and contains +/// a vector of `PhysicalSortRequirement` objects. +/// +/// For example, a `vec![a Some(ASC), b None]` represents a lexicographical +/// requirement that firsts imposes an ordering by column `a` in ascending +/// order, then by column `b` in *any* (ascending or descending) order. The +/// ordering is non-degenerate, meaning it contains at least one element, and +/// it is duplicate-free, meaning it does not contain multiple entries for the +/// same column. +/// +/// Note that a `LexRequirement` need not enforce the uniqueness of its sort +/// expressions after construction like a `LexOrdering` does, because it provides +/// no mutation methods. If such methods become necessary, we will need to +/// enforce uniqueness like the latter object. +#[derive(Debug, Clone, PartialEq)] +pub struct LexRequirement { + reqs: Vec<PhysicalSortRequirement>, +} + +impl LexRequirement { + /// Creates a new [`LexRequirement`] from the given vector of sort expressions. + /// If the vector is empty, returns `None`. + pub fn new(reqs: impl IntoIterator<Item = PhysicalSortRequirement>) -> Option<Self> { + let (non_empty, requirements) = Self::construct(reqs); + non_empty.then_some(requirements) + } + + /// Returns the leading `PhysicalSortRequirement` of the `LexRequirement`. + /// Note that this function does not return an `Option`, as a `LexRequirement` + /// is always non-degenerate (i.e. it contains at least one element). + pub fn first(&self) -> &PhysicalSortRequirement { + // Can safely `unwrap` because `LexRequirement` is non-degenerate: + self.reqs.first().unwrap() + } + + /// Constructs a new `LexRequirement` from the given sort requirements w/o + /// enforcing non-degeneracy. This function is used internally and is not + /// meant (or safe) for external use. + fn construct( + reqs: impl IntoIterator<Item = PhysicalSortRequirement>, + ) -> (bool, Self) { + let mut set = HashSet::new(); + let reqs = reqs + .into_iter() + .filter_map(|r| set.insert(Arc::clone(&r.expr)).then_some(r)) + .collect(); + (!set.is_empty(), Self { reqs }) + } +} - fn index(&self, range_from: RangeFrom<usize>) -> &Self::Output { - &self.inner[range_from] +impl<const N: usize> From<[PhysicalSortRequirement; N]> for LexRequirement { + fn from(value: [PhysicalSortRequirement; N]) -> Self { + // TODO: Replace this assertion with a condition on the generic parameter + // when Rust supports it. + assert!(N > 0); + let (non_empty, requirement) = Self::construct(value); + debug_assert!(non_empty); + requirement } } -impl Index<RangeTo<usize>> for LexOrdering { - type Output = [PhysicalSortExpr]; +impl Deref for LexRequirement { + type Target = [PhysicalSortRequirement]; - fn index(&self, range_to: RangeTo<usize>) -> &Self::Output { - &self.inner[range_to] + fn deref(&self) -> &Self::Target { + self.reqs.as_slice() } } -impl IntoIterator for LexOrdering { - type Item = PhysicalSortExpr; - type IntoIter = IntoIter<PhysicalSortExpr>; +impl IntoIterator for LexRequirement { + type Item = PhysicalSortRequirement; + type IntoIter = IntoIter<Self::Item>; fn into_iter(self) -> Self::IntoIter { - self.inner.into_iter() + self.reqs.into_iter() } } -///`LexOrderingRef` is an alias for the type &`[PhysicalSortExpr]`, which represents -/// a reference to a lexicographical ordering. -#[deprecated(since = "43.0.0", note = "use &LexOrdering instead")] -pub type LexOrderingRef<'a> = &'a [PhysicalSortExpr]; +impl<'a> IntoIterator for &'a LexRequirement { + type Item = &'a PhysicalSortRequirement; + type IntoIter = std::slice::Iter<'a, PhysicalSortRequirement>; -///`LexRequirement` is an struct containing a `Vec<PhysicalSortRequirement>`, which -/// represents a lexicographical ordering requirement. -#[derive(Debug, Default, Clone, PartialEq)] -pub struct LexRequirement { - pub inner: Vec<PhysicalSortRequirement>, + fn into_iter(self) -> Self::IntoIter { + self.reqs.iter() + } } -impl LexRequirement { - pub fn new(inner: Vec<PhysicalSortRequirement>) -> Self { - Self { inner } +impl From<LexRequirement> for Vec<PhysicalSortRequirement> { + fn from(requirement: LexRequirement) -> Self { + requirement.reqs } +} - pub fn is_empty(&self) -> bool { - self.inner.is_empty() +// Cross-conversion utilities between `LexOrdering` and `LexRequirement` +impl From<LexOrdering> for LexRequirement { + fn from(value: LexOrdering) -> Self { + // Can construct directly as `value` is non-degenerate: + let (non_empty, requirements) = + Self::construct(value.into_iter().map(Into::into)); + debug_assert!(non_empty); + requirements } +} - pub fn iter(&self) -> impl Iterator<Item = &PhysicalSortRequirement> { - self.inner.iter() +impl From<LexRequirement> for LexOrdering { + fn from(value: LexRequirement) -> Self { + // Can construct directly as `value` is non-degenerate: + let (non_empty, ordering) = Self::construct(value.into_iter().map(Into::into)); + debug_assert!(non_empty); + ordering + } +} + +/// Represents a plan's input ordering requirements. Vector elements represent +/// alternative ordering requirements in the order of preference. +#[derive(Debug, Clone, PartialEq)] +pub enum OrderingRequirements { Review Comment: We can change the name if there is a consensus on a better name. I don't really have a strong opinion on this. Regarding your last question, since `LexRequirement`s become non-degenerate, a lot of the call sites require updating. I tried organizing the changes relating to `OrderingRequirements` in a separate PR, but it didn't really reduce the diff because the call sites required updating anyway. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org