viirya commented on code in PR #16217: URL: https://github.com/apache/datafusion/pull/16217#discussion_r2124440817
########## datafusion/physical-expr-common/src/sort_expr.rs: ########## @@ -516,162 +460,240 @@ impl Display for LexOrdering { } } -impl FromIterator<PhysicalSortExpr> for LexOrdering { - fn from_iter<T: IntoIterator<Item = PhysicalSortExpr>>(iter: T) -> Self { - let mut lex_ordering = LexOrdering::default(); - - for i in iter { - lex_ordering.push(i); - } +impl IntoIterator for LexOrdering { + type Item = PhysicalSortExpr; + type IntoIter = IntoIter<Self::Item>; - lex_ordering + fn into_iter(self) -> Self::IntoIter { + self.exprs.into_iter() } } -impl Index<usize> for LexOrdering { - type Output = PhysicalSortExpr; +impl<'a> IntoIterator for &'a LexOrdering { + type Item = &'a PhysicalSortExpr; + type IntoIter = std::slice::Iter<'a, PhysicalSortExpr>; - fn index(&self, index: usize) -> &Self::Output { - &self.inner[index] + fn into_iter(self) -> Self::IntoIter { + self.exprs.iter() } } -impl Index<Range<usize>> for LexOrdering { - type Output = [PhysicalSortExpr]; - - fn index(&self, range: Range<usize>) -> &Self::Output { - &self.inner[range] +impl From<LexOrdering> for Vec<PhysicalSortExpr> { + fn from(ordering: LexOrdering) -> Self { + ordering.exprs } } -impl Index<RangeFrom<usize>> for LexOrdering { - type Output = [PhysicalSortExpr]; +/// This object represents a lexicographical ordering requirement and contains +/// a vector of `PhysicalSortRequirement` objects. +/// +/// For example, a `vec![a Some(ASC), b None]` represents a lexicographical +/// requirement that firsts imposes an ordering by column `a` in ascending +/// order, then by column `b` in *any* (ascending or descending) order. The +/// ordering is non-degenerate, meaning it contains at least one element, and +/// it is duplicate-free, meaning it does not contain multiple entries for the +/// same column. +/// +/// Note that a `LexRequirement` need not enforce the uniqueness of its sort +/// expressions after construction like a `LexOrdering` does, because it provides +/// no mutation methods. If such methods become necessary, we will need to +/// enforce uniqueness like the latter object. +#[derive(Debug, Clone, PartialEq)] +pub struct LexRequirement { + reqs: Vec<PhysicalSortRequirement>, +} + +impl LexRequirement { + /// Creates a new [`LexRequirement`] from the given vector of sort expressions. + /// If the vector is empty, returns `None`. + pub fn new(reqs: impl IntoIterator<Item = PhysicalSortRequirement>) -> Option<Self> { + let (non_empty, requirements) = Self::construct(reqs); + non_empty.then_some(requirements) + } + + /// Returns the leading `PhysicalSortRequirement` of the `LexRequirement`. + /// Note that this function does not return an `Option`, as a `LexRequirement` + /// is always non-degenerate (i.e. it contains at least one element). + pub fn first(&self) -> &PhysicalSortRequirement { + // Can safely `unwrap` because `LexRequirement` is non-degenerate: + self.reqs.first().unwrap() + } + + /// Constructs a new `LexRequirement` from the given sort requirements w/o + /// enforcing non-degeneracy. This function is used internally and is not + /// meant (or safe) for external use. + fn construct( + reqs: impl IntoIterator<Item = PhysicalSortRequirement>, + ) -> (bool, Self) { + let mut set = HashSet::new(); + let reqs = reqs + .into_iter() + .filter_map(|r| set.insert(Arc::clone(&r.expr)).then_some(r)) + .collect(); + (!set.is_empty(), Self { reqs }) + } +} - fn index(&self, range_from: RangeFrom<usize>) -> &Self::Output { - &self.inner[range_from] +impl<const N: usize> From<[PhysicalSortRequirement; N]> for LexRequirement { + fn from(value: [PhysicalSortRequirement; N]) -> Self { + // TODO: Replace this assertion with a condition on the generic parameter + // when Rust supports it. + assert!(N > 0); + let (non_empty, requirement) = Self::construct(value); + debug_assert!(non_empty); + requirement } } -impl Index<RangeTo<usize>> for LexOrdering { - type Output = [PhysicalSortExpr]; +impl Deref for LexRequirement { + type Target = [PhysicalSortRequirement]; - fn index(&self, range_to: RangeTo<usize>) -> &Self::Output { - &self.inner[range_to] + fn deref(&self) -> &Self::Target { + self.reqs.as_slice() } } -impl IntoIterator for LexOrdering { - type Item = PhysicalSortExpr; - type IntoIter = IntoIter<PhysicalSortExpr>; +impl IntoIterator for LexRequirement { + type Item = PhysicalSortRequirement; + type IntoIter = IntoIter<Self::Item>; fn into_iter(self) -> Self::IntoIter { - self.inner.into_iter() + self.reqs.into_iter() } } -///`LexOrderingRef` is an alias for the type &`[PhysicalSortExpr]`, which represents -/// a reference to a lexicographical ordering. -#[deprecated(since = "43.0.0", note = "use &LexOrdering instead")] -pub type LexOrderingRef<'a> = &'a [PhysicalSortExpr]; +impl<'a> IntoIterator for &'a LexRequirement { + type Item = &'a PhysicalSortRequirement; + type IntoIter = std::slice::Iter<'a, PhysicalSortRequirement>; -///`LexRequirement` is an struct containing a `Vec<PhysicalSortRequirement>`, which -/// represents a lexicographical ordering requirement. -#[derive(Debug, Default, Clone, PartialEq)] -pub struct LexRequirement { - pub inner: Vec<PhysicalSortRequirement>, + fn into_iter(self) -> Self::IntoIter { + self.reqs.iter() + } } -impl LexRequirement { - pub fn new(inner: Vec<PhysicalSortRequirement>) -> Self { - Self { inner } +impl From<LexRequirement> for Vec<PhysicalSortRequirement> { + fn from(requirement: LexRequirement) -> Self { + requirement.reqs } +} - pub fn is_empty(&self) -> bool { - self.inner.is_empty() +// Cross-conversion utilities between `LexOrdering` and `LexRequirement` +impl From<LexOrdering> for LexRequirement { + fn from(value: LexOrdering) -> Self { + // Can construct directly as `value` is non-degenerate: + let (non_empty, requirements) = + Self::construct(value.into_iter().map(Into::into)); + debug_assert!(non_empty); + requirements } +} - pub fn iter(&self) -> impl Iterator<Item = &PhysicalSortRequirement> { - self.inner.iter() +impl From<LexRequirement> for LexOrdering { + fn from(value: LexRequirement) -> Self { + // Can construct directly as `value` is non-degenerate: + let (non_empty, ordering) = Self::construct(value.into_iter().map(Into::into)); + debug_assert!(non_empty); + ordering + } +} + +/// Represents a plan's input ordering requirements. Vector elements represent +/// alternative ordering requirements in the order of preference. +#[derive(Debug, Clone, PartialEq)] +pub enum OrderingRequirements { Review Comment: I need to trace the change of `required_input_ordering` in other files here as it looks confusing at the first look between `LexRequirement` and `OrderingRequirements`. Maybe `InputOrderingRequirements` or `InputOrderingReq`? Another question is, this looks like can be a separate change in API. Is this also a required part of so called equivalence overhaul? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org