Kontinuation commented on code in PR #555: URL: https://github.com/apache/sedona-db/pull/555#discussion_r2734980412
########## rust/sedona-spatial-join/src/index/memory_plan.rs: ########## @@ -0,0 +1,217 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::cmp::max; + +use datafusion_common::{DataFusionError, Result}; + +use super::BuildPartition; + +/// The memory accounting summary of a build side partition. This is collected +/// during the build side collection phase and used to estimate the memory usage for +/// running spatial join. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub(crate) struct PartitionMemorySummary { + /// Number of rows in the partition. + pub num_rows: usize, + /// The total memory reserved when collecting this build side partition. + pub reserved_memory: usize, + /// The estimated memory usage for building the spatial index for all the data in + /// this build side partition. + pub estimated_index_memory_usage: usize, +} + +impl From<&BuildPartition> for PartitionMemorySummary { + fn from(partition: &BuildPartition) -> Self { + Self { + num_rows: partition.num_rows, + reserved_memory: partition.reservation.size(), + estimated_index_memory_usage: partition.estimated_spatial_index_memory_usage, + } + } +} + +/// A detailed plan for memory usage during spatial join execution. 
The spatial join +/// could be spatial-partitioned if the reserved memory is not sufficient to hold the +/// entire spatial index. +#[derive(Debug, PartialEq, Eq)] +pub(crate) struct MemoryPlan { + /// The total number of rows in the build side. + pub num_rows: usize, + /// The total memory reserved for the build side. + pub reserved_memory: usize, + /// The estimated memory usage for building the spatial index for the entire build side. + /// It could be larger than [`Self::reserved_memory`], and in that case we need to + /// partition the build side using spatial partitioning. + pub estimated_index_memory_usage: usize, + /// The memory budget for holding the spatial index. If the spatial join is partitioned, + /// this is the memory budget for holding the spatial index of a single partition. + pub memory_for_spatial_index: usize, + /// The memory budget for intermittent usage, such as buffering data during repartitioning. + pub memory_for_intermittent_usage: usize, + /// The number of spatial partitions to split the build side into. + pub num_partitions: usize, +} + +impl MemoryPlan { + /// Write debug info for this memory plan + pub fn debug_print(&self, f: &mut impl std::fmt::Write) -> std::fmt::Result { + writeln!(f, "Memory Plan:")?;

Review Comment:
`{:#?}` is pretty readable, so I'll remove the custom `debug_print` function.

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at: [email protected]
