Kontinuation commented on code in PR #362:
URL: https://github.com/apache/sedona-db/pull/362#discussion_r2602033344
##########
rust/sedona-spatial-join/src/build_index.rs:
##########
@@ -33,14 +33,68 @@ use crate::{
spatial_predicate::SpatialPredicate,
};
-pub(crate) async fn build_index(
+/// Sequential version of build_index that doesn't spawn tasks.
+/// Used in execution contexts without async runtime support (e.g., Spark/Comet JNI)
+pub async fn build_index_seq(
context: Arc<TaskContext>,
build_schema: SchemaRef,
build_streams: Vec<SendableRecordBatchStream>,
spatial_predicate: SpatialPredicate,
join_type: JoinType,
probe_threads_count: usize,
metrics: ExecutionPlanMetricsSet,
+) -> Result<SpatialIndex> {
+ build_index_impl(
+ context,
+ build_schema,
+ build_streams,
+ spatial_predicate,
+ join_type,
+ probe_threads_count,
+ metrics,
+ false, // concurrent = false
+ )
+ .await
+}
+
+/// Concurrent version of build_index that spawns tasks for parallel collection.
+pub async fn build_index(
+ context: Arc<TaskContext>,
+ build_schema: SchemaRef,
+ build_streams: Vec<SendableRecordBatchStream>,
+ spatial_predicate: SpatialPredicate,
+ join_type: JoinType,
+ probe_threads_count: usize,
+ metrics: ExecutionPlanMetricsSet,
+) -> Result<SpatialIndex> {
+ build_index_impl(
+ context,
+ build_schema,
+ build_streams,
+ spatial_predicate,
+ join_type,
+ probe_threads_count,
+ metrics,
+ true, // concurrent = true
+ )
+ .await
+}
+
+/// Internal implementation of build_index with configurable concurrency.
+///
+/// # Arguments
+/// * `concurrent` - If true, uses `collect_all` which spawns tasks for parallel collection.
+/// If false, collects partitions sequentially (for JNI/embedded contexts).
+#[allow(clippy::too_many_arguments)]
+async fn build_index_impl(
+ context: Arc<TaskContext>,
+ build_schema: SchemaRef,
+ build_streams: Vec<SendableRecordBatchStream>,
+ spatial_predicate: SpatialPredicate,
+ join_type: JoinType,
+ probe_threads_count: usize,
+ metrics: ExecutionPlanMetricsSet,
+ concurrent: bool,
Review Comment:
I would also prefer renaming `use_sequential_index_build` to
`concurrent_build_side_collection`, since `concurrent_build_side_collection`
accurately describes which operation is affected by this configuration.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]