This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push:
new 7f821cc feat: refactor KNN join with new geo-index trait and
lock-free shared geometry cache (#169)
7f821cc is described below
commit 7f821cc3ea2e6a83ea0ef1140a3d6dcd35313837
Author: Feng Zhang <[email protected]>
AuthorDate: Tue Oct 7 21:20:00 2025 -0700
feat: refactor KNN join with new geo-index trait and lock-free shared
geometry cache (#169)
---
.github/workflows/rust.yml | 38 +++-
Cargo.lock | 333 +++++++++++++++++-----------------
Cargo.toml | 1 +
rust/sedona-spatial-join/Cargo.toml | 1 +
rust/sedona-spatial-join/src/index.rs | 236 +++++++++++++++---------
5 files changed, 354 insertions(+), 255 deletions(-)
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index c056ec3..f7ea8d1 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -97,6 +97,9 @@ jobs:
if: steps.cache-vcpkg.outputs.cache-hit != 'true'
run: |
./vcpkg/vcpkg install abseil openssl
+ # Clean up vcpkg buildtrees and downloads to save space
+ rm -rf vcpkg/buildtrees
+ rm -rf vcpkg/downloads
- name: Use stable Rust
id: rust
@@ -106,33 +109,62 @@ jobs:
- uses: Swatinem/rust-cache@v2
with:
# Update this key to force a new cache
- prefix-key: "rust-${{ matrix.name }}-v2"
+ prefix-key: "rust-${{ matrix.name }}-v3"
+
+ - name: Free Disk Space (Ubuntu)
+ uses: jlumbroso/free-disk-space@main
+ with:
+ # Free up space by removing tools we don't need
+ tool-cache: false # Keep tool cache as we need build tools
+ android: true # Remove Android SDK (not needed)
+ dotnet: true # Remove .NET runtime (not needed)
+ haskell: true # Remove Haskell toolchain (not needed)
+ large-packages: false # Keep essential packages including build-essential
+ swap-storage: true # Remove swap file to free space
+ docker-images: true # Remove docker images (not needed)
- name: Install dependencies
shell: bash
- run: sudo apt-get update && sudo apt-get install -y libgeos-dev
+ run: |
+ sudo apt-get update && sudo apt-get install -y libgeos-dev
- name: Build
if: matrix.name == 'build'
run: |
cargo build --workspace --all-targets --all-features
+ # Clean up build artifacts aggressively
+ rm -rf target/debug/deps
+ rm -rf target/debug/incremental
+ rm -rf target/debug/build
- name: Clippy
if: matrix.name == 'clippy'
run: |
cargo clippy --workspace --all-targets --all-features -- -Dwarnings
+ # Clean up clippy artifacts aggressively
+ rm -rf target/debug/deps
+ rm -rf target/debug/incremental
+ rm -rf target/debug/build
- name: Test
if: matrix.name == 'test'
run: |
cargo test --workspace --all-targets --all-features
- # Clean up intermediate build artifacts to free disk space
+ # Clean up intermediate build artifacts to free disk space aggressively
cargo clean -p sedona-s2geography
+ rm -rf target/debug/deps
+ rm -rf target/debug/incremental
+ rm -rf target/debug/build
+ # Also clean target/.rustc_info.json and other cache files
+ find target/debug -name "*.rlib" -delete
+ find target/debug -name "*.rmeta" -delete
- name: Doctests
if: matrix.name == 'test'
run: |
cargo test --workspace --doc --all-features
+ # Clean up after doctests
+ cargo clean
- name: Check docs
if: matrix.name == 'docs'
diff --git a/Cargo.lock b/Cargo.lock
index af9e3f1..72f67a2 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -73,9 +73,9 @@ dependencies = [
[[package]]
name = "addr2line"
-version = "0.24.2"
+version = "0.25.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1"
+checksum = "1b5d307320b3181d6d7954e663bd7c774a838b8220fe0593c86d9fb09f498b4b"
dependencies = [
"gimli",
]
@@ -153,9 +153,9 @@ checksum =
"4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
[[package]]
name = "anstream"
-version = "0.6.20"
+version = "0.6.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3ae563653d1938f79b1ab1b5e668c87c76a9930414574a6583a7b7e11a8e6192"
+checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a"
dependencies = [
"anstyle",
"anstyle-parse",
@@ -168,9 +168,9 @@ dependencies = [
[[package]]
name = "anstyle"
-version = "1.0.11"
+version = "1.0.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd"
+checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78"
[[package]]
name = "anstyle-parse"
@@ -545,9 +545,9 @@ checksum =
"c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
[[package]]
name = "aws-config"
-version = "1.8.6"
+version = "1.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8bc1b40fb26027769f16960d2f4a6bc20c4bb755d403e552c8c1a73af433c246"
+checksum = "04b37ddf8d2e9744a0b9c19ce0b78efe4795339a90b66b7bae77987092cd2e69"
dependencies = [
"aws-credential-types",
"aws-runtime",
@@ -575,9 +575,9 @@ dependencies = [
[[package]]
name = "aws-credential-types"
-version = "1.2.6"
+version = "1.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d025db5d9f52cbc413b167136afb3d8aeea708c0d8884783cf6253be5e22f6f2"
+checksum = "799a1290207254984cb7c05245111bc77958b92a3c9bb449598044b36341cce6"
dependencies = [
"aws-smithy-async",
"aws-smithy-runtime-api",
@@ -597,9 +597,9 @@ dependencies = [
[[package]]
name = "aws-lc-sys"
-version = "0.32.0"
+version = "0.32.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ee74396bee4da70c2e27cf94762714c911725efe69d9e2672f998512a67a4ce4"
+checksum = "a2b715a6010afb9e457ca2b7c9d2b9c344baa8baed7b38dc476034c171b32575"
dependencies = [
"bindgen 0.72.1",
"cc",
@@ -611,9 +611,9 @@ dependencies = [
[[package]]
name = "aws-runtime"
-version = "1.5.10"
+version = "1.5.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c034a1bc1d70e16e7f4e4caf7e9f7693e4c9c24cd91cf17c2a0b21abaebc7c8b"
+checksum = "2e1ed337dabcf765ad5f2fb426f13af22d576328aaf09eac8f70953530798ec0"
dependencies = [
"aws-credential-types",
"aws-sigv4",
@@ -635,9 +635,9 @@ dependencies = [
[[package]]
name = "aws-sdk-sso"
-version = "1.84.0"
+version = "1.85.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "357a841807f6b52cb26123878b3326921e2a25faca412fabdd32bd35b7edd5d3"
+checksum = "2f2c741e2e439f07b5d1b33155e246742353d82167c785a2ff547275b7e32483"
dependencies = [
"aws-credential-types",
"aws-runtime",
@@ -657,9 +657,9 @@ dependencies = [
[[package]]
name = "aws-sdk-ssooidc"
-version = "1.86.0"
+version = "1.87.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9d1cc7fb324aa12eb4404210e6381195c5b5e9d52c2682384f295f38716dd3c7"
+checksum = "6428ae5686b18c0ee99f6f3c39d94ae3f8b42894cdc35c35d8fb2470e9db2d4c"
dependencies = [
"aws-credential-types",
"aws-runtime",
@@ -679,9 +679,9 @@ dependencies = [
[[package]]
name = "aws-sdk-sts"
-version = "1.86.0"
+version = "1.87.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e7d835f123f307cafffca7b9027c14979f1d403b417d8541d67cf252e8a21e35"
+checksum = "5871bec9a79a3e8d928c7788d654f135dde0e71d2dd98089388bab36b37ef607"
dependencies = [
"aws-credential-types",
"aws-runtime",
@@ -724,9 +724,9 @@ dependencies = [
[[package]]
name = "aws-smithy-async"
-version = "1.2.5"
+version = "1.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1e190749ea56f8c42bf15dd76c65e14f8f765233e6df9b0506d9d934ebef867c"
+checksum = "127fcfad33b7dfc531141fda7e1c402ac65f88aca5511a4d31e2e3d2cd01ce9c"
dependencies = [
"futures-util",
"pin-project-lite",
@@ -735,9 +735,9 @@ dependencies = [
[[package]]
name = "aws-smithy-http"
-version = "0.62.3"
+version = "0.62.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7c4dacf2d38996cf729f55e7a762b30918229917eca115de45dfa8dfb97796c9"
+checksum = "3feafd437c763db26aa04e0cc7591185d0961e64c61885bece0fb9d50ceac671"
dependencies = [
"aws-smithy-runtime-api",
"aws-smithy-types",
@@ -755,9 +755,9 @@ dependencies = [
[[package]]
name = "aws-smithy-http-client"
-version = "1.1.1"
+version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "147e8eea63a40315d704b97bf9bc9b8c1402ae94f89d5ad6f7550d963309da1b"
+checksum = "1053b5e587e6fa40ce5a79ea27957b04ba660baa02b28b7436f64850152234f1"
dependencies = [
"aws-smithy-async",
"aws-smithy-runtime-api",
@@ -779,27 +779,27 @@ dependencies = [
[[package]]
name = "aws-smithy-json"
-version = "0.61.5"
+version = "0.61.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eaa31b350998e703e9826b2104dd6f63be0508666e1aba88137af060e8944047"
+checksum = "cff418fc8ec5cadf8173b10125f05c2e7e1d46771406187b2c878557d4503390"
dependencies = [
"aws-smithy-types",
]
[[package]]
name = "aws-smithy-observability"
-version = "0.1.3"
+version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9364d5989ac4dd918e5cc4c4bdcc61c9be17dcd2586ea7f69e348fc7c6cab393"
+checksum = "2d1881b1ea6d313f9890710d65c158bdab6fb08c91ea825f74c1c8c357baf4cc"
dependencies = [
"aws-smithy-runtime-api",
]
[[package]]
name = "aws-smithy-query"
-version = "0.60.7"
+version = "0.60.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f2fbd61ceb3fe8a1cb7352e42689cec5335833cd9f94103a61e98f9bb61c64bb"
+checksum = "d28a63441360c477465f80c7abac3b9c4d075ca638f982e605b7dc2a2c7156c9"
dependencies = [
"aws-smithy-types",
"urlencoding",
@@ -807,9 +807,9 @@ dependencies = [
[[package]]
name = "aws-smithy-runtime"
-version = "1.9.2"
+version = "1.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4fa63ad37685ceb7762fa4d73d06f1d5493feb88e3f27259b9ed277f4c01b185"
+checksum = "40ab99739082da5347660c556689256438defae3bcefd66c52b095905730e404"
dependencies = [
"aws-smithy-async",
"aws-smithy-http",
@@ -831,9 +831,9 @@ dependencies = [
[[package]]
name = "aws-smithy-runtime-api"
-version = "1.9.0"
+version = "1.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "07f5e0fc8a6b3f2303f331b94504bbf754d85488f402d6f1dd7a6080f99afe56"
+checksum = "3683c5b152d2ad753607179ed71988e8cfd52964443b4f74fd8e552d0bbfeb46"
dependencies = [
"aws-smithy-async",
"aws-smithy-types",
@@ -848,9 +848,9 @@ dependencies = [
[[package]]
name = "aws-smithy-types"
-version = "1.3.2"
+version = "1.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d498595448e43de7f4296b7b7a18a8a02c61ec9349128c80a368f7c3b4ab11a8"
+checksum = "9f5b3a7486f6690ba25952cabf1e7d75e34d69eaff5081904a47bc79074d6457"
dependencies = [
"base64-simd",
"bytes",
@@ -871,9 +871,9 @@ dependencies = [
[[package]]
name = "aws-smithy-xml"
-version = "0.60.10"
+version = "0.60.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3db87b96cb1b16c024980f133968d52882ca0daaee3a086c6decc500f6c99728"
+checksum = "e9c34127e8c624bc2999f3b657e749c1393bedc9cd97b92a804db8ced4d2e163"
dependencies = [
"xmlparser",
]
@@ -894,9 +894,9 @@ dependencies = [
[[package]]
name = "backtrace"
-version = "0.3.75"
+version = "0.3.76"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002"
+checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6"
dependencies = [
"addr2line",
"cfg-if",
@@ -904,7 +904,7 @@ dependencies = [
"miniz_oxide",
"object",
"rustc-demangle",
- "windows-targets 0.52.6",
+ "windows-link",
]
[[package]]
@@ -1043,9 +1043,9 @@ checksum =
"46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43"
[[package]]
name = "bytemuck"
-version = "1.23.2"
+version = "1.24.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3995eaeebcdf32f91f980d360f78732ddc061097ab4e39991ae7a6ace9194677"
+checksum = "1fbdf580320f38b612e485521afda1ee26d10cc9884efaaa750d383e13e3c5f4"
[[package]]
name = "byteorder"
@@ -1121,9 +1121,9 @@ checksum =
"37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]]
name = "cc"
-version = "1.2.38"
+version = "1.2.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "80f41ae168f955c12fb8960b057d70d0ca153fb83182b57d86380443527be7e9"
+checksum = "e1d05d92f4b1fd76aad469d46cdd858ca761576082cd37df81416691e50199fb"
dependencies = [
"find-msvc-tools",
"jobserver",
@@ -2271,7 +2271,7 @@ dependencies = [
"libc",
"option-ext",
"redox_users",
- "windows-sys 0.61.1",
+ "windows-sys 0.61.2",
]
[[package]]
@@ -2358,7 +2358,7 @@ source =
"registry+https://github.com/rust-lang/crates.io-index"
checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
dependencies = [
"libc",
- "windows-sys 0.61.1",
+ "windows-sys 0.61.2",
]
[[package]]
@@ -2398,9 +2398,9 @@ dependencies = [
[[package]]
name = "find-msvc-tools"
-version = "0.1.2"
+version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1ced73b1dacfc750a6db6c0a0c3a3853c8b41997e2e2c563dc90804ae6867959"
+checksum = "0399f9d26e5191ce32c498bebd31e7a3ceabc2745f0ac54af3f335126c3f24b3"
[[package]]
name = "fixedbitset"
@@ -2420,9 +2420,9 @@ dependencies = [
[[package]]
name = "flate2"
-version = "1.1.2"
+version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d"
+checksum = "dc5a4e564e38c699f2880d3fda590bedc2e69f3f84cd48b457bd892ce61d0aa9"
dependencies = [
"crc32fast",
"libz-rs-sys",
@@ -2616,7 +2616,7 @@ dependencies = [
[[package]]
name = "geo-index"
version = "0.3.1"
-source = "git+https://github.com/wherobots/geo-index.git?branch=main#f7d5bef2044831e78b2deb095f1af932128d74e4"
+source = "git+https://github.com/wherobots/geo-index.git?branch=main#6a03f0a2e3ba7ecfaacbf18019008449b8c93541"
dependencies = [
"bytemuck",
"float_next_after",
@@ -2626,7 +2626,6 @@ dependencies = [
"num-traits",
"thiserror 1.0.69",
"tinyvec",
- "wkt 0.14.0",
]
[[package]]
@@ -2733,9 +2732,9 @@ dependencies = [
[[package]]
name = "gimli"
-version = "0.31.1"
+version = "0.32.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f"
+checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7"
[[package]]
name = "glob"
@@ -3429,7 +3428,7 @@ source =
"registry+https://github.com/rust-lang/crates.io-index"
checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667"
dependencies = [
"cfg-if",
- "windows-targets 0.53.4",
+ "windows-targets 0.53.5",
]
[[package]]
@@ -3507,11 +3506,10 @@ checksum =
"f5e54036fe321fd421e10d732f155734c4e4afd610dd556d9a82833ab3ee0bed"
[[package]]
name = "lock_api"
-version = "0.4.13"
+version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765"
+checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965"
dependencies = [
- "autocfg",
"scopeguard",
]
@@ -3603,6 +3601,7 @@ source =
"registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
dependencies = [
"adler2",
+ "simd-adler32",
]
[[package]]
@@ -3752,9 +3751,9 @@ dependencies = [
[[package]]
name = "object"
-version = "0.36.7"
+version = "0.37.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87"
+checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe"
dependencies = [
"memchr",
]
@@ -3787,7 +3786,7 @@ dependencies = [
"serde",
"serde_json",
"serde_urlencoded",
- "thiserror 2.0.16",
+ "thiserror 2.0.17",
"tokio",
"tracing",
"url",
@@ -3843,9 +3842,9 @@ checksum =
"1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e"
[[package]]
name = "parking_lot"
-version = "0.12.4"
+version = "0.12.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13"
+checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a"
dependencies = [
"lock_api",
"parking_lot_core",
@@ -3853,15 +3852,15 @@ dependencies = [
[[package]]
name = "parking_lot_core"
-version = "0.9.11"
+version = "0.9.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5"
+checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1"
dependencies = [
"cfg-if",
"libc",
"redox_syscall",
"smallvec",
- "windows-targets 0.52.6",
+ "windows-link",
]
[[package]]
@@ -3915,9 +3914,9 @@ checksum =
"9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
[[package]]
name = "petgraph"
-version = "0.8.2"
+version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "54acf3a685220b533e437e264e4d932cfbdc4cc7ec0cd232ed73c08d03b8a7ca"
+checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455"
dependencies = [
"fixedbitset",
"hashbrown 0.15.5",
@@ -4095,9 +4094,9 @@ dependencies = [
[[package]]
name = "psm"
-version = "0.1.26"
+version = "0.1.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6e944464ec8536cd1beb0bbfd96987eb5e3b72f2ecdafdc5c769a37f1fa2ae1f"
+checksum = "e66fcd288453b748497d8fb18bccc83a16b0518e3906d4b8df0a8d42d93dbb1c"
dependencies = [
"cc",
]
@@ -4194,7 +4193,7 @@ dependencies = [
"rustc-hash",
"rustls",
"socket2",
- "thiserror 2.0.16",
+ "thiserror 2.0.17",
"tokio",
"tracing",
"web-time",
@@ -4215,7 +4214,7 @@ dependencies = [
"rustls",
"rustls-pki-types",
"slab",
- "thiserror 2.0.16",
+ "thiserror 2.0.17",
"tinyvec",
"tracing",
"web-time",
@@ -4237,9 +4236,9 @@ dependencies = [
[[package]]
name = "quote"
-version = "1.0.40"
+version = "1.0.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
+checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1"
dependencies = [
"proc-macro2",
]
@@ -4361,9 +4360,9 @@ dependencies = [
[[package]]
name = "redox_syscall"
-version = "0.5.17"
+version = "0.5.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77"
+checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d"
dependencies = [
"bitflags",
]
@@ -4376,7 +4375,7 @@ checksum =
"a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac"
dependencies = [
"getrandom 0.2.16",
"libredox",
- "thiserror 2.0.16",
+ "thiserror 2.0.17",
]
[[package]]
@@ -4569,7 +4568,7 @@ dependencies = [
"errno",
"libc",
"linux-raw-sys",
- "windows-sys 0.61.1",
+ "windows-sys 0.61.2",
]
[[package]]
@@ -4620,9 +4619,9 @@ dependencies = [
[[package]]
name = "rustls-webpki"
-version = "0.103.6"
+version = "0.103.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8572f3c2cb9934231157b45499fc41e1f58c589fdfb81a844ba873265e80f8eb"
+checksum = "e10b3f4191e8a80e6b43eebabfac91e5dcecebb27a71f04e820c47ec41d314bf"
dependencies = [
"aws-lc-rs",
"ring",
@@ -4720,7 +4719,7 @@ version = "0.1.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1"
dependencies = [
- "windows-sys 0.61.1",
+ "windows-sys 0.61.2",
]
[[package]]
@@ -4731,9 +4730,9 @@ checksum =
"94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "security-framework"
-version = "3.5.0"
+version = "3.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cc198e42d9b7510827939c9a15f5062a0c913f3371d765977e586d2fe6c16f4a"
+checksum = "b3297343eaf830f66ede390ea39da1d462b6b0c1b000f420d0a83f898bbbe6ef"
dependencies = [
"bitflags",
"core-foundation",
@@ -4920,7 +4919,7 @@ dependencies = [
"sedona-functions",
"sedona-schema",
"sedona-testing",
- "thiserror 2.0.16",
+ "thiserror 2.0.17",
]
[[package]]
@@ -4934,7 +4933,7 @@ dependencies = [
"serde",
"serde_json",
"serde_with",
- "thiserror 2.0.16",
+ "thiserror 2.0.17",
"wkb",
"wkt 0.13.0",
]
@@ -5014,7 +5013,7 @@ dependencies = [
"sedona-schema",
"sedona-testing",
"serde_json",
- "thiserror 2.0.16",
+ "thiserror 2.0.17",
"wkb",
]
@@ -5038,7 +5037,7 @@ dependencies = [
"sedona-functions",
"sedona-schema",
"sedona-testing",
- "thiserror 2.0.16",
+ "thiserror 2.0.17",
]
[[package]]
@@ -5075,6 +5074,7 @@ dependencies = [
"geo-traits-ext",
"geo-types",
"geos",
+ "once_cell",
"parking_lot",
"rand 0.8.5",
"rstest",
@@ -5134,7 +5134,7 @@ dependencies = [
"sedona-functions",
"sedona-schema",
"sedona-testing",
- "thiserror 2.0.16",
+ "thiserror 2.0.17",
"wkb",
]
@@ -5160,7 +5160,7 @@ dependencies = [
"sedona-proj",
"sedona-schema",
"sedona-tg",
- "thiserror 2.0.16",
+ "thiserror 2.0.17",
"tokio",
]
@@ -5180,7 +5180,7 @@ dependencies = [
"sedona-geoparquet",
"sedona-proj",
"sedona-schema",
- "thiserror 2.0.16",
+ "thiserror 2.0.17",
"tokio",
]
@@ -5198,9 +5198,9 @@ checksum =
"1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc"
[[package]]
name = "serde"
-version = "1.0.226"
+version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0dca6411025b24b60bfa7ec1fe1f8e710ac09782dca409ee8237ba74b51295fd"
+checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
dependencies = [
"serde_core",
"serde_derive",
@@ -5218,18 +5218,18 @@ dependencies = [
[[package]]
name = "serde_core"
-version = "1.0.226"
+version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ba2ba63999edb9dac981fb34b3e5c0d111a69b0924e253ed29d83f7c99e966a4"
+checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
-version = "1.0.226"
+version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8db53ae22f34573731bafa1db20f04027b2d25e02d8205921b569171699cdb33"
+checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
dependencies = [
"proc-macro2",
"quote",
@@ -5309,6 +5309,12 @@ dependencies = [
"libc",
]
+[[package]]
+name = "simd-adler32"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe"
+
[[package]]
name = "simdutf8"
version = "0.1.5"
@@ -5391,9 +5397,9 @@ checksum =
"a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]]
name = "stacker"
-version = "0.1.21"
+version = "0.1.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cddb07e32ddb770749da91081d8d0ac3a16f1a569a18b20348cd371f5dead06b"
+checksum = "e1f8b29fb42aafcea4edeeb6b2f2d7ecd0d969c48b4cf0d2e64aafc471dd6e59"
dependencies = [
"cc",
"cfg-if",
@@ -5508,7 +5514,7 @@ dependencies = [
"getrandom 0.3.3",
"once_cell",
"rustix",
- "windows-sys 0.61.1",
+ "windows-sys 0.61.2",
]
[[package]]
@@ -5522,11 +5528,11 @@ dependencies = [
[[package]]
name = "thiserror"
-version = "2.0.16"
+version = "2.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3467d614147380f2e4e374161426ff399c91084acd2363eaf549172b3d5e60c0"
+checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8"
dependencies = [
- "thiserror-impl 2.0.16",
+ "thiserror-impl 2.0.17",
]
[[package]]
@@ -5542,9 +5548,9 @@ dependencies = [
[[package]]
name = "thiserror-impl"
-version = "2.0.16"
+version = "2.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6c5e1be1c48b9172ee610da68fd9cd2770e7a4056cb3fc98710ee6906f0c7960"
+checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913"
dependencies = [
"proc-macro2",
"quote",
@@ -5669,9 +5675,9 @@ dependencies = [
[[package]]
name = "tokio-rustls"
-version = "0.26.3"
+version = "0.26.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "05f63835928ca123f1bef57abbcd23bb2ba0ac9ae1235f1e65bda0d06e7786bd"
+checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61"
dependencies = [
"rustls",
"tokio",
@@ -5851,9 +5857,9 @@ dependencies = [
[[package]]
name = "typenum"
-version = "1.18.0"
+version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f"
+checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb"
[[package]]
name = "typewit"
@@ -5875,9 +5881,9 @@ checksum =
"f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
[[package]]
name = "unicode-width"
-version = "0.2.1"
+version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c"
+checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254"
[[package]]
name = "unindent"
@@ -6121,7 +6127,7 @@ version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
dependencies = [
- "windows-sys 0.61.1",
+ "windows-sys 0.61.2",
]
[[package]]
@@ -6132,9 +6138,9 @@ checksum =
"712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows-core"
-version = "0.62.1"
+version = "0.62.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6844ee5416b285084d3d3fffd743b925a6c9385455f64f6d4fa3031c4c2749a9"
+checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb"
dependencies = [
"windows-implement",
"windows-interface",
@@ -6145,9 +6151,9 @@ dependencies = [
[[package]]
name = "windows-implement"
-version = "0.60.1"
+version = "0.60.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "edb307e42a74fb6de9bf3a02d9712678b22399c87e6fa869d6dfcd8c1b7754e0"
+checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf"
dependencies = [
"proc-macro2",
"quote",
@@ -6156,9 +6162,9 @@ dependencies = [
[[package]]
name = "windows-interface"
-version = "0.59.2"
+version = "0.59.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c0abd1ddbc6964ac14db11c7213d6532ef34bd9aa042c2e5935f59d7908b46a5"
+checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358"
dependencies = [
"proc-macro2",
"quote",
@@ -6167,24 +6173,24 @@ dependencies = [
[[package]]
name = "windows-link"
-version = "0.2.0"
+version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "45e46c0661abb7180e7b9c281db115305d49ca1709ab8242adf09666d2173c65"
+checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
[[package]]
name = "windows-result"
-version = "0.4.0"
+version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7084dcc306f89883455a206237404d3eaf961e5bd7e0f312f7c91f57eb44167f"
+checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5"
dependencies = [
"windows-link",
]
[[package]]
name = "windows-strings"
-version = "0.5.0"
+version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7218c655a553b0bed4426cf54b20d7ba363ef543b52d515b3e48d7fd55318dda"
+checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091"
dependencies = [
"windows-link",
]
@@ -6213,14 +6219,14 @@ version = "0.60.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb"
dependencies = [
- "windows-targets 0.53.4",
+ "windows-targets 0.53.5",
]
[[package]]
name = "windows-sys"
-version = "0.61.1"
+version = "0.61.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6f109e41dd4a3c848907eb83d5a42ea98b3769495597450cf6d153507b166f0f"
+checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
dependencies = [
"windows-link",
]
@@ -6243,19 +6249,19 @@ dependencies = [
[[package]]
name = "windows-targets"
-version = "0.53.4"
+version = "0.53.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2d42b7b7f66d2a06854650af09cfdf8713e427a439c97ad65a6375318033ac4b"
+checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3"
dependencies = [
"windows-link",
- "windows_aarch64_gnullvm 0.53.0",
- "windows_aarch64_msvc 0.53.0",
- "windows_i686_gnu 0.53.0",
- "windows_i686_gnullvm 0.53.0",
- "windows_i686_msvc 0.53.0",
- "windows_x86_64_gnu 0.53.0",
- "windows_x86_64_gnullvm 0.53.0",
- "windows_x86_64_msvc 0.53.0",
+ "windows_aarch64_gnullvm 0.53.1",
+ "windows_aarch64_msvc 0.53.1",
+ "windows_i686_gnu 0.53.1",
+ "windows_i686_gnullvm 0.53.1",
+ "windows_i686_msvc 0.53.1",
+ "windows_x86_64_gnu 0.53.1",
+ "windows_x86_64_gnullvm 0.53.1",
+ "windows_x86_64_msvc 0.53.1",
]
[[package]]
@@ -6266,9 +6272,9 @@ checksum =
"32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
[[package]]
name = "windows_aarch64_gnullvm"
-version = "0.53.0"
+version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764"
+checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53"
[[package]]
name = "windows_aarch64_msvc"
@@ -6278,9 +6284,9 @@ checksum =
"09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
[[package]]
name = "windows_aarch64_msvc"
-version = "0.53.0"
+version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c"
+checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006"
[[package]]
name = "windows_i686_gnu"
@@ -6290,9 +6296,9 @@ checksum =
"8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
[[package]]
name = "windows_i686_gnu"
-version = "0.53.0"
+version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3"
+checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3"
[[package]]
name = "windows_i686_gnullvm"
@@ -6302,9 +6308,9 @@ checksum =
"0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
[[package]]
name = "windows_i686_gnullvm"
-version = "0.53.0"
+version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11"
+checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c"
[[package]]
name = "windows_i686_msvc"
@@ -6314,9 +6320,9 @@ checksum =
"240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
[[package]]
name = "windows_i686_msvc"
-version = "0.53.0"
+version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d"
+checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2"
[[package]]
name = "windows_x86_64_gnu"
@@ -6326,9 +6332,9 @@ checksum =
"147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
[[package]]
name = "windows_x86_64_gnu"
-version = "0.53.0"
+version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba"
+checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499"
[[package]]
name = "windows_x86_64_gnullvm"
@@ -6338,9 +6344,9 @@ checksum =
"24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
[[package]]
name = "windows_x86_64_gnullvm"
-version = "0.53.0"
+version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57"
+checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1"
[[package]]
name = "windows_x86_64_msvc"
@@ -6350,9 +6356,9 @@ checksum =
"589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
[[package]]
name = "windows_x86_64_msvc"
-version = "0.53.0"
+version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486"
+checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650"
[[package]]
name = "winnow"
@@ -6407,19 +6413,6 @@ dependencies = [
"thiserror 1.0.69",
]
-[[package]]
-name = "wkt"
-version = "0.14.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "efb2b923ccc882312e559ffaa832a055ba9d1ac0cc8e86b3e25453247e4b81d7"
-dependencies = [
- "geo-traits 0.3.0",
- "geo-types",
- "log",
- "num-traits",
- "thiserror 1.0.69",
-]
-
[[package]]
name = "writeable"
version = "0.6.1"
@@ -6518,9 +6511,9 @@ dependencies = [
[[package]]
name = "zeroize"
-version = "1.8.1"
+version = "1.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde"
+checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0"
[[package]]
name = "zerotrie"
diff --git a/Cargo.toml b/Cargo.toml
index e6d5bda..1c36111 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -84,6 +84,7 @@ object_store = { version = "0.12.0", default-features = false
}
float_next_after = "1"
mimalloc = { version = "0.1", default-features = false }
libmimalloc-sys = { version = "0.1", default-features = false }
+once_cell = "1.20"
geos = { version = "10.0.0", features = ["geo"] }
diff --git a/rust/sedona-spatial-join/Cargo.toml
b/rust/sedona-spatial-join/Cargo.toml
index 0b2029f..4720b86 100644
--- a/rust/sedona-spatial-join/Cargo.toml
+++ b/rust/sedona-spatial-join/Cargo.toml
@@ -42,6 +42,7 @@ datafusion-physical-plan = { workspace = true }
datafusion-execution = { workspace = true }
datafusion-common-runtime = { workspace = true }
futures = { workspace = true }
+once_cell = { workspace = true }
parking_lot = { workspace = true }
geo-generic-alg = { workspace = true }
geo-traits = { workspace = true, features = ["geo-types"] }
diff --git a/rust/sedona-spatial-join/src/index.rs
b/rust/sedona-spatial-join/src/index.rs
index 59335f9..9c5f447 100644
--- a/rust/sedona-spatial-join/src/index.rs
+++ b/rust/sedona-spatial-join/src/index.rs
@@ -14,6 +14,7 @@
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
+use once_cell::sync::OnceCell;
use std::sync::{
atomic::{AtomicUsize, Ordering},
Arc,
@@ -30,7 +31,9 @@ use datafusion_execution::{
use datafusion_expr::{ColumnarValue, JoinType};
use datafusion_physical_plan::metrics::{self, ExecutionPlanMetricsSet,
MetricBuilder};
use futures::StreamExt;
-use geo_index::rtree::distance::{DistanceMetric, EuclideanDistance,
HaversineDistance};
+use geo_index::rtree::distance::{
+ DistanceMetric, EuclideanDistance, GeometryAccessor, HaversineDistance,
+};
use geo_index::rtree::{sort::HilbertSort, RTree, RTreeBuilder, RTreeIndex};
use geo_index::IndexableNum;
use geo_types::{Geometry, Point, Rect};
@@ -235,32 +238,6 @@ impl SpatialIndexBuilder {
geom_idx_vec
}
- /// Build cached geometries for KNN queries to avoid repeated WKB
conversions
- /// Returns both geometries and total WKB size for memory estimation
- fn build_cached_geometries(indexed_batches: &[IndexedBatch]) ->
(Vec<Geometry<f64>>, usize) {
- let mut geometries = Vec::new();
- let mut total_wkb_size = 0;
-
- for indexed_batch in indexed_batches.iter() {
- for wkb_opt in indexed_batch.geom_array.wkbs().iter() {
- if let Some(wkb) = wkb_opt.as_ref() {
- if let Ok(geom) = item_to_geometry(wkb) {
- geometries.push(geom);
- total_wkb_size += wkb.buf().len();
- }
- }
- }
- }
-
- (geometries, total_wkb_size)
- }
-
- /// Estimate the memory usage of cached geometries based on WKB size with
overhead
- fn estimate_geometry_memory(wkb_size: usize) -> usize {
- // Use WKB size as base + overhead for geo::Geometry objects
- wkb_size * 2
- }
-
/// Finish building and return the completed SpatialIndex.
pub fn finish(mut self, schema: SchemaRef) -> Result<SpatialIndex> {
if self.indexed_batches.is_empty() {
@@ -270,6 +247,7 @@ impl SpatialIndexBuilder {
self.options,
AtomicUsize::new(self.probe_threads_count),
self.reservation,
+ self.memory_pool.clone(),
));
}
@@ -297,15 +275,9 @@ impl SpatialIndexBuilder {
ConcurrentReservation::try_new(REFINER_RESERVATION_PREALLOC_SIZE,
refiner_reservation)
.unwrap();
- // Pre-compute geometries for KNN queries to avoid repeated
WKB-to-geometry conversions
- let (cached_geometries, total_wkb_size) =
- Self::build_cached_geometries(&self.indexed_batches);
-
- // Reserve memory for cached geometries using WKB size with overhead
- let geometry_memory_estimate =
Self::estimate_geometry_memory(total_wkb_size);
- let geometry_consumer =
MemoryConsumer::new("SpatialJoinGeometryCache");
- let mut geometry_reservation =
geometry_consumer.register(&self.memory_pool);
- geometry_reservation.try_grow(geometry_memory_estimate)?;
+ let cache_size = batch_pos_vec.len();
+ let knn_components =
+ KnnComponents::new(cache_size, &self.indexed_batches,
self.memory_pool.clone())?;
Ok(SpatialIndex {
schema,
@@ -318,9 +290,8 @@ impl SpatialIndexBuilder {
geom_idx_vec,
visited_left_side,
probe_threads_counter: AtomicUsize::new(self.probe_threads_count),
+ knn_components,
reservation: self.reservation,
- cached_geometries,
- cached_geometry_reservation: geometry_reservation,
})
}
}
@@ -365,19 +336,13 @@ pub(crate) struct SpatialIndex {
/// build side when running left-outer joins. See also
[`report_probe_completed`].
probe_threads_counter: AtomicUsize,
+ /// Shared KNN components (distance metrics and geometry cache) for
efficient KNN queries
+ knn_components: KnnComponents,
+
/// Memory reservation for tracking the memory usage of the spatial index
/// Cleared on `SpatialIndex` drop
#[expect(dead_code)]
reservation: MemoryReservation,
-
- /// Cached vector of geometries for KNN queries to avoid repeated
WKB-to-geometry conversions
- /// This is computed once during index building for performance
optimization
- cached_geometries: Vec<Geometry<f64>>,
-
- /// Memory reservation for tracking the memory usage of cached geometries
- /// Cleared on `SpatialIndex` drop
- #[expect(dead_code)]
- cached_geometry_reservation: MemoryReservation,
}
/// Indexed batch containing the original record batch and the evaluated
geometry array.
@@ -421,6 +386,7 @@ impl SpatialIndex {
options: SpatialJoinOptions,
probe_threads_counter: AtomicUsize,
mut reservation: MemoryReservation,
+ memory_pool: Arc<dyn MemoryPool>,
) -> Self {
let evaluator = create_operand_evaluator(&spatial_predicate,
options.clone());
let refiner = create_refiner(
@@ -432,7 +398,6 @@ impl SpatialIndex {
);
let refiner_reservation = reservation.split(0);
let refiner_reservation = ConcurrentReservation::try_new(0,
refiner_reservation).unwrap();
- let cached_geometry_reservation = reservation.split(0);
let rtree = RTreeBuilder::<f32>::new(0).finish::<HilbertSort>();
Self {
schema,
@@ -445,9 +410,8 @@ impl SpatialIndex {
geom_idx_vec: Vec::new(),
visited_left_side: None,
probe_threads_counter,
+ knn_components: KnnComponents::new(0, &[],
memory_pool.clone()).unwrap(), // Empty index has no cache
reservation,
- cached_geometries: Vec::new(),
- cached_geometry_reservation,
}
}
@@ -455,6 +419,15 @@ impl SpatialIndex {
self.schema.clone()
}
+ /// Create a KNN geometry accessor for accessing geometries with caching
+ fn create_knn_accessor(&self) -> SedonaKnnAdapter<'_> {
+ SedonaKnnAdapter::new(
+ &self.indexed_batches,
+ &self.data_id_to_batch_pos,
+ &self.knn_components,
+ )
+ }
+
/// Get the batch at the given index.
pub(crate) fn get_indexed_batch(&self, batch_idx: usize) -> &RecordBatch {
&self.indexed_batches[batch_idx].batch
@@ -557,32 +530,23 @@ impl SpatialIndex {
}
};
- // Use pre-computed cached geometries for performance
- let geometries = &self.cached_geometries;
-
- if geometries.is_empty() {
- return Ok(JoinResultMetrics {
- count: 0,
- candidate_count: 0,
- });
- }
-
- // Choose distance metric based on use_spheroid parameter
- let distance_metric: Box<dyn DistanceMetric<f32>> = if use_spheroid {
- // For spheroid (geodesic) distance, we use the Haversine formula
as an approximation for now.
- // The distance metric will be used to calculate distances between
geometries for ranking purposes.
- Box::new(HaversineDistance::default())
+ // Select the appropriate distance metric
+ let distance_metric: &dyn DistanceMetric<f32> = if use_spheroid {
+ &self.knn_components.haversine_metric
} else {
- Box::new(EuclideanDistance)
+ &self.knn_components.euclidean_metric
};
+ // Create geometry accessor for on-demand WKB decoding and caching
+ let geometry_accessor = self.create_knn_accessor();
+
// Use neighbors_geometry to find k nearest neighbors
let initial_results = self.rtree.neighbors_geometry(
&probe_geom,
Some(k as usize),
None, // no max_distance filter
- distance_metric.as_ref(),
- geometries,
+ distance_metric,
+ &geometry_accessor,
);
if initial_results.is_empty() {
@@ -601,13 +565,12 @@ impl SpatialIndex {
let mut distances_with_indices: Vec<(f64, u32)> = Vec::new();
for &result_idx in &final_results {
- if (result_idx as usize) < geometries.len() {
- let distance =
distance_metric.geometry_to_geometry_distance(
- &probe_geom,
- &geometries[result_idx as usize],
- );
- if let Some(distance_f64) = distance.to_f64() {
- distances_with_indices.push((distance_f64,
result_idx));
+ if (result_idx as usize) < self.data_id_to_batch_pos.len() {
+ if let Some(item_geom) =
geometry_accessor.get_geometry(result_idx as usize) {
+ let distance =
distance_metric.distance_to_geometry(&probe_geom, item_geom);
+ if let Some(distance_f64) = distance.to_f64() {
+ distances_with_indices.push((distance_f64,
result_idx));
+ }
}
}
}
@@ -654,13 +617,14 @@ impl SpatialIndex {
let mut all_distances_with_indices: Vec<(f64, u32)> =
Vec::new();
for &result_idx in &expanded_results {
- if (result_idx as usize) < geometries.len() {
- let distance =
distance_metric.geometry_to_geometry_distance(
- &probe_geom,
- &geometries[result_idx as usize],
- );
- if let Some(distance_f64) = distance.to_f64() {
- all_distances_with_indices.push((distance_f64,
result_idx));
+ if (result_idx as usize) < self.data_id_to_batch_pos.len()
{
+ if let Some(item_geom) =
geometry_accessor.get_geometry(result_idx as usize)
+ {
+ let distance =
+
distance_metric.distance_to_geometry(&probe_geom, item_geom);
+ if let Some(distance_f64) = distance.to_f64() {
+ all_distances_with_indices.push((distance_f64,
result_idx));
+ }
}
}
}
@@ -827,6 +791,7 @@ pub(crate) async fn build_index(
options,
AtomicUsize::new(probe_threads_count),
reservation,
+ memory_pool,
));
}
@@ -937,6 +902,113 @@ async fn collect_build_partition(
/// Rough estimate for in-memory size of the rtree per rect in bytes
const RTREE_MEMORY_ESTIMATE_PER_RECT: usize = 60;
+/// Shared KNN components that can be reused across queries
+struct KnnComponents {
+ euclidean_metric: EuclideanDistance,
+ haversine_metric: HaversineDistance,
+ /// Pre-allocated vector for geometry cache - lock-free access
+ /// Indexed by rtree data index for O(1) access
+ geometry_cache: Vec<OnceCell<Geometry<f64>>>,
+ /// Memory reservation to track geometry cache memory usage
+ _reservation: MemoryReservation,
+}
+
+impl KnnComponents {
+ fn new(
+ cache_size: usize,
+ indexed_batches: &[IndexedBatch],
+ memory_pool: Arc<dyn MemoryPool>,
+ ) -> datafusion_common::Result<Self> {
+ // Create memory consumer and reservation for geometry cache
+ let consumer = MemoryConsumer::new("SpatialJoinKnnGeometryCache");
+ let mut reservation = consumer.register(&memory_pool);
+
+ // Estimate maximum possible memory usage based on WKB sizes
+ let estimated_memory =
Self::estimate_max_memory_usage(indexed_batches);
+ reservation.try_grow(estimated_memory)?;
+
+ // Pre-allocate OnceCell vector
+ let geometry_cache = (0..cache_size).map(|_|
OnceCell::new()).collect();
+
+ Ok(Self {
+ euclidean_metric: EuclideanDistance,
+ haversine_metric: HaversineDistance::default(),
+ geometry_cache,
+ _reservation: reservation,
+ })
+ }
+
+ /// Estimate the maximum memory usage for decoded geometries based on WKB
sizes
+ fn estimate_max_memory_usage(indexed_batches: &[IndexedBatch]) -> usize {
+ let mut total_wkb_size = 0;
+
+ for batch in indexed_batches {
+ for wkb in batch.geom_array.wkbs().iter().flatten() {
+ total_wkb_size += wkb.buf().len();
+ }
+ }
+ total_wkb_size
+ }
+}
+
+/// Geometry accessor for SedonaDB KNN queries.
+/// This accessor provides on-demand WKB decoding and geometry caching for
efficient
+/// KNN queries with support for both Euclidean and Haversine distance metrics.
+struct SedonaKnnAdapter<'a> {
+ indexed_batches: &'a [IndexedBatch],
+ data_id_to_batch_pos: &'a [(i32, i32)],
+ // Reference to KNN components for cache and memory tracking
+ knn_components: &'a KnnComponents,
+}
+
+impl<'a> SedonaKnnAdapter<'a> {
+ /// Create a new adapter
+ fn new(
+ indexed_batches: &'a [IndexedBatch],
+ data_id_to_batch_pos: &'a [(i32, i32)],
+ knn_components: &'a KnnComponents,
+ ) -> Self {
+ Self {
+ indexed_batches,
+ data_id_to_batch_pos,
+ knn_components,
+ }
+ }
+}
+
+impl<'a> GeometryAccessor for SedonaKnnAdapter<'a> {
+ /// Get geometry for the given item index with lock-free caching
+ fn get_geometry(&self, item_index: usize) -> Option<&Geometry<f64>> {
+ let geometry_cache = &self.knn_components.geometry_cache;
+
+ // Bounds check
+ if item_index >= geometry_cache.len() || item_index >=
self.data_id_to_batch_pos.len() {
+ return None;
+ }
+
+ // Try to get from cache first
+ if let Some(geom) = geometry_cache[item_index].get() {
+ return Some(geom);
+ }
+
+ // Cache miss - decode from WKB
+ let (batch_idx, row_idx) = self.data_id_to_batch_pos[item_index];
+ let indexed_batch = &self.indexed_batches[batch_idx as usize];
+
+ if let Some(wkb) = indexed_batch.wkb(row_idx as usize) {
+ if let Ok(geom) = item_to_geometry(wkb) {
+ // Try to store in cache - if another thread got there first,
we just use theirs
+ let _ = geometry_cache[item_index].set(geom);
+ // Return reference to the cached geometry
+ return geometry_cache[item_index].get();
+ }
+ }
+
+ // Failed to decode - don't cache invalid results
+ None
+ }
+}
+
#[cfg(test)]
mod tests {
use crate::spatial_predicate::{RelationPredicate, SpatialRelationType};