This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push:
new 076f62b feat(rust/sedona-testing): Add random tiled raster test data (#300)
076f62b is described below
commit 076f62b4fb2a1f79ad767109542daab848b3d862
Author: jp <[email protected]>
AuthorDate: Fri Nov 14 09:49:48 2025 -0800
feat(rust/sedona-testing): Add random tiled raster test data (#300)
---
Cargo.lock | 1 +
Cargo.toml | 1 +
rust/sedona-testing/Cargo.toml | 1 +
rust/sedona-testing/src/rasters.rs | 409 ++++++++++++++++++++++++++++++++++++-
4 files changed, 408 insertions(+), 4 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index 20e02b3..e5d23c5 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -5256,6 +5256,7 @@ dependencies = [
"datafusion-common",
"datafusion-expr",
"datafusion-physical-expr",
+ "fastrand",
"geo",
"geo-traits",
"geo-types",
diff --git a/Cargo.toml b/Cargo.toml
index 5a580b5..7fa350f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -87,6 +87,7 @@ datafusion-physical-expr = { version = "50.2.0" }
datafusion-physical-plan = { version = "50.2.0" }
dirs = "6.0.0"
env_logger = "0.11"
+fastrand = "2.0"
futures = { version = "0.3" }
object_store = { version = "0.12.0", default-features = false }
float_next_after = "1"
diff --git a/rust/sedona-testing/Cargo.toml b/rust/sedona-testing/Cargo.toml
index f467aad..fe57750 100644
--- a/rust/sedona-testing/Cargo.toml
+++ b/rust/sedona-testing/Cargo.toml
@@ -43,6 +43,7 @@ criterion = { workspace = true, optional = true }
datafusion-common = { workspace = true }
datafusion-expr = { workspace = true }
datafusion-physical-expr = { workspace = true }
+fastrand = { workspace = true }
geo-traits = { workspace = true, features = ["geo-types"] }
geo-types = { workspace = true }
parquet = { workspace = true, features = ["arrow", "snap", "zstd"] }
diff --git a/rust/sedona-testing/src/rasters.rs b/rust/sedona-testing/src/rasters.rs
index 826024f..69d83ca 100644
--- a/rust/sedona-testing/src/rasters.rs
+++ b/rust/sedona-testing/src/rasters.rs
@@ -15,9 +15,11 @@
// specific language governing permissions and limitations
// under the License.
use arrow_array::StructArray;
-use arrow_schema::ArrowError;
+use datafusion_common::Result;
+use fastrand::Rng;
+use sedona_raster::array::RasterStructArray;
use sedona_raster::builder::RasterBuilder;
-use sedona_raster::traits::{BandMetadata, RasterMetadata};
+use sedona_raster::traits::{BandMetadata, RasterMetadata, RasterRef};
use sedona_schema::raster::{BandDataType, StorageType};
/// Generate a StructArray of rasters with sequentially increasing dimensions and pixel values
@@ -25,7 +27,7 @@ use sedona_schema::raster::{BandDataType, StorageType};
pub fn generate_test_rasters(
count: usize,
null_raster_index: Option<usize>,
-) -> Result<StructArray, ArrowError> {
+) -> Result<StructArray> {
let mut builder = RasterBuilder::new(count);
for i in 0..count {
// If a null raster index is specified and that matches the current index,
@@ -65,7 +67,324 @@ pub fn generate_test_rasters(
builder.finish_raster()?;
}
- builder.finish()
+ Ok(builder.finish()?)
+}
+
+/// Generates a set of tiled rasters arranged in a grid
+/// - Each raster tile has specified dimensions and random pixel values
+/// - Each raster has 3 bands which can be interpreted as RGB values
+/// and the result can be visualized as a mosaic of tiles.
+/// - There are nodata values at the 4 corners of the overall mosaic.
+pub fn generate_tiled_rasters(
+ tile_size: (usize, usize),
+ number_of_tiles: (usize, usize),
+ data_type: BandDataType,
+ seed: Option<u64>,
+) -> Result<StructArray> {
+ let mut rng = match seed {
+ Some(s) => Rng::with_seed(s),
+ None => Rng::new(),
+ };
+ let (tile_width, tile_height) = tile_size;
+ let (x_tiles, y_tiles) = number_of_tiles;
+ let mut raster_builder = RasterBuilder::new(x_tiles * y_tiles);
+ let band_count = 3;
+
+ for tile_y in 0..y_tiles {
+ for tile_x in 0..x_tiles {
+ let origin_x = (tile_x * tile_width) as f64;
+ let origin_y = (tile_y * tile_height) as f64;
+
+ let raster_metadata = RasterMetadata {
+ width: tile_width as u64,
+ height: tile_height as u64,
+ upperleft_x: origin_x,
+ upperleft_y: origin_y,
+ scale_x: 1.0,
+ scale_y: 1.0,
+ skew_x: 0.0,
+ skew_y: 0.0,
+ };
+
+ raster_builder.start_raster(&raster_metadata, None)?;
+
+ for _ in 0..band_count {
+ // Set a nodata value appropriate for the data type
+ let nodata_value = get_nodata_value_for_type(&data_type);
+
+ let band_metadata = BandMetadata {
+ nodata_value: nodata_value.clone(),
+ storage_type: StorageType::InDb,
+ datatype: data_type.clone(),
+ outdb_url: None,
+ outdb_band_id: None,
+ };
+
+ raster_builder.start_band(band_metadata)?;
+
+ let pixel_count = tile_width * tile_height;
+
+                // Determine which corner position (if any) should have nodata in this tile
+                let corner_position =
+                    get_corner_position(tile_x, tile_y, x_tiles, y_tiles, tile_width, tile_height);
+ let band_data = generate_random_band_data(
+ pixel_count,
+ &data_type,
+ nodata_value.as_deref(),
+ corner_position,
+ &mut rng,
+ );
+
+ raster_builder.band_data_writer().append_value(&band_data);
+ raster_builder.finish_band()?;
+ }
+
+ raster_builder.finish_raster()?;
+ }
+ }
+
+ Ok(raster_builder.finish()?)
+}
+
+/// Determine if this tile contains a corner of the overall grid and return its position
+/// Returns Some(position) if this tile contains a corner, None otherwise
+fn get_corner_position(
+ tile_x: usize,
+ tile_y: usize,
+ x_tiles: usize,
+ y_tiles: usize,
+ tile_width: usize,
+ tile_height: usize,
+) -> Option<usize> {
+ // Top-left corner (tile 0,0, pixel 0)
+ if tile_x == 0 && tile_y == 0 {
+ return Some(0);
+ }
+ // Top-right corner (tile x_tiles-1, 0, pixel tile_width-1)
+ if tile_x == x_tiles - 1 && tile_y == 0 {
+ return Some(tile_width - 1);
+ }
+ // Bottom-left corner (tile 0, y_tiles-1, pixel (tile_height-1)*tile_width)
+ if tile_x == 0 && tile_y == y_tiles - 1 {
+ return Some((tile_height - 1) * tile_width);
+ }
+    // Bottom-right corner (tile x_tiles-1, y_tiles-1, pixel tile_height*tile_width-1)
+ if tile_x == x_tiles - 1 && tile_y == y_tiles - 1 {
+ return Some(tile_height * tile_width - 1);
+ }
+ None
+}
+
+fn generate_random_band_data(
+ pixel_count: usize,
+ data_type: &BandDataType,
+ nodata_bytes: Option<&[u8]>,
+ corner_position: Option<usize>,
+ rng: &mut Rng,
+) -> Vec<u8> {
+ match data_type {
+ BandDataType::UInt8 => {
+            let mut data: Vec<u8> = (0..pixel_count).map(|_| rng.u8(..)).collect();
+            // Set corner pixel to nodata value if this tile contains a corner
+            if let (Some(nodata), Some(pos)) = (nodata_bytes, corner_position) {
+ if !nodata.is_empty() && pos < data.len() {
+ data[pos] = nodata[0];
+ }
+ }
+ data
+ }
+ BandDataType::UInt16 => {
+ let mut data = Vec::with_capacity(pixel_count * 2);
+ for _ in 0..pixel_count {
+ data.extend_from_slice(&rng.u16(..).to_ne_bytes());
+ }
+ // Set corner pixel to nodata value if this tile contains a corner
+            if let (Some(nodata), Some(pos)) = (nodata_bytes, corner_position) {
+                if nodata.len() >= 2 && pos * 2 + 2 <= data.len() {
+                    data[pos * 2..(pos * 2) + 2].copy_from_slice(&nodata[0..2]);
+ }
+ }
+ data
+ }
+ BandDataType::Int16 => {
+ let mut data = Vec::with_capacity(pixel_count * 2);
+ for _ in 0..pixel_count {
+ data.extend_from_slice(&rng.i16(..).to_ne_bytes());
+ }
+ // Set corner pixel to nodata value if this tile contains a corner
+            if let (Some(nodata), Some(pos)) = (nodata_bytes, corner_position) {
+                if nodata.len() >= 2 && pos * 2 + 2 <= data.len() {
+                    data[pos * 2..(pos * 2) + 2].copy_from_slice(&nodata[0..2]);
+ }
+ }
+ data
+ }
+ BandDataType::UInt32 => {
+ let mut data = Vec::with_capacity(pixel_count * 4);
+ for _ in 0..pixel_count {
+ data.extend_from_slice(&rng.u32(..).to_ne_bytes());
+ }
+ // Set corner pixel to nodata value if this tile contains a corner
+            if let (Some(nodata), Some(pos)) = (nodata_bytes, corner_position) {
+                if nodata.len() >= 4 && pos * 4 + 4 <= data.len() {
+                    data[pos * 4..(pos * 4) + 4].copy_from_slice(&nodata[0..4]);
+ }
+ }
+ data
+ }
+ BandDataType::Int32 => {
+ let mut data = Vec::with_capacity(pixel_count * 4);
+ for _ in 0..pixel_count {
+ data.extend_from_slice(&rng.i32(..).to_ne_bytes());
+ }
+ // Set corner pixel to nodata value if this tile contains a corner
+            if let (Some(nodata), Some(pos)) = (nodata_bytes, corner_position) {
+                if nodata.len() >= 4 && pos * 4 + 4 <= data.len() {
+                    data[pos * 4..(pos * 4) + 4].copy_from_slice(&nodata[0..4]);
+ }
+ }
+ data
+ }
+ BandDataType::Float32 => {
+ let mut data = Vec::with_capacity(pixel_count * 4);
+ for _ in 0..pixel_count {
+ data.extend_from_slice(&rng.f32().to_ne_bytes());
+ }
+ // Set corner pixel to nodata value if this tile contains a corner
+            if let (Some(nodata), Some(pos)) = (nodata_bytes, corner_position) {
+                if nodata.len() >= 4 && pos * 4 + 4 <= data.len() {
+                    data[pos * 4..(pos * 4) + 4].copy_from_slice(&nodata[0..4]);
+ }
+ }
+ data
+ }
+ BandDataType::Float64 => {
+ let mut data = Vec::with_capacity(pixel_count * 8);
+ for _ in 0..pixel_count {
+ data.extend_from_slice(&rng.f64().to_ne_bytes());
+ }
+ // Set corner pixel to nodata value if this tile contains a corner
+            if let (Some(nodata), Some(pos)) = (nodata_bytes, corner_position) {
+                if nodata.len() >= 8 && pos * 8 + 8 <= data.len() {
+                    data[pos * 8..(pos * 8) + 8].copy_from_slice(&nodata[0..8]);
+ }
+ }
+ data
+ }
+ }
+}
+
+fn get_nodata_value_for_type(data_type: &BandDataType) -> Option<Vec<u8>> {
+ match data_type {
+ BandDataType::UInt8 => Some(vec![255u8]),
+ BandDataType::UInt16 => Some(u16::MAX.to_ne_bytes().to_vec()),
+ BandDataType::Int16 => Some(i16::MIN.to_ne_bytes().to_vec()),
+ BandDataType::UInt32 => Some(u32::MAX.to_ne_bytes().to_vec()),
+ BandDataType::Int32 => Some(i32::MIN.to_ne_bytes().to_vec()),
+ BandDataType::Float32 => Some(f32::NAN.to_ne_bytes().to_vec()),
+ BandDataType::Float64 => Some(f64::NAN.to_ne_bytes().to_vec()),
+ }
+}
+
+/// Compare two RasterStructArrays for equality
+pub fn assert_raster_arrays_equal(
+ raster_array1: &RasterStructArray,
+ raster_array2: &RasterStructArray,
+) {
+ assert_eq!(
+ raster_array1.len(),
+ raster_array2.len(),
+ "Raster array lengths do not match"
+ );
+
+ for i in 0..raster_array1.len() {
+ let raster1 = raster_array1.get(i).unwrap();
+ let raster2 = raster_array2.get(i).unwrap();
+ assert_raster_equal(&raster1, &raster2);
+ }
+}
+
+/// Compare two rasters for equality
+pub fn assert_raster_equal(raster1: &impl RasterRef, raster2: &impl RasterRef) {
+ // Compare metadata
+ let meta1 = raster1.metadata();
+ let meta2 = raster2.metadata();
+ assert_eq!(meta1.width(), meta2.width(), "Raster widths do not match");
+ assert_eq!(
+ meta1.height(),
+ meta2.height(),
+ "Raster heights do not match"
+ );
+ assert_eq!(
+ meta1.upper_left_x(),
+ meta2.upper_left_x(),
+ "Raster upper left x does not match"
+ );
+ assert_eq!(
+ meta1.upper_left_y(),
+ meta2.upper_left_y(),
+ "Raster upper left y does not match"
+ );
+ assert_eq!(
+ meta1.scale_x(),
+ meta2.scale_x(),
+ "Raster scale x does not match"
+ );
+ assert_eq!(
+ meta1.scale_y(),
+ meta2.scale_y(),
+ "Raster scale y does not match"
+ );
+ assert_eq!(
+ meta1.skew_x(),
+ meta2.skew_x(),
+ "Raster skew x does not match"
+ );
+ assert_eq!(
+ meta1.skew_y(),
+ meta2.skew_y(),
+ "Raster skew y does not match"
+ );
+
+ // Compare bands
+ let bands1 = raster1.bands();
+ let bands2 = raster2.bands();
+ assert_eq!(bands1.len(), bands2.len(), "Number of bands do not match");
+
+ for band_index in 0..bands1.len() {
+ let band1 = bands1.band(band_index + 1).unwrap();
+ let band2 = bands2.band(band_index + 1).unwrap();
+
+ let band_meta1 = band1.metadata();
+ let band_meta2 = band2.metadata();
+ assert_eq!(
+ band_meta1.data_type(),
+ band_meta2.data_type(),
+ "Band data types do not match"
+ );
+ assert_eq!(
+ band_meta1.nodata_value(),
+ band_meta2.nodata_value(),
+ "Band nodata values do not match"
+ );
+ assert_eq!(
+ band_meta1.storage_type(),
+ band_meta2.storage_type(),
+ "Band storage types do not match"
+ );
+ assert_eq!(
+ band_meta1.outdb_url(),
+ band_meta2.outdb_url(),
+ "Band outdb URLs do not match"
+ );
+ assert_eq!(
+ band_meta1.outdb_band_id(),
+ band_meta2.outdb_band_id(),
+ "Band outdb band IDs do not match"
+ );
+
+ assert_eq!(band1.data(), band2.data(), "Band data does not match");
+ }
}
#[cfg(test)]
@@ -115,4 +434,86 @@ mod tests {
assert_eq!(actual_pixel_values, expected_pixel_values);
}
}
+
+ #[test]
+ fn test_generate_tiled_rasters() {
+ let tile_size = (64, 64);
+ let number_of_tiles = (4, 4);
+ let data_type = BandDataType::UInt8;
+ let struct_array =
+            generate_tiled_rasters(tile_size, number_of_tiles, data_type, Some(43)).unwrap();
+ let raster_array = RasterStructArray::new(&struct_array);
+ assert_eq!(raster_array.len(), 16); // 4x4 tiles
+ for i in 0..16 {
+ let raster = raster_array.get(i).unwrap();
+ let metadata = raster.metadata();
+ assert_eq!(metadata.width(), 64);
+ assert_eq!(metadata.height(), 64);
+ assert_eq!(metadata.upper_left_x(), ((i % 4) * 64) as f64);
+ assert_eq!(metadata.upper_left_y(), ((i / 4) * 64) as f64);
+ let bands = raster.bands();
+ assert_eq!(bands.len(), 3);
+ for band_index in 0..3 {
+ let band = bands.band(band_index + 1).unwrap();
+ let band_metadata = band.metadata();
+ assert_eq!(band_metadata.data_type(), BandDataType::UInt8);
+ assert_eq!(band_metadata.storage_type(), StorageType::InDb);
+ let band_data = band.data();
+ assert_eq!(band_data.len(), 64 * 64); // 4096 pixels
+ }
+ }
+ }
+
+ #[test]
+ fn test_raster_arrays_equal() {
+ let raster_array1 = generate_test_rasters(3, None).unwrap();
+ let raster_struct_array1 = RasterStructArray::new(&raster_array1);
+ // Test that identical arrays are equal
+        assert_raster_arrays_equal(&raster_struct_array1, &raster_struct_array1);
+ }
+
+ #[test]
+ #[should_panic = "Raster array lengths do not match"]
+ fn test_raster_arrays_not_equal() {
+ let raster_array1 = generate_test_rasters(3, None).unwrap();
+ let raster_struct_array1 = RasterStructArray::new(&raster_array1);
+
+ // Test that arrays with different lengths are not equal
+ let raster_array2 = generate_test_rasters(4, None).unwrap();
+ let raster_struct_array2 = RasterStructArray::new(&raster_array2);
+        assert_raster_arrays_equal(&raster_struct_array1, &raster_struct_array2);
+ }
+
+ #[test]
+ fn test_raster_equal() {
+ let raster_array1 =
+            generate_tiled_rasters((256, 256), (1, 1), BandDataType::UInt8, Some(43)).unwrap();
+ let raster1 = RasterStructArray::new(&raster_array1).get(0).unwrap();
+
+ // Assert that the rasters are equal to themselves
+ assert_raster_equal(&raster1, &raster1);
+ }
+
+ #[test]
+ #[should_panic = "Band data does not match"]
+ fn test_raster_different_band_data() {
+ let raster_array1 =
+            generate_tiled_rasters((128, 128), (1, 1), BandDataType::UInt8, Some(43)).unwrap();
+        let raster_array2 =
+            generate_tiled_rasters((128, 128), (1, 1), BandDataType::UInt8, Some(47)).unwrap();
+
+ let raster1 = RasterStructArray::new(&raster_array1).get(0).unwrap();
+ let raster2 = RasterStructArray::new(&raster_array2).get(0).unwrap();
+ assert_raster_equal(&raster1, &raster2);
+ }
+
+ #[test]
+ #[should_panic = "Raster upper left x does not match"]
+ fn test_raster_different_metadata() {
+ let raster_array =
+            generate_tiled_rasters((128, 128), (2, 1), BandDataType::UInt8, Some(43)).unwrap();
+ let raster1 = RasterStructArray::new(&raster_array).get(0).unwrap();
+ let raster2 = RasterStructArray::new(&raster_array).get(1).unwrap();
+ assert_raster_equal(&raster1, &raster2);
+ }
}