Copilot commented on code in PR #678: URL: https://github.com/apache/sedona-db/pull/678#discussion_r2878306297
########## c/sedona-gdal/src/vsi.rs: ########## @@ -0,0 +1,177 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! GDAL Virtual File System (VSI) wrappers. + +use std::ffi::CString; + +use crate::errors::{GdalError, Result}; +use crate::gdal_api::{call_gdal_api, GdalApi}; + +/// Creates a new VSI in-memory file from a given buffer. +/// +/// The data is copied into GDAL-allocated memory (via `VSIMalloc`) so that +/// GDAL can safely free it with `VSIFree` when ownership is taken. +pub fn create_mem_file(api: &'static GdalApi, file_name: &str, data: Vec<u8>) -> Result<()> { + let c_file_name = CString::new(file_name)?; + let len = data.len(); + + // Allocate via GDAL's allocator so GDAL can safely free it. + let gdal_buf = unsafe { call_gdal_api!(api, VSIMalloc, len) } as *mut u8; + if gdal_buf.is_null() { + return Err(GdalError::NullPointer { + method_name: "VSIMalloc", + msg: format!("failed to allocate {len} bytes"), + }); + } + + // Copy data into GDAL-allocated buffer + unsafe { + std::ptr::copy_nonoverlapping(data.as_ptr(), gdal_buf, len); + } + // Rust Vec is dropped here, freeing the Rust-allocated memory. + + let handle = unsafe { + call_gdal_api!( + api, + VSIFileFromMemBuffer, + c_file_name.as_ptr(), + gdal_buf, + len as i64, + 1 // bTakeOwnership = true — GDAL will VSIFree gdal_buf + ) + }; + + if handle.is_null() { + // GDAL did not take ownership, so we must free. + unsafe { call_gdal_api!(api, VSIFree, gdal_buf as *mut std::ffi::c_void) }; + return Err(GdalError::NullPointer { + method_name: "VSIFileFromMemBuffer", + msg: String::new(), + }); + } + + unsafe { + call_gdal_api!(api, VSIFCloseL, handle); + } + + Ok(()) +} + +/// Unlink (delete) a VSI in-memory file. +pub fn unlink_mem_file(api: &'static GdalApi, file_name: &str) -> Result<()> { + let c_file_name = CString::new(file_name)?; + + let rv = unsafe { call_gdal_api!(api, VSIUnlink, c_file_name.as_ptr()) }; + + if rv != 0 { + return Err(GdalError::UnlinkMemFile { + file_name: file_name.to_string(), + }); + } + + Ok(()) +} + +/// Copies the bytes of the VSI in-memory file, taking ownership and freeing the GDAL memory. +pub fn get_vsi_mem_file_bytes_owned(api: &'static GdalApi, file_name: &str) -> Result<Vec<u8>> { + let c_file_name = CString::new(file_name)?; + + let owned_bytes = unsafe { + let mut length: i64 = 0; + let bytes = call_gdal_api!( + api, + VSIGetMemFileBuffer, + c_file_name.as_ptr(), + &mut length, + 1 // bUnlinkAndSeize = true + ); + + if bytes.is_null() { + return Err(GdalError::NullPointer { + method_name: "VSIGetMemFileBuffer", + msg: String::new(), + }); + } + + if length < 0 { + call_gdal_api!(api, VSIFree, bytes.cast::<std::ffi::c_void>()); + return Err(GdalError::BadArgument(format!( + "VSIGetMemFileBuffer returned negative length: {length}" + ))); + } + + let slice = std::slice::from_raw_parts(bytes, length as usize); + let vec = slice.to_vec(); + + call_gdal_api!(api, VSIFree, bytes.cast::<std::ffi::c_void>()); + + vec + }; + + Ok(owned_bytes) +} + +#[cfg(all(test, feature = "gdal-sys"))] +mod tests { + use super::*; + use crate::register::get_global_gdal_api; + + #[test] + fn create_and_retrieve_mem_file() { + let file_name = "/vsimem/525ebf24-a030-4677-bb4e-a921741cabe0"; + + let api = get_global_gdal_api().unwrap(); + create_mem_file(api, file_name, vec![1_u8, 2, 3, 4]).unwrap(); + + let bytes = get_vsi_mem_file_bytes_owned(api, file_name).unwrap(); + + assert_eq!(bytes, vec![1_u8, 2, 3, 4]); + + // mem file must not be there anymore + assert!(matches!( + unlink_mem_file(api, file_name).unwrap_err(), + GdalError::UnlinkMemFile { + file_name + } + if file_name == file_name Review Comment: The assertion in `create_and_retrieve_mem_file` shadows `file_name` inside the match and then compares `file_name == file_name`, which is always true. This makes the test ineffective. Bind the matched field to a different name (or compare to the outer `file_name` via a different identifier) so the assertion actually verifies the error contains the expected path. ```suggestion file_name: err_file_name } if err_file_name == file_name ``` ########## c/sedona-gdal/src/vector/layer.rs: ########## @@ -0,0 +1,114 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::marker::PhantomData; + +use crate::dataset::Dataset; +use crate::errors::{GdalError, Result}; +use crate::gdal_api::{call_gdal_api, GdalApi}; +use crate::gdal_dyn_bindgen::*; +use crate::vector::feature::{Feature, FieldDefn}; + +/// An OGR layer (borrowed from a Dataset). +pub struct Layer<'a> { + api: &'static GdalApi, + c_layer: OGRLayerH, + _dataset: PhantomData<&'a Dataset>, +} + +impl<'a> Layer<'a> { + pub(crate) fn new(api: &'static GdalApi, c_layer: OGRLayerH, _dataset: &'a Dataset) -> Self { + Self { + api, + c_layer, + _dataset: PhantomData, + } + } + + /// Return the raw C layer handle. + pub fn c_layer(&self) -> OGRLayerH { + self.c_layer + } + + /// Reset reading to the first feature. + pub fn reset_reading(&self) { + unsafe { call_gdal_api!(self.api, OGR_L_ResetReading, self.c_layer) }; + } + + /// Get the next feature (returns None when exhausted). + pub fn next_feature(&self) -> Option<Feature<'_>> { + let c_feature = unsafe { call_gdal_api!(self.api, OGR_L_GetNextFeature, self.c_layer) }; + if c_feature.is_null() { + None + } else { + Some(Feature::new(self.api, c_feature)) + } Review Comment: `Layer::next_feature` returns `Option<Feature<'_>>`, which ties the returned `Feature` to the temporary borrow of `&self` rather than the layer/dataset lifetime `'a`. This prevents using it as the iterator item type `Feature<'a>` and is likely to cause a lifetime mismatch compile error. Consider returning `Option<Feature<'a>>` (e.g. by taking `&'a self`) or removing the lifetime parameter from `Feature` if it does not actually borrow. ########## c/sedona-gdal/src/vsi.rs: ########## @@ -0,0 +1,177 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! GDAL Virtual File System (VSI) wrappers. + +use std::ffi::CString; + +use crate::errors::{GdalError, Result}; +use crate::gdal_api::{call_gdal_api, GdalApi}; + +/// Creates a new VSI in-memory file from a given buffer. +/// +/// The data is copied into GDAL-allocated memory (via `VSIMalloc`) so that +/// GDAL can safely free it with `VSIFree` when ownership is taken. +pub fn create_mem_file(api: &'static GdalApi, file_name: &str, data: Vec<u8>) -> Result<()> { + let c_file_name = CString::new(file_name)?; + let len = data.len(); + + // Allocate via GDAL's allocator so GDAL can safely free it. + let gdal_buf = unsafe { call_gdal_api!(api, VSIMalloc, len) } as *mut u8; + if gdal_buf.is_null() { + return Err(GdalError::NullPointer { + method_name: "VSIMalloc", + msg: format!("failed to allocate {len} bytes"), + }); + } + + // Copy data into GDAL-allocated buffer + unsafe { + std::ptr::copy_nonoverlapping(data.as_ptr(), gdal_buf, len); + } + // Rust Vec is dropped here, freeing the Rust-allocated memory. + + let handle = unsafe { + call_gdal_api!( + api, + VSIFileFromMemBuffer, + c_file_name.as_ptr(), + gdal_buf, + len as i64, + 1 // bTakeOwnership = true — GDAL will VSIFree gdal_buf + ) Review Comment: `create_mem_file` assumes `VSIMalloc(len)` returns non-null and treats null as an error. This will typically fail for `data.len() == 0` (allocators commonly return null for size 0) even though creating an empty `/vsimem/` file should be valid. Handle the zero-length case explicitly (skip allocation/copy and pass a null pointer with length 0), and consider using `i64::try_from(len)` instead of `len as i64` when calling `VSIFileFromMemBuffer`. ########## c/sedona-gdal/src/mem.rs: ########## @@ -0,0 +1,422 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! High-level builder for creating in-memory (MEM) GDAL datasets. +//! +//! [`MemDatasetBuilder`] provides a fluent, type-safe API for constructing GDAL MEM +//! datasets with zero-copy band attachment, optional geo-transform, projection, and +//! per-band nodata values. +//! +//! # Example +//! +//! ```rust,ignore +//! use sedona_gdal::mem::{MemDatasetBuilder, Nodata}; +//! use sedona_gdal::GdalDataType; +//! +//! let data: Vec<u8> = vec![0u8; 256 * 256]; +//! let dataset = unsafe { +//! MemDatasetBuilder::new(256, 256) +//! .add_band(GdalDataType::UInt8, data.as_ptr()) +//! .geo_transform([0.0, 1.0, 0.0, 0.0, 0.0, -1.0]) +//! .projection("EPSG:4326") +//! .build() +//! .unwrap() +//! }; +//! assert_eq!(dataset.raster_count(), 1); +//! ``` + +use crate::dataset::Dataset; +use crate::errors::{GdalError, Result}; +use crate::gdal_api::call_gdal_api; +use crate::gdal_dyn_bindgen::CE_Failure; +use crate::raster::types::GdalDataType; + +/// Nodata value for a raster band. +/// +/// GDAL has three separate APIs for setting nodata depending on the band data type: +/// - [`f64`] for most types (UInt8 through Float64, excluding Int64/UInt64) +/// - [`i64`] for Int64 bands +/// - [`u64`] for UInt64 bands +/// +/// This enum encapsulates all three variants so callers don't need to match on +/// the band type when setting nodata. +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum Nodata { + F64(f64), + I64(i64), + U64(u64), +} + +/// A band specification for [`MemDatasetBuilder`]. +struct MemBand { + data_type: GdalDataType, + data_ptr: *const u8, + pixel_offset: Option<i64>, + line_offset: Option<i64>, + nodata: Option<Nodata>, +} + +/// A builder for constructing in-memory (MEM) GDAL datasets. +/// +/// This creates datasets using `MEMDataset::Create` (bypassing GDAL's open-dataset-list +/// mutex for better concurrency) and attaches bands via `GDALAddBand` with `DATAPOINTER` +/// options for zero-copy operation. +/// +/// # Safety +/// +/// All `add_band*` methods are `unsafe` because the caller must ensure that the +/// provided data pointers remain valid for the lifetime of the built [`Dataset`]. +pub struct MemDatasetBuilder { + width: usize, + height: usize, + n_owned_bands: usize, + owned_bands_data_type: Option<GdalDataType>, + bands: Vec<MemBand>, + geo_transform: Option<[f64; 6]>, + projection: Option<String>, +} + +impl MemDatasetBuilder { + /// Create a new builder for a MEM dataset with the given dimensions. + pub fn new(width: usize, height: usize) -> Self { + Self { + width, + height, + n_owned_bands: 0, + owned_bands_data_type: None, + bands: Vec::new(), + geo_transform: None, + projection: None, + } + } + + /// Create a new builder for a MEM dataset with the given dimensions and number of owned bands. + pub fn new_with_owned_bands( + width: usize, + height: usize, + n_owned_bands: usize, + owned_bands_data_type: GdalDataType, + ) -> Self { + Self { + width, + height, + n_owned_bands, + owned_bands_data_type: Some(owned_bands_data_type), + bands: Vec::new(), + geo_transform: None, + projection: None, + } + } + + /// Create a MEM dataset with owned bands + pub fn create( + width: usize, + height: usize, + n_owned_bands: usize, + owned_bands_data_type: GdalDataType, + ) -> Result<Dataset> { + unsafe { + Self::new_with_owned_bands(width, height, n_owned_bands, owned_bands_data_type).build() + } + } + + /// Add a zero-copy band from a raw data pointer. + /// + /// Uses default pixel and line offsets (contiguous, row-major layout). + /// + /// # Safety + /// + /// The caller must ensure `data_ptr` points to a valid buffer of at least + /// `height * width * data_type.byte_size()` bytes, and that the buffer + /// outlives the built [`Dataset`]. + pub unsafe fn add_band(self, data_type: GdalDataType, data_ptr: *const u8) -> Self { + self.add_band_with_options(data_type, data_ptr, None, None, None) + } + + /// Add a zero-copy band with custom offsets and optional nodata. + /// + /// # Arguments + /// * `data_type` - The GDAL data type of the band. + /// * `data_ptr` - Pointer to the band pixel data. + /// * `pixel_offset` - Byte offset between consecutive pixels. `None` defaults to + /// the byte size of `data_type`. + /// * `line_offset` - Byte offset between consecutive lines. `None` defaults to + /// `pixel_offset * width`. + /// * `nodata` - Optional nodata value for the band. + /// + /// # Safety + /// + /// The caller must ensure `data_ptr` points to a valid buffer of sufficient size + /// for the given dimensions and offsets, and that the buffer outlives the built + /// [`Dataset`]. + pub unsafe fn add_band_with_options( + mut self, + data_type: GdalDataType, + data_ptr: *const u8, + pixel_offset: Option<i64>, + line_offset: Option<i64>, + nodata: Option<Nodata>, + ) -> Self { + self.bands.push(MemBand { + data_type, + data_ptr, + pixel_offset, + line_offset, + nodata, + }); + self + } + + /// Set the geo-transform for the dataset. + /// + /// The array is `[origin_x, pixel_width, rotation_x, origin_y, rotation_y, pixel_height]`. + pub fn geo_transform(mut self, gt: [f64; 6]) -> Self { + self.geo_transform = Some(gt); + self + } + + /// Set the projection (CRS) for the dataset as a WKT or PROJ string. + pub fn projection(mut self, wkt: impl Into<String>) -> Self { + self.projection = Some(wkt.into()); + self + } + + /// Build the GDAL MEM dataset. + /// + /// This creates an empty MEM dataset using `MEMDataset::Create` (bypassing GDAL's + /// open-dataset-list), then attaches bands, sets the geo-transform, projection, + /// and per-band nodata values. + /// + /// # Safety + /// + /// This method is unsafe because the built dataset references memory provided via + /// the `add_band*` methods. The caller must ensure all data pointers remain valid + /// for the lifetime of the returned [`Dataset`]. + pub unsafe fn build(self) -> Result<Dataset> { + let api = crate::register::get_global_gdal_api() + .map_err(|e| GdalError::BadArgument(e.to_string()))?; + + // Create an initial MEM dataset via MEMDataset::Create directly, + // bypassing GDAL's open-dataset-list mutex. + let empty_filename = c""; + let owned_bands_data_type = self + .owned_bands_data_type + .unwrap_or(GdalDataType::UInt8) + .to_c(); + let handle = unsafe { + call_gdal_api!( + api, + MEMDatasetCreate, + empty_filename.as_ptr(), + self.width as i32, + self.height as i32, + self.n_owned_bands as i32, + owned_bands_data_type, + std::ptr::null_mut() + ) + }; + + if handle.is_null() { + return Err(api.last_cpl_err(CE_Failure as u32)); + } + let dataset = Dataset::new_owned(api, handle); + + // Attach bands (zero-copy via DATAPOINTER). + for band_spec in &self.bands { + dataset.add_band_with_data( + band_spec.data_type, + band_spec.data_ptr, + band_spec.pixel_offset, + band_spec.line_offset, + )?; + } + + // Set geo-transform. + if let Some(gt) = &self.geo_transform { + dataset.set_geo_transform(gt)?; + } + + // Set projection/CRS. + if let Some(proj) = &self.projection { + dataset.set_projection(proj)?; + } + + // Set per-band nodata values. + for (i, band_spec) in self.bands.iter().enumerate() { + if let Some(nodata) = &band_spec.nodata { + let raster_band = dataset.rasterband(i + 1)?; + match nodata { + Nodata::F64(v) => raster_band.set_no_data_value(Some(*v))?, + Nodata::I64(v) => raster_band.set_no_data_value_i64(Some(*v))?, + Nodata::U64(v) => raster_band.set_no_data_value_u64(Some(*v))?, + } + } + } Review Comment: `MemDatasetBuilder::build` applies per-band nodata to `dataset.rasterband(i + 1)`, but when `n_owned_bands > 0` the externally attached bands start after those owned bands. This will set nodata on the wrong bands (and skip nodata on some external bands). Use an index offset (e.g. `i + 1 + self.n_owned_bands`) or iterate over the actual band numbers returned by `add_band_with_data`. ########## c/sedona-gdal/src/mem.rs: ########## @@ -0,0 +1,422 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! High-level builder for creating in-memory (MEM) GDAL datasets. +//! +//! [`MemDatasetBuilder`] provides a fluent, type-safe API for constructing GDAL MEM +//! datasets with zero-copy band attachment, optional geo-transform, projection, and +//! per-band nodata values. +//! +//! # Example +//! +//! ```rust,ignore +//! use sedona_gdal::mem::{MemDatasetBuilder, Nodata}; +//! use sedona_gdal::GdalDataType; +//! +//! let data: Vec<u8> = vec![0u8; 256 * 256]; +//! let dataset = unsafe { +//! MemDatasetBuilder::new(256, 256) +//! .add_band(GdalDataType::UInt8, data.as_ptr()) +//! .geo_transform([0.0, 1.0, 0.0, 0.0, 0.0, -1.0]) +//! .projection("EPSG:4326") +//! .build() +//! .unwrap() +//! }; +//! assert_eq!(dataset.raster_count(), 1); +//! ``` + +use crate::dataset::Dataset; +use crate::errors::{GdalError, Result}; +use crate::gdal_api::call_gdal_api; +use crate::gdal_dyn_bindgen::CE_Failure; +use crate::raster::types::GdalDataType; + +/// Nodata value for a raster band. +/// +/// GDAL has three separate APIs for setting nodata depending on the band data type: +/// - [`f64`] for most types (UInt8 through Float64, excluding Int64/UInt64) +/// - [`i64`] for Int64 bands +/// - [`u64`] for UInt64 bands +/// +/// This enum encapsulates all three variants so callers don't need to match on +/// the band type when setting nodata. +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum Nodata { + F64(f64), + I64(i64), + U64(u64), +} + +/// A band specification for [`MemDatasetBuilder`]. +struct MemBand { + data_type: GdalDataType, + data_ptr: *const u8, + pixel_offset: Option<i64>, + line_offset: Option<i64>, + nodata: Option<Nodata>, +} + +/// A builder for constructing in-memory (MEM) GDAL datasets. +/// +/// This creates datasets using `MEMDataset::Create` (bypassing GDAL's open-dataset-list +/// mutex for better concurrency) and attaches bands via `GDALAddBand` with `DATAPOINTER` +/// options for zero-copy operation. +/// +/// # Safety +/// +/// All `add_band*` methods are `unsafe` because the caller must ensure that the +/// provided data pointers remain valid for the lifetime of the built [`Dataset`]. +pub struct MemDatasetBuilder { + width: usize, + height: usize, + n_owned_bands: usize, + owned_bands_data_type: Option<GdalDataType>, + bands: Vec<MemBand>, + geo_transform: Option<[f64; 6]>, + projection: Option<String>, +} + +impl MemDatasetBuilder { + /// Create a new builder for a MEM dataset with the given dimensions. + pub fn new(width: usize, height: usize) -> Self { + Self { + width, + height, + n_owned_bands: 0, + owned_bands_data_type: None, + bands: Vec::new(), + geo_transform: None, + projection: None, + } + } + + /// Create a new builder for a MEM dataset with the given dimensions and number of owned bands. + pub fn new_with_owned_bands( + width: usize, + height: usize, + n_owned_bands: usize, + owned_bands_data_type: GdalDataType, + ) -> Self { + Self { + width, + height, + n_owned_bands, + owned_bands_data_type: Some(owned_bands_data_type), + bands: Vec::new(), + geo_transform: None, + projection: None, + } + } + + /// Create a MEM dataset with owned bands + pub fn create( + width: usize, + height: usize, + n_owned_bands: usize, + owned_bands_data_type: GdalDataType, + ) -> Result<Dataset> { + unsafe { + Self::new_with_owned_bands(width, height, n_owned_bands, owned_bands_data_type).build() + } + } + + /// Add a zero-copy band from a raw data pointer. + /// + /// Uses default pixel and line offsets (contiguous, row-major layout). + /// + /// # Safety + /// + /// The caller must ensure `data_ptr` points to a valid buffer of at least + /// `height * width * data_type.byte_size()` bytes, and that the buffer + /// outlives the built [`Dataset`]. + pub unsafe fn add_band(self, data_type: GdalDataType, data_ptr: *const u8) -> Self { + self.add_band_with_options(data_type, data_ptr, None, None, None) + } + + /// Add a zero-copy band with custom offsets and optional nodata. + /// + /// # Arguments + /// * `data_type` - The GDAL data type of the band. + /// * `data_ptr` - Pointer to the band pixel data. + /// * `pixel_offset` - Byte offset between consecutive pixels. `None` defaults to + /// the byte size of `data_type`. + /// * `line_offset` - Byte offset between consecutive lines. `None` defaults to + /// `pixel_offset * width`. + /// * `nodata` - Optional nodata value for the band. + /// + /// # Safety + /// + /// The caller must ensure `data_ptr` points to a valid buffer of sufficient size + /// for the given dimensions and offsets, and that the buffer outlives the built + /// [`Dataset`]. + pub unsafe fn add_band_with_options( + mut self, + data_type: GdalDataType, + data_ptr: *const u8, + pixel_offset: Option<i64>, + line_offset: Option<i64>, + nodata: Option<Nodata>, + ) -> Self { + self.bands.push(MemBand { + data_type, + data_ptr, + pixel_offset, + line_offset, + nodata, + }); + self + } + + /// Set the geo-transform for the dataset. + /// + /// The array is `[origin_x, pixel_width, rotation_x, origin_y, rotation_y, pixel_height]`. + pub fn geo_transform(mut self, gt: [f64; 6]) -> Self { + self.geo_transform = Some(gt); + self + } + + /// Set the projection (CRS) for the dataset as a WKT or PROJ string. + pub fn projection(mut self, wkt: impl Into<String>) -> Self { + self.projection = Some(wkt.into()); + self + } + + /// Build the GDAL MEM dataset. + /// + /// This creates an empty MEM dataset using `MEMDataset::Create` (bypassing GDAL's + /// open-dataset-list), then attaches bands, sets the geo-transform, projection, + /// and per-band nodata values. + /// + /// # Safety + /// + /// This method is unsafe because the built dataset references memory provided via + /// the `add_band*` methods. The caller must ensure all data pointers remain valid + /// for the lifetime of the returned [`Dataset`]. + pub unsafe fn build(self) -> Result<Dataset> { + let api = crate::register::get_global_gdal_api() + .map_err(|e| GdalError::BadArgument(e.to_string()))?; + + // Create an initial MEM dataset via MEMDataset::Create directly, + // bypassing GDAL's open-dataset-list mutex. + let empty_filename = c""; + let owned_bands_data_type = self + .owned_bands_data_type + .unwrap_or(GdalDataType::UInt8) + .to_c(); + let handle = unsafe { + call_gdal_api!( + api, + MEMDatasetCreate, + empty_filename.as_ptr(), + self.width as i32, + self.height as i32, + self.n_owned_bands as i32, Review Comment: `MemDatasetBuilder::build` casts `width/height/n_owned_bands` to `i32` with `as`, which will truncate for values > i32::MAX and can lead to UB or corrupted datasets at runtime. Use `try_into()` and return a `GdalError` on overflow (similar to other APIs in this crate). ```suggestion let width_i32: i32 = self.width.try_into().map_err(|_| { GdalError::BadArgument(format!( "MemDatasetBuilder width {} exceeds i32::MAX", self.width )) })?; let height_i32: i32 = self.height.try_into().map_err(|_| { GdalError::BadArgument(format!( "MemDatasetBuilder height {} exceeds i32::MAX", self.height )) })?; let n_owned_bands_i32: i32 = self.n_owned_bands.try_into().map_err(|_| { GdalError::BadArgument(format!( "MemDatasetBuilder n_owned_bands {} exceeds i32::MAX", self.n_owned_bands )) })?; let handle = unsafe { call_gdal_api!( api, MEMDatasetCreate, empty_filename.as_ptr(), width_i32, height_i32, n_owned_bands_i32, ``` ########## c/sedona-gdal/src/vector/layer.rs: ########## @@ -0,0 +1,114 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::marker::PhantomData; + +use crate::dataset::Dataset; +use crate::errors::{GdalError, Result}; +use crate::gdal_api::{call_gdal_api, GdalApi}; +use crate::gdal_dyn_bindgen::*; +use crate::vector::feature::{Feature, FieldDefn}; + +/// An OGR layer (borrowed from a Dataset). +pub struct Layer<'a> { + api: &'static GdalApi, + c_layer: OGRLayerH, + _dataset: PhantomData<&'a Dataset>, +} + +impl<'a> Layer<'a> { + pub(crate) fn new(api: &'static GdalApi, c_layer: OGRLayerH, _dataset: &'a Dataset) -> Self { + Self { + api, + c_layer, + _dataset: PhantomData, + } + } + + /// Return the raw C layer handle. + pub fn c_layer(&self) -> OGRLayerH { + self.c_layer + } + + /// Reset reading to the first feature. + pub fn reset_reading(&self) { + unsafe { call_gdal_api!(self.api, OGR_L_ResetReading, self.c_layer) }; + } + + /// Get the next feature (returns None when exhausted). + pub fn next_feature(&self) -> Option<Feature<'_>> { + let c_feature = unsafe { call_gdal_api!(self.api, OGR_L_GetNextFeature, self.c_layer) }; + if c_feature.is_null() { + None + } else { + Some(Feature::new(self.api, c_feature)) + } + } + + /// Create a field on this layer. + pub fn create_field(&self, field_defn: &FieldDefn) -> Result<()> { + let rv = unsafe { + call_gdal_api!( + self.api, + OGR_L_CreateField, + self.c_layer, + field_defn.c_field_defn(), + 1 // bApproxOK + ) + }; + if rv != OGRERR_NONE { + return Err(GdalError::OgrError { + err: rv, + method_name: "OGR_L_CreateField", + }); + } + Ok(()) + } + + /// Get the number of features in this layer. + /// + /// If `force` is true, the count will be computed even if it is expensive. + pub fn feature_count(&self, force: bool) -> i64 { + unsafe { + call_gdal_api!( + self.api, + OGR_L_GetFeatureCount, + self.c_layer, + if force { 1 } else { 0 } + ) + } + } + + /// Iterate over all features. + pub fn features(&self) -> FeatureIterator<'_> { + self.reset_reading(); + FeatureIterator { layer: self } + } +} + +/// Iterator over features in a layer. +pub struct FeatureIterator<'a> { + layer: &'a Layer<'a>, +} + +impl<'a> Iterator for FeatureIterator<'a> { + type Item = Feature<'a>; + + fn next(&mut self) -> Option<Self::Item> { + self.layer.next_feature() + } Review Comment: `FeatureIterator` declares `type Item = Feature<'a>` but `next()` delegates to `Layer::next_feature()` which currently returns `Option<Feature<'_>>`. With the current signatures this will not typecheck. Align the lifetimes by updating `next_feature()` (and/or `Feature`) so the iterator can return `Feature<'a>` consistently. ########## c/sedona-gdal/src/vrt.rs: ########## @@ -0,0 +1,324 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! GDAL VRT (Virtual Raster) API wrappers. + +use std::ffi::CString; +use std::ops::{Deref, DerefMut}; +use std::ptr::null_mut; + +use crate::cpl::CslStringList; +use crate::dataset::Dataset; +use crate::errors::{GdalError, Result}; +use crate::gdal_api::{call_gdal_api, GdalApi}; +use crate::raster::RasterBand; +use crate::{gdal_dyn_bindgen::*, GdalDataType}; + +/// Special value indicating that nodata is not set for a VRT source. +/// Matches `VRT_NODATA_UNSET` from GDAL's `gdal_vrt.h`. +pub const NODATA_UNSET: f64 = -1234.56; + +/// A VRT (Virtual Raster) dataset. +pub struct VrtDataset { + dataset: Dataset, +} + +unsafe impl Send for VrtDataset {} + +impl VrtDataset { + /// Create a new empty VRT dataset with the given dimensions. + pub fn create(api: &'static GdalApi, x_size: usize, y_size: usize) -> Result<Self> { + let x: i32 = x_size.try_into()?; + let y: i32 = y_size.try_into()?; + let c_dataset = unsafe { call_gdal_api!(api, VRTCreate, x, y) }; + + if c_dataset.is_null() { + return Err(GdalError::NullPointer { + method_name: "VRTCreate", + msg: String::new(), + }); + } + + Ok(VrtDataset { + dataset: Dataset::new_owned(api, c_dataset), + }) + } + + /// Consume this VRT and return the underlying `Dataset`, transferring ownership. + pub fn as_dataset(self) -> Dataset { + let VrtDataset { dataset } = self; + dataset + } + + /// Add a new band to the VRT dataset. + /// + /// Returns the 1-based index of the newly created band on success. + pub fn add_band(&mut self, data_type: GdalDataType, options: Option<&[&str]>) -> Result<usize> { + let csl = CslStringList::try_from_iter(options.unwrap_or(&[]).iter().copied())?; + + // Preserve null semantics: pass null when no options given. + let opts_ptr = if csl.is_empty() { + null_mut() + } else { + csl.as_ptr() + }; + + let rv = unsafe { + call_gdal_api!( + self.dataset.api(), + GDALAddBand, + self.dataset.c_dataset(), + data_type.to_c(), + opts_ptr + ) + }; + + if rv != CE_None { + return Err(self.dataset.api().last_cpl_err(rv as u32)); + } + + Ok(self.raster_count()) + } + + /// Fetch a band from the VRT dataset as a `VrtRasterBand`. + pub fn rasterband(&self, band_index: usize) -> Result<VrtRasterBand<'_>> { + let band = self.dataset.rasterband(band_index)?; + Ok(VrtRasterBand { band }) + } +} + +impl Deref for VrtDataset { + type Target = Dataset; + + fn deref(&self) -> &Self::Target { + &self.dataset + } +} + +impl DerefMut for VrtDataset { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.dataset + } +} + +impl AsRef<Dataset> for VrtDataset { + fn as_ref(&self) -> &Dataset { + &self.dataset + } +} + +/// A raster band within a VRT dataset. +pub struct VrtRasterBand<'a> { + band: RasterBand<'a>, +} + +impl<'a> VrtRasterBand<'a> { + /// Returns the raw GDAL raster band handle. + pub fn c_rasterband(&self) -> GDALRasterBandH { + self.band.c_rasterband() + } + + /// Adds a simple source to this VRT band. + /// + /// # Arguments + /// * `source_band` - The source raster band to read from + /// * `src_window` - Source window as `(x_offset, y_offset, x_size, y_size)` in pixels + /// * `dst_window` - Destination window as `(x_offset, y_offset, x_size, y_size)` in pixels + /// * `resampling` - Optional resampling method (e.g. "near", "bilinear", "cubic"). + /// * `nodata` - Optional nodata value for the source. If None, uses `NODATA_UNSET`. + pub fn add_simple_source( + &self, + source_band: &RasterBand<'a>, + src_window: (i32, i32, i32, i32), + dst_window: (i32, i32, i32, i32), + resampling: Option<&str>, + nodata: Option<f64>, + ) -> Result<()> { + let c_resampling = resampling.and_then(|s| CString::new(s).ok()); + + let resampling_ptr = c_resampling + .as_ref() + .map(|s| s.as_ptr()) + .unwrap_or(null_mut()); Review Comment: `resampling.and_then(|s| CString::new(s).ok())` silently drops invalid resampling strings containing NUL bytes and ends up passing a null pointer to GDAL. This should return an error instead of changing behavior. Use a fallible conversion (e.g. `CString::new(s)?`) and propagate the error via `Result`. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
