This is an automated email from the ASF dual-hosted git repository. desruisseaux pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/sis-site.git
commit f3e04ec8101161a732a9d999508bed45d7bed297 Author: Martin Desruisseaux <martin.desruisse...@geomatys.com> AuthorDate: Tue Apr 18 15:30:06 2023 +0200 Add more "how to" examples for raster data. Add clarification about pixel coordinates and which exceptions are thrown. --- content/howto/_index.md | 2 + content/howto/datalake_to_datacube.md | 129 +++++++++++++++++++++ content/howto/parallel_computation.md | 60 ++++++++++ .../howto/raster_values_at_pixel_coordinates.md | 7 +- content/howto/read_netcdf.md | 2 +- content/howto/resample_raster.md | 2 - 6 files changed, 198 insertions(+), 4 deletions(-) diff --git a/content/howto/_index.md b/content/howto/_index.md index 6292d3f8..35b52100 100644 --- a/content/howto/_index.md +++ b/content/howto/_index.md @@ -16,6 +16,8 @@ The examples are grouped in the following sections: * [Get raster values at geographic coordinates](howto/raster_values_at_geographic_coordinates.html) * [Handle rasters bigger than memory](howto/rasters_bigger_than_memory.html) * [Resample a raster](howto/resample_raster.html) +* [Parallel computation](howto/parallel_computation.html) +* [From data lake to data cube](howto/datalake_to_datacube.html) * [Write a raster to a file](howto/write_raster.html) diff --git a/content/howto/datalake_to_datacube.md b/content/howto/datalake_to_datacube.md new file mode 100644 index 00000000..7a0b489a --- /dev/null +++ b/content/howto/datalake_to_datacube.md @@ -0,0 +1,129 @@ +--- +title: From data lake to data cube +--- + +This example opens a few files where each file represent a slice in a data cube. +Then the slices are aggregated together in a single multi-dimensional data cube. +For example each file may be a raster representing Sea Surface Temperature (SST) at a specific day, +and those files can be a aggregated in a single three-dimensional raster with a temporal dimension. + +A current limitation is that each slice must have the same number of dimensions than the data cube. +For the example of SST raster for a specific day, the raster CRS must still have a temporal axis +even if the grid contains only one cell in the temporal dimension. +A future Apache SIS version will provide methods for adding dimensions. + +This example assumes that all slice have the same size, resolution and coordinate reference system. +If this is not the case, the aggregation will still work but instead of producing a data cube, +it may produce an `Aggregate` resource containing a tree like below: + +``` +Root aggregate +├─ All coverages with same sample dimensions #1 +│ └─ ... +└─ All coverages with same sample dimensions #2 + ├─ Coverages with equivalent reference systems #1 + │ └─ ... + └─ Coverages with equivalent reference systems #2 + ├─ Slices with compatible "grid to CRS" #1 + ├─ Slices with compatible "grid to CRS" #2 + └─ ...</pre> +``` + +A future Apache SIS version will provide methods for controlling the way to aggregate +such heterogeneous data set. + +This example works with `Resource` instances, which are not necessarily data loaded in memory. +Consequently the `DataStore` instances must be kept open for all the duration of data cube usage. + + +# Direct dependencies + +Maven coordinates | Module info | Remarks +----------------------------------- | ------------------------------- | -------------------- +`org.apache.sis.code:sis-feature` | `org.apache.sis.feature` | +`org.apache.sis.storage:sis-netcdf` | `org.apache.sis.storage.netcdf` | +`edu.ucar:cdm-core` | | For netCDF-4 or HDF5 + +The `cdm-core` dependency can be omitted for netCDF-3 (a.k.a. "classic"), + + +# Code example + +The file name and geospatial coordinates in following code need to be updated for yours data. + +{{< highlight java >}} +import java.io.File; +import org.apache.sis.storage.DataStore; +import org.apache.sis.storage.DataStores; +import org.apache.sis.storage.DataStoreException; +import org.apache.sis.storage.GridCoverageResource; +import org.apache.sis.storage.aggregate.CoverageAggregator; + +public class DataLakeToDataCube { + /** + * Demo entry point. + * + * @param args ignored. + * @throws DataStoreException if an error occurred while reading the raster. + */ + public static void main(String[] args) throws DataStoreException { + try (DataStore s1 = DataStores.open(new File("CMEMS_20220301.nc")); + DataStore s2 = DataStores.open(new File("CMEMS_20220302.nc")); + DataStore s3 = DataStores.open(new File("CMEMS_20220303.nc"))) + { + /* + * Following casts are okay if we know that the resources are raster data, + * all of them having the same grid geometry. Otherwise the code can still + * work but would require more `if (x instanceof Y)` checks. + */ + var r1 = (GridCoverageResource) s1.findResource("sea_surface_height_above_geoid"); + var r2 = (GridCoverageResource) s2.findResource("sea_surface_height_above_geoid"); + var r3 = (GridCoverageResource) s3.findResource("sea_surface_height_above_geoid"); + + System.out.printf("Extent of first set of slices:%n%s%n", r1.getGridGeometry().getExtent()); + System.out.printf("Extent of second set of slices:%n%s%n", r2.getGridGeometry().getExtent()); + System.out.printf("Extent of third set of slices:%n%s%n", r3.getGridGeometry().getExtent()); + + var aggregator = new CoverageAggregator(null); + aggregator.add(r1); + aggregator.add(r2); + aggregator.add(r3); + var dataCube = (GridCoverageResource) aggregator.build(); + /* + * From this point, the data cube can be used as a three-dimension grid coverage. + * See "Get raster values at geographic (or pixel) coordinates" for usage examples. + * However all usages must be done inside this `try` block. + */ + System.out.printf("Extent of the data cube:%n%s%n", dataCube.getGridGeometry().getExtent()); + } + } +} +{{< / highlight >}} + + +# Output + +The output depends on the raster data and the locale. +Below is an example: + +``` +Extent of first set of slices: +Column: [0 … 864] (865 cells) +Row: [0 … 1080] (1081 cells) +Time: [0 … 95] (96 cells) + +Extent of second set of slices: +Column: [0 … 864] (865 cells) +Row: [0 … 1080] (1081 cells) +Time: [0 … 95] (96 cells) + +Extent of third set of slices: +Column: [0 … 864] (865 cells) +Row: [0 … 1080] (1081 cells) +Time: [0 … 95] (96 cells) + +Extent of the data cube: +Column: [0 … 864] (865 cells) +Row: [0 … 1080] (1081 cells) +Time: [0 … 287] (288 cells) +``` diff --git a/content/howto/parallel_computation.md b/content/howto/parallel_computation.md new file mode 100644 index 00000000..1de672a1 --- /dev/null +++ b/content/howto/parallel_computation.md @@ -0,0 +1,60 @@ +--- +title: Parallel computation +--- + +Some grid coverages will read or compute chunks of data only when first requested. +For example when a coverage is the [result of a reprojection](resample_raster.html), +or when a big coverage [uses deferred tile reading](rasters_bigger_than_memory.html). +However if tiles are always requested in the same thread, +it will result in a sequential, mono-threaded computation. +Furthermore it may cause a lot of seek or "HTTP range" operations if tiles are read in random order. +For parallel computation using all available processors, +or for more efficient read operations, +we need to inform Apache SIS in advance about which pixels are about to be requested. + + +# Direct dependencies + +Maven coordinates | Module info | Remarks +--------------------------------- | ------------------------ | ------- +`org.apache.sis.code:sis-feature` | `org.apache.sis.feature` | + + +# Code example + +{{< highlight java >}} +import java.awt.image.ImagingOpException; +import java.awt.image.RenderedImage; +import org.apache.sis.coverage.grid.GridCoverage; +import org.apache.sis.image.ImageProcessor; + +public class ParallelTileComputation { + /** + * Demo entry point. + * + * @param args ignored. + * @throws ImagingOpException unchecked exception thrown if an error occurred while computing a tile. + */ + public static void main(String[] args) { + GridCoverage coverage = ...; // See "Resample a raster" code example. + /* + * Get all data from the coverage, assuming that the grid is two-dimensional. + * If there is three or more dimensions, the null value needs to be replaced + * by a `GridExtent` specifying the two-dimensional slice to fetch. + */ + RenderedImage data = coverage.render(null); + /* + * With above `RenderedImage`, tiles are computed when first requested and cached for future uses. + * If all tiles will be requested in the same thread, it results in a sequential tile computation. + * For parallel computation using all available processors, we need to inform Apache SIS in advance + * about which pixels will be requested. The `null` argument below means "all pixels in the image". + * Don't use that null argument if the image is very big! + */ + var processor = new ImageProcessor(); + data = processor.prefetch(data, null); + /* + * See for example "Get raster values at pixel coordinates" for using this image. + */ + } +} +{{< / highlight >}} diff --git a/content/howto/raster_values_at_pixel_coordinates.md b/content/howto/raster_values_at_pixel_coordinates.md index b77a9143..d73968ed 100644 --- a/content/howto/raster_values_at_pixel_coordinates.md +++ b/content/howto/raster_values_at_pixel_coordinates.md @@ -14,6 +14,11 @@ but provide a _transfer function_ for converting those integers to "real world" Apache SIS can provide either the original integers or the converted values, at user's choice. This choice is specified by the boolean argument in the `data.forConvertedValues(…)` call. +Note that pixel coordinates are relative to the request made in the call to `render(…)`. +They are not directly the grid coordinates of the coverage. +The use of relative coordinates makes possible to avoid 32 bits integer overflow, +and is also convenient for working on an area of interest regardless the grid coverage origin. + # Direct dependencies @@ -74,7 +79,7 @@ public class RasterValuesAtPixelCoordinates { System.out.printf("Value at (%d,%d) is %g %s.%n", pos.x, pos.y, value, unit); if (--n == 0) break; } - pit.moveTo(100, 200); + pit.moveTo(100, 200); // Relative to `extent` low coordinates. float value = pit.getSampleFloat(band); System.out.printf("Value at (100,200) is %g %s.%n", value, unit); } diff --git a/content/howto/read_netcdf.md b/content/howto/read_netcdf.md index 691b51db..d30150c0 100644 --- a/content/howto/read_netcdf.md +++ b/content/howto/read_netcdf.md @@ -61,7 +61,7 @@ public class ReadNetCDF { */ public static GridCoverage example() throws DataStoreException { GridCoverage data; - try (DataStore store = DataStores.open(new File("CMEMS_R20220516.nc"))) { + try (DataStore store = DataStores.open(new File("CMEMS_20220516.nc"))) { /* * See what is inside this file. One of the components listed * below can be given in argument to `findResource(String)`. diff --git a/content/howto/resample_raster.md b/content/howto/resample_raster.md index 9c322953..8e498b1f 100644 --- a/content/howto/resample_raster.md +++ b/content/howto/resample_raster.md @@ -28,7 +28,6 @@ for example using Well Known Text (WKT) format. The file name in following code need to be updated for yours data. {{< highlight java >}} -import java.awt.image.ImagingOpException; import org.apache.sis.coverage.grid.GridCoverage; import org.apache.sis.coverage.grid.GridCoverageProcessor; import org.apache.sis.image.Interpolation; @@ -43,7 +42,6 @@ public class ResampleRaster { * @param args ignored. * @throws FactoryException if an error occurred while creating the Coordinate Reference System (CRS). * @throws TransformException if an error occurred while transforming coordinates to the target CRS. - * @throws ImagingOpException unchecked exception thrown if an error occurred while resampling a tile. */ public static void main(String[] args) throws FactoryException, TransformException { GridCoverage data = ...; // See "Read netCDF" or "Read GeoTIFF" code examples.