This is an automated email from the ASF dual-hosted git repository. desruisseaux pushed a commit to branch geoapi-4.0 in repository https://gitbox.apache.org/repos/asf/sis.git
commit 0f1c4f6b9a557d584b44f1ca6970eb422f50347d Author: Martin Desruisseaux <martin.desruisse...@geomatys.com> AuthorDate: Fri Jan 20 18:03:44 2023 +0100 When reading consecutive tiles in a GeoTIFF file, use a single HTTP request for all contiguous tiles instead of creating a new connection unconditionally for each tile. --- .../org/apache/sis/util/collection/RangeSet.java | 45 ++++++++++-- .../apache/sis/util/collection/RangeSetTest.java | 51 ++++++++++--- .../storage/inflater/CompressionChannel.java | 2 +- .../org/apache/sis/storage/geotiff/DataSubset.java | 45 ++++++++++-- .../sis/internal/storage/io/ChannelDataInput.java | 16 ++-- .../internal/storage/io/FileCacheByteChannel.java | 85 ++++++++++++++++------ .../storage/io/FileCacheByteChannelTest.java | 2 +- 7 files changed, 194 insertions(+), 52 deletions(-) diff --git a/core/sis-utility/src/main/java/org/apache/sis/util/collection/RangeSet.java b/core/sis-utility/src/main/java/org/apache/sis/util/collection/RangeSet.java index 1bbf68e843..62b0ad35bf 100644 --- a/core/sis-utility/src/main/java/org/apache/sis/util/collection/RangeSet.java +++ b/core/sis-utility/src/main/java/org/apache/sis/util/collection/RangeSet.java @@ -331,7 +331,7 @@ public class RangeSet<E extends Comparable<? super E>> extends AbstractSet<Range @Override public int size() { assert (length & 1) == 0; // Length must be even. - return length >>> 1; + return length >> 1; } /** @@ -1407,7 +1407,40 @@ public class RangeSet<E extends Comparable<? super E>> extends AbstractSet<Range // The value is equal to an excluded endpoint. return -1; } - return index >>> 1; // Round toward 0 (odd index are maximum values). + return index >> 1; // Round toward 0 (odd index are maximum values). + } + + /** + * Returns the index of the range having a minimum value equal or lower than the specified value. + * If the given value is lower than the minimal value of all ranges in this set, + * then this method returns -1. + * + * @param value the minimum value to search, ignoring inclusiveness/exclusiveness. + * @return index of the range having a minimum value equal or lower than the specified value. May be -1. + * + * @since 1.4 + */ + public int indexOfMin(final E value) { + int index = binarySearch(value, 0, length); + if (index < 0) index = ~index - 1; + return index >> 1; // Not >>> because we need to preserve the sign. + } + + /** + * Returns the index of the range having a maximum value equal or greater than the specified value. + * If the given value is greater than the maximal value of all ranges in this set, + * then this method returns {@link #size()}. + * + * @param value the maximum value to search, ignoring inclusiveness/exclusiveness. + * @return index of the range having a maximum value equal or greater than the specified value. + * May be {@link #size()}. + * + * @since 1.4 + */ + public int indexOfMax(final E value) { + int index = binarySearch(value, 0, length); + if (index < 0) index = ~index; + return index >> 1; } /** @@ -1422,7 +1455,7 @@ public class RangeSet<E extends Comparable<? super E>> extends AbstractSet<Range * @throws ClassCastException if range elements are not convertible to {@code long}. */ public long getMinLong(final int index) throws IndexOutOfBoundsException, ClassCastException { - return Array.getLong(array, Objects.checkIndex(index, length >>> 1) << 1); + return Array.getLong(array, Objects.checkIndex(index, length >> 1) << 1); } /** @@ -1439,7 +1472,7 @@ public class RangeSet<E extends Comparable<? super E>> extends AbstractSet<Range * @see org.apache.sis.measure.NumberRange#getMinDouble() */ public double getMinDouble(final int index) throws IndexOutOfBoundsException, ClassCastException { - return Array.getDouble(array, Objects.checkIndex(index, length >>> 1) << 1); + return Array.getDouble(array, Objects.checkIndex(index, length >> 1) << 1); } /** @@ -1454,7 +1487,7 @@ public class RangeSet<E extends Comparable<? super E>> extends AbstractSet<Range * @throws ClassCastException if range elements are not convertible to {@code long}. */ public long getMaxLong(final int index) throws IndexOutOfBoundsException, ClassCastException { - return Array.getLong(array, (Objects.checkIndex(index, length >>> 1) << 1) | 1); + return Array.getLong(array, (Objects.checkIndex(index, length >> 1) << 1) | 1); } /** @@ -1471,7 +1504,7 @@ public class RangeSet<E extends Comparable<? super E>> extends AbstractSet<Range * @see org.apache.sis.measure.NumberRange#getMaxDouble() */ public double getMaxDouble(final int index) throws IndexOutOfBoundsException, ClassCastException { - return Array.getDouble(array, (Objects.checkIndex(index, length >>> 1) << 1) | 1); + return Array.getDouble(array, (Objects.checkIndex(index, length >> 1) << 1) | 1); } /** diff --git a/core/sis-utility/src/test/java/org/apache/sis/util/collection/RangeSetTest.java b/core/sis-utility/src/test/java/org/apache/sis/util/collection/RangeSetTest.java index 8643d991cc..3a0955298f 100644 --- a/core/sis-utility/src/test/java/org/apache/sis/util/collection/RangeSetTest.java +++ b/core/sis-utility/src/test/java/org/apache/sis/util/collection/RangeSetTest.java @@ -43,7 +43,7 @@ import static org.apache.sis.internal.util.StandardDateFormat.NANOS_PER_SECOND; * * @author Martin Desruisseaux (Geomatys) * @author Rémi Maréchal (Geomatys) - * @version 0.5 + * @version 1.4 * @since 0.3 */ @DependsOn(org.apache.sis.measure.RangeTest.class) @@ -225,6 +225,8 @@ public final class RangeSetTest extends TestCase { /** * Tests the {@link RangeSet#indexOfRange(Comparable)} method. + * Opportunistically tests {@link RangeSet#indexOfMin(Comparable)} + * and {@link RangeSet#indexOfMax(Comparable)} methods as well. */ @Test public void testIndexOfRange() { @@ -234,14 +236,45 @@ public final class RangeSetTest extends TestCase { assertTrue(ranges.add(-20, -10)); assertTrue(ranges.add( 60, 70)); assertTrue(ranges.add( -5, 25)); - assertEquals( 0, ranges.indexOfRange(-15)); - assertEquals( 1, ranges.indexOfRange( 20)); - assertEquals( 2, ranges.indexOfRange( 28)); - assertEquals( 3, ranges.indexOfRange( 49)); - assertEquals( 4, ranges.indexOfRange( 69)); - assertEquals(-1, ranges.indexOfRange( 70)); - assertEquals(-1, ranges.indexOfRange( 26)); - assertEquals(-1, ranges.indexOfRange(-30)); + verifyIndexOf(ranges, -15, 0, 0, 0); + verifyIndexOf(ranges, 20, 1, 1, 1); + verifyIndexOf(ranges, 28, 2, 2, 2); + verifyIndexOf(ranges, 49, 3, 3, 3); + verifyIndexOf(ranges, 69, 4, 4, 4); + verifyIndexOf(ranges, 70, -1, 4, 4); + verifyIndexOf(ranges, 100, -1, 4, 5); + verifyIndexOf(ranges, 60, 4, 4, 4); + verifyIndexOf(ranges, 59, -1, 3, 4); + verifyIndexOf(ranges, 26, -1, 1, 2); + verifyIndexOf(ranges, -30, -1, -1, 0); + verifyIndexOf(ranges, -20, 0, 0, 0); + verifyIndexOf(ranges, -21, -1, -1, 0); + } + + /** + * Verifies the result of calling an {@code indexOf(…)} method. + * + * @param ranges the ranges where to search. + * @param value the value to search. + * @param index expected result of {@code indedOfRange(…)}. + * @param min expected result of {@code indedOfMin(…)}. + * @param max expected result of {@code indedOfMax(…)}. + */ + private static void verifyIndexOf(final RangeSet<Integer> ranges, + final int value, final int index, final int min, final int max) + { + assertEquals(index, ranges.indexOfRange(value)); + assertEquals(min, ranges.indexOfMin (value)); + assertEquals(max, ranges.indexOfMax (value)); + if (index >= 0) { + assertTrue(value >= ranges.getMinLong(index)); + assertTrue(value < ranges.getMaxLong(index)); + } + final int s = ranges.size(); + if (min >= 0) assertTrue(value >= ranges.getMinLong(min )); + if (min+1 < s) assertTrue(value < ranges.getMinLong(min+1)); + if (max < s) assertTrue(value <= ranges.getMaxLong(max )); + if (max > 0) assertTrue(value > ranges.getMaxLong(max-1)); } /** diff --git a/storage/sis-geotiff/src/main/java/org/apache/sis/internal/storage/inflater/CompressionChannel.java b/storage/sis-geotiff/src/main/java/org/apache/sis/internal/storage/inflater/CompressionChannel.java index bef43cb3d0..1fd76a6d98 100644 --- a/storage/sis-geotiff/src/main/java/org/apache/sis/internal/storage/inflater/CompressionChannel.java +++ b/storage/sis-geotiff/src/main/java/org/apache/sis/internal/storage/inflater/CompressionChannel.java @@ -84,7 +84,7 @@ abstract class CompressionChannel extends PixelChannel { public void setInputRegion(final long start, final long byteCount) throws IOException { endPosition = Math.addExact(start, byteCount); input.seek(start); - input.endOfInterest(endPosition); + input.rangeOfInterest(start, endPosition); } /** diff --git a/storage/sis-geotiff/src/main/java/org/apache/sis/storage/geotiff/DataSubset.java b/storage/sis-geotiff/src/main/java/org/apache/sis/storage/geotiff/DataSubset.java index 021c793d8e..fa795ad802 100644 --- a/storage/sis-geotiff/src/main/java/org/apache/sis/storage/geotiff/DataSubset.java +++ b/storage/sis-geotiff/src/main/java/org/apache/sis/storage/geotiff/DataSubset.java @@ -29,6 +29,7 @@ import org.opengis.util.GenericName; import org.apache.sis.image.DataType; import org.apache.sis.storage.DataStoreException; import org.apache.sis.storage.DataStoreContentException; +import org.apache.sis.internal.util.Numerics; import org.apache.sis.internal.storage.io.Region; import org.apache.sis.internal.storage.io.HyperRectangleReader; import org.apache.sis.internal.storage.TiledGridCoverage; @@ -66,7 +67,7 @@ import static java.lang.Math.toIntExact; * the same tile indices than {@link DataCube} in order to avoid integer overflow. * * @author Martin Desruisseaux (Geomatys) - * @version 1.3 + * @version 1.4 * @since 1.1 */ class DataSubset extends TiledGridCoverage implements Localized { @@ -248,7 +249,10 @@ class DataSubset extends TiledGridCoverage implements Localized { /** * Stores information about a tile to be loaded. * - * @param iterator the iterator for which to create a snapshot of its current position. + * @param domain the iterator for which to create a snapshot of its current position. + * @param tileOffsets the {@link DataSubset#tileOffsets} vector. + * @param includedBanks indices of banks to read, or {@code null} for reading all of them. + * @param numTiles value of {@link DataSubset#numTiles} (total number of tiles in the image). */ Tile(final AOI domain, final Vector tileOffsets, final int[] includedBanks, final int numTiles) { super(domain); @@ -259,6 +263,24 @@ class DataSubset extends TiledGridCoverage implements Localized { byteOffset = tileOffsets.longValue(p); } + /** + * Notifies the input channel about the range of bytes that we are going to read. + * + * @param tileOffsets the {@link DataSubset#tileOffsets} vector. + * @param tileByteCounts the {@link DataSubset#tileByteCounts} vector. + * @param b indices of banks to read. + * @param numTiles value of {@link DataSubset#numTiles} (total number of tiles in the image). + * @param input the input to notify about the ranges of bytes to read. + */ + final void notifyInputChannel(final Vector tileOffsets, final Vector tileByteCounts, + int b, final int numTiles, final ChannelDataInput input) + { + b = indexInTileVector + b * numTiles; + final long offset = tileOffsets.longValue(b); + final long length = tileByteCounts.longValue(b); + input.rangeOfInterest(offset, Numerics.saturatingAdd(offset, length)); + } + /** * Copies {@link #tileOffsets} or {@link #tileByteCounts} values into the given target array. * Values for different planes ("banks" in Java2D terminology) are packed as consecutive values @@ -308,6 +330,7 @@ class DataSubset extends TiledGridCoverage implements Localized { * Each tile will either store all sample values in an interleaved fashion inside a single bank * (`sourcePixelStride` > 1) or use one separated bank per band (`sourcePixelStride` == 1). */ + final ChannelDataInput input = source.reader.input; final int[] includedBanks = (sourcePixelStride == 1) ? includedBands : null; final Raster[] result = new Raster[iterator.tileCountInQuery]; final Tile[] missings = new Tile[iterator.tileCountInQuery]; @@ -319,8 +342,20 @@ class DataSubset extends TiledGridCoverage implements Localized { if (tile != null) { result[iterator.getIndexInResultArray()] = tile; } else { - // Tile not yet loaded. Add to a queue of tiles to load later. - missings[numMissings++] = new Tile(iterator, tileOffsets, includedBanks, numTiles); + /* + * Tile not yet loaded. Add to a queue of tiles to load later. + * Notify the input channel about the ranges of bytes to read. + * This notification is redundant with the same notification + * done in `CompressionChannel.setInputRegion(…)`, but doing + * all notifications in advance gives a chance to group ranges. + */ + final Tile missing = new Tile(iterator, tileOffsets, includedBanks, numTiles); + missings[numMissings++] = missing; + if (includedBanks == null) { + missing.notifyInputChannel(tileOffsets, tileByteCounts, 0, numTiles, input); + } else for (int b : includedBanks) { + missing.notifyInputChannel(tileOffsets, tileByteCounts, b, numTiles, input); + } } } while (iterator.next()); if (numMissings != 0) { @@ -339,7 +374,7 @@ class DataSubset extends TiledGridCoverage implements Localized { final Point origin = new Point(); final long[] offsets = new long[numBanks]; final long[] byteCounts = new long[numBanks]; - try (Closeable c = createInflater()) { + try (Closeable finisher = createInflater()) { for (int i=0; i<numMissings; i++) { final Tile tile = missings[i]; if (tile.getRegionInsideTile(lower, upper, subsampling, BIDIMENSIONAL)) { diff --git a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/ChannelDataInput.java b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/ChannelDataInput.java index 538ed96daf..40eb30dbe9 100644 --- a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/ChannelDataInput.java +++ b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/ChannelDataInput.java @@ -949,18 +949,18 @@ public class ChannelDataInput extends ChannelData { } /** - * Specifies the position after the last byte which is expected to be read. - * The number of bytes is only a hint and may be ignored, depending on the channel. + * Specifies a range of bytes which is expected to be read. + * The range of bytes is only a hint and may be ignored, depending on subclasses. * Reading more bytes than specified is okay, only potentially less efficient. - * Values ≤ {@linkplain #position() position} means to read until the end of stream. * - * @param position position after the last desired byte, - * or a value ≤ current position for reading until the end of stream. + * @param lower position (inclusive) of the first byte to be requested. + * @param upper position (exclusive) of the last byte to be requested. */ - public final void endOfInterest(final long position) { + public final void rangeOfInterest(long lower, long upper) { if (channel instanceof FileCacheByteChannel) { - ((FileCacheByteChannel) channel).endOfInterest(position + channelOffset); - // Overflow is okay as value ≤ position means "read until end of stream". + lower = Math.addExact(lower, channelOffset); + upper = Math.addExact(upper, channelOffset); + ((FileCacheByteChannel) channel).rangeOfInterest(lower, upper); } } diff --git a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/FileCacheByteChannel.java b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/FileCacheByteChannel.java index 5557d651ac..544532dedb 100644 --- a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/FileCacheByteChannel.java +++ b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/FileCacheByteChannel.java @@ -47,7 +47,7 @@ import static org.apache.sis.internal.storage.StoreUtilities.LOGGER; * * <ul> * <li>Bytes read from the input stream are cached in a temporary file for making backward seeks possible.</li> - * <li>The number of bytes of interest {@linkplain #endOfInterest(long) can be specified}. + * <li>The range of bytes of interest {@linkplain #rangeOfInterest(long, long) can be specified}. * It makes possible to specify the range of bytes to download with HTTP connections.</li> * <li>This implementation is thread-safe.</li> * <li>Current implementation is read-only.</li> @@ -193,7 +193,7 @@ public abstract class FileCacheByteChannel implements SeekableByteChannel { * @param start position of the first byte to read (inclusive). * @param end position of the last byte to read with the returned stream (inclusive), * or {@link Long#MAX_VALUE} for end of stream. - * @return + * @return the "Range" value to put in an HTTP header. */ public static String formatRange(final long start, final long end) { final boolean hasEnd = (end > start) && (end != Long.MAX_VALUE); @@ -261,12 +261,13 @@ public abstract class FileCacheByteChannel implements SeekableByteChannel { private long position; /** - * Position after the last requested byte, or ≤ {@linkplain #position} if unknown. - * It can be used for specifying the range of bytes to download from an HTTP connection. + * Ranges of requested bytes, for choosing the ranges to request in new connections. + * Ranges are added by calls to {@link #rangeOfInterest(long, long)} and removed + * when the connection is created. * - * @see #endOfInterest(long) + * @see #rangeOfInterest(long, long) */ - private long endOfInterest; + private final RangeSet<Long> rangesOfInterest; /** * Ranges of bytes in the {@linkplain #file} where data are valid. @@ -288,6 +289,7 @@ public abstract class FileCacheByteChannel implements SeekableByteChannel { * @throws IOException if the temporary file cannot be created. */ protected FileCacheByteChannel(final String prefix) throws IOException { + rangesOfInterest = RangeSet.create(Long.class, true, false); rangesOfAvailableBytes = RangeSet.create(Long.class, true, false); file = FileChannel.open(Files.createTempFile(prefix, null), StandardOpenOption.READ, @@ -386,42 +388,79 @@ public abstract class FileCacheByteChannel implements SeekableByteChannel { ArgumentChecks.ensurePositive("newPosition", newPosition); } position = newPosition; - if (endOfInterest - newPosition < SKIP_THRESHOLD) { - endOfInterest = 0; // Read until end of stream. - } return this; } /** - * Specifies the position after the last byte which is expected to be read. - * The number of bytes is only a hint and may be ignored, depending on subclasses. + * Specifies a range of bytes which is expected to be read. + * The range of bytes is only a hint and may be ignored, depending on subclasses. * Reading more bytes than specified is okay, only potentially less efficient. - * Values ≤ {@linkplain #position() position} means to read until the end of stream. * - * @param end position after the last desired byte, or a value ≤ position for reading until the end of stream. + * @param lower position (inclusive) of the first byte to be requested. + * @param upper position (exclusive) of the last byte to be requested. */ - final synchronized void endOfInterest(final long end) { - endOfInterest = end; + final synchronized void rangeOfInterest(final long lower, final long upper) { + if (upper > lower) { + rangesOfInterest.add(lower, upper); + } } /** * Opens a connection on the range of bytes determined by the current channel position. - * The {@link #endOfInterest} position is considered unspecified if not greater than - * {@link #position} (it may be 0). + * The range of bytes of interest is specified in the {@link #rangesOfInterest} set. + * If no range is specified, this method requests all bytes until the end of stream. + * If some ranges are specified, this method finds the smallest "end of range" after + * the current position. If the gab between ranges is less than {@link #SKIP_THRESHOLD}, + * the ranges will be merged in a single request. * * @return the opened connection (never {@code null}). * @throws IOException if the connection cannot be established. */ private Connection openConnection() throws IOException { - long end = endOfInterest; - if (end > position) end--; // Make inclusive. - else end = (length > 0) ? length-1 : Long.MAX_VALUE; - var c = openConnection(position, end); + int i = Math.max(rangesOfInterest.indexOfMin(position), 0); + final int size = rangesOfInterest.size(); + long end; + do { // Should be executed exactly 1 or 2 times. + if (i >= size) { + end = (length > 0) ? length-1 : Long.MAX_VALUE; + break; + } + end = rangesOfInterest.getMaxLong(i) - 1; // Inclusive + i++; + } while (end < position); + /* + * At this point we found the smallest "end of range" position. + * If the gab with next range is small enough, merge the ranges + * in order to make a single connection request. + */ + while (i < size) { + if (rangesOfInterest.getMinLong(i) - end >= SKIP_THRESHOLD) { + break; + } + end = rangesOfInterest.getMaxLong(i) - 1; // Inclusive + i++; + } + /* + * Send the HTTP or S3 request for the range of bytes. + * Prepare the cache file to receive those bytes. + * Save the stream length if it is known. + */ + final Connection c = openConnection(position, end); file.position(c.start); if (c.length >= 0) { length = c.length; } connection = c; // Set only on success. + /* + * Remove the requested range from the list of ranges of interest. + * The range to remove is determined on the assumption that caller + * makes a best effort for reading bytes in sequential order, and + * that if the connection provides less bytes, the missing bytes + * will probably be requested later. + */ + end = Math.min(c.end, end); + if (end != Long.MAX_VALUE) end++; // Make exclusive. + rangesOfInterest.remove(Math.min(position, c.start), end); return c; } @@ -763,6 +802,8 @@ public abstract class FileCacheByteChannel implements SeekableByteChannel { */ @Override public synchronized String toString() { - return Strings.toString(getClass(), "filename", filename(), "position", position, "rangeCount", rangesOfAvailableBytes.size()); + return Strings.toString(getClass(), "filename", filename(), "position", position, + "rangesOfAvailableBytes", rangesOfAvailableBytes.size(), + "rangesOfInterest", rangesOfInterest.size()); } } diff --git a/storage/sis-storage/src/test/java/org/apache/sis/internal/storage/io/FileCacheByteChannelTest.java b/storage/sis-storage/src/test/java/org/apache/sis/internal/storage/io/FileCacheByteChannelTest.java index fc7c8ad3c9..5da0e3dce7 100644 --- a/storage/sis-storage/src/test/java/org/apache/sis/internal/storage/io/FileCacheByteChannelTest.java +++ b/storage/sis-storage/src/test/java/org/apache/sis/internal/storage/io/FileCacheByteChannelTest.java @@ -181,7 +181,7 @@ public final class FileCacheByteChannelTest extends TestCase { end = t; } channel.position(position); - channel.endOfInterest(end + 1); + channel.rangeOfInterest(position, end + 1); } channel.readInRandomRegion(buffer); while (buffer.hasRemaining()) {