This is an automated email from the ASF dual-hosted git repository. desruisseaux pushed a commit to branch geoapi-4.0 in repository https://gitbox.apache.org/repos/asf/sis.git
The following commit(s) were added to refs/heads/geoapi-4.0 by this push: new 54f97965a8 Allow HTTP connection from an URI with cache in a temporary file. 54f97965a8 is described below commit 54f97965a83d6952c27e36cc696172973fae802d Author: Martin Desruisseaux <martin.desruisse...@geomatys.com> AuthorDate: Fri Dec 23 15:55:16 2022 +0100 Allow HTTP connection from an URI with cache in a temporary file. --- .../apache/sis/cloud/aws/s3/CachedByteChannel.java | 16 +-- .../apache/sis/internal/jaxb/referencing/Code.java | 2 +- .../apache/sis/test/integration/MetadataTest.java | 5 +- .../org/apache/sis/internal/util/Constants.java | 6 +- .../apache/sis/internal/util/DefinitionURI.java | 6 +- .../apache/sis/storage/netcdf/MetadataReader.java | 2 +- .../org/apache/sis/internal/storage/CodeType.java | 9 +- .../sis/internal/storage/io/ChannelFactory.java | 12 +- .../internal/storage/io/FileCacheByteChannel.java | 73 ++++++++---- .../sis/internal/storage/io/HttpByteChannel.java | 130 +++++++++++++++++++++ .../sis/internal/storage/io/IOUtilities.java | 13 +++ 11 files changed, 224 insertions(+), 50 deletions(-) diff --git a/cloud/sis-cloud-aws/src/main/java/org/apache/sis/cloud/aws/s3/CachedByteChannel.java b/cloud/sis-cloud-aws/src/main/java/org/apache/sis/cloud/aws/s3/CachedByteChannel.java index dc7800160a..4bb3ae9cf1 100644 --- a/cloud/sis-cloud-aws/src/main/java/org/apache/sis/cloud/aws/s3/CachedByteChannel.java +++ b/cloud/sis-cloud-aws/src/main/java/org/apache/sis/cloud/aws/s3/CachedByteChannel.java @@ -70,19 +70,15 @@ final class CachedByteChannel extends FileCacheByteChannel { * @return contains the input stream providing the bytes to read starting at the given start position. 
*/ @Override - protected Connection openConnection(long start, long end) throws IOException { + protected Connection openConnection(final long start, final long end) throws IOException { final ResponseInputStream<GetObjectResponse> stream; final String contentRange, acceptRanges; final Long contentLength; try { GetObjectRequest.Builder builder = GetObjectRequest.builder().bucket(path.bucket).key(path.key); - final boolean hasEnd = (end > start) && (end != Long.MAX_VALUE); - if (start != 0 || hasEnd) { - final StringBuilder range = new StringBuilder(RANGES_UNIT).append('=').append(start); - if (hasEnd) { - range.append('-').append(end); // Inclusive. - } - builder = builder.range(range.toString()); + final String range = Connection.formatRange(start, end); + if (range != null) { + builder = builder.range(range); } stream = path.fs.client().getObject(builder.build()); final GetObjectResponse response = stream.response(); @@ -94,9 +90,7 @@ final class CachedByteChannel extends FileCacheByteChannel { } final List<String> rangeUnits = (acceptRanges != null) ? List.of(acceptRanges) : List.of(); final long length = (contentLength != null) ? contentLength : -1; - if (contentRange == null) { - return new Connection(stream, 0, (length < 0) ? 
Long.MAX_VALUE : length, length, Connection.acceptRanges(rangeUnits)); - } else try { + try { return new Connection(stream, contentRange, length, rangeUnits); } catch (IllegalArgumentException e) { throw new IOException(e); diff --git a/core/sis-referencing/src/main/java/org/apache/sis/internal/jaxb/referencing/Code.java b/core/sis-referencing/src/main/java/org/apache/sis/internal/jaxb/referencing/Code.java index 48a5d53ad2..e096f6d20c 100644 --- a/core/sis-referencing/src/main/java/org/apache/sis/internal/jaxb/referencing/Code.java +++ b/core/sis-referencing/src/main/java/org/apache/sis/internal/jaxb/referencing/Code.java @@ -176,7 +176,7 @@ public final class Code { return new Code(identifier); } if (!isHTTP) { - isHTTP = code.regionMatches(true, 0, Constants.HTTP, 0, 5); + isHTTP = code.regionMatches(true, 0, Constants.HTTP + ':', 0, 5); if (isHTTP) { fallback = identifier; } else if (!isEPSG) { diff --git a/core/sis-referencing/src/test/java/org/apache/sis/test/integration/MetadataTest.java b/core/sis-referencing/src/test/java/org/apache/sis/test/integration/MetadataTest.java index 5538a1dfb0..1d3950bedc 100644 --- a/core/sis-referencing/src/test/java/org/apache/sis/test/integration/MetadataTest.java +++ b/core/sis-referencing/src/test/java/org/apache/sis/test/integration/MetadataTest.java @@ -57,6 +57,7 @@ import org.apache.sis.internal.jaxb.metadata.replace.ReferenceSystemMetadata; import org.apache.sis.internal.xml.LegacyNamespaces; import org.apache.sis.internal.jaxb.gcx.Anchor; import org.apache.sis.internal.system.Loggers; +import org.apache.sis.internal.util.Constants; import org.apache.sis.util.SimpleInternationalString; import org.apache.sis.util.ComparisonMode; import org.apache.sis.xml.Namespaces; @@ -136,7 +137,7 @@ public final strictfp class MetadataTest extends TestCase { final Anchor country = new Anchor(URI.create("SDN:C320:2:FR"), "France"); // Non-public SIS class. 
{ final DefaultOnlineResource online = new DefaultOnlineResource(URI.create("http://www.ifremer.fr/sismer/")); - online.setProtocol("http"); + online.setProtocol(Constants.HTTP); final DefaultContact contact = new DefaultContact(online); contact.getIdentifierMap().putSpecialized(IdentifierSpace.ID, "IFREMER"); contact.setPhones(List.of( @@ -166,7 +167,7 @@ public final strictfp class MetadataTest extends TestCase { @SuppressWarnings("deprecation") final DefaultResponsibility originator = new DefaultResponsibleParty(Role.ORIGINATOR); final DefaultOnlineResource online = new DefaultOnlineResource(URI.create("http://www.com.univ-mrs.fr/LOB/")); - online.setProtocol("http"); + online.setProtocol(Constants.HTTP); final DefaultContact contact = new DefaultContact(online); contact.setPhones(List.of( new DefaultTelephone("+33 (0)4 xx.xx.xx.x5", TelephoneType.VOICE), diff --git a/core/sis-utility/src/main/java/org/apache/sis/internal/util/Constants.java b/core/sis-utility/src/main/java/org/apache/sis/internal/util/Constants.java index 82a2c9e042..9a38995c0a 100644 --- a/core/sis-utility/src/main/java/org/apache/sis/internal/util/Constants.java +++ b/core/sis-utility/src/main/java/org/apache/sis/internal/util/Constants.java @@ -31,7 +31,7 @@ import org.apache.sis.util.Static; * creates itself the instance to be tested. * * @author Martin Desruisseaux (Geomatys) - * @version 1.3 + * @version 1.4 * @since 0.5 * @module */ @@ -53,9 +53,9 @@ public final class Constants extends Static { public static final byte DEFAULT_INDENTATION = 2; /** - * The {@value} protocol. Used in XML namespaces. + * The {@value} protocol. */ - public static final String HTTP = "http:"; + public static final String HTTP = "http", HTTPS = "https"; /** * The {@value} code space. 
diff --git a/core/sis-utility/src/main/java/org/apache/sis/internal/util/DefinitionURI.java b/core/sis-utility/src/main/java/org/apache/sis/internal/util/DefinitionURI.java index 5129f2865e..54a23c4e3e 100644 --- a/core/sis-utility/src/main/java/org/apache/sis/internal/util/DefinitionURI.java +++ b/core/sis-utility/src/main/java/org/apache/sis/internal/util/DefinitionURI.java @@ -329,7 +329,7 @@ public final class DefinitionURI { * in addition to "ogc" in URN. */ case 0: { - if (regionMatches("http", uri, lower, upper)) { + if (regionMatches(Constants.HTTP, uri, lower, upper)) { result = new DefinitionURI(); result.isHTTP = true; if (codeForGML(null, null, uri, ++upper, result) != null) { @@ -651,7 +651,7 @@ public final class DefinitionURI { if (isGML) { final String path = PATHS.get(type); if (path != null) { - return Constants.HTTP + path + authority + ".xml#" + code; + return Constants.HTTP + ':' + path + authority + ".xml#" + code; } } final StringBuilder buffer = new StringBuilder(40); @@ -671,7 +671,7 @@ public final class DefinitionURI { */ private void appendStringTo(final StringBuilder buffer, char separator) { if (isHTTP) { - buffer.append(Constants.HTTP + "//").append(DOMAIN).append("/def"); + buffer.append(Constants.HTTP + "://").append(DOMAIN).append("/def"); separator = '/'; } int n = 4; diff --git a/storage/sis-netcdf/src/main/java/org/apache/sis/storage/netcdf/MetadataReader.java b/storage/sis-netcdf/src/main/java/org/apache/sis/storage/netcdf/MetadataReader.java index 55385907ce..4ee557155e 100644 --- a/storage/sis-netcdf/src/main/java/org/apache/sis/storage/netcdf/MetadataReader.java +++ b/storage/sis-netcdf/src/main/java/org/apache/sis/storage/netcdf/MetadataReader.java @@ -386,7 +386,7 @@ split: while ((start = CharSequences.skipLeadingWhitespaces(value, start, lengt final DefaultOnlineResource resource = new DefaultOnlineResource(uri); final String protocol = uri.getScheme(); resource.setProtocol(protocol); - if 
("http".equalsIgnoreCase(protocol) || "https".equalsIgnoreCase(protocol)) { + if (IOUtilities.isHTTP(protocol)) { resource.setApplicationProfile("web browser"); } resource.setFunction(OnLineFunction.INFORMATION); diff --git a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/CodeType.java b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/CodeType.java index 419d6fbf53..9e05c61d74 100644 --- a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/CodeType.java +++ b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/CodeType.java @@ -21,6 +21,7 @@ import java.util.Map; import java.util.HashMap; import java.util.Locale; import org.apache.sis.util.CharSequences; +import org.apache.sis.internal.util.Constants; import org.apache.sis.internal.util.DefinitionURI; @@ -94,10 +95,10 @@ public enum CodeType { private static final Map<String,CodeType> FOR_PROTOCOL; static { FOR_PROTOCOL = new HashMap<>(); - FOR_PROTOCOL.put("urn", CodeType.URN); - FOR_PROTOCOL.put("http", CodeType.HTTP_OGC); // Will actually need verification. - FOR_PROTOCOL.put("https", CodeType.HTTP_OGC); // Will actually need verification. - FOR_PROTOCOL.put("shttp", CodeType.HTTP_OGC); // Not widely used but nevertheless exist. + FOR_PROTOCOL.put("urn", CodeType.URN); + FOR_PROTOCOL.put(Constants.HTTP, CodeType.HTTP_OGC); // Will actually need verification. + FOR_PROTOCOL.put(Constants.HTTPS, CodeType.HTTP_OGC); // Will actually need verification. + FOR_PROTOCOL.put("shttp", CodeType.HTTP_OGC); // Not widely used but nevertheless exist. 
for (final String p : new String[] {"cvs", "dav", "file", "ftp", "git", "jar", "nfs", "sftp", "ssh", "svn"}) { if (FOR_PROTOCOL.put(p, CodeType.URL) != null) { throw new AssertionError(p); diff --git a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/ChannelFactory.java b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/ChannelFactory.java index 943a8fe371..bec2f32eb7 100644 --- a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/ChannelFactory.java +++ b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/ChannelFactory.java @@ -69,7 +69,7 @@ import org.apache.sis.storage.event.StoreListeners; * * @author Martin Desruisseaux (Geomatys) * @author Johann Sorel (Geomatys) - * @version 1.2 + * @version 1.4 * @since 0.8 * @module */ @@ -224,6 +224,16 @@ public abstract class ChannelFactory { * so we are better to check now and provide a more appropriate exception for this method. */ throw new IOException(Resources.format(Resources.Keys.MissingSchemeInURI_1, uri)); + } + if (IOUtilities.isHTTP(uri.getScheme())) { + return new ChannelFactory(false) { + @Override public ReadableByteChannel readable(String filename, StoreListeners listeners) throws IOException { + return new HttpByteChannel(filename, uri); + } + @Override public WritableByteChannel writable(String filename, StoreListeners listeners) throws IOException { + return Channels.newChannel(uri.toURL().openConnection().getOutputStream()); + } + }; } else try { storage = Paths.get(uri); } catch (IllegalArgumentException | FileSystemNotFoundException e) { diff --git a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/FileCacheByteChannel.java b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/FileCacheByteChannel.java index 6ca122e7bd..092fd52bc0 100644 --- a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/FileCacheByteChannel.java +++ 
b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/FileCacheByteChannel.java @@ -73,11 +73,6 @@ public abstract class FileCacheByteChannel implements SeekableByteChannel { */ static final int SKIP_THRESHOLD = 64 * 1024; - /** - * The unit of ranges used in HTTP connections. - */ - protected static final String RANGES_UNIT = "bytes"; - /** * Number of nanoseconds to wait before to close an inactive connection. */ @@ -88,6 +83,9 @@ public abstract class FileCacheByteChannel implements SeekableByteChannel { * This is the return value of {@link #openConnection(long, long)}. */ protected static final class Connection extends org.apache.sis.internal.jdk17.Record { + /** The unit of ranges used in HTTP connections. */ + private static final String RANGES_UNIT = "bytes"; + /** The input stream for reading the bytes. */ final InputStream input; @@ -127,29 +125,35 @@ public abstract class FileCacheByteChannel implements SeekableByteChannel { * Example: "Content-Range: bytes 25000-75000/100000". * * @param input the input stream for reading the bytes. - * @param contentRange value of "Content-Range" in HTTP header. + * @param contentRange value of "Content-Range" in HTTP header, or {@code null} if none. * @param acceptRanges value of "Accept-Ranges" in HTTP header. * @param contentLength total length of the stream, or -1 if unknown. - * @throws IllegalArgumentException if the start, end of length cannot be parsed. + * @throws IllegalArgumentException if the start, end or length cannot be parsed. 
*/ public Connection(final InputStream input, String contentRange, long contentLength, final Iterable<String> acceptRanges) { this.input = input; - contentRange = contentRange.trim(); - int s = contentRange.indexOf(' '); - if (s >= 0 && (s != RANGES_UNIT.length() || !contentRange.regionMatches(true, 0, RANGES_UNIT, 0, s))) { - throw new IllegalArgumentException(Errors.format(Errors.Keys.UnsupportedArgumentValue_1, contentRange)); - } - int rs = contentRange.indexOf('-', ++s); // Index of range separator. - int ls = contentRange.indexOf('/', Math.max(s, rs+1)); // Index of length separator. - if (contentLength < 0 && ls >= 0) { - final String t = contentRange.substring(ls+1).trim(); - if (!t.equals("*")) contentLength = Long.parseLong(t); + if (contentRange == null) { + start = 0; + end = (contentLength > 0) ? contentLength - 1 : Long.MAX_VALUE; + length = contentLength; + } else { + contentRange = contentRange.trim(); + int s = contentRange.indexOf(' '); + if (s >= 0 && (s != RANGES_UNIT.length() || !contentRange.regionMatches(true, 0, RANGES_UNIT, 0, s))) { + throw new IllegalArgumentException(Errors.format(Errors.Keys.UnsupportedArgumentValue_1, contentRange)); + } + int rs = contentRange.indexOf('-', ++s); // Index of range separator. + int ls = contentRange.indexOf('/', Math.max(s, rs+1)); // Index of length separator. + if (contentLength < 0 && ls >= 0) { + final String t = contentRange.substring(ls+1).trim(); + if (!t.equals("*")) contentLength = Long.parseLong(t); + } + length = contentLength; + if (ls < 0) ls = contentRange.length(); + if (rs < 0) rs = ls; + start = Long.parseLong(contentRange.substring(s, rs).trim()); + end = (rs < ls) ? Long.parseLong(contentRange.substring(rs+1, ls).trim()) : length; } - length = contentLength; - if (ls < 0) ls = contentRange.length(); - if (rs < 0) rs = ls; - start = Long.parseLong(contentRange.substring(s, rs).trim()); - end = (rs < ls) ? 
Long.parseLong(contentRange.substring(rs+1, ls).trim()) : length; this.acceptRanges = acceptRanges(acceptRanges); } @@ -159,7 +163,7 @@ public abstract class FileCacheByteChannel implements SeekableByteChannel { * @param values HTTP header value for "Accept-Ranges". * @return whether the values contains at least one "bytes" string. */ - public static boolean acceptRanges(final Iterable<String> values) { + private static boolean acceptRanges(final Iterable<String> values) { for (final String t : values) { if (ArraysExt.containsIgnoreCase((String[]) CharSequences.split(t, ','), RANGES_UNIT)) { return true; @@ -168,12 +172,33 @@ public abstract class FileCacheByteChannel implements SeekableByteChannel { return false; } + /** + * Formats the "Range" value to send in an HTTP header for the specified range of bytes. + * This is a helper method for {@link #openConnection(long, long)} implementations. + * + * @param start position of the first byte to read (inclusive). + * @param end position of the last byte to read with the returned stream (inclusive), + * or {@link Long#MAX_VALUE} for end of stream. + * @return + */ + public static String formatRange(final long start, final long end) { + final boolean hasEnd = (end > start) && (end != Long.MAX_VALUE); + if (start == 0 && !hasEnd) { + return null; + } + final StringBuilder range = new StringBuilder(RANGES_UNIT).append('=').append(start).append('-'); + if (hasEnd) { + range.append(end); // Inclusive. + } + return range.toString(); + } + /** * Returns a string representation for debugging purposes. 
*/ @Override public String toString() { - return Strings.toString(getClass(), "start", start, "end", end); + return Strings.toString(getClass(), null, formatRange(start, end)); } } diff --git a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/HttpByteChannel.java b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/HttpByteChannel.java new file mode 100644 index 0000000000..fad086563a --- /dev/null +++ b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/HttpByteChannel.java @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.sis.internal.storage.io; + +import java.net.URI; +import java.net.http.HttpClient; +import java.net.http.HttpHeaders; +import java.net.http.HttpRequest; +import java.net.http.HttpResponse; +import java.io.InputStream; +import java.io.IOException; +import java.lang.ref.WeakReference; +import java.util.List; + + +/** + * A seekable byte channel on a HTTP connection. + * This implementation use HTTP range for reading bytes at an arbitrary position. + * A temporary file is used for caching the bytes that have been read. 
+ * + * @author Alexis Manin (Geomatys) + * @author Martin Desruisseaux (Geomatys) + * @version 1.4 + * @since 1.4 + * @module + */ +final class HttpByteChannel extends FileCacheByteChannel { + /** + * Data store name to report in case of failure. + */ + private final String filename; + + /** + * The request to be sent to the client, without "Range" header. + * This builder contains the {@linkplain #path} to the file. + */ + private final HttpRequest.Builder request; + + /** + * The client to which HTTP requests are sent. + */ + private final HttpClient client; + + /** + * The singleton client used for HTTP connections. + */ + private static WeakReference<HttpClient> sharedClient; + + /** + * Gets or creates the singleton client used for HTTP connections. + */ + private static synchronized HttpClient sharedClient() { + if (sharedClient != null) { + HttpClient client = sharedClient.get(); + if (client != null) return client; + } + HttpClient client = HttpClient.newHttpClient(); + sharedClient = new WeakReference<>(client); + return client; + } + + /** + * Creates a new channel for a file at the given URI. + * + * @param name data store name to report in case of failure. + * @param path URI of the file to read. + * @throws IOException if the temporary file cannot be created. + */ + public HttpByteChannel(final String name, final URI path) throws IOException { + super("http-"); + filename = name; + request = HttpRequest.newBuilder(path); + client = sharedClient(); + } + + /** + * Returns the data store name to report in case of failure. + */ + @Override + protected String filename() { + return filename; + } + + /** + * Creates an input stream which provides the bytes to read starting at the specified position. + * + * @param start position of the first byte to read (inclusive). + * @param end position of the last byte to read with the returned stream (inclusive), + * or {@link Long#MAX_VALUE} for end of stream. 
+ * @return contains the input stream providing the bytes to read starting at the given start position. + */ + @Override + protected Connection openConnection(final long start, final long end) throws IOException { + HttpRequest.Builder r = request; + String range = Connection.formatRange(start, end); + if (range != null) { + r = r.copy().setHeader("Range", range); + } + final HttpResponse<InputStream> response; + try { + response = client.send(r.build(), HttpResponse.BodyHandlers.ofInputStream()); + } catch (InterruptedException e) { + throw new IOException(e); + } + final InputStream stream = response.body(); + final HttpHeaders headers = response.headers(); + range = headers.firstValue("Content-Range").orElse(null); + final List<String> rangeUnits = headers.allValues("Accept-Ranges"); + try { + final long length = headers.firstValueAsLong("Content-Length").orElse(-1); + return new Connection(stream, range, length, rangeUnits); + } catch (IllegalArgumentException e) { + throw new IOException(e); + } + } +} diff --git a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/IOUtilities.java b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/IOUtilities.java index 60616119ad..8f613a9d1e 100644 --- a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/IOUtilities.java +++ b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/io/IOUtilities.java @@ -45,6 +45,7 @@ import org.apache.sis.util.CharSequences; import org.apache.sis.util.Exceptions; import org.apache.sis.util.Static; import org.apache.sis.util.resources.Errors; +import org.apache.sis.internal.util.Constants; import org.apache.sis.internal.storage.Resources; @@ -673,6 +674,18 @@ public final class IOUtilities extends Static { return isWrite & (!isRead | truncate); } + /** + * Returns {@code true} if the given protocol is "http" or "https". + * The comparison is case-insensitive. + * + * @param protocol the protocol to test. 
+ * @return whether the given protocol is HTTP(S). + */ + public static boolean isHTTP(final String protocol) { + return Constants.HTTP .equalsIgnoreCase(protocol) + || Constants.HTTPS.equalsIgnoreCase(protocol); + } + /** * Reads the next character as an Unicode code point. Unless end-of-file has been reached, the returned value is * between {@value java.lang.Character#MIN_CODE_POINT} and {@value java.lang.Character#MAX_CODE_POINT} inclusive.