This is an automated email from the ASF dual-hosted git repository. desruisseaux pushed a commit to branch geoapi-4.0 in repository https://gitbox.apache.org/repos/asf/sis.git
commit 758fb1bd1386454f38672eb8c2d5dcd395a873ea Author: Martin Desruisseaux <[email protected]> AuthorDate: Mon Jan 14 19:38:12 2019 +0100 More lenient parsing of time-related units, in order to avoid parsing errors with some netCDF files. --- .../sis/internal/util/StandardDateFormat.java | 73 +++++++++++++++------- .../org/apache/sis/measure/UnitAliases.properties | 2 + .../sis/internal/util/StandardDateFormatTest.java | 7 ++- .../java/org/apache/sis/measure/UnitsTest.java | 12 ++++ .../apache/sis/storage/netcdf/GridResource.java | 6 +- .../apache/sis/internal/netcdf/VariableTest.java | 37 ++++++++++- 6 files changed, 111 insertions(+), 26 deletions(-) diff --git a/core/sis-utility/src/main/java/org/apache/sis/internal/util/StandardDateFormat.java b/core/sis-utility/src/main/java/org/apache/sis/internal/util/StandardDateFormat.java index c90a4da..56dd8a1 100644 --- a/core/sis-utility/src/main/java/org/apache/sis/internal/util/StandardDateFormat.java +++ b/core/sis-utility/src/main/java/org/apache/sis/internal/util/StandardDateFormat.java @@ -85,11 +85,12 @@ public final class StandardDateFormat extends DateFormat { /** * The thread-safe instance to use for reading and formatting dates. - * Only the year is mandatory, all other fields are optional. + * Only the year is mandatory, all other fields are optional at parsing time. + * However all fields are written, including milliseconds at formatting time. */ public static final DateTimeFormatter FORMAT = new DateTimeFormatterBuilder() - // parseLenient() is for allowing fields with one digit instead of two. - .parseLenient() .appendValue(ChronoField.YEAR, 4, 5, SignStyle.NORMAL) // Proleptic year (use negative number if needed). + .parseLenient() // For allowing fields with one digit instead of two. + .parseCaseInsensitive() .appendValue(ChronoField.YEAR, 4, 5, SignStyle.NORMAL) // Proleptic year (use negative number if needed). .optionalStart().appendLiteral('-').appendValue(ChronoField.MONTH_OF_YEAR, 2) .optionalStart().appendLiteral('-').appendValue(ChronoField.DAY_OF_MONTH, 2) .optionalStart().appendLiteral('T').appendValue(ChronoField.HOUR_OF_DAY, 2) @@ -97,7 +98,7 @@ public final class StandardDateFormat extends DateFormat { .optionalStart().appendLiteral(':').appendValue(ChronoField.SECOND_OF_MINUTE, 2) .appendFraction(ChronoField.MILLI_OF_SECOND, 3, 3, true) .optionalEnd().optionalEnd().optionalEnd() // Move back to the optional block of HOUR_OF_DAY. - .optionalStart().appendOffsetId() + .optionalStart().appendOffset("+H:MM:ss", "Z") .toFormatter(Locale.ROOT); /** @@ -165,7 +166,7 @@ public final class StandardDateFormat extends DateFormat { /** * Modifies the given date and time string for making it more compliant to ISO syntax. * If date and time are separated by spaces, then this method replaces those spaces by - * the 'T' letter. + * the 'T' letter. All other spaces that are not between two digits are removed. * * @param text the text to make more compliant with ISO syntax. * @param lower index of the first character to examine. @@ -173,27 +174,53 @@ public final class StandardDateFormat extends DateFormat { * @return sub-sequence of {@code text} from {@code lower} to {@code upper}, potentially modified. */ static CharSequence toISO(CharSequence text, int lower, int upper) { - int sep = CharSequences.indexOf(text, ':', lower, upper); - if (sep >= lower) { - sep = CharSequences.skipTrailingWhitespaces(text, lower, sep); - while (sep > lower) { - final int c = Character.codePointBefore(text, sep); - final int timeStart = sep; - sep -= Character.charCount(c); - if (!Character.isDigit(c)) { - if (Character.isWhitespace(c)) { - sep = CharSequences.skipTrailingWhitespaces(text, lower, sep); - if (sep > lower && Character.isDigit(Character.codePointBefore(text, sep))) { - text = new StringBuilder(upper - lower).append(text, lower, upper).replace(sep, timeStart, "T"); - upper = text.length(); - lower = 0; - } - } - break; + boolean isCopied = false; + lower = CharSequences.skipLeadingWhitespaces (text, lower, upper); + upper = CharSequences.skipTrailingWhitespaces(text, lower, upper); + int cp = 0; // Non-whitespace character from previous iteration. + for (int i = upper; i > lower;) { + int c = Character.codePointBefore(text, i); + int n = Character.charCount(c); +replace: if (Character.isWhitespace(c)) { + /* + * Found whitespaces from 'i' inclusive (after computation below) to 'end' exclusive. + * If no concurrent change, i > lower because text.charAt(lower) is not a whitespace. + * Set 'c' to the character before whitespaces. 'cp' is the character after spaces. + */ + int end = i; + i = CharSequences.skipTrailingWhitespaces(text, lower, i - n); + c = Character.codePointBefore(text, i); + n = Character.charCount(c); + boolean isDateTimeSeparator = false; + if (Character.isDigit(cp) && Character.isDigit(c)) { + /* + * If the character before and after whitespaces are digits, maybe we have + * the separation between date and timezone. Use ':' position as a check. + */ + isDateTimeSeparator = CharSequences.indexOf(text, ':', lower, upper) > end; + if (!isDateTimeSeparator) break replace; // Skip replacement. } + final StringBuilder b; + if (isCopied) { + b = (StringBuilder) text; + } else { + text = b = new StringBuilder(upper - lower).append(text, lower, upper); + i -= lower; + end -= lower; + lower = 0; + isCopied = true; + } + if (isDateTimeSeparator) { + b.replace(i, end, "T"); + } else { + b.delete(i, end); + } + upper = b.length(); } + i -= n; + cp = c; } - return CharSequences.trimWhitespaces(text, lower, upper); + return text.subSequence(lower, upper); } /** diff --git a/core/sis-utility/src/main/resources/org/apache/sis/measure/UnitAliases.properties b/core/sis-utility/src/main/resources/org/apache/sis/measure/UnitAliases.properties index 3b6bbd9..6443a08 100644 --- a/core/sis-utility/src/main/resources/org/apache/sis/measure/UnitAliases.properties +++ b/core/sis-utility/src/main/resources/org/apache/sis/measure/UnitAliases.properties @@ -1,6 +1,7 @@ # Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements; # and to You under the Apache License, Version 2.0. +# Contains only unit names not included in UnitNames.properties. # "degrees", "metres" and "meters" are handled as special cases. arcsec=\u2033 days=d @@ -21,6 +22,7 @@ gradians=grad grads=grad grams=g hours=h +hr=h liters=L litres=L mbar=hPa diff --git a/core/sis-utility/src/test/java/org/apache/sis/internal/util/StandardDateFormatTest.java b/core/sis-utility/src/test/java/org/apache/sis/internal/util/StandardDateFormatTest.java index 043a7c4..be4386d 100644 --- a/core/sis-utility/src/test/java/org/apache/sis/internal/util/StandardDateFormatTest.java +++ b/core/sis-utility/src/test/java/org/apache/sis/internal/util/StandardDateFormatTest.java @@ -22,6 +22,7 @@ import java.time.LocalDateTime; import java.util.Date; import java.util.concurrent.TimeUnit; import java.text.ParseException; +import org.apache.sis.test.DependsOnMethod; import org.apache.sis.test.TestCase; import org.junit.Test; @@ -57,7 +58,8 @@ public final strictfp class StandardDateFormatTest extends TestCase { assertSame ("2009-01-01T06:00:00+01:00", toISO("2009-01-01T06:00:00+01:00")); assertEquals("2005-09-22T04:30:15", toISO("2005-09-22 04:30:15")); assertSame ("2005-09-22", toISO("2005-09-22")); - assertEquals("2005-09-22T04 : 30 : 15", toISO(" 2005-09-22 04 : 30 : 15 ")); + assertEquals("2005-09-22T04:30:15", toISO(" 2005-09-22 04 : 30 : 15 ")); + assertEquals("1992-10-8T15:15:42.5-6:00", toISO("1992-10-8 15:15:42.5 -6:00")); } /** @@ -73,6 +75,7 @@ public final strictfp class StandardDateFormatTest extends TestCase { * @throws ParseException if an error occurred while parsing the date. */ @Test + @DependsOnMethod("testToISO") public void testParse() throws ParseException { final long day = 1466985600000L; final StandardDateFormat f = new StandardDateFormat(); @@ -98,6 +101,7 @@ public final strictfp class StandardDateFormatTest extends TestCase { * @since 0.8 */ @Test + @DependsOnMethod("testParse") public void testParseBest() { final long day = 1466985600000L; assertEquals(Instant.ofEpochMilli(day + ((16*60 + 48)*60 )*1000), StandardDateFormat.parseBest("2016-06-27T16:48Z")); @@ -115,6 +119,7 @@ public final strictfp class StandardDateFormatTest extends TestCase { * @since 1.0 */ @Test + @DependsOnMethod("testParse") public void testParseInstant() { final long day = 1466985600000L; assertEquals(Instant.ofEpochMilli(day + ((16*60 + 48)*60 )*1000), StandardDateFormat.parseInstantUTC("2016-06-27T16:48Z")); diff --git a/core/sis-utility/src/test/java/org/apache/sis/measure/UnitsTest.java b/core/sis-utility/src/test/java/org/apache/sis/measure/UnitsTest.java index c774c18..0f4e632 100644 --- a/core/sis-utility/src/test/java/org/apache/sis/measure/UnitsTest.java +++ b/core/sis-utility/src/test/java/org/apache/sis/measure/UnitsTest.java @@ -268,6 +268,18 @@ public final strictfp class UnitsTest extends TestCase { assertSame(RADIAN, valueOf("radian")); assertSame(RADIAN, valueOf("radians")); assertSame(SECOND, valueOf("s")); + assertSame(SECOND, valueOf("second")); + assertSame(SECOND, valueOf("seconds")); + assertSame(MINUTE, valueOf("min")); + assertSame(MINUTE, valueOf("minute")); + assertSame(MINUTE, valueOf("minutes")); + assertSame(HOUR, valueOf("h")); + assertSame(HOUR, valueOf("hr")); + assertSame(HOUR, valueOf("hour")); + assertSame(HOUR, valueOf("hours")); + assertSame(DAY, valueOf("d")); + assertSame(DAY, valueOf("day")); + assertSame(DAY, valueOf("days")); assertSame(METRE, valueOf("m")); assertSame(METRE, valueOf("metre")); assertSame(METRE, valueOf("meter")); diff --git a/storage/sis-netcdf/src/main/java/org/apache/sis/storage/netcdf/GridResource.java b/storage/sis-netcdf/src/main/java/org/apache/sis/storage/netcdf/GridResource.java index 8f92404..8ea8595 100644 --- a/storage/sis-netcdf/src/main/java/org/apache/sis/storage/netcdf/GridResource.java +++ b/storage/sis-netcdf/src/main/java/org/apache/sis/storage/netcdf/GridResource.java @@ -67,7 +67,11 @@ import ucar.nc2.constants.CDM; // We use only String consta final class GridResource extends AbstractGridResource implements ResourceOnFileSystem { /** * Words used in standard (preferred) or long (if no standard) variable names which suggest - * that the variable is a component of a vector. Example of standard variable names: + * that the variable is a component of a vector. Those words are used in heuristic rules + * for deciding if two variables should be stored in a single {@code Coverage} instance. + * For example the eastward (u) and northward (v) components of oceanic current vectors + * should be stored as two sample dimensions of a single "Current" coverage. + * Example of standard variable names: * * <ul> * <li>{@code baroclinic_eastward_sea_water_velocity}</li> diff --git a/storage/sis-netcdf/src/test/java/org/apache/sis/internal/netcdf/VariableTest.java b/storage/sis-netcdf/src/test/java/org/apache/sis/internal/netcdf/VariableTest.java index 39a602b..09a9a0a 100644 --- a/storage/sis-netcdf/src/test/java/org/apache/sis/internal/netcdf/VariableTest.java +++ b/storage/sis-netcdf/src/test/java/org/apache/sis/internal/netcdf/VariableTest.java @@ -17,10 +17,12 @@ package org.apache.sis.internal.netcdf; import java.io.IOException; +import java.time.Instant; import org.apache.sis.math.Vector; import org.apache.sis.util.Workaround; import org.apache.sis.storage.DataStoreException; import org.apache.sis.internal.netcdf.ucar.DecoderWrapper; +import org.apache.sis.measure.Units; import org.apache.sis.test.DependsOn; import org.opengis.test.dataset.TestData; import org.junit.Test; @@ -66,7 +68,7 @@ public strictfp class VariableTest extends TestCase { * Gets the variable from the given decoder, reordering them if the decoder is a wrapper for UCAR library. * We perform this reordering because UCAR library does not always return the variables in the order they * are declared. In the case of the {@link TestData#NETCDF_4D_PROJECTED} file, the CIP variable is expected - * last but UCAR library put it second. + * last but UCAR library puts it second. */ @Workaround(library = "UCAR", version = "4.6.11") private Variable[] getVariablesCIP(final Decoder decoder) { @@ -137,6 +139,39 @@ public strictfp class VariableTest extends TestCase { } /** + * Tests {@link Variable#parseUnit(String)} method. + * + * @throws Exception if an I/O or logical error occurred while opening the file. + */ + @Test + public void testParseUnit() throws Exception { + final Variable variable = selectDataset(TestData.NETCDF_2D_GEOGRAPHIC).getVariables()[0]; + assertSame(Units.SECOND, variable.parseUnit("s")); + assertSame(Units.SECOND, variable.parseUnit("second")); + assertSame(Units.SECOND, variable.parseUnit("seconds")); + assertSame(Units.MINUTE, variable.parseUnit("min")); + assertSame(Units.MINUTE, variable.parseUnit("minute")); + assertSame(Units.MINUTE, variable.parseUnit("minutes")); + assertSame(Units.HOUR, variable.parseUnit("h")); + assertSame(Units.HOUR, variable.parseUnit("hr")); + assertSame(Units.HOUR, variable.parseUnit("hour")); + assertSame(Units.HOUR, variable.parseUnit("hours")); + assertSame(Units.DAY, variable.parseUnit("d")); + assertSame(Units.DAY, variable.parseUnit("day")); + assertSame(Units.DAY, variable.parseUnit("days")); + /* + * Parsing date set the epoch as a side effect. + */ + final Instant save = variable.epoch; + try { + assertSame(Units.DAY, variable.parseUnit("days since 1992-10-8 15:15:42.5 -6:00")); + assertEquals("epoch", variable.epoch, Instant.parse("1992-10-08T21:15:42.500Z")); + } finally { + variable.epoch = save; + } + } + + /** * Tests {@link Variable#getGridDimensionNames()} and {@link Variable#getShape()} * on a simple two-dimensional dataset. *
