This is an automated email from the ASF dual-hosted git repository.

desruisseaux pushed a commit to branch geoapi-4.0
in repository https://gitbox.apache.org/repos/asf/sis.git

commit 758fb1bd1386454f38672eb8c2d5dcd395a873ea
Author: Martin Desruisseaux <[email protected]>
AuthorDate: Mon Jan 14 19:38:12 2019 +0100

    More lenient parsing of time-related units, in order to avoid parsing 
errors with some netCDF files.
---
 .../sis/internal/util/StandardDateFormat.java      | 73 +++++++++++++++-------
 .../org/apache/sis/measure/UnitAliases.properties  |  2 +
 .../sis/internal/util/StandardDateFormatTest.java  |  7 ++-
 .../java/org/apache/sis/measure/UnitsTest.java     | 12 ++++
 .../apache/sis/storage/netcdf/GridResource.java    |  6 +-
 .../apache/sis/internal/netcdf/VariableTest.java   | 37 ++++++++++-
 6 files changed, 111 insertions(+), 26 deletions(-)

diff --git 
a/core/sis-utility/src/main/java/org/apache/sis/internal/util/StandardDateFormat.java
 
b/core/sis-utility/src/main/java/org/apache/sis/internal/util/StandardDateFormat.java
index c90a4da..56dd8a1 100644
--- 
a/core/sis-utility/src/main/java/org/apache/sis/internal/util/StandardDateFormat.java
+++ 
b/core/sis-utility/src/main/java/org/apache/sis/internal/util/StandardDateFormat.java
@@ -85,11 +85,12 @@ public final class StandardDateFormat extends DateFormat {
 
     /**
      * The thread-safe instance to use for reading and formatting dates.
-     * Only the year is mandatory, all other fields are optional.
+     * Only the year is mandatory, all other fields are optional at parsing 
time.
+     * However all fields are written, including milliseconds at formatting 
time.
      */
     public static final DateTimeFormatter FORMAT = new 
DateTimeFormatterBuilder()
-            // parseLenient() is for allowing fields with one digit instead of 
two.
-            .parseLenient()                    .appendValue(ChronoField.YEAR, 
4, 5, SignStyle.NORMAL)    // Proleptic year (use negative number if needed).
+            .parseLenient()                    // For allowing fields with one 
digit instead of two.
+            .parseCaseInsensitive()            .appendValue(ChronoField.YEAR, 
4, 5, SignStyle.NORMAL)    // Proleptic year (use negative number if needed).
             
.optionalStart().appendLiteral('-').appendValue(ChronoField.MONTH_OF_YEAR,    2)
             
.optionalStart().appendLiteral('-').appendValue(ChronoField.DAY_OF_MONTH,     2)
             
.optionalStart().appendLiteral('T').appendValue(ChronoField.HOUR_OF_DAY,      2)
@@ -97,7 +98,7 @@ public final class StandardDateFormat extends DateFormat {
             
.optionalStart().appendLiteral(':').appendValue(ChronoField.SECOND_OF_MINUTE, 2)
                                                
.appendFraction(ChronoField.MILLI_OF_SECOND, 3, 3, true)
             .optionalEnd().optionalEnd().optionalEnd()    // Move back to the 
optional block of HOUR_OF_DAY.
-            .optionalStart().appendOffsetId()
+            .optionalStart().appendOffset("+H:MM:ss", "Z")
             .toFormatter(Locale.ROOT);
 
     /**
@@ -165,7 +166,7 @@ public final class StandardDateFormat extends DateFormat {
     /**
      * Modifies the given date and time string for making it more compliant to 
ISO syntax.
      * If date and time are separated by spaces, then this method replaces 
those spaces by
-     * the 'T' letter.
+     * the 'T' letter. All other spaces that are not between two digits are 
removed.
      *
      * @param  text   the text to make more compliant with ISO syntax.
      * @param  lower  index of the first character to examine.
@@ -173,27 +174,53 @@ public final class StandardDateFormat extends DateFormat {
      * @return sub-sequence of {@code text} from {@code lower} to {@code 
upper}, potentially modified.
      */
     static CharSequence toISO(CharSequence text, int lower, int upper) {
-        int sep = CharSequences.indexOf(text, ':', lower, upper);
-        if (sep >= lower) {
-            sep = CharSequences.skipTrailingWhitespaces(text, lower, sep);
-            while (sep > lower) {
-                final int c = Character.codePointBefore(text, sep);
-                final int timeStart = sep;
-                sep -= Character.charCount(c);
-                if (!Character.isDigit(c)) {
-                    if (Character.isWhitespace(c)) {
-                        sep = CharSequences.skipTrailingWhitespaces(text, 
lower, sep);
-                        if (sep > lower && 
Character.isDigit(Character.codePointBefore(text, sep))) {
-                            text = new StringBuilder(upper - 
lower).append(text, lower, upper).replace(sep, timeStart, "T");
-                            upper = text.length();
-                            lower = 0;
-                        }
-                    }
-                    break;
+        boolean isCopied = false;
+        lower = CharSequences.skipLeadingWhitespaces (text, lower, upper);
+        upper = CharSequences.skipTrailingWhitespaces(text, lower, upper);
+        int cp = 0;   // Non-whitespace character from previous iteration.
+        for (int i = upper; i > lower;) {
+            int c = Character.codePointBefore(text, i);
+            int n = Character.charCount(c);
+replace:    if (Character.isWhitespace(c)) {
+                /*
+                 * Found whitespaces from 'i' inclusive (after computation 
below) to 'end' exclusive.
+                 * If no concurrent change, i > lower because 
text.charAt(lower) is not a whitespace.
+                 * Set 'c' to the character before whitespaces. 'cp' is the 
character after spaces.
+                 */
+                int end = i;
+                i = CharSequences.skipTrailingWhitespaces(text, lower, i - n);
+                c = Character.codePointBefore(text, i);
+                n = Character.charCount(c);
+                boolean isDateTimeSeparator = false;
+                if (Character.isDigit(cp) && Character.isDigit(c)) {
+                    /*
+                     * If the characters before and after the whitespaces are 
digits, maybe we have
+                     * the separation between date and time. Use ':' 
position as a check.
+                     */
+                    isDateTimeSeparator = CharSequences.indexOf(text, ':', 
lower, upper) > end;
+                    if (!isDateTimeSeparator) break replace;               // 
Skip replacement.
                 }
+                final StringBuilder b;
+                if (isCopied) {
+                    b = (StringBuilder) text;
+                } else {
+                    text = b = new StringBuilder(upper - lower).append(text, 
lower, upper);
+                    i       -= lower;
+                    end     -= lower;
+                    lower    = 0;
+                    isCopied = true;
+                }
+                if (isDateTimeSeparator) {
+                    b.replace(i, end, "T");
+                } else {
+                    b.delete(i, end);
+                }
+                upper = b.length();
             }
+            i -= n;
+            cp = c;
         }
-        return CharSequences.trimWhitespaces(text, lower, upper);
+        return text.subSequence(lower, upper);
     }
 
     /**
diff --git 
a/core/sis-utility/src/main/resources/org/apache/sis/measure/UnitAliases.properties
 
b/core/sis-utility/src/main/resources/org/apache/sis/measure/UnitAliases.properties
index 3b6bbd9..6443a08 100644
--- 
a/core/sis-utility/src/main/resources/org/apache/sis/measure/UnitAliases.properties
+++ 
b/core/sis-utility/src/main/resources/org/apache/sis/measure/UnitAliases.properties
@@ -1,6 +1,7 @@
 # Licensed to the Apache Software Foundation (ASF) under one or more 
contributor license agreements;
 # and to You under the Apache License, Version 2.0.
 
+# Contains only unit names not included in UnitNames.properties.
 # "degrees", "metres" and "meters" are handled as special cases.
 arcsec=\u2033
 days=d
@@ -21,6 +22,7 @@ gradians=grad
 grads=grad
 grams=g
 hours=h
+hr=h
 liters=L
 litres=L
 mbar=hPa
diff --git 
a/core/sis-utility/src/test/java/org/apache/sis/internal/util/StandardDateFormatTest.java
 
b/core/sis-utility/src/test/java/org/apache/sis/internal/util/StandardDateFormatTest.java
index 043a7c4..be4386d 100644
--- 
a/core/sis-utility/src/test/java/org/apache/sis/internal/util/StandardDateFormatTest.java
+++ 
b/core/sis-utility/src/test/java/org/apache/sis/internal/util/StandardDateFormatTest.java
@@ -22,6 +22,7 @@ import java.time.LocalDateTime;
 import java.util.Date;
 import java.util.concurrent.TimeUnit;
 import java.text.ParseException;
+import org.apache.sis.test.DependsOnMethod;
 import org.apache.sis.test.TestCase;
 import org.junit.Test;
 
@@ -57,7 +58,8 @@ public final strictfp class StandardDateFormatTest extends 
TestCase {
         assertSame  ("2009-01-01T06:00:00+01:00", 
toISO("2009-01-01T06:00:00+01:00"));
         assertEquals("2005-09-22T04:30:15",       toISO("2005-09-22 
04:30:15"));
         assertSame  ("2005-09-22",                toISO("2005-09-22"));
-        assertEquals("2005-09-22T04 : 30 : 15",   toISO("  2005-09-22   04 : 
30 : 15 "));
+        assertEquals("2005-09-22T04:30:15",       toISO("  2005-09-22   04 : 
30 : 15 "));
+        assertEquals("1992-10-8T15:15:42.5-6:00", toISO("1992-10-8 15:15:42.5 
-6:00"));
     }
 
     /**
@@ -73,6 +75,7 @@ public final strictfp class StandardDateFormatTest extends 
TestCase {
      * @throws ParseException if an error occurred while parsing the date.
      */
     @Test
+    @DependsOnMethod("testToISO")
     public void testParse() throws ParseException {
         final long day = 1466985600000L;
         final StandardDateFormat f = new StandardDateFormat();
@@ -98,6 +101,7 @@ public final strictfp class StandardDateFormatTest extends 
TestCase {
      * @since 0.8
      */
     @Test
+    @DependsOnMethod("testParse")
     public void testParseBest() {
         final long day = 1466985600000L;
         assertEquals(Instant.ofEpochMilli(day + ((16*60 + 48)*60     )*1000),  
    StandardDateFormat.parseBest("2016-06-27T16:48Z"));
@@ -115,6 +119,7 @@ public final strictfp class StandardDateFormatTest extends 
TestCase {
      * @since 1.0
      */
     @Test
+    @DependsOnMethod("testParse")
     public void testParseInstant() {
         final long day = 1466985600000L;
         assertEquals(Instant.ofEpochMilli(day + ((16*60 + 48)*60     )*1000),  
    StandardDateFormat.parseInstantUTC("2016-06-27T16:48Z"));
diff --git 
a/core/sis-utility/src/test/java/org/apache/sis/measure/UnitsTest.java 
b/core/sis-utility/src/test/java/org/apache/sis/measure/UnitsTest.java
index c774c18..0f4e632 100644
--- a/core/sis-utility/src/test/java/org/apache/sis/measure/UnitsTest.java
+++ b/core/sis-utility/src/test/java/org/apache/sis/measure/UnitsTest.java
@@ -268,6 +268,18 @@ public final strictfp class UnitsTest extends TestCase {
         assertSame(RADIAN,       valueOf("radian"));
         assertSame(RADIAN,       valueOf("radians"));
         assertSame(SECOND,       valueOf("s"));
+        assertSame(SECOND,       valueOf("second"));
+        assertSame(SECOND,       valueOf("seconds"));
+        assertSame(MINUTE,       valueOf("min"));
+        assertSame(MINUTE,       valueOf("minute"));
+        assertSame(MINUTE,       valueOf("minutes"));
+        assertSame(HOUR,         valueOf("h"));
+        assertSame(HOUR,         valueOf("hr"));
+        assertSame(HOUR,         valueOf("hour"));
+        assertSame(HOUR,         valueOf("hours"));
+        assertSame(DAY,          valueOf("d"));
+        assertSame(DAY,          valueOf("day"));
+        assertSame(DAY,          valueOf("days"));
         assertSame(METRE,        valueOf("m"));
         assertSame(METRE,        valueOf("metre"));
         assertSame(METRE,        valueOf("meter"));
diff --git 
a/storage/sis-netcdf/src/main/java/org/apache/sis/storage/netcdf/GridResource.java
 
b/storage/sis-netcdf/src/main/java/org/apache/sis/storage/netcdf/GridResource.java
index 8f92404..8ea8595 100644
--- 
a/storage/sis-netcdf/src/main/java/org/apache/sis/storage/netcdf/GridResource.java
+++ 
b/storage/sis-netcdf/src/main/java/org/apache/sis/storage/netcdf/GridResource.java
@@ -67,7 +67,11 @@ import ucar.nc2.constants.CDM;                      // We 
use only String consta
 final class GridResource extends AbstractGridResource implements 
ResourceOnFileSystem {
     /**
      * Words used in standard (preferred) or long (if no standard) variable 
names which suggest
-     * that the variable is a component of a vector. Example of standard 
variable names:
+     * that the variable is a component of a vector. Those words are used in 
heuristic rules
+     * for deciding if two variables should be stored in a single {@code 
Coverage} instance.
+     * For example the eastward (u) and northward (v) components of oceanic 
current vectors
+     * should be stored as two sample dimensions of a single "Current" 
coverage.
+     * Example of standard variable names:
      *
      * <ul>
      *   <li>{@code baroclinic_eastward_sea_water_velocity}</li>
diff --git 
a/storage/sis-netcdf/src/test/java/org/apache/sis/internal/netcdf/VariableTest.java
 
b/storage/sis-netcdf/src/test/java/org/apache/sis/internal/netcdf/VariableTest.java
index 39a602b..09a9a0a 100644
--- 
a/storage/sis-netcdf/src/test/java/org/apache/sis/internal/netcdf/VariableTest.java
+++ 
b/storage/sis-netcdf/src/test/java/org/apache/sis/internal/netcdf/VariableTest.java
@@ -17,10 +17,12 @@
 package org.apache.sis.internal.netcdf;
 
 import java.io.IOException;
+import java.time.Instant;
 import org.apache.sis.math.Vector;
 import org.apache.sis.util.Workaround;
 import org.apache.sis.storage.DataStoreException;
 import org.apache.sis.internal.netcdf.ucar.DecoderWrapper;
+import org.apache.sis.measure.Units;
 import org.apache.sis.test.DependsOn;
 import org.opengis.test.dataset.TestData;
 import org.junit.Test;
@@ -66,7 +68,7 @@ public strictfp class VariableTest extends TestCase {
      * Gets the variable from the given decoder, reordering them if the 
decoder is a wrapper for UCAR library.
      * We perform this reordering because UCAR library does not always return 
the variables in the order they
      * are declared. In the case of the {@link TestData#NETCDF_4D_PROJECTED} 
file, the CIP variable is expected
-     * last but UCAR library put it second.
+     * last but UCAR library puts it second.
      */
     @Workaround(library = "UCAR", version = "4.6.11")
     private Variable[] getVariablesCIP(final Decoder decoder) {
@@ -137,6 +139,39 @@ public strictfp class VariableTest extends TestCase {
     }
 
     /**
+     * Tests {@link Variable#parseUnit(String)} method.
+     *
+     * @throws Exception if an I/O or logical error occurred while opening the 
file.
+     */
+    @Test
+    public void testParseUnit() throws Exception {
+        final Variable variable = 
selectDataset(TestData.NETCDF_2D_GEOGRAPHIC).getVariables()[0];
+        assertSame(Units.SECOND, variable.parseUnit("s"));
+        assertSame(Units.SECOND, variable.parseUnit("second"));
+        assertSame(Units.SECOND, variable.parseUnit("seconds"));
+        assertSame(Units.MINUTE, variable.parseUnit("min"));
+        assertSame(Units.MINUTE, variable.parseUnit("minute"));
+        assertSame(Units.MINUTE, variable.parseUnit("minutes"));
+        assertSame(Units.HOUR,   variable.parseUnit("h"));
+        assertSame(Units.HOUR,   variable.parseUnit("hr"));
+        assertSame(Units.HOUR,   variable.parseUnit("hour"));
+        assertSame(Units.HOUR,   variable.parseUnit("hours"));
+        assertSame(Units.DAY,    variable.parseUnit("d"));
+        assertSame(Units.DAY,    variable.parseUnit("day"));
+        assertSame(Units.DAY,    variable.parseUnit("days"));
+        /*
+         * Parsing a date sets the epoch as a side effect.
+         */
+        final Instant save = variable.epoch;
+        try {
+            assertSame(Units.DAY, variable.parseUnit("days since 1992-10-8 
15:15:42.5 -6:00"));
+            assertEquals("epoch", variable.epoch, 
Instant.parse("1992-10-08T21:15:42.500Z"));
+        } finally {
+            variable.epoch = save;
+        }
+    }
+
+    /**
      * Tests {@link Variable#getGridDimensionNames()} and {@link 
Variable#getShape()}
      * on a simple two-dimensional dataset.
      *

Reply via email to