This is an automated email from the ASF dual-hosted git repository.
desruisseaux pushed a commit to branch geoapi-4.0
in repository https://gitbox.apache.org/repos/asf/sis.git
The following commit(s) were added to refs/heads/geoapi-4.0 by this push:
new b37fde6 Try to produce better identifiers and title for netCDF
variables. The identifier excludes the "long_name" attribute, which is sometime
more like a sentence than an identifier (the standard name or variable name
provides more stable identifiers). Conversely the `metadata.citation.title`
uses the "long_name" attribute as first choice, formatted in a way more like
English sentence (with '_' and CamelCase replaced by space). Variable names are
declared as citation identifier [...]
b37fde6 is described below
commit b37fde605db9f8f72c2a1e18f9b16afafc4d4c8d
Author: Martin Desruisseaux <[email protected]>
AuthorDate: Mon Mar 16 17:30:11 2020 +0100
Try to produce better identifiers and title for netCDF variables.
The identifier excludes the "long_name" attribute, which is sometime more
like a sentence
than an identifier (the standard name or variable name provides more stable
identifiers).
Conversely the `metadata.citation.title` uses the "long_name" attribute as
first choice,
formatted in a way more like English sentence (with '_' and CamelCase
replaced by space).
Variable names are declared as citation identifiers and shown in the
summary panel.
---
.../sis/gui/metadata/IdentificationInfo.java | 30 ++++
.../org/apache/sis/internal/gui/Resources.java | 5 +
.../apache/sis/internal/gui/Resources.properties | 1 +
.../sis/internal/gui/Resources_fr.properties | 1 +
.../java/org/apache/sis/util/CharSequences.java | 181 +++++++++++++++++++--
.../org/apache/sis/util/CharSequencesTest.java | 26 ++-
.../apache/sis/internal/netcdf/RasterResource.java | 96 ++++++++---
.../org/apache/sis/internal/netcdf/Variable.java | 25 +--
.../apache/sis/internal/netcdf/impl/GridInfo.java | 2 +-
.../sis/internal/netcdf/impl/VariableInfo.java | 3 +-
.../apache/sis/storage/netcdf/MetadataReader.java | 4 +-
.../sis/internal/storage/AbstractGridResource.java | 4 +-
.../sis/internal/storage/AbstractResource.java | 40 ++++-
.../internal/storage/ConcatenatedFeatureSet.java | 2 +-
.../sis/internal/storage/MetadataBuilder.java | 18 +-
.../storage/ConcatenatedFeatureSetTest.java | 5 +-
16 files changed, 378 insertions(+), 65 deletions(-)
diff --git
a/application/sis-javafx/src/main/java/org/apache/sis/gui/metadata/IdentificationInfo.java
b/application/sis-javafx/src/main/java/org/apache/sis/gui/metadata/IdentificationInfo.java
index eecfca6..bebc66f 100644
---
a/application/sis-javafx/src/main/java/org/apache/sis/gui/metadata/IdentificationInfo.java
+++
b/application/sis-javafx/src/main/java/org/apache/sis/gui/metadata/IdentificationInfo.java
@@ -17,6 +17,7 @@
package org.apache.sis.gui.metadata;
import java.util.Date;
+import java.util.StringJoiner;
import javafx.geometry.HPos;
import javafx.scene.canvas.Canvas;
import javafx.scene.canvas.GraphicsContext;
@@ -53,6 +54,18 @@ import static
org.apache.sis.internal.util.CollectionsExt.nonNull;
* The pane where to show the values of {@link Identification} objects.
* The same pane can be used for an arbitrary amount of identifications.
* Each instance is identified by its title.
+ * The content is:
+ *
+ * <ol>
+ * <li>The title in bold font.</li>
+ * <li>Identifiers.</li>
+ * <li>Abstract, or purpose, or credit (in this preference order).</li>
+ * <li>Topic category.</li>
+ * <li>Release date, or publication date, or creation date, or any date (in
this preference order).</li>
+ * <li>Type of resource.</li>
+ * <li>Spatiotemporal extent as a textual description.</li>
+ * <li>Extent shown as a rectangle on a world map.</li>
+ * </ol>
*
* @author Smaniotto Enzo (GSoC)
* @author Martin Desruisseaux (Geomatys)
@@ -147,6 +160,7 @@ final class IdentificationInfo extends
Section<Identification> {
/**
* Invoked when new identification information should be shown.
* This method updates all fields in this section with the content of
given identification information.
+ * The content is summarized in {@linkplain IdentificationInfo class
javadoc}.
*/
@Override
void buildContent(final Identification info) {
@@ -166,6 +180,19 @@ final class IdentificationInfo extends
Section<Identification> {
}
title.setText(text);
/*
+ * Identifiers as a comma-separated list on a single line. Each
identifier
+ * is formatted as "codespace:code" or only "code" if there is no
codespace.
+ */
+ if (citation != null) {
+ final StringJoiner buffer = new StringJoiner(", ");
+ for (final Identifier id : citation.getIdentifiers()) {
+ buffer.add(IdentifiedObjects.toString(id));
+ }
+ if (buffer.length() != 0) {
+ addLine(Resources.Keys.Identifiers, buffer.toString());
+ }
+ }
+ /*
* The abstract, or if there is no abstract the purpose, or if no
purpose the credit as a fallback.
* We use those fallback because they can provide some hints about the
product.
* The topic category (climatology, health, etc.) follows.
@@ -186,6 +213,9 @@ final class IdentificationInfo extends
Section<Identification> {
}
}
addLine(label, text);
+ /*
+ * Topic category.
+ */
addLine(Resources.Keys.TopicCategory,
owner.string(nonNull(info.getTopicCategories())));
/*
* Select a single, arbitrary date. We take the release or publication
date if available.
diff --git
a/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources.java
b/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources.java
index 4060265..ff0a748 100644
---
a/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources.java
+++
b/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources.java
@@ -211,6 +211,11 @@ public final class Resources extends IndexedResourceBundle
{
public static final short GeospatialFiles = 4;
/**
+ * Identifiers:
+ */
+ public static final short Identifiers = 54;
+
+ /**
* Loading…
*/
public static final short Loading = 7;
diff --git
a/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources.properties
b/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources.properties
index 7af6c69..d9f07a1 100644
---
a/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources.properties
+++
b/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources.properties
@@ -51,6 +51,7 @@ Format = Format:
FromMetadata = From metadata
FullScreen = Full screen
GeospatialFiles = Geospatial data files
+Identifiers = Identifiers:
Loading = Loading\u2026
MainWindow = Main window
Metadata = Metadata
diff --git
a/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources_fr.properties
b/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources_fr.properties
index 48bb9b3..4b7c231 100644
---
a/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources_fr.properties
+++
b/application/sis-javafx/src/main/java/org/apache/sis/internal/gui/Resources_fr.properties
@@ -56,6 +56,7 @@ Format = Format\u00a0:
FromMetadata = Des m\u00e9ta-donn\u00e9es
FullScreen = Plein \u00e9cran
GeospatialFiles = Fichiers de donn\u00e9es g\u00e9ospatiales
+Identifiers = Identifiants\u00a0:
Loading = Chargement\u2026
MainWindow = Fen\u00eatre principale
Metadata = Metadonn\u00e9es
diff --git
a/core/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java
b/core/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java
index 7448785..8802403 100644
--- a/core/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java
+++ b/core/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java
@@ -72,7 +72,7 @@ import static java.lang.Character.*;
* {@code 0} or {@code false} primitive type calculated as if the input was an
empty string.
*
* @author Martin Desruisseaux (Geomatys)
- * @version 1.0
+ * @version 1.1
*
* @see StringBuilders
*
@@ -546,9 +546,9 @@ search: for (; fromIndex <= toIndex; fromIndex++) {
*/
public static int skipLeadingWhitespaces(final CharSequence text, int
fromIndex, final int toIndex) {
while (fromIndex < toIndex) {
- final int c = Character.codePointAt(text, fromIndex);
- if (!Character.isWhitespace(c)) break;
- fromIndex += Character.charCount(c);
+ final int c = codePointAt(text, fromIndex);
+ if (!isWhitespace(c)) break;
+ fromIndex += charCount(c);
}
return fromIndex;
}
@@ -584,9 +584,9 @@ search: for (; fromIndex <= toIndex; fromIndex++) {
*/
public static int skipTrailingWhitespaces(final CharSequence text, final
int fromIndex, int toIndex) {
while (toIndex > fromIndex) {
- final int c = Character.codePointBefore(text, toIndex);
- if (!Character.isWhitespace(c)) break;
- toIndex -= Character.charCount(c);
+ final int c = codePointBefore(text, toIndex);
+ if (!isWhitespace(c)) break;
+ toIndex -= charCount(c);
}
return toIndex;
}
@@ -1187,16 +1187,16 @@ searchWordBreak: while (true) {
final StringBuilder buffer = new StringBuilder(identifier.length());
final int length = identifier.length();
for (int i=0; i<length;) {
- int c = Character.codePointAt(identifier, i);
+ int c = codePointAt(identifier, i);
if (i != 0) {
if (c == '_') {
c = ' ';
} else {
- c = Character.toLowerCase(c);
+ c = toLowerCase(c);
}
}
buffer.appendCodePoint(c);
- i += Character.charCount(c);
+ i += charCount(c);
}
return buffer;
}
@@ -1359,7 +1359,7 @@ searchWordBreak: while (true) {
} else if (Character.isUpperCase(c)) {
// Test for mixed-case (e.g. "northEast").
// Note that i is guaranteed to be greater than 0 here.
- if (!Character.isUpperCase(Character.codePointBefore(text,
i))) {
+ if (!Character.isUpperCase(codePointBefore(text, i))) {
buffer.appendCodePoint(c);
}
}
@@ -1892,12 +1892,13 @@ cmp: while (ia < lga) {
}
/**
- * Returns the longest sequence of characters which is found at the
beginning of the two
- * given texts. If one of those texts is {@code null}, then the other text
is returned.
+ * Returns the longest sequence of characters which is found at the
beginning of the two given texts.
+ * If one of those texts is {@code null}, then the other text is returned.
+ * If there is no common prefix, then this method returns an empty string.
*
* @param s1 the first text, or {@code null}.
* @param s2 the second text, or {@code null}.
- * @return the common prefix of both texts, or {@code null} if both texts
are null.
+ * @return the common prefix of both texts (may be empty), or {@code null}
if both texts are null.
*/
public static CharSequence commonPrefix(final CharSequence s1, final
CharSequence s2) {
if (s1 == null) return s2;
@@ -1927,10 +1928,11 @@ cmp: while (ia < lga) {
/**
* Returns the longest sequence of characters which is found at the end of
the two given texts.
* If one of those texts is {@code null}, then the other text is returned.
+ * If there is no common suffix, then this method returns an empty string.
*
* @param s1 the first text, or {@code null}.
* @param s2 the second text, or {@code null}.
- * @return the common suffix of both texts, or {@code null} if both texts
are null.
+ * @return the common suffix of both texts (may be empty), or {@code null}
if both texts are null.
*/
public static CharSequence commonSuffix(final CharSequence s1, final
CharSequence s2) {
if (s1 == null) return s2;
@@ -1958,6 +1960,149 @@ cmp: while (ia < lga) {
}
/**
+ * Returns the words found at the beginning and end of both texts.
+ * The returned string is the concatenation of the {@linkplain
#commonPrefix common prefix}
+ * with the {@linkplain #commonSuffix common suffix}, with prefix and
suffix eventually made
+ * shorter for avoiding to cut in the middle of a word.
+ *
+ * <p>The purpose of this method is to create a global identifier from a
list of component identifiers.
+ * The later are often eastward and northward components of a vector, in
which case this method provides
+ * an identifier for the vector as a whole.</p>
+ *
+ * <div class="note"><b>Example:</b>
+ * given the following inputs:
+ * <ul>
+ * <li>{@code "baroclinic_eastward_velocity"}</li>
+ * <li>{@code "baroclinic_northward_velocity"}</li>
+ * </ul>
+ * This method returns {@code "baroclinic_velocity"}. Note that the {@code
"ward"} characters
+ * are a common suffix of both texts but nevertheless omitted because they
cut a word.</div>
+ *
+ * <p>If one of those texts is {@code null}, then the other text is
returned.
+ * If there is no common words, then this method returns an empty
string.</p>
+ *
+ * <h4>Possible future evolution</h4>
+ * Current implementation searches only for a common prefix and a common
suffix, ignoring any common words
+ * that may appear in the middle of the strings. A character is considered
the beginning of a word if it is
+ * {@linkplain Character#isLetterOrDigit(int) a letter or digit} which is
not preceded by another letter or
+ * digit (as leading "s" and "c" in "snake_case"), or if it is an
{@linkplain Character#isUpperCase(int)
+ * upper case} letter preceded by a {@linkplain Character#isLowerCase(int)
lower case} letter or no letter
+ * (as both "C" in "CamelCase").
+ *
+ * @param s1 the first text, or {@code null}.
+ * @param s2 the second text, or {@code null}.
+ * @return the common suffix of both texts (may be empty), or {@code null}
if both texts are null.
+ *
+ * @since 1.1
+ */
+ public static CharSequence commonWords(final CharSequence s1, final
CharSequence s2) {
+ final int lg1 = length(s1);
+ final int lg2 = length(s2);
+ final int shortestLength = Math.min(lg1, lg2); // 0 if s1 or s2 is
null, in which case prefix and suffix will have the other value.
+ final CharSequence prefix = commonPrefix(s1, s2); int prefixLength =
length(prefix); if (prefixLength >= shortestLength) return prefix;
+ final CharSequence suffix = commonSuffix(s1, s2); int suffixLength =
length(suffix); if (suffixLength >= shortestLength) return suffix;
+ final int length = prefixLength + suffixLength;
+ if (length >= lg1) return s1; // Check if one of
the strings is already equal to prefix + suffix.
+ if (length >= lg2) return s2;
+ /*
+ * At this point `s1` and `s2` contain at least one character between
the prefix and the suffix.
+ * If the prefix or the suffix seems to stop in the middle of a word,
skip the remaining of that word.
+ * For example if `s1` and `s2` are "eastward_velocity" and
"northward_velocity", the common suffix is
+ * "ward_velocity" but we want to retain only "velocity".
+ *
+ * The first condition below (before the loop) checks the character
after the common prefix (for example "e"
+ * in "baroclinic_eastward_velocity" if the prefix is "baroclinic_").
The intent is to handle the case where
+ * the word separator is not the same (e.g.
"baroclinic_eastward_velocity" and "baroclinic northward velocity",
+ * in which case the '_' or ' ' character would not appear in the
prefix).
+ */
+ if (!isWordBoundary(s1, prefixLength, codePointAt(s1, prefixLength)) &&
+ !isWordBoundary(s2, prefixLength, codePointAt(s2, prefixLength)))
+ {
+ while (prefixLength > 0) {
+ final int c = codePointBefore(prefix, prefixLength);
+ final int n = charCount(c);
+ prefixLength -= n;
+ if (isWordBoundary(prefix, prefixLength, c)) {
+ if (!isLetterOrDigit(c)) prefixLength += n;
// Keep separator character.
+ break;
+ }
+ }
+ }
+ /*
+ * Same process than for the prefix above. The condition before the
loop checks the character before suffix
+ * for the same reason than above, but using only `isLetterOrDigit`
ignoring camel-case. The reason is that
+ * if the character before was a word separator according camel-case
convention (i.e. an upper-case letter),
+ * we would need to include it in the common suffix.
+ */
+ int suffixStart = 0;
+ if (isLetterOrDigit(codePointBefore(s1, lg1 - suffixLength)) &&
+ isLetterOrDigit(codePointBefore(s2, lg2 - suffixLength)))
+ {
+ while (suffixStart < suffixLength) {
+ final int c = codePointAt(suffix, suffixStart);
+ if (isWordBoundary(suffix, suffixStart, c)) break;
+ suffixStart += charCount(c);
+ }
+ }
+ /*
+ * At this point we got the final prefix and suffix to use. If the
prefix or suffix is empty,
+ * trim whitespaces or '_' character. For example if the suffix is
"_velocity" and no prefix,
+ * return "velocity" without leading "_" character.
+ */
+ if (prefixLength == 0) {
+ while (suffixStart < suffixLength) {
+ final int c = codePointAt(suffix, suffixStart);
+ if (isLetterOrDigit(c)) {
+ return suffix.subSequence(suffixStart, suffixLength); //
Skip leading ignorable characters in suffix.
+ }
+ suffixStart += charCount(c);
+ }
+ return "";
+ }
+ if (suffixStart >= suffixLength) {
+ while (prefixLength > 0) {
+ final int c = codePointBefore(prefix, prefixLength);
+ if (isLetterOrDigit(c)) {
+ return prefix.subSequence(0, prefixLength); //
Skip trailing ignorable characters in prefix.
+ }
+ prefixLength -= charCount(c);
+ }
+ return "";
+ }
+ /*
+ * All special cases have been examined. Return the concatenation of
(possibly shortened)
+ * common prefix and suffix.
+ */
+ final StringBuilder buffer = new StringBuilder(prefixLength +
suffixLength).append(prefix);
+ final int c1 = codePointBefore(prefix, prefixLength);
+ final int c2 = codePointAt(suffix, suffixStart);
+ if (isLetterOrDigit(c1) && isLetterOrDigit(c2)) {
+ if (!Character.isUpperCase(c2) || !isLowerCase(c1)) {
+ buffer.append(' '); // Keep a separator between
two words (except if CamelCase is used).
+ }
+ } else if (c1 == c2) {
+ suffixStart += charCount(c2); // Avoid repeating '_' in e.g.
"baroclinic_<removed>_velocity".
+ }
+ return buffer.append(suffix, suffixStart, suffixLength).toString();
+ }
+
+ /**
+ * Returns {@code true} if the character {@code c} is the beginning of a
word or a non-word character.
+ * For example this method returns {@code true} if {@code c} is {@code
'_'} in {@code "snake_case"} or
+ * {@code "C"} in {@code "CamelCase"}.
+ *
+ * @param s the character sequence from which the {@code c} character
has been obtained.
+ * @param i the index in {@code s} where the {@code c} character has
been obtained.
+ * @param c the code point in {@code s} as index {@code i}.
+ * @return whether the given character is the beginning of a word or a
non-word character.
+ */
+ private static boolean isWordBoundary(final CharSequence s, final int i,
final int c) {
+ if (!isLetterOrDigit(c)) return true;
+ if (!Character.isUpperCase(c)) return false;
+ return (i <= 0 || isLowerCase(codePointBefore(s, i)));
+ }
+
+ /**
* Returns the token starting at the given offset in the given text. For
the purpose of this
* method, a "token" is any sequence of consecutive characters of the same
type, as defined
* below.
@@ -2085,8 +2230,10 @@ cmp: while (ia < lga) {
} else if (src instanceof CharBuffer) {
((CharBuffer) src).subSequence(srcOffset, srcOffset +
length).get(dst, dstOffset, length);
} else {
- // An other candidate could be javax.swing.text.Segment, but it
- // is probably not worth to introduce a Swing dependency for it.
+ /*
+ * Another candidate could be `javax.swing.text.Segment`, but it
+ * is probably not worth to introduce a Swing dependency for it.
+ */
while (length != 0) {
dst[dstOffset++] = src.charAt(srcOffset++);
length--;
diff --git
a/core/sis-utility/src/test/java/org/apache/sis/util/CharSequencesTest.java
b/core/sis-utility/src/test/java/org/apache/sis/util/CharSequencesTest.java
index 34b164d..5686cea 100644
--- a/core/sis-utility/src/test/java/org/apache/sis/util/CharSequencesTest.java
+++ b/core/sis-utility/src/test/java/org/apache/sis/util/CharSequencesTest.java
@@ -33,7 +33,7 @@ import static org.apache.sis.util.CharSequences.*;
*
* @author Martin Desruisseaux (Geomatys)
* @author Johann Sorel (Geomatys)
- * @version 1.0
+ * @version 1.1
* @since 0.3
* @module
*/
@@ -507,6 +507,10 @@ public final strictfp class CharSequencesTest extends
TestCase {
@Test
public void testCommonPrefix() {
assertEquals("testCommon", commonPrefix(new
StringBuilder("testCommonPrefix()"), "testCommonSuffix()"));
+ assertEquals("", commonPrefix(
"testCommonPrefix()", "unrelated"));
+ assertEquals("", commonPrefix( "",
"unrelated"));
+ assertEquals("", commonPrefix( "",
""));
+ assertEquals("equal", commonPrefix( "equal",
"equal"));
}
/**
@@ -515,6 +519,26 @@ public final strictfp class CharSequencesTest extends
TestCase {
@Test
public void testCommonSuffix() {
assertEquals("fix()", commonSuffix(new
StringBuilder("testCommonPrefix()"), "testCommonSuffix()"));
+ assertEquals("", commonSuffix(
"testCommonPrefix()", "unrelated"));
+ assertEquals("", commonSuffix( "",
"unrelated"));
+ assertEquals("", commonSuffix( "",
""));
+ assertEquals("equal", commonSuffix( "equal",
"equal"));
+ }
+
+ /**
+ * Tests the {@link CharSequences#commonWords(CharSequence, CharSequence)}
method.
+ */
+ @Test
+ public void testCommonWords() {
+ assertSame ("baroclinic_eastward_velocity",
commonWords("baroclinic_eastward_velocity", null));
+ assertSame ("baroclinic_velocity",
commonWords("baroclinic_eastward_velocity", "baroclinic_velocity"));
+ assertEquals("baroclinic_velocity",
commonWords("baroclinic_eastward_velocity", "baroclinic_northward_velocity"));
+ assertEquals("baroclinic velocity",
commonWords("baroclinic_eastward_velocity", "baroclinic northward velocity"));
+ assertEquals("baroclinic",
commonWords("baroclinic_eastward", "baroclinic_northward"));
+ assertEquals("velocity",
commonWords("eastward_velocity", "northward_velocity"));
+ assertEquals("BaroclinicVelocity",
commonWords("BaroclinicEastwardVelocity", "BaroclinicNorthwardVelocity"));
+ assertEquals("Baroclinic",
commonWords("BaroclinicEastward", "BaroclinicNorthward"));
+ assertEquals("Velocity",
commonWords("EastwardVelocity", "NorthwardVelocity"));
}
/**
diff --git
a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/RasterResource.java
b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/RasterResource.java
index e1c4adb..19f0460 100644
---
a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/RasterResource.java
+++
b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/RasterResource.java
@@ -20,6 +20,7 @@ import java.util.Map;
import java.util.List;
import java.util.HashMap;
import java.util.ArrayList;
+import java.util.Iterator;
import java.util.Optional;
import java.io.IOException;
import java.nio.file.Path;
@@ -53,9 +54,11 @@ import org.apache.sis.measure.MeasurementRange;
import org.apache.sis.measure.NumberRange;
import org.apache.sis.math.Vector;
import org.apache.sis.util.Numbers;
+import org.apache.sis.util.CharSequences;
import org.apache.sis.util.resources.Errors;
import org.apache.sis.util.resources.Vocabulary;
import org.apache.sis.internal.jdk9.JDK9;
+import org.apache.sis.internal.storage.MetadataBuilder;
/**
@@ -100,7 +103,7 @@ public final class RasterResource extends
AbstractGridResource implements Resour
* than variable name because the former is controlled vocabulary. The use
of controlled vocabulary for identifiers increases
* the chances of stability or consistency between similar products.
*
- * <p>The value set by constructor may be updated by {@link
#resolveNameCollision(RasterResource, Decoder)},
+ * <p>The value set by constructor may be updated by {@link
#resolveNameCollision(Decoder)},
* but should not be modified after that point.</p>
*
* @see #getIdentifier()
@@ -203,7 +206,7 @@ public final class RasterResource extends
AbstractGridResource implements Resour
final Variable[] variables = decoder.getVariables().clone();
// Needs a clone because may be modified.
final List<Variable> siblings = new ArrayList<>(4);
// Usually has only 1 element, sometime 2.
final List<Resource> resources = new ArrayList<>(variables.length);
// The raster resources to be returned.
- final Map<GenericName,RasterResource> firstOfName = new HashMap<>();
// For detecting name collisions.
+ final Map<GenericName,List<RasterResource>> byName = new HashMap<>();
// For detecting name collisions.
for (int i=0; i<variables.length; i++) {
final Variable variable = variables[i];
if (variable == null || variable.getRole() !=
VariableRole.COVERAGE) {
@@ -306,40 +309,95 @@ public final class RasterResource extends
AbstractGridResource implements Resour
numBands = siblings.size();
}
final RasterResource r = new RasterResource(decoder, name.trim(),
grid, siblings, numBands, bandDimension, lock);
- r.resolveNameCollision(firstOfName.putIfAbsent(r.identifier, r),
decoder);
+ r.addToNameMap(byName);
resources.add(r);
siblings.clear();
}
+ /*
+ * At this point all resources have been prepared. If the same
standard name is used by more than one resource,
+ * replace the standard name by the identifier for all resources in
collision (an "all or nothing" replacement).
+ * The identifiers should be unique, which should resolve the name
collision. We nevertheless check again after
+ * renaming until there is nothing to change.
+ */
+ boolean changed;
+ do {
+ changed = false;
+ List<RasterResource> collisions = null;
+ for (final Iterator<List<RasterResource>> it =
byName.values().iterator(); it.hasNext();) {
+ final List<RasterResource> rs = it.next();
+ if (rs.size() >= 2) {
+ it.remove(); // Remove
resources before to re-insert them in next loop below.
+ if (collisions == null) {
+ collisions = rs;
+ } else {
+ collisions.addAll(rs);
+ }
+ }
+ }
+ if (collisions != null) { // After above
loop because may change the byName` map.
+ for (final RasterResource r : collisions) {
+ changed |= r.resolveNameCollision(decoder);
+ r.addToNameMap(byName); // For checking if
new names cause new collisions.
+ }
+ }
+ }
+ while (changed);
return resources;
}
/**
- * If the given resource is non-null, modifies the name of this resource
for avoiding name collision.
- * The {@code other} resource shall be non-null when the caller detected
that there is a name collision
- * with that resource.
+ * Adds the name of the given resource to the given map. This is used for
resolving name collision: any entry
+ * associated to two values or more will have the resources renamed by
{@link #resolveNameCollision(Decoder)}.
+ */
+ private void addToNameMap(final Map<GenericName,List<RasterResource>>
byName) {
+ byName.computeIfAbsent(identifier, (key) -> new
ArrayList<>()).add(this);
+ }
+
+ /**
+ * Invoked when the name of this resource needs to be changed because it
collides with the name of another resource.
+ * This method uses the variable name, which should be unique in each
netCDF file. If this resource wraps more than
+ * one variable (for example "eastward_velocity" and
"northward_velocity"), then this method takes the common part
+ * of all variable names ("velocity" in above example).
*
- * @param other the other resource for which there is a name collision,
or {@code null} if no collision.
+ * @return whether this resource has been renamed as an effect of this
method call.
*/
- private void resolveNameCollision(final RasterResource other, final
Decoder decoder) {
- if (other != null) {
- if (identifier.equals(other.identifier)) {
- other.resolveNameCollision(decoder);
- }
- resolveNameCollision(decoder);
+ private boolean resolveNameCollision(final Decoder decoder) {
+ String name = null;
+ for (final Variable v : data) {
+ name = (String) CharSequences.commonWords(name, v.getName());
}
+ if (name == null || name.isEmpty()) {
+ name = data[0].getName(); // If unable to get a common
name, fallback on the first one.
+ }
+ final GenericName newValue =
decoder.nameFactory.createLocalName(decoder.namespace, name);
+ if (newValue.equals(identifier)) return false;
+ identifier = newValue;
+ return true;
}
/**
- * Invoked when the name of this resource needs to be changed because it
collides with the name of another resource.
- * This method appends the variable name, which should be unique in each
netCDF file.
+ * Invoked the first time that {@link #getMetadata()} is invoked. Computes
metadata based on information
+ * provided by {@link #getIdentifier()}, {@link #getGridGeometry()},
{@link #getSampleDimensions()} and
+ * variable names.
+ *
+ * @param metadata the builder where to set metadata properties.
+ * @throws DataStoreException if an error occurred while reading metadata
from the data store.
*/
- private void resolveNameCollision(final Decoder decoder) {
- String name = identifier + " (" + data[0].getName() + ')';
- identifier = decoder.nameFactory.createLocalName(decoder.namespace,
name);
+ @Override
+ protected void createMetadata(final MetadataBuilder metadata) throws
DataStoreException {
+ String title = null;
+ for (final Variable v : data) {
+ title = (String) CharSequences.commonWords(title,
v.getDescription());
+ metadata.addIdentifier(v.getGroupName(), v.getName(),
MetadataBuilder.Scope.RESOURCE);
+ }
+ if (title != null && !title.isEmpty()) {
+
metadata.addTitle(CharSequences.camelCaseToSentence(title).toString());
+ }
+ super.createMetadata(metadata);
}
/**
- * Returns the variable name as an identifier of this resource.
+ * Returns the standard name (if non ambiguous) or the variable name as an
identifier of this resource.
*/
@Override
public Optional<GenericName> getIdentifier() {
diff --git
a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/Variable.java
b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/Variable.java
index 6ebaad6..bcee136 100644
---
a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/Variable.java
+++
b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/Variable.java
@@ -155,6 +155,8 @@ public abstract class Variable extends Node {
/**
* Returns the name of this variable. May be used as sample dimension name
in a raster.
+ * The variable name should be unique in each netCDF file
+ * (by contrast, {@link #getStandardName()} is not always unique).
*
* @return the name of this variable.
*/
@@ -162,24 +164,23 @@ public abstract class Variable extends Node {
public abstract String getName();
/**
- * Returns the standard name if available, or the long name other, or the
ordinary name otherwise.
- * May be used as the {@link RasterResource} label, or the label of a
{@link Raster} as a whole
- * (including all bands). Standard name is preferred to variable name when
controlled vocabulary
- * is desired, for example for more stable identifier or more consistency
between similar data.
+ * Returns the standard name if available, or the unique variable name
otherwise.
+ * May be used for {@link RasterResource#getIdentifier()} and {@link
Raster#label}.
+ * Standard name is preferred to variable name when controlled vocabulary
is desired,
+ * for example for more stable identifier or more consistency between
similar data.
+ *
+ * <p>This method does not check the {@code "long_name"} attribute because
the long
+ * name is more like a sentence (e.g. <cite>"model wind direction at 10
m"</cite>)
+ * while standard name and variable name are more like identifiers.
+ * For the long name, use {@link #getDescription()} instead.</p>
*
* @return the standard name, or a fallback if there is no standard name.
*
* @see RasterResource#identifier
*/
public final String getStandardName() {
- String name = getAttributeAsString(CF.STANDARD_NAME);
- if (name == null) {
- name = getAttributeAsString(CDM.LONG_NAME);
- if (name == null) {
- name = getName();
- }
- }
- return name;
+ final String name = getAttributeAsString(CF.STANDARD_NAME);
+ return (name != null) ? name : getName();
}
/**
diff --git
a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/GridInfo.java
b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/GridInfo.java
index db9460a..e62ddbf 100644
---
a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/GridInfo.java
+++
b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/GridInfo.java
@@ -262,7 +262,7 @@ next: for (final String name : axisNames) {
*/
char abbreviation = getAxisType(axis.getAxisType());
if (abbreviation == 0) {
- abbreviation =
getAxisType(axis.getAttributeAsString(CF.STANDARD_NAME));
+ abbreviation =
getAxisType(axis.getAttributeAsString(CF.STANDARD_NAME)); // No fallback on
variable name.
/*
* If the abbreviation is still unknown, look at the
"long_name", "description" or "title" attribute. Those
* attributes are not standardized, so they are less reliable
than "standard_name". But they are still more
diff --git
a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/VariableInfo.java
b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/VariableInfo.java
index 9f6a095..c0e424e 100644
---
a/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/VariableInfo.java
+++
b/storage/sis-netcdf/src/main/java/org/apache/sis/internal/netcdf/impl/VariableInfo.java
@@ -68,7 +68,8 @@ import org.apache.sis.math.Vector;
final class VariableInfo extends Variable implements Comparable<VariableInfo> {
/**
* The names of attributes where to look for the description to be
returned by {@link #getDescription()}.
- * We use the same attributes than the one documented in the {@link
ucar.nc2.Variable#getDescription()} javadoc.
+ * We use the same attributes than the one documented in the {@link
ucar.nc2.Variable#getDescription()}
+ * javadoc, in same order.
*/
private static final String[] DESCRIPTION_ATTRIBUTES = {
CDM.LONG_NAME,
diff --git
a/storage/sis-netcdf/src/main/java/org/apache/sis/storage/netcdf/MetadataReader.java
b/storage/sis-netcdf/src/main/java/org/apache/sis/storage/netcdf/MetadataReader.java
index dc0504f..d4cd862 100644
---
a/storage/sis-netcdf/src/main/java/org/apache/sis/storage/netcdf/MetadataReader.java
+++
b/storage/sis-netcdf/src/main/java/org/apache/sis/storage/netcdf/MetadataReader.java
@@ -938,8 +938,8 @@ split: while ((start =
CharSequences.skipLeadingWhitespaces(value, start, lengt
variable.writeDataTypeName(buffer);
setBandIdentifier(f.createMemberName(null, name,
f.createTypeName(null, buffer.toString())));
}
- final String id = variable.getAttributeAsString(CF.STANDARD_NAME);
- if (id != null && !id.equals(name)) {
+ final String id = Strings.trimOrNull(variable.getStandardName());
+ if (!id.equals(name)) {
addBandName(variable.getAttributeAsString(ACDD.standard_name_vocabulary), id);
}
final String description =
Strings.trimOrNull(variable.getDescription());
diff --git
a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/AbstractGridResource.java
b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/AbstractGridResource.java
index a803548..0df65f5 100644
---
a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/AbstractGridResource.java
+++
b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/AbstractGridResource.java
@@ -71,8 +71,8 @@ public abstract class AbstractGridResource extends
AbstractResource implements G
/**
* Invoked the first time that {@link #getMetadata()} is invoked. The
default implementation populates
- * metadata based on information provided by {@link #getIdentifier()} and
{@link #getGridGeometry()}.
- * Subclasses should override if they can provide more information.
+ * metadata based on information provided by {@link #getIdentifier()},
{@link #getGridGeometry()} and
+ * {@link #getSampleDimensions()}. Subclasses should override if they can
provide more information.
*
* @param metadata the builder where to set metadata properties.
* @throws DataStoreException if an error occurred while reading metadata
from the data store.
diff --git
a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/AbstractResource.java
b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/AbstractResource.java
index aa57fe1..a1c8eba 100644
---
a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/AbstractResource.java
+++
b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/AbstractResource.java
@@ -16,8 +16,10 @@
*/
package org.apache.sis.internal.storage;
+import java.util.Locale;
import java.util.Optional;
import org.opengis.util.GenericName;
+import org.opengis.util.InternationalString;
import org.opengis.metadata.Metadata;
import org.opengis.geometry.Envelope;
import org.opengis.referencing.operation.TransformException;
@@ -27,6 +29,8 @@ import org.apache.sis.storage.event.StoreEvent;
import org.apache.sis.storage.event.StoreListener;
import org.apache.sis.storage.event.StoreListeners;
import org.apache.sis.storage.event.WarningEvent;
+import org.apache.sis.util.iso.AbstractInternationalString;
+import org.apache.sis.util.CharSequences;
/**
@@ -47,7 +51,7 @@ import org.apache.sis.storage.event.WarningEvent;
* Synchronization, when needed, uses {@code this} lock.
*
* @author Martin Desruisseaux (Geomatys)
- * @version 1.0
+ * @version 1.1
* @since 0.8
* @module
*/
@@ -92,7 +96,7 @@ public class AbstractResource extends StoreListeners
implements Resource {
/**
* Returns a description of this resource. This method invokes {@link
#createMetadata(MetadataBuilder)}
- * the first time it is invoked, then cache the result.
+ * the first time it is invoked, then caches the result.
*
* @return information about this resource (never {@code null} in this
implementation).
* @throws DataStoreException if an error occurred while reading or
computing the envelope.
@@ -116,7 +120,7 @@ public class AbstractResource extends StoreListeners
implements Resource {
* @throws DataStoreException if an error occurred while reading metadata
from the data store.
*/
protected void createMetadata(final MetadataBuilder metadata) throws
DataStoreException {
- getIdentifier().ifPresent((name) ->
metadata.addTitle(name.toInternationalString()));
+ getIdentifier().ifPresent((name) -> metadata.addTitle(new
Sentence(name)));
getEnvelope().ifPresent((envelope) -> {
try {
metadata.addExtent(envelope);
@@ -127,6 +131,36 @@ public class AbstractResource extends StoreListeners
implements Resource {
}
/**
+ * An international string where localized identifiers are formatted more
like an English sentence.
+ * This is used for wrapping {@link GenericName#toInternationalString()}
representation for use as
+ * a citation title.
+ */
+ private static final class Sentence extends AbstractInternationalString {
+ /** The generic name localized representation. */
+ private final InternationalString name;
+
+ /** Returns a new wrapper for the given generic name. */
+ Sentence(final GenericName name) {
+ this.name = name.toInternationalString();
+ }
+
+ /** Returns the generic name as an English-like sentence. */
+ @Override public String toString(final Locale locale) {
+ return
CharSequences.camelCaseToSentence(name.toString(locale)).toString();
+ }
+
+ /** Returns a hash code value for this sentence. */
+ @Override public int hashCode() {
+ return ~name.hashCode();
+ }
+
+ /** Compares the given object with this sentence for equality. */
+ @Override public boolean equals(final Object other) {
+ return (other instanceof Sentence) && name.equals(((Sentence)
other).name);
+ }
+ }
+
+ /**
* Clears any cache in this resource, forcing the data to be recomputed
when needed again.
* This method should be invoked if the data in underlying data store
changed.
*/
diff --git
a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/ConcatenatedFeatureSet.java
b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/ConcatenatedFeatureSet.java
index db05e2b..ce758ad 100644
---
a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/ConcatenatedFeatureSet.java
+++
b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/ConcatenatedFeatureSet.java
@@ -68,7 +68,7 @@ public class ConcatenatedFeatureSet extends
AggregatedFeatureSet {
/**
* Creates a new concatenated feature set with the same types than the
given feature set,
- * but different sources.
+ * but different sources. This is used for creating {@linkplain
#subset(Query) subsets}.
*/
private ConcatenatedFeatureSet(final FeatureSet[] sources, final
ConcatenatedFeatureSet original) {
super(original);
diff --git
a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/MetadataBuilder.java
b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/MetadataBuilder.java
index 6185215..2a33f3b 100644
---
a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/MetadataBuilder.java
+++
b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/MetadataBuilder.java
@@ -130,6 +130,7 @@ import org.apache.sis.internal.util.Numerics;
import org.apache.sis.internal.util.Strings;
import org.apache.sis.util.ArgumentChecks;
import org.apache.sis.util.CharSequences;
+import org.apache.sis.util.Characters;
import org.apache.sis.util.iso.Names;
import org.apache.sis.util.iso.Types;
import org.apache.sis.measure.Units;
@@ -150,7 +151,7 @@ import org.opengis.metadata.citation.Responsibility;
* @author Martin Desruisseaux (Geomatys)
* @author Rémi Maréchal (Geomatys)
* @author Thi Phuong Hao Nguyen (VNSC)
- * @version 1.0
+ * @version 1.1
* @since 0.8
* @module
*/
@@ -913,8 +914,9 @@ public class MetadataBuilder {
/**
* Adds a resource (data) identifier, a metadata identifier, or both as
they are often the same.
* The identifier is added only if {@code code} is non-null, regardless
other argument values.
- * Empty strings (ignoring spaces) are ignored.
- * Storages locations are:
+ * Empty strings (ignoring spaces) are considered as null.
+ * The identifier is not added if already presents.
+ * Storage locations are:
*
* <ul>
* <li><b>Metadata:</b> {@code metadata/metadataIdentifier}</li>
@@ -1139,7 +1141,10 @@ public class MetadataBuilder {
}
/**
- * Adds a title or alternate title of the resource.
+ * Adds a title or alternate title of the resource, if not already present.
+ * This operation does nothing if the title is already defined and the
given
+ * title is already used as an identifier (this policy is a complement of
the
+ * {@link #addTitleOrIdentifier(String, Scope)} behavior).
* Storage location is:
*
* <ul>
@@ -1159,6 +1164,11 @@ public class MetadataBuilder {
if (current == null) {
citation.setTitle(i18n);
} else if (!equals(current, i18n)) {
+ for (final Identifier id : citation.getIdentifiers()) {
+ if (CharSequences.equalsFiltered(title, id.getCode(),
Characters.Filter.LETTERS_AND_DIGITS, true)) {
+ return;
+ }
+ }
addIfNotPresent(citation.getAlternateTitles(), i18n);
}
}
diff --git
a/storage/sis-storage/src/test/java/org/apache/sis/internal/storage/ConcatenatedFeatureSetTest.java
b/storage/sis-storage/src/test/java/org/apache/sis/internal/storage/ConcatenatedFeatureSetTest.java
index a6006ab..7841638 100644
---
a/storage/sis-storage/src/test/java/org/apache/sis/internal/storage/ConcatenatedFeatureSetTest.java
+++
b/storage/sis-storage/src/test/java/org/apache/sis/internal/storage/ConcatenatedFeatureSetTest.java
@@ -19,6 +19,7 @@ package org.apache.sis.internal.storage;
import java.util.Arrays;
import java.util.Collections;
import org.opengis.metadata.Metadata;
+import org.opengis.metadata.lineage.Lineage;
import org.opengis.metadata.content.FeatureCatalogueDescription;
import org.apache.sis.feature.builder.FeatureTypeBuilder;
import org.apache.sis.storage.DataStoreContentException;
@@ -68,8 +69,8 @@ public final strictfp class ConcatenatedFeatureSetTest
extends TestCase {
final Metadata md = cfs.getMetadata();
assertNotNull("getMetadata()", md);
assertContentInfoEquals("City", 3, (FeatureCatalogueDescription)
getSingleton(md.getContentInfo()));
- assertFeatureSourceEquals("City", new String[] {"City"},
-
getSingleton(getSingleton(md.getResourceLineages()).getSources()));
+ final Lineage lineage = getSingleton(md.getResourceLineages());
+ assertFeatureSourceEquals("City", new String[] {"City"},
getSingleton(lineage.getSources()));
}
/**