This is an automated email from the ASF dual-hosted git repository.

desruisseaux pushed a commit to branch geoapi-4.0
in repository https://gitbox.apache.org/repos/asf/sis.git


The following commit(s) were added to refs/heads/geoapi-4.0 by this push:
     new df7ac6d62f Add a helper tools for reducing `VARCHAR` to the minimal 
required length in EPSG script. This is used only by maintainers during 
upgrades of the EPSG geodetic dataset.
df7ac6d62f is described below

commit df7ac6d62f10bcb32e5d7f85a1d8a996e16a9926
Author: Martin Desruisseaux <[email protected]>
AuthorDate: Thu Aug 14 18:13:33 2025 +0200

    Add a helper tools for reducing `VARCHAR` to the minimal required length in 
EPSG script.
    This is used only by maintainers during upgrades of the EPSG geodetic 
dataset.
---
 .../sis/referencing/factory/sql/SQLTranslator.java |   6 +-
 optional/build.gradle.kts                          |   2 +
 .../factory/sql/epsg/DataScriptFormatter.java      |   7 +-
 .../epsg/{Updater.java => DataScriptUpdater.java}  |   6 +-
 .../sis/referencing/factory/sql/epsg/README.md     |  21 ++-
 .../factory/sql/epsg/TableScriptUpdater.java       | 174 +++++++++++++++++++++
 6 files changed, 207 insertions(+), 9 deletions(-)

diff --git 
a/endorsed/src/org.apache.sis.referencing/main/org/apache/sis/referencing/factory/sql/SQLTranslator.java
 
b/endorsed/src/org.apache.sis.referencing/main/org/apache/sis/referencing/factory/sql/SQLTranslator.java
index ea11bcfd25..462b5fbbc1 100644
--- 
a/endorsed/src/org.apache.sis.referencing/main/org/apache/sis/referencing/factory/sql/SQLTranslator.java
+++ 
b/endorsed/src/org.apache.sis.referencing/main/org/apache/sis/referencing/factory/sql/SQLTranslator.java
@@ -596,10 +596,12 @@ check:  for (;;) {
      * The names of the tables for the two conventions are listed in a table 
in the Javadoc of this class.
      * The returned string does not include the identifier quotes.
      *
-     * @param  name  the mixed-case table name.
+     * @param  name  the mixed-case table name, without quotes.
      * @return the name converted to the convention used by the database.
+     *
+     * @since 1.5
      */
-    final String toActualTableName(String name) {
+    public final String toActualTableName(String name) {
         if (useMixedCaseTableNames) {
             return name;
         }
diff --git a/optional/build.gradle.kts b/optional/build.gradle.kts
index c12edc7a6e..51e4111a76 100644
--- a/optional/build.gradle.kts
+++ b/optional/build.gradle.kts
@@ -72,6 +72,7 @@ dependencies {
     api           (drivers.derby.tools)
 
     // Test dependencies
+    testImplementation(drivers.postgres)
     testImplementation(tests.junit5)
     testRuntimeOnly   (tests.jupiter)
     testRuntimeOnly   (libs.jts.core)
@@ -133,6 +134,7 @@ fun patchForTests(args : MutableList<String>) {
     patchModuleWithTests(args, "org.apache.sis.feature")
 
     addRead(args, "org.apache.sis.referencing.database", 
"org.apache.sis.referencing.epsg");
+    addRead(args, "org.apache.sis.referencing.epsg",     
"org.postgresql.jdbc");
 
     // ――――――――――――― Module name ――――――――――――――――――――――― Package to export 
―――――――――――――――
     addExport(args, "org.apache.sis.util",              "org.apache.sis.test",
diff --git 
a/optional/src/org.apache.sis.referencing.epsg/test/org/apache/sis/referencing/factory/sql/epsg/DataScriptFormatter.java
 
b/optional/src/org.apache.sis.referencing.epsg/test/org/apache/sis/referencing/factory/sql/epsg/DataScriptFormatter.java
index 8795ce9762..ccedddaf5d 100644
--- 
a/optional/src/org.apache.sis.referencing.epsg/test/org/apache/sis/referencing/factory/sql/epsg/DataScriptFormatter.java
+++ 
b/optional/src/org.apache.sis.referencing.epsg/test/org/apache/sis/referencing/factory/sql/epsg/DataScriptFormatter.java
@@ -91,7 +91,9 @@ final class DataScriptFormatter extends ScriptRunner {
      *
      * <p>The following columns are deprecated for other reasons:</p>
      * <ul>
-     *   <li>{@code realization_epoch}: is replaced by {@code 
publication_date}.</li>
+     *   <li>{@code realization_epoch}: replaced by {@code 
publication_date}.</li>
+     *   <li>{@code area_polygon_file_ref}: replaced by the {@code "Area 
Polygon"} table.
+     *       The latter is not included in the downloaded files.</li>
      * </ul>
      *
      * <p>The following columns contains <abbr>EPSG</abbr> metadata not used 
by Apache<abbr>SIS</abbr>.
@@ -99,6 +101,7 @@ final class DataScriptFormatter extends ScriptRunner {
      * <ul>
      *   <li>{@code information_source}, {@code data_source} and {@code 
revision_date}: appeared in almost all tables.</li>
      *   <li>{@code change_id}: appeared in almost all tables. Required by the 
removal of the {@code "Change"} table.</li>
+     *   <li>{@code example}: appeared in {@code "Coordinate_Operation 
Method"}.</li>
      * </ul>
      */
     private final Set<String> excludedColumns;
@@ -131,7 +134,7 @@ final class DataScriptFormatter extends ScriptRunner {
         doubleColumns   = Set.of("parameter_value");
         excludedColumns = Set.of("area_of_use_code", "crs_scope", 
"coord_op_scope", "datum_scope",    // Replaced by "Usage" table.
                                  "information_source", "data_source", 
"revision_date", "change_id",   // Unused EPSG metadata.
-                                 "realization_epoch");
+                                 "realization_epoch", "example", 
"area_polygon_file_ref");
 
         toOriginalTableNames = Map.ofEntries(
                 Map.entry("epsg_alias",                      "Alias"),
diff --git 
a/optional/src/org.apache.sis.referencing.epsg/test/org/apache/sis/referencing/factory/sql/epsg/Updater.java
 
b/optional/src/org.apache.sis.referencing.epsg/test/org/apache/sis/referencing/factory/sql/epsg/DataScriptUpdater.java
similarity index 95%
rename from 
optional/src/org.apache.sis.referencing.epsg/test/org/apache/sis/referencing/factory/sql/epsg/Updater.java
rename to 
optional/src/org.apache.sis.referencing.epsg/test/org/apache/sis/referencing/factory/sql/epsg/DataScriptUpdater.java
index 5718574c4b..2d0adc2e72 100644
--- 
a/optional/src/org.apache.sis.referencing.epsg/test/org/apache/sis/referencing/factory/sql/epsg/Updater.java
+++ 
b/optional/src/org.apache.sis.referencing.epsg/test/org/apache/sis/referencing/factory/sql/epsg/DataScriptUpdater.java
@@ -29,11 +29,11 @@ import org.apache.sis.metadata.sql.TestDatabase;
  *
  * @author  Martin Desruisseaux (Geomatys)
  */
-public final class Updater {
+public final class DataScriptUpdater {
     /**
      * Do not allow instantiation of this class.
      */
-    private Updater() {
+    private DataScriptUpdater() {
     }
 
     /**
@@ -54,7 +54,7 @@ public final class Updater {
      *   <li>{@code $NON_FREE_DIR/epsg/Data.sql}</li>
      * </ol>
      *
-     * @param  arguments  the source files and the destination file.
+     * @param  arguments  the source file and the destination file.
      * @throws Exception if an error occurred while reading of writing the 
file.
      */
     @SuppressWarnings("UseOfSystemOutOrSystemErr")
diff --git 
a/optional/src/org.apache.sis.referencing.epsg/test/org/apache/sis/referencing/factory/sql/epsg/README.md
 
b/optional/src/org.apache.sis.referencing.epsg/test/org/apache/sis/referencing/factory/sql/epsg/README.md
index 0605037233..3aaefdee6f 100644
--- 
a/optional/src/org.apache.sis.referencing.epsg/test/org/apache/sis/referencing/factory/sql/epsg/README.md
+++ 
b/optional/src/org.apache.sis.referencing.epsg/test/org/apache/sis/referencing/factory/sql/epsg/README.md
@@ -22,6 +22,9 @@ The compact form is about half the size of the original 
files. Compaction is ach
 This conversion applies only to the data types, the integrity constraints and 
the way that the SQL scripts are written.
 No data value should be altered, accept for accented letters in some names. 
Steps to follow:
 
+
+### Get the new EPSG scripts
+
 Download the latest SQL scripts for PostgreSQL from https://epsg.org/ (require 
registration).
 Unzip in the directory of your choice and remember the path to that directory:
 
@@ -30,6 +33,10 @@ unzip EPSG-<version>-PostgreSQL.zip
 export EPSG_SCRIPTS=$PWD
 ```
 
+Execute the scripts in the `public` schema of a PostgreSQL database on the 
local host.
+This page assumes that the database name is "Referencing", but any other name 
can be used
+if the argument given to `TableScriptUpdater` (later in this page) is adjusted 
accordingly.
+
 If a copy of the original SQL scripts (as downloaded from EPSG) for the 
previous version is still available,
 and if the following commands report no difference, then jump to "execute 
main" step.
 
@@ -54,6 +61,9 @@ cp $EPSG_SCRIPTS/PostgreSQL_Table_Script.sql Tables.sql
 cp $EPSG_SCRIPTS/PostgreSQL_FKey_Script.sql  FKeys.sql
 ```
 
+
+### Manual checks and editions
+
 Open the `Tables.sql` file for edition:
 
 * Keep the header comments that existed in the overwritten file.
@@ -92,7 +102,10 @@ in which case the maintainer can just revert the changes in 
order to preserve th
 However, if some changes are found in the schema, then hard-coded values in 
the `DataScriptFormatter` class may need
 to be modified, in particular the `booleanColumnIndicesForTables` and 
`doubleColumnIndicesForTables` collections.
 
-Execute the `main` method of the 
`org.apache.sis.referencing.factory.sql.epsg.Updater` class
+
+### Automatic updates after the manual checks
+
+Execute the `main` method of the 
`org.apache.sis.referencing.factory.sql.epsg.*Updater` classes
 located in the test directory of the `org.apache.sis.non-free:sis-epsg` Maven 
sub-project.
 Adjust version numbers as needed in the following commands:
 
@@ -110,9 +123,13 @@ export 
CLASSPATH=$PWD/endorsed/build/classes/java/test/org.apache.sis.metadata:$
 export 
CLASSPATH=$PWD/optional/build/classes/java/test/org.apache.sis.referencing.epsg:$CLASSPATH
 
 # From any directory
-java org.apache.sis.referencing.factory.sql.epsg.Updater 
$EPSG_SCRIPTS/PostgreSQL_Data_Script.sql $NON_FREE_DIR/Data.sql
+java org.apache.sis.referencing.factory.sql.epsg.TableScriptUpdater 
$NON_FREE_DIR/Tables.sql Referencing
+java org.apache.sis.referencing.factory.sql.epsg.DataScriptUpdater  
$EPSG_SCRIPTS/PostgreSQL_Data_Script.sql $NON_FREE_DIR/Data.sql
 ```
 
+
+### Finalize
+
 Run the tests. It it convenient to run 
`org.apache.sis.referencing.factory.sql.EPSGInstallerTest`
 in an IDE first, for easier debugging if some changes in database structure or 
content broke some code.
 Then the whole Apache SIS project should be [tested 
extensively](https://sis.apache.org/source.html#tests),
diff --git 
a/optional/src/org.apache.sis.referencing.epsg/test/org/apache/sis/referencing/factory/sql/epsg/TableScriptUpdater.java
 
b/optional/src/org.apache.sis.referencing.epsg/test/org/apache/sis/referencing/factory/sql/epsg/TableScriptUpdater.java
new file mode 100644
index 0000000000..65eb537a9c
--- /dev/null
+++ 
b/optional/src/org.apache.sis.referencing.epsg/test/org/apache/sis/referencing/factory/sql/epsg/TableScriptUpdater.java
@@ -0,0 +1,174 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sis.referencing.factory.sql.epsg;
+
+import java.util.List;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.sql.Connection;
+import java.sql.Statement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import org.postgresql.ds.PGSimpleDataSource;
+import org.apache.sis.referencing.factory.sql.SQLTranslator;
+
+
+/**
+ * A command-line tool for updating the <abbr>EPSG</abbr> {@code Tables.sql} 
file distributed by Apache <abbr>SIS</abbr>.
+ * This application makes many assumptions about how {@code Tables.sql} is 
formatted in the <abbr>SIS</abbr> repository.
+ * Therefore, this application is valid only for that specific file.
+ * The steps to follow are documented in the {@code README.md} file.
+ *
+ * <p>This application requires a connection to a PostgreSQL database of the 
given name on the local host.
+ * It only rewrites the {@code VARCHAR} types with the minimal length which is 
required for holding the data.
+ * Other changes must be done manually as documented in {@code README.md}.</p>
+ *
+ * @author  Martin Desruisseaux (Geomatys)
+ */
+public final class TableScriptUpdater {
+    /**
+     * The statement to search for identifying the table name.
+     */
+    private static final String CREATE = "CREATE TABLE \"";
+
+    /**
+     * The keyword to search in order to update them.
+     */
+    private static final String VARCHAR = "VARCHAR(";
+
+    /**
+     * The file to update.
+     */
+    private final Path file;
+
+    /**
+     * All lines in the file.
+     */
+    private final List<String> lines;
+
+    /**
+     * Creates a new checker.
+     *
+     * @param file  the file to update.
+     * @throws IOException if an error occurred while reading the {@code 
Tables.sql} file.
+     */
+    private TableScriptUpdater(final Path file) throws IOException {
+        this.file = file;
+        lines = Files.readAllLines(file);
+    }
+
+    /**
+     * Verifies all lines that have been read.
+     *
+     * @param  database  the database on which to connect.
+     * @throws SQLException if an error occurred while querying the 
<abbr>EPSG</abbr> database.
+     */
+    private void update(final String database) throws SQLException {
+        final var dataSource = new PGSimpleDataSource();
+        dataSource.setDatabaseName(database);
+        try (Connection connection = dataSource.getConnection();
+             Statement  statement  = connection.createStatement())
+        {
+            final var translator = new SQLTranslator(connection.getMetaData(), 
null, null);
+            String table = null;
+            for (int i = 0; i < lines.size(); i++) {
+                String line = lines.get(i);
+                if (line.startsWith(CREATE)) {
+                    table = 
translator.toActualTableName(line.substring(CREATE.length(), line.indexOf('"', 
CREATE.length())));
+                    continue;
+                }
+                if (line.isBlank() || 
!Character.isLowerCase(line.codePointAt(0))) {
+                    continue;   // In `Tables.sql` all columns are in lower 
case.
+                }
+                final String column = line.substring(0, line.indexOf(' '));
+                if (table == null) {
+                    throw new RuntimeException("CREATE TABLE statement not 
found before column " + column);
+                }
+                /*
+                 * Check if a nullable column should be declared not null or 
the converse.
+                 * Note that EPSG sometime stores empty texts, which we 
replace by nulls.
+                 * Remarks are kept optional even if EPSG always provide a 
value.
+                 */
+                final boolean hasNull;
+                if (column.equals("remarks") || column.equals("description") 
|| column.equals("formula")) {
+                    hasNull = true;
+                } else try (ResultSet result = statement.executeQuery(
+                        "SELECT " + column + " FROM " + table + " WHERE "
+                                  + column + " IS NULL OR CAST(" + column + " 
AS VARCHAR(4000)) = ''"))
+                {
+                    hasNull = result.next();
+                }
+                if (hasNull == line.contains("NOT NULL")) {
+                    if (hasNull) {
+                        line = line.replace("NOT NULL", "");
+                    } else {
+                        final int s = line.lastIndexOf(',');
+                        line = line.substring(0, s) + " NOT NULL" + 
line.substring(s);
+                    }
+                }
+                /*
+                 * Restrict the length of a `VARCHAR` to the smallest length 
which is sufficient for all data.
+                 */
+                int start = line.indexOf(VARCHAR, column.length());
+                if (start >= 0) {
+                    int length = 1;     // Minimal length.
+                    try (ResultSet result = statement.executeQuery("SELECT 
MAX(LENGTH(" + column + ")) FROM " + table)) {
+                        while (result.next()) {
+                            length = Math.max(length, result.getInt(1));
+                        }
+                    }
+                    start += VARCHAR.length();      // Start of the string 
where the length is written.
+                    final int end = line.indexOf(')', start);
+                    line = line.substring(0, start) + length + 
line.substring(end);
+                }
+                lines.set(i, line);
+            }
+        }
+    }
+
+    /**
+     * Writes the updated {@code Tables.sql} file.
+     */
+    private void write() throws IOException {
+        Files.write(file, lines);
+    }
+
+    /**
+     * Updates the {@code Tables.sql} file.
+     * This method expects two arguments:
+     *
+     * <ol>
+     *   <li>The file of the <abbr>SQL</abbr> script to update, which must 
exist.</li>
+     *   <li>The name of a PostgreSQL database containing the 
<abbr>EPSG</abbr> geodetic dataset.</li>
+     * </ol>
+     *
+     * @param  arguments  the files and the destination file.
+     * @throws IOException if an error occurred while reading of writing the 
{@code Tables.sql} file.
+     * @throws SQLException if an error occurred while querying the 
<abbr>EPSG</abbr> database.
+     */
+    @SuppressWarnings("UseOfSystemOutOrSystemErr")
+    public static void main(String[] arguments) throws IOException, 
SQLException {
+        if (arguments.length != 2) {
+            System.err.println("Expected two arguments: SQL file and 
PostgreSQL database.");
+            return;
+        }
+        final var t = new TableScriptUpdater(Path.of(arguments[0]));
+        t.update(arguments[1]);
+        t.write();
+    }
+}

Reply via email to