This is an automated email from the ASF dual-hosted git repository. fanng pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push: new ab85969a1 feat(spark-connector):support JDBC catalog (#6212) ab85969a1 is described below commit ab85969a1bcd314ac4d6da6410b066e9455e209f Author: youze Liang <41617983+liangyo...@users.noreply.github.com> AuthorDate: Fri Jan 24 15:24:57 2025 +0800 feat(spark-connector):support JDBC catalog (#6212) ### What changes were proposed in this pull request? Support JDBC catalog in Spark Connector ### Why are the changes needed? Fix: #1572 #6164 ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? TestJdbcPropertiesConverter SparkJdbcCatalogIT --- docs/spark-connector/spark-catalog-jdbc.md | 72 ++++++++++++++ docs/spark-connector/spark-connector.md | 2 +- .../spark/connector/jdbc/GravitinoJdbcCatalog.java | 108 +++++++++++++++++++++ .../connector/jdbc/JdbcPropertiesConstants.java | 33 +++++++ .../connector/jdbc/JdbcPropertiesConverter.java | 73 ++++++++++++++ .../spark/connector/jdbc/SparkJdbcTable.java | 71 ++++++++++++++ .../connector/jdbc/SparkJdbcTypeConverter.java | 40 ++++++++ .../connector/version/CatalogNameAdaptor.java | 12 +++ .../connector/integration/test/SparkCommonIT.java | 37 ++++--- .../connector/integration/test/SparkEnvIT.java | 3 + .../integration/test/hive/SparkHiveCatalogIT.java | 10 ++ .../test/iceberg/SparkIcebergCatalogIT.java | 10 ++ .../test/jdbc/SparkJdbcMysqlCatalogIT.java | 108 +++++++++++++++++++++ .../test/jdbc/SparkJdbcTableInfoChecker.java | 55 +++++++++++ .../test/paimon/SparkPaimonCatalogIT.java | 10 ++ .../integration/test/util/SparkTableInfo.java | 3 + .../test/util/SparkTableInfoChecker.java | 6 +- .../integration/test/util/SparkUtilIT.java | 2 +- .../jdbc/TestJdbcPropertiesConverter.java | 62 ++++++++++++ spark-connector/v3.3/spark/build.gradle.kts | 4 + .../jdbc/GravitinoJdbcCatalogSpark33.java | 22 +++++ .../test/jdbc/SparkJdbcMysqlCatalogIT33.java | 36 +++++++ spark-connector/v3.4/spark/build.gradle.kts | 4 + 
.../jdbc/GravitinoJdbcCatalogSpark34.java | 38 ++++++++ .../connector/jdbc/SparkJdbcTypeConverter34.java | 39 ++++++++ .../test/jdbc/SparkJdbcMysqlCatalogIT34.java | 35 +++++++ spark-connector/v3.5/spark/build.gradle.kts | 4 + .../jdbc/GravitinoJdbcCatalogSpark35.java | 39 ++++++++ .../test/jdbc/SparkJdbcMysqlCatalogIT35.java | 36 +++++++ 29 files changed, 957 insertions(+), 17 deletions(-) diff --git a/docs/spark-connector/spark-catalog-jdbc.md b/docs/spark-connector/spark-catalog-jdbc.md new file mode 100644 index 000000000..7805d8026 --- /dev/null +++ b/docs/spark-connector/spark-catalog-jdbc.md @@ -0,0 +1,72 @@ +--- +title: "Spark connector JDBC catalog" +slug: /spark-connector/spark-catalog-jdbc +keyword: spark connector jdbc catalog +license: "This software is licensed under the Apache License version 2." +--- + +The Apache Gravitino Spark connector offers the capability to read JDBC tables, with the metadata managed by the Gravitino server. To enable the use of the JDBC catalog within the Spark connector, you must download the JDBC driver jar for your database and add it to the Spark classpath. + +## Capabilities + +Supports MySQL and PostgreSQL. OceanBase, which is compatible with the MySQL dialect, can use the MySQL driver and the MySQL dialect as a workaround. Doris, which does not support the MySQL dialect, is not currently supported. + +#### Supported DML and DDL operations: + +- `CREATE TABLE` +- `DROP TABLE` +- `ALTER TABLE` +- `SELECT` +- `INSERT` + + :::info + JDBCTable does not support distributed transactions. When writing data to an RDBMS, each task is an independent transaction. If some Spark tasks succeed and others fail, dirty data is generated. 
+ ::: + +#### Not supported operations: + +- `UPDATE` +- `DELETE` +- `TRUNCATE` + +## SQL example + +```sql +-- Suppose mysql_a is the MySQL catalog name managed by Gravitino +USE mysql_a; + +CREATE DATABASE IF NOT EXISTS mydatabase; +USE mydatabase; + +CREATE TABLE IF NOT EXISTS employee ( + id bigint, + name string, + department string, + hire_date timestamp +); +DESC TABLE EXTENDED employee; + +INSERT INTO employee +VALUES +(1, 'Alice', 'Engineering', TIMESTAMP '2021-01-01 09:00:00'), +(2, 'Bob', 'Marketing', TIMESTAMP '2021-02-01 10:30:00'), +(3, 'Charlie', 'Sales', TIMESTAMP '2021-03-01 08:45:00'); + +SELECT * FROM employee WHERE date(hire_date) = '2021-01-01'; + + +``` + +## Catalog properties + +The Gravitino Spark connector transforms the following property names, which are defined in the catalog properties, into Spark JDBC connector configuration. + +| Gravitino catalog property name | Spark JDBC connector configuration | Description | Since Version | +|---------------------------------|------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------| +| `jdbc-url` | `url` | JDBC URL for connecting to the database. For example, jdbc:mysql://localhost:3306 | 0.3.0 | +| `jdbc-user` | `user` | JDBC user name | 0.3.0 | +| `jdbc-password` | `password` | JDBC password | 0.3.0 | +| `jdbc-driver` | `driver` | The driver of the JDBC connection. For example, com.mysql.jdbc.Driver or com.mysql.cj.jdbc.Driver | 0.3.0 | + +Gravitino catalog property names with the prefix `spark.bypass.` are passed through to the Spark JDBC connector. 
+ diff --git a/docs/spark-connector/spark-connector.md b/docs/spark-connector/spark-connector.md index a7c47d51f..a98231343 100644 --- a/docs/spark-connector/spark-connector.md +++ b/docs/spark-connector/spark-connector.md @@ -11,7 +11,7 @@ The Apache Gravitino Spark connector leverages the Spark DataSourceV2 interface ## Capabilities -1. Supports [Hive catalog](spark-catalog-hive.md), [Iceberg catalog](spark-catalog-iceberg.md) and [Paimon catalog](spark-catalog-paimon.md). +1. Supports [Hive catalog](spark-catalog-hive.md), [Iceberg catalog](spark-catalog-iceberg.md), [Paimon catalog](spark-catalog-paimon.md) and [Jdbc catalog](spark-catalog-jdbc.md). 2. Supports federation query. 3. Supports most DDL and DML SQLs. diff --git a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/jdbc/GravitinoJdbcCatalog.java b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/jdbc/GravitinoJdbcCatalog.java new file mode 100644 index 000000000..3f36b9a2a --- /dev/null +++ b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/jdbc/GravitinoJdbcCatalog.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.gravitino.spark.connector.jdbc; + +import com.google.common.collect.Maps; +import java.util.Map; +import org.apache.gravitino.spark.connector.PropertiesConverter; +import org.apache.gravitino.spark.connector.SparkTransformConverter; +import org.apache.gravitino.spark.connector.SparkTypeConverter; +import org.apache.gravitino.spark.connector.catalog.BaseCatalog; +import org.apache.spark.sql.catalyst.analysis.NamespaceAlreadyExistsException; +import org.apache.spark.sql.connector.catalog.Identifier; +import org.apache.spark.sql.connector.catalog.SupportsNamespaces; +import org.apache.spark.sql.connector.catalog.Table; +import org.apache.spark.sql.connector.catalog.TableCatalog; +import org.apache.spark.sql.errors.QueryCompilationErrors; +import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTable; +import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog; +import org.apache.spark.sql.util.CaseInsensitiveStringMap; + +public class GravitinoJdbcCatalog extends BaseCatalog { + + @Override + protected TableCatalog createAndInitSparkCatalog( + String name, CaseInsensitiveStringMap options, Map<String, String> properties) { + JDBCTableCatalog jdbcTableCatalog = new JDBCTableCatalog(); + Map<String, String> all = + getPropertiesConverter().toSparkCatalogProperties(options, properties); + jdbcTableCatalog.initialize(name, new CaseInsensitiveStringMap(all)); + return jdbcTableCatalog; + } + + @Override + protected Table createSparkTable( + Identifier identifier, + org.apache.gravitino.rel.Table gravitinoTable, + Table sparkTable, + TableCatalog sparkCatalog, + PropertiesConverter propertiesConverter, + SparkTransformConverter sparkTransformConverter, + SparkTypeConverter sparkTypeConverter) { + return new SparkJdbcTable( + identifier, + gravitinoTable, + (JDBCTable) sparkTable, + (JDBCTableCatalog) sparkCatalog, + propertiesConverter, + sparkTransformConverter, + sparkTypeConverter); + } + + @Override + protected 
PropertiesConverter getPropertiesConverter() { + return JdbcPropertiesConverter.getInstance(); + } + + @Override + protected SparkTransformConverter getSparkTransformConverter() { + return new SparkTransformConverter(false); + } + + @Override + protected SparkTypeConverter getSparkTypeConverter() { + return new SparkJdbcTypeConverter(); + } + + @Override + public void createNamespace(String[] namespace, Map<String, String> metadata) + throws NamespaceAlreadyExistsException { + Map<String, String> properties = Maps.newHashMap(); + if (!metadata.isEmpty()) { + metadata.forEach( + (k, v) -> { + switch (k) { + case SupportsNamespaces.PROP_COMMENT: + properties.put(k, v); + break; + case SupportsNamespaces.PROP_OWNER: + break; + case SupportsNamespaces.PROP_LOCATION: + throw new RuntimeException( + QueryCompilationErrors.cannotCreateJDBCNamespaceUsingProviderError()); + default: + throw new RuntimeException( + QueryCompilationErrors.cannotCreateJDBCNamespaceWithPropertyError(k)); + } + }); + } + super.createNamespace(namespace, properties); + } +} diff --git a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/jdbc/JdbcPropertiesConstants.java b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/jdbc/JdbcPropertiesConstants.java new file mode 100644 index 000000000..f1cf50f81 --- /dev/null +++ b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/jdbc/JdbcPropertiesConstants.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.spark.connector.jdbc; + +public class JdbcPropertiesConstants { + + public static final String GRAVITINO_JDBC_USER = "jdbc-user"; + public static final String GRAVITINO_JDBC_PASSWORD = "jdbc-password"; + public static final String GRAVITINO_JDBC_DRIVER = "jdbc-driver"; + public static final String GRAVITINO_JDBC_URL = "jdbc-url"; + + public static final String SPARK_JDBC_URL = "url"; + public static final String SPARK_JDBC_USER = "user"; + public static final String SPARK_JDBC_PASSWORD = "password"; + public static final String SPARK_JDBC_DRIVER = "driver"; +} diff --git a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/jdbc/JdbcPropertiesConverter.java b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/jdbc/JdbcPropertiesConverter.java new file mode 100644 index 000000000..7516646e3 --- /dev/null +++ b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/jdbc/JdbcPropertiesConverter.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.spark.connector.jdbc; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableMap; +import java.util.HashMap; +import java.util.Map; +import org.apache.gravitino.spark.connector.PropertiesConverter; + +public class JdbcPropertiesConverter implements PropertiesConverter { + + public static class JdbcPropertiesConverterHolder { + private static final JdbcPropertiesConverter INSTANCE = new JdbcPropertiesConverter(); + } + + private JdbcPropertiesConverter() {} + + public static JdbcPropertiesConverter getInstance() { + return JdbcPropertiesConverterHolder.INSTANCE; + } + + private static final Map<String, String> GRAVITINO_CONFIG_TO_JDBC = + ImmutableMap.of( + JdbcPropertiesConstants.GRAVITINO_JDBC_URL, + JdbcPropertiesConstants.SPARK_JDBC_URL, + JdbcPropertiesConstants.GRAVITINO_JDBC_USER, + JdbcPropertiesConstants.SPARK_JDBC_USER, + JdbcPropertiesConstants.GRAVITINO_JDBC_PASSWORD, + JdbcPropertiesConstants.SPARK_JDBC_PASSWORD, + JdbcPropertiesConstants.GRAVITINO_JDBC_DRIVER, + JdbcPropertiesConstants.SPARK_JDBC_DRIVER); + + @Override + public Map<String, String> toSparkCatalogProperties(Map<String, String> properties) { + Preconditions.checkArgument(properties != null, "Jdbc Catalog properties should not be null"); + HashMap<String, String> jdbcProperties = new HashMap<>(); + properties.forEach( + (key, value) -> { + if (GRAVITINO_CONFIG_TO_JDBC.containsKey(key)) { + jdbcProperties.put(GRAVITINO_CONFIG_TO_JDBC.get(key), value); + } + }); + return jdbcProperties; + } + + 
@Override + public Map<String, String> toGravitinoTableProperties(Map<String, String> properties) { + return new HashMap<>(properties); + } + + @Override + public Map<String, String> toSparkTableProperties(Map<String, String> properties) { + return new HashMap<>(properties); + } +} diff --git a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/jdbc/SparkJdbcTable.java b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/jdbc/SparkJdbcTable.java new file mode 100644 index 000000000..3de807c36 --- /dev/null +++ b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/jdbc/SparkJdbcTable.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.gravitino.spark.connector.jdbc; + +import java.util.Map; +import org.apache.gravitino.rel.Table; +import org.apache.gravitino.spark.connector.PropertiesConverter; +import org.apache.gravitino.spark.connector.SparkTransformConverter; +import org.apache.gravitino.spark.connector.SparkTypeConverter; +import org.apache.gravitino.spark.connector.utils.GravitinoTableInfoHelper; +import org.apache.spark.sql.connector.catalog.Identifier; +import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTable; +import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog; +import org.apache.spark.sql.types.StructType; + +public class SparkJdbcTable extends JDBCTable { + + private GravitinoTableInfoHelper gravitinoTableInfoHelper; + + public SparkJdbcTable( + Identifier identifier, + Table gravitinoTable, + JDBCTable jdbcTable, + JDBCTableCatalog jdbcTableCatalog, + PropertiesConverter propertiesConverter, + SparkTransformConverter sparkTransformConverter, + SparkTypeConverter sparkTypeConverter) { + super(identifier, jdbcTable.schema(), jdbcTable.jdbcOptions()); + this.gravitinoTableInfoHelper = + new GravitinoTableInfoHelper( + false, + identifier, + gravitinoTable, + propertiesConverter, + sparkTransformConverter, + sparkTypeConverter); + } + + @Override + public String name() { + return gravitinoTableInfoHelper.name(); + } + + @Override + @SuppressWarnings("deprecation") + public StructType schema() { + return gravitinoTableInfoHelper.schema(); + } + + @Override + public Map<String, String> properties() { + return gravitinoTableInfoHelper.properties(); + } +} diff --git a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/jdbc/SparkJdbcTypeConverter.java b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/jdbc/SparkJdbcTypeConverter.java new file mode 100644 index 000000000..56e2734a7 --- /dev/null +++ 
b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/jdbc/SparkJdbcTypeConverter.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.spark.connector.jdbc; + +import org.apache.gravitino.rel.types.Type; +import org.apache.gravitino.rel.types.Types; +import org.apache.gravitino.spark.connector.SparkTypeConverter; +import org.apache.spark.sql.types.DataType; +import org.apache.spark.sql.types.DataTypes; + +public class SparkJdbcTypeConverter extends SparkTypeConverter { + + @Override + public DataType toSparkType(Type gravitinoType) { + // if spark version lower than 3.4.4, using VarCharType will throw an exception: Unsupported + // type varchar. 
+ if (gravitinoType instanceof Types.VarCharType) { + return DataTypes.StringType; + } else { + return super.toSparkType(gravitinoType); + } + } +} diff --git a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/version/CatalogNameAdaptor.java b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/version/CatalogNameAdaptor.java index 9392feac2..9d8594b91 100644 --- a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/version/CatalogNameAdaptor.java +++ b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/version/CatalogNameAdaptor.java @@ -46,11 +46,23 @@ public class CatalogNameAdaptor { "lakehouse-paimon-3.5", "org.apache.gravitino.spark.connector.paimon.GravitinoPaimonCatalogSpark35"); + private static final Map<String, String> jdbcCatalogNames = + ImmutableMap.of( + "3.3", + "org.apache.gravitino.spark.connector.jdbc.GravitinoJdbcCatalogSpark33", + "3.4", + "org.apache.gravitino.spark.connector.jdbc.GravitinoJdbcCatalogSpark34", + "3.5", + "org.apache.gravitino.spark.connector.jdbc.GravitinoJdbcCatalogSpark35"); + private static String sparkVersion() { return package$.MODULE$.SPARK_VERSION(); } private static String getCatalogName(String provider, int majorVersion, int minorVersion) { + if (provider.startsWith("jdbc")) { + return jdbcCatalogNames.get(String.format("%d.%d", majorVersion, minorVersion)); + } String key = String.format("%s-%d.%d", provider.toLowerCase(Locale.ROOT), majorVersion, minorVersion); return catalogNames.get(key); diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkCommonIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkCommonIT.java index c7517a3bf..2eb9e7b9b 100644 --- a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkCommonIT.java +++ 
b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkCommonIT.java @@ -119,6 +119,14 @@ public abstract class SparkCommonIT extends SparkEnvIT { protected abstract boolean supportsReplaceColumns(); + protected abstract boolean supportsSchemaAndTableProperties(); + + protected abstract boolean supportsComplexType(); + + protected SparkTableInfoChecker getTableInfoChecker() { + return SparkTableInfoChecker.create(); + } + // Use a custom database not the original default database because SparkCommonIT couldn't // read&write data to tables in default database. The main reason is default database location is // determined by `hive.metastore.warehouse.dir` in hive-site.xml which is local HDFS address @@ -189,6 +197,7 @@ public abstract class SparkCommonIT extends SparkEnvIT { } @Test + @EnabledIf("supportsSchemaAndTableProperties") protected void testCreateAndLoadSchema() { String testDatabaseName = "t_create1"; dropDatabaseIfExists(testDatabaseName); @@ -218,6 +227,7 @@ public abstract class SparkCommonIT extends SparkEnvIT { } @Test + @EnabledIf("supportsSchemaAndTableProperties") protected void testAlterSchema() { String testDatabaseName = "t_alter"; dropDatabaseIfExists(testDatabaseName); @@ -266,7 +276,7 @@ public abstract class SparkCommonIT extends SparkEnvIT { SparkTableInfo tableInfo = getTableInfo(tableName); SparkTableInfoChecker checker = - SparkTableInfoChecker.create() + getTableInfoChecker() .withName(tableName) .withColumns(getSimpleTableColumn()) .withComment(null); @@ -287,7 +297,7 @@ public abstract class SparkCommonIT extends SparkEnvIT { createSimpleTable(tableIdentifier); SparkTableInfo tableInfo = getTableInfo(tableIdentifier); SparkTableInfoChecker checker = - SparkTableInfoChecker.create().withName(tableName).withColumns(getSimpleTableColumn()); + getTableInfoChecker().withName(tableName).withColumns(getSimpleTableColumn()); checker.check(tableInfo); checkTableReadWrite(tableInfo); @@ -300,8 
+310,7 @@ public abstract class SparkCommonIT extends SparkEnvIT { dropTableIfExists(tableName); createSimpleTable(tableName); tableInfo = getTableInfo(tableName); - checker = - SparkTableInfoChecker.create().withName(tableName).withColumns(getSimpleTableColumn()); + checker = getTableInfoChecker().withName(tableName).withColumns(getSimpleTableColumn()); checker.check(tableInfo); checkTableReadWrite(tableInfo); } @@ -317,7 +326,7 @@ public abstract class SparkCommonIT extends SparkEnvIT { SparkTableInfo tableInfo = getTableInfo(tableName); SparkTableInfoChecker checker = - SparkTableInfoChecker.create() + getTableInfoChecker() .withName(tableName) .withColumns(getSimpleTableColumn()) .withComment(tableComment); @@ -396,6 +405,7 @@ public abstract class SparkCommonIT extends SparkEnvIT { } @Test + @EnabledIf("supportsSchemaAndTableProperties") void testAlterTableSetAndRemoveProperty() { String tableName = "test_property"; dropTableIfExists(tableName); @@ -425,8 +435,7 @@ public abstract class SparkCommonIT extends SparkEnvIT { "ALTER TABLE %s SET TBLPROPERTIES('%s'='%s')", tableName, ConnectorConstants.COMMENT, comment)); SparkTableInfo tableInfo = getTableInfo(tableName); - SparkTableInfoChecker checker = - SparkTableInfoChecker.create().withName(tableName).withComment(comment); + SparkTableInfoChecker checker = getTableInfoChecker().withName(tableName).withComment(comment); checker.check(tableInfo); } @@ -593,6 +602,7 @@ public abstract class SparkCommonIT extends SparkEnvIT { } @Test + @EnabledIf("supportsComplexType") void testComplexType() { String tableName = "complex_type_table"; dropTableIfExists(tableName); @@ -632,7 +642,7 @@ public abstract class SparkCommonIT extends SparkEnvIT { sql(createTableSQL); SparkTableInfo tableInfo = getTableInfo(tableName); SparkTableInfoChecker checker = - SparkTableInfoChecker.create() + getTableInfoChecker() .withName(tableName) .withColumns(getSimpleTableColumn()) .withIdentifyPartition(Arrays.asList("name", "age")); @@ 
-652,7 +662,7 @@ public abstract class SparkCommonIT extends SparkEnvIT { sql(createTableSQL); SparkTableInfo tableInfo = getTableInfo(tableName); SparkTableInfoChecker checker = - SparkTableInfoChecker.create() + getTableInfoChecker() .withName(tableName) .withColumns(getSimpleTableColumn()) .withBucket(4, Arrays.asList("id", "name")); @@ -672,7 +682,7 @@ public abstract class SparkCommonIT extends SparkEnvIT { sql(createTableSQL); SparkTableInfo tableInfo = getTableInfo(tableName); SparkTableInfoChecker checker = - SparkTableInfoChecker.create() + getTableInfoChecker() .withName(tableName) .withColumns(getSimpleTableColumn()) .withBucket(4, Arrays.asList("id", "name"), Arrays.asList("name", "id")); @@ -695,7 +705,7 @@ public abstract class SparkCommonIT extends SparkEnvIT { SparkTableInfo newTableInfo = getTableInfo(newTableName); SparkTableInfoChecker checker = - SparkTableInfoChecker.create().withName(newTableName).withColumns(getSimpleTableColumn()); + getTableInfoChecker().withName(newTableName).withColumns(getSimpleTableColumn()); checker.check(newTableInfo); List<String> tableData = getTableData(newTableName); @@ -797,6 +807,7 @@ public abstract class SparkCommonIT extends SparkEnvIT { } @Test + @EnabledIf("supportsSchemaAndTableProperties") void testTableOptions() { String tableName = "options_table"; dropTableIfExists(tableName); @@ -806,7 +817,7 @@ public abstract class SparkCommonIT extends SparkEnvIT { SparkTableInfo tableInfo = getTableInfo(tableName); SparkTableInfoChecker checker = - SparkTableInfoChecker.create() + getTableInfoChecker() .withName(tableName) .withTableProperties(ImmutableMap.of(TableCatalog.OPTION_PREFIX + "a", "b")); checker.check(tableInfo); @@ -983,7 +994,7 @@ public abstract class SparkCommonIT extends SparkEnvIT { protected void checkTableColumns( String tableName, List<SparkColumnInfo> columns, SparkTableInfo tableInfo) { - SparkTableInfoChecker.create() + getTableInfoChecker() .withName(tableName) .withColumns(columns) 
.withComment(null) diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkEnvIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkEnvIT.java index b534a9772..5bcdc9a2c 100644 --- a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkEnvIT.java +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkEnvIT.java @@ -82,6 +82,7 @@ public abstract class SparkEnvIT extends SparkUtilIT { if (lakeHouseIcebergProvider.equalsIgnoreCase(getProvider())) { initIcebergRestServiceEnv(); } + initCatalogEnv(); // Start Gravitino server super.startIntegrationTest(); initHdfsFileSystem(); @@ -151,6 +152,8 @@ public abstract class SparkEnvIT extends SparkUtilIT { HiveContainer.HDFS_DEFAULTFS_PORT); } + protected void initCatalogEnv() throws Exception {} + private void initIcebergRestServiceEnv() { ignoreIcebergRestService = false; Map<String, String> icebergRestServiceConfigs = new HashMap<>(); diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT.java index b95882a0d..6ed8e12d6 100644 --- a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT.java +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT.java @@ -84,6 +84,16 @@ public abstract class SparkHiveCatalogIT extends SparkCommonIT { return true; } + @Override + protected boolean supportsSchemaAndTableProperties() { + return true; + } + + @Override + protected boolean supportsComplexType() { + return true; + } + @Test void testCreateHiveFormatPartitionTable() { String 
tableName = "hive_partition_table"; diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogIT.java index f5fd337a1..291f8f25d 100644 --- a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogIT.java +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogIT.java @@ -109,6 +109,16 @@ public abstract class SparkIcebergCatalogIT extends SparkCommonIT { return true; } + @Override + protected boolean supportsSchemaAndTableProperties() { + return true; + } + + @Override + protected boolean supportsComplexType() { + return true; + } + @Override protected String getTableLocation(SparkTableInfo table) { return String.join(File.separator, table.getTableLocation(), "data"); diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/jdbc/SparkJdbcMysqlCatalogIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/jdbc/SparkJdbcMysqlCatalogIT.java new file mode 100644 index 000000000..1b77047fa --- /dev/null +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/jdbc/SparkJdbcMysqlCatalogIT.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.spark.connector.integration.test.jdbc; + +import static org.apache.gravitino.integration.test.util.TestDatabaseName.MYSQL_CATALOG_MYSQL_IT; + +import com.google.common.collect.Maps; +import java.util.Map; +import org.apache.gravitino.integration.test.container.ContainerSuite; +import org.apache.gravitino.spark.connector.integration.test.SparkCommonIT; +import org.apache.gravitino.spark.connector.integration.test.util.SparkTableInfoChecker; +import org.apache.gravitino.spark.connector.jdbc.JdbcPropertiesConstants; +import org.junit.jupiter.api.Tag; + +@Tag("gravitino-docker-test") +public abstract class SparkJdbcMysqlCatalogIT extends SparkCommonIT { + + protected String mysqlUrl; + protected String mysqlUsername; + protected String mysqlPassword; + protected String mysqlDriver; + + @Override + protected boolean supportsSparkSQLClusteredBy() { + return false; + } + + @Override + protected boolean supportsPartition() { + return false; + } + + @Override + protected boolean supportsDelete() { + return false; + } + + @Override + protected boolean supportsSchemaEvolution() { + return false; + } + + @Override + protected boolean supportsReplaceColumns() { + return false; + } + + @Override + protected boolean supportsSchemaAndTableProperties() { + return false; + } + + @Override + protected boolean supportsComplexType() { + return false; + } + + @Override + protected String getCatalogName() { + return "jdbc_mysql"; + } + + @Override + protected String getProvider() { + return "jdbc-mysql"; + } + + @Override + protected 
SparkTableInfoChecker getTableInfoChecker() { + return SparkJdbcTableInfoChecker.create(); + } + + @Override + protected void initCatalogEnv() throws Exception { + ContainerSuite containerSuite = ContainerSuite.getInstance(); + containerSuite.startMySQLContainer(MYSQL_CATALOG_MYSQL_IT); + mysqlUrl = containerSuite.getMySQLContainer().getJdbcUrl(); + mysqlUsername = containerSuite.getMySQLContainer().getUsername(); + mysqlPassword = containerSuite.getMySQLContainer().getPassword(); + mysqlDriver = containerSuite.getMySQLContainer().getDriverClassName(MYSQL_CATALOG_MYSQL_IT); + } + + @Override + protected Map<String, String> getCatalogConfigs() { + Map<String, String> catalogProperties = Maps.newHashMap(); + catalogProperties.put(JdbcPropertiesConstants.GRAVITINO_JDBC_URL, mysqlUrl); + catalogProperties.put(JdbcPropertiesConstants.GRAVITINO_JDBC_USER, mysqlUsername); + catalogProperties.put(JdbcPropertiesConstants.GRAVITINO_JDBC_PASSWORD, mysqlPassword); + catalogProperties.put(JdbcPropertiesConstants.GRAVITINO_JDBC_DRIVER, mysqlDriver); + return catalogProperties; + } +} diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/jdbc/SparkJdbcTableInfoChecker.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/jdbc/SparkJdbcTableInfoChecker.java new file mode 100644 index 000000000..32a66923c --- /dev/null +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/jdbc/SparkJdbcTableInfoChecker.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.spark.connector.integration.test.jdbc; + +import java.util.List; +import java.util.stream.Collectors; +import org.apache.commons.lang3.StringUtils; +import org.apache.gravitino.spark.connector.integration.test.util.SparkTableInfo; +import org.apache.gravitino.spark.connector.integration.test.util.SparkTableInfoChecker; + +public class SparkJdbcTableInfoChecker extends SparkTableInfoChecker { + + public static SparkJdbcTableInfoChecker create() { + return new SparkJdbcTableInfoChecker(); + } + + // Spark jdbc table cannot distinguish between comment=null and comment="" + @Override + public SparkTableInfoChecker withColumns(List<SparkTableInfo.SparkColumnInfo> columns) { + getExpectedTableInfo() + .setColumns( + columns.stream() + .peek( + column -> + column.setComment( + StringUtils.isEmpty(column.getComment()) ? null : column.getComment())) + .collect(Collectors.toList())); + getCheckFields().add(CheckField.COLUMN); + return this; + } + + @Override + public SparkTableInfoChecker withComment(String comment) { + getExpectedTableInfo().setComment(StringUtils.isEmpty(comment) ? 
"" : comment); + getCheckFields().add(CheckField.COMMENT); + return this; + } +} diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java index 9d0364828..40afa0608 100644 --- a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java @@ -63,6 +63,16 @@ public abstract class SparkPaimonCatalogIT extends SparkCommonIT { return true; } + @Override + protected boolean supportsSchemaAndTableProperties() { + return true; + } + + @Override + protected boolean supportsComplexType() { + return true; + } + @Override protected boolean supportsReplaceColumns() { // Paimon doesn't support replace columns, because it doesn't support drop all fields in table. 
diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/util/SparkTableInfo.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/util/SparkTableInfo.java index 077936c29..74b3ea096 100644 --- a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/util/SparkTableInfo.java +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/util/SparkTableInfo.java @@ -31,6 +31,7 @@ import org.apache.commons.lang3.StringUtils; import org.apache.gravitino.spark.connector.ConnectorConstants; import org.apache.gravitino.spark.connector.hive.SparkHiveTable; import org.apache.gravitino.spark.connector.iceberg.SparkIcebergTable; +import org.apache.gravitino.spark.connector.jdbc.SparkJdbcTable; import org.apache.gravitino.spark.connector.paimon.SparkPaimonTable; import org.apache.spark.sql.connector.catalog.SupportsMetadataColumns; import org.apache.spark.sql.connector.catalog.Table; @@ -193,6 +194,8 @@ public class SparkTableInfo { return ((SparkIcebergTable) baseTable).schema(); } else if (baseTable instanceof SparkPaimonTable) { return ((SparkPaimonTable) baseTable).schema(); + } else if (baseTable instanceof SparkJdbcTable) { + return ((SparkJdbcTable) baseTable).schema(); } else { throw new IllegalArgumentException( "Doesn't support Spark table: " + baseTable.getClass().getName()); diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/util/SparkTableInfoChecker.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/util/SparkTableInfoChecker.java index 33a6a3568..bd7164af7 100644 --- a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/util/SparkTableInfoChecker.java +++ 
b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/util/SparkTableInfoChecker.java @@ -23,6 +23,7 @@ import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; +import lombok.Data; import org.apache.gravitino.spark.connector.SparkTransformConverter; import org.apache.gravitino.spark.connector.integration.test.util.SparkTableInfo.SparkColumnInfo; import org.apache.spark.sql.connector.expressions.Expressions; @@ -34,17 +35,18 @@ import org.junit.jupiter.api.Assertions; * To create an expected SparkTableInfo for verifying the SQL execution result, only the explicitly * set fields will be checked. */ +@Data public class SparkTableInfoChecker { private SparkTableInfo expectedTableInfo = new SparkTableInfo(); private Set<CheckField> checkFields = new LinkedHashSet<>(); - private SparkTableInfoChecker() {} + protected SparkTableInfoChecker() {} public static SparkTableInfoChecker create() { return new SparkTableInfoChecker(); } - private enum CheckField { + protected enum CheckField { NAME, COLUMN, PARTITION, diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/util/SparkUtilIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/util/SparkUtilIT.java index ed7d2085f..5c188f580 100644 --- a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/util/SparkUtilIT.java +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/util/SparkUtilIT.java @@ -77,7 +77,7 @@ public abstract class SparkUtilIT extends BaseIT { // However, Paimon does not support create a database with a specified location. 
protected void createDatabaseIfNotExists(String database, String provider) { String locationClause = - "lakehouse-paimon".equalsIgnoreCase(provider) + "lakehouse-paimon".equalsIgnoreCase(provider) || provider.startsWith("jdbc") ? "" : String.format("LOCATION '/user/hive/%s'", database); sql(String.format("CREATE DATABASE IF NOT EXISTS %s %s", database, locationClause)); diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/jdbc/TestJdbcPropertiesConverter.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/jdbc/TestJdbcPropertiesConverter.java new file mode 100644 index 000000000..5d3e4d065 --- /dev/null +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/jdbc/TestJdbcPropertiesConverter.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.gravitino.spark.connector.jdbc; + +import com.google.common.collect.ImmutableMap; +import java.util.Map; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class TestJdbcPropertiesConverter { + private final JdbcPropertiesConverter jdbcPropertiesConverter = + JdbcPropertiesConverter.getInstance(); + + @Test + void testCatalogProperties() { + String url = "jdbc-url"; + String user = "user1"; + String passwd = "passwd1"; + String driver = "jdbc-driver"; + Map<String, String> properties = + jdbcPropertiesConverter.toSparkCatalogProperties( + ImmutableMap.of( + JdbcPropertiesConstants.GRAVITINO_JDBC_URL, + url, + JdbcPropertiesConstants.GRAVITINO_JDBC_USER, + user, + JdbcPropertiesConstants.GRAVITINO_JDBC_PASSWORD, + passwd, + JdbcPropertiesConstants.GRAVITINO_JDBC_DRIVER, + driver, + "key1", + "value1")); + Assertions.assertEquals( + ImmutableMap.of( + JdbcPropertiesConstants.SPARK_JDBC_URL, + url, + JdbcPropertiesConstants.SPARK_JDBC_USER, + user, + JdbcPropertiesConstants.SPARK_JDBC_PASSWORD, + passwd, + JdbcPropertiesConstants.SPARK_JDBC_DRIVER, + driver), + properties); + } +} diff --git a/spark-connector/v3.3/spark/build.gradle.kts b/spark-connector/v3.3/spark/build.gradle.kts index 66c65f863..6b633434e 100644 --- a/spark-connector/v3.3/spark/build.gradle.kts +++ b/spark-connector/v3.3/spark/build.gradle.kts @@ -52,6 +52,9 @@ dependencies { exclude("org.apache.logging.log4j") exclude("org.slf4j") } + testImplementation(project(":catalogs:catalog-jdbc-common")) { + exclude("org.apache.logging.log4j") + } testImplementation(project(":catalogs:hive-metastore-common")) { exclude("*") } @@ -163,6 +166,7 @@ tasks.test { dependsOn(":catalogs:catalog-hive:jar") dependsOn(":iceberg:iceberg-rest-server:jar") dependsOn(":catalogs:catalog-lakehouse-paimon:jar") + dependsOn(":catalogs:catalog-jdbc-mysql:jar") } } diff --git 
a/spark-connector/v3.3/spark/src/main/java/org/apache/gravitino/spark/connector/jdbc/GravitinoJdbcCatalogSpark33.java b/spark-connector/v3.3/spark/src/main/java/org/apache/gravitino/spark/connector/jdbc/GravitinoJdbcCatalogSpark33.java new file mode 100644 index 000000000..d322cd82c --- /dev/null +++ b/spark-connector/v3.3/spark/src/main/java/org/apache/gravitino/spark/connector/jdbc/GravitinoJdbcCatalogSpark33.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.spark.connector.jdbc; + +public class GravitinoJdbcCatalogSpark33 extends GravitinoJdbcCatalog {} diff --git a/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/jdbc/SparkJdbcMysqlCatalogIT33.java b/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/jdbc/SparkJdbcMysqlCatalogIT33.java new file mode 100644 index 000000000..cf190cfd4 --- /dev/null +++ b/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/jdbc/SparkJdbcMysqlCatalogIT33.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.spark.connector.integration.test.jdbc; + +import org.apache.gravitino.spark.connector.jdbc.GravitinoJdbcCatalogSpark33; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class SparkJdbcMysqlCatalogIT33 extends SparkJdbcMysqlCatalogIT { + @Test + void testCatalogClassName() { + String catalogClass = + getSparkSession() + .sessionState() + .conf() + .getConfString("spark.sql.catalog." 
+ getCatalogName()); + Assertions.assertEquals(GravitinoJdbcCatalogSpark33.class.getName(), catalogClass); + } +} diff --git a/spark-connector/v3.4/spark/build.gradle.kts b/spark-connector/v3.4/spark/build.gradle.kts index aa4134a3c..08ab9ca9c 100644 --- a/spark-connector/v3.4/spark/build.gradle.kts +++ b/spark-connector/v3.4/spark/build.gradle.kts @@ -53,6 +53,9 @@ dependencies { exclude("org.apache.logging.log4j") exclude("org.slf4j") } + testImplementation(project(":catalogs:catalog-jdbc-common")) { + exclude("org.apache.logging.log4j") + } testImplementation(project(":catalogs:hive-metastore-common")) { exclude("*") } @@ -163,6 +166,7 @@ tasks.test { dependsOn(":catalogs:catalog-hive:jar") dependsOn(":iceberg:iceberg-rest-server:jar") dependsOn(":catalogs:catalog-lakehouse-paimon:jar") + dependsOn(":catalogs:catalog-jdbc-mysql:jar") } } diff --git a/spark-connector/v3.4/spark/src/main/java/org/apache/gravitino/spark/connector/jdbc/GravitinoJdbcCatalogSpark34.java b/spark-connector/v3.4/spark/src/main/java/org/apache/gravitino/spark/connector/jdbc/GravitinoJdbcCatalogSpark34.java new file mode 100644 index 000000000..e9c091c18 --- /dev/null +++ b/spark-connector/v3.4/spark/src/main/java/org/apache/gravitino/spark/connector/jdbc/GravitinoJdbcCatalogSpark34.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.spark.connector.jdbc; + +import org.apache.gravitino.spark.connector.SparkTableChangeConverter; +import org.apache.gravitino.spark.connector.SparkTableChangeConverter34; +import org.apache.gravitino.spark.connector.SparkTypeConverter; + +public class GravitinoJdbcCatalogSpark34 extends GravitinoJdbcCatalog { + + @Override + protected SparkTypeConverter getSparkTypeConverter() { + return new SparkJdbcTypeConverter34(); + } + + @Override + protected SparkTableChangeConverter getSparkTableChangeConverter( + SparkTypeConverter sparkTypeConverter) { + return new SparkTableChangeConverter34(sparkTypeConverter); + } +} diff --git a/spark-connector/v3.4/spark/src/main/java/org/apache/gravitino/spark/connector/jdbc/SparkJdbcTypeConverter34.java b/spark-connector/v3.4/spark/src/main/java/org/apache/gravitino/spark/connector/jdbc/SparkJdbcTypeConverter34.java new file mode 100644 index 000000000..bbd32e022 --- /dev/null +++ b/spark-connector/v3.4/spark/src/main/java/org/apache/gravitino/spark/connector/jdbc/SparkJdbcTypeConverter34.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.spark.connector.jdbc; + +import org.apache.gravitino.rel.types.Type; +import org.apache.gravitino.rel.types.Types; +import org.apache.gravitino.spark.connector.SparkTypeConverter34; +import org.apache.spark.sql.types.DataType; +import org.apache.spark.sql.types.DataTypes; + +public class SparkJdbcTypeConverter34 extends SparkTypeConverter34 { + @Override + public DataType toSparkType(Type gravitinoType) { + // if spark version lower than 3.4.4, using VarCharType will throw an exception: Unsupported + // type varchar. + if (gravitinoType instanceof Types.VarCharType) { + return DataTypes.StringType; + } else { + return super.toSparkType(gravitinoType); + } + } +} diff --git a/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/jdbc/SparkJdbcMysqlCatalogIT34.java b/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/jdbc/SparkJdbcMysqlCatalogIT34.java new file mode 100644 index 000000000..9a4038404 --- /dev/null +++ b/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/jdbc/SparkJdbcMysqlCatalogIT34.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.spark.connector.integration.test.jdbc; + +import org.apache.gravitino.spark.connector.jdbc.GravitinoJdbcCatalogSpark34; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class SparkJdbcMysqlCatalogIT34 extends SparkJdbcMysqlCatalogIT { + @Test + void testCatalogClassName() { + String catalogClass = + getSparkSession() + .sessionState() + .conf() + .getConfString("spark.sql.catalog." 
+ getCatalogName()); + Assertions.assertEquals(GravitinoJdbcCatalogSpark34.class.getName(), catalogClass); + } +} diff --git a/spark-connector/v3.5/spark/build.gradle.kts b/spark-connector/v3.5/spark/build.gradle.kts index 15aa01808..782d514ae 100644 --- a/spark-connector/v3.5/spark/build.gradle.kts +++ b/spark-connector/v3.5/spark/build.gradle.kts @@ -53,6 +53,9 @@ dependencies { testImplementation(project(":api")) { exclude("org.apache.logging.log4j") } + testImplementation(project(":catalogs:catalog-jdbc-common")) { + exclude("org.apache.logging.log4j") + } testImplementation(project(":catalogs:hive-metastore-common")) { exclude("*") } @@ -165,6 +168,7 @@ tasks.test { dependsOn(":catalogs:catalog-hive:jar") dependsOn(":iceberg:iceberg-rest-server:jar") dependsOn(":catalogs:catalog-lakehouse-paimon:jar") + dependsOn(":catalogs:catalog-jdbc-mysql:jar") } } diff --git a/spark-connector/v3.5/spark/src/main/java/org/apache/gravitino/spark/connector/jdbc/GravitinoJdbcCatalogSpark35.java b/spark-connector/v3.5/spark/src/main/java/org/apache/gravitino/spark/connector/jdbc/GravitinoJdbcCatalogSpark35.java new file mode 100644 index 000000000..1b10d63fa --- /dev/null +++ b/spark-connector/v3.5/spark/src/main/java/org/apache/gravitino/spark/connector/jdbc/GravitinoJdbcCatalogSpark35.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.spark.connector.jdbc; + +import org.apache.gravitino.spark.connector.SparkTableChangeConverter; +import org.apache.gravitino.spark.connector.SparkTableChangeConverter34; +import org.apache.gravitino.spark.connector.SparkTypeConverter; +import org.apache.gravitino.spark.connector.SparkTypeConverter34; + +public class GravitinoJdbcCatalogSpark35 extends GravitinoJdbcCatalog { + + @Override + protected SparkTypeConverter getSparkTypeConverter() { + return new SparkTypeConverter34(); + } + + @Override + protected SparkTableChangeConverter getSparkTableChangeConverter( + SparkTypeConverter sparkTypeConverter) { + return new SparkTableChangeConverter34(sparkTypeConverter); + } +} diff --git a/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/jdbc/SparkJdbcMysqlCatalogIT35.java b/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/jdbc/SparkJdbcMysqlCatalogIT35.java new file mode 100644 index 000000000..00c14e40d --- /dev/null +++ b/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/jdbc/SparkJdbcMysqlCatalogIT35.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.spark.connector.integration.test.jdbc; + +import org.apache.gravitino.spark.connector.jdbc.GravitinoJdbcCatalogSpark35; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class SparkJdbcMysqlCatalogIT35 extends SparkJdbcMysqlCatalogIT { + @Test + void testCatalogClassName() { + String catalogClass = + getSparkSession() + .sessionState() + .conf() + .getConfString("spark.sql.catalog." + getCatalogName()); + Assertions.assertEquals(GravitinoJdbcCatalogSpark35.class.getName(), catalogClass); + } +}