This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 7689a8ba7a4 [opt](iceberg) support glue rest catalog with s3tables
(#54385)
7689a8ba7a4 is described below
commit 7689a8ba7a43be358587209b2a48558ca22ab018
Author: Mingyu Chen (Rayner) <[email protected]>
AuthorDate: Fri Aug 8 10:04:08 2025 -0700
[opt](iceberg) support glue rest catalog with s3tables (#54385)
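### What problem does this PR solve?
Support creating an Iceberg REST catalog that connects to the AWS Glue Iceberg REST endpoint with S3 Tables as the warehouse, using SigV4 request signing. Example: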
```
CREATE CATALOG glue_s3 PROPERTIES (
'type' = 'iceberg',
'iceberg.catalog.type' = 'rest',
'iceberg.rest.uri' = 'https://glue.ap-east-1.amazonaws.com/iceberg',
'iceberg.rest.warehouse' =
'<account_id>:s3tablescatalog/s3-table-bucket-hk-glue-test',
"iceberg.rest.sigv4-enabled" = "true",
"iceberg.rest.signing-name" = "glue",
"iceberg.rest.access-key-id" = "ak",
"iceberg.rest.secret-access-key" = "sk",
"iceberg.rest.signing-region" = "ap-east-1"
);
```
---
.../property/metastore/IcebergRestProperties.java | 48 ++
.../storage/AbstractS3CompatibleProperties.java | 10 +
.../datasource/property/storage/S3Properties.java | 44 +-
.../metastore/IcebergRestPropertiesTest.java | 185 ++++++
.../property/metastore/RestCatalogTest.java | 112 ++++
.../property/storage/S3PropertiesTest.java | 15 +
regression-test/conf/regression-conf.groovy | 3 +
.../iceberg/test_glue_rest_s3tables.out | Bin 0 -> 20823 bytes
.../backup_restore_object_storage.groovy | 8 +-
.../refactor_storage_param/s3_load.groovy | 7 +-
.../iceberg/test_glue_rest_s3tables.groovy | 667 +++++++++++++++++++++
11 files changed, 1081 insertions(+), 18 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/metastore/IcebergRestProperties.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/metastore/IcebergRestProperties.java
index f59f2138052..c55611f47dd 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/metastore/IcebergRestProperties.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/metastore/IcebergRestProperties.java
@@ -130,6 +130,32 @@ public class IcebergRestProperties extends
AbstractIcebergProperties {
description = "The cache TTL for case insensitive name matching in
ms.")
private String icebergRestCaseInsensitiveNameMatchingCacheTtlMs = "0";
+ // The following properties are specific to AWS Glue Rest Catalog
+ @ConnectorProperty(names = {"iceberg.rest.sigv4-enabled"},
+ required = false,
+ description = "True for Glue Rest Catalog")
+ private String icebergRestSigV4Enabled = "";
+
+ @ConnectorProperty(names = {"iceberg.rest.signing-name"},
+ required = false,
+ description = "The signing name for the iceberg rest catalog
service.")
+ private String icebergRestSigningName = "";
+
+ @ConnectorProperty(names = {"iceberg.rest.signing-region"},
+ required = false,
+ description = "The signing region for the iceberg rest catalog
service.")
+ private String icebergRestSigningRegion = "";
+
+ @ConnectorProperty(names = {"iceberg.rest.access-key-id"},
+ required = false,
+ description = "The access key ID for the iceberg rest catalog
service.")
+ private String icebergRestAccessKeyId = "";
+
+ @ConnectorProperty(names = {"iceberg.rest.secret-access-key"},
+ required = false,
+ description = "The secret access key for the iceberg rest catalog
service.")
+ private String icebergRestSecretAccessKey = "";
+
protected IcebergRestProperties(Map<String, String> props) {
super(props);
}
@@ -196,6 +222,15 @@ public class IcebergRestProperties extends
AbstractIcebergProperties {
throw new IllegalArgumentException("OAuth2 requires either
credential or token");
}
}
+
+ // Check for glue rest catalog specific properties
+ rules.requireIf(icebergRestSigningName, "glue",
+ new String[] {icebergRestSigningRegion,
+ icebergRestAccessKeyId,
+ icebergRestSecretAccessKey,
+ icebergRestSigV4Enabled},
+ "Rest Catalog requires signing-region, access-key-id,
secret-access-key "
+ + "and sigv4-enabled set to true when signing-name is
glue");
return rules;
}
@@ -207,6 +242,8 @@ public class IcebergRestProperties extends
AbstractIcebergProperties {
addOptionalProperties();
// Authentication properties
addAuthenticationProperties();
+ // Glue Rest Catalog specific properties
+ addGlueRestCatalogProperties();
}
private void addCoreCatalogProperties() {
@@ -253,6 +290,17 @@ public class IcebergRestProperties extends
AbstractIcebergProperties {
}
}
+ private void addGlueRestCatalogProperties() {
+ if (Strings.isNotBlank(icebergRestSigningName) &&
icebergRestSigningName.equalsIgnoreCase("glue")) {
+ icebergRestCatalogProperties.put("rest.signing-name", "glue");
+ icebergRestCatalogProperties.put("rest.sigv4-enabled",
icebergRestSigV4Enabled);
+ icebergRestCatalogProperties.put("rest.access-key-id",
icebergRestAccessKeyId);
+ icebergRestCatalogProperties.put("rest.secret-access-key",
icebergRestSecretAccessKey);
+ icebergRestCatalogProperties.put("rest.signing-region",
icebergRestSigningRegion);
+ }
+ }
+
+
public Map<String, String> getIcebergRestCatalogProperties() {
return Collections.unmodifiableMap(icebergRestCatalogProperties);
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AbstractS3CompatibleProperties.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AbstractS3CompatibleProperties.java
index b36e4309701..4726fd53ca0 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AbstractS3CompatibleProperties.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AbstractS3CompatibleProperties.java
@@ -239,12 +239,22 @@ public abstract class AbstractS3CompatibleProperties
extends StorageProperties i
return;
}
String endpoint = S3PropertyUtils.constructEndpointFromUrl(origProps,
usePathStyle, forceParsingByStandardUrl);
+ if (StringUtils.isBlank(endpoint)) {
+ endpoint = getEndpointFromRegion();
+ }
if (StringUtils.isBlank(endpoint)) {
throw new IllegalArgumentException("endpoint is required");
}
setEndpoint(endpoint);
}
+ // Subclasses should override this method to provide a default
endpoint derived from the region.
+ // For AWS S3 only the region is needed, because the endpoint can be
constructed from the region.
+ // For other S3-compatible storage, the endpoint may need to be
specified explicitly.
+ protected String getEndpointFromRegion() {
+ return "";
+ }
+
@Override
public String validateAndNormalizeUri(String uri) throws UserException {
return S3PropertyUtils.validateAndNormalizeUri(uri, getUsePathStyle(),
getForceParsingByStandardUrl());
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/S3Properties.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/S3Properties.java
index 19a9195ec2f..f8bbc6bc3e5 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/S3Properties.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/S3Properties.java
@@ -46,10 +46,15 @@ import java.util.stream.Stream;
public class S3Properties extends AbstractS3CompatibleProperties {
- private static final String[] ENDPOINT_NAMES = {
+ private static final String[] ENDPOINT_NAMES_FOR_GUESSING = {
"s3.endpoint", "AWS_ENDPOINT", "endpoint", "ENDPOINT",
"aws.endpoint", "glue.endpoint",
"aws.glue.endpoint"
};
+
+ private static final String[] REGION_NAMES_FOR_GUESSING = {
+ "s3.region", "glue.region", "aws.glue.region",
"iceberg.rest.signing-region"
+ };
+
@Setter
@Getter
@ConnectorProperty(names = {"s3.endpoint", "AWS_ENDPOINT", "endpoint",
"ENDPOINT", "aws.endpoint", "glue.endpoint",
@@ -61,21 +66,21 @@ public class S3Properties extends
AbstractS3CompatibleProperties {
@Setter
@Getter
@ConnectorProperty(names = {"s3.region", "AWS_REGION", "region", "REGION",
"aws.region", "glue.region",
- "aws.glue.region"},
+ "aws.glue.region", "iceberg.rest.signing-region"},
required = false,
description = "The region of S3.")
protected String region = "";
@Getter
@ConnectorProperty(names = {"s3.access_key", "AWS_ACCESS_KEY",
"access_key", "ACCESS_KEY", "glue.access_key",
- "aws.glue.access-key",
"client.credentials-provider.glue.access_key"},
+ "aws.glue.access-key",
"client.credentials-provider.glue.access_key", "iceberg.rest.access-key-id"},
required = false,
description = "The access key of S3. Optional for anonymous access
to public datasets.")
protected String accessKey = "";
@Getter
@ConnectorProperty(names = {"s3.secret_key", "AWS_SECRET_KEY",
"secret_key", "SECRET_KEY", "glue.secret_key",
- "aws.glue.secret-key",
"client.credentials-provider.glue.secret_key"},
+ "aws.glue.secret-key",
"client.credentials-provider.glue.secret_key",
"iceberg.rest.secret-access-key"},
required = false,
description = "The secret key of S3. Optional for anonymous access
to public datasets.")
protected String secretKey = "";
@@ -189,7 +194,7 @@ public class S3Properties extends
AbstractS3CompatibleProperties {
* @return
*/
protected static boolean guessIsMe(Map<String, String> origProps) {
- String endpoint = Stream.of(ENDPOINT_NAMES)
+ String endpoint = Stream.of(ENDPOINT_NAMES_FOR_GUESSING)
.map(origProps::get)
.filter(Objects::nonNull)
.findFirst()
@@ -203,11 +208,26 @@ public class S3Properties extends
AbstractS3CompatibleProperties {
if (!Strings.isNullOrEmpty(endpoint)) {
return endpoint.contains("amazonaws.com");
}
+
+ // guess from URI
Optional<String> uriValue = origProps.entrySet().stream()
.filter(e -> e.getKey().equalsIgnoreCase("uri"))
.map(Map.Entry::getValue)
.findFirst();
- return uriValue.isPresent() &&
uriValue.get().contains("amazonaws.com");
+ if (uriValue.isPresent()) {
+ return uriValue.get().contains("amazonaws.com");
+ }
+
+ // guess from region
+ String region = Stream.of(REGION_NAMES_FOR_GUESSING)
+ .map(origProps::get)
+ .filter(Objects::nonNull)
+ .findFirst()
+ .orElse(null);
+ if (!Strings.isNullOrEmpty(region)) {
+ return true;
+ }
+ return false;
}
@Override
@@ -278,4 +298,16 @@ public class S3Properties extends
AbstractS3CompatibleProperties {
"org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider");
}
}
+
+ @Override
+ protected String getEndpointFromRegion() {
+ if (!StringUtils.isBlank(endpoint)) {
+ return endpoint;
+ }
+ if (StringUtils.isBlank(region)) {
+ return "";
+ }
+ return "https://s3." + region + ".amazonaws.com";
+ }
}
+
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/metastore/IcebergRestPropertiesTest.java
b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/metastore/IcebergRestPropertiesTest.java
index 16aae778ada..1d550e061cc 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/metastore/IcebergRestPropertiesTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/metastore/IcebergRestPropertiesTest.java
@@ -231,4 +231,189 @@ public class IcebergRestPropertiesTest {
catalogProps.put("test", "value");
});
}
+
+ @Test
+ public void testGlueRestCatalogValidConfiguration() {
+ Map<String, String> props = new HashMap<>();
+ props.put("iceberg.rest.uri", "http://localhost:8080");
+ props.put("iceberg.rest.signing-name", "glue");
+ props.put("iceberg.rest.signing-region", "us-east-1");
+ props.put("iceberg.rest.access-key-id", "AKIAIOSFODNN7EXAMPLE");
+ props.put("iceberg.rest.secret-access-key",
"wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY");
+ props.put("iceberg.rest.sigv4-enabled", "true");
+
+ IcebergRestProperties restProps = new IcebergRestProperties(props);
+ restProps.initNormalizeAndCheckProps();
+
+ Map<String, String> catalogProps =
restProps.getIcebergRestCatalogProperties();
+ Assertions.assertEquals("glue", catalogProps.get("rest.signing-name"));
+ Assertions.assertEquals("us-east-1",
catalogProps.get("rest.signing-region"));
+ Assertions.assertEquals("AKIAIOSFODNN7EXAMPLE",
catalogProps.get("rest.access-key-id"));
+ Assertions.assertEquals("wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
+ catalogProps.get("rest.secret-access-key"));
+ Assertions.assertEquals("true",
catalogProps.get("rest.sigv4-enabled"));
+ }
+
+ @Test
+ public void testGlueRestCatalogCaseInsensitive() {
+ // Test that "GLUE" is also recognized (case insensitive)
+ Map<String, String> props = new HashMap<>();
+ props.put("iceberg.rest.uri", "http://localhost:8080");
+ props.put("iceberg.rest.signing-name", "GLUE");
+ props.put("iceberg.rest.signing-region", "us-west-2");
+ props.put("iceberg.rest.access-key-id", "AKIAIOSFODNN7EXAMPLE");
+ props.put("iceberg.rest.secret-access-key",
"wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY");
+ props.put("iceberg.rest.sigv4-enabled", "true");
+
+ IcebergRestProperties restProps = new IcebergRestProperties(props);
+ restProps.initNormalizeAndCheckProps();
+
+ Map<String, String> catalogProps =
restProps.getIcebergRestCatalogProperties();
+ Assertions.assertEquals("glue", catalogProps.get("rest.signing-name"));
+ Assertions.assertEquals("us-west-2",
catalogProps.get("rest.signing-region"));
+ }
+
+ @Test
+ public void testGlueRestCatalogMissingSigningRegion() {
+ Map<String, String> props = new HashMap<>();
+ props.put("iceberg.rest.uri", "http://localhost:8080");
+ props.put("iceberg.rest.signing-name", "glue");
+ props.put("iceberg.rest.access-key-id", "AKIAIOSFODNN7EXAMPLE");
+ props.put("iceberg.rest.secret-access-key",
"wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY");
+ props.put("iceberg.rest.sigv4-enabled", "true");
+ // Missing signing-region
+
+ IcebergRestProperties restProps = new IcebergRestProperties(props);
+ Assertions.assertThrows(IllegalArgumentException.class,
restProps::initNormalizeAndCheckProps);
+ }
+
+ @Test
+ public void testGlueRestCatalogMissingAccessKeyId() {
+ Map<String, String> props = new HashMap<>();
+ props.put("iceberg.rest.uri", "http://localhost:8080");
+ props.put("iceberg.rest.signing-name", "glue");
+ props.put("iceberg.rest.signing-region", "us-east-1");
+ props.put("iceberg.rest.secret-access-key",
"wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY");
+ props.put("iceberg.rest.sigv4-enabled", "true");
+ // Missing access-key-id
+
+ IcebergRestProperties restProps = new IcebergRestProperties(props);
+ Assertions.assertThrows(IllegalArgumentException.class,
restProps::initNormalizeAndCheckProps);
+ }
+
+ @Test
+ public void testGlueRestCatalogMissingSecretAccessKey() {
+ Map<String, String> props = new HashMap<>();
+ props.put("iceberg.rest.uri", "http://localhost:8080");
+ props.put("iceberg.rest.signing-name", "glue");
+ props.put("iceberg.rest.signing-region", "us-east-1");
+ props.put("iceberg.rest.access-key-id", "AKIAIOSFODNN7EXAMPLE");
+ props.put("iceberg.rest.sigv4-enabled", "true");
+ // Missing secret-access-key
+
+ IcebergRestProperties restProps = new IcebergRestProperties(props);
+ Assertions.assertThrows(IllegalArgumentException.class,
restProps::initNormalizeAndCheckProps);
+ }
+
+ @Test
+ public void testGlueRestCatalogMissingSigV4Enabled() {
+ Map<String, String> props = new HashMap<>();
+ props.put("iceberg.rest.uri", "http://localhost:8080");
+ props.put("iceberg.rest.signing-name", "glue");
+ props.put("iceberg.rest.signing-region", "us-east-1");
+ props.put("iceberg.rest.access-key-id", "AKIAIOSFODNN7EXAMPLE");
+ props.put("iceberg.rest.secret-access-key",
"wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY");
+ // Missing sigv4-enabled
+
+ IcebergRestProperties restProps = new IcebergRestProperties(props);
+ Assertions.assertThrows(IllegalArgumentException.class,
restProps::initNormalizeAndCheckProps);
+ }
+
+ @Test
+ public void testNonGlueSigningNameDoesNotRequireAdditionalProperties() {
+ // Test that non-glue signing names don't require additional properties
+ Map<String, String> props = new HashMap<>();
+ props.put("iceberg.rest.uri", "http://localhost:8080");
+ props.put("iceberg.rest.signing-name", "custom-service");
+
+ IcebergRestProperties restProps = new IcebergRestProperties(props);
+ restProps.initNormalizeAndCheckProps(); // Should not throw
+
+ Map<String, String> catalogProps =
restProps.getIcebergRestCatalogProperties();
+ // Should not contain glue-specific properties
+ Assertions.assertFalse(catalogProps.containsKey("rest.signing-name"));
+
Assertions.assertFalse(catalogProps.containsKey("rest.signing-region"));
+ Assertions.assertFalse(catalogProps.containsKey("rest.access-key-id"));
+
Assertions.assertFalse(catalogProps.containsKey("rest.secret-access-key"));
+ Assertions.assertFalse(catalogProps.containsKey("rest.sigv4-enabled"));
+ }
+
+ @Test
+ public void testEmptySigningNameDoesNotAddGlueProperties() {
+ // Test that empty signing name doesn't add glue properties
+ Map<String, String> props = new HashMap<>();
+ props.put("iceberg.rest.uri", "http://localhost:8080");
+ props.put("iceberg.rest.signing-region", "us-east-1");
+ props.put("iceberg.rest.access-key-id", "AKIAIOSFODNN7EXAMPLE");
+ props.put("iceberg.rest.secret-access-key",
"wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY");
+ props.put("iceberg.rest.sigv4-enabled", "true");
+
+ IcebergRestProperties restProps = new IcebergRestProperties(props);
+ restProps.initNormalizeAndCheckProps(); // Should not throw
+
+ Map<String, String> catalogProps =
restProps.getIcebergRestCatalogProperties();
+ // Should not contain glue-specific properties since signing-name is
not "glue"
+ Assertions.assertFalse(catalogProps.containsKey("rest.signing-name"));
+
Assertions.assertFalse(catalogProps.containsKey("rest.signing-region"));
+ Assertions.assertFalse(catalogProps.containsKey("rest.access-key-id"));
+
Assertions.assertFalse(catalogProps.containsKey("rest.secret-access-key"));
+ Assertions.assertFalse(catalogProps.containsKey("rest.sigv4-enabled"));
+ }
+
+ @Test
+ public void testGlueRestCatalogWithOAuth2() {
+ // Test that Glue properties can be combined with OAuth2
+ Map<String, String> props = new HashMap<>();
+ props.put("iceberg.rest.uri", "http://localhost:8080");
+ props.put("iceberg.rest.security.type", "oauth2");
+ props.put("iceberg.rest.oauth2.token", "my-access-token");
+ props.put("iceberg.rest.signing-name", "glue");
+ props.put("iceberg.rest.signing-region", "us-east-1");
+ props.put("iceberg.rest.access-key-id", "AKIAIOSFODNN7EXAMPLE");
+ props.put("iceberg.rest.secret-access-key",
"wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY");
+ props.put("iceberg.rest.sigv4-enabled", "true");
+
+ IcebergRestProperties restProps = new IcebergRestProperties(props);
+ restProps.initNormalizeAndCheckProps();
+
+ Map<String, String> catalogProps =
restProps.getIcebergRestCatalogProperties();
+ // Should have both OAuth2 and Glue properties
+ Assertions.assertEquals("my-access-token",
catalogProps.get(OAuth2Properties.TOKEN));
+ Assertions.assertEquals("glue", catalogProps.get("rest.signing-name"));
+ Assertions.assertEquals("us-east-1",
catalogProps.get("rest.signing-region"));
+ Assertions.assertEquals("AKIAIOSFODNN7EXAMPLE",
catalogProps.get("rest.access-key-id"));
+ Assertions.assertEquals("wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
+ catalogProps.get("rest.secret-access-key"));
+ Assertions.assertEquals("true",
catalogProps.get("rest.sigv4-enabled"));
+ }
+
+ @Test
+ public void testGlueRestCatalogMissingMultipleProperties() {
+ // Test error message when multiple required properties are missing
+ Map<String, String> props = new HashMap<>();
+ props.put("iceberg.rest.uri", "http://localhost:8080");
+ props.put("iceberg.rest.signing-name", "glue");
+ // Missing all required properties
+
+ IcebergRestProperties restProps = new IcebergRestProperties(props);
+ IllegalArgumentException exception = Assertions.assertThrows(
+ IllegalArgumentException.class,
restProps::initNormalizeAndCheckProps);
+
+ // The error message should mention the required properties
+ String errorMessage = exception.getMessage();
+ Assertions.assertTrue(errorMessage.contains("signing-region")
+ || errorMessage.contains("access-key-id")
+ || errorMessage.contains("secret-access-key")
+ || errorMessage.contains("sigv4-enabled"));
+ }
}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/metastore/RestCatalogTest.java
b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/metastore/RestCatalogTest.java
new file mode 100644
index 00000000000..dca4f91c789
--- /dev/null
+++
b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/metastore/RestCatalogTest.java
@@ -0,0 +1,112 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.datasource.property.metastore;
+
+import com.google.common.collect.Maps;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.iceberg.CatalogProperties;
+import org.apache.iceberg.CatalogUtil;
+import org.apache.iceberg.FileScanTask;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.aws.AwsClientProperties;
+import org.apache.iceberg.aws.s3.S3FileIOProperties;
+import org.apache.iceberg.catalog.Catalog;
+import org.apache.iceberg.catalog.Namespace;
+import org.apache.iceberg.catalog.SupportsNamespaces;
+import org.apache.iceberg.io.CloseableIterable;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+
+import java.util.Map;
+
+@Disabled("Disabled until fill ak/sk")
+public class RestCatalogTest {
+
+ // set your AWS access key and secret key
+ private String ak = "";
+ private String sk = "";
+
+ @BeforeEach
+ protected void initCatalog() {
+ }
+
+ private Catalog initGlueRestCatalog() {
+ Map<String, String> options = Maps.newHashMap();
+ options.put(CatalogUtil.ICEBERG_CATALOG_TYPE,
CatalogUtil.ICEBERG_CATALOG_TYPE_REST);
+ options.put(CatalogProperties.URI,
"https://glue.ap-east-1.amazonaws.com/iceberg");
+ options.put(CatalogProperties.WAREHOUSE_LOCATION,
"169698404049:s3tablescatalog/s3-table-bucket-hk-glue-test");
+ // remove this endpoint prop, or, add https://
+ options.put(S3FileIOProperties.ENDPOINT,
"https://s3.ap-east-1.amazonaws.com");
+ // must set:
+ // software.amazon.awssdk.core.exception.SdkClientException: Unable to
load region from any of the providers in
+ // the chain
software.amazon.awssdk.regions.providers.DefaultAwsRegionProviderChain@627ff1b8:
+ //
[software.amazon.awssdk.regions.providers.SystemSettingsRegionProvider@67d32a54:
+ // Unable to load region from system settings. Region must be
specified either via environment variable
+ // (AWS_REGION) or system property (aws.region).,
+ //
software.amazon.awssdk.regions.providers.AwsProfileRegionProvider@2792b416: No
region provided in profile:
+ // default,
software.amazon.awssdk.regions.providers.InstanceProfileRegionProvider@5cff6b74:
+ // Unable to contact EC2 metadata service.]
+ options.put(AwsClientProperties.CLIENT_REGION, "ap-east-1");
+ // Forbidden: {"message":"Missing Authentication Token"}
+ options.put("rest.sigv4-enabled", "true");
+ // Forbidden: {"message":"Credential should be scoped to correct
service: 'glue'. "}
+ options.put("rest.signing-name", "glue");
+ // Forbidden: {"message":"The security token included in the request
is invalid."}
+ options.put("rest.access-key-id", ak);
+ // Forbidden: {"message":"The request signature we calculated does not
match the signature you provided.
+ // Check your AWS Secret Access Key and signing method. Consult the
service documentation for details."}
+ options.put("rest.secret-access-key", sk);
+ // same as AwsClientProperties.CLIENT_REGION, "ap-east-1"
+ options.put("rest.signing-region", "ap-east-1");
+ // options.put("iceberg.catalog.warehouse",
"<accountid>:s3tablescatalog/<table-bucket-name>");
+ // 4. Build iceberg catalog
+ Configuration conf = new Configuration();
+ return CatalogUtil.buildIcebergCatalog("glue_test", options, conf);
+ }
+
+ @Test
+ public void testGlueRestCatalog() {
+ Catalog glueRestCatalog = initGlueRestCatalog();
+ SupportsNamespaces nsCatalog = (SupportsNamespaces) glueRestCatalog;
+ // List namespaces and assert
+ nsCatalog.listNamespaces(Namespace.empty()).forEach(namespace1 -> {
+ System.out.println("Namespace: " + namespace1);
+ Assertions.assertNotNull(namespace1, "Namespace should not be
null");
+
+ glueRestCatalog.listTables(namespace1).forEach(tableIdentifier -> {
+ System.out.println("Table: " + tableIdentifier.name());
+ Assertions.assertNotNull(tableIdentifier, "TableIdentifier
should not be null");
+
+ // Load table history and assert
+ Table iceTable = glueRestCatalog.loadTable(tableIdentifier);
+ iceTable.history().forEach(snapshot -> {
+ System.out.println("Snapshot: " + snapshot);
+ Assertions.assertNotNull(snapshot, "Snapshot should not be
null");
+ });
+
+ CloseableIterable<FileScanTask> tasks =
iceTable.newScan().planFiles();
+ tasks.forEach(task -> {
+ System.out.println("FileScanTask: " + task);
+ Assertions.assertNotNull(task, "FileScanTask should not be
null");
+ });
+ });
+ });
+ }
+}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/S3PropertiesTest.java
b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/S3PropertiesTest.java
index d81f5148573..23e3c99e309 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/S3PropertiesTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/S3PropertiesTest.java
@@ -20,6 +20,7 @@ package org.apache.doris.datasource.property.storage;
import org.apache.doris.common.UserException;
import
org.apache.doris.datasource.property.storage.exception.StoragePropertiesException;
+import com.google.common.collect.Maps;
import mockit.Expectations;
import mockit.Mocked;
import org.junit.jupiter.api.Assertions;
@@ -383,4 +384,18 @@ public class S3PropertiesTest {
Assertions.assertEquals("base-access-key",
result.get("AWS_ACCESS_KEY"));
Assertions.assertEquals("base-secret-key",
result.get("AWS_SECRET_KEY"));
}
+
+ @Test
+ public void testS3PropertiesFromIcebergRest() throws UserException {
+ Map<String, String> props = Maps.newHashMap();
+ props.put("iceberg.rest.access-key-id", "aaa");
+ props.put("iceberg.rest.secret-access-key", "bbb");
+ props.put("iceberg.rest.signing-region", "ap-east-1");
+
+ S3Properties s3Properties = (S3Properties)
StorageProperties.createPrimary(props);
+ Assertions.assertEquals("ap-east-1", s3Properties.region);
+ Assertions.assertEquals("https://s3.ap-east-1.amazonaws.com",
s3Properties.endpoint);
+ Assertions.assertEquals("aaa", s3Properties.accessKey);
+ Assertions.assertEquals("bbb", s3Properties.secretKey);
+ }
}
diff --git a/regression-test/conf/regression-conf.groovy
b/regression-test/conf/regression-conf.groovy
index 84431bfdfa2..8fd23dc9c78 100644
--- a/regression-test/conf/regression-conf.groovy
+++ b/regression-test/conf/regression-conf.groovy
@@ -280,3 +280,6 @@ icebergS3TablesCatalog = ""
enableExternalHudiTest = false
// The properties string of hudi catalog
hudiEmrCatalog = ""
+icebergS3TablesCatalog=""
+icebergS3TablesCatalogGlueRest=""
+
diff --git
a/regression-test/data/external_table_p2/iceberg/test_glue_rest_s3tables.out
b/regression-test/data/external_table_p2/iceberg/test_glue_rest_s3tables.out
new file mode 100644
index 00000000000..40a714ac82b
Binary files /dev/null and
b/regression-test/data/external_table_p2/iceberg/test_glue_rest_s3tables.out
differ
diff --git
a/regression-test/suites/external_table_p0/refactor_storage_param/backup_restore_object_storage.groovy
b/regression-test/suites/external_table_p0/refactor_storage_param/backup_restore_object_storage.groovy
index 179a33c19d1..920060c104b 100644
---
a/regression-test/suites/external_table_p0/refactor_storage_param/backup_restore_object_storage.groovy
+++
b/regression-test/suites/external_table_p0/refactor_storage_param/backup_restore_object_storage.groovy
@@ -199,17 +199,13 @@ suite("refactor_storage_backup_restore_object_storage",
"p0,external,external_do
shouldFail {
createRepository("${failedRepoName}", "s3.endpoint", s3_endpoint,
"s3.region", region, "AWS_ACCESS_KEY", ak, "AWS_SECRET_KEY", sk, "true",
"s3://ck/" + System.currentTimeMillis())
}
- //endpoint is empty
- shouldFail {
- createRepository("${failedRepoName}", "s3.endpoint", "",
"s3.region", region, "s3.access_key", ak, "s3.secret_key", sk, "",
"s3://${bucket}/test_" + System.currentTimeMillis())
- }
//region is empty
shouldFail {
createRepository("${failedRepoName}", "s3.endpoint", "",
"s3.region", "", "s3.access_key", ak, "s3.secret_key", sk, "",
"s3://${bucket}/test_" + System.currentTimeMillis())
}
}
/*-------------AWS S3--------------------------------*/
- String ak = context.config.otherConfigs.get("AWSAK")
+ String ak = context.config.otherConfigs.get("AWSAK")
String sk = context.config.otherConfigs.get("AWSSK")
String s3_endpoint = "s3.ap-northeast-1.amazonaws.com"
String region = "ap-northeast-1"
@@ -297,4 +293,4 @@ suite("refactor_storage_backup_restore_object_storage",
"p0,external,external_do
backupAndRestore("${oss_repoName2}", ossDbName2, s3table,
"backup_${oss_repoName1}_test")
-}
\ No newline at end of file
+}
diff --git
a/regression-test/suites/external_table_p0/refactor_storage_param/s3_load.groovy
b/regression-test/suites/external_table_p0/refactor_storage_param/s3_load.groovy
index d9ed6a40189..1b628bde854 100644
---
a/regression-test/suites/external_table_p0/refactor_storage_param/s3_load.groovy
+++
b/regression-test/suites/external_table_p0/refactor_storage_param/s3_load.groovy
@@ -24,7 +24,7 @@ suite("refactor_storage_param_s3_load",
"p0,external,external_docker") {
if (enabled == null || enabled.equalsIgnoreCase("false")) {
return
}
- String ak = context.config.otherConfigs.get("AWSAK")
+ String ak = context.config.otherConfigs.get("AWSAK")
String sk = context.config.otherConfigs.get("AWSSK")
String endpoint = "s3.ap-northeast-1.amazonaws.com"
String region = "ap-northeast-1"
@@ -124,16 +124,12 @@ suite("refactor_storage_param_s3_load",
"p0,external,external_docker") {
s3Load("http://${bucket}.${endpoint}${filePath}", bucket, "s3.endpoint",
endpoint, "s3.region", region, "s3.access_key", ak, "s3.secret_key", sk,
"false")
s3Load("http://${bucket}.${endpoint}${filePath}", bucket, "s3.endpoint",
endpoint, "s3.region", region, "s3.access_key", ak, "s3.secret_key", sk, "")
s3Load("https://${bucket}${filePath}", bucket, "s3.endpoint", endpoint,
"s3.region", region, "s3.access_key", ak, "s3.secret_key", sk, "false")
- shouldFail {
- s3Load("https://${bucket}${filePath}", bucket, "", endpoint,
"s3.region", region, "s3.access_key", ak, "s3.secret_key", sk, "false")
- }
shouldFail {
s3Load("https://${bucket}${filePath}", bucket, "", endpoint,
"s3.region", region, "s3.access_key", "", "s3.secret_key", sk, "false")
}
shouldFail {
s3Load("https://${bucket}/${endpoint}${filePath}", bucket,
"s3.endpoint", endpoint, "s3.region", region, "s3.access_key", ak,
"s3.secret_key", sk, "")
-
}
shouldFail {
s3Load("https://${bucket}/${endpoint}${filePath}", bucket,
"s3.endpoint", endpoint, "s3.region", region, "s3.access_key", ak,
"s3.secret_key", sk, "true")
@@ -168,7 +164,6 @@ suite("refactor_storage_param_s3_load",
"p0,external,external_docker") {
shouldFail {
s3Load("https://${bucket}${filePath}", bucket, "", endpoint,
"obs.region", region, "obs.access_key", ak, "obs.secret_key", sk, "false")
}
-
shouldFail {
s3Load("https://${bucket}${filePath}", bucket, "", endpoint,
"obs.region", region, "obs.access_key", "", "obs.secret_key", sk, "false")
}
diff --git
a/regression-test/suites/external_table_p2/iceberg/test_glue_rest_s3tables.groovy
b/regression-test/suites/external_table_p2/iceberg/test_glue_rest_s3tables.groovy
new file mode 100644
index 00000000000..1954ce0a231
--- /dev/null
+++
b/regression-test/suites/external_table_p2/iceberg/test_glue_rest_s3tables.groovy
@@ -0,0 +1,667 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_glue_rest_s3tables",
"p2,external,iceberg,external_remote,external_remote_iceberg") {
+ def format_compressions = ["parquet_zstd"]
+
+ def q01 = { String format_compression, String catalog_name -> // q01: create an all-types Iceberg table, run three INSERTs with a snapshot query after each, then drop it; catalog_name is not referenced inside this closure
+ def parts = format_compression.split("_") // input is "<format>_<compression>", e.g. "parquet_zstd"
+ def format = parts[0] // interpolated into the "write-format" table property below
+ def compression = parts[1] // interpolated into the "compression-codec" table property below
+ def all_types_table = "iceberg_glue_rest_${format_compression}_master" // table name is derived from the format/compression pair
+ sql """ DROP TABLE IF EXISTS `${all_types_table}`; """ // make sure the CREATE TABLE below cannot collide with an existing table
+ sql """
+ CREATE TABLE `${all_types_table}`(
+ `boolean_col` boolean,
+ `int_col` int,
+ `bigint_col` bigint,
+ `float_col` float,
+ `double_col` double,
+ `decimal_col1` decimal(9,0),
+ `decimal_col2` decimal(8,4),
+ `decimal_col3` decimal(18,6),
+ `decimal_col4` decimal(38,12),
+ `string_col` string,
+ `date_col` date,
+ `timestamp_col1` datetime,
+ `timestamp_col2` datetime,
+ `timestamp_col3` datetime,
+ `t_map_string` map<string,string>,
+ `t_map_int` map<int,int>,
+ `t_map_bigint` map<bigint,bigint>,
+ `t_map_float` map<float,float>,
+ `t_map_double` map<double,double>,
+ `t_map_boolean` map<boolean,boolean>,
+ `t_map_decimal_precision_2` map<decimal(2,1),decimal(2,1)>,
+ `t_map_decimal_precision_4` map<decimal(4,2),decimal(4,2)>,
+ `t_map_decimal_precision_8` map<decimal(8,4),decimal(8,4)>,
+ `t_map_decimal_precision_17` map<decimal(17,8),decimal(17,8)>,
+ `t_map_decimal_precision_18` map<decimal(18,8),decimal(18,8)>,
+ `t_map_decimal_precision_38` map<decimal(38,16),decimal(38,16)>,
+ `t_array_string` array<string>,
+ `t_array_int` array<int>,
+ `t_array_bigint` array<bigint>,
+ `t_array_float` array<float>,
+ `t_array_double` array<double>,
+ `t_array_boolean` array<boolean>,
+ `t_array_decimal_precision_2` array<decimal(2,1)>,
+ `t_array_decimal_precision_4` array<decimal(4,2)>,
+ `t_array_decimal_precision_8` array<decimal(8,4)>,
+ `t_array_decimal_precision_17` array<decimal(17,8)>,
+ `t_array_decimal_precision_18` array<decimal(18,8)>,
+ `t_array_decimal_precision_38` array<decimal(38,16)>,
+ `t_struct_bigint` struct<s_bigint:bigint>,
+ `t_complex` map<string,array<struct<s_int:int>>>,
+ `t_struct_nested` struct<struct_field:array<string>>,
+ `t_struct_null`
struct<struct_field_null:string,struct_field_null2:string>,
+ `t_struct_non_nulls_after_nulls`
struct<struct_non_nulls_after_nulls1:int,struct_non_nulls_after_nulls2:string>,
+ `t_nested_struct_non_nulls_after_nulls`
struct<struct_field1:int,struct_field2:string,strict_field3:struct<nested_struct_field1:int,nested_struct_field2:string>>,
+ `t_map_null_value` map<string,string>,
+ `t_array_string_starting_with_nulls` array<string>,
+ `t_array_string_with_nulls_in_between` array<string>,
+ `t_array_string_ending_with_nulls` array<string>,
+ `t_array_string_all_nulls` array<string>,
+ `dt` int) ENGINE=iceberg
+ properties (
+ "compression-codec" = ${compression},
+ "write-format"=${format}
+ )
+ """ // ^ CREATE TABLE: scalar, map, array and struct columns; this variant has no PARTITION BY clause
+ // INSERT #1: a single row; float_col/double_col and the precision_18/38 decimal arrays are bare literals without CAST
+ sql """
+ INSERT INTO ${all_types_table}
+ VALUES (
+ 1, -- boolean_col
+ 2147483647, -- int_col
+ 9223372036854775807, -- bigint_col
+ 123.45, -- float_col
+ 123456.789, -- double_col
+ CAST(123456789 AS DECIMAL(9,0)), -- decimal_col1
+ CAST(1234.5678 AS DECIMAL(8,4)), -- decimal_col2
+ CAST(123456.789012 AS DECIMAL(18,6)), -- decimal_col3
+ CAST(123456789.012345678901 AS DECIMAL(38,12)), -- decimal_col4
+ 'string_value', -- string_col
+ '2024-03-20', -- date_col
+ '2024-03-20 12:00:00', -- timestamp_col1
+ '2024-03-20 12:00:00.123456789', -- timestamp_col2
+ '2024-03-20 12:00:00.123456789', -- timestamp_col3
+ MAP('key1', 'value1'), -- t_map_string
+ MAP(1, 10), -- t_map_int
+ MAP(1, 100000000000), -- t_map_bigint
+ MAP(CAST(1.1 AS FLOAT), CAST(10.1 AS FLOAT)), -- t_map_float
+ MAP(CAST(1.1 AS DOUBLE), CAST(10.1 AS DOUBLE)), -- t_map_double
+ MAP(TRUE, FALSE), -- t_map_boolean
+ MAP(CAST(1.1 AS DECIMAL(2,1)), CAST(1.1 AS DECIMAL(2,1))), --
t_map_decimal_precision_2
+ MAP(CAST(1.23 AS DECIMAL(4,2)), CAST(1.23 AS DECIMAL(4,2))), --
t_map_decimal_precision_4
+ MAP(CAST(1.2345 AS DECIMAL(8,4)), CAST(1.2345 AS DECIMAL(8,4))), --
t_map_decimal_precision_8
+ MAP(CAST(1.23456789 AS DECIMAL(17,8)), CAST(1.23456789 AS
DECIMAL(17,8))), -- t_map_decimal_precision_17
+ MAP(CAST(1.23456789 AS DECIMAL(18,8)), CAST(1.23456789 AS
DECIMAL(18,8))), -- t_map_decimal_precision_18
+ MAP(CAST(1.234567890123456789 AS DECIMAL(38,16)),
CAST(1.234567890123456789 AS DECIMAL(38,16))), -- t_map_decimal_precision_38
+ ARRAY('string1', 'string2'), -- t_ARRAY_string
+ ARRAY(1, 2, 3), -- t_ARRAY_int
+ ARRAY(100000000000, 200000000000), -- t_ARRAY_bigint
+ ARRAY(CAST(1.1 AS FLOAT), CAST(2.2 AS FLOAT)), -- t_ARRAY_float
+ ARRAY(CAST(1.123456789 AS DOUBLE), CAST(2.123456789 AS DOUBLE)), --
t_ARRAY_double
+ ARRAY(TRUE, FALSE), -- t_ARRAY_boolean
+ ARRAY(CAST(1.1 AS DECIMAL(2,1)), CAST(2.2 AS DECIMAL(2,1))), --
t_ARRAY_decimal_precision_2
+ ARRAY(CAST(1.23 AS DECIMAL(4,2)), CAST(2.34 AS DECIMAL(4,2))), --
t_ARRAY_decimal_precision_4
+ ARRAY(CAST(1.2345 AS DECIMAL(8,4)), CAST(2.3456 AS DECIMAL(8,4))),
-- t_ARRAY_decimal_precision_8
+ ARRAY(CAST(1.23456789 AS DECIMAL(17,8)), CAST(2.34567891 AS
DECIMAL(17,8))), -- t_ARRAY_decimal_precision_17
+ ARRAY(1.23456789, 2.34567891), -- t_ARRAY_decimal_precision_18
+ ARRAY(1.234567890123456789, 2.345678901234567890), --
t_ARRAY_decimal_precision_38
+ NAMED_STRUCT('s_bigint', 1234567890), -- t_struct_bigint
+ MAP('key', ARRAY(NAMED_STRUCT('s_int', 123))), -- t_complex
+ NAMED_STRUCT('struct_field', ARRAY('value1', 'value2')), --
t_struct_nested
+ NAMED_STRUCT('struct_field_null', null, 'struct_field_null2', null),
-- t_struct_null
+ NAMED_STRUCT('struct_non_nulls_after_nulls1', 123,
'struct_non_nulls_after_nulls2', 'value'), -- t_struct_non_nulls_after_nulls
+ NAMED_STRUCT('struct_field1', 123, 'struct_field2', 'value',
'strict_field3', NAMED_STRUCT('nested_struct_field1', 123,
'nested_struct_field2', 'nested_value')), --
t_nested_struct_non_nulls_after_nulls
+ MAP('null_key', null), -- t_map_null_value
+ ARRAY(null, 'value1', 'value2'), --
t_ARRAY_string_starting_with_nulls
+ ARRAY('value1', null, 'value2'), --
t_ARRAY_string_with_nulls_in_between
+ ARRAY('value1', 'value2', null), -- t_ARRAY_string_ending_with_nulls
+ ARRAY(null, null, null), -- t_ARRAY_string_all_nulls
+ 20240320 -- dt
+ );
+ """
+ order_qt_q01 """ select * from ${all_types_table};
+ """ // NOTE(review): order_qt_* presumably sorts the rows and compares them with the suite's .out file — confirm against the regression framework
+ // INSERT #2: three rows (dt = 20240320, 20240321, 20240322); float/double/decimal values all use explicit CASTs
+ sql """
+ INSERT INTO ${all_types_table}
+ VALUES (
+ 1, -- boolean_col
+ 2147483647, -- int_col
+ 9223372036854775807, -- bigint_col
+ CAST(123.45 AS FLOAT), -- float_col
+ CAST(123456.789 AS DOUBLE), -- double_col
+ CAST(123456789 AS DECIMAL(9,0)), -- decimal_col1
+ CAST(1234.5678 AS DECIMAL(8,4)), -- decimal_col2
+ CAST(123456.789012 AS DECIMAL(18,6)), -- decimal_col3
+ CAST(123456789.012345678901 AS DECIMAL(38,12)), -- decimal_col4
+ 'string_value', -- string_col
+ '2024-03-20', -- date_col
+ '2024-03-20 12:00:00', -- timestamp_col1
+ '2024-03-20 12:00:00.123456789', -- timestamp_col2
+ '2024-03-20 12:00:00.123456789', -- timestamp_col3
+ MAP('key1', 'value1'), -- t_map_string
+ MAP(1, 10), -- t_map_int
+ MAP(1, 100000000000), -- t_map_bigint
+ MAP(CAST(1.1 AS FLOAT), CAST(10.1 AS FLOAT)), -- t_map_float
+ MAP(CAST(1.1 AS DOUBLE), CAST(10.1 AS DOUBLE)), -- t_map_double
+ MAP(true, false), -- t_map_boolean
+ MAP(CAST(1.1 AS DECIMAL(2,1)), CAST(1.1 AS DECIMAL(2,1))), --
t_map_decimal_precision_2
+ MAP(CAST(1.23 AS DECIMAL(4,2)), CAST(1.23 AS DECIMAL(4,2))), --
t_map_decimal_precision_4
+ MAP(CAST(1.2345 AS DECIMAL(8,4)), CAST(1.2345 AS DECIMAL(8,4))), --
t_map_decimal_precision_8
+ MAP(CAST(1.23456789 AS DECIMAL(17,8)), CAST(1.23456789 AS
DECIMAL(17,8))), -- t_map_decimal_precision_17
+ MAP(CAST(1.23456789 AS DECIMAL(18,8)), CAST(1.23456789 AS
DECIMAL(18,8))), -- t_map_decimal_precision_18
+ MAP(CAST(1.234567890123456789 AS DECIMAL(38,16)),
CAST(1.234567890123456789 AS DECIMAL(38,16))), -- t_map_decimal_precision_38
+ ARRAY('string1', 'string2'), -- t_ARRAY_string
+ ARRAY(1, 2, 3), -- t_ARRAY_int
+ ARRAY(100000000000, 200000000000), -- t_ARRAY_bigint
+ ARRAY(CAST(1.1 AS FLOAT), CAST(2.2 AS FLOAT)), -- t_ARRAY_float
+ ARRAY(CAST(1.123456789 AS DOUBLE), CAST(2.123456789 AS DOUBLE)), --
t_ARRAY_double
+ ARRAY(true, false), -- t_ARRAY_boolean
+ ARRAY(CAST(1.1 AS DECIMAL(2,1)), CAST(2.2 AS DECIMAL(2,1))), --
t_ARRAY_decimal_precision_2
+ ARRAY(CAST(1.23 AS DECIMAL(4,2)), CAST(2.34 AS DECIMAL(4,2))), --
t_ARRAY_decimal_precision_4
+ ARRAY(CAST(1.2345 AS DECIMAL(8,4)), CAST(2.3456 AS DECIMAL(8,4))),
-- t_ARRAY_decimal_precision_8
+ ARRAY(CAST(1.23456789 AS DECIMAL(17,8)), CAST(2.34567891 AS
DECIMAL(17,8))), -- t_ARRAY_decimal_precision_17
+ ARRAY(CAST(1.23456789 AS DECIMAL(18,8)), CAST(2.34567891 AS
DECIMAL(18,8))), -- t_ARRAY_decimal_precision_18
+ ARRAY(CAST(1.234567890123456789 AS DECIMAL(38,16)),
CAST(2.345678901234567890 AS DECIMAL(38,16))), -- t_ARRAY_decimal_precision_38
+ NAMED_STRUCT('s_bigint', 1234567890), -- t_struct_bigint
+ MAP('key', ARRAY(NAMED_STRUCT('s_int', 123))), -- t_complex
+ NAMED_STRUCT('struct_field', ARRAY('value1', 'value2')), --
t_struct_nested
+ NAMED_STRUCT('struct_field_null', null, 'struct_field_null2', null),
-- t_struct_null
+ NAMED_STRUCT('struct_non_nulls_after_nulls1', 123,
'struct_non_nulls_after_nulls2', 'value'), -- t_struct_non_nulls_after_nulls
+ NAMED_STRUCT('struct_field1', 123, 'struct_field2', 'value',
'strict_field3', NAMED_STRUCT('nested_struct_field1', 123,
'nested_struct_field2', 'nested_value')), --
t_nested_struct_non_nulls_after_nulls
+ MAP('null_key', null), -- t_map_null_value
+ ARRAY(null, 'value1', 'value2'), --
t_ARRAY_string_starting_with_nulls
+ ARRAY('value1', null, 'value2'), --
t_ARRAY_string_with_nulls_in_between
+ ARRAY('value1', 'value2', null), -- t_ARRAY_string_ending_with_nulls
+ ARRAY(null, null, null), -- t_ARRAY_string_all_nulls
+ 20240320 -- dt
+ ),
+ (
+ 0, -- boolean_col
+ -2147483648, -- int_col
+ -9223372036854775808, -- bigint_col
+ CAST(-123.45 AS FLOAT), -- float_col
+ CAST(-123456.789 AS DOUBLE), -- double_col
+ CAST(-123456789 AS DECIMAL(9,0)), -- decimal_col1
+ CAST(-1234.5678 AS DECIMAL(8,4)), -- decimal_col2
+ CAST(-123456.789012 AS DECIMAL(18,6)), -- decimal_col3
+ CAST(-123456789.012345678901 AS DECIMAL(38,12)), -- decimal_col4
+ 'string_value', -- string_col
+ '2024-03-21', -- date_col
+ '2024-03-21 12:00:00', -- timestamp_col1
+ '2024-03-21 12:00:00.123456789', -- timestamp_col2
+ '2024-03-21 12:00:00.123456789', -- timestamp_col3
+ MAP('key1', 'value1'), -- t_map_string
+ MAP(2, 20), -- t_map_int
+ MAP(2, 200000000000), -- t_map_bigint
+ MAP(CAST(2.2 AS FLOAT), CAST(20.2 AS FLOAT)), -- t_map_float
+ MAP(CAST(2.2 AS DOUBLE), CAST(20.2 AS DOUBLE)), -- t_map_double
+ MAP(false, true), -- t_map_boolean
+ MAP(CAST(2.2 AS DECIMAL(2,1)), CAST(2.2 AS DECIMAL(2,1))), --
t_map_decimal_precision_2
+ MAP(CAST(2.34 AS DECIMAL(4,2)), CAST(2.34 AS DECIMAL(4,2))), --
t_map_decimal_precision_4
+ MAP(CAST(2.3456 AS DECIMAL(8,4)), CAST(2.3456 AS DECIMAL(8,4))), --
t_map_decimal_precision_8
+ MAP(CAST(2.34567890 AS DECIMAL(17,8)), CAST(2.34567890 AS
DECIMAL(17,8))), -- t_map_decimal_precision_17
+ MAP(CAST(2.34567890 AS DECIMAL(18,8)), CAST(2.34567890 AS
DECIMAL(18,8))), -- t_map_decimal_precision_18
+ MAP(CAST(2.345678901234567890 AS DECIMAL(38,16)),
CAST(2.345678901234567890 AS DECIMAL(38,16))), -- t_map_decimal_precision_38
+ ARRAY('string1', 'string2'), -- t_ARRAY_string
+ ARRAY(4, 5, 6), -- t_ARRAY_int
+ ARRAY(300000000000, 400000000000), -- t_ARRAY_bigint
+ ARRAY(CAST(3.3 AS FLOAT), CAST(4.4 AS FLOAT)), -- t_ARRAY_float
+ ARRAY(CAST(3.123456789 AS DOUBLE), CAST(4.123456789 AS DOUBLE)), --
t_ARRAY_double
+ ARRAY(false, true), -- t_ARRAY_boolean
+ ARRAY(CAST(3.3 AS DECIMAL(2,1)), CAST(4.4 AS DECIMAL(2,1))), --
t_ARRAY_decimal_precision_2
+ ARRAY(CAST(3.45 AS DECIMAL(4,2)), CAST(4.56 AS DECIMAL(4,2))), --
t_ARRAY_decimal_precision_4
+ ARRAY(CAST(3.4567 AS DECIMAL(8,4)), CAST(4.5678 AS DECIMAL(8,4))),
-- t_ARRAY_decimal_precision_8
+ ARRAY(CAST(3.45678901 AS DECIMAL(17,8)), CAST(4.56789012 AS
DECIMAL(17,8))), -- t_ARRAY_decimal_precision_17
+ ARRAY(CAST(3.45678901 AS DECIMAL(18,8)), CAST(4.56789012 AS
DECIMAL(18,8))), -- t_ARRAY_decimal_precision_18
+ ARRAY(CAST(3.456789012345678901 AS DECIMAL(38,16)),
CAST(4.567890123456789012 AS DECIMAL(38,16))), -- t_ARRAY_decimal_precision_38
+ NAMED_STRUCT('s_bigint', -1234567890), -- t_struct_bigint
+ MAP('key', ARRAY(NAMED_STRUCT('s_int', -123))), -- t_complex
+ NAMED_STRUCT('struct_field', ARRAY('value1', 'value2')), --
t_struct_nested
+ NAMED_STRUCT('struct_field_null', null, 'struct_field_null2', null),
-- t_struct_null
+ NAMED_STRUCT('struct_non_nulls_after_nulls1', -123,
'struct_non_nulls_after_nulls2', 'value'), -- t_struct_non_nulls_after_nulls
+ NAMED_STRUCT('struct_field1', -123, 'struct_field2', 'value',
'strict_field3', NAMED_STRUCT('nested_struct_field1', -123,
'nested_struct_field2', 'nested_value')), --
t_nested_struct_non_nulls_after_nulls
+ MAP('null_key', null), -- t_map_null_value
+ ARRAY(null, 'value1', 'value2'), --
t_ARRAY_string_starting_with_nulls
+ ARRAY('value1', null, 'value2'), --
t_ARRAY_string_with_nulls_in_between
+ ARRAY('value1', 'value2', null), -- t_ARRAY_string_ending_with_nulls
+ ARRAY(null, null, null), -- t_ARRAY_string_all_nulls
+ 20240321 -- dt
+ ),
+ (
+ 0, -- boolean_col
+ -2147483648, -- int_col
+ -9223372036854775808, -- bigint_col
+ CAST(-123.45 AS FLOAT), -- float_col
+ CAST(-123456.789 AS DOUBLE), -- double_col
+ CAST(-123456789 AS DECIMAL(9,0)), -- decimal_col1
+ CAST(-1234.5678 AS DECIMAL(8,4)), -- decimal_col2
+ CAST(-123456.789012 AS DECIMAL(18,6)), -- decimal_col3
+ CAST(-123456789.012345678901 AS DECIMAL(38,12)), -- decimal_col4
+ 'string_value', -- string_col
+ '2024-03-22', -- date_col
+ '2024-03-22 12:00:00', -- timestamp_col1
+ '2024-03-22 12:00:00.123456789', -- timestamp_col2
+ '2024-03-22 12:00:00.123456789', -- timestamp_col3
+ MAP('key1', 'value1'), -- t_map_string
+ MAP(3, 20), -- t_map_int
+ MAP(3, 200000000000), -- t_map_bigint
+ MAP(CAST(3.2 AS FLOAT), CAST(20.2 AS FLOAT)), -- t_map_float
+ MAP(CAST(3.2 AS DOUBLE), CAST(20.2 AS DOUBLE)), -- t_map_double
+ MAP(false, true), -- t_map_boolean
+ MAP(CAST(3.2 AS DECIMAL(2,1)), CAST(2.2 AS DECIMAL(2,1))), --
t_map_decimal_precision_2
+ MAP(CAST(3.34 AS DECIMAL(4,2)), CAST(2.34 AS DECIMAL(4,2))), --
t_map_decimal_precision_4
+ MAP(CAST(2.3456 AS DECIMAL(8,4)), CAST(2.3456 AS DECIMAL(8,4))), --
t_map_decimal_precision_8
+ MAP(CAST(2.34567890 AS DECIMAL(17,8)), CAST(2.34567890 AS
DECIMAL(17,8))), -- t_map_decimal_precision_17
+ MAP(CAST(2.34567890 AS DECIMAL(18,8)), CAST(2.34567890 AS
DECIMAL(18,8))), -- t_map_decimal_precision_18
+ MAP(CAST(3.345678901234567890 AS DECIMAL(38,16)),
CAST(2.345678901234567890 AS DECIMAL(38,16))), -- t_map_decimal_precision_38
+ ARRAY('string1', 'string2'), -- t_ARRAY_string
+ ARRAY(4, 5, 6), -- t_ARRAY_int
+ ARRAY(300000000000, 400000000000), -- t_ARRAY_bigint
+ ARRAY(CAST(3.3 AS FLOAT), CAST(4.4 AS FLOAT)), -- t_ARRAY_float
+ ARRAY(CAST(3.123456789 AS DOUBLE), CAST(4.123456789 AS DOUBLE)), --
t_ARRAY_double
+ ARRAY(false, true), -- t_ARRAY_boolean
+ ARRAY(CAST(3.3 AS DECIMAL(2,1)), CAST(4.4 AS DECIMAL(2,1))), --
t_ARRAY_decimal_precision_2
+ ARRAY(CAST(3.45 AS DECIMAL(4,2)), CAST(4.56 AS DECIMAL(4,2))), --
t_ARRAY_decimal_precision_4
+ ARRAY(CAST(8.4567 AS DECIMAL(8,4)), CAST(4.5678 AS DECIMAL(8,4))),
-- t_ARRAY_decimal_precision_8
+ ARRAY(CAST(3.45678901 AS DECIMAL(17,8)), CAST(4.56789012 AS
DECIMAL(17,8))), -- t_ARRAY_decimal_precision_17
+ ARRAY(CAST(3.45678901 AS DECIMAL(18,8)), CAST(4.56789012 AS
DECIMAL(18,8))), -- t_ARRAY_decimal_precision_18
+ ARRAY(CAST(3.456789012345678901 AS DECIMAL(38,16)),
CAST(4.567890123456789012 AS DECIMAL(38,16))), -- t_ARRAY_decimal_precision_38
+ NAMED_STRUCT('s_bigint', -1234567890), -- t_struct_bigint
+ MAP('key', ARRAY(NAMED_STRUCT('s_int', -123))), -- t_complex
+ NAMED_STRUCT('struct_field', ARRAY('value1', 'value2')), --
t_struct_nested
+ NAMED_STRUCT('struct_field_null', null, 'struct_field_null2', null),
-- t_struct_null
+ NAMED_STRUCT('struct_non_nulls_after_nulls1', -123,
'struct_non_nulls_after_nulls2', 'value'), -- t_struct_non_nulls_after_nulls
+ NAMED_STRUCT('struct_field1', -123, 'struct_field2', 'value',
'strict_field3', NAMED_STRUCT('nested_struct_field1', -123,
'nested_struct_field2', 'nested_value')), --
t_nested_struct_non_nulls_after_nulls
+ MAP('null_key', null), -- t_map_null_value
+ ARRAY(null, 'value1', 'value2'), --
t_ARRAY_string_starting_with_nulls
+ ARRAY('value1', null, 'value2'), --
t_ARRAY_string_with_nulls_in_between
+ ARRAY('value11', 'value2', null), -- t_ARRAY_string_ending_with_nulls
+ ARRAY(null, null, null), -- t_ARRAY_string_all_nulls
+ 20240322 -- dt
+ );
+ """
+ order_qt_q02 """ select * from ${all_types_table};
+ """
+ // INSERT #3: only four columns are listed; the remaining columns are given no value by this statement
+ sql """
+ INSERT INTO ${all_types_table}(float_col, t_map_int,
t_ARRAY_decimal_precision_8, t_ARRAY_string_starting_with_nulls)
+ VALUES (
+ CAST(123.45 AS FLOAT), -- float_col
+ MAP(1, 10), -- t_map_int
+ ARRAY(CAST(1.2345 AS DECIMAL(8,4)), CAST(2.3456 AS DECIMAL(8,4))),
-- t_ARRAY_decimal_precision_8
+ ARRAY(null, 'value1', 'value2') -- t_ARRAY_string_starting_with_nulls
+ );
+ """
+ order_qt_q03 """ select * from ${all_types_table};
+ """
+ // clean up the table created by this closure
+ sql """ DROP TABLE ${all_types_table}; """
+ }
+
+ def q03 = { String format_compression, String catalog_name ->
+ def parts = format_compression.split("_")
+ def format = parts[0]
+ def compression = parts[1]
+ def all_types_partition_table =
"iceberg_all_types_par_glue_rest_${format_compression}_master"
+ sql """ DROP TABLE IF EXISTS `${all_types_partition_table}`; """
+ sql """
+ CREATE TABLE `${all_types_partition_table}`(
+ `boolean_col` boolean,
+ `int_col` int,
+ `bigint_col` bigint,
+ `float_col` float,
+ `double_col` double,
+ `decimal_col1` decimal(9,0),
+ `decimal_col2` decimal(8,4),
+ `decimal_col3` decimal(18,6),
+ `decimal_col4` decimal(38,12),
+ `string_col` string,
+ `date_col` date,
+ `timestamp_col1` datetime,
+ `timestamp_col2` datetime,
+ `timestamp_col3` datetime,
+ `t_map_string` map<string,string>,
+ `t_map_int` map<int,int>,
+ `t_map_bigint` map<bigint,bigint>,
+ `t_map_float` map<float,float>,
+ `t_map_double` map<double,double>,
+ `t_map_boolean` map<boolean,boolean>,
+ `t_map_decimal_precision_2` map<decimal(2,1),decimal(2,1)>,
+ `t_map_decimal_precision_4` map<decimal(4,2),decimal(4,2)>,
+ `t_map_decimal_precision_8` map<decimal(8,4),decimal(8,4)>,
+ `t_map_decimal_precision_17` map<decimal(17,8),decimal(17,8)>,
+ `t_map_decimal_precision_18` map<decimal(18,8),decimal(18,8)>,
+ `t_map_decimal_precision_38` map<decimal(38,16),decimal(38,16)>,
+ `t_array_string` array<string>,
+ `t_array_int` array<int>,
+ `t_array_bigint` array<bigint>,
+ `t_array_float` array<float>,
+ `t_array_double` array<double>,
+ `t_array_boolean` array<boolean>,
+ `t_array_decimal_precision_2` array<decimal(2,1)>,
+ `t_array_decimal_precision_4` array<decimal(4,2)>,
+ `t_array_decimal_precision_8` array<decimal(8,4)>,
+ `t_array_decimal_precision_17` array<decimal(17,8)>,
+ `t_array_decimal_precision_18` array<decimal(18,8)>,
+ `t_array_decimal_precision_38` array<decimal(38,16)>,
+ `t_struct_bigint` struct<s_bigint:bigint>,
+ `t_complex` map<string,array<struct<s_int:int>>>,
+ `t_struct_nested` struct<struct_field:array<string>>,
+ `t_struct_null`
struct<struct_field_null:string,struct_field_null2:string>,
+ `t_struct_non_nulls_after_nulls`
struct<struct_non_nulls_after_nulls1:int,struct_non_nulls_after_nulls2:string>,
+ `t_nested_struct_non_nulls_after_nulls`
struct<struct_field1:int,struct_field2:string,strict_field3:struct<nested_struct_field1:int,nested_struct_field2:string>>,
+ `t_map_null_value` map<string,string>,
+ `t_array_string_starting_with_nulls` array<string>,
+ `t_array_string_with_nulls_in_between` array<string>,
+ `t_array_string_ending_with_nulls` array<string>,
+ `t_array_string_all_nulls` array<string>,
+ `dt` int) ENGINE=iceberg
+ PARTITION BY LIST (dt) ()
+ properties (
+ "compression-codec" = ${compression},
+ "write-format"=${format}
+ );
+ """
+
+ sql """
+ INSERT INTO ${all_types_partition_table}
+ VALUES (
+ 1, -- boolean_col
+ 2147483647, -- int_col
+ 9223372036854775807, -- bigint_col
+ 123.45, -- float_col
+ 123456.789, -- double_col
+ 123456789, -- decimal_col1
+ 1234.5678, -- decimal_col2
+ CAST(123456.789012 AS DECIMAL(18,6)), -- decimal_col3
+ CAST(123456789.012345678901 AS DECIMAL(38,12)), -- decimal_col4
+ 'string_value', -- string_col
+ '2024-03-20', -- date_col
+ '2024-03-20 12:00:00', -- timestamp_col1
+ '2024-03-20 12:00:00.123456789', -- timestamp_col2
+ '2024-03-20 12:00:00.123456789', -- timestamp_col3
+ MAP('key1', 'value1'), -- t_map_string
+ MAP(1, 10), -- t_map_int
+ MAP(1, 100000000000), -- t_map_bigint
+ MAP(CAST(1.1 AS FLOAT), CAST(10.1 AS FLOAT)), -- t_map_float
+ MAP(CAST(1.1 AS DOUBLE), CAST(10.1 AS DOUBLE)), -- t_map_double
+ MAP(true, false), -- t_map_boolean
+ MAP(CAST(1.1 AS DECIMAL(2,1)), CAST(1.1 AS DECIMAL(2,1))), --
t_map_decimal_precision_2
+ MAP(CAST(1.23 AS DECIMAL(4,2)), CAST(1.23 AS DECIMAL(4,2))), --
t_map_decimal_precision_4
+ MAP(CAST(1.2345 AS DECIMAL(8,4)), CAST(1.2345 AS DECIMAL(8,4))), --
t_map_decimal_precision_8
+ MAP(CAST(1.23456789 AS DECIMAL(17,8)), CAST(1.23456789 AS
DECIMAL(17,8))), -- t_map_decimal_precision_17
+ MAP(CAST(1.23456789 AS DECIMAL(18,8)), CAST(1.23456789 AS
DECIMAL(18,8))), -- t_map_decimal_precision_18
+ MAP(CAST(1.234567890123456789 AS DECIMAL(38,16)),
CAST(1.234567890123456789 AS DECIMAL(38,16))), -- t_map_decimal_precision_38
+ ARRAY('string1', 'string2'), -- t_ARRAY_string
+ ARRAY(1, 2, 3), -- t_ARRAY_int
+ ARRAY(100000000000, 200000000000), -- t_ARRAY_bigint
+ ARRAY(CAST(1.1 AS FLOAT), CAST(2.2 AS FLOAT)), -- t_ARRAY_float
+ ARRAY(CAST(1.123456789 AS DOUBLE), CAST(2.123456789 AS DOUBLE)), --
t_ARRAY_double
+ ARRAY(true, false), -- t_ARRAY_boolean
+ ARRAY(CAST(1.1 AS DECIMAL(2,1)), CAST(2.2 AS DECIMAL(2,1))), --
t_ARRAY_decimal_precision_2
+ ARRAY(CAST(1.23 AS DECIMAL(4,2)), CAST(2.34 AS DECIMAL(4,2))), --
t_ARRAY_decimal_precision_4
+ ARRAY(CAST(1.2345 AS DECIMAL(8,4)), CAST(2.3456 AS DECIMAL(8,4))),
-- t_ARRAY_decimal_precision_8
+ ARRAY(CAST(1.23456789 AS DECIMAL(17,8)), CAST(2.34567891 AS
DECIMAL(17,8))), -- t_ARRAY_decimal_precision_17
+ ARRAY(CAST(1.23456789 AS DECIMAL(18,8)), CAST(2.34567891 AS
DECIMAL(18,8))), -- t_ARRAY_decimal_precision_18
+ ARRAY(CAST(1.234567890123456789 AS DECIMAL(38,16)),
CAST(2.345678901234567890 AS DECIMAL(38,16))), -- t_ARRAY_decimal_precision_38
+ NAMED_STRUCT('s_bigint', 1234567890), -- t_struct_bigint
+ MAP('key', ARRAY(NAMED_STRUCT('s_int', 123))), -- t_complex
+ NAMED_STRUCT('struct_field', ARRAY('value1', 'value2')), --
t_struct_nested
+ NAMED_STRUCT('struct_field_null', null, 'struct_field_null2', null),
-- t_struct_null
+ NAMED_STRUCT('struct_non_nulls_after_nulls1', 123,
'struct_non_nulls_after_nulls2', 'value'), -- t_struct_non_nulls_after_nulls
+ NAMED_STRUCT('struct_field1', 123, 'struct_field2', 'value',
'strict_field3', NAMED_STRUCT('nested_struct_field1', 123,
'nested_struct_field2', 'nested_value')), --
t_nested_struct_non_nulls_after_nulls
+ MAP('null_key', null), -- t_map_null_value
+ ARRAY(null, 'value1', 'value2'), --
t_ARRAY_string_starting_with_nulls
+ ARRAY('value1', null, 'value2'), --
t_ARRAY_string_with_nulls_in_between
+ ARRAY('value1', 'value2', null), -- t_ARRAY_string_ending_with_nulls
+ ARRAY(null, null, null), -- t_ARRAY_string_all_nulls
+ 20240320 -- dt
+ );
+ """
+ order_qt_q01 """ select * from ${all_types_partition_table};
+ """
+
+ sql """
+ INSERT INTO ${all_types_partition_table}
+ VALUES (
+ 1, -- boolean_col
+ 2147483647, -- int_col
+ 9223372036854775807, -- bigint_col
+ CAST(123.45 AS FLOAT), -- float_col
+ CAST(123456.789 AS DOUBLE), -- double_col
+ CAST(123456789 AS DECIMAL(9,0)), -- decimal_col1
+ CAST(1234.5678 AS DECIMAL(8,4)), -- decimal_col2
+ CAST(123456.789012 AS DECIMAL(18,6)), -- decimal_col3
+ CAST(123456789.012345678901 AS DECIMAL(38,12)), -- decimal_col4
+ 'string_value', -- string_col
+ '2024-03-20', -- date_col
+ '2024-03-20 12:00:00', -- timestamp_col1
+ '2024-03-20 12:00:00.123456789', -- timestamp_col2
+ '2024-03-20 12:00:00.123456789', -- timestamp_col3
+ MAP('key1', 'value1'), -- t_map_string
+ MAP(1, 10), -- t_map_int
+ MAP(1, 100000000000), -- t_map_bigint
+ MAP(CAST(1.1 AS FLOAT), CAST(10.1 AS FLOAT)), -- t_map_float
+ MAP(CAST(1.1 AS DOUBLE), CAST(10.1 AS DOUBLE)), -- t_map_double
+ MAP(true, false), -- t_map_boolean
+ MAP(CAST(1.1 AS DECIMAL(2,1)), CAST(1.1 AS DECIMAL(2,1))), --
t_map_decimal_precision_2
+ MAP(CAST(1.23 AS DECIMAL(4,2)), CAST(1.23 AS DECIMAL(4,2))), --
t_map_decimal_precision_4
+ MAP(CAST(1.2345 AS DECIMAL(8,4)), CAST(1.2345 AS DECIMAL(8,4))), --
t_map_decimal_precision_8
+ MAP(CAST(1.23456789 AS DECIMAL(17,8)), CAST(1.23456789 AS
DECIMAL(17,8))), -- t_map_decimal_precision_17
+ MAP(CAST(1.23456789 AS DECIMAL(18,8)), CAST(1.23456789 AS
DECIMAL(18,8))), -- t_map_decimal_precision_18
+ MAP(CAST(1.234567890123456789 AS DECIMAL(38,16)),
CAST(1.234567890123456789 AS DECIMAL(38,16))), -- t_map_decimal_precision_38
+ ARRAY('string1', 'string2'), -- t_ARRAY_string
+ ARRAY(1, 2, 3), -- t_ARRAY_int
+ ARRAY(100000000000, 200000000000), -- t_ARRAY_bigint
+ ARRAY(CAST(1.1 AS FLOAT), CAST(2.2 AS FLOAT)), -- t_ARRAY_float
+ ARRAY(CAST(1.123456789 AS DOUBLE), CAST(2.123456789 AS DOUBLE)), --
t_ARRAY_double
+ ARRAY(true, false), -- t_ARRAY_boolean
+ ARRAY(CAST(1.1 AS DECIMAL(2,1)), CAST(2.2 AS DECIMAL(2,1))), --
t_ARRAY_decimal_precision_2
+ ARRAY(CAST(1.23 AS DECIMAL(4,2)), CAST(2.34 AS DECIMAL(4,2))), --
t_ARRAY_decimal_precision_4
+ ARRAY(CAST(1.2345 AS DECIMAL(8,4)), CAST(2.3456 AS DECIMAL(8,4))),
-- t_ARRAY_decimal_precision_8
+ ARRAY(CAST(1.23456789 AS DECIMAL(17,8)), CAST(2.34567891 AS
DECIMAL(17,8))), -- t_ARRAY_decimal_precision_17
+ ARRAY(CAST(1.23456789 AS DECIMAL(18,8)), CAST(2.34567891 AS
DECIMAL(18,8))), -- t_ARRAY_decimal_precision_18
+ ARRAY(CAST(1.234567890123456789 AS DECIMAL(38,16)),
CAST(2.345678901234567890 AS DECIMAL(38,16))), -- t_ARRAY_decimal_precision_38
+ NAMED_STRUCT('s_bigint', 1234567890), -- t_struct_bigint
+ MAP('key', ARRAY(NAMED_STRUCT('s_int', 123))), -- t_complex
+ NAMED_STRUCT('struct_field', ARRAY('value1', 'value2')), --
t_struct_nested
+ NAMED_STRUCT('struct_field_null', null, 'struct_field_null2', null),
-- t_struct_null
+ NAMED_STRUCT('struct_non_nulls_after_nulls1', 123,
'struct_non_nulls_after_nulls2', 'value'), -- t_struct_non_nulls_after_nulls
+ NAMED_STRUCT('struct_field1', 123, 'struct_field2', 'value',
'strict_field3', NAMED_STRUCT('nested_struct_field1', 123,
'nested_struct_field2', 'nested_value')), --
t_nested_struct_non_nulls_after_nulls
+ MAP('null_key', null), -- t_map_null_value
+ ARRAY(null, 'value1', 'value2'), --
t_ARRAY_string_starting_with_nulls
+ ARRAY('value1', null, 'value2'), --
t_ARRAY_string_with_nulls_in_between
+ ARRAY('value1', 'value2', null), -- t_ARRAY_string_ending_with_nulls
+ ARRAY(null, null, null), -- t_ARRAY_string_all_nulls
+ 20240320 -- dt
+ ),
+ (
+ 0, -- boolean_col
+ -2147483648, -- int_col
+ -9223372036854775808, -- bigint_col
+ CAST(-123.45 AS FLOAT), -- float_col
+ CAST(-123456.789 AS DOUBLE), -- double_col
+ CAST(-123456789 AS DECIMAL(9,0)), -- decimal_col1
+ CAST(-1234.5678 AS DECIMAL(8,4)), -- decimal_col2
+ CAST(-123456.789012 AS DECIMAL(18,6)), -- decimal_col3
+ CAST(-123456789.012345678901 AS DECIMAL(38,12)), -- decimal_col4
+ 'string_value', -- string_col
+ '2024-03-21', -- date_col
+ '2024-03-21 12:00:00', -- timestamp_col1
+ '2024-03-21 12:00:00.123456789', -- timestamp_col2
+ '2024-03-21 12:00:00.123456789', -- timestamp_col3
+ MAP('key1', 'value1'), -- t_map_string
+ MAP(2, 20), -- t_map_int
+ MAP(2, 200000000000), -- t_map_bigint
+ MAP(CAST(2.2 AS FLOAT), CAST(20.2 AS FLOAT)), -- t_map_float
+ MAP(CAST(2.2 AS DOUBLE), CAST(20.2 AS DOUBLE)), -- t_map_double
+ MAP(false, true), -- t_map_boolean
+ MAP(CAST(2.2 AS DECIMAL(2,1)), CAST(2.2 AS DECIMAL(2,1))), --
t_map_decimal_precision_2
+ MAP(CAST(2.34 AS DECIMAL(4,2)), CAST(2.34 AS DECIMAL(4,2))), --
t_map_decimal_precision_4
+ MAP(CAST(2.3456 AS DECIMAL(8,4)), CAST(2.3456 AS DECIMAL(8,4))), --
t_map_decimal_precision_8
+ MAP(CAST(2.34567890 AS DECIMAL(17,8)), CAST(2.34567890 AS
DECIMAL(17,8))), -- t_map_decimal_precision_17
+ MAP(CAST(2.34567890 AS DECIMAL(18,8)), CAST(2.34567890 AS
DECIMAL(18,8))), -- t_map_decimal_precision_18
+ MAP(CAST(2.345678901234567890 AS DECIMAL(38,16)),
CAST(2.345678901234567890 AS DECIMAL(38,16))), -- t_map_decimal_precision_38
+ ARRAY('string1', 'string2'), -- t_ARRAY_string
+ ARRAY(4, 5, 6), -- t_ARRAY_int
+ ARRAY(300000000000, 400000000000), -- t_ARRAY_bigint
+ ARRAY(CAST(3.3 AS FLOAT), CAST(4.4 AS FLOAT)), -- t_ARRAY_float
+ ARRAY(CAST(3.123456789 AS DOUBLE), CAST(4.123456789 AS DOUBLE)), --
t_ARRAY_double
+ ARRAY(false, true), -- t_ARRAY_boolean
+ ARRAY(CAST(3.3 AS DECIMAL(2,1)), CAST(4.4 AS DECIMAL(2,1))), --
t_ARRAY_decimal_precision_2
+ ARRAY(CAST(3.45 AS DECIMAL(4,2)), CAST(4.56 AS DECIMAL(4,2))), --
t_ARRAY_decimal_precision_4
+ ARRAY(CAST(3.4567 AS DECIMAL(8,4)), CAST(4.5678 AS DECIMAL(8,4))),
-- t_ARRAY_decimal_precision_8
+ ARRAY(CAST(3.45678901 AS DECIMAL(17,8)), CAST(4.56789012 AS
DECIMAL(17,8))), -- t_ARRAY_decimal_precision_17
+ ARRAY(CAST(3.45678901 AS DECIMAL(18,8)), CAST(4.56789012 AS
DECIMAL(18,8))), -- t_ARRAY_decimal_precision_18
+ ARRAY(CAST(3.456789012345678901 AS DECIMAL(38,16)),
CAST(4.567890123456789012 AS DECIMAL(38,16))), -- t_ARRAY_decimal_precision_38
+ NAMED_STRUCT('s_bigint', -1234567890), -- t_struct_bigint
+ MAP('key', ARRAY(NAMED_STRUCT('s_int', -123))), -- t_complex
+ NAMED_STRUCT('struct_field', ARRAY('value1', 'value2')), --
t_struct_nested
+ NAMED_STRUCT('struct_field_null', null, 'struct_field_null2', null),
-- t_struct_null
+ NAMED_STRUCT('struct_non_nulls_after_nulls1', -123,
'struct_non_nulls_after_nulls2', 'value'), -- t_struct_non_nulls_after_nulls
+ NAMED_STRUCT('struct_field1', -123, 'struct_field2', 'value',
'strict_field3', NAMED_STRUCT('nested_struct_field1', -123,
'nested_struct_field2', 'nested_value')), --
t_nested_struct_non_nulls_after_nulls
+ MAP('null_key', null), -- t_map_null_value
+ ARRAY(null, 'value1', 'value2'), --
t_ARRAY_string_starting_with_nulls
+ ARRAY('value1', null, 'value2'), --
t_ARRAY_string_with_nulls_in_between
+ ARRAY('value1', 'value2', null), -- t_ARRAY_string_ending_with_nulls
+ ARRAY(null, null, null), -- t_ARRAY_string_all_nulls
+ 20240321 -- dt
+ ),
+ (
+ 0, -- boolean_col
+ -2147483648, -- int_col
+ -9223372036854775808, -- bigint_col
+ -123.45, -- float_col
+ -123456.789, -- double_col
+ CAST(-123456789 AS DECIMAL(9,0)), -- decimal_col1
+ CAST(-1234.5678 AS DECIMAL(8,4)), -- decimal_col2
+ CAST(-123456.789012 AS DECIMAL(18,6)), -- decimal_col3
+ CAST(-123456789.012345678901 AS DECIMAL(38,12)), -- decimal_col4
+ 'string_value', -- string_col
+ '2024-03-22', -- date_col
+ '2024-03-22 12:00:00', -- timestamp_col1
+ '2024-03-22 12:00:00.123456789', -- timestamp_col2
+ '2024-03-22 12:00:00.123456789', -- timestamp_col3
+ MAP('key1', 'value1'), -- t_map_string
+ MAP(3, 20), -- t_map_int
+ MAP(3, 200000000000), -- t_map_bigint
+ MAP(CAST(3.2 AS FLOAT), CAST(20.2 AS FLOAT)), -- t_map_float
+ MAP(CAST(3.2 AS DOUBLE), CAST(20.2 AS DOUBLE)), -- t_map_double
+ MAP(false, true), -- t_map_boolean
+ MAP(CAST(3.2 AS DECIMAL(2,1)), CAST(2.2 AS DECIMAL(2,1))), --
t_map_decimal_precision_2
+ MAP(CAST(3.34 AS DECIMAL(4,2)), CAST(2.34 AS DECIMAL(4,2))), --
t_map_decimal_precision_4
+ MAP(CAST(2.3456 AS DECIMAL(8,4)), CAST(2.3456 AS DECIMAL(8,4))), --
t_map_decimal_precision_8
+ MAP(CAST(2.34567890 AS DECIMAL(17,8)), CAST(2.34567890 AS
DECIMAL(17,8))), -- t_map_decimal_precision_17
+ MAP(CAST(2.34567890 AS DECIMAL(18,8)), CAST(2.34567890 AS
DECIMAL(18,8))), -- t_map_decimal_precision_18
+ MAP(CAST(3.345678901234567890 AS DECIMAL(38,16)),
CAST(2.345678901234567890 AS DECIMAL(38,16))), -- t_map_decimal_precision_38
+ ARRAY('string1', 'string2'), -- t_ARRAY_string
+ ARRAY(4, 5, 6), -- t_ARRAY_int
+ ARRAY(300000000000, 400000000000), -- t_ARRAY_bigint
+ ARRAY(CAST(3.3 AS FLOAT), CAST(4.4 AS FLOAT)), -- t_ARRAY_float
+ ARRAY(CAST(3.123456789 AS DOUBLE), CAST(4.123456789 AS DOUBLE)), --
t_ARRAY_double
+ ARRAY(false, true), -- t_ARRAY_boolean
+ ARRAY(CAST(3.3 AS DECIMAL(2,1)), CAST(4.4 AS DECIMAL(2,1))), --
t_ARRAY_decimal_precision_2
+ ARRAY(CAST(3.45 AS DECIMAL(4,2)), CAST(4.56 AS DECIMAL(4,2))), --
t_ARRAY_decimal_precision_4
+ ARRAY(CAST(8.4567 AS DECIMAL(8,4)), CAST(4.5678 AS DECIMAL(8,4))),
-- t_ARRAY_decimal_precision_8
+ ARRAY(CAST(3.45678901 AS DECIMAL(17,8)), CAST(4.56789012 AS
DECIMAL(17,8))), -- t_ARRAY_decimal_precision_17
+ ARRAY(CAST(3.45678901 AS DECIMAL(18,8)), CAST(4.56789012 AS
DECIMAL(18,8))), -- t_ARRAY_decimal_precision_18
+ ARRAY(CAST(3.456789012345678901 AS DECIMAL(38,16)),
CAST(4.567890123456789012 AS DECIMAL(38,16))), -- t_ARRAY_decimal_precision_38
+ NAMED_STRUCT('s_bigint', -1234567890), -- t_struct_bigint
+ MAP('key', ARRAY(NAMED_STRUCT('s_int', -123))), -- t_complex
+ NAMED_STRUCT('struct_field', ARRAY('value1', 'value2')), --
t_struct_nested
+ NAMED_STRUCT('struct_field_null', null, 'struct_field_null2', null),
-- t_struct_null
+ NAMED_STRUCT('struct_non_nulls_after_nulls1', -123,
'struct_non_nulls_after_nulls2', 'value'), -- t_struct_non_nulls_after_nulls
+ NAMED_STRUCT('struct_field1', -123, 'struct_field2', 'value',
'strict_field3', NAMED_STRUCT('nested_struct_field1', -123,
'nested_struct_field2', 'nested_value')), --
t_nested_struct_non_nulls_after_nulls
+ MAP('null_key', null), -- t_map_null_value
+ ARRAY(null, 'value1', 'value2'), --
t_ARRAY_string_starting_with_nulls
+ ARRAY('value1', null, 'value2'), --
t_ARRAY_string_with_nulls_in_between
+ ARRAY('value11', 'value2', null), -- t_ARRAY_string_ending_with_nulls
+ ARRAY(null, null, null), -- t_ARRAY_string_all_nulls
+ 20240322 -- dt
+ );
+ """
+ order_qt_q02 """ select * from ${all_types_partition_table};
+ """
+
+ sql """
+ INSERT INTO ${all_types_partition_table}(float_col, t_map_int,
t_ARRAY_decimal_precision_8, t_ARRAY_string_starting_with_nulls, dt)
+ VALUES (
+ 123.45, -- float_col
+ MAP(1, 10), -- t_map_int
+ ARRAY(CAST(1.2345 AS DECIMAL(8,4)), CAST(2.3456 AS DECIMAL(8,4))),
-- t_ARRAY_decimal_precision_8
+ ARRAY(null, 'value1', 'value2'), --
t_ARRAY_string_starting_with_nulls
+ 20240321 -- dt
+ );
+ """
+ order_qt_q03 """ select * from ${all_types_partition_table};
+ """
+
+ // just test
+ sql """
+ SELECT
+ CASE
+ WHEN file_size_in_bytes BETWEEN 0 AND 8 * 1024 * 1024 THEN
'0-8M'
+ WHEN file_size_in_bytes BETWEEN 8 * 1024 * 1024 + 1 AND 32 *
1024 * 1024 THEN '8-32M'
+ WHEN file_size_in_bytes BETWEEN 2 * 1024 * 1024 + 1 AND 128 *
1024 * 1024 THEN '32-128M'
+ WHEN file_size_in_bytes BETWEEN 128 * 1024 * 1024 + 1 AND 512
* 1024 * 1024 THEN '128-512M'
+ WHEN file_size_in_bytes > 512 * 1024 * 1024 THEN '> 512M'
+ ELSE 'Unknown'
+ END AS SizeRange,
+ COUNT(*) AS FileNum
+ FROM ${all_types_partition_table}\$data_files
+ GROUP BY
+ SizeRange;
+ """
+
+ sql """ DROP TABLE ${all_types_partition_table}; """
+ }
+
+ // Skip the whole suite unless otherConfigs carries
+ // enableExternalIcebergTest = "true" (compared case-insensitively).
+ String enabled = context.config.otherConfigs.get("enableExternalIcebergTest")
+ if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+     logger.info("disable iceberg test")
+     return
+ }
+
+ String catalog_name = "test_s3tables_glue_rest"
+ // Property list for the catalog; it is spliced verbatim into the
+ // CREATE CATALOG statement below.
+ String props = context.config.otherConfigs.get("icebergS3TablesCatalogGlueRest")
+ // A null value would be interpolated into the SQL as the text "null", and a
+ // blank one would leave the property list empty, so fail here with a message
+ // that names the missing key instead of relying on the statement's error.
+ if (props == null || props.trim().isEmpty()) {
+     throw new IllegalStateException("icebergS3TablesCatalogGlueRest must be set in otherConfigs "
+             + "when enableExternalIcebergTest is true")
+ }
+ sql """drop catalog if exists ${catalog_name};"""
+ sql """
+ create catalog ${catalog_name} properties (
+ ${props}
+ );
+ """
+
+ // Switch to the new catalog and recreate the working database from scratch.
+ sql """ switch ${catalog_name};"""
+ sql """ drop database if exists iceberg_s3tables_glue_rest force"""
+ sql """ create database iceberg_s3tables_glue_rest"""
+ sql """ use iceberg_s3tables_glue_rest;"""
+ sql """ set enable_fallback_to_original_planner=false """
+
+ // Run q01 and q03 once for every entry of format_compressions.
+ for (String format_compression in format_compressions) {
+     logger.info("Process format_compression " + format_compression)
+     q01(format_compression, catalog_name)
+     q03(format_compression, catalog_name)
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]