This is an automated email from the ASF dual-hosted git repository. sivabalan pushed a commit to branch release-0.10.1 in repository https://gitbox.apache.org/repos/asf/hudi.git
commit 6df46394eec649658d355c4df4ca0acb77467db4 Author: Thinking Chen <[email protected]> AuthorDate: Wed Jan 19 03:51:09 2022 +0800 [HUDI-3245] Convert uppercase letters to lowercase in storage configs (#4602) --- .../main/java/org/apache/hudi/config/HoodieStorageConfig.java | 4 ++-- .../src/main/java/org/apache/hudi/DataSourceUtils.java | 9 +++++---- .../src/test/java/org/apache/hudi/TestDataSourceUtils.java | 8 ++++---- .../src/main/java/org/apache/hudi/internal/DefaultSource.java | 2 +- .../main/java/org/apache/hudi/spark3/internal/DefaultSource.java | 7 +++++-- 5 files changed, 17 insertions(+), 13 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieStorageConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieStorageConfig.java index 22118da..42689ec 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieStorageConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieStorageConfig.java @@ -116,14 +116,14 @@ public class HoodieStorageConfig extends HoodieConfig { .withDocumentation("Whether to use dictionary encoding"); public static final ConfigProperty<String> PARQUET_WRITE_LEGACY_FORMAT_ENABLED = ConfigProperty - .key("hoodie.parquet.writeLegacyFormat.enabled") + .key("hoodie.parquet.writelegacyformat.enabled") .defaultValue("false") .withDocumentation("Sets spark.sql.parquet.writeLegacyFormat. If true, data will be written in a way of Spark 1.4 and earlier. " + "For example, decimal values will be written in Parquet's fixed-length byte array format which other systems such as Apache Hive and Apache Impala use. " + "If false, the newer format in Parquet will be used. For example, decimals will be written in int-based format."); public static final ConfigProperty<String> PARQUET_OUTPUT_TIMESTAMP_TYPE = ConfigProperty - .key("hoodie.parquet.outputTimestampType") + .key("hoodie.parquet.outputtimestamptype") .defaultValue("TIMESTAMP_MILLIS") .withDocumentation("Sets spark.sql.parquet.outputTimestampType. Parquet timestamp type to use when Spark writes data to Parquet files."); diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/DataSourceUtils.java b/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/DataSourceUtils.java index 3af6ccc..b3bc3b2 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/DataSourceUtils.java +++ b/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/DataSourceUtils.java @@ -38,6 +38,7 @@ import org.apache.hudi.config.HoodieClusteringConfig; import org.apache.hudi.config.HoodieCompactionConfig; import org.apache.hudi.config.HoodieIndexConfig; import org.apache.hudi.config.HoodiePayloadConfig; +import org.apache.hudi.config.HoodieStorageConfig; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieNotSupportedException; @@ -316,12 +317,12 @@ public class DataSourceUtils { // Now by default ParquetWriteSupport will write DecimalType to parquet as int32/int64 when the scale of decimalType < Decimal.MAX_LONG_DIGITS(), // but AvroParquetReader which used by HoodieParquetReader cannot support read int32/int64 as DecimalType. - // try to find current schema whether contains that DecimalType, and auto set the value of "hoodie.parquet.writeLegacyFormat.enabled" + // try to find current schema whether contains that DecimalType, and auto set the value of "hoodie.parquet.writelegacyformat.enabled" public static void mayBeOverwriteParquetWriteLegacyFormatProp(Map<String, String> properties, StructType schema) { if (DataTypeUtils.foundSmallPrecisionDecimalType(schema) - && !Boolean.parseBoolean(properties.getOrDefault("hoodie.parquet.writeLegacyFormat.enabled", "false"))) { - properties.put("hoodie.parquet.writeLegacyFormat.enabled", "true"); - LOG.warn("Small Decimal Type found in current schema, auto set the value of hoodie.parquet.writeLegacyFormat.enabled to true"); + && !Boolean.parseBoolean(properties.getOrDefault(HoodieStorageConfig.PARQUET_WRITE_LEGACY_FORMAT_ENABLED.key(), "false"))) { + properties.put(HoodieStorageConfig.PARQUET_WRITE_LEGACY_FORMAT_ENABLED.key(), "true"); + LOG.warn("Small Decimal Type found in current schema, auto set the value of hoodie.parquet.writelegacyformat.enabled to true"); } } } diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/TestDataSourceUtils.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/TestDataSourceUtils.java index a130c3a..0c5a212 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/TestDataSourceUtils.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/TestDataSourceUtils.java @@ -299,18 +299,18 @@ public class TestDataSourceUtils { StructType structType = StructType$.MODULE$.apply(structFields); // create write options Map<String, String> options = new HashMap<>(); - options.put("hoodie.parquet.writeLegacyFormat.enabled", String.valueOf(defaultWriteValue)); + options.put("hoodie.parquet.writelegacyformat.enabled", String.valueOf(defaultWriteValue)); // start test mayBeOverwriteParquetWriteLegacyFormatProp(options, structType); // check result - boolean res = Boolean.parseBoolean(options.get("hoodie.parquet.writeLegacyFormat.enabled")); + boolean res = Boolean.parseBoolean(options.get("hoodie.parquet.writelegacyformat.enabled")); if (smallDecimal) { - // should auto modify "hoodie.parquet.writeLegacyFormat.enabled" = "true". + // should auto modify "hoodie.parquet.writelegacyformat.enabled" = "true". assertEquals(true, res); } else { - // should not modify the value of "hoodie.parquet.writeLegacyFormat.enabled". + // should not modify the value of "hoodie.parquet.writelegacyformat.enabled". assertEquals(defaultWriteValue, res); } } diff --git a/hudi-spark-datasource/hudi-spark2/src/main/java/org/apache/hudi/internal/DefaultSource.java b/hudi-spark-datasource/hudi-spark2/src/main/java/org/apache/hudi/internal/DefaultSource.java index e607b2f..e9ed609 100644 --- a/hudi-spark-datasource/hudi-spark2/src/main/java/org/apache/hudi/internal/DefaultSource.java +++ b/hudi-spark-datasource/hudi-spark2/src/main/java/org/apache/hudi/internal/DefaultSource.java @@ -68,7 +68,7 @@ public class DefaultSource extends BaseDefaultSource implements DataSourceV2, boolean populateMetaFields = options.getBoolean(HoodieTableConfig.POPULATE_META_FIELDS.key(), Boolean.parseBoolean(HoodieTableConfig.POPULATE_META_FIELDS.defaultValue())); Map<String, String> properties = options.asMap(); - // Auto set the value of "hoodie.parquet.writeLegacyFormat.enabled" + // Auto set the value of "hoodie.parquet.writelegacyformat.enabled" mayBeOverwriteParquetWriteLegacyFormatProp(properties, schema); // 1st arg to createHoodieConfig is not really required to be set. but passing it anyways. HoodieWriteConfig config = DataSourceUtils.createHoodieConfig(options.get(HoodieWriteConfig.AVRO_SCHEMA_STRING.key()).get(), path, tblName, properties); diff --git a/hudi-spark-datasource/hudi-spark3/src/main/java/org/apache/hudi/spark3/internal/DefaultSource.java b/hudi-spark-datasource/hudi-spark3/src/main/java/org/apache/hudi/spark3/internal/DefaultSource.java index 63c09e0..3071894 100644 --- a/hudi-spark-datasource/hudi-spark3/src/main/java/org/apache/hudi/spark3/internal/DefaultSource.java +++ b/hudi-spark-datasource/hudi-spark3/src/main/java/org/apache/hudi/spark3/internal/DefaultSource.java @@ -31,6 +31,7 @@ import org.apache.spark.sql.connector.expressions.Transform; import org.apache.spark.sql.types.StructType; import org.apache.spark.sql.util.CaseInsensitiveStringMap; +import java.util.HashMap; import java.util.Map; import static org.apache.hudi.DataSourceUtils.mayBeOverwriteParquetWriteLegacyFormatProp; @@ -55,8 +56,10 @@ public class DefaultSource extends BaseDefaultSource implements TableProvider { HoodieTableConfig.POPULATE_META_FIELDS.defaultValue())); boolean arePartitionRecordsSorted = Boolean.parseBoolean(properties.getOrDefault(HoodieInternalConfig.BULKINSERT_ARE_PARTITIONER_RECORDS_SORTED, Boolean.toString(HoodieInternalConfig.DEFAULT_BULKINSERT_ARE_PARTITIONER_RECORDS_SORTED))); - // Auto set the value of "hoodie.parquet.writeLegacyFormat.enabled" - mayBeOverwriteParquetWriteLegacyFormatProp(properties, schema); + // Auto set the value of "hoodie.parquet.writelegacyformat.enabled" + // Create a new map as the properties is an unmodifiableMap on Spark 3.2.0 + Map<String, String> newProps = new HashMap<>(properties); + mayBeOverwriteParquetWriteLegacyFormatProp(newProps, schema); // 1st arg to createHoodieConfig is not really required to be set. but passing it anyways. HoodieWriteConfig config = DataSourceUtils.createHoodieConfig(properties.get(HoodieWriteConfig.AVRO_SCHEMA_STRING.key()), path, tblName, properties); return new HoodieDataSourceInternalTable(instantTime, config, schema, getSparkSession(),
