xushiyan commented on a change in pull request #3416:
URL: https://github.com/apache/hudi/pull/3416#discussion_r751071263
##########
File path:
hudi-common/src/main/java/org/apache/hudi/common/config/DFSPropertiesConfiguration.java
##########
@@ -117,7 +159,43 @@ public void addProperties(BufferedReader reader) throws
IOException {
}
}
- public TypedProperties getConfig() {
- return props;
+ public static TypedProperties getGlobalProps() {
+ final TypedProperties globalProps = new TypedProperties();
+ globalProps.putAll(GLOBAL_PROPS);
+ return globalProps;
+ }
+
+ public TypedProperties getProps() {
+ return new TypedProperties(hoodieConfig.getProps());
+ }
+
+ public TypedProperties getProps(boolean includeGlobalProps) {
+ return new TypedProperties(hoodieConfig.getProps(includeGlobalProps));
+ }
+
+ private static Option<Path> getConfPathFromEnv() {
+ String confDir = System.getenv(CONF_FILE_DIR_ENV_NAME);
+ if (confDir == null) {
+ LOG.warn("Cannot find " + CONF_FILE_DIR_ENV_NAME + ", please set it as
the dir of " + DEFAULT_PROPERTIES_FILE);
+ return Option.empty();
+ }
+ return Option.of(new Path("file://" + confDir + File.separator +
DEFAULT_PROPERTIES_FILE));
Review comment:
Why is the `file` scheme hardcoded here? `DEFAULT_CONF_FILE_DIR` already
starts with `file:/`, so prepending `file://` is redundant.
```suggestion
return Option.of(new Path(confDir + File.separator +
DEFAULT_PROPERTIES_FILE));
```
##########
File path:
hudi-common/src/main/java/org/apache/hudi/common/config/DFSPropertiesConfiguration.java
##########
@@ -43,72 +47,110 @@
private static final Logger LOG =
LogManager.getLogger(DFSPropertiesConfiguration.class);
+ private static final String DEFAULT_PROPERTIES_FILE = "hudi-defaults.conf";
+
+ public static final String CONF_FILE_DIR_ENV_NAME = "HUDI_CONF_DIR";
+
+ public static final String DEFAULT_CONF_FILE_DIR = "file:/etc/hudi/conf";
+
+ // props read from hudi-defaults.conf
+ private static TypedProperties GLOBAL_PROPS = loadGlobalProps();
+
private final FileSystem fs;
- private final Path rootFile;
+ private Path currentFilePath;
- private final TypedProperties props;
+ // props read from user defined configuration file or input stream
+ private final HoodieConfig hoodieConfig;
// Keep track of files visited, to detect loops
- private final Set<String> visitedFiles;
+ private final Set<String> visitedFilePaths;
- public DFSPropertiesConfiguration(FileSystem fs, Path rootFile,
TypedProperties defaults) {
+ public DFSPropertiesConfiguration(FileSystem fs, Path filePath) {
this.fs = fs;
- this.rootFile = rootFile;
- this.props = defaults;
- this.visitedFiles = new HashSet<>();
- visitFile(rootFile);
- }
-
- public DFSPropertiesConfiguration(FileSystem fs, Path rootFile) {
- this(fs, rootFile, new TypedProperties());
+ this.currentFilePath = filePath;
+ this.hoodieConfig = new HoodieConfig();
+ this.visitedFilePaths = new HashSet<>();
+ addPropsFromFile(filePath);
}
public DFSPropertiesConfiguration() {
this.fs = null;
- this.rootFile = null;
- this.props = new TypedProperties();
- this.visitedFiles = new HashSet<>();
+ this.currentFilePath = null;
+ this.hoodieConfig = new HoodieConfig();
+ this.visitedFilePaths = new HashSet<>();
}
- private String[] splitProperty(String line) {
- int ind = line.indexOf('=');
- String k = line.substring(0, ind).trim();
- String v = line.substring(ind + 1).trim();
- return new String[] {k, v};
+ /**
+ * Load global props from hudi-defaults.conf which is under
CONF_FILE_DIR_ENV_NAME.
+ * @return Typed Properties
+ */
+ public static TypedProperties loadGlobalProps() {
+ DFSPropertiesConfiguration conf = new DFSPropertiesConfiguration();
+ Option<Path> defaultConfPath = getConfPathFromEnv();
+ if (defaultConfPath.isPresent()) {
+ conf.addPropsFromFile(defaultConfPath.get());
+ } else {
+ try {
+ conf.addPropsFromFile(new Path(DEFAULT_CONF_FILE_DIR));
+ } catch (Exception ignored) {
+ LOG.debug("Didn't find config file under default conf file dir: " +
DEFAULT_CONF_FILE_DIR);
+ }
+ }
+ return conf.getProps();
+ }
+
+ public static void refreshGlobalProps() {
+ GLOBAL_PROPS = loadGlobalProps();
+ }
+
+ public static void clearGlobalProps() {
+ GLOBAL_PROPS = new TypedProperties();
}
- private void visitFile(Path file) {
+ /**
+ * Add properties from external configuration files.
+ *
+ * @param filePath File path for configuration file
+ */
+ public void addPropsFromFile(Path filePath) {
+ if (visitedFilePaths.contains(filePath.toString())) {
+ throw new IllegalStateException("Loop detected; file " + filePath + "
already referenced");
+ }
+ FileSystem fileSystem;
try {
- if (visitedFiles.contains(file.getName())) {
- throw new IllegalStateException("Loop detected; file " + file + "
already referenced");
- }
- visitedFiles.add(file.getName());
- BufferedReader reader = new BufferedReader(new
InputStreamReader(fs.open(file)));
- addProperties(reader);
+ fileSystem = fs != null ? fs : filePath.getFileSystem(new
Configuration());
+ } catch (IOException e) {
+ throw new IllegalArgumentException("Cannot get the file system from file
path", e);
+ }
+ try (BufferedReader reader = new BufferedReader(new
InputStreamReader(fileSystem.open(filePath)))) {
Review comment:
Can you also move the `FileSystem fs = ...` assignment into the try-with-resources
clause, so the FileSystem is closed as well? It would also eliminate one catch block.
##########
File path:
hudi-common/src/main/java/org/apache/hudi/common/config/DFSPropertiesConfiguration.java
##########
@@ -43,72 +47,110 @@
private static final Logger LOG =
LogManager.getLogger(DFSPropertiesConfiguration.class);
+ private static final String DEFAULT_PROPERTIES_FILE = "hudi-defaults.conf";
Review comment:
Can this constant be made public? Since it is always the same file name that
gets read, that information should be exposed publicly.
##########
File path:
hudi-common/src/test/java/org/apache/hudi/common/util/TestDFSPropertiesConfiguration.java
##########
@@ -119,15 +130,25 @@ public void testParsing() {
@Test
public void testIncludes() {
DFSPropertiesConfiguration cfg = new DFSPropertiesConfiguration(dfs, new
Path(dfsBasePath + "/t3.props"));
- TypedProperties props = cfg.getConfig();
+ TypedProperties props = cfg.getProps();
assertEquals(123, props.getInteger("int.prop"));
assertEquals(243.4, props.getDouble("double.prop"), 0.001);
assertTrue(props.getBoolean("boolean.prop"));
assertEquals("t3.value", props.getString("string.prop"));
assertEquals(1354354354, props.getLong("long.prop"));
assertThrows(IllegalStateException.class, () -> {
- new DFSPropertiesConfiguration(dfs, new Path(dfsBasePath + "/t4.props"));
+ cfg.addPropsFromFile(new Path(dfsBasePath + "/t4.props"));
}, "Should error out on a self-included file.");
}
+
+ @Test
+ public void testLoadGlobalConfFile() {
+ assertEquals(5, DFSPropertiesConfiguration.loadGlobalProps().size());
+ assertEquals("jdbc:hive2://localhost:10000",
DFSPropertiesConfiguration.loadGlobalProps().get("hoodie.datasource.hive_sync.jdbcurl"));
+ assertEquals("true",
DFSPropertiesConfiguration.loadGlobalProps().get("hoodie.datasource.hive_sync.use_jdbc"));
+ assertEquals("false",
DFSPropertiesConfiguration.loadGlobalProps().get("hoodie.datasource.hive_sync.support_timestamp"));
+ assertEquals("BLOOM",
DFSPropertiesConfiguration.loadGlobalProps().get("hoodie.index.type"));
+ assertEquals("true",
DFSPropertiesConfiguration.loadGlobalProps().get("hoodie.metadata.enable"));
Review comment:
Can you call `DFSPropertiesConfiguration.loadGlobalProps()` just once,
store the result in a local variable, and assert against that?
##########
File path:
hudi-common/src/test/java/org/apache/hudi/common/util/TestDFSPropertiesConfiguration.java
##########
@@ -70,10 +77,14 @@ public static void initClass() throws Exception {
filePath = new Path(dfsBasePath + "/t4.props");
writePropertiesFile(filePath, new String[] {"double.prop=838.3", "include
= t4.props"});
+
+ // set HUDI_CONF_DIR
+ String testPropsFilePath = new
File("src/test/resources").getAbsolutePath();
Review comment:
Can you add a dedicated folder for the test files used by this test
class? A top-level `hudi-defaults.conf` can easily be picked up unintentionally
by other test cases.
##########
File path:
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestSqlConf.scala
##########
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.hudi
+
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.Path
+import org.apache.hudi.common.config.DFSPropertiesConfiguration
+import org.apache.hudi.common.model.HoodieTableType
+import org.apache.hudi.common.table.{HoodieTableConfig, HoodieTableMetaClient}
+
+import java.io.File
+import java.nio.file.{Files, Paths}
+
+import org.scalatest.BeforeAndAfter
+
+class TestSqlConf extends TestHoodieSqlBase with BeforeAndAfter {
+
+ def setEnv(key: String, value: String): String = {
+ val field = System.getenv().getClass.getDeclaredField("m")
+ field.setAccessible(true)
+ val map =
field.get(System.getenv()).asInstanceOf[java.util.Map[java.lang.String,
java.lang.String]]
+ map.put(key, value)
+ }
+
+ test("Test Hudi Conf") {
+ withTempDir { tmp =>
+ val tableName = generateTableName
+ val tablePath = tmp.getCanonicalPath
+ val partitionVal = "2021"
+ // Create table
+ spark.sql(
+ s"""
+ |create table $tableName (
+ | id int,
+ | name string,
+ | price double,
+ | ts long,
+ | year string
+ |) using hudi
+ | partitioned by (year)
+ | location '$tablePath'
+ | options (
+ | primaryKey ='id',
+ | preCombineField = 'ts'
+ | )
+ """.stripMargin)
+
+ // First merge with a extra input field 'flag' (insert a new record)
+ spark.sql(
+ s"""
+ | merge into $tableName
+ | using (
+ | select 1 as id, 'a1' as name, 10 as price, 1000 as ts, '1' as
flag, $partitionVal as year
+ | ) s0
+ | on s0.id = $tableName.id
+ | when matched and flag = '1' then update set
+ | id = s0.id, name = s0.name, price = s0.price, ts = s0.ts, year =
s0.year
+ | when not matched and flag = '1' then insert *
+ """.stripMargin)
+ checkAnswer(s"select id, name, price, ts, year from $tableName")(
+ Seq(1, "a1", 10.0, 1000, partitionVal)
+ )
+
+ // By default, Spark DML would set table type to COW and use Hive style
partitioning, here we
+ // set table type to MOR and disable Hive style partitioning in the hudi
conf file, and check
+ // if Hudi DML can load these configs correctly
+ assertResult(true)(Files.exists(Paths.get(s"$tablePath/$partitionVal")))
+ assertResult(HoodieTableType.MERGE_ON_READ)(new HoodieTableConfig(
+ new Path(tablePath).getFileSystem(new Configuration),
+ s"$tablePath/" + HoodieTableMetaClient.METAFOLDER_NAME,
+ HoodieTableConfig.PAYLOAD_CLASS_NAME.defaultValue).getTableType)
+
+ // delete the record
+ spark.sql(s"delete from $tableName where year = $partitionVal")
+ val cnt = spark.sql(s"select * from $tableName where year =
$partitionVal").count()
+ assertResult(0)(cnt)
+ }
+ }
+
+ before {
+ val testPropsFilePath = new File("src/test/resources").getAbsolutePath
+ setEnv(DFSPropertiesConfiguration.CONF_FILE_DIR_ENV_NAME,
testPropsFilePath)
+ DFSPropertiesConfiguration.refreshGlobalProps()
Review comment:
It is always good practice to isolate test files in a dedicated folder per test class.
##########
File path:
hudi-common/src/main/java/org/apache/hudi/common/config/DFSPropertiesConfiguration.java
##########
@@ -117,7 +159,43 @@ public void addProperties(BufferedReader reader) throws
IOException {
}
}
- public TypedProperties getConfig() {
- return props;
+ public static TypedProperties getGlobalProps() {
+ final TypedProperties globalProps = new TypedProperties();
+ globalProps.putAll(GLOBAL_PROPS);
+ return globalProps;
+ }
+
+ public TypedProperties getProps() {
+ return new TypedProperties(hoodieConfig.getProps());
+ }
+
+ public TypedProperties getProps(boolean includeGlobalProps) {
+ return new TypedProperties(hoodieConfig.getProps(includeGlobalProps));
+ }
+
+ private static Option<Path> getConfPathFromEnv() {
+ String confDir = System.getenv(CONF_FILE_DIR_ENV_NAME);
+ if (confDir == null) {
+ LOG.warn("Cannot find " + CONF_FILE_DIR_ENV_NAME + ", please set it as
the dir of " + DEFAULT_PROPERTIES_FILE);
+ return Option.empty();
Review comment:
Can you add a unit test that covers this graceful failover path?
##########
File path:
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CreateHoodieTableCommand.scala
##########
@@ -100,7 +100,8 @@ case class CreateHoodieTableCommand(table: CatalogTable,
ignoreIfExists: Boolean
// if CTAS, we treat the table we just created as nonexistent
val isTableExists = if (ctas) false else tableExistsInPath(path, conf)
var existingTableConfig = Map.empty[String, String]
- val sqlOptions = HoodieOptionConfig.withDefaultSqlOptions(tblProperties)
+ val globalProps =
HoodieOptionConfig.mappingTableConfigToSqlOption(HoodieWriterUtils.mappingSparkDatasourceConfigsToTableConfigs(DFSPropertiesConfiguration.getGlobalProps.asScala.toMap))
Review comment:
This line is too long and hard to read. Can you shorten it, e.g. by extracting intermediate variables?
##########
File path:
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CreateHoodieTableCommand.scala
##########
@@ -340,18 +342,22 @@ case class CreateHoodieTableCommand(table: CatalogTable,
ignoreIfExists: Boolean
String.valueOf(isUrlEncodeEnabled(allPartitionPaths, table))
}
} else {
- extraConfig(HoodieTableConfig.HIVE_STYLE_PARTITIONING_ENABLE.key) =
"true"
- extraConfig(HoodieTableConfig.URL_ENCODE_PARTITIONING.key) =
HoodieTableConfig.URL_ENCODE_PARTITIONING.defaultValue()
+ extraConfig(HoodieTableConfig.HIVE_STYLE_PARTITIONING_ENABLE.key) =
globalProps.getOrDefault(HoodieTableConfig.HIVE_STYLE_PARTITIONING_ENABLE.key,
"true")
+ extraConfig(HoodieTableConfig.URL_ENCODE_PARTITIONING.key) =
globalProps.getOrDefault(HoodieTableConfig.URL_ENCODE_PARTITIONING.key,
HoodieTableConfig.URL_ENCODE_PARTITIONING.defaultValue)
Review comment:
ditto
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]