This is an automated email from the ASF dual-hosted git repository.
xxyu pushed a commit to branch kylin3
in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/kylin3 by this push:
new 80404c0 configurable hive dict table format
80404c0 is described below
commit 80404c0e9cdce896219a89040ffd4fc8ec73a6f7
Author: fengpod <[email protected]>
AuthorDate: Wed Dec 8 20:23:14 2021 +0800
configurable hive dict table format
---
.../org/apache/kylin/common/KylinConfigBase.java | 4 ++
.../apache/kylin/source/hive/HiveInputBase.java | 4 +-
.../apache/kylin/source/hive/MRHiveDictUtil.java | 50 ++++++++++++++--------
3 files changed, 38 insertions(+), 20 deletions(-)
diff --git
a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 4d24fd0..707848f 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -708,6 +708,10 @@ public abstract class KylinConfigBase implements
Serializable {
return
getOptional("kylin.dictionary.mr-hive.intermediate.table.suffix",
"_distinct_value");
}
+ public String getMrHiveDictTableFormat() {
+ return getOptional("kylin.dictionary.mr-hive.table.format",
"TEXTFILE");
+ }
+
//
============================================================================
// CUBE
//
============================================================================
diff --git
a/source-hive/src/main/java/org/apache/kylin/source/hive/HiveInputBase.java
b/source-hive/src/main/java/org/apache/kylin/source/hive/HiveInputBase.java
index 193990d..90239d9 100644
--- a/source-hive/src/main/java/org/apache/kylin/source/hive/HiveInputBase.java
+++ b/source-hive/src/main/java/org/apache/kylin/source/hive/HiveInputBase.java
@@ -168,11 +168,11 @@ public class HiveInputBase {
final String distinctValueTable =
MRHiveDictUtil.distinctValueTable(flatDesc);
final String segmentLevelDictTableName =
MRHiveDictUtil.segmentLevelDictTableName(flatDesc);
- final String createGlobalDictTableHql =
MRHiveDictUtil.generateDictionaryDdl(globalDictDatabase, globalDictTable);
+ final String createGlobalDictTableHql =
MRHiveDictUtil.generateDictionaryDdl(flatDesc, globalDictDatabase,
globalDictTable);
final String dropDistinctValueTableHql =
MRHiveDictUtil.generateDropTableStatement(distinctValueTable);
final String createDistinctValueTableHql =
MRHiveDictUtil.generateDistinctValueTableStatement(flatDesc);
final String dropSegmentLevelDictTableHql =
MRHiveDictUtil.generateDropTableStatement(segmentLevelDictTableName);
- final String createSegmentLevelDictTableHql =
MRHiveDictUtil.generateDictTableStatement(segmentLevelDictTableName);
+ final String createSegmentLevelDictTableHql =
MRHiveDictUtil.generateDictTableStatement(flatDesc, segmentLevelDictTableName);
String maxAndDistinctCountSql =
MRHiveDictUtil.generateDictStatisticsSql(distinctValueTable, globalDictTable,
globalDictDatabase);
diff --git
a/source-hive/src/main/java/org/apache/kylin/source/hive/MRHiveDictUtil.java
b/source-hive/src/main/java/org/apache/kylin/source/hive/MRHiveDictUtil.java
index 5fe8a97..62bf03d 100644
--- a/source-hive/src/main/java/org/apache/kylin/source/hive/MRHiveDictUtil.java
+++ b/source-hive/src/main/java/org/apache/kylin/source/hive/MRHiveDictUtil.java
@@ -96,15 +96,23 @@ public class MRHiveDictUtil {
return cubeName +
flatDesc.getSegment().getConfig().getMrHiveDictTableSuffix();
}
- public static String generateDictionaryDdl(String db, String tbl) {
- return "CREATE TABLE IF NOT EXISTS " + db + "." + tbl + "\n"
- + " ( dict_key STRING COMMENT '', \n"
- + " dict_val INT COMMENT '' \n"
- + ") \n"
- + "COMMENT 'Hive Global Dictionary' \n"
- + "PARTITIONED BY (dict_column string) \n"
- + "ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\t' \n"
- + "STORED AS TEXTFILE; \n";
+ public static String generateDictionaryDdl(IJoinedFlatTableDesc flatDesc,
String db, String tbl) {
+ KylinConfig config = flatDesc.getSegment().getConfig();
+ String tableFormat = config.getMrHiveDictTableFormat();
+ StringBuilder ddl = new StringBuilder();
+ ddl.append("CREATE TABLE IF NOT EXISTS " + db + "." + tbl + "\n");
+ ddl.append(" ( dict_key STRING COMMENT '', \n");
+ ddl.append(" dict_val INT COMMENT '' \n");
+ ddl.append(") \n");
+ ddl.append("COMMENT 'Hive Global Dictionary' \n");
+ ddl.append("PARTITIONED BY (dict_column string) \n");
+ if ("TEXTFILE".equalsIgnoreCase(tableFormat)) {
+ ddl.append("ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\t' \n");
+ ddl.append("STORED AS TEXTFILE; \n");
+ } else {
+ ddl.append("STORED AS " +tableFormat+ "; \n");
+ }
+ return ddl.toString();
}
public static String generateDropTableStatement(String tableName) {
@@ -114,24 +122,26 @@ public class MRHiveDictUtil {
}
public static String
generateDistinctValueTableStatement(IJoinedFlatTableDesc flatDesc) {
- StringBuilder ddl = new StringBuilder();
- String table = flatDesc.getTableName()
- +
flatDesc.getSegment().getConfig().getMrHiveDistinctValueTableSuffix();
+ KylinConfig config = flatDesc.getSegment().getConfig();
+ String table = flatDesc.getTableName() + config.getMrHiveDistinctValueTableSuffix(); // keep the flat-table prefix, exactly as the removed lines did
+ String tableFormat = config.getMrHiveDictTableFormat();
+ StringBuilder ddl = new StringBuilder();
ddl.append("CREATE TABLE IF NOT EXISTS " + table + " \n");
ddl.append("( \n ");
ddl.append(" dict_key" + " " + "STRING" + " COMMENT '' \n");
ddl.append(") \n");
ddl.append("COMMENT '' \n");
ddl.append("PARTITIONED BY (dict_column string) \n");
- ddl.append("STORED AS TEXTFILE \n");
- ddl.append(";").append("\n");
+ ddl.append("STORED AS ").append(tableFormat).append(";\n");
return ddl.toString();
}
- public static String generateDictTableStatement(String globalTableName) {
- StringBuilder ddl = new StringBuilder();
+ public static String generateDictTableStatement(IJoinedFlatTableDesc
flatDesc, String globalTableName) {
+ KylinConfig config = flatDesc.getSegment().getConfig();
+ String tableFormat = config.getMrHiveDictTableFormat();
+ StringBuilder ddl = new StringBuilder();
ddl.append("CREATE TABLE IF NOT EXISTS " + globalTableName + " \n");
ddl.append("( \n ");
ddl.append(" dict_key" + " " + "STRING" + " COMMENT '' , \n");
@@ -139,8 +149,12 @@ public class MRHiveDictUtil {
ddl.append(") \n");
ddl.append("COMMENT '' \n");
ddl.append("PARTITIONED BY (dict_column string) \n");
- ddl.append("ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\t' \n");
- ddl.append("STORED AS TEXTFILE \n");
+ if ("TEXTFILE".equalsIgnoreCase(tableFormat)) {
+ ddl.append("ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\t' \n");
+ ddl.append("STORED AS TEXTFILE \n");
+ } else {
+ ddl.append("STORED AS ").append(tableFormat).append("\n");
+ }
ddl.append(";").append("\n");
return ddl.toString();
}