This is an automated email from the ASF dual-hosted git repository.
xxyu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/main by this push:
new 6fa14239c4 KYLIN-5182, support to ignore only one null count in check
duplicate key
6fa14239c4 is described below
commit 6fa14239c49c800f69369321eafdc4ef9727ba33
Author: Mukvin <[email protected]>
AuthorDate: Fri May 6 18:37:29 2022 +0800
KYLIN-5182, support to ignore only one null count in check duplicate key
---
.../src/main/java/org/apache/kylin/common/KylinConfigBase.java | 5 +++++
.../apache/kylin/engine/spark/builder/CubeSnapshotBuilder.scala | 7 ++++++-
2 files changed, 11 insertions(+), 1 deletion(-)
diff --git
a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 31a600857f..ba06df7345 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -2321,6 +2321,11 @@ public abstract class KylinConfigBase implements
Serializable {
return
Boolean.valueOf(getOptional("kylin.engine.build-base-cuboid-enabled", TRUE));
}
+ @ConfigTag(ConfigTag.Tag.CUBE_LEVEL)
+ public boolean isIgnoringNullInCheckDupKeyEnabled() {
+ return
Boolean.valueOf(this.getOptional("kylin.job.ignoring-null-in-check-dup-key-enabled",
FALSE));
+ }
+
//
============================================================================
// Kylin 4.X Spark resources automatic adjustment strategy configuration
//
============================================================================
diff --git
a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CubeSnapshotBuilder.scala
b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CubeSnapshotBuilder.scala
index 7fcfbd0a57..468f1228a2 100644
---
a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CubeSnapshotBuilder.scala
+++
b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CubeSnapshotBuilder.scala
@@ -197,7 +197,12 @@ class CubeSnapshotBuilder extends Logging {
val lookupTablePKS = joinDesc.PKS.map(lookupTablePK =>
lookupTablePK.columnName)
val countDistinctColumn = df.agg(countDistinct(lookupTablePKS.head,
lookupTablePKS.tail: _*)).collect().map(_.getLong(0)).head
if (countColumn != countDistinctColumn) {
- throw new IllegalStateException(s"Failed to build lookup table
${lookupTableName} snapshot for Dup key found, key=
${lookupTablePKS.mkString(",")}")
+ if (seg.kylinconf.isIgnoringNullInCheckDupKeyEnabled &&
countDistinctColumn + 1 == countColumn) {
+ // if only one row has a null value, then countDistinctColumn + 1
will equal countColumn
+ logInfo("Using config:
kylin.job.ignoring-null-in-check-dup-key-enabled=true to ignore only one null
count.")
+ } else {
+ throw new IllegalStateException(s"Failed to build lookup table
${lookupTableName} snapshot for Dup key found, key=
${lookupTablePKS.mkString(",")}")
+ }
}
} else {
logInfo("Skip check duplicate primary key on table : " +
tableInfo.identity)