This is an automated email from the ASF dual-hosted git repository.
xxyu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/main by this push:
new df6dd47 KYLIN-5067 Skip snapshot build when dimension table's kind is LOOKUP
df6dd47 is described below
commit df6dd4726a90fdabb4dc4e04daacbee8f206b782
Author: XiaoxiangYu <[email protected]>
AuthorDate: Sat Sep 18 15:02:10 2021 +0800
KYLIN-5067 Skip snapshot build when dimension table's kind is LOOKUP
---
.../kylin/engine/spark/builder/CreateFlatTable.scala | 2 +-
.../engine/spark/builder/CubeSnapshotBuilder.scala | 19 ++++++++++++-------
.../kylin/engine/spark/job/ParentSourceChooser.scala | 2 +-
.../kylin/storage/spark/HadoopFileStorageQuery.java | 1 +
4 files changed, 15 insertions(+), 9 deletions(-)
diff --git
a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CreateFlatTable.scala
b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CreateFlatTable.scala
index d266b9a..8154bd2 100644
---
a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CreateFlatTable.scala
+++
b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CreateFlatTable.scala
@@ -46,7 +46,7 @@ class CreateFlatTable(val seg: SegmentInfo,
val ccCols =
seg.allColumns.filter(_.isInstanceOf[ComputedColumnDesc]).toSet
var rootFactDataset = generateTableDataset(seg.factTable, ccCols.toSeq,
ss, seg.project)
- logInfo(s"Create flattable need join lookup tables $needJoin, need encode
cols $needEncode")
+ logInfo(s"Create flat table need join lookup tables $needJoin, need encode
cols $needEncode")
rootFactDataset = applyPartitionCondition(seg, rootFactDataset)
(needJoin, needEncode) match {
diff --git
a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CubeSnapshotBuilder.scala
b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CubeSnapshotBuilder.scala
index 0248fb5..7fcfbd0 100644
---
a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CubeSnapshotBuilder.scala
+++
b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CubeSnapshotBuilder.scala
@@ -189,13 +189,18 @@ class CubeSnapshotBuilder extends Logging {
joinDescs.foreach {
joinDesc =>
val tableInfo = joinDesc.lookupTable
- val lookupTableName = tableInfo.tableName
- val df = ss.table(tableInfo)
- val countColumn = df.count()
- val lookupTablePKS = joinDesc.PKS.map(lookupTablePK =>
lookupTablePK.columnName)
- val countDistinctColumn = df.agg(countDistinct(lookupTablePKS.head,
lookupTablePKS.tail: _*)).collect().map(_.getLong(0)).head
- if (countColumn != countDistinctColumn) {
- throw new IllegalStateException(s"Failed to build lookup table
${lookupTableName} snapshot for Dup key found, key=
${lookupTablePKS.mkString(",")}")
+ // Build snapshot when DataModelDesc.JoinTableDesc.TableKind is
TableKind.LOOKUP
+ if (seg.snapshotTables.exists(t =>
t.identity.equals(tableInfo.identity))) {
+ val lookupTableName = tableInfo.tableName
+ val df = ss.table(tableInfo)
+ val countColumn = df.count()
+ val lookupTablePKS = joinDesc.PKS.map(lookupTablePK =>
lookupTablePK.columnName)
+ val countDistinctColumn = df.agg(countDistinct(lookupTablePKS.head,
lookupTablePKS.tail: _*)).collect().map(_.getLong(0)).head
+ if (countColumn != countDistinctColumn) {
+ throw new IllegalStateException(s"Failed to build lookup table
${lookupTableName} snapshot for Dup key found, key=
${lookupTablePKS.mkString(",")}")
+ }
+ } else {
+ logInfo("Skip check duplicate primary key on table : " +
tableInfo.identity)
}
}
}
diff --git
a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/job/ParentSourceChooser.scala
b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/job/ParentSourceChooser.scala
index fdb04ab..0f77c76 100644
---
a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/job/ParentSourceChooser.scala
+++
b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/job/ParentSourceChooser.scala
@@ -75,7 +75,7 @@ class ParentSourceChooser(
def decideFlatTableSource(entity: LayoutEntity): Unit = {
if (flatTableSource == null) {
- if (needEncoding) {
+ if (segInfo.snapshotTables.nonEmpty && needEncoding) {
// hacked, for some case, you do not want to trigger buildSnapshot
// eg: resource detect
// Move this to a more suitable place
diff --git
a/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/kylin/storage/spark/HadoopFileStorageQuery.java
b/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/kylin/storage/spark/HadoopFileStorageQuery.java
index a209a43..5ab9fc5 100644
---
a/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/kylin/storage/spark/HadoopFileStorageQuery.java
+++
b/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/kylin/storage/spark/HadoopFileStorageQuery.java
@@ -84,6 +84,7 @@ public class HadoopFileStorageQuery extends
GTCubeStorageQueryBase {
dimensionsD.addAll(groupsD);
dimensionsD.addAll(otherDimsD);
Cuboid cuboid = findCuboid(cubeInstance, dimensionsD, metrics);
+ log.info("For OLAPContext {}, need cuboid {}, hit cuboid {}, level
diff is {}.", olapContext.id, cuboid.getInputID() , cuboid.getId(),
Long.bitCount(cuboid.getInputID() ^ cuboid.getId()));
context.setCuboid(cuboid);
return new GTCubeStorageQueryRequest(cuboid, dimensionsD, groupsD,
null, null, null,
metrics, null, null, null, context);