This is an automated email from the ASF dual-hosted git repository.
zhangzc pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/main by this push:
new e0de524 KYLIN-5019 Avoid building global dictionary from all data of
fact table every time (#1720)
e0de524 is described below
commit e0de52432f3d9e253ac3a581f9f8e389e67afc86
Author: zhengshengjun <[email protected]>
AuthorDate: Wed Aug 11 09:53:06 2021 +0800
KYLIN-5019 Avoid building global dictionary from all data of fact table
every time (#1720)
---
.../org/apache/kylin/engine/spark/builder/CreateFlatTable.scala | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CreateFlatTable.scala b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CreateFlatTable.scala
index 3bfd519..d266b9a 100644
--- a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CreateFlatTable.scala
+++ b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CreateFlatTable.scala
@@ -47,6 +47,7 @@ class CreateFlatTable(val seg: SegmentInfo,
var rootFactDataset = generateTableDataset(seg.factTable, ccCols.toSeq,
ss, seg.project)
logInfo(s"Create flattable need join lookup tables $needJoin, need encode
cols $needEncode")
+ rootFactDataset = applyPartitionCondition(seg, rootFactDataset)
(needJoin, needEncode) match {
case (true, true) =>
@@ -148,13 +149,18 @@ object CreateFlatTable extends Logging {
private def applyFilterCondition(desc: SegmentInfo, ds: Dataset[Row]):
Dataset[Row] = {
var afterFilter = ds
-
if (StringUtils.isNotBlank(desc.filterCondition)) {
val afterConvertCondition = desc.filterCondition
logInfo(s"Filter condition is $afterConvertCondition")
afterFilter = afterFilter.where(afterConvertCondition)
}
+ afterFilter
+ }
+
+ private def applyPartitionCondition(desc: SegmentInfo, ds: Dataset[Row]):
Dataset[Row] = {
+ var afterFilter = ds
if (StringUtils.isNotBlank(desc.partitionExp)) {
+ logInfo(s"Partition Filter condition is ${desc.partitionExp}")
afterFilter = afterFilter.where(desc.partitionExp)
}
afterFilter