This is an automated email from the ASF dual-hosted git repository.
xxyu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/master by this push:
new 65c3ee5 KYLIN-4841 Spark RDD cache is invalid when building with spark engine
65c3ee5 is described below
commit 65c3ee51f57cd636114b542df042a1c026370514
Author: Zhichao Zhang <[email protected]>
AuthorDate: Fri Dec 11 23:33:48 2020 +0800
KYLIN-4841 Spark RDD cache is invalid when building with spark engine
---
.../main/java/org/apache/kylin/engine/spark/SparkCubingByLayer.java | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkCubingByLayer.java b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkCubingByLayer.java
index 381ef13..cbedc8b 100644
--- a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkCubingByLayer.java
+++ b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkCubingByLayer.java
@@ -197,11 +197,13 @@ public class SparkCubingByLayer extends AbstractApplication implements Serializa
allRDDs[level] = allRDDs[level -
1].flatMapToPair(flatMapFunction).reduceByKey(reducerFunction2, partition)
.persist(storageLevel);
- allRDDs[level - 1].unpersist(false);
if (envConfig.isSparkSanityCheckEnabled() == true) {
sanityCheck(allRDDs[level], totalCount, level,
cubeStatsReader, countMeasureIndex);
}
saveToHDFS(allRDDs[level], metaUrl, cubeName, cubeSegment,
outputPath, level, job, envConfig);
+ // must do 'unpersist' after allRDDs[level] is created,
+ // otherwise the parent RDD 'allRDDs[level - 1]' will be recomputed
+ allRDDs[level - 1].unpersist(false);
}
allRDDs[totalLevels].unpersist(false);
logger.info("Finished on calculating all level cuboids.");