This is an automated email from the ASF dual-hosted git repository.
xxyu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/main by this push:
new 6416ea0 KYLIN-5011 Detect and scatter skewed data in dict encoding step
6416ea0 is described below
commit 6416ea0f426ccfa60bed707693bccc0e7676eac2
Author: zhengshengjun <[email protected]>
AuthorDate: Tue Aug 10 17:11:33 2021 +0800
KYLIN-5011 Detect and scatter skewed data in dict encoding step
---
.../org/apache/kylin/engine/spark/builder/CubeTableEncoder.scala | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CubeTableEncoder.scala b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CubeTableEncoder.scala
index 8a5fe20..c9cea1f 100644
--- a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CubeTableEncoder.scala
+++ b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CubeTableEncoder.scala
@@ -111,12 +111,12 @@ object CubeTableEncoder extends Logging {
partitionedDs = partitionedDs.select(columns ++
Seq(scatterColumn): _*)
.repartition(enlargedBucketSize, col("scatter_skew_data_" +
ref.columnName))
.select(columns ++ Seq(encodeCol): _*)
- return partitionedDs;
}
+ } else {
+ partitionedDs = partitionedDs
+ .repartition(enlargedBucketSize,
col(encodeColRef).cast(StringType))
+ .select(columns ++ Seq(encodeCol): _*)
}
- partitionedDs = partitionedDs
- .repartition(enlargedBucketSize, col(encodeColRef).cast(StringType))
- .select(columns ++ Seq(encodeCol): _*)
}
)