This is an automated email from the ASF dual-hosted git repository. xxyu pushed a commit to branch kylin5 in repository https://gitbox.apache.org/repos/asf/kylin.git
commit 7bd187ae0d6dc88c3518d61b8afd665f4ee13797 Author: xingjian.zheng <[email protected]> AuthorDate: Fri Nov 4 20:37:20 2022 +0800 KYLIN-5347 Use the Spark session's Hadoop config in AWS serverless environments when building snapshots for partition tables --- .../scala/org/apache/kylin/engine/spark/builder/SnapshotBuilder.scala | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/spark-project/engine-spark/src/main/scala/org/apache/kylin/engine/spark/builder/SnapshotBuilder.scala b/src/spark-project/engine-spark/src/main/scala/org/apache/kylin/engine/spark/builder/SnapshotBuilder.scala index ab2c13e3d0..ce5a080601 100644 --- a/src/spark-project/engine-spark/src/main/scala/org/apache/kylin/engine/spark/builder/SnapshotBuilder.scala +++ b/src/spark-project/engine-spark/src/main/scala/org/apache/kylin/engine/spark/builder/SnapshotBuilder.scala @@ -459,6 +459,10 @@ class SnapshotBuilder(var jobId: String) extends Logging with Serializable { } private[builder] def decideSparkJobArg(sourceData: Dataset[Row]): (Int, Double) = { + var hadoopConf = SparderEnv.getHadoopConfiguration() + if (kylinConfig.getClusterManagerClassName.contains("AWSServerless")) { + hadoopConf = sourceData.sparkSession.sparkContext.hadoopConfiguration + } try { val sizeInMB = ResourceDetectUtils.getPaths(sourceData.queryExecution.sparkPlan) .map(path => HadoopUtil.getContentSummary(path.getFileSystem(SparderEnv.getHadoopConfiguration()), path).getLength)
