During the dictionary build, Kylin uses the DictEncode expression. It takes three Expression arguments and, in the Kylin-specific Spark, extends the trait TernaryLike that is custom-defined in TreeNode:

case class DictEncode(left: Expression, mid: Expression, right: Expression)
  extends TernaryExpression with ExpectsInputTypes with TernaryLike {

  def maxFields: Int = SQLConf.get.maxToStringFields

  override def first: Expression = left
  override def second: Expression = mid
  override def third: Expression = right

  // ...
}
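For context: community Spark 3.1.x does not define TernaryLike at all; the trait first appears in community Spark 3.2. Below is a minimal sketch of what the backported trait in the Kylin-specific Spark presumably looks like, modeled on the Spark 3.2+ community definition (the type parameter and the lazy children val are assumptions based on that version, not taken from the Kylin-specific sources):

trait TernaryLike[T <: TreeNode[T]] { self: TreeNode[T] =>
  def first: T
  def second: T
  def third: T

  // children is derived from first/second/third, so a subclass such as
  // DictEncode must wire these three methods to its real child
  // expressions; otherwise every tree traversal sees broken children.
  @transient
  override final lazy val children: Seq[T] = first :: second :: third :: Nil
}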
If the first/second/third overrides are removed, the code still compiles, but a NullPointerException is thrown during the dictionary build:

2023-12-06T03:50:58,896 ERROR [logger-thread-0] application.JobMonitor : handleResourceLack --> java.lang.NullPointerException
java.lang.RuntimeException: Error execute org.apache.kylin.engine.spark.job.SegmentBuildJob
    at org.apache.kylin.engine.spark.application.SparkApplication.execute(SparkApplication.java:135) ~[newten-job.jar:?]
    at org.apache.spark.application.JobWorker$$anon$2.run(JobWorker.scala:56) ~[newten-job.jar:?]
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_202]
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_202]
    at java.lang.Thread.run(Thread.java:748) [?:1.8.0_202]
Caused by: java.lang.NullPointerException
    at org.apache.spark.sql.catalyst.trees.TreeNode.containsChild$lzycompute(TreeNode.scala:121) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at org.apache.spark.sql.catalyst.trees.TreeNode.containsChild(TreeNode.scala:121) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:357) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:322) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDown$3(TreeNode.scala:322) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$mapChildren$1(TreeNode.scala:407) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:243) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:405) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:358) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:322) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:306) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at org.apache.spark.sql.Column.normalizedExpr(Column.scala:161) ~[spark-sql_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at org.apache.spark.sql.Column.hashCode(Column.scala:159) ~[spark-sql_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at scala.runtime.Statics.anyHash(Statics.java:122) ~[scala-library-2.12.17.jar:?]
    at scala.util.hashing.MurmurHash3.productHash(MurmurHash3.scala:76) ~[scala-library-2.12.17.jar:?]
    at scala.util.hashing.MurmurHash3$.productHash(MurmurHash3.scala:246) ~[scala-library-2.12.17.jar:?]
    at scala.runtime.ScalaRunTime$._hashCode(ScalaRunTime.scala:167) ~[scala-library-2.12.17.jar:?]
    at scala.Tuple5.hashCode(Tuple5.scala:27) ~[scala-library-2.12.17.jar:?]
    at scala.collection.mutable.FlatHashTable.addEntry(FlatHashTable.scala:153) ~[scala-library-2.12.17.jar:?]
    at scala.collection.mutable.FlatHashTable.addEntry$(FlatHashTable.scala:152) ~[scala-library-2.12.17.jar:?]
    at scala.collection.mutable.HashSet.addEntry(HashSet.scala:41) ~[scala-library-2.12.17.jar:?]
    at scala.collection.mutable.FlatHashTable.addElem(FlatHashTable.scala:144) ~[scala-library-2.12.17.jar:?]
    at scala.collection.mutable.FlatHashTable.addElem$(FlatHashTable.scala:143) ~[scala-library-2.12.17.jar:?]
    at scala.collection.mutable.HashSet.addElem(HashSet.scala:41) ~[scala-library-2.12.17.jar:?]
    at scala.collection.mutable.HashSet.$plus$eq(HashSet.scala:60) ~[scala-library-2.12.17.jar:?]
    at scala.collection.mutable.HashSet.$plus$eq(HashSet.scala:41) ~[scala-library-2.12.17.jar:?]
    at scala.collection.mutable.GrowingBuilder.$plus$eq(GrowingBuilder.scala:32) ~[scala-library-2.12.17.jar:?]
    at scala.collection.mutable.GrowingBuilder.$plus$eq(GrowingBuilder.scala:30) ~[scala-library-2.12.17.jar:?]
    at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286) ~[scala-library-2.12.17.jar:?]
    at scala.collection.Iterator.foreach(Iterator.scala:943) ~[scala-library-2.12.17.jar:?]
    at scala.collection.Iterator.foreach$(Iterator.scala:943) ~[scala-library-2.12.17.jar:?]
    at scala.collection.AbstractIterator.foreach(Iterator.scala:1431) ~[scala-library-2.12.17.jar:?]
    at scala.collection.IterableLike.foreach(IterableLike.scala:74) ~[scala-library-2.12.17.jar:?]
    at scala.collection.IterableLike.foreach$(IterableLike.scala:73) ~[scala-library-2.12.17.jar:?]
    at scala.collection.AbstractIterable.foreach(Iterable.scala:56) ~[scala-library-2.12.17.jar:?]
    at scala.collection.TraversableLike.map(TraversableLike.scala:286) ~[scala-library-2.12.17.jar:?]
    at scala.collection.TraversableLike.map$(TraversableLike.scala:279) ~[scala-library-2.12.17.jar:?]
    at scala.collection.mutable.AbstractSet.scala$collection$SetLike$$super$map(Set.scala:50) ~[scala-library-2.12.17.jar:?]
    at scala.collection.SetLike.map(SetLike.scala:105) ~[scala-library-2.12.17.jar:?]
    at scala.collection.SetLike.map$(SetLike.scala:105) ~[scala-library-2.12.17.jar:?]
    at scala.collection.mutable.AbstractSet.map(Set.scala:50) ~[scala-library-2.12.17.jar:?]
    at org.apache.kylin.engine.spark.builder.DFTableEncoder$.encodeTable(DFTableEncoder.scala:62) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.job.stage.build.FlatTableAndDictBase.encodeColumn(FlatTableAndDictBase.scala:569) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.job.stage.build.FlatTableAndDictBase.buildDictIfNeed(FlatTableAndDictBase.scala:545) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.job.stage.build.FlatTableAndDictBase.buildDictIfNeed(FlatTableAndDictBase.scala:201) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.job.stage.build.BuildDict.execute(BuildDict.scala:31) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.job.stage.StageExec.toWork(StageExec.scala:116) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.job.stage.StageExec.toWork$(StageExec.scala:112) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.job.stage.build.BuildStage.toWork(BuildStage.scala:48) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.job.exec.BuildExec.$anonfun$buildSegment$1(BuildExec.scala:38) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.job.exec.BuildExec.$anonfun$buildSegment$1$adapted(BuildExec.scala:37) ~[newten-job.jar:?]
    at scala.collection.Iterator.foreach(Iterator.scala:943) ~[scala-library-2.12.17.jar:?]
    at scala.collection.Iterator.foreach$(Iterator.scala:943) ~[scala-library-2.12.17.jar:?]
    at scala.collection.AbstractIterator.foreach(Iterator.scala:1431) ~[scala-library-2.12.17.jar:?]
    at scala.collection.IterableLike.foreach(IterableLike.scala:74) ~[scala-library-2.12.17.jar:?]
    at scala.collection.IterableLike.foreach$(IterableLike.scala:73) ~[scala-library-2.12.17.jar:?]
    at scala.collection.AbstractIterable.foreach(Iterable.scala:56) ~[scala-library-2.12.17.jar:?]
    at org.apache.kylin.engine.spark.job.exec.BuildExec.buildSegment(BuildExec.scala:37) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.job.SegmentBuildJob.buildSegment(SegmentBuildJob.java:181) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.job.SegmentBuildJob.lambda$build$1(SegmentBuildJob.java:166) ~[newten-job.jar:?]
    at java.util.Iterator.forEachRemaining(Iterator.java:116) ~[?:1.8.0_202]
    at java.util.Spliterators$IteratorSpliterator.forEachRemaining(Spliterators.java:1801) ~[?:1.8.0_202]
    at java.util.stream.ReferencePipeline$Head.forEach(ReferencePipeline.java:580) ~[?:1.8.0_202]
    at org.apache.kylin.engine.spark.job.SegmentBuildJob.build(SegmentBuildJob.java:146) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.job.SegmentBuildJob.doExecute(SegmentBuildJob.java:108) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.application.SparkApplication.execute(SparkApplication.java:319) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.application.SparkApplication.execute(SparkApplication.java:133) ~[newten-job.jar:?]
    ... 4 more

From: MINGMING GE <7mmi...@gmail.com>
Date: Monday, December 25, 2023 19:17
To: dev@kylin.apache.org <dev@kylin.apache.org>
Subject: Re: Question about the Kylin-specific Spark version

Can you explain in detail the problem you encountered?

On Mon, Dec 25, 2023 at 4:50 PM Li, Can <c...@ebay.com.invalid> wrote:

> After replacing the Spark version with the community build, we found that some build jobs fail.
> We compared community Spark with the Kylin-specific Spark and found that, for the global dictionary build, the Kylin version defines some custom interface methods inside its jar. Could these be implemented at the application layer instead?
>
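The top of the "Caused by" section points at TreeNode.containsChild, which in community Spark is just a lazy set built from children:

// org.apache.spark.sql.catalyst.trees.TreeNode (community Spark 3.1.x)
lazy val containsChild: Set[TreeNode[_]] = children.toSet

So our reading (an interpretation, not verified against the Kylin-specific sources) is that without the first/second/third overrides the children sequence DictEncode inherits is never wired to left/mid/right, and the first transform over the expression tree, triggered here by Column.hashCode inside DFTableEncoder.encodeTable, hits a null while building that set. That also explains why the problem is invisible at compile time and only surfaces once a dictionary actually needs to be built.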