Dictionary building uses the DictEncode expression. Its three child Expressions are exposed, at instantiation time, through the custom trait TernaryLike that Kylin's bundled Spark defines in TreeNode:
case class DictEncode(left: Expression, mid: Expression, right: Expression)
  extends TernaryExpression with ExpectsInputTypes with TernaryLike {

  def maxFields: Int = SQLConf.get.maxToStringFields

  override def first: Expression = left

  override def second: Expression = mid

  override def third: Expression = right

  // ...
}
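
For context: community Spark only gained such a trait in 3.2 (the generic org.apache.spark.sql.catalyst.trees.TernaryLike[T]); judging from the usage above, the variant in Kylin's 3.1.1.1.1.0 build is a non-generic cousin of it. A minimal sketch of its presumable shape, modeled on the 3.2 trait:

// Presumable shape of the custom trait in Kylin's bundled Spark
// (modeled on org.apache.spark.sql.catalyst.trees.TernaryLike from Spark 3.2).
trait TernaryLike { self: Expression =>
  // The three children a concrete node must supply.
  def first: Expression
  def second: Expression
  def third: Expression
  // children is derived from first/second/third, so the concrete node's
  // definitions must really override these for TreeNode traversals to see them.
  @transient override final lazy val children: Seq[Expression] =
    first :: second :: third :: Nil
}

This is why the override modifiers matter: children comes from the trait, and the trait reads first/second/third.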
If the override modifiers are removed, compilation still succeeds, but dictionary building then fails with the following NullPointerException:
2023-12-06T03:50:58,896 ERROR [logger-thread-0] application.JobMonitor : handleResourceLack --> java.lang.NullPointerException
java.lang.RuntimeException: Error execute org.apache.kylin.engine.spark.job.SegmentBuildJob
    at org.apache.kylin.engine.spark.application.SparkApplication.execute(SparkApplication.java:135) ~[newten-job.jar:?]
    at org.apache.spark.application.JobWorker$$anon$2.run(JobWorker.scala:56) ~[newten-job.jar:?]
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_202]
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_202]
    at java.lang.Thread.run(Thread.java:748) [?:1.8.0_202]
Caused by: java.lang.NullPointerException
    at org.apache.spark.sql.catalyst.trees.TreeNode.containsChild$lzycompute(TreeNode.scala:121) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at org.apache.spark.sql.catalyst.trees.TreeNode.containsChild(TreeNode.scala:121) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:357) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:322) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDown$3(TreeNode.scala:322) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$mapChildren$1(TreeNode.scala:407) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:243) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:405) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:358) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:322) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:306) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at org.apache.spark.sql.Column.normalizedExpr(Column.scala:161) ~[spark-sql_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at org.apache.spark.sql.Column.hashCode(Column.scala:159) ~[spark-sql_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at scala.runtime.Statics.anyHash(Statics.java:122) ~[scala-library-2.12.17.jar:?]
    at scala.util.hashing.MurmurHash3.productHash(MurmurHash3.scala:76) ~[scala-library-2.12.17.jar:?]
    at scala.util.hashing.MurmurHash3$.productHash(MurmurHash3.scala:246) ~[scala-library-2.12.17.jar:?]
    at scala.runtime.ScalaRunTime$._hashCode(ScalaRunTime.scala:167) ~[scala-library-2.12.17.jar:?]
    at scala.Tuple5.hashCode(Tuple5.scala:27) ~[scala-library-2.12.17.jar:?]
    at scala.collection.mutable.FlatHashTable.addEntry(FlatHashTable.scala:153) ~[scala-library-2.12.17.jar:?]
    at scala.collection.mutable.FlatHashTable.addEntry$(FlatHashTable.scala:152) ~[scala-library-2.12.17.jar:?]
    at scala.collection.mutable.HashSet.addEntry(HashSet.scala:41) ~[scala-library-2.12.17.jar:?]
    at scala.collection.mutable.FlatHashTable.addElem(FlatHashTable.scala:144) ~[scala-library-2.12.17.jar:?]
    at scala.collection.mutable.FlatHashTable.addElem$(FlatHashTable.scala:143) ~[scala-library-2.12.17.jar:?]
    at scala.collection.mutable.HashSet.addElem(HashSet.scala:41) ~[scala-library-2.12.17.jar:?]
    at scala.collection.mutable.HashSet.$plus$eq(HashSet.scala:60) ~[scala-library-2.12.17.jar:?]
    at scala.collection.mutable.HashSet.$plus$eq(HashSet.scala:41) ~[scala-library-2.12.17.jar:?]
    at scala.collection.mutable.GrowingBuilder.$plus$eq(GrowingBuilder.scala:32) ~[scala-library-2.12.17.jar:?]
    at scala.collection.mutable.GrowingBuilder.$plus$eq(GrowingBuilder.scala:30) ~[scala-library-2.12.17.jar:?]
    at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286) ~[scala-library-2.12.17.jar:?]
    at scala.collection.Iterator.foreach(Iterator.scala:943) ~[scala-library-2.12.17.jar:?]
    at scala.collection.Iterator.foreach$(Iterator.scala:943) ~[scala-library-2.12.17.jar:?]
    at scala.collection.AbstractIterator.foreach(Iterator.scala:1431) ~[scala-library-2.12.17.jar:?]
    at scala.collection.IterableLike.foreach(IterableLike.scala:74) ~[scala-library-2.12.17.jar:?]
    at scala.collection.IterableLike.foreach$(IterableLike.scala:73) ~[scala-library-2.12.17.jar:?]
    at scala.collection.AbstractIterable.foreach(Iterable.scala:56) ~[scala-library-2.12.17.jar:?]
    at scala.collection.TraversableLike.map(TraversableLike.scala:286) ~[scala-library-2.12.17.jar:?]
    at scala.collection.TraversableLike.map$(TraversableLike.scala:279) ~[scala-library-2.12.17.jar:?]
    at scala.collection.mutable.AbstractSet.scala$collection$SetLike$$super$map(Set.scala:50) ~[scala-library-2.12.17.jar:?]
    at scala.collection.SetLike.map(SetLike.scala:105) ~[scala-library-2.12.17.jar:?]
    at scala.collection.SetLike.map$(SetLike.scala:105) ~[scala-library-2.12.17.jar:?]
    at scala.collection.mutable.AbstractSet.map(Set.scala:50) ~[scala-library-2.12.17.jar:?]
    at org.apache.kylin.engine.spark.builder.DFTableEncoder$.encodeTable(DFTableEncoder.scala:62) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.job.stage.build.FlatTableAndDictBase.encodeColumn(FlatTableAndDictBase.scala:569) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.job.stage.build.FlatTableAndDictBase.buildDictIfNeed(FlatTableAndDictBase.scala:545) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.job.stage.build.FlatTableAndDictBase.buildDictIfNeed(FlatTableAndDictBase.scala:201) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.job.stage.build.BuildDict.execute(BuildDict.scala:31) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.job.stage.StageExec.toWork(StageExec.scala:116) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.job.stage.StageExec.toWork$(StageExec.scala:112) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.job.stage.build.BuildStage.toWork(BuildStage.scala:48) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.job.exec.BuildExec.$anonfun$buildSegment$1(BuildExec.scala:38) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.job.exec.BuildExec.$anonfun$buildSegment$1$adapted(BuildExec.scala:37) ~[newten-job.jar:?]
    at scala.collection.Iterator.foreach(Iterator.scala:943) ~[scala-library-2.12.17.jar:?]
    at scala.collection.Iterator.foreach$(Iterator.scala:943) ~[scala-library-2.12.17.jar:?]
    at scala.collection.AbstractIterator.foreach(Iterator.scala:1431) ~[scala-library-2.12.17.jar:?]
    at scala.collection.IterableLike.foreach(IterableLike.scala:74) ~[scala-library-2.12.17.jar:?]
    at scala.collection.IterableLike.foreach$(IterableLike.scala:73) ~[scala-library-2.12.17.jar:?]
    at scala.collection.AbstractIterable.foreach(Iterable.scala:56) ~[scala-library-2.12.17.jar:?]
    at org.apache.kylin.engine.spark.job.exec.BuildExec.buildSegment(BuildExec.scala:37) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.job.SegmentBuildJob.buildSegment(SegmentBuildJob.java:181) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.job.SegmentBuildJob.lambda$build$1(SegmentBuildJob.java:166) ~[newten-job.jar:?]
    at java.util.Iterator.forEachRemaining(Iterator.java:116) ~[?:1.8.0_202]
    at java.util.Spliterators$IteratorSpliterator.forEachRemaining(Spliterators.java:1801) ~[?:1.8.0_202]
    at java.util.stream.ReferencePipeline$Head.forEach(ReferencePipeline.java:580) ~[?:1.8.0_202]
    at org.apache.kylin.engine.spark.job.SegmentBuildJob.build(SegmentBuildJob.java:146) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.job.SegmentBuildJob.doExecute(SegmentBuildJob.java:108) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.application.SparkApplication.execute(SparkApplication.java:319) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.application.SparkApplication.execute(SparkApplication.java:133) ~[newten-job.jar:?]
    ... 4 more
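
The topmost Spark frame, TreeNode.containsChild$lzycompute(TreeNode.scala:121), is the lazy val that caches a node's children as a set; in the 3.1.x sources it is essentially:

// TreeNode.scala (Spark 3.1.x), around the line shown in the trace:
lazy val containsChild: Set[TreeNode[_]] = children.toSet

children.toSet can only throw a NullPointerException here if children itself evaluates to null, which points at the first/second/third wiring in TernaryLike never taking effect for DictEncode once the overrides are dropped.

As for implementing this at the application layer (the question quoted below): on a community Spark 3.1.1 that has no TernaryLike, one option (a sketch under that assumption, not a verified fix) is to drop the trait and let DictEncode define children directly:

import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression, TernaryExpression}

case class DictEncode(left: Expression, mid: Expression, right: Expression)
  extends TernaryExpression with ExpectsInputTypes {

  // Community Spark 3.1 has no first/second/third; supply children directly.
  override def children: Seq[Expression] = Seq(left, mid, right)

  // ... inputTypes, eval/doGenCode etc. unchanged
}

On Spark 3.2 and later, the upstream org.apache.spark.sql.catalyst.trees.TernaryLike could serve the same role as Kylin's custom trait, though that is worth verifying against the exact version in use.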



From: MINGMING GE <7mmi...@gmail.com>
Date: Monday, December 25, 2023 19:17
To: dev@kylin.apache.org <dev@kylin.apache.org>
Subject: Re: About the custom Spark build issue

Can you explain in detail the problem you encountered?

On Mon, Dec 25, 2023 at 4:50 PM Li, Can <c...@ebay.com.invalid> wrote:

> After replacing the Spark version with the community build, we found that some build jobs fail.
> Comparing the community Spark with the Spark build shipped with Kylin, we found that for building the global
> dictionary, Kylin's build defines some custom interface methods inside the jar. Could these be implemented at the application layer instead?
>
