Hi,

You need to resolve the expressions before passing them in when creating the
UnsafeProjection.



Aviral Agarwal wrote
> Hi guys,
> 
> I want to transform a Row using a NamedExpression.
> 
> Below is the code snippet that I am using :
> 
> 
> def apply(dataFrame: DataFrame, selectExpressions:
> java.util.List[String]): RDD[UnsafeRow] = {
> 
>     val exprArray = selectExpressions.map(s =>
>       Column(SqlParser.parseExpression(s)).named
>     )
> 
>     val inputSchema = dataFrame.logicalPlan.output
> 
>     val transformedRDD = dataFrame.mapPartitions(
>       iter => {
>         val project = UnsafeProjection.create(exprArray,inputSchema)
>         iter.map{
>           row =>
>             project(InternalRow.fromSeq(row.toSeq))
>         }
>     })
> 
>     transformedRDD
>   }
> 
> 
> The problem is that expression becomes unevaluable :
> 
> Caused by: java.lang.UnsupportedOperationException: Cannot evaluate
> expression: 'a
>         at org.apache.spark.sql.catalyst.expressions.Unevaluable$class.
> genCode(Expression.scala:233)
>         at org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute.g
> enCode(unresolved.scala:53)
>         at org.apache.spark.sql.catalyst.expressions.Expression$$anonfu
> n$gen$2.apply(Expression.scala:106)
>         at org.apache.spark.sql.catalyst.expressions.Expression$$anonfu
> n$gen$2.apply(Expression.scala:102)
>         at scala.Option.getOrElse(Option.scala:120)
>         at org.apache.spark.sql.catalyst.expressions.Expression.gen(Exp
> ression.scala:102)
>         at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenCon
> text$$anonfun$generateExpressions$1.apply(CodeGenerator.scala:464)
>         at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenCon
> text$$anonfun$generateExpressions$1.apply(CodeGenerator.scala:464)
>         at scala.collection.TraversableLike$$anonfun$map$1.apply(
> TraversableLike.scala:244)
>         at scala.collection.TraversableLike$$anonfun$map$1.apply(
> TraversableLike.scala:244)
>         at scala.collection.mutable.ResizableArray$class.foreach(Resiza
> bleArray.scala:59)
>         at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.
> scala:47)
>         at scala.collection.TraversableLike$class.map(TraversableLike.
> scala:244)
>         at scala.collection.AbstractTraversable.map(Traversable.scala:105)
>         at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenCon
> text.generateExpressions(CodeGenerator.scala:464)
>         at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUn
> safeProjection$.createCode(GenerateUnsafeProjection.scala:281)
>         at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUn
> safeProjection$.create(GenerateUnsafeProjection.scala:324)
>         at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUn
> safeProjection$.create(GenerateUnsafeProjection.scala:317)
>         at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUn
> safeProjection$.create(GenerateUnsafeProjection.scala:32)
>         at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenera
> tor.generate(CodeGenerator.scala:635)
>         at org.apache.spark.sql.catalyst.expressions.UnsafeProjection$.
> create(Projection.scala:125)
>         at org.apache.spark.sql.catalyst.expressions.UnsafeProjection$.
> create(Projection.scala:135)
>         at org.apache.spark.sql.ScalaTransform$$anonfun$3.apply(
> ScalaTransform.scala:31)
>         at org.apache.spark.sql.ScalaTransform$$anonfun$3.apply(
> ScalaTransform.scala:30)
>         at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$
> apply$20.apply(RDD.scala:710)
>         at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$
> apply$20.apply(RDD.scala:710)
>         at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsR
> DD.scala:38)
>         at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>         at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>         at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.sca
> la:66)
>         at org.apache.spark.scheduler.Task.run(Task.scala:89)
>         at org.apache.spark.executor.Executor$TaskRunner.run(Executor.
> scala:214)
>         at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPool
> Executor.java:1142)
>         at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoo
> lExecutor.java:617)
>         at java.lang.Thread.run(Thread.java:745)
> 
> 
> This might be because the Expression is unresolved.
> 
> Any help would be appreciated.
> 
> Thanks and Regards,
> Aviral Agarwal





-----
Liang-Chi Hsieh | @viirya 
Spark Technology Center 
http://www.spark.tc/ 
--
View this message in context: 
http://apache-spark-developers-list.1001551.n3.nabble.com/Fwd-SparkSQL-Project-using-NamedExpression-tp21224p21230.html
Sent from the Apache Spark Developers List mailing list archive at Nabble.com.

---------------------------------------------------------------------
To unsubscribe e-mail: dev-unsubscr...@spark.apache.org

Reply via email to