Hi guys, I want to transform a Row using a NamedExpression.
Below is the code snippet that I am using:

def apply(dataFrame: DataFrame, selectExpressions: java.util.List[String]): RDD[UnsafeRow] = {
  val exprArray = selectExpressions.map(s => Column(SqlParser.parseExpression(s)).named )
  val inputSchema = dataFrame.logicalPlan.output
  val transformedRDD = dataFrame.mapPartitions( iter => {
    val project = UnsafeProjection.create(exprArray,inputSchema)
    iter.map{ row => project(InternalRow.fromSeq(row.toSeq)) }
  })
  transformedRDD
}

The problem is that the expression becomes unevaluable:

Caused by: java.lang.UnsupportedOperationException: Cannot evaluate expression: 'a
  at org.apache.spark.sql.catalyst.expressions.Unevaluable$class.genCode(Expression.scala:233)
  at org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute.genCode(unresolved.scala:53)
  at org.apache.spark.sql.catalyst.expressions.Expression$$anonfun$gen$2.apply(Expression.scala:106)
  at org.apache.spark.sql.catalyst.expressions.Expression$$anonfun$gen$2.apply(Expression.scala:102)
  at scala.Option.getOrElse(Option.scala:120)
  at org.apache.spark.sql.catalyst.expressions.Expression.gen(Expression.scala:102)
  at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenContext$$anonfun$generateExpressions$1.apply(CodeGenerator.scala:464)
  at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenContext$$anonfun$generateExpressions$1.apply(CodeGenerator.scala:464)
  at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
  at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
  at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
  at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
  at scala.collection.TraversableLike$class.map(TraversableLike.scala:244)
  at scala.collection.AbstractTraversable.map(Traversable.scala:105)
  at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenContext.generateExpressions(CodeGenerator.scala:464)
  at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$.createCode(GenerateUnsafeProjection.scala:281)
  at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$.create(GenerateUnsafeProjection.scala:324)
  at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$.create(GenerateUnsafeProjection.scala:317)
  at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$.create(GenerateUnsafeProjection.scala:32)
  at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator.generate(CodeGenerator.scala:635)
  at org.apache.spark.sql.catalyst.expressions.UnsafeProjection$.create(Projection.scala:125)
  at org.apache.spark.sql.catalyst.expressions.UnsafeProjection$.create(Projection.scala:135)
  at org.apache.spark.sql.ScalaTransform$$anonfun$3.apply(ScalaTransform.scala:31)
  at org.apache.spark.sql.ScalaTransform$$anonfun$3.apply(ScalaTransform.scala:30)
  at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$20.apply(RDD.scala:710)
  at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$20.apply(RDD.scala:710)
  at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
  at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
  at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
  at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
  at org.apache.spark.scheduler.Task.run(Task.scala:89)
  at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
  at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
  at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
  at java.lang.Thread.run(Thread.java:745)

This might be because the Expression is unresolved.
Any help would be appreciated. Thanks and Regards, Aviral Agarwal