bogao007 commented on code in PR #50926: URL: https://github.com/apache/spark/pull/50926#discussion_r2093748679
########## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/pythonLogicalOperators.scala: ########## @@ -215,10 +216,15 @@ case class TransformWithStateInPySpark( left.output.take(groupingAttributesLen) } - def rightAttributes: Seq[Attribute] = { + def rightAttributes(includesInitialStateColumns: Boolean = false): Seq[Attribute] = { assert(resolved, "This method is expected to be called after resolution.") if (hasInitialState) { - right.output.take(initGroupingAttrsLen) + if (includesInitialStateColumns) { + // Include the initial state columns in the references to avoid being column pruned. Review Comment: Yes, the pruning happened when Spark applied the [ColumnPruning](https://github.com/apache/spark/blob/af6499fd1996e72b3f768c2ee8e984a45ec43f8b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala#L968) rule. Because we didn't add these columns to `references`, the ColumnPruning rule thinks they can be dropped. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org