anishshri-db commented on code in PR #50742:
URL: https://github.com/apache/spark/pull/50742#discussion_r2067627737


##########
sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreRDD.scala:
##########
@@ -82,19 +106,47 @@ class ReadStateStoreRDD[T: ClassTag, U: ClassTag](
     useColumnFamilies: Boolean = false,
     extraOptions: Map[String, String] = Map.empty)
   extends BaseStateStoreRDD[T, U](dataRDD, checkpointLocation, queryRunId, 
operatorId,
-    sessionState, storeCoordinator, extraOptions) {
+    sessionState, storeCoordinator, extraOptions) with StateStoreRDDProvider {
+
+  // Using a ConcurrentHashMap to track state stores by partition ID
+  // and whether this store was used to create a write store or not.
+  @transient private lazy val partitionStores =
+    new java.util.concurrent.ConcurrentHashMap[Int, (ReadStateStore, 
Boolean)]()
+
+  override def getStateStoreForPartition(partitionId: Int): 
Option[ReadStateStore] = {
+    val (readStore, _) = partitionStores.get(partitionId)
+    partitionStores.put(partitionId, (readStore, true))
+    Option(readStore)
+  }
 
   override protected def getPartitions: Array[Partition] = dataRDD.partitions
 
   override def compute(partition: Partition, ctxt: TaskContext): Iterator[U] = 
{
     val storeProviderId = getStateProviderId(partition)
+    val partitionId = partition.index
 
     val inputIter = dataRDD.iterator(partition, ctxt)
     val store = StateStore.getReadOnly(
       storeProviderId, keySchema, valueSchema, keyStateEncoderSpec, 
storeVersion,
-      stateStoreCkptIds.map(_.apply(partition.index).head),
+      stateStoreCkptIds.map(_.apply(partitionId).head),
       stateSchemaBroadcast,
       useColumnFamilies, storeConf, hadoopConfBroadcast.value.value)
+
+    // Store reference for this partition
+    partitionStores.put(partitionId, (store, false))
+
+    // Register a cleanup callback to be executed when the task completes
+    ctxt.addTaskCompletionListener[Unit](_ => {

Review Comment:
   @ericm-db - I guess we did not register any listener before in the 
`ReadStateStoreRDD` path?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to