ericm-db commented on code in PR #50123: URL: https://github.com/apache/spark/pull/50123#discussion_r2006158058
########## sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreProvider.scala: ########## @@ -966,3 +968,38 @@ class RocksDBStateStoreChangeDataReader( } } } + +/** + * Class used to relay events reported from a RocksDB instance to the state store coordinator. + * + * We pass this into the RocksDB instance to report specific events like snapshot uploads. + * This should only be used to report back to the coordinator for metrics and monitoring purposes. + */ +private[state] case class RocksDBEventListener( + queryRunId: String, + stateStoreId: StateStoreId, + storeConf: StateStoreConf) { + + /** ID of the state store provider managing the RocksDB instance */ + private val stateStoreProviderId: StateStoreProviderId = + StateStoreProviderId(stateStoreId, UUID.fromString(queryRunId)) Review Comment: remove queryRunId ########## sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala: ########## @@ -73,7 +74,9 @@ class RocksDB( hadoopConf: Configuration = new Configuration, loggingId: String = "", useColumnFamilies: Boolean = false, - enableStateStoreCheckpointIds: Boolean = false) extends Logging { + enableStateStoreCheckpointIds: Boolean = false, + eventListener: Option[RocksDBEventListener] = None) + extends Logging { Review Comment: nit: move `extends Logging {` up to the line above ########## sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreProvider.scala: ########## @@ -385,6 +385,7 @@ private[sql] class RocksDBStateStoreProvider this.useColumnFamilies = useColumnFamilies this.stateStoreEncoding = storeConf.stateStoreEncodingFormat this.stateSchemaProvider = stateSchemaProvider + this.rocksDBEventListener = RocksDBEventListener(getRunId(hadoopConf), stateStoreId, storeConf) Review Comment: nit: remove the run ID if it's unnecessary ########## sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreCoordinatorSuite.scala: ########## @@ -26,8 +26,9 @@ import 
org.apache.spark.{SharedSparkContext, SparkContext, SparkFunSuite} import org.apache.spark.scheduler.ExecutorCacheTaskLocation import org.apache.spark.sql.classic.SparkSession import org.apache.spark.sql.execution.streaming.{MemoryStream, StreamingQueryWrapper} -import org.apache.spark.sql.functions.count -import org.apache.spark.sql.internal.SQLConf.SHUFFLE_PARTITIONS +import org.apache.spark.sql.execution.streaming.StreamingSymmetricHashJoinHelper.{LeftSide, RightSide} +import org.apache.spark.sql.functions.{count, expr} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.util.Utils class StateStoreCoordinatorSuite extends SparkFunSuite with SharedSparkContext { Review Comment: Add a test for the query restart case ########## sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreCoordinator.scala: ########## @@ -129,10 +202,25 @@ class StateStoreCoordinatorRef private(rpcEndpointRef: RpcEndpointRef) { * Class for coordinating instances of [[StateStore]]s loaded in executors across the cluster, * and get their locations for job scheduling. */ -private class StateStoreCoordinator(override val rpcEnv: RpcEnv) - extends ThreadSafeRpcEndpoint with Logging { +private class StateStoreCoordinator( + override val rpcEnv: RpcEnv, + val sqlConf: SQLConf) + extends ThreadSafeRpcEndpoint + with Logging { Review Comment: nit: move `with Logging {` up to the line above -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org