zhijiangW commented on a change in pull request #11515: [FLINK-16744][task] implement channel state persistence for unaligned checkpoints URL: https://github.com/apache/flink/pull/11515#discussion_r403721544
########## File path: flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/SubtaskCheckpointCoordinatorImpl.java ########## @@ -175,6 +203,77 @@ public void checkpointState( } } + private OperatorSnapshotFutures buildOperatorSnapshotFutures( + CheckpointMetaData checkpointMetaData, + CheckpointOptions checkpointOptions, + OperatorChain<?, ?> operatorChain, + StreamOperator<?> op, + Supplier<Boolean> isCanceled, + ChannelStateWriteResult channelStateWriteResult) throws Exception { + CheckpointStreamFactory storage = checkpointStorage.resolveCheckpointStorageLocation( + checkpointMetaData.getCheckpointId(), + checkpointOptions.getTargetLocation()); + OperatorSnapshotFutures snapshotInProgress = checkpointStreamOperator( + op, + checkpointMetaData, + checkpointOptions, + storage, + isCanceled); + if (op == operatorChain.getHeadOperator()) { + snapshotInProgress.setInputChannelStateFuture(channelStateWriteResult + .getInputChannelStateHandles() + .thenApply(StateObjectCollection::new) + .thenApply(SnapshotResult::of) + ); + } + if (op == operatorChain.getTailOperator()) { + snapshotInProgress.setResultSubpartitionStateFuture(channelStateWriteResult + .getResultSubpartitionStateHandles() + .thenApply(StateObjectCollection::new) + .thenApply(SnapshotResult::of) + ); + } + return snapshotInProgress; + } + + @Override + public void close() { + channelStateWriter.close(); + } + + // Caches checkpoint output stream factories to prevent multiple output stream per checkpoint. 
+ // This could result from requesting an output stream from different entities (this and channelStateWriter) + // We can't just pass a stream to the channelStateWriter because it can receive a checkpoint call earlier than this class does + // in some unaligned checkpoint scenarios + private static class CachingCheckpointStorageWorkerView implements CheckpointStorageWorkerView { + private final Map<Long, CheckpointStreamFactory> cache = new ConcurrentHashMap<>(); + private final CheckpointStorageWorkerView delegate; + + private CachingCheckpointStorageWorkerView(CheckpointStorageWorkerView delegate) { + this.delegate = delegate; + } + + void clearCacheFor(long checkpointId) { + cache.remove(checkpointId); + } + + @Override + public CheckpointStreamFactory resolveCheckpointStorageLocation(long checkpointId, CheckpointStorageLocationReference reference) { + return cache.computeIfAbsent(checkpointId, id -> { + try { + return delegate.resolveCheckpointStorageLocation(checkpointId, reference); + } catch (IOException e) { + throw new WrappingRuntimeException(e); Review comment: Sorry for the misleading comment. Some concerns are the same as https://github.com/apache/flink/pull/11515#discussion_r402050562, and another tiny concern is why not use `FlinkRuntimeException` directly? I have not found any special purpose for `WrappingRuntimeException`. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services