zhijiangW commented on a change in pull request #11515: [FLINK-16744][task] implement channel state persistence for unaligned checkpoints URL: https://github.com/apache/flink/pull/11515#discussion_r401526771
########## File path: flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/SubtaskCheckpointCoordinatorImpl.java ########## @@ -175,6 +203,77 @@ public void checkpointState( } } + private OperatorSnapshotFutures buildOperatorSnapshotFutures( + CheckpointMetaData checkpointMetaData, + CheckpointOptions checkpointOptions, + OperatorChain<?, ?> operatorChain, + StreamOperator<?> op, + Supplier<Boolean> isCanceled, + ChannelStateWriteResult channelStateWriteResult) throws Exception { + CheckpointStreamFactory storage = checkpointStorage.resolveCheckpointStorageLocation( + checkpointMetaData.getCheckpointId(), + checkpointOptions.getTargetLocation()); + OperatorSnapshotFutures snapshotInProgress = checkpointStreamOperator( + op, + checkpointMetaData, + checkpointOptions, + storage, + isCanceled); + if (op == operatorChain.getHeadOperator()) { + snapshotInProgress.setInputChannelStateFuture(channelStateWriteResult + .getInputChannelStateHandles() + .thenApply(StateObjectCollection::new) + .thenApply(SnapshotResult::of) + ); + } + if (op == operatorChain.getTailOperator()) { + snapshotInProgress.setResultSubpartitionStateFuture(channelStateWriteResult + .getResultSubpartitionStateHandles() + .thenApply(StateObjectCollection::new) + .thenApply(SnapshotResult::of) + ); + } + return snapshotInProgress; + } + + @Override + public void close() { + channelStateWriter.close(); + } + + // Caches checkpoint output stream factories to prevent multiple output stream per checkpoint. 
+ // This could result from requesting output stream by different entities (this and channelStateWriter) + // We can't just pass a stream to the channelStateWriter because it can receive checkpoint call earlier than this class + // in some unaligned checkpoints scenarios + private static class CachingCheckpointStorageWorkerView implements CheckpointStorageWorkerView { + private final Map<Long, CheckpointStreamFactory> cache = new ConcurrentHashMap<>(); + private final CheckpointStorageWorkerView delegate; + + private CachingCheckpointStorageWorkerView(CheckpointStorageWorkerView delegate) { + this.delegate = delegate; + } + + void clearCacheFor(long checkpointId) { + cache.remove(checkpointId); + } + + @Override + public CheckpointStreamFactory resolveCheckpointStorageLocation(long checkpointId, CheckpointStorageLocationReference reference) { + return cache.computeIfAbsent(checkpointId, id -> { + try { + return delegate.resolveCheckpointStorageLocation(checkpointId, reference); + } catch (IOException e) { + throw new WrappingRuntimeException(e); Review comment: Is `WrappingRuntimeException` meant for wrapping non-runtime (checked) exceptions? I guess it is itself a runtime exception. If so, how about using `ExceptionUtils.rethrow(e)` instead? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services