zhuzhurk commented on a change in pull request #12278: URL: https://github.com/apache/flink/pull/12278#discussion_r441347449
##########
File path: flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/slotpool/SlotPoolImpl.java
##########
@@ -592,6 +589,31 @@ private PendingRequest findMatchingPendingRequest(final AllocatedSlot slot) {
         return null;
     }

+    private void maybeRemapOrphanedAllocation(
+            final AllocationID allocationIdOfRequest,
+            final AllocationID allocationIdOfSlot) {
+
+        final AllocationID orphanedAllocationId = allocationIdOfRequest.equals(allocationIdOfSlot)
+            ? null : allocationIdOfRequest;
+
+        // if the request that initiated the allocation is still pending, it should take over the orphaned allocation
+        // of the fulfilled request so that it can fail fast if the remapped allocation fails
+        if (orphanedAllocationId != null) {
+            final SlotRequestId requestIdOfAllocatedSlot = pendingRequests.getKeyA(allocationIdOfSlot);
+            if (requestIdOfAllocatedSlot != null) {
+                final PendingRequest requestOfAllocatedSlot = pendingRequests.getByKeyA(requestIdOfAllocatedSlot);
+                requestOfAllocatedSlot.setAllocationId(orphanedAllocationId);
+
+                // this re-insertion of initiatedRequestId will not affect its original insertion order
+                pendingRequests.put(requestIdOfAllocatedSlot, orphanedAllocationId, requestOfAllocatedSlot);
+            } else {
+                // cancel the slot request if the orphaned allocation is not remapped to a pending request.
+                // the request id can be null if the slot is returned by scheduler
+                resourceManagerGateway.cancelSlotRequest(orphanedAllocationId);

Review comment:
updated the comments to make it easier to understand

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at: us...@infra.apache.org