[ https://issues.apache.org/jira/browse/FLINK-33442?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Roman Khachatryan resolved FLINK-33442. --------------------------------------- Resolution: Fixed > UnsupportedOperationException thrown from RocksDBIncrementalRestoreOperation > ---------------------------------------------------------------------------- > > Key: FLINK-33442 > URL: https://issues.apache.org/jira/browse/FLINK-33442 > Project: Flink > Issue Type: Bug > Components: Runtime / State Backends > Affects Versions: 1.17.1 > Reporter: Roman Khachatryan > Assignee: Roman Khachatryan > Priority: Minor > Labels: pull-request-available > Fix For: 1.17.2 > > > When using the new rescaling API, it's possible to get > {code:java} > 2023-10-31 18:25:05,179 ERROR > org.apache.flink.contrib.streaming.state.RocksDBKeyedStateBackendBuilder [] - > Caught unexpected exception. > java.lang.UnsupportedOperationException: null > at java.util.Collections$1.remove(Collections.java:4714) ~[?:?] > at java.util.AbstractCollection.remove(AbstractCollection.java:299) > ~[?:?] > at > org.apache.flink.runtime.checkpoint.StateObjectCollection.remove(StateObjectCollection.java:105) > ~[flink-runtime-1.17.1-143.jar:1.17.1-143] > at > org.apache.flink.contrib.streaming.state.restore.RocksDBIncrementalRestoreOperation.restoreWithRescaling(RocksDBIncrementalRestoreOperation.java:294) > ~[flink-dist-1.17.1-143.jar:1.17.1-143] > at > org.apache.flink.contrib.streaming.state.restore.RocksDBIncrementalRestoreOperation.restore(RocksDBIncrementalRestoreOperation.java:167) > ~[flink-dist-1.17.1-143.jar:1.17.1-143] > at > org.apache.flink.contrib.streaming.state.RocksDBKeyedStateBackendBuilder.build(RocksDBKeyedStateBackendBuilder.java:327) > ~[flink-dist-1.17.1-143.jar:1.17.1-143] > at > org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend.createKeyedStateBackend(EmbeddedRocksDBStateBackend.java:512) > ~[flink-dist-1.17.1-143.jar:1.17.1-143] > at > org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend.createKeyedStateBackend(EmbeddedRocksDBStateBackend.java:99) > ~[flink-dist-1.17.1-143.jar:1.17.1-143] > at > org.apache.flink.streaming.api.operators.StreamTaskStateInitializerImpl.lambda$keyedStatedBackend$1(StreamTaskStateInitializerImpl.java:338) > ~[flink-dist-1.17.1-143.jar:1.17.1-143] > at > org.apache.flink.streaming.api.operators.BackendRestorerProcedure.attemptCreateAndRestore(BackendRestorerProcedure.java:168) > ~[flink-dist-1.17.1-143.jar:1.17.1-143] > at > org.apache.flink.streaming.api.operators.BackendRestorerProcedure.createAndRestore(BackendRestorerProcedure.java:135) > ~[flink-dist-1.17.1-143.jar:1.17.1-143] > at > org.apache.flink.streaming.api.operators.StreamTaskStateInitializerImpl.keyedStatedBackend(StreamTaskStateInitializerImpl.java:355) > ~[flink-dist-1.17.1-143.jar:1.17.1-143] > at > org.apache.flink.streaming.api.operators.StreamTaskStateInitializerImpl.streamOperatorStateContext(StreamTaskStateInitializerImpl.java:166) > ~[flink-dist-1.17.1-143.jar:1.17.1-143] > at > org.apache.flink.streaming.api.operators.AbstractStreamOperator.initializeState(AbstractStreamOperator.java:256) > ~[flink-dist-1.17.1-143.jar:1.17.1-143] > at > org.apache.flink.streaming.runtime.tasks.RegularOperatorChain.initializeStateAndOpenOperators(RegularOperatorChain.java:106) > ~[flink-dist-1.17.1-143.jar:1.17.1-143] > at > org.apache.flink.streaming.runtime.tasks.StreamTask.restoreGates(StreamTask.java:735) > ~[flink-dist-1.17.1-143.jar:1.17.1-143] > at > org.apache.flink.streaming.runtime.tasks.StreamTaskActionExecutor$1.call(StreamTaskActionExecutor.java:55) > ~[flink-dist-1.17.1-143.jar:1.17.1-143] > at > org.apache.flink.streaming.runtime.tasks.StreamTask.restoreInternal(StreamTask.java:710) > ~[flink-dist-1.17.1-143.jar:1.17.1-143] > at > org.apache.flink.streaming.runtime.tasks.StreamTask.restore(StreamTask.java:676) > ~[flink-dist-1.17.1-143.jar:1.17.1-143] > at > org.apache.flink.runtime.taskmanager.Task.runWithSystemExitMonitoring(Task.java:952) > [flink-runtime-1.17.1-143.jar:1.17.1-143] > at > org.apache.flink.runtime.taskmanager.Task.restoreAndInvoke(Task.java:921) > [flink-runtime-1.17.1-143.jar:1.17.1-143] > at org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:745) > [flink-runtime-1.17.1-143.jar:1.17.1-143] > at org.apache.flink.runtime.taskmanager.Task.run(Task.java:562) > [flink-runtime-1.17.1-143.jar:1.17.1-143] > at java.lang.Thread.run(Thread.java:829) [?:?] > 2023-10-31 18:25:05,182 WARN > org.apache.flink.streaming.api.operators.BackendRestorerProcedure [] - > Exception while restoring keyed state backend for > KeyedProcessOperator_353a6b34b8b7f1c1d0fb4616d911049c_(1/2) from alternative > (1/2), will retry while more alternatives are available. > org.apache.flink.runtime.state.BackendBuildingException: Caught unexpected > exception. > at > org.apache.flink.contrib.streaming.state.RocksDBKeyedStateBackendBuilder.build(RocksDBKeyedStateBackendBuilder.java:407) > ~[flink-dist-1.17.1-143.jar:1.17.1-143] > at > org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend.createKeyedStateBackend(EmbeddedRocksDBStateBackend.java:512) > ~[flink-dist-1.17.1-143.jar:1.17.1-143] > at > org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend.createKeyedStateBackend(EmbeddedRocksDBStateBackend.java:99) > ~[flink-dist-1.17.1-143.jar:1.17.1-143] > at > org.apache.flink.streaming.api.operators.StreamTaskStateInitializerImpl.lambda$keyedStatedBackend$1(StreamTaskStateInitializerImpl.java:338) > ~[flink-dist-1.17.1-143.jar:1.17.1-143] > at > org.apache.flink.streaming.api.operators.BackendRestorerProcedure.attemptCreateAndRestore(BackendRestorerProcedure.java:168) > ~[flink-dist-1.17.1-143.jar:1.17.1-143] > at > org.apache.flink.streaming.api.operators.BackendRestorerProcedure.createAndRestore(BackendRestorerProcedure.java:135) > ~[flink-dist-1.17.1-143.jar:1.17.1-143] > at > org.apache.flink.streaming.api.operators.StreamTaskStateInitializerImpl.keyedStatedBackend(StreamTaskStateInitializerImpl.java:355) > ~[flink-dist-1.17.1-143.jar:1.17.1-143] > at > org.apache.flink.streaming.api.operators.StreamTaskStateInitializerImpl.streamOperatorStateContext(StreamTaskStateInitializerImpl.java:166) > ~[flink-dist-1.17.1-143.jar:1.17.1-143] > at > org.apache.flink.streaming.api.operators.AbstractStreamOperator.initializeState(AbstractStreamOperator.java:256) > ~[flink-dist-1.17.1-143.jar:1.17.1-143] > at > org.apache.flink.streaming.runtime.tasks.RegularOperatorChain.initializeStateAndOpenOperators(RegularOperatorChain.java:106) > ~[flink-dist-1.17.1-143.jar:1.17.1-143] > at > org.apache.flink.streaming.runtime.tasks.StreamTask.restoreGates(StreamTask.java:735) > ~[flink-dist-1.17.1-143.jar:1.17.1-143] > at > org.apache.flink.streaming.runtime.tasks.StreamTaskActionExecutor$1.call(StreamTaskActionExecutor.java:55) > ~[flink-dist-1.17.1-143.jar:1.17.1-143] > at > org.apache.flink.streaming.runtime.tasks.StreamTask.restoreInternal(StreamTask.java:710) > ~[flink-dist-1.17.1-143.jar:1.17.1-143] > at > org.apache.flink.streaming.runtime.tasks.StreamTask.restore(StreamTask.java:676) > ~[flink-dist-1.17.1-143.jar:1.17.1-143] > at > org.apache.flink.runtime.taskmanager.Task.runWithSystemExitMonitoring(Task.java:952) > [flink-runtime-1.17.1-143.jar:1.17.1-143] > at > org.apache.flink.runtime.taskmanager.Task.restoreAndInvoke(Task.java:921) > [flink-runtime-1.17.1-143.jar:1.17.1-143] > at org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:745) > [flink-runtime-1.17.1-143.jar:1.17.1-143] > at org.apache.flink.runtime.taskmanager.Task.run(Task.java:562) > [flink-runtime-1.17.1-143.jar:1.17.1-143] > at java.lang.Thread.run(Thread.java:829) [?:?] > Caused by: java.lang.UnsupportedOperationException > at java.util.Collections$1.remove(Collections.java:4714) ~[?:?] > at java.util.AbstractCollection.remove(AbstractCollection.java:299) > ~[?:?] > at > org.apache.flink.runtime.checkpoint.StateObjectCollection.remove(StateObjectCollection.java:105) > ~[flink-runtime-1.17.1-143.jar:1.17.1-143] > at > org.apache.flink.contrib.streaming.state.restore.RocksDBIncrementalRestoreOperation.restoreWithRescaling(RocksDBIncrementalRestoreOperation.java:294) > ~[flink-dist-1.17.1-143.jar:1.17.1-143] > at > org.apache.flink.contrib.streaming.state.restore.RocksDBIncrementalRestoreOperation.restore(RocksDBIncrementalRestoreOperation.java:167) > ~[flink-dist-1.17.1-143.jar:1.17.1-143] > at > org.apache.flink.contrib.streaming.state.RocksDBKeyedStateBackendBuilder.build(RocksDBKeyedStateBackendBuilder.java:327) > ~[flink-dist-1.17.1-143.jar:1.17.1-143] > ... 18 more > {code} > presumably on upscaling. > The job continues to recover (using the remote state). > The issue occurs on 1.17 and should be fixed in 1.18 and master. -- This message was sent by Atlassian Jira (v8.20.10#820010)