[ 
https://issues.apache.org/jira/browse/FLINK-33442?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Roman Khachatryan resolved FLINK-33442.
---------------------------------------
    Resolution: Fixed

> UnsupportedOperationException thrown from RocksDBIncrementalRestoreOperation
> ----------------------------------------------------------------------------
>
>                 Key: FLINK-33442
>                 URL: https://issues.apache.org/jira/browse/FLINK-33442
>             Project: Flink
>          Issue Type: Bug
>          Components: Runtime / State Backends
>    Affects Versions: 1.17.1
>            Reporter: Roman Khachatryan
>            Assignee: Roman Khachatryan
>            Priority: Minor
>              Labels: pull-request-available
>             Fix For: 1.17.2
>
>
> When using the new rescaling API, it's possible to get the following exception:
> {code:java}
> 2023-10-31 18:25:05,179 ERROR 
> org.apache.flink.contrib.streaming.state.RocksDBKeyedStateBackendBuilder [] - 
> Caught unexpected exception.
> java.lang.UnsupportedOperationException: null
>       at java.util.Collections$1.remove(Collections.java:4714) ~[?:?]
>       at java.util.AbstractCollection.remove(AbstractCollection.java:299) 
> ~[?:?]
>       at 
> org.apache.flink.runtime.checkpoint.StateObjectCollection.remove(StateObjectCollection.java:105)
>  ~[flink-runtime-1.17.1-143.jar:1.17.1-143]
>       at 
> org.apache.flink.contrib.streaming.state.restore.RocksDBIncrementalRestoreOperation.restoreWithRescaling(RocksDBIncrementalRestoreOperation.java:294)
>  ~[flink-dist-1.17.1-143.jar:1.17.1-143]
>       at 
> org.apache.flink.contrib.streaming.state.restore.RocksDBIncrementalRestoreOperation.restore(RocksDBIncrementalRestoreOperation.java:167)
>  ~[flink-dist-1.17.1-143.jar:1.17.1-143]
>       at 
> org.apache.flink.contrib.streaming.state.RocksDBKeyedStateBackendBuilder.build(RocksDBKeyedStateBackendBuilder.java:327)
>  ~[flink-dist-1.17.1-143.jar:1.17.1-143]
>       at 
> org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend.createKeyedStateBackend(EmbeddedRocksDBStateBackend.java:512)
>  ~[flink-dist-1.17.1-143.jar:1.17.1-143]
>       at 
> org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend.createKeyedStateBackend(EmbeddedRocksDBStateBackend.java:99)
>  ~[flink-dist-1.17.1-143.jar:1.17.1-143]
>       at 
> org.apache.flink.streaming.api.operators.StreamTaskStateInitializerImpl.lambda$keyedStatedBackend$1(StreamTaskStateInitializerImpl.java:338)
>  ~[flink-dist-1.17.1-143.jar:1.17.1-143]
>       at 
> org.apache.flink.streaming.api.operators.BackendRestorerProcedure.attemptCreateAndRestore(BackendRestorerProcedure.java:168)
>  ~[flink-dist-1.17.1-143.jar:1.17.1-143]
>       at 
> org.apache.flink.streaming.api.operators.BackendRestorerProcedure.createAndRestore(BackendRestorerProcedure.java:135)
>  ~[flink-dist-1.17.1-143.jar:1.17.1-143]
>       at 
> org.apache.flink.streaming.api.operators.StreamTaskStateInitializerImpl.keyedStatedBackend(StreamTaskStateInitializerImpl.java:355)
>  ~[flink-dist-1.17.1-143.jar:1.17.1-143]
>       at 
> org.apache.flink.streaming.api.operators.StreamTaskStateInitializerImpl.streamOperatorStateContext(StreamTaskStateInitializerImpl.java:166)
>  ~[flink-dist-1.17.1-143.jar:1.17.1-143]
>       at 
> org.apache.flink.streaming.api.operators.AbstractStreamOperator.initializeState(AbstractStreamOperator.java:256)
>  ~[flink-dist-1.17.1-143.jar:1.17.1-143]
>       at 
> org.apache.flink.streaming.runtime.tasks.RegularOperatorChain.initializeStateAndOpenOperators(RegularOperatorChain.java:106)
>  ~[flink-dist-1.17.1-143.jar:1.17.1-143]
>       at 
> org.apache.flink.streaming.runtime.tasks.StreamTask.restoreGates(StreamTask.java:735)
>  ~[flink-dist-1.17.1-143.jar:1.17.1-143]
>       at 
> org.apache.flink.streaming.runtime.tasks.StreamTaskActionExecutor$1.call(StreamTaskActionExecutor.java:55)
>  ~[flink-dist-1.17.1-143.jar:1.17.1-143]
>       at 
> org.apache.flink.streaming.runtime.tasks.StreamTask.restoreInternal(StreamTask.java:710)
>  ~[flink-dist-1.17.1-143.jar:1.17.1-143]
>       at 
> org.apache.flink.streaming.runtime.tasks.StreamTask.restore(StreamTask.java:676)
>  ~[flink-dist-1.17.1-143.jar:1.17.1-143]
>       at 
> org.apache.flink.runtime.taskmanager.Task.runWithSystemExitMonitoring(Task.java:952)
>  [flink-runtime-1.17.1-143.jar:1.17.1-143]
>       at 
> org.apache.flink.runtime.taskmanager.Task.restoreAndInvoke(Task.java:921) 
> [flink-runtime-1.17.1-143.jar:1.17.1-143]
>       at org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:745) 
> [flink-runtime-1.17.1-143.jar:1.17.1-143]
>       at org.apache.flink.runtime.taskmanager.Task.run(Task.java:562) 
> [flink-runtime-1.17.1-143.jar:1.17.1-143]
>       at java.lang.Thread.run(Thread.java:829) [?:?]
> 2023-10-31 18:25:05,182 WARN  
> org.apache.flink.streaming.api.operators.BackendRestorerProcedure [] - 
> Exception while restoring keyed state backend for 
> KeyedProcessOperator_353a6b34b8b7f1c1d0fb4616d911049c_(1/2) from alternative 
> (1/2), will retry while more alternatives are available.
> org.apache.flink.runtime.state.BackendBuildingException: Caught unexpected 
> exception.
>       at 
> org.apache.flink.contrib.streaming.state.RocksDBKeyedStateBackendBuilder.build(RocksDBKeyedStateBackendBuilder.java:407)
>  ~[flink-dist-1.17.1-143.jar:1.17.1-143]
>       at 
> org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend.createKeyedStateBackend(EmbeddedRocksDBStateBackend.java:512)
>  ~[flink-dist-1.17.1-143.jar:1.17.1-143]
>       at 
> org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend.createKeyedStateBackend(EmbeddedRocksDBStateBackend.java:99)
>  ~[flink-dist-1.17.1-143.jar:1.17.1-143]
>       at 
> org.apache.flink.streaming.api.operators.StreamTaskStateInitializerImpl.lambda$keyedStatedBackend$1(StreamTaskStateInitializerImpl.java:338)
>  ~[flink-dist-1.17.1-143.jar:1.17.1-143]
>       at 
> org.apache.flink.streaming.api.operators.BackendRestorerProcedure.attemptCreateAndRestore(BackendRestorerProcedure.java:168)
>  ~[flink-dist-1.17.1-143.jar:1.17.1-143]
>       at 
> org.apache.flink.streaming.api.operators.BackendRestorerProcedure.createAndRestore(BackendRestorerProcedure.java:135)
>  ~[flink-dist-1.17.1-143.jar:1.17.1-143]
>       at 
> org.apache.flink.streaming.api.operators.StreamTaskStateInitializerImpl.keyedStatedBackend(StreamTaskStateInitializerImpl.java:355)
>  ~[flink-dist-1.17.1-143.jar:1.17.1-143]
>       at 
> org.apache.flink.streaming.api.operators.StreamTaskStateInitializerImpl.streamOperatorStateContext(StreamTaskStateInitializerImpl.java:166)
>  ~[flink-dist-1.17.1-143.jar:1.17.1-143]
>       at 
> org.apache.flink.streaming.api.operators.AbstractStreamOperator.initializeState(AbstractStreamOperator.java:256)
>  ~[flink-dist-1.17.1-143.jar:1.17.1-143]
>       at 
> org.apache.flink.streaming.runtime.tasks.RegularOperatorChain.initializeStateAndOpenOperators(RegularOperatorChain.java:106)
>  ~[flink-dist-1.17.1-143.jar:1.17.1-143]
>       at 
> org.apache.flink.streaming.runtime.tasks.StreamTask.restoreGates(StreamTask.java:735)
>  ~[flink-dist-1.17.1-143.jar:1.17.1-143]
>       at 
> org.apache.flink.streaming.runtime.tasks.StreamTaskActionExecutor$1.call(StreamTaskActionExecutor.java:55)
>  ~[flink-dist-1.17.1-143.jar:1.17.1-143]
>       at 
> org.apache.flink.streaming.runtime.tasks.StreamTask.restoreInternal(StreamTask.java:710)
>  ~[flink-dist-1.17.1-143.jar:1.17.1-143]
>       at 
> org.apache.flink.streaming.runtime.tasks.StreamTask.restore(StreamTask.java:676)
>  ~[flink-dist-1.17.1-143.jar:1.17.1-143]
>       at 
> org.apache.flink.runtime.taskmanager.Task.runWithSystemExitMonitoring(Task.java:952)
>  [flink-runtime-1.17.1-143.jar:1.17.1-143]
>       at 
> org.apache.flink.runtime.taskmanager.Task.restoreAndInvoke(Task.java:921) 
> [flink-runtime-1.17.1-143.jar:1.17.1-143]
>       at org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:745) 
> [flink-runtime-1.17.1-143.jar:1.17.1-143]
>       at org.apache.flink.runtime.taskmanager.Task.run(Task.java:562) 
> [flink-runtime-1.17.1-143.jar:1.17.1-143]
>       at java.lang.Thread.run(Thread.java:829) [?:?]
> Caused by: java.lang.UnsupportedOperationException
>       at java.util.Collections$1.remove(Collections.java:4714) ~[?:?]
>       at java.util.AbstractCollection.remove(AbstractCollection.java:299) 
> ~[?:?]
>       at 
> org.apache.flink.runtime.checkpoint.StateObjectCollection.remove(StateObjectCollection.java:105)
>  ~[flink-runtime-1.17.1-143.jar:1.17.1-143]
>       at 
> org.apache.flink.contrib.streaming.state.restore.RocksDBIncrementalRestoreOperation.restoreWithRescaling(RocksDBIncrementalRestoreOperation.java:294)
>  ~[flink-dist-1.17.1-143.jar:1.17.1-143]
>       at 
> org.apache.flink.contrib.streaming.state.restore.RocksDBIncrementalRestoreOperation.restore(RocksDBIncrementalRestoreOperation.java:167)
>  ~[flink-dist-1.17.1-143.jar:1.17.1-143]
>       at 
> org.apache.flink.contrib.streaming.state.RocksDBKeyedStateBackendBuilder.build(RocksDBKeyedStateBackendBuilder.java:327)
>  ~[flink-dist-1.17.1-143.jar:1.17.1-143]
>       ... 18 more
> {code}
> This presumably happens on upscaling.
> The job continues to recover (using the remote state).
> The issue occurs on 1.17 and should be fixed in 1.18 and master.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to