[ 
https://issues.apache.org/jira/browse/FLINK-20615?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Xintong Song updated FLINK-20615:
---------------------------------
    Fix Version/s:     (was: 1.12.2)
                   1.12.1

> Local recovery and sticky scheduling end-to-end test timeout with 
> "IOException: Stream Closed"
> ----------------------------------------------------------------------------------------------
>
>                 Key: FLINK-20615
>                 URL: https://issues.apache.org/jira/browse/FLINK-20615
>             Project: Flink
>          Issue Type: Bug
>          Components: Runtime / State Backends, Runtime / Task
>    Affects Versions: 1.12.0, 1.13.0
>            Reporter: Huang Xingbo
>            Assignee: Till Rohrmann
>            Priority: Critical
>              Labels: pull-request-available, test-stability
>             Fix For: 1.13.0, 1.12.1
>
>
> [https://dev.azure.com/apache-flink/apache-flink/_build/results?buildId=10905&view=logs&j=6caf31d6-847a-526e-9624-468e053467d6&t=0b23652f-b18b-5b6e-6eb6-a11070364610]
> It tried to restart many times, and the final error was the following:
> {code:java}
> 2020-12-15T23:54:00.5067862Z Dec 15 23:53:42 2020-12-15 23:53:41,538 ERROR 
> org.apache.flink.contrib.streaming.state.RocksDBKeyedStateBackendBuilder [] - 
> Caught unexpected exception.
> 2020-12-15T23:54:00.5068392Z Dec 15 23:53:42 java.io.IOException: Stream 
> Closed
> 2020-12-15T23:54:00.5068767Z Dec 15 23:53:42  at 
> java.io.FileInputStream.readBytes(Native Method) ~[?:?]
> 2020-12-15T23:54:00.5069223Z Dec 15 23:53:42  at 
> java.io.FileInputStream.read(FileInputStream.java:279) ~[?:?]
> 2020-12-15T23:54:00.5070150Z Dec 15 23:53:42  at 
> org.apache.flink.core.fs.local.LocalDataInputStream.read(LocalDataInputStream.java:73)
>  ~[flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5071217Z Dec 15 23:53:42  at 
> org.apache.flink.core.fs.FSDataInputStreamWrapper.read(FSDataInputStreamWrapper.java:61)
>  ~[flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5072295Z Dec 15 23:53:42  at 
> org.apache.flink.runtime.util.ForwardingInputStream.read(ForwardingInputStream.java:51)
>  ~[flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5072967Z Dec 15 23:53:42  at 
> java.io.DataInputStream.readFully(DataInputStream.java:200) ~[?:?]
> 2020-12-15T23:54:00.5073483Z Dec 15 23:53:42  at 
> java.io.DataInputStream.readFully(DataInputStream.java:170) ~[?:?]
> 2020-12-15T23:54:00.5074535Z Dec 15 23:53:42  at 
> org.apache.flink.api.common.typeutils.base.array.BytePrimitiveArraySerializer.deserialize(BytePrimitiveArraySerializer.java:85)
>  ~[flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5075847Z Dec 15 23:53:42  at 
> org.apache.flink.contrib.streaming.state.restore.RocksDBFullRestoreOperation.restoreKVStateData(RocksDBFullRestoreOperation.java:222)
>  ~[flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5077187Z Dec 15 23:53:42  at 
> org.apache.flink.contrib.streaming.state.restore.RocksDBFullRestoreOperation.restoreKeyGroupsInStateHandle(RocksDBFullRestoreOperation.java:169)
>  ~[flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5078495Z Dec 15 23:53:42  at 
> org.apache.flink.contrib.streaming.state.restore.RocksDBFullRestoreOperation.restore(RocksDBFullRestoreOperation.java:152)
>  ~[flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5079802Z Dec 15 23:53:42  at 
> org.apache.flink.contrib.streaming.state.RocksDBKeyedStateBackendBuilder.build(RocksDBKeyedStateBackendBuilder.java:269)
>  ~[flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5081013Z Dec 15 23:53:42  at 
> org.apache.flink.contrib.streaming.state.RocksDBStateBackend.createKeyedStateBackend(RocksDBStateBackend.java:565)
>  ~[flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5082215Z Dec 15 23:53:42  at 
> org.apache.flink.contrib.streaming.state.RocksDBStateBackend.createKeyedStateBackend(RocksDBStateBackend.java:94)
>  ~[flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5083500Z Dec 15 23:53:42  at 
> org.apache.flink.streaming.api.operators.StreamTaskStateInitializerImpl.lambda$keyedStatedBackend$1(StreamTaskStateInitializerImpl.java:299)
>  ~[flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5084899Z Dec 15 23:53:42  at 
> org.apache.flink.streaming.api.operators.BackendRestorerProcedure.attemptCreateAndRestore(BackendRestorerProcedure.java:142)
>  ~[flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5086342Z Dec 15 23:53:42  at 
> org.apache.flink.streaming.api.operators.BackendRestorerProcedure.createAndRestore(BackendRestorerProcedure.java:121)
>  ~[flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5087601Z Dec 15 23:53:42  at 
> org.apache.flink.streaming.api.operators.StreamTaskStateInitializerImpl.keyedStatedBackend(StreamTaskStateInitializerImpl.java:316)
>  ~[flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5088924Z Dec 15 23:53:42  at 
> org.apache.flink.streaming.api.operators.StreamTaskStateInitializerImpl.streamOperatorStateContext(StreamTaskStateInitializerImpl.java:155)
>  ~[flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5090261Z Dec 15 23:53:42  at 
> org.apache.flink.streaming.api.operators.AbstractStreamOperator.initializeState(AbstractStreamOperator.java:248)
>  ~[flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5091459Z Dec 15 23:53:42  at 
> org.apache.flink.streaming.runtime.tasks.OperatorChain.initializeStateAndOpenOperators(OperatorChain.java:400)
>  ~[flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5092604Z Dec 15 23:53:42  at 
> org.apache.flink.streaming.runtime.tasks.StreamTask.lambda$beforeInvoke$2(StreamTask.java:507)
>  ~[flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5093748Z Dec 15 23:53:42  at 
> org.apache.flink.streaming.runtime.tasks.StreamTaskActionExecutor$1.runThrowing(StreamTaskActionExecutor.java:47)
>  [flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5094866Z Dec 15 23:53:42  at 
> org.apache.flink.streaming.runtime.tasks.StreamTask.beforeInvoke(StreamTask.java:501)
>  [flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5095912Z Dec 15 23:53:42  at 
> org.apache.flink.streaming.runtime.tasks.StreamTask.invoke(StreamTask.java:531)
>  [flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5096875Z Dec 15 23:53:42  at 
> org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:722) 
> [flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5097814Z Dec 15 23:53:42  at 
> org.apache.flink.runtime.taskmanager.Task.run(Task.java:547) 
> [flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5098373Z Dec 15 23:53:42  at 
> java.lang.Thread.run(Thread.java:834) [?:?]
> 2020-12-15T23:54:00.5099549Z Dec 15 23:53:42 2020-12-15 23:53:41,557 WARN  
> org.apache.flink.streaming.api.operators.BackendRestorerProcedure [] - 
> Exception while restoring keyed state backend for 
> StreamFlatMap_20ba6b65f97481d5570070de90e4e791_(1/4) from alternative (1/1), 
> will retry while more alternatives are available.
> 2020-12-15T23:54:00.5100556Z Dec 15 23:53:42 
> org.apache.flink.runtime.state.BackendBuildingException: Caught unexpected 
> exception.
> 2020-12-15T23:54:00.5101480Z Dec 15 23:53:42  at 
> org.apache.flink.contrib.streaming.state.RocksDBKeyedStateBackendBuilder.build(RocksDBKeyedStateBackendBuilder.java:328)
>  ~[flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5102669Z Dec 15 23:53:42  at 
> org.apache.flink.contrib.streaming.state.RocksDBStateBackend.createKeyedStateBackend(RocksDBStateBackend.java:565)
>  ~[flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5103763Z Dec 15 23:53:42  at 
> org.apache.flink.contrib.streaming.state.RocksDBStateBackend.createKeyedStateBackend(RocksDBStateBackend.java:94)
>  ~[flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5104723Z Dec 15 23:53:42  at 
> org.apache.flink.streaming.api.operators.StreamTaskStateInitializerImpl.lambda$keyedStatedBackend$1(StreamTaskStateInitializerImpl.java:299)
>  ~[flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5105700Z Dec 15 23:53:42  at 
> org.apache.flink.streaming.api.operators.BackendRestorerProcedure.attemptCreateAndRestore(BackendRestorerProcedure.java:142)
>  ~[flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5106630Z Dec 15 23:53:42  at 
> org.apache.flink.streaming.api.operators.BackendRestorerProcedure.createAndRestore(BackendRestorerProcedure.java:121)
>  ~[flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5107587Z Dec 15 23:53:42  at 
> org.apache.flink.streaming.api.operators.StreamTaskStateInitializerImpl.keyedStatedBackend(StreamTaskStateInitializerImpl.java:316)
>  ~[flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5108581Z Dec 15 23:53:42  at 
> org.apache.flink.streaming.api.operators.StreamTaskStateInitializerImpl.streamOperatorStateContext(StreamTaskStateInitializerImpl.java:155)
>  ~[flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5109505Z Dec 15 23:53:42  at 
> org.apache.flink.streaming.api.operators.AbstractStreamOperator.initializeState(AbstractStreamOperator.java:248)
>  ~[flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5110456Z Dec 15 23:53:42  at 
> org.apache.flink.streaming.runtime.tasks.OperatorChain.initializeStateAndOpenOperators(OperatorChain.java:400)
>  ~[flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5111316Z Dec 15 23:53:42  at 
> org.apache.flink.streaming.runtime.tasks.StreamTask.lambda$beforeInvoke$2(StreamTask.java:507)
>  ~[flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5112175Z Dec 15 23:53:42  at 
> org.apache.flink.streaming.runtime.tasks.StreamTaskActionExecutor$1.runThrowing(StreamTaskActionExecutor.java:47)
>  [flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5113012Z Dec 15 23:53:42  at 
> org.apache.flink.streaming.runtime.tasks.StreamTask.beforeInvoke(StreamTask.java:501)
>  [flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5113787Z Dec 15 23:53:42  at 
> org.apache.flink.streaming.runtime.tasks.StreamTask.invoke(StreamTask.java:531)
>  [flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5114521Z Dec 15 23:53:42  at 
> org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:722) 
> [flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5115209Z Dec 15 23:53:42  at 
> org.apache.flink.runtime.taskmanager.Task.run(Task.java:547) 
> [flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5115635Z Dec 15 23:53:42  at 
> java.lang.Thread.run(Thread.java:834) [?:?]
> 2020-12-15T23:54:00.5115949Z Dec 15 23:53:42 Caused by: java.io.IOException: 
> Stream Closed
> 2020-12-15T23:54:00.5116246Z Dec 15 23:53:42  at 
> java.io.FileInputStream.readBytes(Native Method) ~[?:?]
> 2020-12-15T23:54:00.5116589Z Dec 15 23:53:42  at 
> java.io.FileInputStream.read(FileInputStream.java:279) ~[?:?]
> 2020-12-15T23:54:00.5117284Z Dec 15 23:53:42  at 
> org.apache.flink.core.fs.local.LocalDataInputStream.read(LocalDataInputStream.java:73)
>  ~[flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5118080Z Dec 15 23:53:42  at 
> org.apache.flink.core.fs.FSDataInputStreamWrapper.read(FSDataInputStreamWrapper.java:61)
>  ~[flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5118894Z Dec 15 23:53:42  at 
> org.apache.flink.runtime.util.ForwardingInputStream.read(ForwardingInputStream.java:51)
>  ~[flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5119392Z Dec 15 23:53:42  at 
> java.io.DataInputStream.readFully(DataInputStream.java:200) ~[?:?]
> 2020-12-15T23:54:00.5119808Z Dec 15 23:53:42  at 
> java.io.DataInputStream.readFully(DataInputStream.java:170) ~[?:?]
> 2020-12-15T23:54:00.5120605Z Dec 15 23:53:42  at 
> org.apache.flink.api.common.typeutils.base.array.BytePrimitiveArraySerializer.deserialize(BytePrimitiveArraySerializer.java:85)
>  ~[flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5121576Z Dec 15 23:53:42  at 
> org.apache.flink.contrib.streaming.state.restore.RocksDBFullRestoreOperation.restoreKVStateData(RocksDBFullRestoreOperation.java:222)
>  ~[flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5122579Z Dec 15 23:53:42  at 
> org.apache.flink.contrib.streaming.state.restore.RocksDBFullRestoreOperation.restoreKeyGroupsInStateHandle(RocksDBFullRestoreOperation.java:169)
>  ~[flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5123543Z Dec 15 23:53:42  at 
> org.apache.flink.contrib.streaming.state.restore.RocksDBFullRestoreOperation.restore(RocksDBFullRestoreOperation.java:152)
>  ~[flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5124476Z Dec 15 23:53:42  at 
> org.apache.flink.contrib.streaming.state.RocksDBKeyedStateBackendBuilder.build(RocksDBKeyedStateBackendBuilder.java:269)
>  ~[flink-dist_2.11-1.13-SNAPSHOT.jar:1.13-SNAPSHOT]
> 2020-12-15T23:54:00.5124994Z Dec 15 23:53:42  ... 16 more
> {code}
>  



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to