Hi, I am using Flink retained checkpoints together with the jobs/:jobid/checkpoints REST API to retrieve the latest retained checkpoint. The response of the Flink checkpoints API is shown below.
My job's restart attempts are set to 5. In the checkpoints API response, under the "latest" key, the checkpoint file names in the "restored" and "completed" values behave as follows: 1) Suppose the job fails 3 times and recovers on the 4th attempt; then both values are the same. 2) Suppose the job fails 4 times and recovers on the 5th attempt; then both values are the same. 3) Suppose the job fails 5 times and recovers on the 6th attempt; then both values are the same. 4) Suppose the job fails all 6 times and is marked as failed; then both values are also the same. 5) Suppose the job fails for the 6th time after having recovered from the previous 5 failures and having completed a few checkpoints; then the two values are different. In cases (1), (2), (3) and (4) I never had any issue. Only in case (5) did I have a severe issue in production, because the checkpoint referenced by the "restored" field no longer exists. Please suggest any solution. { "counts":{ "restored":6, "total":3, "in_progress":0, "completed":3, "failed":0 }, "summary":{ "state_size":{ "min":4879, "max":4879, "avg":4879 }, "end_to_end_duration":{ "min":25, "max":130, "avg":87 }, "alignment_buffered":{ "min":0, "max":0, "avg":0 } }, "latest":{ "completed":{ "@class":"completed", "id":7094, "status":"COMPLETED", "is_savepoint":false, "trigger_timestamp":1590382502772, "latest_ack_timestamp":1590382502902, "state_size":4879, "end_to_end_duration":130, "alignment_buffered":0, "num_subtasks":2, "num_acknowledged_subtasks":2, "tasks":{ }, "external_path":"file:/var/lib/persist/flink/checkpoints/29ae7600aa4f7d53a0dc1a0a7b257c85/chk-7094", "discarded":false }, "savepoint":null, "failed":null, "restored":{ "id":7093, "restore_timestamp":1590382478448, "is_savepoint":false, "external_path":"file:/var/lib/persist/flink/checkpoints/29ae7600aa4f7d53a0dc1a0a7b257c85/chk-7093" } }, "history":[ { "@class":"completed", "id":7094, "status":"COMPLETED", "is_savepoint":false, "trigger_timestamp":1590382502772, "latest_ack_timestamp":1590382502902, "state_size":4879, "end_to_end_duration":130, 
"alignment_buffered":0, "num_subtasks":2, "num_acknowledged_subtasks":2, "tasks":{ }, "external_path":"file:/var/lib/persist/flink/checkpoints/29ae7600aa4f7d53a0dc1a0a7b257c85/chk-7094", "discarded":false }, { "@class":"completed", "id":7093, "status":"COMPLETED", "is_savepoint":false, "trigger_timestamp":1590382310195, "latest_ack_timestamp":1590382310220, "state_size":4879, "end_to_end_duration":25, "alignment_buffered":0, "num_subtasks":2, "num_acknowledged_subtasks":2, "tasks":{ }, "external_path":"file:/var/lib/persist/flink/checkpoints/29ae7600aa4f7d53a0dc1a0a7b257c85/chk-7093", "discarded":false }, { "@class":"completed", "id":7092, "status":"COMPLETED", "is_savepoint":false, "trigger_timestamp":1590382190195, "latest_ack_timestamp":1590382190303, "state_size":4879, "end_to_end_duration":108, "alignment_buffered":0, "num_subtasks":2, "num_acknowledged_subtasks":2, "tasks":{ }, "external_path":"file:/var/lib/persist/flink/checkpoints/29ae7600aa4f7d53a0dc1a0a7b257c85/chk-7092", "discarded":true } ] }