Hi
I am using Flink retained checkpoints together with the
jobs/:jobid/checkpoints REST API to retrieve the latest retained checkpoint.
The response from the Flink checkpoints API is included at the end of this
message.

My job is configured with 5 restart attempts.
In the checkpoint API response, under the "latest" key, the checkpoint paths
("external_path") of the "restored" and "completed" entries behave as follows:
1) Suppose the job fails 3 times and recovers on the 4th attempt; then both
values are the same.
2) Suppose the job fails 4 times and recovers on the 5th attempt; then both
values are the same.
3) Suppose the job fails 5 times and recovers on the 6th attempt; then both
values are the same.
4) Suppose the job fails all 6 times and is marked as failed; then both
values are also the same.
5) Suppose the job fails for the 6th time after having recovered from 5
attempts and completed a few checkpoints; then the two values are different.

In cases (1), (2), (3) and (4) I never had any issue.
Only in case (5) did I have a severe issue in production, because the
checkpoint referenced by the "restored" field no longer exists.

Please suggest any fix or workaround.



{
   "counts":{
      "restored":6,
      "total":3,
      "in_progress":0,
      "completed":3,
      "failed":0
   },
   "summary":{
      "state_size":{
         "min":4879,
         "max":4879,
         "avg":4879
      },
      "end_to_end_duration":{
         "min":25,
         "max":130,
         "avg":87
      },
      "alignment_buffered":{
         "min":0,
         "max":0,
         "avg":0
      }
   },
   "latest":{
      "completed":{
         "@class":"completed",
         "id":7094,
         "status":"COMPLETED",
         "is_savepoint":false,
         "trigger_timestamp":1590382502772,
         "latest_ack_timestamp":1590382502902,
         "state_size":4879,
         "end_to_end_duration":130,
         "alignment_buffered":0,
         "num_subtasks":2,
         "num_acknowledged_subtasks":2,
         "tasks":{

         },
         "external_path":"file:/var/lib/persist/flink/checkpoints/29ae7600aa4f7d53a0dc1a0a7b257c85/chk-7094",
         "discarded":false
      },
      "savepoint":null,
      "failed":null,
      "restored":{
         "id":7093,
         "restore_timestamp":1590382478448,
         "is_savepoint":false,
         "external_path":"file:/var/lib/persist/flink/checkpoints/29ae7600aa4f7d53a0dc1a0a7b257c85/chk-7093"
      }
   },
   "history":[
      {
         "@class":"completed",
         "id":7094,
         "status":"COMPLETED",
         "is_savepoint":false,
         "trigger_timestamp":1590382502772,
         "latest_ack_timestamp":1590382502902,
         "state_size":4879,
         "end_to_end_duration":130,
         "alignment_buffered":0,
         "num_subtasks":2,
         "num_acknowledged_subtasks":2,
         "tasks":{

         },
         "external_path":"file:/var/lib/persist/flink/checkpoints/29ae7600aa4f7d53a0dc1a0a7b257c85/chk-7094",
         "discarded":false
      },
      {
         "@class":"completed",
         "id":7093,
         "status":"COMPLETED",
         "is_savepoint":false,
         "trigger_timestamp":1590382310195,
         "latest_ack_timestamp":1590382310220,
         "state_size":4879,
         "end_to_end_duration":25,
         "alignment_buffered":0,
         "num_subtasks":2,
         "num_acknowledged_subtasks":2,
         "tasks":{

         },
         "external_path":"file:/var/lib/persist/flink/checkpoints/29ae7600aa4f7d53a0dc1a0a7b257c85/chk-7093",
         "discarded":false
      },
      {
         "@class":"completed",
         "id":7092,
         "status":"COMPLETED",
         "is_savepoint":false,
         "trigger_timestamp":1590382190195,
         "latest_ack_timestamp":1590382190303,
         "state_size":4879,
         "end_to_end_duration":108,
         "alignment_buffered":0,
         "num_subtasks":2,
         "num_acknowledged_subtasks":2,
         "tasks":{

         },
         "external_path":"file:/var/lib/persist/flink/checkpoints/29ae7600aa4f7d53a0dc1a0a7b257c85/chk-7092",
         "discarded":true
      }
   ]
}

Reply via email to