[ https://issues.apache.org/jira/browse/FLINK-10299?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16608810#comment-16608810 ]
ambition edited comment on FLINK-10299 at 9/10/18 12:51 PM:
------------------------------------------------------------
Sorry, the past two days were not workdays. Let me briefly describe the complete process. Flink consumes user app data captured in Kafka, and some values are erroneous, e.g. "-".

The sample data:
{code:java}
{"event_id": "10001","uid":"1561529398","timestamp": "1536288421", "viewport_height": "667","viewport_width": "375","language":"zh-CN"}
{"event_id": "1002","uid":"1561529398","timestamp": "-", "viewport_height": "667","viewport_width": "375","language":"zh-CN"}
{"event_id": "1003","uid":"1561529398","timestamp": "1536288421", "viewport_height": "667","viewport_width": "-","language":"zh-CN"}
{code}

Flink job code:
{code:java}
public class UserDataSQL {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment execEnv = StreamExecutionEnvironment.createLocalEnvironment();
        execEnv.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        execEnv.getCheckpointConfig().setCheckpointInterval(5000L);
        execEnv.getCheckpointConfig().enableExternalizedCheckpoints(
            CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
        execEnv.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 10000));

        FsStateBackend stateBackend = new FsStateBackend("hdfs:/flink/flink-checkpoints");
        execEnv.setStateBackend(stateBackend);

        StreamTableEnvironment env = StreamTableEnvironment.getTableEnvironment(execEnv);

        // Field names and types of the JSON records, in declaration order.
        Map<String, TypeInformation<?>> schemaMap = new LinkedHashMap<>();
        schemaMap.put("event_id", Types.INT);
        schemaMap.put("uid", Types.LONG);
        schemaMap.put("timestamp", Types.SQL_TIMESTAMP);
        schemaMap.put("viewport_height", Types.INT);
        schemaMap.put("viewport_width", Types.INT);
        schemaMap.put("language", Types.STRING);

        TableSchema tableSchema = new TableSchema(
            schemaMap.keySet().toArray(new String[schemaMap.size()]),
            schemaMap.values().toArray(new TypeInformation<?>[schemaMap.size()]));

        Properties kafkaProps = new Properties();
        kafkaProps.setProperty("bootstrap.servers", "xxx:9092");
        kafkaProps.setProperty("topic", "topic");
        kafkaProps.setProperty("enable.auto.commit", "true");
        kafkaProps.setProperty("group.id", "flink_group");

        Kafka010JsonTableSource kafka010JsonTableSource =
            new Kafka010JsonTableSource("topic", kafkaProps, tableSchema, tableSchema);
        kafka010JsonTableSource.setProctimeAttribute("timestamp");

        env.registerTableSource("user_data", kafka010JsonTableSource);
        env.registerTableSink("user_count", new MysqlTableUpsertSink());
        env.sqlUpdate("insert into user_count select count(uid) as uv, event_id from user_data group by event_id");

        execEnv.execute();
    }

    public static class MysqlTableUpsertSink implements UpsertStreamTableSink<Row> {
        // omit other code
    }

    public static class UserData {
        public Integer event_id;
        public Long uid;
        public Timestamp timestamp;
        public Integer viewport_height;
        public Integer viewport_width;
        public String language;
        // omit other code
    }
}
{code}

With the checkpoint function enabled, if the data contains an erroneous value the job shuts down and cannot be restarted. Right now there are two ways to get the job running again:
1. Delete the FsStateBackend data on HDFS.
2. Set the erroneous value to null, as in the picture I provided (a rough sketch of this approach follows below).

Is there a better way to record error data without affecting the checkpoint function? Thanks.
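To illustrate workaround 2 in code: below is a minimal sketch, assuming the raw JSON topic is consumed with a plain FlinkKafkaConsumer010 and parsed by a hand-written DeserializationSchema instead of the Kafka010JsonTableSource. The class name TolerantUserDataDeserializer, its helper methods, and the assumption that "timestamp" is epoch seconds are all made up for this example; this is only a sketch of the workaround, not a proposed fix for Flink itself. The idea is that a value that cannot be converted to the declared type becomes null before the Row ever reaches RowSerializer, so a bad record no longer kills the job or the checkpoint.
{code:java}
import java.io.IOException;
import java.sql.Timestamp;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.types.Row;

/** Hypothetical tolerant parser: malformed fields become null instead of failing the job. */
public class TolerantUserDataDeserializer implements DeserializationSchema<Row> {

    private static final ObjectMapper MAPPER = new ObjectMapper();

    @Override
    public Row deserialize(byte[] message) throws IOException {
        JsonNode node = MAPPER.readTree(message);
        Row row = new Row(6);
        row.setField(0, asInt(node.get("event_id")));
        row.setField(1, asLong(node.get("uid")));
        row.setField(2, asTimestamp(node.get("timestamp")));
        row.setField(3, asInt(node.get("viewport_height")));
        row.setField(4, asInt(node.get("viewport_width")));
        row.setField(5, node.hasNonNull("language") ? node.get("language").asText() : null);
        return row;
    }

    // Each helper returns null (instead of throwing) when the value is missing or not
    // parseable, e.g. the "-" placeholder in the sample data.
    private static Integer asInt(JsonNode n) {
        try { return n == null ? null : Integer.valueOf(n.asText()); }
        catch (NumberFormatException e) { return null; }
    }

    private static Long asLong(JsonNode n) {
        try { return n == null ? null : Long.valueOf(n.asText()); }
        catch (NumberFormatException e) { return null; }
    }

    private static Timestamp asTimestamp(JsonNode n) {
        // Assumption: the field carries epoch seconds as a string.
        try { return n == null ? null : new Timestamp(Long.parseLong(n.asText()) * 1000L); }
        catch (NumberFormatException e) { return null; }
    }

    @Override
    public boolean isEndOfStream(Row nextElement) {
        return false;
    }

    @Override
    public TypeInformation<Row> getProducedType() {
        return Types.ROW_NAMED(
            new String[] {"event_id", "uid", "timestamp", "viewport_height", "viewport_width", "language"},
            Types.INT, Types.LONG, Types.SQL_TIMESTAMP, Types.INT, Types.INT, Types.STRING);
    }
}
{code}
A DataStream<Row> produced by execEnv.addSource(new FlinkKafkaConsumer010<>("topic", new TolerantUserDataDeserializer(), kafkaProps)) could then be registered with registerDataStream in place of the Kafka010JsonTableSource; the records that had a field nulled out could also be counted or written to a side topic at this point if they need to be recorded.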
> RowSerializer.copy data value cast exception and use checkpoint function Lead to Could not restart this job
> -------------------------------------------------------------------------------------------------------------
>
>                 Key: FLINK-10299
>                 URL: https://issues.apache.org/jira/browse/FLINK-10299
>             Project: Flink
>          Issue Type: Bug
>          Components: Core
>    Affects Versions: 1.6.0
>            Reporter: ambition
>            Priority: Minor
>         Attachments: image-2018-09-07-17-47-04-343.png
>
> Flink SQL processes collected user-behavior data, such as:
> {code:java}
> {
>   "event_id": "session_start",
>   "timestamp": "-",        // error data
>   "viewport_height": "667",
>   "viewport_width": "-"    // error data
> }
> {code}
> This causes the following exception:
> {code:java}
> 2018-09-07 10:47:01,834 [flink-akka.actor.default-dispatcher-2] INFO executiongraph.ExecutionGraph (ExecutionGraph.java:tryRestartOrFail(1511)) - Could not restart the job Flink Streaming Job (6f0248219c631158f6e38f2dca0beb91) because the restart strategy prevented it.
> java.lang.ClassCastException: java.lang.String cannot be cast to java.sql.Timestamp
>     at org.apache.flink.api.common.typeutils.base.SqlTimestampSerializer.copy(SqlTimestampSerializer.java:27)
>     at org.apache.flink.api.java.typeutils.runtime.RowSerializer.copy(RowSerializer.java:95)
>     at org.apache.flink.api.java.typeutils.runtime.RowSerializer.copy(RowSerializer.java:46)
>     at org.apache.flink.streaming.runtime.tasks.OperatorChain$CopyingChainingOutput.pushToOperator(OperatorChain.java:577)
>     at org.apache.flink.streaming.runtime.tasks.OperatorChain$CopyingChainingOutput.collect(OperatorChain.java:554)
>     at org.apache.flink.streaming.runtime.tasks.OperatorChain$CopyingChainingOutput.collect(OperatorChain.java:534)
>     at org.apache.flink.streaming.api.operators.AbstractStreamOperator$CountingOutput.collect(AbstractStreamOperator.java:689)
>     at org.apache.flink.streaming.api.operators.AbstractStreamOperator$CountingOutput.collect(AbstractStreamOperator.java:667)
>     at org.apache.flink.streaming.api.operators.StreamSourceContexts$NonTimestampContext.collect(StreamSourceContexts.java:104)
>     at org.apache.flink.streaming.api.operators.StreamSourceContexts$NonTimestampContext.collectWithTimestamp(StreamSourceContexts.java:111)
>     at org.apache.flink.streaming.connectors.kafka.internals.AbstractFetcher.emitRecordWithTimestamp(AbstractFetcher.java:398)
>     at org.apache.flink.streaming.connectors.kafka.internal.Kafka010Fetcher.emitRecord(Kafka010Fetcher.java:89)
>     at org.apache.flink.streaming.connectors.kafka.internal.Kafka09Fetcher.runFetchLoop(Kafka09Fetcher.java:154)
>     at org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumerBase.run(FlinkKafkaConsumerBase.java:738)
>     at org.apache.flink.streaming.api.operators.StreamSource.run(StreamSource.java:87)
>     at org.apache.flink.streaming.api.operators.StreamSource.run(StreamSource.java:56)
>     at org.apache.flink.streaming.runtime.tasks.SourceStreamTask.run(SourceStreamTask.java:99)
>     at org.apache.flink.streaming.runtime.tasks.StreamTask.invoke(StreamTask.java:300)
>     at org.apache.flink.runtime.taskmanager.Task.run(Task.java:711)
>     at java.lang.Thread.run(Thread.java:748)
> 2018-09-07 10:47:01,834 [flink-akka.actor.default-dispatcher-2] INFO checkpoint.CheckpointCoordinator (CheckpointCoordinator.java:shutdown(320)) - Stopping checkpoint coordinator for job 6f0248219c631158f6e38f2dca0beb91.
> 2018-09-07 10:47:01,834 [flink-akka.actor.default-dispatcher-2] INFO checkpoint.StandaloneCompletedCheckpointStore (StandaloneCompletedCheckpointStore.java:shutdown(102)) - Shutting down
> {code}
> With the Flink checkpoint function enabled, this uncaught exception leads to "Could not restart this job", so for now the error values that trigger the exception are set to null, as shown in the image below. Hopefully a Flink committer can provide a better solution.
> !image-2018-09-07-17-47-04-343.png!
>
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)