[ https://issues.apache.org/jira/browse/BEAM-14544?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17550107#comment-17550107 ]
Danny McCormick commented on BEAM-14544: ---------------------------------------- This issue has been migrated to https://github.com/apache/beam/issues/21669 > Unable to drain pipelines due to seemingly irrelevant timestamp shift > validation > -------------------------------------------------------------------------------- > > Key: BEAM-14544 > URL: https://issues.apache.org/jira/browse/BEAM-14544 > Project: Beam > Issue Type: Bug > Components: runner-dataflow > Affects Versions: 2.38.0 > Reporter: Balázs Németh > Priority: P2 > > Beam 2.38.0 > I encounter the following stacktrace when I try to drain a Dataflow pipeline. > During the normal execution the pipeline is flawless, but gets stuck during > draining, > I don't explicitly modify windowing, triggering, timestamps, etc, but the > built-in IOs might (AFAIK the KafkaIO doesn't, but the using load jobs for BQ > insertions does due to the triggering frequency). > The pipeline that produces this: > - KafkaIO.readBytes() > - a custom minimalistic DoFn doing the parsing of > KafkaRecord.getKV().getValue() > - BigQueryIO.write() + method(FILE_LOADS) + triggeringFrequency(...) > > {code:java} > Error message from worker: java.lang.IllegalStateException: TimestampCombiner > moved element from 294247-01-10T04:00:54.775Z (TIMESTAMP_MAX_VALUE) to > earlier time 294247-01-09T04:00:54.775Z (end of global window) for window > org.apache.beam.sdk.transforms.windowing.GlobalWindow@71e075dc > > org.apache.beam.runners.dataflow.worker.repackaged.org.apache.beam.runners.core.WatermarkHold.shift(WatermarkHold.java:120) > > org.apache.beam.runners.dataflow.worker.repackaged.org.apache.beam.runners.core.WatermarkHold.addElementHold(WatermarkHold.java:157) > > org.apache.beam.runners.dataflow.worker.repackaged.org.apache.beam.runners.core.WatermarkHold.addHolds(WatermarkHold.java:104) > > org.apache.beam.runners.dataflow.worker.repackaged.org.apache.beam.runners.core.ReduceFnRunner.processElement(ReduceFnRunner.java:610) > > org.apache.beam.runners.dataflow.worker.repackaged.org.apache.beam.runners.core.ReduceFnRunner.processElements(ReduceFnRunner.java:360) > > org.apache.beam.runners.dataflow.worker.StreamingGroupAlsoByWindowViaWindowSetFn.processElement(StreamingGroupAlsoByWindowViaWindowSetFn.java:96) > > org.apache.beam.runners.dataflow.worker.StreamingGroupAlsoByWindowViaWindowSetFn.processElement(StreamingGroupAlsoByWindowViaWindowSetFn.java:43) > > org.apache.beam.runners.dataflow.worker.GroupAlsoByWindowFnRunner.invokeProcessElement(GroupAlsoByWindowFnRunner.java:121) > > org.apache.beam.runners.dataflow.worker.GroupAlsoByWindowFnRunner.processElement(GroupAlsoByWindowFnRunner.java:73) > > org.apache.beam.runners.dataflow.worker.repackaged.org.apache.beam.runners.core.LateDataDroppingDoFnRunner.processElement(LateDataDroppingDoFnRunner.java:80) > > org.apache.beam.runners.dataflow.worker.GroupAlsoByWindowsParDoFn.processElement(GroupAlsoByWindowsParDoFn.java:137) > > org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation.process(ParDoOperation.java:44) > > org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver.process(OutputReceiver.java:49) > > org.apache.beam.runners.dataflow.worker.util.common.worker.ReadOperation.runReadLoop(ReadOperation.java:212) > > org.apache.beam.runners.dataflow.worker.util.common.worker.ReadOperation.start(ReadOperation.java:163) > > org.apache.beam.runners.dataflow.worker.util.common.worker.MapTaskExecutor.execute(MapTaskExecutor.java:83) > > org.apache.beam.runners.dataflow.worker.StreamingDataflowWorker.process(StreamingDataflowWorker.java:1445) > > org.apache.beam.runners.dataflow.worker.StreamingDataflowWorker.access$1100(StreamingDataflowWorker.java:165) > > org.apache.beam.runners.dataflow.worker.StreamingDataflowWorker$7.run(StreamingDataflowWorker.java:1120) > > org.apache.beam.runners.dataflow.worker.util.BoundedQueueExecutor.lambda$executeLockHeld$0(BoundedQueueExecutor.java:133) > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) > java.base/java.lang.Thread.run(Thread.java:834) > java.lang.IllegalStateException: TimestampCombiner moved element from > 294247-01-10T04:00:54.775Z (TIMESTAMP_MAX_VALUE) to earlier time > 294247-01-09T04:00:54.775Z (end of global window) for window > org.apache.beam.sdk.transforms.windowing.GlobalWindow@71e075dc > > org.apache.beam.runners.dataflow.worker.repackaged.org.apache.beam.runners.core.WatermarkHold.shift(WatermarkHold.java:120) > > org.apache.beam.runners.dataflow.worker.repackaged.org.apache.beam.runners.core.WatermarkHold.addElementHold(WatermarkHold.java:157) > > org.apache.beam.runners.dataflow.worker.repackaged.org.apache.beam.runners.core.WatermarkHold.addHolds(WatermarkHold.java:104) > > org.apache.beam.runners.dataflow.worker.repackaged.org.apache.beam.runners.core.ReduceFnRunner.processElement(ReduceFnRunner.java:610) > > org.apache.beam.runners.dataflow.worker.repackaged.org.apache.beam.runners.core.ReduceFnRunner.processElements(ReduceFnRunner.java:360) > > org.apache.beam.runners.dataflow.worker.StreamingGroupAlsoByWindowViaWindowSetFn.processElement(StreamingGroupAlsoByWindowViaWindowSetFn.java:96) > > org.apache.beam.runners.dataflow.worker.StreamingGroupAlsoByWindowViaWindowSetFn.processElement(StreamingGroupAlsoByWindowViaWindowSetFn.java:43) > > org.apache.beam.runners.dataflow.worker.GroupAlsoByWindowFnRunner.invokeProcessElement(GroupAlsoByWindowFnRunner.java:121) > > org.apache.beam.runners.dataflow.worker.GroupAlsoByWindowFnRunner.processElement(GroupAlsoByWindowFnRunner.java:73) > > org.apache.beam.runners.dataflow.worker.repackaged.org.apache.beam.runners.core.LateDataDroppingDoFnRunner.processElement(LateDataDroppingDoFnRunner.java:80) > > org.apache.beam.runners.dataflow.worker.GroupAlsoByWindowsParDoFn.processElement(GroupAlsoByWindowsParDoFn.java:137) > > org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation.process(ParDoOperation.java:44) > > org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver.process(OutputReceiver.java:49) > > org.apache.beam.runners.dataflow.worker.util.common.worker.ReadOperation.runReadLoop(ReadOperation.java:212) > > org.apache.beam.runners.dataflow.worker.util.common.worker.ReadOperation.start(ReadOperation.java:163) > > org.apache.beam.runners.dataflow.worker.util.common.worker.MapTaskExecutor.execute(MapTaskExecutor.java:83) > > org.apache.beam.runners.dataflow.worker.StreamingDataflowWorker.process(StreamingDataflowWorker.java:1445) > > org.apache.beam.runners.dataflow.worker.StreamingDataflowWorker.access$1100(StreamingDataflowWorker.java:165) > > org.apache.beam.runners.dataflow.worker.StreamingDataflowWorker$7.run(StreamingDataflowWorker.java:1120) > > org.apache.beam.runners.dataflow.worker.util.BoundedQueueExecutor.lambda$executeLockHeld$0(BoundedQueueExecutor.java:133) > > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) > > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) > java.base/java.lang.Thread.run(Thread.java:834){code} -- This message was sent by Atlassian Jira (v8.20.7#820007)