Francesco Guardiani created FLINK-26988:
-------------------------------------------

             Summary: Better error reporting when format fails
                 Key: FLINK-26988
                 URL: https://issues.apache.org/jira/browse/FLINK-26988
             Project: Flink
          Issue Type: Bug
          Components: Connectors / FileSystem
            Reporter: Francesco Guardiani


Today, when a format fails, depending on the format implementation, there might 
be no information about the split (file name, offset, etc.), making it hard to 
debug format failures. For example, here is what I've seen with the CSV format:


{code:java}
Caused by: java.lang.RuntimeException: One or more fetchers have encountered 
exception
    at 
org.apache.flink.connector.base.source.reader.fetcher.SplitFetcherManager.checkErrors(SplitFetcherManager.java:225)
 ~[flink-connector-files-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at 
org.apache.flink.connector.base.source.reader.SourceReaderBase.getNextFetch(SourceReaderBase.java:169)
 ~[flink-connector-files-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at 
org.apache.flink.connector.base.source.reader.SourceReaderBase.pollNext(SourceReaderBase.java:130)
 ~[flink-connector-files-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at 
org.apache.flink.streaming.api.operators.SourceOperator.emitNextNotReading(SourceOperator.java:403)
 ~[flink-dist-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at 
org.apache.flink.streaming.api.operators.SourceOperator.emitNext(SourceOperator.java:387)
 ~[flink-dist-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at 
org.apache.flink.streaming.runtime.io.StreamTaskSourceInput.emitNext(StreamTaskSourceInput.java:68)
 ~[flink-dist-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at 
org.apache.flink.streaming.runtime.io.StreamOneInputProcessor.processInput(StreamOneInputProcessor.java:65)
 ~[flink-dist-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at 
org.apache.flink.streaming.runtime.io.StreamMultipleInputProcessor.processInput(StreamMultipleInputProcessor.java:85)
 ~[flink-dist-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at 
org.apache.flink.streaming.runtime.tasks.StreamTask.processInput(StreamTask.java:531)
 ~[flink-dist-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at 
org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor.runMailboxLoop(MailboxProcessor.java:227)
 ~[flink-dist-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at 
org.apache.flink.streaming.runtime.tasks.StreamTask.runMailboxLoop(StreamTask.java:841)
 ~[flink-dist-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at 
org.apache.flink.streaming.runtime.tasks.StreamTask.invoke(StreamTask.java:767) 
~[flink-dist-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at 
org.apache.flink.runtime.taskmanager.Task.runWithSystemExitMonitoring(Task.java:948)
 ~[flink-dist-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at 
org.apache.flink.runtime.taskmanager.Task.restoreAndInvoke(Task.java:927) 
~[flink-dist-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:741) 
~[flink-dist-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at org.apache.flink.runtime.taskmanager.Task.run(Task.java:563) 
~[flink-dist-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at java.lang.Thread.run(Thread.java:829) ~[?:?]
Caused by: java.lang.RuntimeException: SplitFetcher thread 0 received 
unexpected exception while polling the records
    at 
org.apache.flink.connector.base.source.reader.fetcher.SplitFetcher.runOnce(SplitFetcher.java:150)
 ~[flink-connector-files-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at 
org.apache.flink.connector.base.source.reader.fetcher.SplitFetcher.run(SplitFetcher.java:105)
 ~[flink-connector-files-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515) 
~[?:?]
    at java.util.concurrent.FutureTask.run(FutureTask.java:264) ~[?:?]
    at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) 
~[?:?]
    at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) 
~[?:?]
    at java.lang.Thread.run(Thread.java:829) ~[?:?]
Caused by: java.lang.RuntimeException: Invalid UTF-8 middle byte 0x54 (at char 
#3529, byte #3528): check content encoding, does not look like UTF-8
    at 
org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.MappingIterator._handleIOException(MappingIterator.java:417)
 ~[flink-dist-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at 
org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.MappingIterator.hasNext(MappingIterator.java:190)
 ~[flink-dist-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at 
org.apache.flink.formats.csv.CsvReaderFormat$Reader.read(CsvReaderFormat.java:194)
 ~[flink-csv-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at 
org.apache.flink.connector.file.src.impl.StreamFormatAdapter$Reader.readBatch(StreamFormatAdapter.java:214)
 ~[flink-connector-files-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at 
org.apache.flink.connector.file.src.impl.FileSourceSplitReader.fetch(FileSourceSplitReader.java:67)
 ~[flink-connector-files-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at 
org.apache.flink.connector.base.source.reader.fetcher.FetchTask.run(FetchTask.java:58)
 ~[flink-connector-files-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at 
org.apache.flink.connector.base.source.reader.fetcher.SplitFetcher.runOnce(SplitFetcher.java:142)
 ~[flink-connector-files-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at 
org.apache.flink.connector.base.source.reader.fetcher.SplitFetcher.run(SplitFetcher.java:105)
 ~[flink-connector-files-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515) 
~[?:?]
    at java.util.concurrent.FutureTask.run(FutureTask.java:264) ~[?:?]
    at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) 
~[?:?]
    at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) 
~[?:?]
    at java.lang.Thread.run(Thread.java:829) ~[?:?]
Caused by: java.io.CharConversionException: Invalid UTF-8 middle byte 0x54 (at 
char #3529, byte #3528): check content encoding, does not look like UTF-8
    at 
org.apache.flink.shaded.jackson2.com.fasterxml.jackson.dataformat.csv.impl.UTF8Reader.reportInvalidOther(UTF8Reader.java:507)
 ~[flink-dist-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at 
org.apache.flink.shaded.jackson2.com.fasterxml.jackson.dataformat.csv.impl.UTF8Reader.reportDeferredInvalid(UTF8Reader.java:518)
 ~[flink-dist-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at 
org.apache.flink.shaded.jackson2.com.fasterxml.jackson.dataformat.csv.impl.UTF8Reader.read(UTF8Reader.java:172)
 ~[flink-dist-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at 
org.apache.flink.shaded.jackson2.com.fasterxml.jackson.dataformat.csv.impl.CsvDecoder.loadMore(CsvDecoder.java:443)
 ~[flink-dist-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at 
org.apache.flink.shaded.jackson2.com.fasterxml.jackson.dataformat.csv.impl.CsvDecoder._nextChar(CsvDecoder.java:945)
 ~[flink-dist-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at 
org.apache.flink.shaded.jackson2.com.fasterxml.jackson.dataformat.csv.impl.CsvDecoder.nextString(CsvDecoder.java:642)
 ~[flink-dist-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at 
org.apache.flink.shaded.jackson2.com.fasterxml.jackson.dataformat.csv.CsvParser._skipUntilEndOfLine(CsvParser.java:1155)
 ~[flink-dist-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at 
org.apache.flink.shaded.jackson2.com.fasterxml.jackson.dataformat.csv.CsvParser.nextToken(CsvParser.java:680)
 ~[flink-dist-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at 
org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.MappingIterator._resync(MappingIterator.java:394)
 ~[flink-dist-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at 
org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.MappingIterator.hasNextValue(MappingIterator.java:238)
 ~[flink-dist-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at 
org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.MappingIterator.hasNext(MappingIterator.java:186)
 ~[flink-dist-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at 
org.apache.flink.formats.csv.CsvReaderFormat$Reader.read(CsvReaderFormat.java:194)
 ~[flink-csv-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at 
org.apache.flink.connector.file.src.impl.StreamFormatAdapter$Reader.readBatch(StreamFormatAdapter.java:214)
 ~[flink-connector-files-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at 
org.apache.flink.connector.file.src.impl.FileSourceSplitReader.fetch(FileSourceSplitReader.java:67)
 ~[flink-connector-files-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at 
org.apache.flink.connector.base.source.reader.fetcher.FetchTask.run(FetchTask.java:58)
 ~[flink-connector-files-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at 
org.apache.flink.connector.base.source.reader.fetcher.SplitFetcher.runOnce(SplitFetcher.java:142)
 ~[flink-connector-files-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at 
org.apache.flink.connector.base.source.reader.fetcher.SplitFetcher.run(SplitFetcher.java:105)
 ~[flink-connector-files-1.16-SNAPSHOT.jar:1.16-SNAPSHOT]
    at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515) 
~[?:?]
    at java.util.concurrent.FutureTask.run(FutureTask.java:264) ~[?:?]
    at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) 
~[?:?]
    at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) 
~[?:?]
    at java.lang.Thread.run(Thread.java:829) ~[?:?] {code}
It would be nice and helpful to add information such as the file name and offset 
to the exception propagated by the split reader thread, so that any format can 
benefit from it.



--
This message was sent by Atlassian Jira
(v8.20.1#820001)

Reply via email to