[ https://issues.apache.org/jira/browse/FLINK-32566?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Yuan Kui updated FLINK-32566:
-----------------------------
    Description: 
When reading Parquet format files, if the data of some fields is too large, HeapBytesVector exceeds the int maximum value (Integer.MAX_VALUE) when expanding its capacity and throws a java.lang.NegativeArraySizeException.
{code:java}
switched from RUNNING to FAILED with failure cause: java.lang.RuntimeException: One or more fetchers have encountered exception
	at org.apache.flink.connector.base.source.reader.fetcher.SplitFetcherManager.checkErrors(SplitFetcherManager.java:261)
	at org.apache.flink.connector.base.source.reader.SourceReaderBase.getNextFetch(SourceReaderBase.java:169)
	at org.apache.flink.connector.base.source.reader.SourceReaderBase.pollNext(SourceReaderBase.java:131)
	at org.apache.flink.streaming.api.operators.SourceOperator.emitNext(SourceOperator.java:422)
	at org.apache.flink.streaming.runtime.io.StreamTaskSourceInput.emitNext(StreamTaskSourceInput.java:68)
	at org.apache.flink.streaming.runtime.io.StreamOneInputProcessor.processInput(StreamOneInputProcessor.java:65)
	at org.apache.flink.streaming.runtime.tasks.StreamTask.processInput(StreamTask.java:550)
	at org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor.runMailboxLoop(MailboxProcessor.java:231)
	at org.apache.flink.streaming.runtime.tasks.StreamTask.runMailboxLoop(StreamTask.java:839)
	at org.apache.flink.streaming.runtime.tasks.StreamTask.invoke(StreamTask.java:788)
	at org.apache.flink.runtime.taskmanager.Task.runWithSystemExitMonitoring(Task.java:961)
	at org.apache.flink.runtime.taskmanager.Task.restoreAndInvoke(Task.java:939)
	at org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:749)
	at org.apache.flink.runtime.taskmanager.Task.run(Task.java:564)
	at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.RuntimeException: SplitFetcher thread 0 received unexpected exception while polling the records
	at org.apache.flink.connector.base.source.reader.fetcher.SplitFetcher.runOnce(SplitFetcher.java:165)
	at org.apache.flink.connector.base.source.reader.fetcher.SplitFetcher.run(SplitFetcher.java:114)
	at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
	at java.util.concurrent.FutureTask.run(FutureTask.java:266)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	... 1 more
Caused by: java.lang.NegativeArraySizeException
	at org.apache.flink.table.data.columnar.vector.heap.HeapBytesVector.reserve(HeapBytesVector.java:102)
	at org.apache.flink.table.data.columnar.vector.heap.HeapBytesVector.appendBytes(HeapBytesVector.java:79)
	at org.apache.flink.formats.parquet.vector.reader.BytesColumnReader.readBinary(BytesColumnReader.java:88)
	at org.apache.flink.formats.parquet.vector.reader.BytesColumnReader.readBatch(BytesColumnReader.java:50)
	at org.apache.flink.formats.parquet.vector.reader.BytesColumnReader.readBatch(BytesColumnReader.java:31)
	at org.apache.flink.formats.parquet.vector.reader.AbstractColumnReader.readToVector(AbstractColumnReader.java:189)
	at org.apache.flink.formats.parquet.ParquetVectorizedInputFormat$ParquetReader.nextBatch(ParquetVectorizedInputFormat.java:401)
	at org.apache.flink.formats.parquet.ParquetVectorizedInputFormat$ParquetReader.readBatch(ParquetVectorizedInputFormat.java:369)
	at org.apache.flink.connector.file.src.impl.FileSourceSplitReader.fetch(FileSourceSplitReader.java:67)
	at org.apache.flink.connector.base.source.reader.fetcher.FetchTask.run(FetchTask.java:58)
	at org.apache.flink.connector.base.source.reader.fetcher.SplitFetcher.runOnce(SplitFetcher.java:162)
	... 6 more
{code}
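For context on the failure mode: HeapBytesVector backs a whole batch's byte data with a single heap byte[], whose size is an int, so the usable capacity is bounded by Integer.MAX_VALUE. The sketch below is illustrative only (made-up class name, a simple grow-by-doubling assumption, not the actual HeapBytesVector code); it shows how an int capacity calculation wraps to a negative value once the required size passes 2^30 and why the subsequent allocation fails with exactly this exception.
{code:java}
// Illustrative only: hypothetical class, not Flink's HeapBytesVector code.
// Demonstrates how an int-based grow-by-doubling capacity wraps to a
// negative value and why allocating with it throws
// java.lang.NegativeArraySizeException.
public class NegativeArraySizeDemo {

    public static void main(String[] args) {
        // Bytes required so far for a batch of a wide binary column:
        // just over 1 GiB (a made-up value for the demo).
        int requiredCapacity = (1 << 30) + 1;

        // Doubling in int arithmetic overflows: 2 * (2^30 + 1) wraps to a
        // negative number instead of ~2 GiB.
        int newCapacity = requiredCapacity * 2;
        System.out.println("newCapacity = " + newCapacity); // prints -2147483646

        // Allocating with the wrapped value fails the same way as the
        // innermost cause in the stack trace above.
        byte[] buffer = new byte[newCapacity];
    }
}
{code}
Running the sketch prints the wrapped (negative) capacity and then throws java.lang.NegativeArraySizeException from the array allocation, matching the innermost cause in the trace above.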
was:
When reading Parquet format files, if the data of some fields is too large, HeapBytesVector exceeds the int maximum value (Integer.MAX_VALUE) when expanding its capacity and throws a java.lang.NegativeArraySizeException.
{code:java}
switched from RUNNING to FAILED with failure cause: java.lang.RuntimeException: One or more fetchers have encountered exception
	at org.apache.flink.connector.base.source.reader.fetcher.SplitFetcherManager.checkErrors(SplitFetcherManager.java:233)
	at org.apache.flink.connector.base.source.reader.SourceReaderBase.getNextFetch(SourceReaderBase.java:168)
	at org.apache.flink.connector.base.source.reader.SourceReaderBase.pollNext(SourceReaderBase.java:129)
	at org.apache.flink.streaming.api.operators.SourceOperator.emitNext(SourceOperator.java:396)
	at org.apache.flink.streaming.runtime.io.StreamTaskSourceInput.emitNext(StreamTaskSourceInput.java:69)
	at org.apache.flink.streaming.runtime.io.StreamOneInputProcessor.processInput(StreamOneInputProcessor.java:66)
	at org.apache.flink.streaming.runtime.tasks.StreamTask.processInput(StreamTask.java:426)
	at org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor.runMailboxLoop(MailboxProcessor.java:204)
	at org.apache.flink.streaming.runtime.tasks.StreamTask.runMailboxLoop(StreamTask.java:684)
	at org.apache.flink.streaming.runtime.tasks.StreamTask.executeInvoke(StreamTask.java:639)
	at org.apache.flink.streaming.runtime.tasks.StreamTask.runWithCleanUpOnFail(StreamTask.java:650)
	at org.apache.flink.streaming.runtime.tasks.StreamTask.invoke(StreamTask.java:623)
	at org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:786)
	at org.apache.flink.runtime.taskmanager.Task.run(Task.java:566)
	at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.RuntimeException: SplitFetcher thread 0 received unexpected exception while polling the records
	at org.apache.flink.connector.base.source.reader.fetcher.SplitFetcher.runOnce(SplitFetcher.java:163)
	at org.apache.flink.connector.base.source.reader.fetcher.SplitFetcher.run(SplitFetcher.java:112)
	at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
	at java.util.concurrent.FutureTask.run(FutureTask.java:266)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	... 1 more
Caused by: java.lang.NegativeArraySizeException
	at org.apache.flink.table.data.vector.heap.HeapBytesVector.reserve(HeapBytesVector.java:100)
	at org.apache.flink.table.data.vector.heap.HeapBytesVector.appendBytes(HeapBytesVector.java:77)
	at org.apache.flink.formats.parquet.vector.reader.BytesColumnReader.readBinary(BytesColumnReader.java:88)
	at org.apache.flink.formats.parquet.vector.reader.BytesColumnReader.readBatch(BytesColumnReader.java:50)
	at org.apache.flink.formats.parquet.vector.reader.BytesColumnReader.readBatch(BytesColumnReader.java:31)
	at org.apache.flink.formats.parquet.vector.reader.AbstractColumnReader.readToVector(AbstractColumnReader.java:189)
	at org.apache.flink.formats.parquet.ParquetVectorizedInputFormat$ParquetReader.nextBatch(ParquetVectorizedInputFormat.java:405)
	at org.apache.flink.formats.parquet.ParquetVectorizedInputFormat$ParquetReader.readBatch(ParquetVectorizedInputFormat.java:373)
	at org.apache.flink.connector.file.src.impl.FileSourceSplitReader.fetch(FileSourceSplitReader.java:71)
	at org.apache.flink.connector.base.source.reader.fetcher.FetchTask.run(FetchTask.java:56)
	at org.apache.flink.connector.base.source.reader.fetcher.SplitFetcher.runOnce(SplitFetcher.java:160)
	... 6 more
{code}


> HeapBytesVector meets java.lang.NegativeArraySizeException
> ----------------------------------------------------------
>
>                 Key: FLINK-32566
>                 URL: https://issues.apache.org/jira/browse/FLINK-32566
>             Project: Flink
>          Issue Type: Bug
>          Components: Table SQL / Runtime
>            Reporter: Yuan Kui
>            Priority: Major
>
> When reading Parquet format files, if the data of some fields is too large, HeapBytesVector exceeds the int maximum value
> (Integer.MAX_VALUE) when expanding its capacity and throws a java.lang.NegativeArraySizeException.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)