Hi, this error comes from pyarrow failing while deserializing data. Could you share which version of pyarrow you are using? You can check it with: pip list | grep pyarrow
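You can also check from inside Python itself, which guarantees you are looking at the same environment the job runs under (a minimal sketch; run it in the interpreter your UDF workers use):

    import pyarrow

    # Print the pyarrow version installed in the current environment
    print(pyarrow.__version__)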
Best,
Xingbo

小学生 <201782...@qq.com> wrote on Mon, Dec 28, 2020 at 10:37 AM:
> Hi all, a question for the list: after switching to a vectorized UDF in PyFlink, the job runs for a while and then fails with the error below. I couldn't find any similar issue when searching; could someone take a look at what's going on?
>
> Caused by: java.lang.RuntimeException: Error received from SDK harness for instruction 8: Traceback (most recent call last):
>   File "/root/anaconda3/envs/bigdata/lib/python3.6/site-packages/apache_beam/runners/worker/sdk_worker.py", line 253, in _execute
>     response = task()
>   File "/root/anaconda3/envs/bigdata/lib/python3.6/site-packages/apache_beam/runners/worker/sdk_worker.py", line 310, in <lambda>
>     lambda: self.create_worker().do_instruction(request), request)
>   File "/root/anaconda3/envs/bigdata/lib/python3.6/site-packages/apache_beam/runners/worker/sdk_worker.py", line 480, in do_instruction
>     getattr(request, request_type), request.instruction_id)
>   File "/root/anaconda3/envs/bigdata/lib/python3.6/site-packages/apache_beam/runners/worker/sdk_worker.py", line 515, in process_bundle
>     bundle_processor.process_bundle(instruction_id))
>   File "/root/anaconda3/envs/bigdata/lib/python3.6/site-packages/apache_beam/runners/worker/bundle_processor.py", line 978, in process_bundle
>     element.data)
>   File "/root/anaconda3/envs/bigdata/lib/python3.6/site-packages/apache_beam/runners/worker/bundle_processor.py", line 218, in process_encoded
>     self.output(decoded_value)
>   File "apache_beam/runners/worker/operations.py", line 330, in apache_beam.runners.worker.operations.Operation.output
>   File "apache_beam/runners/worker/operations.py", line 332, in apache_beam.runners.worker.operations.Operation.output
>   File "apache_beam/runners/worker/operations.py", line 195, in apache_beam.runners.worker.operations.SingletonConsumerSet.receive
>   File "pyflink/fn_execution/beam/beam_operations_fast.pyx", line 71, in pyflink.fn_execution.beam.beam_operations_fast.FunctionOperation.process
>   File "pyflink/fn_execution/beam/beam_operations_fast.pyx", line 73, in pyflink.fn_execution.beam.beam_operations_fast.FunctionOperation.process
>   File "/root/anaconda3/envs/bigdata/lib/python3.6/site-packages/pyflink/fn_execution/beam/beam_coder_impl_slow.py", line 625, in decode_from_stream
>     yield self._decode_one_batch_from_stream(in_stream, in_stream.read_var_int64())
>   File "/root/anaconda3/envs/bigdata/lib/python3.6/site-packages/pyflink/fn_execution/beam/beam_coder_impl_slow.py", line 636, in _decode_one_batch_from_stream
>     return arrow_to_pandas(self._timezone, self._field_types, [next(self._batch_reader)])
>   File "/root/anaconda3/envs/bigdata/lib/python3.6/site-packages/pyflink/fn_execution/beam/beam_coder_impl_slow.py", line 629, in _load_from_stream
>     reader = pa.ipc.open_stream(stream)
>   File "/root/anaconda3/envs/bigdata/lib/python3.6/site-packages/pyarrow/ipc.py", line 146, in open_stream
>     return RecordBatchStreamReader(source)
>   File "/root/anaconda3/envs/bigdata/lib/python3.6/site-packages/pyarrow/ipc.py", line 62, in __init__
>     self._open(source)
>   File "pyarrow/ipc.pxi", line 360, in pyarrow.lib._RecordBatchStreamReader._open
>   File "pyarrow/error.pxi", line 123, in pyarrow.lib.pyarrow_internal_check_status
>   File "pyarrow/error.pxi", line 100, in pyarrow.lib.check_status
> OSError: Expected IPC message of type schema but got record batch
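For context on the final OSError: an Arrow IPC stream must begin with a schema message before any record batch messages, and pa.ipc.open_stream raises exactly this error when the first message it reads is a record batch instead. The sketch below is my own minimal standalone example, not the PyFlink code path: it writes a well-formed stream and reads it back, then opens a lone record-batch message to trigger the same failure (the exact exception class may differ across pyarrow versions).

    import io
    import pyarrow as pa

    batch = pa.record_batch([pa.array([1, 2, 3])], names=["x"])

    # Well-formed stream: new_stream emits the schema message first,
    # then the record batches, so open_stream can read it back.
    sink = io.BytesIO()
    with pa.ipc.new_stream(sink, batch.schema) as writer:
        writer.write_batch(batch)
    print(pa.ipc.open_stream(sink.getvalue()).read_all())

    # A serialized record batch on its own carries no leading schema
    # message, so open_stream fails, as in the traceback above.
    try:
        pa.ipc.open_stream(batch.serialize())
    except Exception as e:
        print(type(e).__name__, e)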