This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 3b0055a ARROW-9417: [C++] Write length in IPC message by using
little-endian
3b0055a is described below
commit 3b0055adc4ab54b59d0671821c3767cebf291bd5
Author: Kazuaki Ishizaki <[email protected]>
AuthorDate: Sun Jul 12 12:09:18 2020 -0500
ARROW-9417: [C++] Write length in IPC message by using little-endian
This PR forces metadata_length and footer_length in IPC messages to be written
in little-endian byte order, as required by [the
specification](https://github.com/apache/arrow/blob/master/docs/source/format/Columnar.rst).
Closes #7716 from kiszk/ARROW-9417
Authored-by: Kazuaki Ishizaki <[email protected]>
Signed-off-by: Wes McKinney <[email protected]>
---
cpp/src/arrow/ipc/message.cc | 18 ++++++++++--------
cpp/src/arrow/ipc/read_write_test.cc | 5 +++++
cpp/src/arrow/ipc/reader.cc | 3 ++-
cpp/src/arrow/ipc/writer.cc | 2 ++
4 files changed, 19 insertions(+), 9 deletions(-)
diff --git a/cpp/src/arrow/ipc/message.cc b/cpp/src/arrow/ipc/message.cc
index aeb106e..dcf61ef 100644
--- a/cpp/src/arrow/ipc/message.cc
+++ b/cpp/src/arrow/ipc/message.cc
@@ -424,8 +424,9 @@ Status WriteMessage(const Buffer& message, const
IpcWriteOptions& options,
RETURN_NOT_OK(file->Write(&internal::kIpcContinuationToken,
sizeof(int32_t)));
}
- // Write the flatbuffer size prefix including padding
- int32_t padded_flatbuffer_size = padded_message_length - prefix_size;
+ // Write the flatbuffer size prefix including padding in little endian
+ int32_t padded_flatbuffer_size =
+ BitUtil::ToLittleEndian(padded_message_length - prefix_size);
RETURN_NOT_OK(file->Write(&padded_flatbuffer_size, sizeof(int32_t)));
// Write the flatbuffer
@@ -577,18 +578,18 @@ class MessageDecoder::MessageDecoderImpl {
}
Status ConsumeInitialData(const uint8_t* data, int64_t size) {
- return ConsumeInitial(util::SafeLoadAs<int32_t>(data));
+ return
ConsumeInitial(BitUtil::FromLittleEndian(util::SafeLoadAs<int32_t>(data)));
}
Status ConsumeInitialBuffer(const std::shared_ptr<Buffer>& buffer) {
ARROW_ASSIGN_OR_RAISE(auto continuation, ConsumeDataBufferInt32(buffer));
- return ConsumeInitial(continuation);
+ return ConsumeInitial(BitUtil::FromLittleEndian(continuation));
}
Status ConsumeInitialChunks() {
int32_t continuation = 0;
RETURN_NOT_OK(ConsumeDataChunks(sizeof(int32_t), &continuation));
- return ConsumeInitial(continuation);
+ return ConsumeInitial(BitUtil::FromLittleEndian(continuation));
}
Status ConsumeInitial(int32_t continuation) {
@@ -616,18 +617,19 @@ class MessageDecoder::MessageDecoderImpl {
}
Status ConsumeMetadataLengthData(const uint8_t* data, int64_t size) {
- return ConsumeMetadataLength(util::SafeLoadAs<int32_t>(data));
+ return ConsumeMetadataLength(
+ BitUtil::FromLittleEndian(util::SafeLoadAs<int32_t>(data)));
}
Status ConsumeMetadataLengthBuffer(const std::shared_ptr<Buffer>& buffer) {
ARROW_ASSIGN_OR_RAISE(auto metadata_length,
ConsumeDataBufferInt32(buffer));
- return ConsumeMetadataLength(metadata_length);
+ return ConsumeMetadataLength(BitUtil::FromLittleEndian(metadata_length));
}
Status ConsumeMetadataLengthChunks() {
int32_t metadata_length = 0;
RETURN_NOT_OK(ConsumeDataChunks(sizeof(int32_t), &metadata_length));
- return ConsumeMetadataLength(metadata_length);
+ return ConsumeMetadataLength(BitUtil::FromLittleEndian(metadata_length));
}
Status ConsumeMetadataLength(int32_t metadata_length) {
diff --git a/cpp/src/arrow/ipc/read_write_test.cc
b/cpp/src/arrow/ipc/read_write_test.cc
index 9e4f4c9..6ae7611 100644
--- a/cpp/src/arrow/ipc/read_write_test.cc
+++ b/cpp/src/arrow/ipc/read_write_test.cc
@@ -131,6 +131,11 @@ TEST_P(TestMessage, SerializeTo) {
ASSERT_EQ(BitUtil::RoundUp(metadata->size() + prefix_size, alignment) +
body_length,
output_length);
ASSERT_OK_AND_EQ(output_length, stream->Tell());
+ ASSERT_OK_AND_ASSIGN(auto buffer, stream->Finish());
+ // check whether length is written in little endian
+ auto buffer_ptr = buffer.get()->data();
+ ASSERT_EQ(output_length - body_length - prefix_size,
+ BitUtil::FromLittleEndian(*(uint32_t*)(buffer_ptr + 4)));
};
CheckWithAlignment(8);
diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc
index 3c51fef..75f2213 100644
--- a/cpp/src/arrow/ipc/reader.cc
+++ b/cpp/src/arrow/ipc/reader.cc
@@ -979,7 +979,8 @@ class RecordBatchFileReaderImpl : public
RecordBatchFileReader {
return Status::Invalid("Not an Arrow file");
}
- int32_t footer_length = *reinterpret_cast<const int32_t*>(buffer->data());
+ int32_t footer_length =
+ BitUtil::FromLittleEndian(*reinterpret_cast<const
int32_t*>(buffer->data()));
if (footer_length <= 0 || footer_length > footer_offset_ - magic_size * 2
- 4) {
return Status::Invalid("File is smaller than indicated metadata size");
diff --git a/cpp/src/arrow/ipc/writer.cc b/cpp/src/arrow/ipc/writer.cc
index 292fe9c..d3af24d 100644
--- a/cpp/src/arrow/ipc/writer.cc
+++ b/cpp/src/arrow/ipc/writer.cc
@@ -1184,6 +1184,8 @@ class PayloadFileWriter : public
internal::IpcPayloadWriter, protected StreamBoo
return Status::Invalid("Invalid file footer");
}
+ // write footer length in little endian
+ footer_length = BitUtil::ToLittleEndian(footer_length);
RETURN_NOT_OK(Write(&footer_length, sizeof(int32_t)));
// Write magic bytes to end file