Just to be sure, have you tried passing `check_metadata = true` as the optional argument to `DataType::Equals`?
If that doesn't change anything, perhaps you can push your code somewhere so that I (or someone else) can take a look. Le 10/02/2021 à 18:39, Ying Zhou a écrit : > Not really. So what’s really going on?! > > TEST(TestAdapterWriteNested, writeList) { > std::shared_ptr<Schema> table_schema = schema({field("list", > list(int32()))}); > int64_t num_rows = 10000; > arrow::random::RandomArrayGenerator rand(kRandomSeed); > auto value_array = rand.ArrayOf(int32(), 5 * num_rows, 0.6); > std::shared_ptr<Array> array = rand.List(*value_array, num_rows + 1, 0.8); > std::shared_ptr<ChunkedArray> chunked_array = > std::make_shared<ChunkedArray>(array); > std::shared_ptr<Table> table = Table::Make(table_schema, {chunked_array}); > > std::shared_ptr<io::BufferOutputStream> buffer_output_stream = > io::BufferOutputStream::Create(kDefaultSmallMemStreamSize * > 15).ValueOrDie(); > std::unique_ptr<adapters::orc::ORCFileWriter> writer = > adapters::orc::ORCFileWriter::Open(*buffer_output_stream).ValueOrDie(); > ARROW_EXPECT_OK(writer->Write(*table)); > ARROW_EXPECT_OK(writer->Close()); > std::shared_ptr<Buffer> buffer = > buffer_output_stream->Finish().ValueOrDie(); > std::shared_ptr<io::RandomAccessFile> in_stream(new > io::BufferReader(buffer)); > std::unique_ptr<adapters::orc::ORCFileReader> reader; > ARROW_EXPECT_OK( > adapters::orc::ORCFileReader::Open(in_stream, default_memory_pool(), > &reader)); > std::shared_ptr<Table> actual_output_table; > ARROW_EXPECT_OK(reader->Read(&actual_output_table)); > auto actual_array = > > std::static_pointer_cast<ListArray>(actual_output_table->column(0)->chunk(0)); > auto expected_array = > std::static_pointer_cast<ListArray>(table->column(0)->chunk(0)); > AssertArraysEqual(*(actual_array->offsets()), *(expected_array->offsets())); > AssertArraysEqual(*(actual_array->values()), *(expected_array->values())); > AssertBufferEqual(*(actual_array->null_bitmap()), > *(expected_array->null_bitmap())); > 
ASSERT_TRUE(actual_array->type()->Equals(*(expected_array->type()))); > RecordProperty("output_type", actual_array->type()->ToString()); > RecordProperty("input_type", expected_array->type()->ToString()); > RecordProperty("array_equality", actual_array->Equals(*expected_array)); > } > > <testcase name="writeList" status="run" result="completed" time="0.029" > timestamp="2021-02-10T12:33:47" classname="TestAdapterWriteNested"> > <properties> > <property name="output_type" value="list<item: int32>"/> > <property name="input_type" value="list<item: int32>"/> > <property name="array_equality" value="0"/> > </properties> > </testcase> >> On Feb 10, 2021, at 12:10 PM, Antoine Pitrou <anto...@python.org> wrote: >> >> >> Hmm, perhaps the types are unequal, then. Can you print them out >> (including field metadata)? >> >> >> Le 10/02/2021 à 18:03, Ying Zhou a écrit : >>> Thanks! Now we have an even weirder phenomenon. Even the null bitmaps and >>> offsets are equal. However the arrays aren’t! Does anyone know why? 
>>> >>> TEST(TestAdapterWriteNested, writeList) { >>> std::shared_ptr<Schema> table_schema = schema({field("list", >>> list(int32()))}); >>> int64_t num_rows = 10000; >>> arrow::random::RandomArrayGenerator rand(kRandomSeed); >>> auto value_array = rand.ArrayOf(int32(), 5 * num_rows, 0.6); >>> std::shared_ptr<Array> array = rand.List(*value_array, num_rows + 1, 0.8); >>> std::shared_ptr<ChunkedArray> chunked_array = >>> std::make_shared<ChunkedArray>(array); >>> std::shared_ptr<Table> table = Table::Make(table_schema, {chunked_array}); >>> >>> std::shared_ptr<io::BufferOutputStream> buffer_output_stream = >>> io::BufferOutputStream::Create(kDefaultSmallMemStreamSize * >>> 15).ValueOrDie(); >>> std::unique_ptr<adapters::orc::ORCFileWriter> writer = >>> adapters::orc::ORCFileWriter::Open(*buffer_output_stream).ValueOrDie(); >>> ARROW_EXPECT_OK(writer->Write(*table)); >>> ARROW_EXPECT_OK(writer->Close()); >>> std::shared_ptr<Buffer> buffer = >>> buffer_output_stream->Finish().ValueOrDie(); >>> std::shared_ptr<io::RandomAccessFile> in_stream(new >>> io::BufferReader(buffer)); >>> std::unique_ptr<adapters::orc::ORCFileReader> reader; >>> ARROW_EXPECT_OK( >>> adapters::orc::ORCFileReader::Open(in_stream, default_memory_pool(), >>> &reader)); >>> std::shared_ptr<Table> actual_output_table; >>> ARROW_EXPECT_OK(reader->Read(&actual_output_table)); >>> auto actual_array = >>> >>> std::static_pointer_cast<ListArray>(actual_output_table->column(0)->chunk(0)); >>> auto expected_array = >>> std::static_pointer_cast<ListArray>(table->column(0)->chunk(0)); >>> AssertArraysEqual(*(actual_array->offsets()), >>> *(expected_array->offsets())); >>> AssertArraysEqual(*(actual_array->values()), *(expected_array->values())); >>> AssertBufferEqual(*(actual_array->null_bitmap()), >>> *(expected_array->null_bitmap())); >>> RecordProperty("array_equality", actual_array->Equals(*expected_array)); >>> } >>> >>> <testcase name="writeList" status="run" result="completed" time="0.028" >>> 
timestamp="2021-02-10T11:58:23" classname="TestAdapterWriteNested"> >>> <properties> >>> <property name="array_equality" value="0"/> >>> </properties> >>> </testcase> >>> >>>> On Feb 10, 2021, at 3:52 AM, Antoine Pitrou <anto...@python.org> wrote: >>>> >>>> >>>> Hi Ying, >>>> >>>> Hmm, yes, this may be related to the null bitmaps, or the offsets. >>>> Can you try to inspect or pretty-print the offsets arrays for the two >>>> list arrays? >>>> >>>> Regards >>>> >>>> Antoine. >>>> >>>> >>>> Le 10/02/2021 à 03:26, Ying Zhou a écrit : >>>>> Hi, >>>>> >>>>> This is an extremely weird phenomenon. There are two 2*1 tables that are >>>>> supposedly different when I got a confusing error message like this: >>>>> >>>>> [ RUN ] TestAdapterWriteNested.writeList >>>>> /Users/karlkatzen/Documents/code/arrow-dev/arrow/cpp/src/arrow/testing/gtest_util.cc:459: >>>>> Failure >>>>> Failed >>>>> Unequal at absolute position 2 >>>>> Expected: >>>>> [ >>>>> [ >>>>> null, >>>>> 1074834796, >>>>> null, >>>>> null >>>>> ], >>>>> null >>>>> ] >>>>> Actual: >>>>> [ >>>>> [ >>>>> null, >>>>> 1074834796, >>>>> null, >>>>> null >>>>> ], >>>>> null >>>>> ] >>>>> [ FAILED ] TestAdapterWriteNested.writeList (2 ms) >>>>> >>>>> Here is the code that causes the issue: >>>>> >>>>> TEST(TestAdapterWriteNested, writeList) { >>>>> std::shared_ptr<Schema> table_schema = schema({field("list", >>>>> list(int32()))}); >>>>> int64_t num_rows = 2; >>>>> arrow::random::RandomArrayGenerator rand(kRandomSeed); >>>>> auto value_array = rand.ArrayOf(int32(), 2 * num_rows, 0.6); >>>>> std::shared_ptr<Array> array = rand.List(*value_array, num_rows + 1, 1); >>>>> std::shared_ptr<ChunkedArray> chunked_array = >>>>> std::make_shared<ChunkedArray>(array); >>>>> std::shared_ptr<Table> table = Table::Make(table_schema, {chunked_array}); >>>>> AssertTableWriteReadEqual(table, table, kDefaultSmallMemStreamSize * 5); >>>>> } >>>>> >>>>> Here AssertTableWriteReadEqual is a function I use to test that >>>>> 
from_orc(to_orc(table_in)) == expected_table_out. The function did not >>>>> have issues before. >>>>> >>>>> void AssertTableWriteReadEqual(const std::shared_ptr<Table>& input_table, >>>>> const std::shared_ptr<Table>& >>>>> expected_output_table, >>>>> const int64_t max_size = >>>>> kDefaultSmallMemStreamSize) { >>>>> std::shared_ptr<io::BufferOutputStream> buffer_output_stream = >>>>> io::BufferOutputStream::Create(max_size).ValueOrDie(); >>>>> std::unique_ptr<adapters::orc::ORCFileWriter> writer = >>>>> >>>>> adapters::orc::ORCFileWriter::Open(*buffer_output_stream).ValueOrDie(); >>>>> ARROW_EXPECT_OK(writer->Write(*input_table)); >>>>> ARROW_EXPECT_OK(writer->Close()); >>>>> std::shared_ptr<Buffer> buffer = >>>>> buffer_output_stream->Finish().ValueOrDie(); >>>>> std::shared_ptr<io::RandomAccessFile> in_stream(new >>>>> io::BufferReader(buffer)); >>>>> std::unique_ptr<adapters::orc::ORCFileReader> reader; >>>>> ARROW_EXPECT_OK( >>>>> adapters::orc::ORCFileReader::Open(in_stream, default_memory_pool(), >>>>> &reader)); >>>>> std::shared_ptr<Table> actual_output_table; >>>>> ARROW_EXPECT_OK(reader->Read(&actual_output_table)); >>>>> AssertTablesEqual(*actual_output_table, *expected_output_table, false, >>>>> false); >>>>> } >>>>> >>>>> I strongly suspect that this is related to the null bitmaps. What do you >>>>> guys think? >>>>> >>>>> Ying >>>>> >>> >>> > >