Just to be sure, have you tried passing `check_metadata = true` as the
optional argument to `DataType::Equals`?

If that doesn't change anything, perhaps you can push your code
somewhere so that I (or someone else) can take a look.


Le 10/02/2021 à 18:39, Ying Zhou a écrit :
> Not really. So what’s really going on?!
> 
> TEST(TestAdapterWriteNested, writeList) {
>   std::shared_ptr<Schema> table_schema = schema({field("list", 
> list(int32()))});
>   int64_t num_rows = 10000;
>   arrow::random::RandomArrayGenerator rand(kRandomSeed);
>   auto value_array = rand.ArrayOf(int32(), 5 * num_rows, 0.6);
>   std::shared_ptr<Array> array = rand.List(*value_array, num_rows + 1, 0.8);
>   std::shared_ptr<ChunkedArray> chunked_array = 
> std::make_shared<ChunkedArray>(array);
>   std::shared_ptr<Table> table = Table::Make(table_schema, {chunked_array});
> 
>   std::shared_ptr<io::BufferOutputStream> buffer_output_stream =
>       io::BufferOutputStream::Create(kDefaultSmallMemStreamSize * 
> 15).ValueOrDie();
>   std::unique_ptr<adapters::orc::ORCFileWriter> writer =
>       adapters::orc::ORCFileWriter::Open(*buffer_output_stream).ValueOrDie();
>   ARROW_EXPECT_OK(writer->Write(*table));
>   ARROW_EXPECT_OK(writer->Close());
>   std::shared_ptr<Buffer> buffer = 
> buffer_output_stream->Finish().ValueOrDie();
>   std::shared_ptr<io::RandomAccessFile> in_stream(new 
> io::BufferReader(buffer));
>   std::unique_ptr<adapters::orc::ORCFileReader> reader;
>   ARROW_EXPECT_OK(
>       adapters::orc::ORCFileReader::Open(in_stream, default_memory_pool(), 
> &reader));
>   std::shared_ptr<Table> actual_output_table;
>   ARROW_EXPECT_OK(reader->Read(&actual_output_table));
>   auto actual_array =
>       
> std::static_pointer_cast<ListArray>(actual_output_table->column(0)->chunk(0));
>   auto expected_array = 
> std::static_pointer_cast<ListArray>(table->column(0)->chunk(0));
>   AssertArraysEqual(*(actual_array->offsets()), *(expected_array->offsets()));
>   AssertArraysEqual(*(actual_array->values()), *(expected_array->values()));
>   AssertBufferEqual(*(actual_array->null_bitmap()), 
> *(expected_array->null_bitmap()));
>   ASSERT_TRUE(actual_array->type()->Equals(*(expected_array->type())));
>   RecordProperty("output_type", actual_array->type()->ToString());
>   RecordProperty("input_type", expected_array->type()->ToString());
>   RecordProperty("array_equality", actual_array->Equals(*expected_array));
> }
> 
>     <testcase name="writeList" status="run" result="completed" time="0.029" 
> timestamp="2021-02-10T12:33:47" classname="TestAdapterWriteNested">
> <properties>
> <property name="output_type" value="list&lt;item: int32&gt;"/>
> <property name="input_type" value="list&lt;item: int32&gt;"/>
> <property name="array_equality" value="0"/>
> </properties>
>     </testcase>
>> On Feb 10, 2021, at 12:10 PM, Antoine Pitrou <anto...@python.org> wrote:
>>
>>
>> Hmm, perhaps the types are unequal, then.  Can you print them out
>> (including field metadata)?
>>
>>
>> Le 10/02/2021 à 18:03, Ying Zhou a écrit :
>>> Thanks! Now we have an even weirder phenomenon. Even the null bitmaps and 
>>> offsets are equal. However the arrays aren’t! Does anyone know why?
>>>
>>> TEST(TestAdapterWriteNested, writeList) {
>>>  std::shared_ptr<Schema> table_schema = schema({field("list", 
>>> list(int32()))});
>>>  int64_t num_rows = 10000;
>>>  arrow::random::RandomArrayGenerator rand(kRandomSeed);
>>>  auto value_array = rand.ArrayOf(int32(), 5 * num_rows, 0.6);
>>>  std::shared_ptr<Array> array = rand.List(*value_array, num_rows + 1, 0.8);
>>>  std::shared_ptr<ChunkedArray> chunked_array = 
>>> std::make_shared<ChunkedArray>(array);
>>>  std::shared_ptr<Table> table = Table::Make(table_schema, {chunked_array});
>>>
>>>  std::shared_ptr<io::BufferOutputStream> buffer_output_stream =
>>>      io::BufferOutputStream::Create(kDefaultSmallMemStreamSize * 
>>> 15).ValueOrDie();
>>>  std::unique_ptr<adapters::orc::ORCFileWriter> writer =
>>>      adapters::orc::ORCFileWriter::Open(*buffer_output_stream).ValueOrDie();
>>>  ARROW_EXPECT_OK(writer->Write(*table));
>>>  ARROW_EXPECT_OK(writer->Close());
>>>  std::shared_ptr<Buffer> buffer = 
>>> buffer_output_stream->Finish().ValueOrDie();
>>>  std::shared_ptr<io::RandomAccessFile> in_stream(new 
>>> io::BufferReader(buffer));
>>>  std::unique_ptr<adapters::orc::ORCFileReader> reader;
>>>  ARROW_EXPECT_OK(
>>>      adapters::orc::ORCFileReader::Open(in_stream, default_memory_pool(), 
>>> &reader));
>>>  std::shared_ptr<Table> actual_output_table;
>>>  ARROW_EXPECT_OK(reader->Read(&actual_output_table));
>>>  auto actual_array =
>>>      
>>> std::static_pointer_cast<ListArray>(actual_output_table->column(0)->chunk(0));
>>>  auto expected_array = 
>>> std::static_pointer_cast<ListArray>(table->column(0)->chunk(0));
>>>  AssertArraysEqual(*(actual_array->offsets()), 
>>> *(expected_array->offsets()));
>>>  AssertArraysEqual(*(actual_array->values()), *(expected_array->values()));
>>>  AssertBufferEqual(*(actual_array->null_bitmap()), 
>>> *(expected_array->null_bitmap()));
>>>  RecordProperty("array_equality", actual_array->Equals(*expected_array));
>>> }
>>>
>>>    <testcase name="writeList" status="run" result="completed" time="0.028" 
>>> timestamp="2021-02-10T11:58:23" classname="TestAdapterWriteNested">
>>> <properties>
>>> <property name="array_equality" value="0"/>
>>> </properties>
>>>    </testcase>
>>>
>>>> On Feb 10, 2021, at 3:52 AM, Antoine Pitrou <anto...@python.org> wrote:
>>>>
>>>>
>>>> Hi Ying,
>>>>
>>>> Hmm, yes, this may be related to the null bitmaps, or the offsets.
>>>> Can you try to inspect or pretty-print the offsets arrays for the two
>>>> list arrays?
>>>>
>>>> Regards
>>>>
>>>> Antoine.
>>>>
>>>>
>>>> Le 10/02/2021 à 03:26, Ying Zhou a écrit :
>>>>> Hi,
>>>>>
>>>>> This is an extremely weird phenomenon. Two 2*1 tables are supposedly 
>>>>> different, and I got a confusing error message like this:
>>>>>
>>>>> [ RUN      ] TestAdapterWriteNested.writeList
>>>>> /Users/karlkatzen/Documents/code/arrow-dev/arrow/cpp/src/arrow/testing/gtest_util.cc:459:
>>>>>  Failure
>>>>> Failed
>>>>> Unequal at absolute position 2
>>>>> Expected:
>>>>> [
>>>>>   [
>>>>>     null,
>>>>>     1074834796,
>>>>>     null,
>>>>>     null
>>>>>   ],
>>>>>   null
>>>>> ]
>>>>> Actual:
>>>>> [
>>>>>   [
>>>>>     null,
>>>>>     1074834796,
>>>>>     null,
>>>>>     null
>>>>>   ],
>>>>>   null
>>>>> ]
>>>>> [  FAILED  ] TestAdapterWriteNested.writeList (2 ms)
>>>>>
>>>>> Here is the code that causes the issue:
>>>>>
>>>>> TEST(TestAdapterWriteNested, writeList) {
>>>>> std::shared_ptr<Schema> table_schema = schema({field("list", 
>>>>> list(int32()))});
>>>>> int64_t num_rows = 2;
>>>>> arrow::random::RandomArrayGenerator rand(kRandomSeed);
>>>>> auto value_array = rand.ArrayOf(int32(), 2 * num_rows, 0.6);
>>>>> std::shared_ptr<Array> array = rand.List(*value_array, num_rows + 1, 1);
>>>>> std::shared_ptr<ChunkedArray> chunked_array = 
>>>>> std::make_shared<ChunkedArray>(array);
>>>>> std::shared_ptr<Table> table = Table::Make(table_schema, {chunked_array});
>>>>> AssertTableWriteReadEqual(table, table, kDefaultSmallMemStreamSize * 5);
>>>>> }
>>>>>
>>>>> Here AssertTableWriteReadEqual is a function I use to test that 
>>>>> from_orc(to_orc(table_in)) == expected_table_out. The function did not 
>>>>> have issues before.
>>>>>
>>>>> void AssertTableWriteReadEqual(const std::shared_ptr<Table>& input_table,
>>>>>                              const std::shared_ptr<Table>& 
>>>>> expected_output_table,
>>>>>                              const int64_t max_size = 
>>>>> kDefaultSmallMemStreamSize) {
>>>>> std::shared_ptr<io::BufferOutputStream> buffer_output_stream =
>>>>>     io::BufferOutputStream::Create(max_size).ValueOrDie();
>>>>> std::unique_ptr<adapters::orc::ORCFileWriter> writer =
>>>>>     
>>>>> adapters::orc::ORCFileWriter::Open(*buffer_output_stream).ValueOrDie();
>>>>> ARROW_EXPECT_OK(writer->Write(*input_table));
>>>>> ARROW_EXPECT_OK(writer->Close());
>>>>> std::shared_ptr<Buffer> buffer = 
>>>>> buffer_output_stream->Finish().ValueOrDie();
>>>>> std::shared_ptr<io::RandomAccessFile> in_stream(new 
>>>>> io::BufferReader(buffer));
>>>>> std::unique_ptr<adapters::orc::ORCFileReader> reader;
>>>>> ARROW_EXPECT_OK(
>>>>>     adapters::orc::ORCFileReader::Open(in_stream, default_memory_pool(), 
>>>>> &reader));
>>>>> std::shared_ptr<Table> actual_output_table;
>>>>> ARROW_EXPECT_OK(reader->Read(&actual_output_table));
>>>>> AssertTablesEqual(*actual_output_table, *expected_output_table, false, 
>>>>> false);
>>>>> }
>>>>>
>>>>> I strongly suspect that this is related to the null bitmaps. What do you 
>>>>> guys think?
>>>>>
>>>>> Ying
>>>>>
>>>
>>>
> 
> 

Reply via email to