I want to use ParquetDatasetFactory to create a dataset on S3, but it fails. The
error message is as follows:
/build/apache-arrow-7.0.0/cpp/src/arrow/result.cc:28: ValueOrDie called on an
error: IOError: Path does not exist 'iceberg-test/warehouse/test/metadata'
/lib/x86_64-linux-gnu/libarrow.so.700(+0x10430bb)[0x7f4ee6fe50bb]
/lib/x86_64-linux-gnu/libarrow.so.700(_ZN5arrow4util8ArrowLogD1Ev+0xed)[0x7f4ee6fe52fd]
/lib/x86_64-linux-gnu/libarrow.so.700(_ZN5arrow8internal17InvalidValueOrDieERKNS_6StatusE+0x17e)[0x7f4ee7104a2e]
./example(+0xd97d)[0x564087f3e97d] ./example(+0x8bc2)[0x564087f39bc2]
./example(+0x94c8)[0x564087f3a4c8] ./example(+0x9fb4)[0x564087f3afb4]
/lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf3)[0x7f4ee572b0b3]
./example(+0x69fe)[0x564087f379fe] Aborted (core dumped)
In the following code snippet, there is a commented-out line that uses
FileSystemDatasetFactory to create the dataset, and that works well. Can't a
dataset be created through a ParquetDatasetFactory?
/// Builds a dataset on S3 through ds::ParquetDatasetFactory.
///
/// The filesystem is resolved from an `s3://` URI assembled from the supplied
/// credentials, K_METADATA_PATH and the endpoint override (plain HTTP). The
/// path component that FileSystemFromUri extracts from that URI is handed to
/// ParquetDatasetFactory::Make as the metadata path.
///
/// ParquetDatasetFactory expects that path to name a Parquet `_metadata`
/// summary file, not a directory of data files. To discover the Parquet files
/// under a directory instead, use FileSystemDatasetFactory with a FileSelector
/// whose base_dir is that directory.
///
/// Every step uses ValueOrDie(), so any failure (for example a metadata path
/// that does not exist) aborts the process instead of returning an error.
///
/// @param access_key        S3 access key.
/// @param secret_key        S3 secret key.
/// @param endpoint_override Host[:port] of the S3-compatible endpoint.
/// @param bucket_uri        Not consulted by the ParquetDatasetFactory path;
///                          kept so existing callers keep compiling.
/// @return The dataset produced by the factory's Finish().
std::shared_ptr<ds::Dataset> GetDatasetFromS3(const std::string& access_key,
                                              const std::string& secret_key,
                                              const std::string& endpoint_override,
                                              const std::string& bucket_uri) {
  EnsureS3Initialized();

  // Only the metadata path below drives dataset discovery on this code path.
  (void)bucket_uri;

  // NOTE(review): the credentials are inserted into the URI verbatim. Keys
  // containing URI-reserved characters such as '/', '+' or '@' will likely need
  // percent-encoding to be parsed correctly - confirm against the keys in use.
  std::stringstream uri;
  uri << "s3://" << access_key << ":" << secret_key << "@" << K_METADATA_PATH
      << "?scheme=http&endpoint_override=" << endpoint_override;

  // `metadata_path` receives the path portion of the URI.
  std::string metadata_path;
  auto fs = arrow::fs::FileSystemFromUri(uri.str(), &metadata_path).ValueOrDie();

  auto format = std::make_shared<ParquetFileFormat>();
  ds::ParquetFactoryOptions options;
  auto factory =
      ds::ParquetDatasetFactory::Make(metadata_path, fs, format, options).ValueOrDie();
  return factory->Finish().ValueOrDie();
}