[
https://issues.apache.org/jira/browse/HADOOP-18679?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17841304#comment-17841304
]
ASF GitHub Bot commented on HADOOP-18679:
-----------------------------------------
steveloughran commented on code in PR #6726:
URL: https://github.com/apache/hadoop/pull/6726#discussion_r1581288128
##########
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DefaultBulkDeleteOperation.java:
##########
@@ -17,61 +17,86 @@
*/
package org.apache.hadoop.fs;
+import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
import org.apache.hadoop.util.functional.Tuples;
import static java.util.Objects.requireNonNull;
import static org.apache.hadoop.fs.BulkDeleteUtils.validateBulkDeletePaths;
-import static org.apache.hadoop.util.Preconditions.checkArgument;
/**
* Default implementation of the {@link BulkDelete} interface.
*/
public class DefaultBulkDeleteOperation implements BulkDelete {
- private final int pageSize;
+ private static Logger LOG = LoggerFactory.getLogger(DefaultBulkDeleteOperation.class);
+
+ /** Default page size for bulk delete. */
+ private static final int DEFAULT_PAGE_SIZE = 1;
+ /** Base path for the bulk delete operation. */
private final Path basePath;
+ /** Delegate File system which makes the actual delete calls. */
private final FileSystem fs;
- public DefaultBulkDeleteOperation(int pageSize,
- Path basePath,
+ public DefaultBulkDeleteOperation(Path basePath,
FileSystem fs) {
- checkArgument(pageSize == 1, "Page size must be equal to 1");
- this.pageSize = pageSize;
this.basePath = requireNonNull(basePath);
this.fs = fs;
}
@Override
public int pageSize() {
- return pageSize;
+ return DEFAULT_PAGE_SIZE;
}
@Override
public Path basePath() {
return basePath;
}
+ /**
+ * {@inheritDoc}
+ */
@Override
public List<Map.Entry<Path, String>> bulkDelete(Collection<Path> paths)
throws IOException, IllegalArgumentException {
- validateBulkDeletePaths(paths, pageSize, basePath);
+ validateBulkDeletePaths(paths, DEFAULT_PAGE_SIZE, basePath);
List<Map.Entry<Path, String>> result = new ArrayList<>();
- // this for loop doesn't make sense as pageSize must be 1.
- for (Path path : paths) {
+ if (!paths.isEmpty()) {
+ // As the page size is always 1, this should be the only
+ // path in the collection.
+ Path pathToDelete = paths.iterator().next();
try {
- fs.delete(path, false);
- // What to do if this return false?
- // I think we should add the path to the result list with value "Not Deleted".
- } catch (IOException e) {
- result.add(Tuples.pair(path, e.toString()));
+ boolean deleted = fs.delete(pathToDelete, false);
+ if (deleted) {
+ return result;
+ } else {
+ try {
+ FileStatus fileStatus = fs.getFileStatus(pathToDelete);
+ if (fileStatus.isDirectory()) {
+ result.add(Tuples.pair(pathToDelete, "Path is a directory"));
+ }
+ } catch (FileNotFoundException e) {
+ // Ignore FNFE and don't add to the result list.
+ LOG.debug("Couldn't delete {} - does not exist: {}",
pathToDelete, e.toString());
+ } catch (Exception e) {
+ LOG.debug("Couldn't delete {} - exception occurred:
{}", pathToDelete, e.toString());
+ result.add(Tuples.pair(pathToDelete, e.toString()));
+ }
+ }
+ } catch (Exception ex) {
Review Comment:
make this an IOException
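A sketch of the suggested narrowing (assuming the elided catch body reports
the failure against the single page entry, the way the inner handler above
does):

    } catch (IOException ex) {
        // narrowed from Exception: only I/O failures are expected here;
        // anything else should propagate rather than be swallowed
        result.add(Tuples.pair(pathToDelete, ex.toString()));
    }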
##########
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DefaultBulkDeleteOperation.java:
##########
@@ -17,61 +17,86 @@
*/
package org.apache.hadoop.fs;
+import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
import org.apache.hadoop.util.functional.Tuples;
import static java.util.Objects.requireNonNull;
import static org.apache.hadoop.fs.BulkDeleteUtils.validateBulkDeletePaths;
-import static org.apache.hadoop.util.Preconditions.checkArgument;
/**
* Default implementation of the {@link BulkDelete} interface.
*/
public class DefaultBulkDeleteOperation implements BulkDelete {
- private final int pageSize;
+ private static Logger LOG = LoggerFactory.getLogger(DefaultBulkDeleteOperation.class);
+
+ /** Default page size for bulk delete. */
+ private static final int DEFAULT_PAGE_SIZE = 1;
+ /** Base path for the bulk delete operation. */
private final Path basePath;
+ /** Delegate File system which makes the actual delete calls. */
private final FileSystem fs;
- public DefaultBulkDeleteOperation(int pageSize,
- Path basePath,
+ public DefaultBulkDeleteOperation(Path basePath,
FileSystem fs) {
- checkArgument(pageSize == 1, "Page size must be equal to 1");
- this.pageSize = pageSize;
this.basePath = requireNonNull(basePath);
this.fs = fs;
}
@Override
public int pageSize() {
- return pageSize;
+ return DEFAULT_PAGE_SIZE;
}
@Override
public Path basePath() {
return basePath;
}
+ /**
+ * {@inheritDoc}
+ */
@Override
public List<Map.Entry<Path, String>> bulkDelete(Collection<Path> paths)
throws IOException, IllegalArgumentException {
- validateBulkDeletePaths(paths, pageSize, basePath);
+ validateBulkDeletePaths(paths, DEFAULT_PAGE_SIZE, basePath);
List<Map.Entry<Path, String>> result = new ArrayList<>();
- // this for loop doesn't make sense as pageSize must be 1.
- for (Path path : paths) {
+ if (!paths.isEmpty()) {
+ // As the page size is always 1, this should be the only
+ // path in the collection.
+ Path pathToDelete = paths.iterator().next();
try {
- fs.delete(path, false);
- // What to do if this return false?
- // I think we should add the path to the result list with value "Not Deleted".
- } catch (IOException e) {
- result.add(Tuples.pair(path, e.toString()));
+ boolean deleted = fs.delete(pathToDelete, false);
+ if (deleted) {
+ return result;
+ } else {
+ try {
+ FileStatus fileStatus = fs.getFileStatus(pathToDelete);
+ if (fileStatus.isDirectory()) {
+ result.add(Tuples.pair(pathToDelete, "Path is a directory"));
+ }
+ } catch (FileNotFoundException e) {
+ // Ignore FNFE and don't add to the result list.
+ LOG.debug("Couldn't delete {} - does not exist: {}",
pathToDelete, e.toString());
+ } catch (Exception e) {
Review Comment:
make IOE
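Applied to this inner handler, the narrowing might look like the following
sketch; note that FileNotFoundException is a subclass of IOException, so its
clause has to stay first:

    } catch (FileNotFoundException e) {
        // an already-absent path counts as a successful bulk delete
        LOG.debug("Couldn't delete {} - does not exist: {}", pathToDelete, e.toString());
    } catch (IOException e) {
        // narrowed from Exception per the review comment
        LOG.debug("Couldn't delete {} - exception occurred: {}", pathToDelete, e.toString());
        result.add(Tuples.pair(pathToDelete, e.toString()));
    }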
##########
hadoop-common-project/hadoop-common/src/site/markdown/filesystem/bulkdelete.md:
##########
@@ -94,8 +94,10 @@ No other restrictions are placed upon the outcome.
### Availability
The `BulkDeleteSource` interface is exported by `FileSystem` and `FileContext`
storage clients
-which MAY support the API; it may still be unsupported by the
-specific instance.
+which is available for all FS via `org.apache.hadoop.fs.DefaultBulkDeleteSource`.
+Some FS MAY still decide not to support the API by overriding the `createBulkDelete()` method
+with an UnsupportedOperationException. While doing so they must also declare the path
Review Comment:
use MUST in capitals and recommend that implementations do not do this.
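A possible rewording along those lines (a sketch only, not agreed text):

    Some FS MAY still decide not to support the API by overriding the
    `createBulkDelete()` method to throw an `UnsupportedOperationException`;
    this is NOT RECOMMENDED. Implementations which do so MUST also declare
    the path capability `fs.capability.bulk.delete` as false.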
##########
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java:
##########
@@ -768,6 +769,11 @@ private void executeBulkDeleteOnSomeReadOnlyFiles(Configuration assumedRoleConfig
bindReadOnlyRolePolicy(assumedRoleConfig, readOnlyDir);
roleFS = (S3AFileSystem) destDir.getFileSystem(assumedRoleConfig);
S3AFileSystem fs = getFileSystem();
+ if (WrappedIO.bulkDeletePageSize(fs, destDir) == 1) {
+ String msg = "Skipping as this test requires more than one paths to be
deleted in bulk";
Review Comment:
nit "path"
##########
hadoop-common-project/hadoop-common/src/site/markdown/filesystem/bulkdelete.md:
##########
@@ -94,8 +94,10 @@ No other restrictions are placed upon the outcome.
### Availability
The `BulkDeleteSource` interface is exported by `FileSystem` and `FileContext`
storage clients
-which MAY support the API; it may still be unsupported by the
-specific instance.
+which is available for all FS via `org.apache.hadoop.fs.DefaultBulkDeleteSource`.
+Some FS MAY still decide not to support the API by overriding the `createBulkDelete()` method
+with an UnsupportedOperationException. While doing so they must also declare the path
+capability `fs.capability.bulk.delete` as false.
Review Comment:
we're going to have to modify that iceberg PoC to check this, aren't we? Then
fail.
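For a downstream user such as that PoC, the probe would presumably be the
standard path-capability check before choosing bulk delete; a hypothetical
sketch (the capability name is from the hunk above, the guard method and its
failure handling are invented):

    import java.io.IOException;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    // hypothetical guard for a client like the iceberg PoC
    static void requireBulkDelete(FileSystem fs, Path base) throws IOException {
        // hasPathCapability is the existing FileSystem probe API
        if (!fs.hasPathCapability(base, "fs.capability.bulk.delete")) {
            // fall back to single deletes, or fail fast as suggested here
            throw new UnsupportedOperationException(
                "bulk delete not available under " + base);
        }
    }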
##########
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractBulkDelete.java:
##########
@@ -85,6 +88,9 @@ public ITestS3AContractBulkDelete(boolean enableMultiObjectDelete) {
protected Configuration createConfiguration() {
Configuration conf = super.createConfiguration();
S3ATestUtils.disableFilesystemCaching(conf);
+ conf = propagateBucketOptions(conf, getTestBucketName(conf));
+ skipIfNotEnabled(conf, Constants.ENABLE_MULTI_DELETE,
Review Comment:
this should actually be guarded with 'if (enableMultiObjectDelete)' so at
least the single-entry delete ops are tested with GCS. This does matter, as
GCS returns 404 on a DELETE of a nonexistent path; we need to make sure that
doesn't trigger a failure.
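i.e. something like this sketch, inside the test class (names are from the
quoted hunk; the skip message and trailing return are placeholders, as the
hunk truncates the real method):

    @Override
    protected Configuration createConfiguration() {
        Configuration conf = super.createConfiguration();
        S3ATestUtils.disableFilesystemCaching(conf);
        conf = propagateBucketOptions(conf, getTestBucketName(conf));
        if (enableMultiObjectDelete) {
            // only require multi-object delete for the parameterized runs
            // which exercise it; single-entry delete paths still run on
            // stores such as GCS where it is disabled
            skipIfNotEnabled(conf, Constants.ENABLE_MULTI_DELETE,
                "multi-object delete disabled");
        }
        return conf;
    }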
> Add API for bulk/paged object deletion
> --------------------------------------
>
> Key: HADOOP-18679
> URL: https://issues.apache.org/jira/browse/HADOOP-18679
> Project: Hadoop Common
> Issue Type: Sub-task
> Components: fs/s3
> Affects Versions: 3.3.5
> Reporter: Steve Loughran
> Priority: Major
> Labels: pull-request-available
>
> iceberg and hbase could benefit from being able to give a list of individual
> files to delete - files which may be scattered around the bucket - for better
> read performance.
> Add some new optional interface for an object store which allows a caller to
> submit a list of paths to files to delete, where
> the expectation is
> * if a path is a file: delete
> * if a path is a dir, outcome undefined
> For s3 that'd let us build these into DeleteRequest objects, and submit,
> without any probes first.
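For reference, a caller-side sketch of the API as reviewed above (type and
method names are taken from the quoted diffs; the helper wrapper itself is
hypothetical):

    import java.io.IOException;
    import java.util.Collections;
    import java.util.List;
    import java.util.Map;

    import org.apache.hadoop.fs.BulkDelete;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    /** Delete a single file under base; returns per-path failure reasons. */
    static List<Map.Entry<Path, String>> deleteOne(FileSystem fs, Path base, Path file)
            throws IOException {
        // BulkDeleteSource is exported by FileSystem; callers should first
        // probe the fs.capability.bulk.delete path capability
        BulkDelete bulkDelete = fs.createBulkDelete(base);
        // the default implementation always reports a page size of 1
        int pageSize = bulkDelete.pageSize();
        // an empty result list means the file was deleted or already absent
        return bulkDelete.bulkDelete(Collections.singletonList(file));
    }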