[
https://issues.apache.org/jira/browse/HADOOP-19802?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=18060264#comment-18060264
]
ASF GitHub Bot commented on HADOOP-19802:
-----------------------------------------
bhattmanish98 commented on code in PR #8229:
URL: https://github.com/apache/hadoop/pull/8229#discussion_r2839536563
##########
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java:
##########
@@ -250,6 +250,22 @@ public static ApiVersion getCurrentVersion() {
public static final String XML_TAG_COMMITTED_BLOCKS = "CommittedBlocks";
public static final String XML_TAG_BLOCK_NAME = "Block";
public static final String PUT_BLOCK_LIST = "PutBlockList";
+ // ===== List Containers (Blob Endpoint) XML Tags =====
+ public static final String XML_TAG_CONTAINERS = "Containers";
Review Comment:
This is not used anywhere; we can remove it if it is not needed.
##########
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java:
##########
@@ -2359,4 +2362,110 @@ private void findParentPathsForMarkerCreation(Path
path, TracingContext tracingC
}
} while (current != null && !current.isRoot());
}
+
+ /**
+ * Lists containers in the storage account using the Blob service endpoint.
+ *
+ * @param prefix optional prefix to filter container names
+ * @param continuation optional continuation token for paginated results
+ * @param tracingContext tracing context for the REST call
+ * @return response containing listed containers and continuation token
+ * @throws IOException if the operation fails or the response cannot be
parsed
+ */
+ public ContainerListResponseData listContainers(
+ final String prefix,
+ final String continuation,
+ final TracingContext tracingContext) throws IOException {
+ final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders();
+ final AbfsUriQueryBuilder queryBuilder = createDefaultUriQueryBuilder();
+ queryBuilder.addQuery(QUERY_PARAM_COMP, LIST);
+ if (prefix != null && !prefix.isEmpty()) {
Review Comment:
We can use StringUtils.isEmpty(): this function performs both the null and the
empty-string check.
##########
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/ContainerListXmlParser.java:
##########
@@ -0,0 +1,203 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.azurebfs.contracts.services;
+
+import java.util.Stack;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants;
+import org.apache.hadoop.fs.azurebfs.utils.DateTimeUtils;
+
+import static
org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING;
+
+/**
+ * SAX parser for Azure Blob Storage "List Containers" REST API response.
+ *
+ * <p>
+ * Parses the XML response returned by:
+ * https://learn.microsoft.com/en-us/rest/api/storageservices/list-containers2
+ * </p>
+ *
+ * <p>
+ * This parser is streaming (SAX-based) to avoid loading the full XML into
memory.
+ * </p>
+ */
+public class ContainerListXmlParser extends DefaultHandler {
+
+ /** Parsed response object */
+ private final ContainerListResponseData responseData;
+
+ /** Stack of active XML elements */
+ private final Stack<String> elements = new Stack<>();
+
+ /** Buffer for character data */
+ private StringBuilder bld = new StringBuilder();
Review Comment:
Rename this variable.
Some suggestions: buffer, charBuffer
##########
hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java:
##########
@@ -889,4 +892,80 @@ public void testIsDirectoryWithDifferentCases() throws
Exception {
testIsDirectory(true, "HDI_ISFOLDER", "Hdi_ISFOLDER1", "Test");
}
+
+ @Test
+ public void testListAndDeleteContainers() throws Exception {
+ final AzureBlobFileSystem fs = getFileSystem();
+ final AbfsBlobClient blobClient =
+ fs.getAbfsStore().getClientHandler().getBlobClient();
+ final TracingContext tracingContext =
+ getTestTracingContext(fs, true);
+
+ // DFS- and Blob-compliant container (filesystem) names
+ String container1 = "abfs-test-listtest1";
+ String container2 = "abfs-test-listtest2";
+
+ AzureBlobFileSystem fs1 = null;
+ AzureBlobFileSystem fs2 = null;
+
+ try {
+ String account = fs.getAbfsStore()
+ .getAbfsConfiguration().getAccountName();
+
+ fs1 = (AzureBlobFileSystem) FileSystem.get(
+ new URI("abfs://" + container1 + "@" + account), fs.getConf());
+ fs2 = (AzureBlobFileSystem) FileSystem.get(
+ new URI("abfs://" + container2 + "@" + account), fs.getConf());
+
+ fs1.mkdirs(new Path("/dir1"));
+ fs1.create(new Path("/dir1/file1")).close();
+ fs2.mkdirs(new Path("/dir2"));
+ fs2.create(new Path("/dir2/file2")).close();
+
+ ContainerListResponseData response =
+ blobClient.listContainers("abfs-test-", null, tracingContext);
+
+ assertThat(response)
+ .describedAs("listContainers response should not be null")
+ .isNotNull();
+
+ assertThat(response.getContainers())
+ .describedAs("Container list should contain created test containers")
+ .extracting(ContainerListEntrySchema::getName)
+ .contains(container1, container2);
+
+ boolean deleted1 = deleteContainer(blobClient, container1,
tracingContext);
+ boolean deleted2 = deleteContainer(blobClient, container2,
tracingContext);
+
+ assertThat(deleted1)
+ .describedAs("First container should be deleted or already absent")
+ .isTrue();
+ assertThat(deleted2)
+ .describedAs("Second container should be deleted or already absent")
+ .isTrue();
+
+ } finally {
+ if (fs1 != null) {
+ fs1.close();
+ }
+ if (fs2 != null) {
+ fs2.close();
+ }
+ }
+ }
+
+
+ private boolean deleteContainer(
Review Comment:
Javadoc is missing.
##########
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java:
##########
@@ -2359,4 +2362,110 @@ private void findParentPathsForMarkerCreation(Path
path, TracingContext tracingC
}
} while (current != null && !current.isRoot());
}
+
+ /**
+ * Lists containers in the storage account using the Blob service endpoint.
+ *
+ * @param prefix optional prefix to filter container names
+ * @param continuation optional continuation token for paginated results
+ * @param tracingContext tracing context for the REST call
+ * @return response containing listed containers and continuation token
+ * @throws IOException if the operation fails or the response cannot be
parsed
+ */
+ public ContainerListResponseData listContainers(
+ final String prefix,
+ final String continuation,
+ final TracingContext tracingContext) throws IOException {
+ final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders();
+ final AbfsUriQueryBuilder queryBuilder = createDefaultUriQueryBuilder();
+ queryBuilder.addQuery(QUERY_PARAM_COMP, LIST);
+ if (prefix != null && !prefix.isEmpty()) {
+ queryBuilder.addQuery(QUERY_PARAM_PREFIX, prefix);
+ }
+ queryBuilder.addQuery(HttpQueryParams.QUERY_PARAM_MARKER, continuation);
+ appendSASTokenToQuery(EMPTY_STRING,
SASTokenProvider.LIST_CONTAINERS_OPERATION, queryBuilder);
+ URL accountUrl = new URL(getBaseUrl().getProtocol(),
getBaseUrl().getHost(), ROOT_PATH);
+ final URL url = createRequestUrl(accountUrl, EMPTY_STRING,
queryBuilder.toString());
+ final AbfsRestOperation op = getAbfsRestOperation(
+ AbfsRestOperationType.ListContainers,
+ HTTP_METHOD_GET,
+ url,
+ requestHeaders);
+ op.execute(tracingContext);
+ return parseListContainersResponse(op.getResult());
+ }
+
+ /**
+ * Parses the List Containers response returned by the Blob service.
+ *
+ * @param result HTTP operation containing the list containers response
+ * @return parsed container listing with continuation token
+ * @throws AzureBlobFileSystemException if response parsing fails
+ */
+ private ContainerListResponseData parseListContainersResponse(
+ final AbfsHttpOperation result)
+ throws AzureBlobFileSystemException {
+ try (InputStream stream = result.getListResultStream()) {
+ try {
Review Comment:
The internal try block is redundant here; it is not required. We can simplify it as follows:
```
try (InputStream stream = result.getListResultStream()) {
final SAXParser saxParser = saxParserThreadLocal.get();
saxParser.reset();
final ContainerListResponseData responseData = new
ContainerListResponseData();
saxParser.parse(stream, new ContainerListXmlParser(responseData));
LOG.debug(
"ListContainers listed {} containers with {} as continuation token",
responseData.getContainers().size(),
responseData.getContinuationToken()
);
return responseData;
} catch (SAXException | IOException ex) {
throw new AbfsDriverException(ERR_BLOB_LIST_PARSING, ex);
} catch (AbfsDriverException ex) {
// Avoid double-wrapping
LOG.error("Unable to deserialize list containers response", ex);
throw ex;
} catch (Exception ex) {
LOG.error("Unable to get stream for list containers response", ex);
throw new AbfsDriverException(ERR_BLOB_LIST_PARSING, ex);
}
##########
hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java:
##########
@@ -889,4 +892,80 @@ public void testIsDirectoryWithDifferentCases() throws
Exception {
testIsDirectory(true, "HDI_ISFOLDER", "Hdi_ISFOLDER1", "Test");
}
+
+ @Test
+ public void testListAndDeleteContainers() throws Exception {
Review Comment:
Javadoc is missing.
##########
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/ContainerListEntrySchema.java:
##########
@@ -0,0 +1,286 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.azurebfs.contracts.services;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * List result entry schema for the Azure Blob Storage List Containers API.
+ *
+ * Represents a single container and its associated properties returned by
+ * the Blob endpoint container listing operation.
+ */
+public class ContainerListEntrySchema implements ListResultEntrySchema {
+
+ private String name;
+ private String version;
+ private Boolean deleted = false;
+ private Long lastModified;
+ private String eTag;
+ private String leaseStatus;
+ private String leaseState;
+ private String leaseDuration;
+ private String publicAccess;
+ private Boolean hasImmutabilityPolicy;
+ private Boolean hasLegalHold;
+ private Long deletedTime;
+ private Integer remainingRetentionDays;
+
+ private final Map<String, String> metadata = new HashMap<>();
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public String name() {
+ return name;
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * Containers are treated as directories in ABFS semantics.
+ */
+ @Override
+ public Boolean isDirectory() {
+ return Boolean.TRUE;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public String eTag() {
+ return eTag;
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * Returns the container last-modified time as a string, if available.
+ */
+ @Override
+ public String lastModified() {
+ return lastModified != null ? String.valueOf(lastModified) : null;
Review Comment:
A better way to write this is:
`lastModified != null ? lastModified.toString() : null;`
as it gives type clarity.
##########
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java:
##########
@@ -2359,4 +2362,110 @@ private void findParentPathsForMarkerCreation(Path
path, TracingContext tracingC
}
} while (current != null && !current.isRoot());
}
+
+ /**
+ * Lists containers in the storage account using the Blob service endpoint.
+ *
+ * @param prefix optional prefix to filter container names
+ * @param continuation optional continuation token for paginated results
+ * @param tracingContext tracing context for the REST call
+ * @return response containing listed containers and continuation token
+ * @throws IOException if the operation fails or the response cannot be
parsed
+ */
+ public ContainerListResponseData listContainers(
+ final String prefix,
+ final String continuation,
+ final TracingContext tracingContext) throws IOException {
+ final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders();
+ final AbfsUriQueryBuilder queryBuilder = createDefaultUriQueryBuilder();
+ queryBuilder.addQuery(QUERY_PARAM_COMP, LIST);
+ if (prefix != null && !prefix.isEmpty()) {
+ queryBuilder.addQuery(QUERY_PARAM_PREFIX, prefix);
+ }
+ queryBuilder.addQuery(HttpQueryParams.QUERY_PARAM_MARKER, continuation);
+ appendSASTokenToQuery(EMPTY_STRING,
SASTokenProvider.LIST_CONTAINERS_OPERATION, queryBuilder);
+ URL accountUrl = new URL(getBaseUrl().getProtocol(),
getBaseUrl().getHost(), ROOT_PATH);
+ final URL url = createRequestUrl(accountUrl, EMPTY_STRING,
queryBuilder.toString());
+ final AbfsRestOperation op = getAbfsRestOperation(
+ AbfsRestOperationType.ListContainers,
+ HTTP_METHOD_GET,
+ url,
+ requestHeaders);
+ op.execute(tracingContext);
+ return parseListContainersResponse(op.getResult());
+ }
+
+ /**
+ * Parses the List Containers response returned by the Blob service.
+ *
+ * @param result HTTP operation containing the list containers response
+ * @return parsed container listing with continuation token
+ * @throws AzureBlobFileSystemException if response parsing fails
+ */
+ private ContainerListResponseData parseListContainersResponse(
+ final AbfsHttpOperation result)
+ throws AzureBlobFileSystemException {
+ try (InputStream stream = result.getListResultStream()) {
+ try {
+ final SAXParser saxParser = saxParserThreadLocal.get();
+ saxParser.reset();
+ final ContainerListResponseData responseData =
+ new ContainerListResponseData();
+ saxParser.parse(stream, new ContainerListXmlParser(responseData));
+ LOG.debug("ListContainers listed {} containers with {} as continuation
token",
+ responseData.getContainers().size(),
+ responseData.getContinuationToken());
+ return responseData;
+ } catch (SAXException | IOException ex) {
+ throw new AbfsDriverException(ERR_BLOB_LIST_PARSING, ex);
+ }
+ } catch (AbfsDriverException ex) {
+ // Throw as it is to avoid multiple wrapping.
+ LOG.error("Unable to deserialize list containers response", ex);
+ throw ex;
+ } catch (Exception ex) {
+ LOG.error("Unable to get stream for list containers response", ex);
+ throw new AbfsDriverException(ERR_BLOB_LIST_PARSING, ex);
+ }
+ }
+
+ /**
+ * Deletes a container from the storage account using the Blob service
endpoint.
+ *
+ * @param container name of the container to delete (must be a single path
segment)
+ * @param tracingContext tracing context for the REST call
+ * @return REST operation representing the delete request
+ * @throws AzureBlobFileSystemException if the delete operation fails
+ */
+ public AbfsRestOperation deleteContainer(
+ final String container,
+ final TracingContext tracingContext)
+ throws AzureBlobFileSystemException, MalformedURLException {
+
+ if (container == null || container.isEmpty()) {
Review Comment:
Please use StringUtils.isEmpty() here
> ABFS: Remove SDK dependency in hadoop-azure
> -------------------------------------------
>
> Key: HADOOP-19802
> URL: https://issues.apache.org/jira/browse/HADOOP-19802
> Project: Hadoop Common
> Issue Type: Sub-task
> Affects Versions: 3.4.2
> Reporter: Anmol Asrani
> Assignee: Anmol Asrani
> Priority: Major
> Labels: pull-request-available
>
> Remove use of SDK from hadoop-azure
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]