amit-jain commented on code in PR #1861: URL: https://github.com/apache/jackrabbit-oak/pull/1861#discussion_r1961902824
########## oak-blob-cloud-azure/src/main/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/AzureBlobContainerProvider.java: ########## @@ -171,176 +153,92 @@ public String getContainerName() { return containerName; } + public String getAzureConnectionString() { + return azureConnectionString; + } + @NotNull - public CloudBlobContainer getBlobContainer() throws DataStoreException { - return this.getBlobContainer(null); + public BlobContainerClient getBlobContainer() throws DataStoreException { + return this.getBlobContainer(null, new Properties()); } @NotNull - public CloudBlobContainer getBlobContainer(@Nullable BlobRequestOptions blobRequestOptions) throws DataStoreException { + public BlobContainerClient getBlobContainer(@Nullable RequestRetryOptions retryOptions, Properties properties) throws DataStoreException { // connection string will be given preference over service principals / sas / account key if (StringUtils.isNotBlank(azureConnectionString)) { log.debug("connecting to azure blob storage via azureConnectionString"); - return Utils.getBlobContainer(azureConnectionString, containerName, blobRequestOptions); + return Utils.getBlobContainerFromConnectionString(getAzureConnectionString(), accountName); } else if (authenticateViaServicePrincipal()) { log.debug("connecting to azure blob storage via service principal credentials"); - return getBlobContainerFromServicePrincipals(blobRequestOptions); + return getBlobContainerFromServicePrincipals(accountName, retryOptions); } else if (StringUtils.isNotBlank(sasToken)) { log.debug("connecting to azure blob storage via sas token"); final String connectionStringWithSasToken = Utils.getConnectionStringForSas(sasToken, blobEndpoint, accountName); - return Utils.getBlobContainer(connectionStringWithSasToken, containerName, blobRequestOptions); + return Utils.getBlobContainer(connectionStringWithSasToken, containerName, retryOptions, properties); } log.debug("connecting to azure blob storage via access key"); final String connectionStringWithAccountKey = Utils.getConnectionString(accountName, accountKey, blobEndpoint); - return Utils.getBlobContainer(connectionStringWithAccountKey, containerName, blobRequestOptions); - } - - @NotNull - private CloudBlobContainer getBlobContainerFromServicePrincipals(@Nullable BlobRequestOptions blobRequestOptions) throws DataStoreException { - StorageCredentialsToken storageCredentialsToken = getStorageCredentials(); - try { - CloudStorageAccount cloud = new CloudStorageAccount(storageCredentialsToken, true, DEFAULT_ENDPOINT_SUFFIX, accountName); - CloudBlobClient cloudBlobClient = cloud.createCloudBlobClient(); - if (blobRequestOptions != null) { - cloudBlobClient.setDefaultRequestOptions(blobRequestOptions); - } - return cloudBlobClient.getContainerReference(containerName); - } catch (URISyntaxException | StorageException e) { - throw new DataStoreException(e); - } - } - - @NotNull - private StorageCredentialsToken getStorageCredentials() { - boolean isAccessTokenGenerated = false; - /* generate access token, the same token will be used for subsequent access - * generated token is valid for 1 hour only and will be refreshed in background - * */ - if (accessToken == null) { - clientSecretCredential = new ClientSecretCredentialBuilder() - .clientId(clientId) - .clientSecret(clientSecret) - .tenantId(tenantId) - .build(); - accessToken = clientSecretCredential.getTokenSync(new TokenRequestContext().addScopes(AZURE_DEFAULT_SCOPE)); - if (accessToken == null || StringUtils.isBlank(accessToken.getToken())) { - log.error("Access token is null or empty"); - throw new IllegalArgumentException("Could not connect to azure storage, access token is null or empty"); - } - storageCredentialsToken = new StorageCredentialsToken(accountName, accessToken.getToken()); - isAccessTokenGenerated = true; - } - - Objects.requireNonNull(storageCredentialsToken, "storage credentials token cannot be null"); - - // start refresh token executor only when the access token is first generated - if (isAccessTokenGenerated) { - log.info("starting refresh token task at: {}", OffsetDateTime.now()); - TokenRefresher tokenRefresher = new TokenRefresher(); - executorService.scheduleWithFixedDelay(tokenRefresher, TOKEN_REFRESHER_INITIAL_DELAY, TOKEN_REFRESHER_DELAY, TimeUnit.MINUTES); - } - return storageCredentialsToken; + return Utils.getBlobContainer(connectionStringWithAccountKey, containerName, retryOptions, properties); } @NotNull - public String generateSharedAccessSignature(BlobRequestOptions requestOptions, + public String generateSharedAccessSignature(RequestRetryOptions retryOptions, String key, - EnumSet<SharedAccessBlobPermissions> permissions, + BlobSasPermission blobSasPermissions, int expirySeconds, - SharedAccessBlobHeaders optionalHeaders) throws DataStoreException, URISyntaxException, StorageException, InvalidKeyException { - SharedAccessBlobPolicy policy = new SharedAccessBlobPolicy(); - Date expiry = Date.from(Instant.now().plusSeconds(expirySeconds)); - policy.setSharedAccessExpiryTime(expiry); - policy.setPermissions(permissions); + Properties properties) throws DataStoreException, URISyntaxException, InvalidKeyException { + + OffsetDateTime expiry = OffsetDateTime.now().plusSeconds(expirySeconds); + BlobServiceSasSignatureValues serviceSasSignatureValues = new BlobServiceSasSignatureValues(expiry, blobSasPermissions); - CloudBlockBlob blob = getBlobContainer(requestOptions).getBlockBlobReference(key); + BlockBlobClient blob = getBlobContainer(retryOptions, properties).getBlobClient(key).getBlockBlobClient(); if (authenticateViaServicePrincipal()) { - return generateUserDelegationKeySignedSas(blob, policy, optionalHeaders, expiry); + return generateUserDelegationKeySignedSas(blob, serviceSasSignatureValues, expiry); } - return generateSas(blob, policy, optionalHeaders); - } - - @NotNull - private String generateUserDelegationKeySignedSas(CloudBlockBlob blob, - SharedAccessBlobPolicy policy, - SharedAccessBlobHeaders optionalHeaders, - Date expiry) throws StorageException { - fillEmptyHeaders(optionalHeaders); - UserDelegationKey userDelegationKey = blob.getServiceClient().getUserDelegationKey(Date.from(Instant.now().minusSeconds(900)), - expiry); - return optionalHeaders == null ? blob.generateUserDelegationSharedAccessSignature(userDelegationKey, policy) : - blob.generateUserDelegationSharedAccessSignature(userDelegationKey, policy, optionalHeaders, null, null); - } - - /* set empty headers as blank string due to a bug in Azure SDK - * Azure SDK considers null headers as 'null' string which corrupts the string to sign and generates an invalid - * sas token - * */ - private void fillEmptyHeaders(SharedAccessBlobHeaders sharedAccessBlobHeaders) { - final String EMPTY_STRING = ""; - Optional.ofNullable(sharedAccessBlobHeaders) - .ifPresent(headers -> { - if (StringUtils.isBlank(headers.getCacheControl())) { - headers.setCacheControl(EMPTY_STRING); - } - if (StringUtils.isBlank(headers.getContentDisposition())) { - headers.setContentDisposition(EMPTY_STRING); - } - if (StringUtils.isBlank(headers.getContentEncoding())) { - headers.setContentEncoding(EMPTY_STRING); - } - if (StringUtils.isBlank(headers.getContentLanguage())) { - headers.setContentLanguage(EMPTY_STRING); - } - if (StringUtils.isBlank(headers.getContentType())) { - headers.setContentType(EMPTY_STRING); - } - }); + return generateSas(blob, serviceSasSignatureValues); } @NotNull - private String generateSas(CloudBlockBlob blob, - SharedAccessBlobPolicy policy, - SharedAccessBlobHeaders optionalHeaders) throws InvalidKeyException, StorageException { - return optionalHeaders == null ? blob.generateSharedAccessSignature(policy, null) : - blob.generateSharedAccessSignature(policy, - optionalHeaders, null, null, null, true); + public String generateUserDelegationKeySignedSas(BlockBlobClient blobClient, + BlobServiceSasSignatureValues serviceSasSignatureValues, + OffsetDateTime expiryTime) { + + BlobServiceClient blobServiceClient = new BlobServiceClientBuilder() + .endpoint(String.format(String.format("https://%s.%s", accountName, DEFAULT_ENDPOINT_SUFFIX))) + .credential(getClientSecretCredential()) + .buildClient(); + OffsetDateTime startTime = OffsetDateTime.now(ZoneOffset.UTC); + UserDelegationKey userDelegationKey = blobServiceClient.getUserDelegationKey(startTime, expiryTime); + return blobClient.generateUserDelegationSas(serviceSasSignatureValues, userDelegationKey); } private boolean authenticateViaServicePrincipal() { return StringUtils.isBlank(azureConnectionString) && StringUtils.isNoneBlank(accountName, tenantId, clientId, clientSecret); } - private class TokenRefresher implements Runnable { - @Override - public void run() { - try { - log.debug("Checking for azure access token expiry at: {}", LocalDateTime.now()); - OffsetDateTime tokenExpiryThreshold = OffsetDateTime.now().plusMinutes(5); - if (accessToken.getExpiresAt() != null && accessToken.getExpiresAt().isBefore(tokenExpiryThreshold)) { - log.info("Access token is about to expire (5 minutes or less) at: {}. New access token will be generated", - accessToken.getExpiresAt().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME)); - AccessToken newToken = clientSecretCredential.getTokenSync(new TokenRequestContext().addScopes(AZURE_DEFAULT_SCOPE)); - log.info("New azure access token generated at: {}", LocalDateTime.now()); - if (newToken == null || StringUtils.isBlank(newToken.getToken())) { - log.error("New access token is null or empty"); - return; - } - // update access token with newly generated token - accessToken = newToken; - storageCredentialsToken.updateToken(accessToken.getToken()); - } - } catch (Exception e) { - log.error("Error while acquiring new access token: ", e); - } - } + private ClientSecretCredential getClientSecretCredential() { + return new ClientSecretCredentialBuilder() Review Comment: Does this not have any expiry issues? As I understand earlier the problem was that the access token retrieved expired in 60 minutes and that is why TokenRefresher functionality was added. ########## oak-jcr/src/test/java/org/apache/jackrabbit/oak/jcr/binary/fixtures/datastore/AzureDataStoreFixture.java: ########## @@ -94,7 +95,7 @@ public DataStore createDataStore() { String connectionString = Utils.getConnectionStringFromProperties(azProps); try { - CloudBlobContainer container = Utils.getBlobContainer(connectionString, containerName); + CloudBlobContainer container = UtilsV8.getBlobContainer(connectionString, containerName); Review Comment: Should we have a property to enable/disable v8/v12 so that both can be used. ########## oak-blob-cloud-azure/src/main/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/AzureBlobStoreBackend.java: ########## @@ -1264,34 +1133,22 @@ private boolean loadItems() { try { currentThread().setContextClassLoader(getClass().getClassLoader()); - CloudBlobContainer container = azureBlobContainerProvider.getBlobContainer(); - if (!firstCall && (resultContinuation == null || !resultContinuation.hasContinuation())) { - LOG.trace("No more records in container. containerName={}", container); + if (!firstCall) { Review Comment: Ref https://learn.microsoft.com/en-us/java/api/com.azure.storage.blob.models?view=azure-java-stable ########## oak-blob-cloud-azure/src/main/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/Constants.java: ########## @@ -0,0 +1,19 @@ +package org.apache.jackrabbit.oak.blob.cloud.azure.blobstorage; + +public interface Constants { + String META_DIR_NAME = "META"; + String META_KEY_PREFIX = META_DIR_NAME + "/"; + + String REF_KEY = "reference.key"; + String LAST_MODIFIED_KEY = "lastModified"; + + long BUFFERED_STREAM_THRESHOLD = 1024 * 1024; + long MIN_MULTIPART_UPLOAD_PART_SIZE = 1024 * 1024 * 10; // 10MB + long MAX_MULTIPART_UPLOAD_PART_SIZE = 1024 * 1024 * 100; // 100MB + long MAX_SINGLE_PUT_UPLOAD_SIZE = 1024 * 1024 * 256; // 256MB, Azure limit Review Comment: Not super important but are these the latest limits which are documented here? ########## oak-blob-cloud-azure/src/main/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/AzureBlobStoreBackend.java: ########## @@ -1264,34 +1133,22 @@ private boolean loadItems() { try { currentThread().setContextClassLoader(getClass().getClassLoader()); - CloudBlobContainer container = azureBlobContainerProvider.getBlobContainer(); - if (!firstCall && (resultContinuation == null || !resultContinuation.hasContinuation())) { - LOG.trace("No more records in container. containerName={}", container); + if (!firstCall) { Review Comment: The pagination has been removed. In this case filling up a list in one go will potentially cause OOM. The result object is a PagedIterable and getting an iterator() ans delegating to it should be much more simpler. ########## oak-blob-cloud-azure/src/main/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/AzureBlobStoreBackend.java: ########## @@ -1362,4 +1219,54 @@ private String getContainerName() { .map(AzureBlobContainerProvider::getContainerName) .orElse(null); } + + @Override + public byte[] getOrCreateReferenceKey() throws DataStoreException { + try { + if (secret != null && secret.length != 0) { + return secret; + } else { + byte[] key; + // Try reading from the metadata folder if it exists + key = readMetadataBytes(REF_KEY); + if (key == null) { + key = super.getOrCreateReferenceKey(); + addMetadataRecord(new ByteArrayInputStream(key), REF_KEY); + key = readMetadataBytes(REF_KEY); + } + secret = key; + return secret; + } + } catch (IOException e) { + throw new DataStoreException("Unable to get or create key " + e); + } + } + + protected byte[] readMetadataBytes(String name) throws IOException, DataStoreException { + DataRecord rec = getMetadataRecord(name); + byte[] key = null; + if (rec != null) { + InputStream stream = null; + try { + stream = rec.getStream(); + return IOUtils.toByteArray(stream); + } finally { + IOUtils.closeQuietly(stream); + } + } + return key; + } + + private String computeSecondaryLocationEndpoint() { Review Comment: Any documentation of why this is needed will be helpful ########## oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/AzureBlobStoreBackendTest.java: ########## @@ -18,8 +18,10 @@ */ package org.apache.jackrabbit.oak.blob.cloud.azure.blobstorage; -import com.microsoft.azure.storage.StorageException; Review Comment: These tests should be moved to the v8 package as we still have the code living in the v8 package ########## oak-run-commons/src/main/java/org/apache/jackrabbit/oak/fixture/DataStoreUtils.java: ########## @@ -188,7 +189,7 @@ private static CloudBlobContainer getCloudBlobContainer(@NotNull Map<String, ?> return null; } - try (AzureBlobContainerProvider azureBlobContainerProvider = AzureBlobContainerProvider.Builder.builder(containerName) + try (AzureBlobContainerProviderV8 azureBlobContainerProvider = AzureBlobContainerProviderV8.Builder.builder(containerName) Review Comment: Would be better to have an option and support both ########## oak-run-commons/src/test/java/org/apache/jackrabbit/oak/fixture/DataStoreUtilsTest.java: ########## @@ -151,7 +152,7 @@ public void delete_container_service_principal() throws Exception { Assume.assumeNotNull(tenantId); CloudBlobContainer container; - try (AzureBlobContainerProvider azureBlobContainerProvider = AzureBlobContainerProvider.Builder.builder(CONTAINER_NAME) + try (AzureBlobContainerProviderV8 azureBlobContainerProvider = AzureBlobContainerProviderV8.Builder.builder(CONTAINER_NAME) Review Comment: same as above, support for v12 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: oak-dev-unsubscr...@jackrabbit.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org