From 60bf9762bc8423d9e26fdee3ce079ef742c80303 Mon Sep 17 00:00:00 2001
From: Nidhi Nandwani
Date: Tue, 17 Feb 2026 17:58:47 +0000
Subject: [PATCH] feat: implement skipIfExists option for downloadBlobs in TransferManager

---
 .../ParallelDownloadConfig.java               | 36 ++++++++-
 .../transfermanager/TransferManagerImpl.java  | 25 +++++-
 .../storage/it/ITTransferManagerTest.java     | 63 +++++++++++++++
 .../ParallelDownloadConfigTest.java           | 80 +++++++++++++++++++
 4 files changed, 198 insertions(+), 6 deletions(-)
 create mode 100644 google-cloud-storage/src/test/java/com/google/cloud/storage/transfermanager/ParallelDownloadConfigTest.java

diff --git a/google-cloud-storage/src/main/java/com/google/cloud/storage/transfermanager/ParallelDownloadConfig.java b/google-cloud-storage/src/main/java/com/google/cloud/storage/transfermanager/ParallelDownloadConfig.java
index 0ad8bd91f8..9a4c571798 100644
--- a/google-cloud-storage/src/main/java/com/google/cloud/storage/transfermanager/ParallelDownloadConfig.java
+++ b/google-cloud-storage/src/main/java/com/google/cloud/storage/transfermanager/ParallelDownloadConfig.java
@@ -34,22 +34,35 @@
  */
 public final class ParallelDownloadConfig {
 
+  private final boolean skipIfExists;
   @NonNull private final String stripPrefix;
   @NonNull private final Path downloadDirectory;
   @NonNull private final String bucketName;
   @NonNull private final List<BlobSourceOption> optionsPerRequest;
 
   private ParallelDownloadConfig(
+      boolean skipIfExists,
       @NonNull String stripPrefix,
       @NonNull Path downloadDirectory,
       @NonNull String bucketName,
       @NonNull List<BlobSourceOption> optionsPerRequest) {
+    this.skipIfExists = skipIfExists;
     this.stripPrefix = stripPrefix;
     this.downloadDirectory = downloadDirectory;
     this.bucketName = bucketName;
     this.optionsPerRequest = optionsPerRequest;
   }
 
+  /**
+   * Whether Transfer Manager skips an object whose destination file already exists locally. Only
+   * the file's existence is checked, not its contents. Defaults to {@code false}.
+   *
+   * @see Builder#setSkipIfExists(boolean)
+   */
+  public boolean isSkipIfExists() {
+    return skipIfExists;
+  }
+
   /**
    * A common prefix removed from an object's name before being written to the filesystem.
    *
@@ -96,7 +109,8 @@ public boolean equals(Object o) {
       return false;
     }
     ParallelDownloadConfig that = (ParallelDownloadConfig) o;
-    return stripPrefix.equals(that.stripPrefix)
+    return skipIfExists == that.skipIfExists
+        && stripPrefix.equals(that.stripPrefix)
         && downloadDirectory.equals(that.downloadDirectory)
         && bucketName.equals(that.bucketName)
         && optionsPerRequest.equals(that.optionsPerRequest);
@@ -104,12 +118,14 @@ public boolean equals(Object o) {
 
   @Override
   public int hashCode() {
-    return Objects.hash(stripPrefix, downloadDirectory, bucketName, optionsPerRequest);
+    return Objects.hash(
+        skipIfExists, stripPrefix, downloadDirectory, bucketName, optionsPerRequest);
   }
 
   @Override
   public String toString() {
     return MoreObjects.toStringHelper(this)
+        .add("skipIfExists", skipIfExists)
         .add("stripPrefix", stripPrefix)
         .add("downloadDirectory", downloadDirectory)
         .add("bucketName", bucketName)
@@ -128,18 +144,32 @@ public static Builder newBuilder() {
 
   public static final class Builder {
 
+    private boolean skipIfExists;
     @NonNull private String stripPrefix;
     @NonNull private Path downloadDirectory;
     @NonNull private String bucketName;
     @NonNull private List<BlobSourceOption> optionsPerRequest;
 
     private Builder() {
+      this.skipIfExists = false;
       this.stripPrefix = "";
       this.downloadDirectory = Paths.get("");
       this.bucketName = "";
       this.optionsPerRequest = ImmutableList.of();
     }
 
+    /**
+     * Sets the value for skipIfExists. When set to true, Transfer Manager will skip downloading an
+     * object if it already exists on the local filesystem.
+     *
+     * @return the builder instance with the value for skipIfExists modified.
+     * @see ParallelDownloadConfig#isSkipIfExists()
+     */
+    public Builder setSkipIfExists(boolean skipIfExists) {
+      this.skipIfExists = skipIfExists;
+      return this;
+    }
+
     /**
      * Sets the value for stripPrefix. This string will be removed from the beginning of all object
      * names before they are written to the filesystem.
@@ -197,7 +227,7 @@ public ParallelDownloadConfig build() {
       checkNotNull(downloadDirectory);
       checkNotNull(optionsPerRequest);
       return new ParallelDownloadConfig(
-          stripPrefix, downloadDirectory, bucketName, optionsPerRequest);
+          skipIfExists, stripPrefix, downloadDirectory, bucketName, optionsPerRequest);
     }
   }
 }
diff --git a/google-cloud-storage/src/main/java/com/google/cloud/storage/transfermanager/TransferManagerImpl.java b/google-cloud-storage/src/main/java/com/google/cloud/storage/transfermanager/TransferManagerImpl.java
index d005441924..d9869aac8a 100644
--- a/google-cloud-storage/src/main/java/com/google/cloud/storage/transfermanager/TransferManagerImpl.java
+++ b/google-cloud-storage/src/main/java/com/google/cloud/storage/transfermanager/TransferManagerImpl.java
@@ -147,13 +147,32 @@ public void close() throws Exception {
     List<ApiFuture<DownloadResult>> downloadTasks = new ArrayList<>();
     if (!transferManagerConfig.isAllowDivideAndConquerDownload()) {
       for (BlobInfo blob : blobs) {
-        DirectDownloadCallable callable = new DirectDownloadCallable(storage, blob, config, opts);
-        downloadTasks.add(convert(executor.submit(callable)));
+        Path destPath = TransferManagerUtils.createDestPath(config, blob);
+        if (config.isSkipIfExists() && Files.exists(destPath)) {
+          downloadTasks.add(
+              ApiFutures.immediateFuture(
+                  DownloadResult.newBuilder(blob, TransferStatus.SKIPPED)
+                      .setOutputDestination(destPath)
+                      .build()));
+        } else {
+          DirectDownloadCallable callable = new DirectDownloadCallable(storage, blob, config, opts);
+          downloadTasks.add(convert(executor.submit(callable)));
+        }
       }
     } else {
       for (BlobInfo blob : blobs) {
-        BlobInfo validatedBlob = retrieveSizeAndGeneration(storage, blob, config.getBucketName());
         Path destPath = TransferManagerUtils.createDestPath(config, blob);
+        // Decide whether to skip before calling retrieveSizeAndGeneration: a skipped blob is
+        // reported using the caller-supplied blob, so resolving validatedBlob first is wasted work.
+        if (config.isSkipIfExists() && Files.exists(destPath)) {
+          downloadTasks.add(
+              ApiFutures.immediateFuture(
+                  DownloadResult.newBuilder(blob, TransferStatus.SKIPPED)
+                      .setOutputDestination(destPath)
+                      .build()));
+          continue;
+        }
+        BlobInfo validatedBlob = retrieveSizeAndGeneration(storage, blob, config.getBucketName());
         if (validatedBlob != null && qos.divideAndConquer(validatedBlob.getSize())) {
           DownloadResult optimisticResult =
               DownloadResult.newBuilder(validatedBlob, TransferStatus.SUCCESS)
diff --git a/google-cloud-storage/src/test/java/com/google/cloud/storage/it/ITTransferManagerTest.java b/google-cloud-storage/src/test/java/com/google/cloud/storage/it/ITTransferManagerTest.java
index dd7708b5f9..a84a48fed9 100644
--- a/google-cloud-storage/src/test/java/com/google/cloud/storage/it/ITTransferManagerTest.java
+++ b/google-cloud-storage/src/test/java/com/google/cloud/storage/it/ITTransferManagerTest.java
@@ -377,6 +377,69 @@ public void downloadBlobsAllowChunked() throws Exception {
     }
   }
 
+  @Test
+  public void downloadBlobsSkipIfExists() throws Exception {
+    TransferManagerConfig config =
+        TransferManagerConfigTestingInstances.defaults(storage.getOptions());
+    try (TransferManager transferManager = config.getService()) {
+      String bucketName = bucket.getName();
+      ParallelDownloadConfig parallelDownloadConfig =
+          ParallelDownloadConfig.newBuilder()
+              .setBucketName(bucketName)
+              .setDownloadDirectory(baseDir)
+              .setSkipIfExists(true)
+              .build();
+      // First download to ensure files exist
+      DownloadJob job1 = transferManager.downloadBlobs(blobs, parallelDownloadConfig);
+      List<DownloadResult> results1 = job1.getDownloadResults();
+      assertThat(results1.stream().allMatch(r -> r.getStatus() == TransferStatus.SUCCESS)).isTrue();
+
+      // Second download with skipIfExists=true
+      DownloadJob job2 = transferManager.downloadBlobs(blobs, parallelDownloadConfig);
+      List<DownloadResult> results2 = job2.getDownloadResults();
+      try {
+        assertThat(results2).hasSize(3);
+        assertThat(results2.stream().allMatch(r -> r.getStatus() == TransferStatus.SKIPPED))
+            .isTrue();
+      } finally {
+        cleanUpFiles(results1);
+      }
+    }
+  }
+
+  @Test
+  public void downloadBlobsSkipIfExistsChunked() throws Exception {
+    TransferManagerConfig config =
+        TransferManagerConfigTestingInstances.defaults(storage.getOptions()).toBuilder()
+            .setAllowDivideAndConquerDownload(true)
+            .setPerWorkerBufferSize(128 * 1024)
+            .build();
+    try (TransferManager transferManager = config.getService()) {
+      String bucketName = bucket.getName();
+      ParallelDownloadConfig parallelDownloadConfig =
+          ParallelDownloadConfig.newBuilder()
+              .setBucketName(bucketName)
+              .setDownloadDirectory(baseDir)
+              .setSkipIfExists(true)
+              .build();
+      // First download to ensure files exist
+      DownloadJob job1 = transferManager.downloadBlobs(blobs, parallelDownloadConfig);
+      List<DownloadResult> results1 = job1.getDownloadResults();
+      assertThat(results1.stream().allMatch(r -> r.getStatus() == TransferStatus.SUCCESS)).isTrue();
+
+      // Second download with skipIfExists=true
+      DownloadJob job2 = transferManager.downloadBlobs(blobs, parallelDownloadConfig);
+      List<DownloadResult> results2 = job2.getDownloadResults();
+      try {
+        assertThat(results2).hasSize(3);
+        assertThat(results2.stream().allMatch(r -> r.getStatus() == TransferStatus.SKIPPED))
+            .isTrue();
+      } finally {
+        cleanUpFiles(results1);
+      }
+    }
+  }
+
   @Test
   public void uploadFilesAllowPCU() throws Exception {
     TransferManagerConfig config =
diff --git a/google-cloud-storage/src/test/java/com/google/cloud/storage/transfermanager/ParallelDownloadConfigTest.java b/google-cloud-storage/src/test/java/com/google/cloud/storage/transfermanager/ParallelDownloadConfigTest.java
new file mode 100644
index 0000000000..0e4e99ee40
--- /dev/null
+++ b/google-cloud-storage/src/test/java/com/google/cloud/storage/transfermanager/ParallelDownloadConfigTest.java
@@ -0,0 +1,80 @@
+/*
+ * Copyright 2026 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.cloud.storage.transfermanager;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import com.google.cloud.storage.Storage.BlobSourceOption;
+import com.google.common.collect.ImmutableList;
+import java.nio.file.Paths;
+import org.junit.Test;
+
+public final class ParallelDownloadConfigTest {
+
+  @Test
+  public void testBuilder() {
+    ParallelDownloadConfig config =
+        ParallelDownloadConfig.newBuilder()
+            .setBucketName("bucket")
+            .setDownloadDirectory(Paths.get("dir"))
+            .setStripPrefix("prefix")
+            .setSkipIfExists(true)
+            .setOptionsPerRequest(ImmutableList.of(BlobSourceOption.generationMatch(1L)))
+            .build();
+
+    assertThat(config.getBucketName()).isEqualTo("bucket");
+    assertThat(config.getDownloadDirectory()).isEqualTo(Paths.get("dir"));
+    assertThat(config.getStripPrefix()).isEqualTo("prefix");
+    assertThat(config.isSkipIfExists()).isTrue();
+    assertThat(config.getOptionsPerRequest())
+        .containsExactly(BlobSourceOption.generationMatch(1L));
+  }
+
+  @Test
+  public void testDefaultValues() {
+    ParallelDownloadConfig config =
+        ParallelDownloadConfig.newBuilder().setBucketName("bucket").build();
+
+    assertThat(config.isSkipIfExists()).isFalse();
+    assertThat(config.getDownloadDirectory()).isEqualTo(Paths.get(""));
+    assertThat(config.getStripPrefix()).isEqualTo("");
+    assertThat(config.getOptionsPerRequest()).isEmpty();
+  }
+
+  @Test
+  public void testEqualsAndHashCode() {
+    ParallelDownloadConfig config1 =
+        ParallelDownloadConfig.newBuilder()
+            .setBucketName("bucket")
+            .setSkipIfExists(true)
+            .build();
+    ParallelDownloadConfig config2 =
+        ParallelDownloadConfig.newBuilder()
+            .setBucketName("bucket")
+            .setSkipIfExists(true)
+            .build();
+    ParallelDownloadConfig config3 =
+        ParallelDownloadConfig.newBuilder()
+            .setBucketName("bucket")
+            .setSkipIfExists(false)
+            .build();
+
+    assertThat(config1).isEqualTo(config2);
+    assertThat(config1.hashCode()).isEqualTo(config2.hashCode());
+    assertThat(config1).isNotEqualTo(config3);
+  }
+}