| | | 1 | | // Copyright (c) Microsoft Corporation. All rights reserved. |
| | | 2 | | // Licensed under the MIT License. |
| | | 3 | | |
| | | 4 | | using System; |
| | | 5 | | using System.Collections.Generic; |
| | | 6 | | using System.Diagnostics; |
| | | 7 | | using System.Globalization; |
| | | 8 | | using System.IO; |
| | | 9 | | using System.Threading; |
| | | 10 | | using System.Threading.Tasks; |
| | | 11 | | using Azure.Core.Pipeline; |
| | | 12 | | using Azure.Storage.Blobs.Models; |
| | | 13 | | using Azure.Storage.Blobs.Specialized; |
| | | 14 | | |
| | | 15 | | namespace Azure.Storage.Blobs |
| | | 16 | | { |
| | | 17 | | internal class PartitionedDownloader |
| | | 18 | | { |
/// <summary>
/// Blob client through which every range request is issued.
/// </summary>
private readonly BlobBaseClient _client;

/// <summary>
/// Upper bound on the number of range downloads kept in flight at once.
/// </summary>
private readonly int _maxWorkerCount;

/// <summary>
/// Length of the very first range request, which starts at offset 0 and
/// may differ in size from the ranges that follow it.
/// </summary>
private readonly long _initialRangeSize;

/// <summary>
/// Length of every range requested after the first one.
/// </summary>
private readonly long _rangeSize;
| | | 39 | | |
/// <summary>
/// Creates a downloader that fetches a blob through
/// <paramref name="client"/> using the worker count and range sizes
/// described by <paramref name="transferOptions"/>.
/// </summary>
/// <param name="client">
/// The client used to issue the download requests.
/// </param>
/// <param name="transferOptions">
/// Optional concurrency and range-size settings. Each setting that is
/// unset or not positive is replaced by its default constant.
/// </param>
public PartitionedDownloader(
    BlobBaseClient client,
    StorageTransferOptions transferOptions = default)
{
    _client = client;

    // Set _maxWorkerCount
    if (transferOptions.MaximumConcurrency.HasValue
        && transferOptions.MaximumConcurrency > 0)
    {
        _maxWorkerCount = transferOptions.MaximumConcurrency.Value;
    }
    else
    {
        _maxWorkerCount = Constants.Blob.Block.DefaultConcurrentTransfersCount;
    }

    // Set _rangeSize, never exceeding the largest allowed download size
    if (transferOptions.MaximumTransferSize.HasValue
        && transferOptions.MaximumTransferSize.Value > 0)
    {
        _rangeSize = Math.Min(transferOptions.MaximumTransferSize.Value, Constants.Blob.Block.MaxDownloadBytes);
    }
    else
    {
        _rangeSize = Constants.DefaultBufferSize;
    }

    // Set _initialRangeSize. The guard only proves that
    // InitialTransferSize has a value, so that is the property that must
    // be read here: reading MaximumTransferSize.Value instead would throw
    // InvalidOperationException when only the initial size was supplied
    // and would otherwise silently ignore the requested initial size.
    if (transferOptions.InitialTransferSize.HasValue
        && transferOptions.InitialTransferSize.Value > 0)
    {
        _initialRangeSize = transferOptions.InitialTransferSize.Value;
    }
    else
    {
        _initialRangeSize = Constants.Blob.Block.DefaultInitalDownloadRangeSize;
    }
}
| | | 79 | | |
| | | 80 | | public async Task<Response> DownloadToAsync( |
| | | 81 | | Stream destination, |
| | | 82 | | BlobRequestConditions conditions, |
| | | 83 | | CancellationToken cancellationToken) |
| | | 84 | | { |
| | | 85 | | // Wrap the download range calls in a Download span for distributed |
| | | 86 | | // tracing |
| | 106 | 87 | | DiagnosticScope scope = _client.ClientDiagnostics.CreateScope($"{nameof(BlobBaseClient)}.{nameof(BlobBaseCli |
| | | 88 | | try |
| | | 89 | | { |
| | 106 | 90 | | scope.Start(); |
| | | 91 | | |
| | | 92 | | // Just start downloading using an initial range. If it's a |
| | | 93 | | // small blob, we'll get the whole thing in one shot. If it's |
| | | 94 | | // a large blob, we'll get its full size in Content-Range and |
| | | 95 | | // can keep downloading it in segments. |
| | 106 | 96 | | var initialRange = new HttpRange(0, _initialRangeSize); |
| | 106 | 97 | | Task<Response<BlobDownloadInfo>> initialResponseTask = |
| | 106 | 98 | | _client.DownloadAsync( |
| | 106 | 99 | | initialRange, |
| | 106 | 100 | | conditions, |
| | 106 | 101 | | rangeGetContentHash: false, |
| | 106 | 102 | | cancellationToken); |
| | | 103 | | |
| | 106 | 104 | | Response<BlobDownloadInfo> initialResponse = null; |
| | | 105 | | try |
| | | 106 | | { |
| | 106 | 107 | | initialResponse = await initialResponseTask.ConfigureAwait(false); |
| | 102 | 108 | | } |
| | 2 | 109 | | catch (RequestFailedException ex) when (ex.ErrorCode == BlobErrorCode.InvalidRange) |
| | | 110 | | { |
| | 2 | 111 | | initialResponse = await _client.DownloadAsync( |
| | 2 | 112 | | range: default, |
| | 2 | 113 | | conditions, |
| | 2 | 114 | | false, |
| | 2 | 115 | | cancellationToken) |
| | 2 | 116 | | .ConfigureAwait(false); |
| | | 117 | | } |
| | | 118 | | |
| | | 119 | | // If the initial request returned no content (i.e., a 304), |
| | | 120 | | // we'll pass that back to the user immediately |
| | 104 | 121 | | if (initialResponse.IsUnavailable()) |
| | | 122 | | { |
| | 2 | 123 | | return initialResponse.GetRawResponse(); |
| | | 124 | | } |
| | | 125 | | |
| | | 126 | | // If the first segment was the entire blob, we'll copy that to |
| | | 127 | | // the output stream and finish now |
| | 102 | 128 | | long initialLength = initialResponse.Value.ContentLength; |
| | 102 | 129 | | long totalLength = ParseRangeTotalLength(initialResponse.Value.Details.ContentRange); |
| | 102 | 130 | | if (initialLength == totalLength) |
| | | 131 | | { |
| | 94 | 132 | | await CopyToAsync( |
| | 94 | 133 | | initialResponse, |
| | 94 | 134 | | destination, |
| | 94 | 135 | | cancellationToken) |
| | 94 | 136 | | .ConfigureAwait(false); |
| | 94 | 137 | | return initialResponse.GetRawResponse(); |
| | | 138 | | } |
| | | 139 | | |
| | | 140 | | // Capture the etag from the first segment and construct |
| | | 141 | | // conditions to ensure the blob doesn't change while we're |
| | | 142 | | // downloading the remaining segments |
| | 8 | 143 | | ETag etag = initialResponse.Value.Details.ETag; |
| | 8 | 144 | | BlobRequestConditions conditionsWithEtag = CreateConditionsWithEtag(conditions, etag); |
| | | 145 | | |
| | | 146 | | // Create a queue of tasks that will each download one segment |
| | | 147 | | // of the blob. The queue maintains the order of the segments |
| | | 148 | | // so we can keep appending to the end of the destination |
| | | 149 | | // stream when each segment finishes. |
| | 8 | 150 | | var runningTasks = new Queue<Task<Response<BlobDownloadInfo>>>(); |
| | 8 | 151 | | runningTasks.Enqueue(initialResponseTask); |
| | | 152 | | |
| | | 153 | | // Fill the queue with tasks to download each of the remaining |
| | | 154 | | // ranges in the blob |
| | 108 | 155 | | foreach (HttpRange httpRange in GetRanges(initialLength, totalLength)) |
| | | 156 | | { |
| | | 157 | | // Add the next Task (which will start the download but |
| | | 158 | | // return before it's completed downloading) |
| | 48 | 159 | | runningTasks.Enqueue(_client.DownloadAsync( |
| | 48 | 160 | | httpRange, |
| | 48 | 161 | | conditionsWithEtag, |
| | 48 | 162 | | rangeGetContentHash: false, |
| | 48 | 163 | | cancellationToken)); |
| | | 164 | | |
| | | 165 | | // If we have fewer tasks than allotted workers, then just |
| | | 166 | | // continue adding tasks until we have _maxWorkerCount |
| | | 167 | | // running in parallel |
| | 48 | 168 | | if (runningTasks.Count < _maxWorkerCount) |
| | | 169 | | { |
| | | 170 | | continue; |
| | | 171 | | } |
| | | 172 | | |
| | | 173 | | // Once all the workers are busy, wait for the first |
| | | 174 | | // segment to finish downloading before we create more work |
| | 36 | 175 | | await ConsumeQueuedTask().ConfigureAwait(false); |
| | | 176 | | } |
| | | 177 | | |
| | | 178 | | // Wait for all of the remaining segments to download |
| | 20 | 179 | | while (runningTasks.Count > 0) |
| | | 180 | | { |
| | 16 | 181 | | await ConsumeQueuedTask().ConfigureAwait(false); |
| | | 182 | | } |
| | | 183 | | |
| | 4 | 184 | | return initialResponse.GetRawResponse(); |
| | | 185 | | |
| | | 186 | | // Wait for the first segment in the queue of tasks to complete |
| | | 187 | | // downloading and copy it to the destination stream |
| | | 188 | | async Task ConsumeQueuedTask() |
| | | 189 | | { |
| | | 190 | | // Don't need to worry about 304s here because the ETag |
| | | 191 | | // condition will turn into a 412 and throw a proper |
| | | 192 | | // RequestFailedException |
| | 52 | 193 | | using BlobDownloadInfo result = |
| | 52 | 194 | | await runningTasks.Dequeue().ConfigureAwait(false); |
| | | 195 | | |
| | | 196 | | // Even though the BlobDownloadInfo is returned immediately, |
| | | 197 | | // CopyToAsync causes ConsumeQueuedTask to wait until the |
| | | 198 | | // download is complete |
| | 48 | 199 | | await CopyToAsync( |
| | 48 | 200 | | result, |
| | 48 | 201 | | destination, |
| | 48 | 202 | | cancellationToken) |
| | 48 | 203 | | .ConfigureAwait(false); |
| | 48 | 204 | | } |
| | | 205 | | } |
| | 6 | 206 | | catch (Exception ex) |
| | | 207 | | { |
| | 6 | 208 | | scope.Failed(ex); |
| | 6 | 209 | | throw; |
| | | 210 | | } |
| | | 211 | | finally |
| | | 212 | | { |
| | 106 | 213 | | scope.Dispose(); |
| | | 214 | | } |
| | 100 | 215 | | } |
| | | 216 | | |
| | | 217 | | public Response DownloadTo( |
| | | 218 | | Stream destination, |
| | | 219 | | BlobRequestConditions conditions, |
| | | 220 | | CancellationToken cancellationToken) |
| | | 221 | | { |
| | | 222 | | // Wrap the download range calls in a Download span for distributed |
| | | 223 | | // tracing |
| | 34 | 224 | | DiagnosticScope scope = _client.ClientDiagnostics.CreateScope($"{nameof(BlobBaseClient)}.{nameof(BlobBaseCli |
| | | 225 | | try |
| | | 226 | | { |
| | 34 | 227 | | scope.Start(); |
| | | 228 | | |
| | | 229 | | // Just start downloading using an initial range. If it's a |
| | | 230 | | // small blob, we'll get the whole thing in one shot. If it's |
| | | 231 | | // a large blob, we'll get its full size in Content-Range and |
| | | 232 | | // can keep downloading it in segments. |
| | 34 | 233 | | var initialRange = new HttpRange(0, _initialRangeSize); |
| | | 234 | | Response<BlobDownloadInfo> initialResponse; |
| | | 235 | | |
| | | 236 | | try |
| | | 237 | | { |
| | 34 | 238 | | initialResponse = _client.Download( |
| | 34 | 239 | | initialRange, |
| | 34 | 240 | | conditions, |
| | 34 | 241 | | rangeGetContentHash: false, |
| | 34 | 242 | | cancellationToken); |
| | 30 | 243 | | } |
| | 2 | 244 | | catch (RequestFailedException ex) when (ex.ErrorCode == BlobErrorCode.InvalidRange) |
| | | 245 | | { |
| | 2 | 246 | | initialResponse = _client.Download( |
| | 2 | 247 | | range: default, |
| | 2 | 248 | | conditions, |
| | 2 | 249 | | rangeGetContentHash: false, |
| | 2 | 250 | | cancellationToken); |
| | 2 | 251 | | } |
| | | 252 | | |
| | | 253 | | // If the initial request returned no content (i.e., a 304), |
| | | 254 | | // we'll pass that back to the user immediately |
| | 32 | 255 | | if (initialResponse.IsUnavailable()) |
| | | 256 | | { |
| | 2 | 257 | | return initialResponse.GetRawResponse(); |
| | | 258 | | } |
| | | 259 | | |
| | | 260 | | // Copy the first segment to the destination stream |
| | 30 | 261 | | CopyTo(initialResponse, destination, cancellationToken); |
| | | 262 | | |
| | | 263 | | // If the first segment was the entire blob, we're finished now |
| | 30 | 264 | | long initialLength = initialResponse.Value.ContentLength; |
| | 30 | 265 | | long totalLength = ParseRangeTotalLength(initialResponse.Value.Details.ContentRange); |
| | 30 | 266 | | if (initialLength == totalLength) |
| | | 267 | | { |
| | 26 | 268 | | return initialResponse.GetRawResponse(); |
| | | 269 | | } |
| | | 270 | | |
| | | 271 | | // Capture the etag from the first segment and construct |
| | | 272 | | // conditions to ensure the blob doesn't change while we're |
| | | 273 | | // downloading the remaining segments |
| | 4 | 274 | | ETag etag = initialResponse.Value.Details.ETag; |
| | 4 | 275 | | BlobRequestConditions conditionsWithEtag = CreateConditionsWithEtag(conditions, etag); |
| | | 276 | | |
| | | 277 | | // Download each of the remaining ranges in the blob |
| | 80 | 278 | | foreach (HttpRange httpRange in GetRanges(initialLength, totalLength)) |
| | | 279 | | { |
| | | 280 | | // Don't need to worry about 304s here because the ETag |
| | | 281 | | // condition will turn into a 412 and throw a proper |
| | | 282 | | // RequestFailedException |
| | 36 | 283 | | Response<BlobDownloadInfo> result = _client.Download( |
| | 36 | 284 | | httpRange, |
| | 36 | 285 | | conditionsWithEtag, |
| | 36 | 286 | | rangeGetContentHash: false, |
| | 36 | 287 | | cancellationToken); |
| | 36 | 288 | | CopyTo(result.Value, destination, cancellationToken); |
| | | 289 | | } |
| | | 290 | | |
| | 4 | 291 | | return initialResponse.GetRawResponse(); |
| | | 292 | | } |
| | 2 | 293 | | catch (Exception ex) |
| | | 294 | | { |
| | 2 | 295 | | scope.Failed(ex); |
| | 2 | 296 | | throw; |
| | | 297 | | } |
| | | 298 | | finally |
| | | 299 | | { |
| | 34 | 300 | | scope.Dispose(); |
| | 34 | 301 | | } |
| | 32 | 302 | | } |
| | | 303 | | |
/// <summary>
/// Extracts the total length that follows the '/' separator in a
/// Content-Range value.
/// </summary>
/// <param name="range">
/// The Content-Range value to parse; may be null.
/// </param>
/// <returns>
/// The number found after the first '/', or 0 when
/// <paramref name="range"/> is null.
/// </returns>
/// <exception cref="FormatException">
/// The text after the separator is not a valid integer.
/// </exception>
private static long ParseRangeTotalLength(string range)
{
    if (range == null)
    {
        return 0;
    }

    // The header is protocol text, not natural language, so look for the
    // separator with an ordinal character search instead of a
    // culture-sensitive, case-insensitive string comparison.
    int lengthSeparator = range.IndexOf('/');
    if (lengthSeparator == -1)
    {
        throw BlobErrors.ParsingFullHttpRangeFailed(range);
    }

    return long.Parse(range.Substring(lengthSeparator + 1), CultureInfo.InvariantCulture);
}
| | | 317 | | |
/// <summary>
/// Builds a new set of request conditions that copies every value from
/// <paramref name="conditions"/> and falls back to
/// <paramref name="etag"/> for IfMatch when the caller supplied none.
/// </summary>
/// <param name="conditions">
/// The caller's conditions; may be null.
/// </param>
/// <param name="etag">
/// The ETag used for IfMatch when <paramref name="conditions"/> is null
/// or carries no IfMatch value.
/// </param>
/// <returns>A new conditions instance; the input is not modified.</returns>
private static BlobRequestConditions CreateConditionsWithEtag(BlobRequestConditions conditions, ETag etag)
{
    var pinned = new BlobRequestConditions();
    pinned.LeaseId = conditions?.LeaseId;

    // A caller-supplied IfMatch takes precedence over the captured ETag.
    pinned.IfMatch = conditions?.IfMatch ?? etag;
    pinned.IfNoneMatch = conditions?.IfNoneMatch;
    pinned.IfModifiedSince = conditions?.IfModifiedSince;
    pinned.IfUnmodifiedSince = conditions?.IfUnmodifiedSince;
    return pinned;
}
| | | 327 | | |
/// <summary>
/// Asynchronously copies the content stream of a downloaded segment into
/// <paramref name="destination"/> and disposes the content stream
/// afterwards, whether or not the copy succeeded.
/// </summary>
/// <param name="result">The downloaded segment whose content is copied.</param>
/// <param name="destination">The stream that receives the bytes.</param>
/// <param name="cancellationToken">Token passed to the stream copy.</param>
private static async Task CopyToAsync(
    BlobDownloadInfo result,
    Stream destination,
    CancellationToken cancellationToken)
{
    using (Stream content = result.Content)
    {
        Task copy = content.CopyToAsync(
            destination,
            Constants.DefaultDownloadCopyBufferSize,
            cancellationToken);

        await copy.ConfigureAwait(false);
    }
}
| | | 341 | | |
| | | 342 | | |
/// <summary>
/// Copies the content stream of a downloaded segment into
/// <paramref name="destination"/> and disposes the content stream.
/// </summary>
/// <param name="result">The downloaded segment whose content is copied.</param>
/// <param name="destination">The stream that receives the bytes.</param>
/// <param name="cancellationToken">
/// Checked once before the copy starts; the synchronous copy itself is not
/// interrupted.
/// </param>
/// <exception cref="OperationCanceledException">
/// Cancellation was already requested when the copy was about to start.
/// </exception>
private static void CopyTo(
    BlobDownloadInfo result,
    Stream destination,
    CancellationToken cancellationToken)
{
    // Scope the content stream with a using block so it is released even
    // when cancellation or the copy throws, matching CopyToAsync.
    using (Stream source = result.Content)
    {
        cancellationToken.ThrowIfCancellationRequested();
        source.CopyTo(destination, Constants.DefaultDownloadCopyBufferSize);
    }
}
| | | 352 | | |
/// <summary>
/// Lazily enumerates the ranges that cover the blob from
/// <paramref name="initialLength"/> up to <paramref name="totalLength"/>,
/// each at most <see cref="_rangeSize"/> long.
/// </summary>
/// <param name="initialLength">Offset at which the first yielded range starts.</param>
/// <param name="totalLength">Offset at which enumeration stops.</param>
/// <returns>
/// Consecutive ranges; empty when <paramref name="initialLength"/> is not
/// less than <paramref name="totalLength"/>.
/// </returns>
private IEnumerable<HttpRange> GetRanges(long initialLength, long totalLength)
{
    long position = initialLength;
    while (position < totalLength)
    {
        // The final range is shortened so it ends exactly at totalLength.
        long remaining = totalLength - position;
        long length = remaining < _rangeSize ? remaining : _rangeSize;

        yield return new HttpRange(position, length);

        position += _rangeSize;
    }
}
| | | 360 | | } |
| | | 361 | | } |