| | 1 | | // Copyright (c) Microsoft Corporation. All rights reserved. |
| | 2 | | // Licensed under the MIT License. |
| | 3 | |
|
| | 4 | | using System; |
| | 5 | | using System.Collections.Generic; |
| | 6 | | using System.Diagnostics; |
| | 7 | | using System.Globalization; |
| | 8 | | using System.IO; |
| | 9 | | using System.Threading; |
| | 10 | | using System.Threading.Tasks; |
| | 11 | | using Azure.Core.Pipeline; |
| | 12 | | using Azure.Storage.Blobs.Models; |
| | 13 | | using Azure.Storage.Blobs.Specialized; |
| | 14 | |
|
| | 15 | | namespace Azure.Storage.Blobs |
| | 16 | | { |
| | 17 | | internal class PartitionedDownloader |
| | 18 | | { |
| | 19 | | /// <summary> |
| | 20 | | /// The client used to download the blob. |
| | 21 | | /// </summary> |
| | 22 | | private readonly BlobBaseClient _client; |
| | 23 | |
|
| | 24 | | /// <summary> |
| | 25 | | /// The maximum number of simultaneous workers. |
| | 26 | | /// </summary> |
| | 27 | | private readonly int _maxWorkerCount; |
| | 28 | |
|
| | 29 | | /// <summary> |
| | 30 | | /// The size of the first range requested (which can be larger than the |
| | 31 | | /// other ranges). |
| | 32 | | /// </summary> |
| | 33 | | private readonly long _initialRangeSize; |
| | 34 | |
|
| | 35 | | /// <summary> |
| | 36 | | /// The size of subsequent ranges. |
| | 37 | | /// </summary> |
| | 38 | | private readonly long _rangeSize; |
| | 39 | |
|
/// <summary>
/// Creates a downloader that fetches a blob as an initial range followed
/// by fixed-size ranges.
/// </summary>
/// <param name="client">
/// The client used to download the blob.
/// </param>
/// <param name="transferOptions">
/// Optional transfer settings.  Any setting that is unset or not positive
/// is replaced by the corresponding library default.
/// </param>
public PartitionedDownloader(
    BlobBaseClient client,
    StorageTransferOptions transferOptions = default)
{
    _client = client;

    // Set _maxWorkerCount
    if (transferOptions.MaximumConcurrency.HasValue
        && transferOptions.MaximumConcurrency > 0)
    {
        _maxWorkerCount = transferOptions.MaximumConcurrency.Value;
    }
    else
    {
        _maxWorkerCount = Constants.Blob.Block.DefaultConcurrentTransfersCount;
    }

    // Set _rangeSize, never exceeding the largest allowed download size
    if (transferOptions.MaximumTransferSize.HasValue
        && transferOptions.MaximumTransferSize.Value > 0)
    {
        _rangeSize = Math.Min(transferOptions.MaximumTransferSize.Value, Constants.Blob.Block.MaxDownloadBytes);
    }
    else
    {
        _rangeSize = Constants.DefaultBufferSize;
    }

    // Set _initialRangeSize
    if (transferOptions.InitialTransferSize.HasValue
        && transferOptions.InitialTransferSize.Value > 0)
    {
        // Honor the requested initial size.  Reading MaximumTransferSize
        // here would ignore the caller's value and would throw whenever
        // MaximumTransferSize is unset.
        _initialRangeSize = transferOptions.InitialTransferSize.Value;
    }
    else
    {
        _initialRangeSize = Constants.Blob.Block.DefaultInitalDownloadRangeSize;
    }
}
| | 79 | |
|
| | 80 | | public async Task<Response> DownloadToAsync( |
| | 81 | | Stream destination, |
| | 82 | | BlobRequestConditions conditions, |
| | 83 | | CancellationToken cancellationToken) |
| | 84 | | { |
| | 85 | | // Wrap the download range calls in a Download span for distributed |
| | 86 | | // tracing |
| 106 | 87 | | DiagnosticScope scope = _client.ClientDiagnostics.CreateScope($"{nameof(BlobBaseClient)}.{nameof(BlobBaseCli |
| | 88 | | try |
| | 89 | | { |
| 106 | 90 | | scope.Start(); |
| | 91 | |
|
| | 92 | | // Just start downloading using an initial range. If it's a |
| | 93 | | // small blob, we'll get the whole thing in one shot. If it's |
| | 94 | | // a large blob, we'll get its full size in Content-Range and |
| | 95 | | // can keep downloading it in segments. |
| 106 | 96 | | var initialRange = new HttpRange(0, _initialRangeSize); |
| 106 | 97 | | Task<Response<BlobDownloadInfo>> initialResponseTask = |
| 106 | 98 | | _client.DownloadAsync( |
| 106 | 99 | | initialRange, |
| 106 | 100 | | conditions, |
| 106 | 101 | | rangeGetContentHash: false, |
| 106 | 102 | | cancellationToken); |
| | 103 | |
|
| 106 | 104 | | Response<BlobDownloadInfo> initialResponse = null; |
| | 105 | | try |
| | 106 | | { |
| 106 | 107 | | initialResponse = await initialResponseTask.ConfigureAwait(false); |
| 102 | 108 | | } |
| 2 | 109 | | catch (RequestFailedException ex) when (ex.ErrorCode == BlobErrorCode.InvalidRange) |
| | 110 | | { |
| 2 | 111 | | initialResponse = await _client.DownloadAsync( |
| 2 | 112 | | range: default, |
| 2 | 113 | | conditions, |
| 2 | 114 | | false, |
| 2 | 115 | | cancellationToken) |
| 2 | 116 | | .ConfigureAwait(false); |
| | 117 | | } |
| | 118 | |
|
| | 119 | | // If the initial request returned no content (i.e., a 304), |
| | 120 | | // we'll pass that back to the user immediately |
| 104 | 121 | | if (initialResponse.IsUnavailable()) |
| | 122 | | { |
| 2 | 123 | | return initialResponse.GetRawResponse(); |
| | 124 | | } |
| | 125 | |
|
| | 126 | | // If the first segment was the entire blob, we'll copy that to |
| | 127 | | // the output stream and finish now |
| 102 | 128 | | long initialLength = initialResponse.Value.ContentLength; |
| 102 | 129 | | long totalLength = ParseRangeTotalLength(initialResponse.Value.Details.ContentRange); |
| 102 | 130 | | if (initialLength == totalLength) |
| | 131 | | { |
| 94 | 132 | | await CopyToAsync( |
| 94 | 133 | | initialResponse, |
| 94 | 134 | | destination, |
| 94 | 135 | | cancellationToken) |
| 94 | 136 | | .ConfigureAwait(false); |
| 94 | 137 | | return initialResponse.GetRawResponse(); |
| | 138 | | } |
| | 139 | |
|
| | 140 | | // Capture the etag from the first segment and construct |
| | 141 | | // conditions to ensure the blob doesn't change while we're |
| | 142 | | // downloading the remaining segments |
| 8 | 143 | | ETag etag = initialResponse.Value.Details.ETag; |
| 8 | 144 | | BlobRequestConditions conditionsWithEtag = CreateConditionsWithEtag(conditions, etag); |
| | 145 | |
|
| | 146 | | // Create a queue of tasks that will each download one segment |
| | 147 | | // of the blob. The queue maintains the order of the segments |
| | 148 | | // so we can keep appending to the end of the destination |
| | 149 | | // stream when each segment finishes. |
| 8 | 150 | | var runningTasks = new Queue<Task<Response<BlobDownloadInfo>>>(); |
| 8 | 151 | | runningTasks.Enqueue(initialResponseTask); |
| | 152 | |
|
| | 153 | | // Fill the queue with tasks to download each of the remaining |
| | 154 | | // ranges in the blob |
| 108 | 155 | | foreach (HttpRange httpRange in GetRanges(initialLength, totalLength)) |
| | 156 | | { |
| | 157 | | // Add the next Task (which will start the download but |
| | 158 | | // return before it's completed downloading) |
| 48 | 159 | | runningTasks.Enqueue(_client.DownloadAsync( |
| 48 | 160 | | httpRange, |
| 48 | 161 | | conditionsWithEtag, |
| 48 | 162 | | rangeGetContentHash: false, |
| 48 | 163 | | cancellationToken)); |
| | 164 | |
|
| | 165 | | // If we have fewer tasks than allotted workers, then just |
| | 166 | | // continue adding tasks until we have _maxWorkerCount |
| | 167 | | // running in parallel |
| 48 | 168 | | if (runningTasks.Count < _maxWorkerCount) |
| | 169 | | { |
| | 170 | | continue; |
| | 171 | | } |
| | 172 | |
|
| | 173 | | // Once all the workers are busy, wait for the first |
| | 174 | | // segment to finish downloading before we create more work |
| 36 | 175 | | await ConsumeQueuedTask().ConfigureAwait(false); |
| | 176 | | } |
| | 177 | |
|
| | 178 | | // Wait for all of the remaining segments to download |
| 20 | 179 | | while (runningTasks.Count > 0) |
| | 180 | | { |
| 16 | 181 | | await ConsumeQueuedTask().ConfigureAwait(false); |
| | 182 | | } |
| | 183 | |
|
| 4 | 184 | | return initialResponse.GetRawResponse(); |
| | 185 | |
|
| | 186 | | // Wait for the first segment in the queue of tasks to complete |
| | 187 | | // downloading and copy it to the destination stream |
| | 188 | | async Task ConsumeQueuedTask() |
| | 189 | | { |
| | 190 | | // Don't need to worry about 304s here because the ETag |
| | 191 | | // condition will turn into a 412 and throw a proper |
| | 192 | | // RequestFailedException |
| 52 | 193 | | using BlobDownloadInfo result = |
| 52 | 194 | | await runningTasks.Dequeue().ConfigureAwait(false); |
| | 195 | |
|
| | 196 | | // Even though the BlobDownloadInfo is returned immediately, |
| | 197 | | // CopyToAsync causes ConsumeQueuedTask to wait until the |
| | 198 | | // download is complete |
| 48 | 199 | | await CopyToAsync( |
| 48 | 200 | | result, |
| 48 | 201 | | destination, |
| 48 | 202 | | cancellationToken) |
| 48 | 203 | | .ConfigureAwait(false); |
| 48 | 204 | | } |
| | 205 | | } |
| 6 | 206 | | catch (Exception ex) |
| | 207 | | { |
| 6 | 208 | | scope.Failed(ex); |
| 6 | 209 | | throw; |
| | 210 | | } |
| | 211 | | finally |
| | 212 | | { |
| 106 | 213 | | scope.Dispose(); |
| | 214 | | } |
| 100 | 215 | | } |
| | 216 | |
|
| | 217 | | public Response DownloadTo( |
| | 218 | | Stream destination, |
| | 219 | | BlobRequestConditions conditions, |
| | 220 | | CancellationToken cancellationToken) |
| | 221 | | { |
| | 222 | | // Wrap the download range calls in a Download span for distributed |
| | 223 | | // tracing |
| 34 | 224 | | DiagnosticScope scope = _client.ClientDiagnostics.CreateScope($"{nameof(BlobBaseClient)}.{nameof(BlobBaseCli |
| | 225 | | try |
| | 226 | | { |
| 34 | 227 | | scope.Start(); |
| | 228 | |
|
| | 229 | | // Just start downloading using an initial range. If it's a |
| | 230 | | // small blob, we'll get the whole thing in one shot. If it's |
| | 231 | | // a large blob, we'll get its full size in Content-Range and |
| | 232 | | // can keep downloading it in segments. |
| 34 | 233 | | var initialRange = new HttpRange(0, _initialRangeSize); |
| | 234 | | Response<BlobDownloadInfo> initialResponse; |
| | 235 | |
|
| | 236 | | try |
| | 237 | | { |
| 34 | 238 | | initialResponse = _client.Download( |
| 34 | 239 | | initialRange, |
| 34 | 240 | | conditions, |
| 34 | 241 | | rangeGetContentHash: false, |
| 34 | 242 | | cancellationToken); |
| 30 | 243 | | } |
| 2 | 244 | | catch (RequestFailedException ex) when (ex.ErrorCode == BlobErrorCode.InvalidRange) |
| | 245 | | { |
| 2 | 246 | | initialResponse = _client.Download( |
| 2 | 247 | | range: default, |
| 2 | 248 | | conditions, |
| 2 | 249 | | rangeGetContentHash: false, |
| 2 | 250 | | cancellationToken); |
| 2 | 251 | | } |
| | 252 | |
|
| | 253 | | // If the initial request returned no content (i.e., a 304), |
| | 254 | | // we'll pass that back to the user immediately |
| 32 | 255 | | if (initialResponse.IsUnavailable()) |
| | 256 | | { |
| 2 | 257 | | return initialResponse.GetRawResponse(); |
| | 258 | | } |
| | 259 | |
|
| | 260 | | // Copy the first segment to the destination stream |
| 30 | 261 | | CopyTo(initialResponse, destination, cancellationToken); |
| | 262 | |
|
| | 263 | | // If the first segment was the entire blob, we're finished now |
| 30 | 264 | | long initialLength = initialResponse.Value.ContentLength; |
| 30 | 265 | | long totalLength = ParseRangeTotalLength(initialResponse.Value.Details.ContentRange); |
| 30 | 266 | | if (initialLength == totalLength) |
| | 267 | | { |
| 26 | 268 | | return initialResponse.GetRawResponse(); |
| | 269 | | } |
| | 270 | |
|
| | 271 | | // Capture the etag from the first segment and construct |
| | 272 | | // conditions to ensure the blob doesn't change while we're |
| | 273 | | // downloading the remaining segments |
| 4 | 274 | | ETag etag = initialResponse.Value.Details.ETag; |
| 4 | 275 | | BlobRequestConditions conditionsWithEtag = CreateConditionsWithEtag(conditions, etag); |
| | 276 | |
|
| | 277 | | // Download each of the remaining ranges in the blob |
| 80 | 278 | | foreach (HttpRange httpRange in GetRanges(initialLength, totalLength)) |
| | 279 | | { |
| | 280 | | // Don't need to worry about 304s here because the ETag |
| | 281 | | // condition will turn into a 412 and throw a proper |
| | 282 | | // RequestFailedException |
| 36 | 283 | | Response<BlobDownloadInfo> result = _client.Download( |
| 36 | 284 | | httpRange, |
| 36 | 285 | | conditionsWithEtag, |
| 36 | 286 | | rangeGetContentHash: false, |
| 36 | 287 | | cancellationToken); |
| 36 | 288 | | CopyTo(result.Value, destination, cancellationToken); |
| | 289 | | } |
| | 290 | |
|
| 4 | 291 | | return initialResponse.GetRawResponse(); |
| | 292 | | } |
| 2 | 293 | | catch (Exception ex) |
| | 294 | | { |
| 2 | 295 | | scope.Failed(ex); |
| 2 | 296 | | throw; |
| | 297 | | } |
| | 298 | | finally |
| | 299 | | { |
| 34 | 300 | | scope.Dispose(); |
| 34 | 301 | | } |
| 32 | 302 | | } |
| | 303 | |
|
/// <summary>
/// Extracts the total length from a Content-Range value, i.e. the number
/// that follows the first '/' separator.
/// </summary>
/// <param name="range">
/// The Content-Range text to parse; may be null.
/// </param>
/// <returns>
/// 0 when <paramref name="range"/> is null; otherwise the text after the
/// first '/' parsed as a <see cref="long"/> using the invariant culture.
/// </returns>
private static long ParseRangeTotalLength(string range)
{
    if (range == null)
    {
        return 0;
    }

    // '/' is a protocol delimiter, so look for it with an ordinal search
    // instead of culture-sensitive, case-insensitive matching.
    int lengthSeparator = range.IndexOf('/');
    if (lengthSeparator == -1)
    {
        throw BlobErrors.ParsingFullHttpRangeFailed(range);
    }

    return long.Parse(range.Substring(lengthSeparator + 1), CultureInfo.InvariantCulture);
}
| | 317 | |
|
/// <summary>
/// Builds a new set of request conditions that carries over the caller's
/// conditions and falls back to the given ETag for IfMatch when the caller
/// did not supply one.
/// </summary>
/// <param name="conditions">
/// The caller's conditions; may be null.
/// </param>
/// <param name="etag">
/// The ETag used for IfMatch when <paramref name="conditions"/> has none.
/// </param>
/// <returns>
/// A new <see cref="BlobRequestConditions"/> instance.
/// </returns>
private static BlobRequestConditions CreateConditionsWithEtag(BlobRequestConditions conditions, ETag etag)
{
    var pinned = new BlobRequestConditions();

    pinned.LeaseId = conditions?.LeaseId;

    // An explicit IfMatch from the caller takes precedence over the etag
    pinned.IfMatch = conditions?.IfMatch ?? etag;

    pinned.IfNoneMatch = conditions?.IfNoneMatch;
    pinned.IfModifiedSince = conditions?.IfModifiedSince;
    pinned.IfUnmodifiedSince = conditions?.IfUnmodifiedSince;

    return pinned;
}
| | 327 | |
|
/// <summary>
/// Asynchronously copies the downloaded content into the destination
/// stream and disposes the content stream afterwards.
/// </summary>
/// <param name="result">
/// The download whose content is copied.
/// </param>
/// <param name="destination">
/// The stream that receives the content.
/// </param>
/// <param name="cancellationToken">
/// Token passed through to the stream copy.
/// </param>
/// <returns>
/// A task that completes once the copy has finished.
/// </returns>
private static async Task CopyToAsync(
    BlobDownloadInfo result,
    Stream destination,
    CancellationToken cancellationToken)
{
    using (Stream content = result.Content)
    {
        Task pendingCopy = content.CopyToAsync(
            destination,
            Constants.DefaultDownloadCopyBufferSize,
            cancellationToken);

        await pendingCopy.ConfigureAwait(false);
    }
}
| | 341 | |
|
| | 342 | |
|
/// <summary>
/// Copies the downloaded content into the destination stream and disposes
/// the content stream, including when cancellation was requested or the
/// copy fails.
/// </summary>
/// <param name="result">
/// The download whose content is copied.
/// </param>
/// <param name="destination">
/// The stream that receives the content.
/// </param>
/// <param name="cancellationToken">
/// Token checked once before the copy starts.
/// </param>
private static void CopyTo(
    BlobDownloadInfo result,
    Stream destination,
    CancellationToken cancellationToken)
{
    // Take ownership of the content first so it is released on every exit
    // path rather than only after a successful copy.
    using Stream source = result.Content;

    cancellationToken.ThrowIfCancellationRequested();
    source.CopyTo(destination, Constants.DefaultDownloadCopyBufferSize);
}
| | 352 | |
|
/// <summary>
/// Lazily enumerates the ranges that cover the blob from
/// <paramref name="initialLength"/> up to <paramref name="totalLength"/>,
/// each at most _rangeSize long.
/// </summary>
/// <param name="initialLength">
/// The offset at which the first yielded range starts.
/// </param>
/// <param name="totalLength">
/// The exclusive upper bound of the ranges.
/// </param>
/// <returns>
/// The ranges in ascending offset order; empty when
/// <paramref name="initialLength"/> is not less than
/// <paramref name="totalLength"/>.
/// </returns>
private IEnumerable<HttpRange> GetRanges(long initialLength, long totalLength)
{
    long position = initialLength;
    while (position < totalLength)
    {
        // The final range may be shorter than _rangeSize
        long remaining = totalLength - position;
        long length = remaining < _rangeSize ? remaining : _rangeSize;

        yield return new HttpRange(position, length);

        position += _rangeSize;
    }
}
| | 360 | | } |
| | 361 | | } |