| | 1 | | // Copyright (c) Microsoft and contributors. All rights reserved. |
| | 2 | | // |
| | 3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
| | 4 | | // you may not use this file except in compliance with the License. |
| | 5 | | // You may obtain a copy of the License at |
| | 6 | | // http://www.apache.org/licenses/LICENSE-2.0 |
| | 7 | | // |
| | 8 | | // Unless required by applicable law or agreed to in writing, software |
| | 9 | | // distributed under the License is distributed on an "AS IS" BASIS, |
| | 10 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| | 11 | | // |
| | 12 | | // See the License for the specific language governing permissions and |
| | 13 | | // limitations under the License. |
| | 14 | |
|
| | 15 | | using Microsoft.Azure.Batch.Conventions.Files.Utilities; |
| | 16 | | using Microsoft.WindowsAzure.Storage; |
| | 17 | | using Microsoft.WindowsAzure.Storage.Blob; |
| | 18 | | using Microsoft.WindowsAzure.Storage.RetryPolicies; |
| | 19 | | using System; |
| | 20 | | using System.Collections.Generic; |
| | 21 | | using System.IO; |
| | 22 | | using System.Linq; |
| | 23 | | using System.Text; |
| | 24 | | using System.Threading; |
| | 25 | | using System.Threading.Tasks; |
| | 26 | |
|
| | 27 | | namespace Microsoft.Azure.Batch.Conventions.Files |
| | 28 | | { |
| | 29 | | /// <summary> |
| | 30 | | /// Represents persistent storage for the outputs of an Azure Batch task. |
| | 31 | | /// </summary> |
| | 32 | | /// <remarks> |
| | 33 | | /// Task outputs refer to output data logically associated with a specific task, rather than |
| | 34 | | /// the job as a whole. For example, in a movie rendering job, if a task rendered a single frame, |
| | 35 | | /// that frame would be a task output. Logs and other diagnostic information such as intermediate |
| | 36 | | /// files may also be persisted as task outputs (see <see cref="TaskOutputKind"/> for a way to |
| | 37 | | /// categorise these so that clients can distinguish between the main output and auxiliary data). |
| | 38 | | /// </remarks> |
| | 39 | | public class TaskOutputStorage |
| | 40 | | { |
| | 41 | | private readonly StoragePath _storagePath; |
| | 42 | |
|
| | 43 | | /// <summary> |
| | 44 | | /// Initializes a new instance of the <see cref="TaskOutputStorage"/> class from a task id and |
| | 45 | | /// a URL representing the job output container. |
| | 46 | | /// </summary> |
| | 47 | | /// <param name="jobOutputContainerUri">The URL in Azure storage of the blob container to |
| | 48 | | /// use for outputs associated with this job. This URL must contain a SAS (Shared Access |
| | 49 | | /// Signature) granting access to the container, or the container must be public.</param> |
| | 50 | | /// <param name="taskId">The id of the Azure Batch task.</param> |
| | 51 | | /// <remarks>The container must already exist; the TaskOutputStorage class does not create |
| | 52 | | /// it for you. This overload does not set a storage retry policy (it passes null for it).</remarks> |
| | 53 | | public TaskOutputStorage(Uri jobOutputContainerUri, string taskId) |
| 21 | 54 | | : this(CloudBlobContainerUtils.GetContainerReference(jobOutputContainerUri), taskId, null) |
| | 55 | | { |
| 21 | 56 | | } |
| | 57 | |
|
| | 58 | | /// <summary> |
| | 59 | | /// Initializes a new instance of the <see cref="TaskOutputStorage"/> class from a storage account, |
| | 60 | | /// job id, and task id. |
| | 61 | | /// </summary> |
| | 62 | | /// <param name="storageAccount">The storage account linked to the Azure Batch account.</param> |
| | 63 | | /// <param name="jobId">The id of the Azure Batch job containing the task.</param> |
| | 64 | | /// <param name="taskId">The id of the Azure Batch task.</param> |
| | 65 | | /// <remarks>The job output container must already exist; the TaskOutputStorage class does not create |
| | 66 | | /// it for you. This overload does not set a storage retry policy (it passes null for it).</remarks> |
| | 67 | | public TaskOutputStorage(CloudStorageAccount storageAccount, string jobId, string taskId) |
| 0 | 68 | | : this(CloudBlobContainerUtils.GetContainerReference(storageAccount, jobId), taskId, null) |
| | 69 | | { |
| 0 | 70 | | } |
| | 71 | |
|
| | 72 | | /// <summary> |
| | 73 | | /// Initializes a new instance of the <see cref="TaskOutputStorage"/> class from a task id, |
| | 74 | | /// a URL representing the job output container, and a retry policy for storage requests. |
| | 75 | | /// </summary> |
| | 76 | | /// <param name="jobOutputContainerUri">The URL in Azure storage of the blob container to |
| | 77 | | /// use for outputs associated with this job. This URL must contain a SAS (Shared Access |
| | 78 | | /// Signature) granting access to the container, or the container must be public.</param> |
| | 79 | | /// <param name="taskId">The id of the Azure Batch task.</param> |
| | 80 | | /// <param name="storageRetryPolicy">The retry policy for storage requests.</param> |
| | 81 | | /// <remarks>The container must already exist; the TaskOutputStorage class does not create |
| | 82 | | /// it for you.</remarks> |
| | 83 | | public TaskOutputStorage(Uri jobOutputContainerUri, string taskId, IRetryPolicy storageRetryPolicy) |
| 0 | 84 | | : this(CloudBlobContainerUtils.GetContainerReference(jobOutputContainerUri), taskId, storageRetryPolicy) |
| | 85 | | { |
| 0 | 86 | | } |
| | 87 | |
|
| | 88 | | /// <summary> |
| | 89 | | /// Initializes a new instance of the <see cref="TaskOutputStorage"/> class from a storage account, |
| | 90 | | /// job id, task id, and a retry policy for storage requests. |
| | 91 | | /// </summary> |
| | 92 | | /// <param name="storageAccount">The storage account linked to the Azure Batch account.</param> |
| | 93 | | /// <param name="jobId">The id of the Azure Batch job containing the task.</param> |
| | 94 | | /// <param name="taskId">The id of the Azure Batch task.</param> |
| | 95 | | /// <param name="storageRetryPolicy">The retry policy for storage requests.</param> |
| | 96 | | /// <remarks>The job output container must already exist; the TaskOutputStorage class does not create |
| | 97 | | /// it for you.</remarks> |
| | 98 | | public TaskOutputStorage(CloudStorageAccount storageAccount, string jobId, string taskId, IRetryPolicy storageRe |
| 0 | 99 | | : this(CloudBlobContainerUtils.GetContainerReference(storageAccount, jobId), taskId, storageRetryPolicy) |
| | 100 | | { |
| 0 | 101 | | } |
| | 102 | |
|
/// <summary>
/// Shared private constructor that every public constructor of this class chains to.
/// Validates its arguments, optionally applies a retry policy, and creates the task storage path
/// that the instance methods delegate to.
/// </summary>
/// <param name="jobOutputContainer">The job output container. Must not be null.</param>
/// <param name="taskId">The id of the Azure Batch task. Checked with Validate.IsNotNullOrEmpty.</param>
/// <param name="storageRetryPolicy">The retry policy for storage requests, or null to leave the
/// request options of the container's service client unchanged.</param>
/// <exception cref="ArgumentNullException">The <paramref name="jobOutputContainer"/> argument is null.</exception>
| 21 | 103 | | private TaskOutputStorage(CloudBlobContainer jobOutputContainer, string taskId, IRetryPolicy storageRetryPolicy)
| | 104 | | { |
| 21 | 105 | | if (jobOutputContainer == null) |
| | 106 | | { |
| 0 | 107 | | throw new ArgumentNullException(nameof(jobOutputContainer)); |
| | 108 | | } |
| | 109 | |
|
| 21 | 110 | | Validate.IsNotNullOrEmpty(taskId, nameof(taskId)); |
| | 111 | |
|
// The policy is written to the DefaultRequestOptions of the container's ServiceClient,
// i.e. it is set on the client object rather than stored on this instance.
| 21 | 112 | | if (storageRetryPolicy != null) |
| | 113 | | { |
| 0 | 114 | | jobOutputContainer.ServiceClient.DefaultRequestOptions.RetryPolicy = storageRetryPolicy; |
| | 115 | | } |
| | 116 | |
|
// Every public operation of this class forwards to this storage path object.
| 21 | 117 | | _storagePath = new StoragePath.TaskStoragePath(jobOutputContainer, taskId); |
| 21 | 118 | | } |
| | 119 | |
|
| | 120 | | /// <summary> |
| | 121 | | /// Saves the specified file to persistent storage. |
| | 122 | | /// </summary> |
| | 123 | | /// <param name="kind">A <see cref="TaskOutputKind"/> representing the category under which to |
| | 124 | | /// store this file, for example <see cref="TaskOutputKind.TaskOutput"/> or <see cref="TaskOutputKind.TaskLog"/>.</param> |
| | 125 | | /// <param name="relativePath">The path of the file to save, relative to the current directory. |
| | 126 | | /// If the file is in a subdirectory of the current directory, the relative path will be preserved |
| | 127 | | /// in blob storage.</param> |
| | 128 | | /// <param name="cancellationToken">A <see cref="CancellationToken"/> for controlling the lifetime of the asynchronous operation.</param> |
| | 129 | | /// <returns>A <see cref="Task"/> that represents the asynchronous operation.</returns> |
| | 130 | | /// <remarks>If the file is outside the current directory, traversals up the directory tree are removed. |
| | 131 | | /// For example, a <paramref name="relativePath"/> of "..\ProcessEnv.cmd" would be treated as "ProcessEnv.cmd" |
| | 132 | | /// for the purposes of creating a blob name.</remarks> |
| | 133 | | /// <exception cref="ArgumentNullException">The <paramref name="kind"/> or <paramref name="relativePath"/> argument is null.</exception> |
| | 134 | | /// <exception cref="ArgumentException">The <paramref name="relativePath"/> argument is an absolute path, or is empty.</exception> |
| | 135 | | public async Task SaveAsync( |
| | 136 | | TaskOutputKind kind, |
| | 137 | | string relativePath, |
| | 138 | | CancellationToken cancellationToken = default(CancellationToken) |
| | 139 | | ) |
| 3 | 140 | | => await SaveAsyncImpl(kind, new DirectoryInfo(Directory.GetCurrentDirectory()), relativePath, cancellationT |
| | 141 | |
|
/// <summary>
/// Implementation behind the public SaveAsync(kind, relativePath) overload, which calls this
/// method with the current directory as <paramref name="baseFolder"/>. Forwards all arguments
/// to the storage path's SaveAsync.
/// </summary>
/// <param name="kind">The category under which to store the file.</param>
/// <param name="baseFolder">The base folder passed through to the storage path; presumably the
/// folder that <paramref name="relativePath"/> is resolved against (confirm in StoragePath).</param>
/// <param name="relativePath">The path of the file to save.</param>
/// <param name="cancellationToken">A <see cref="CancellationToken"/> passed through to the storage path.</param>
/// <returns>A <see cref="Task"/> that represents the asynchronous operation.</returns>
| | 142 | | internal async Task SaveAsyncImpl( |
| | 143 | | TaskOutputKind kind, |
| | 144 | | DirectoryInfo baseFolder, |
| | 145 | | string relativePath, |
| | 146 | | CancellationToken cancellationToken = default(CancellationToken) |
| | 147 | | ) |
| 3 | 148 | | => await _storagePath.SaveAsync(kind, baseFolder, relativePath, cancellationToken).ConfigureAwait(false); |
| | 149 | |
|
| | 150 | | /// <summary> |
| | 151 | | /// Saves the specified file to persistent storage. |
| | 152 | | /// </summary> |
| | 153 | | /// <param name="kind">A <see cref="TaskOutputKind"/> representing the category under which to |
| | 154 | | /// store this file, for example <see cref="TaskOutputKind.TaskOutput"/> or <see cref="TaskOutputKind.TaskLog"/>.</param> |
| | 155 | | /// <param name="sourcePath">The path of the file to save.</param> |
| | 156 | | /// <param name="destinationRelativePath">The blob name under which to save the file. This may include a |
| | 157 | | /// relative component, such as "pointclouds/pointcloud_0001.txt".</param> |
| | 158 | | /// <param name="cancellationToken">A <see cref="CancellationToken"/> for controlling the lifetime of the asynchronous operation.</param> |
| | 159 | | /// <returns>A <see cref="Task"/> that represents the asynchronous operation.</returns> |
| | 160 | | /// <exception cref="ArgumentNullException">The <paramref name="kind"/>, <paramref name="sourcePath"/>, or <paramref name="destinationRelativePath"/> argument is null.</exception> |
| | 161 | | /// <exception cref="ArgumentException">The <paramref name="sourcePath"/> or <paramref name="destinationRelativePath"/> argument is empty.</exception> |
| | 162 | | public async Task SaveAsync( |
| | 163 | | TaskOutputKind kind, |
| | 164 | | string sourcePath, |
| | 165 | | string destinationRelativePath, |
| | 166 | | CancellationToken cancellationToken = default(CancellationToken) |
| | 167 | | ) |
| 5 | 168 | | => await _storagePath.SaveAsync(kind, sourcePath, destinationRelativePath, cancellationToken).ConfigureAwait
| | 169 | |
|
| | 170 | | /// <summary> |
| | 171 | | /// Saves the specified text to persistent storage, without requiring you to create a local file. |
| | 172 | | /// </summary> |
| | 173 | | /// <param name="kind">A <see cref="TaskOutputKind"/> representing the category under which to |
| | 174 | | /// store this data, for example <see cref="TaskOutputKind.TaskOutput"/> or <see cref="TaskOutputKind.TaskLog"/>.</param> |
| | 175 | | /// <param name="text">The text to save.</param> |
| | 176 | | /// <param name="destinationRelativePath">The blob name under which to save the text. This may include a |
| | 177 | | /// relative component, such as "records/widget42.json".</param> |
| | 178 | | /// <param name="cancellationToken">A <see cref="CancellationToken"/> for controlling the lifetime of the asynchronous operation.</param> |
| | 179 | | /// <returns>A <see cref="Task"/> that represents the asynchronous operation.</returns> |
| | 180 | | /// <exception cref="ArgumentNullException">The <paramref name="kind"/>, <paramref name="text"/>, or <paramref name="destinationRelativePath"/> argument is null.</exception> |
| | 181 | | /// <exception cref="ArgumentException">The <paramref name="destinationRelativePath"/> argument is empty.</exception> |
| | 182 | | public async Task SaveTextAsync( |
| | 183 | | TaskOutputKind kind, |
| | 184 | | string text, |
| | 185 | | string destinationRelativePath, |
| | 186 | | CancellationToken cancellationToken = default(CancellationToken) |
| | 187 | | ) |
| 0 | 188 | | => await _storagePath.SaveTextAsync(kind, text, destinationRelativePath, cancellationToken).ConfigureAwait(f
| | 189 | |
|
| | 190 | | /// <summary> |
| | 191 | | /// Lists the task outputs of the specified kind. |
| | 192 | | /// </summary> |
| | 193 | | /// <param name="kind">A <see cref="TaskOutputKind"/> representing the category of outputs to |
| | 194 | | /// list, for example <see cref="TaskOutputKind.TaskOutput"/> or <see cref="TaskOutputKind.TaskLog"/>.</param> |
| | 195 | | /// <returns>A sequence of <see cref="OutputFileReference"/> objects for the persisted task outputs of the specified kind.</returns> |
| | 196 | | /// <remarks>The list is retrieved lazily from Azure blob storage when it is enumerated.</remarks> |
| | 197 | | public IEnumerable<OutputFileReference> ListOutputs(TaskOutputKind kind) |
| 1 | 198 | | => _storagePath.List(kind); |
| | 199 | |
|
| | 200 | | /// <summary> |
| | 201 | | /// Retrieves a task output from Azure blob storage by kind and path. |
| | 202 | | /// </summary> |
| | 203 | | /// <param name="kind">A <see cref="TaskOutputKind"/> representing the category of the output to |
| | 204 | | /// retrieve, for example <see cref="TaskOutputKind.TaskOutput"/> or <see cref="TaskOutputKind.TaskLog"/>.</param> |
| | 205 | | /// <param name="filePath">The path under which the output was persisted in blob storage.</param> |
| | 206 | | /// <param name="cancellationToken">A <see cref="CancellationToken"/> for controlling the lifetime of the asynchronous operation.</param> |
| | 207 | | /// <returns>A reference to the requested file in Azure blob storage.</returns> |
| | 208 | | public async Task<OutputFileReference> GetOutputAsync( |
| | 209 | | TaskOutputKind kind, |
| | 210 | | string filePath, |
| | 211 | | CancellationToken cancellationToken = default(CancellationToken) |
| | 212 | | ) |
| 3 | 213 | | => await _storagePath.GetOutputAsync(kind, filePath, cancellationToken).ConfigureAwait(false); |
| | 214 | |
|
| | 215 | | /// <summary> |
| | 216 | | /// Saves the specified file to persistent storage as a <see cref="TaskOutputKind.TaskLog"/>, |
| | 217 | | /// and tracks subsequent appends to the file and appends them to the persistent copy too. |
| | 218 | | /// </summary> |
| | 219 | | /// <param name="relativePath">The path of the file to save, relative to the current directory. |
| | 220 | | /// If the file is in a subdirectory of the current directory, the relative path will be preserved |
| | 221 | | /// in blob storage.</param> |
| | 222 | | /// <returns>An <see cref="ITrackedSaveOperation"/> which will save a file to blob storage and will periodically flush file |
| | 223 | | /// appends to the blob until disposed. When disposed, all remaining appends are flushed to |
| | 224 | | /// blob storage, and further tracking of file appends is stopped.</returns> |
| | 225 | | /// <remarks> |
| | 226 | | /// <para>Tracking supports only appends. That is, while a file is being tracked, any data added |
| | 227 | | /// at the end is appended to the persistent storage. Changes to data that has already been uploaded |
| | 228 | | /// will not be reflected to the persistent store. This method is therefore intended for use only |
| | 229 | | /// with files such as (non-rotating) log files where data is only added at the end of the file. |
| | 230 | | /// If the entire contents of a file can change, use <see cref="SaveAsync(TaskOutputKind, string, CancellationToken)"/> |
| | 231 | | /// and call it periodically or after each change.</para> |
| | 232 | | /// <para>If the file is outside the current directory, traversals up the directory tree are removed. |
| | 233 | | /// For example, a <paramref name="relativePath"/> of "..\ProcessEnv.cmd" would be treated as "ProcessEnv.cmd" |
| | 234 | | /// for the purposes of creating a blob name. This overload passes TrackedFile.DefaultFlushInterval as the flush interval.</para> |
| | 235 | | /// </remarks> |
| | 236 | | /// <exception cref="ArgumentNullException">The <paramref name="relativePath"/> argument is null.</exception> |
| | 237 | | /// <exception cref="ArgumentException">The <paramref name="relativePath"/> argument is an absolute path, or is empty.</exception> |
| | 238 | | public async Task<ITrackedSaveOperation> SaveTrackedAsync(string relativePath) |
| 2 | 239 | | => await _storagePath.SaveTrackedAsync(TaskOutputKind.TaskLog, relativePath, TrackedFile.DefaultFlushInterva
| | 240 | |
|
| | 241 | | /// <summary> |
| | 242 | | /// Saves the specified file to persistent storage, and tracks subsequent appends to the file |
| | 243 | | /// and appends them to the persistent copy too. |
| | 244 | | /// </summary> |
| | 245 | | /// <param name="kind">A <see cref="TaskOutputKind"/> representing the category under which to |
| | 246 | | /// store this file, for example <see cref="TaskOutputKind.TaskOutput"/> or <see cref="TaskOutputKind.TaskLog"/>.</param> |
| | 247 | | /// <param name="sourcePath">The path of the file to save.</param> |
| | 248 | | /// <param name="destinationRelativePath">The blob name under which to save the file. This may include a |
| | 249 | | /// relative component, such as "pointclouds/pointcloud_0001.txt".</param> |
| | 250 | | /// <param name="flushInterval">The interval at which to flush appends to persistent storage.</param> |
| | 251 | | /// <returns>An <see cref="ITrackedSaveOperation"/> which will save a file to blob storage and will periodically flush file |
| | 252 | | /// appends to the blob until disposed. When disposed, all remaining appends are flushed to |
| | 253 | | /// blob storage, and further tracking of file appends is stopped.</returns> |
| | 254 | | /// <remarks> |
| | 255 | | /// <para>Tracking supports only appends. That is, while a file is being tracked, any data added |
| | 256 | | /// at the end is appended to the persistent storage. Changes to data that has already been uploaded |
| | 257 | | /// will not be reflected to the persistent store. This method is therefore intended for use only |
| | 258 | | /// with files such as (non-rotating) log files where data is only added at the end of the file. |
| | 259 | | /// If the entire contents of a file can change, use <see cref="SaveAsync(TaskOutputKind, string, string, CancellationToken)"/> |
| | 260 | | /// and call it periodically or after each change.</para> |
| | 261 | | /// </remarks> |
| | 262 | | /// <exception cref="ArgumentNullException">The <paramref name="kind"/>, <paramref name="sourcePath"/>, or <paramref name="destinationRelativePath"/> argument is null.</exception> |
| | 263 | | /// <exception cref="ArgumentException">The <paramref name="sourcePath"/> or <paramref name="destinationRelativePath"/> argument is empty.</exception> |
| | 264 | | public async Task<ITrackedSaveOperation> SaveTrackedAsync( |
| | 265 | | TaskOutputKind kind, |
| | 266 | | string sourcePath, |
| | 267 | | string destinationRelativePath, |
| | 268 | | TimeSpan flushInterval |
| | 269 | | ) |
| 5 | 270 | | => await _storagePath.SaveTrackedAsync(kind, sourcePath, destinationRelativePath, flushInterval).ConfigureAw
| | 271 | |
|
| | 272 | | /// <summary> |
| | 273 | | /// Gets the blob name prefix/folder where files of the given kind are stored, as reported by the storage path's BlobNamePrefix. |
| | 274 | | /// </summary> |
| | 275 | | /// <param name="kind">The output kind.</param> |
| | 276 | | /// <returns>The Blob name prefix/folder where files of the given kind are stored.</returns> |
| 4 | 277 | | public string GetOutputStoragePath(TaskOutputKind kind) => _storagePath.BlobNamePrefix(kind); |
| | 278 | | } |
| | 279 | | } |