| | 1 | | // Copyright (c) Microsoft Corporation. All rights reserved. |
| | 2 | | // Licensed under the MIT License. |
| | 3 | |
|
| | 4 | | // Copied from https://github.com/aspnet/AspNetCore/tree/master/src/Http/Headers/src |
| | 5 | |
|
| | 6 | | using System; |
| | 7 | | using System.Diagnostics.Contracts; |
| | 8 | | using System.Globalization; |
| | 9 | | using System.Text; |
| | 10 | |
|
| | 11 | | #pragma warning disable IDE0018 // Inline declaration |
| | 12 | | #pragma warning disable IDE0054 // Use compound assignment |
| | 13 | | #pragma warning disable IDE0059 // Unnecessary assignment |
| | 14 | | #pragma warning disable IDE1006 // Missing s_ prefix |
| | 15 | |
|
| | 16 | | namespace Azure.Core.Http.Multipart |
| | 17 | | { |
| | 18 | | internal static class HttpRuleParser |
| | 19 | | { |
// Lookup table indexed by US-ASCII code; an entry is true when that character may appear in a token.
private static readonly bool[] TokenChars = CreateTokenChars();

// Deepest level of nested expressions accepted by GetExpressionLength.
private const int MaxNestedCount = 5;

// Date formats accepted on input, listed in the order they are handed to the parser.
private static readonly string[] DateFormats =
{
    // "r", // RFC 1123, required output format but too strict for input
    "ddd, d MMM yyyy H:m:s 'GMT'", // RFC 1123 (r, except it allows both 1 and 01 for date and time)
    "ddd, d MMM yyyy H:m:s", // RFC 1123, no zone - assume GMT
    "d MMM yyyy H:m:s 'GMT'", // RFC 1123, no day-of-week
    "d MMM yyyy H:m:s", // RFC 1123, no day-of-week, no zone
    "ddd, d MMM yy H:m:s 'GMT'", // RFC 1123, short year
    "ddd, d MMM yy H:m:s", // RFC 1123, short year, no zone
    "d MMM yy H:m:s 'GMT'", // RFC 1123, no day-of-week, short year
    "d MMM yy H:m:s", // RFC 1123, no day-of-week, short year, no zone

    "dddd, d'-'MMM'-'yy H:m:s 'GMT'", // RFC 850, short year
    "dddd, d'-'MMM'-'yy H:m:s", // RFC 850 no zone
    "ddd, d'-'MMM'-'yyyy H:m:s 'GMT'", // RFC 850, long year
    "ddd MMM d H:m:s yyyy", // ANSI C's asctime() format

    "ddd, d MMM yyyy H:m:s zzz", // RFC 5322
    "ddd, d MMM yyyy H:m:s", // RFC 5322 no zone
    "d MMM yyyy H:m:s zzz", // RFC 5322 no day-of-week
    "d MMM yyyy H:m:s", // RFC 5322 no day-of-week, no zone
};

// Characters the whitespace scanner recognises.
internal const char CR = '\r';
internal const char LF = '\n';
internal const char SP = ' ';
internal const char Tab = '\t';

// Decimal digit counts of long.MaxValue (19) and int.MaxValue (10).
internal const int MaxInt64Digits = 19;
internal const int MaxInt32Digits = 10;

// iso-8859-1, Western European (ISO)
internal static readonly Encoding DefaultHttpEncoding = Encoding.GetEncoding("iso-8859-1");
| | 53 | |
|
// Builds the 128-entry lookup table that backs IsTokenChar.
//
// token = 1*<any CHAR except CTLs or separators>
// CTL   = <any US-ASCII control character (octets 0 - 31) and DEL (127)>
private static bool[] CreateTokenChars()
{
    // One slot per US-ASCII code point; a freshly allocated array is all false.
    var table = new bool[128];

    // Enable the printable range '!' (33) through '~' (126). Space (32) and DEL (127) stay disabled.
    for (int code = '!'; code <= '~'; code++)
    {
        table[code] = true;
    }

    // Separators are printable, but they are not valid token characters: switch them off again.
    const string separators = "()<>@,;:\\\"/[]?={}";
    foreach (char separator in separators)
    {
        table[separator] = false;
    }

    return table;
}
| | 87 | |
|
// Reports whether 'character' is allowed inside a token, according to the TokenChars table.
internal static bool IsTokenChar(char character)
{
    // The table only has entries for codes 0 - 127; anything above that is never a token character.
    return (character <= 127) && TokenChars[character];
}
| | 98 | |
|
// Counts the consecutive token characters in 'input', beginning at 'startIndex'.
// Returns 0 when 'startIndex' is at or past the end of the input, or when the first character is not a
// token character.
[Pure]
internal static int GetTokenLength(StringSegment input, int startIndex)
{
    Contract.Requires(input != null);
    Contract.Ensures((Contract.Result<int>() >= 0) && (Contract.Result<int>() <= (input.Length - startIndex)));

    int end = input.Length;
    if (startIndex >= end)
    {
        return 0;
    }

    // Walk forward until the input is exhausted or a non-token character shows up.
    int index = startIndex;
    while ((index < end) && IsTokenChar(input[index]))
    {
        index++;
    }

    return index - startIndex;
}
| | 122 | |
|
// Measures the run of linear whitespace in 'input', beginning at 'startIndex'.
// A run is made of SP, HT, and CR LF sequences that are immediately followed by SP or HT.
// Returns 0 when 'startIndex' is at or past the end of the input.
internal static int GetWhitespaceLength(StringSegment input, int startIndex)
{
    Contract.Requires(input != null);
    Contract.Ensures((Contract.Result<int>() >= 0) && (Contract.Result<int>() <= (input.Length - startIndex)));

    if (startIndex >= input.Length)
    {
        return 0;
    }

    int index = startIndex;
    while (index < input.Length)
    {
        char ch = input[index];

        if ((ch == SP) || (ch == Tab))
        {
            index++;
        }
        else if ((ch == CR)
            && (index + 2 < input.Length)
            && (input[index + 1] == LF)
            && ((input[index + 2] == SP) || (input[index + 2] == Tab)))
        {
            // A #13 char only counts when it is followed by #10 and then at least one SP or HT;
            // consume all three characters at once.
            index += 3;
        }
        else
        {
            // First character that is not part of the whitespace run.
            break;
        }
    }

    // Either we stopped at a non-whitespace character, or everything up to the end was whitespace.
    return index - startIndex;
}
| | 166 | |
|
// Measures the non-negative number in 'input' that begins at 'startIndex'.
// Digits are always accepted; a single '.' is accepted only when 'allowDecimal' is true.
// Callers must pass a 'startIndex' that lies inside the input: the first character is read unconditionally.
internal static int GetNumberLength(StringSegment input, int startIndex, bool allowDecimal)
{
    Contract.Requires(input != null);
    Contract.Requires((startIndex >= 0) && (startIndex < input.Length));
    Contract.Ensures((Contract.Result<int>() >= 0) && (Contract.Result<int>() <= (input.Length - startIndex)));

    // The RFC doesn't allow decimal values starting with a dot: ".123" is invalid and must be written as
    // "0.123". It defines no negative values either, so only non-negative values are parsed. Only '.' is a
    // decimal separator; "1,23" is invalid and must be written as "1.23".
    if (input[startIndex] == '.')
    {
        return 0;
    }

    // When decimals are not allowed, behave as if the one permitted dot had already been read, so that
    // any dot found in the string stops the scan.
    bool dotConsumed = !allowDecimal;

    int index = startIndex;
    for (; index < input.Length; index++)
    {
        char ch = input[index];

        bool isDigit = (ch >= '0') && (ch <= '9');
        if (isDigit)
        {
            continue;
        }

        if ((ch == '.') && !dotConsumed)
        {
            // Note that value "1." is valid.
            dotConsumed = true;
            continue;
        }

        break;
    }

    return index - startIndex;
}
| | 211 | |
|
// Measures a double-quoted string in 'input' that begins at 'startIndex'.
// Delegates to GetExpressionLength with '"' as both the opening and the closing character and with nesting
// disabled; the result and 'length' are exactly what that call produces.
internal static HttpParseResult GetQuotedStringLength(StringSegment input, int startIndex, out int length)
{
    const char quote = '"';

    // Nesting is off, but the callee still takes its depth counter by reference.
    int depth = 0;
    return GetExpressionLength(input, startIndex, quote, quote, false, ref depth, out length);
}
| | 217 | |
|
// quoted-pair = "\" CHAR
// CHAR        = <any US-ASCII character (octets 0 - 127)>
//
// Result:
//   NotParsed     - the character at 'startIndex' is not a backslash; 'length' is 0.
//   InvalidFormat - the backslash is the last character, or the character after it is above 127;
//                   'length' is 0.
//   Parsed        - 'length' is 2 (the backslash plus the character it escapes).
internal static HttpParseResult GetQuotedPairLength(StringSegment input, int startIndex, out int length)
{
    Contract.Requires(input != null);
    Contract.Requires((startIndex >= 0) && (startIndex < input.Length));
    Contract.Ensures((Contract.ValueAtReturn(out length) >= 0) &&
        (Contract.ValueAtReturn(out length) <= (input.Length - startIndex)));

    length = 0;

    // Without a leading backslash this is not a quoted-pair at all.
    if (input[startIndex] != '\\')
    {
        return HttpParseResult.NotParsed;
    }

    // A quoted-pair needs a second character, and that character has to be in the range 0 - 127.
    int escapedIndex = startIndex + 1;
    bool hasEscapedChar = escapedIndex < input.Length;
    if (!hasEscapedChar || (input[escapedIndex] > 127))
    {
        return HttpParseResult.InvalidFormat;
    }

    // Apart from the range check, the escaped character itself is not inspected.
    length = 2;
    return HttpParseResult.Parsed;
}
| | 245 | |
|
// Parses 'input' against the formats listed in DateFormats, using invariant format information.
// Surrounding/inner white space is allowed, and a value without a zone is assumed to be universal time.
// Returns true and sets 'result' on success; otherwise returns false.
internal static bool TryStringToDate(StringSegment input, out DateTimeOffset result)
{
    const DateTimeStyles styles = DateTimeStyles.AllowWhiteSpaces | DateTimeStyles.AssumeUniversal;

    string text = input.ToString();
    return DateTimeOffset.TryParseExact(text, DateFormats, DateTimeFormatInfo.InvariantInfo, styles, out result);
}
| | 251 | |
|
// TEXT = <any OCTET except CTLs, but including LWS>
// LWS = [CRLF] 1*( SP | HT )
// CTL = <any US-ASCII control character (octets 0 - 31) and DEL (127)>
//
// Since we don't really care about the content of a quoted string or comment, we're more tolerant and
// allow these characters. We only want to find the delimiters ('"' for quoted string and '(', ')' for comment).
//
// Measures the delimited expression in 'input' that begins at 'startIndex'.
//
// Result:
//   NotParsed     - the character at 'startIndex' is not 'openChar'; 'length' is 0.
//   Parsed        - the matching 'closeChar' was found; 'length' covers the expression including both
//                   delimiters.
//   InvalidFormat - the closing character is missing, a nested expression is invalid, or the nesting is
//                   deeper than MaxNestedCount; 'length' is 0.
//
// 'supportsNesting': when true, an 'openChar' inside the expression starts a nested expression that is
// parsed recursively.
//
// 'nestedCount': Comments can be nested. We allow a depth of up to 5 nested comments, i.e. something like
// "(((((comment)))))". If we didn't define a limit, an attacker could send a comment with hundreds of nested
// comments, resulting in a stack overflow exception. In addition, having more than 1 nested comment (if any)
// is unusual. The counter is incremented around each recursive call and restored afterwards.
private static HttpParseResult GetExpressionLength(
    StringSegment input,
    int startIndex,
    char openChar,
    char closeChar,
    bool supportsNesting,
    ref int nestedCount,
    out int length)
{
    Contract.Requires(input != null);
    Contract.Requires((startIndex >= 0) && (startIndex < input.Length));
    Contract.Ensures((Contract.Result<HttpParseResult>() != HttpParseResult.Parsed) ||
        (Contract.ValueAtReturn<int>(out length) > 0));

    length = 0;

    if (input[startIndex] != openChar)
    {
        return HttpParseResult.NotParsed;
    }

    var current = startIndex + 1; // Start parsing with the character next to the first open-char
    while (current < input.Length)
    {
        // Only check whether we have a quoted char, if we have at least 3 characters left to read (i.e.
        // quoted char + closing char). Otherwise the closing char may be considered part of the quoted char.
        var quotedPairLength = 0;
        if ((current + 2 < input.Length) &&
            (GetQuotedPairLength(input, current, out quotedPairLength) == HttpParseResult.Parsed))
        {
            // We ignore invalid quoted-pairs. Invalid quoted-pairs may mean that it looked like a quoted pair,
            // but we actually have a quoted-string: e.g. "\ü" ('\' followed by a char >127 - quoted-pair only
            // allows ASCII chars after '\'; qdtext allows both '\' and >127 chars).
            current = current + quotedPairLength;
            continue;
        }

        // If we support nested expressions and we find an open-char, then parse the nested expressions.
        if (supportsNesting && (input[current] == openChar))
        {
            nestedCount++;
            try
            {
                // Check if we exceeded the number of nested calls.
                if (nestedCount > MaxNestedCount)
                {
                    return HttpParseResult.InvalidFormat;
                }

                var nestedLength = 0;
                HttpParseResult nestedResult = GetExpressionLength(input, current, openChar, closeChar,
                    supportsNesting, ref nestedCount, out nestedLength);

                switch (nestedResult)
                {
                    case HttpParseResult.Parsed:
                        // Skip over the nested expression and restart the loop. 'current' may now be equal
                        // to input.Length (outer expression not terminated), or it may point at another
                        // open-char or at a quoted-pair; all of these must go through the checks at the
                        // top of the loop instead of falling through to the close-char test below.
                        // The finally block still restores nestedCount.
                        current += nestedLength;
                        continue;

                    case HttpParseResult.NotParsed:
                        Contract.Assert(false, "'NotParsed' is unexpected: We started nested expression " +
                            "parsing, because we found the open-char. So either it's a valid nested " +
                            "expression or it has invalid format.");
                        break;

                    case HttpParseResult.InvalidFormat:
                        // If the nested expression is invalid, we can't continue, so we fail with invalid format.
                        return HttpParseResult.InvalidFormat;

                    default:
                        Contract.Assert(false, "Unknown enum result: " + nestedResult);
                        break;
                }
            }
            finally
            {
                nestedCount--;
            }
        }

        if (input[current] == closeChar)
        {
            length = current - startIndex + 1;
            return HttpParseResult.Parsed;
        }
        current++;
    }

    // We didn't see the final quote, therefore we have an invalid expression string.
    return HttpParseResult.InvalidFormat;
}
| | 354 | | } |
| | 355 | | } |