| | | 1 | | // Copyright (c) Microsoft Corporation. All rights reserved. |
| | | 2 | | // Licensed under the MIT License. |
| | | 3 | | |
| | | 4 | | // Copied from https://github.com/aspnet/AspNetCore/tree/master/src/Http/Headers/src |
| | | 5 | | |
| | | 6 | | using System; |
| | | 7 | | using System.Diagnostics.Contracts; |
| | | 8 | | using System.Globalization; |
| | | 9 | | using System.Text; |
| | | 10 | | |
| | | 11 | | #pragma warning disable IDE0018 // Inline declaration |
| | | 12 | | #pragma warning disable IDE0054 // Use compound assignment |
| | | 13 | | #pragma warning disable IDE0059 // Unnecessary assignment |
| | | 14 | | #pragma warning disable IDE1006 // Missing s_ prefix |
| | | 15 | | |
| | | 16 | | namespace Azure.Core.Http.Multipart |
| | | 17 | | { |
| | | 18 | | internal static class HttpRuleParser |
| | | 19 | | { |
// Lookup table with one entry per ASCII code; built once by CreateTokenChars().
private static readonly bool[] TokenChars = CreateTokenChars();

// Deepest nesting level GetExpressionLength accepts before it reports an invalid format.
private const int MaxNestedCount = 5;

// Exact-match patterns handed to DateTimeOffset.TryParseExact by TryStringToDate.
// The standard "r" specifier (RFC 1123, the required output format) is intentionally
// not listed because it is too strict for input.
private static readonly string[] DateFormats =
{
    // --- RFC 1123 family ---
    "ddd, d MMM yyyy H:m:s 'GMT'", // like "r", but accepts both 1 and 01 for date and time parts
    "ddd, d MMM yyyy H:m:s", // no zone - assume GMT
    "d MMM yyyy H:m:s 'GMT'", // no day-of-week
    "d MMM yyyy H:m:s", // no day-of-week, no zone
    "ddd, d MMM yy H:m:s 'GMT'", // short year
    "ddd, d MMM yy H:m:s", // short year, no zone
    "d MMM yy H:m:s 'GMT'", // no day-of-week, short year
    "d MMM yy H:m:s", // no day-of-week, short year, no zone

    // --- RFC 850 family and asctime ---
    "dddd, d'-'MMM'-'yy H:m:s 'GMT'", // RFC 850, short year
    "dddd, d'-'MMM'-'yy H:m:s", // RFC 850, no zone
    "ddd, d'-'MMM'-'yyyy H:m:s 'GMT'", // RFC 850, long year
    "ddd MMM d H:m:s yyyy", // ANSI C's asctime() format

    // --- RFC 5322 family ---
    "ddd, d MMM yyyy H:m:s zzz", // with zone offset
    "ddd, d MMM yyyy H:m:s", // no zone
    "d MMM yyyy H:m:s zzz", // no day-of-week
    "d MMM yyyy H:m:s", // no day-of-week, no zone
};

// Characters that make up linear whitespace and line folding.
internal const char CR = '\r';
internal const char LF = '\n';
internal const char SP = ' ';
internal const char Tab = '\t';

// Digit-count limits: long.MaxValue has 19 decimal digits, int.MaxValue has 10.
internal const int MaxInt64Digits = 19;
internal const int MaxInt32Digits = 10;

// iso-8859-1, Western European (ISO)
internal static readonly Encoding DefaultHttpEncoding = Encoding.GetEncoding("iso-8859-1");
| | | 53 | | |
// Builds the 128-entry table that answers "may this ASCII character appear in a token?".
//
// token = 1*<any CHAR except CTLs or separators>
// CTL   = <any US-ASCII control character (octets 0 - 31) and DEL (127)>
//
// Returns an array indexed by character code: true for token characters, false otherwise.
private static bool[] CreateTokenChars()
{
    var table = new bool[128]; // every entry starts out false

    // Enable the printable range '!' (33) .. '~' (126); space (32) and DEL (127) stay false.
    for (int code = '!'; code <= '~'; code++)
    {
        table[code] = true;
    }

    // Separators are printable but are not valid token characters, so switch them back off.
    foreach (char separator in "()<>@,;:\\\"/[]?={}")
    {
        table[separator] = false;
    }

    return table;
}
| | | 87 | | |
// Reports whether 'character' may appear inside a token.
// The lookup table only covers codes 0..127, so anything above that range is rejected
// before the table is indexed.
internal static bool IsTokenChar(char character)
{
    return (character <= 127) && TokenChars[character];
}
| | | 98 | | |
// Counts the consecutive token characters in 'input' beginning at 'startIndex'.
// Returns 0 when 'startIndex' is at or past the end of the input, or when the first
// character is not a token character.
[Pure]
internal static int GetTokenLength(StringSegment input, int startIndex)
{
    Contract.Requires(input != null);
    Contract.Ensures((Contract.Result<int>() >= 0) && (Contract.Result<int>() <= (input.Length - startIndex)));

    // Advance until the input runs out or a non-token character is hit. When startIndex is
    // already at/past the end the loop body never runs and the difference below is 0.
    int scan = startIndex;
    for (; scan < input.Length; scan++)
    {
        if (!IsTokenChar(input[scan]))
        {
            break;
        }
    }

    return scan - startIndex;
}
| | | 122 | | |
// Measures the run of linear whitespace in 'input' beginning at 'startIndex'.
// A run consists of SP / HT characters and of folded line breaks, i.e. CR LF immediately
// followed by SP or HT. Returns the number of characters in the run (0 if there is none,
// or if 'startIndex' is at or past the end of the input).
internal static int GetWhitespaceLength(StringSegment input, int startIndex)
{
    Contract.Requires(input != null);
    Contract.Ensures((Contract.Result<int>() >= 0) && (Contract.Result<int>() <= (input.Length - startIndex)));

    int position = startIndex;

    while (position < input.Length)
    {
        char ch = input[position];

        if ((ch == SP) || (ch == Tab))
        {
            position++;
            continue;
        }

        // A CR only counts as whitespace when it starts a fold: it must be followed by LF
        // and then by at least one SP or HT, so three characters have to be available.
        bool isFold = (ch == CR)
            && (position + 2 < input.Length)
            && (input[position + 1] == LF)
            && ((input[position + 2] == SP) || (input[position + 2] == Tab));

        if (!isFold)
        {
            break;
        }

        position += 3;
    }

    // Either a non-whitespace character stopped the scan or the input was exhausted.
    return position - startIndex;
}
| | | 166 | | |
// Measures the non-negative number in 'input' beginning at 'startIndex'.
//
// Accepted characters are the ASCII digits '0'..'9' and, when 'allowDecimal' is true, a single
// '.' as decimal separator (',' is never accepted, so "1,23" stops after "1"). A value may end
// with the dot ("1." is valid) but may not begin with one (".123" yields 0). Negative values
// are not recognized.
//
// Returns the number of characters that belong to the number, or 0 if there is none.
// 'startIndex' must be a valid index into 'input'.
internal static int GetNumberLength(StringSegment input, int startIndex, bool allowDecimal)
{
    Contract.Requires(input != null);
    Contract.Requires((startIndex >= 0) && (startIndex < input.Length));
    Contract.Ensures((Contract.Result<int>() >= 0) && (Contract.Result<int>() <= (input.Length - startIndex)));

    // A leading dot is rejected regardless of 'allowDecimal'.
    if (input[startIndex] == '.')
    {
        return 0;
    }

    // When decimals are not allowed the first dot already terminates the number.
    bool dotStillAllowed = allowDecimal;

    int index = startIndex;
    for (; index < input.Length; index++)
    {
        char ch = input[index];

        if ((ch >= '0') && (ch <= '9'))
        {
            continue;
        }

        if (dotStillAllowed && (ch == '.'))
        {
            // Only one decimal separator per number.
            dotStillAllowed = false;
            continue;
        }

        break;
    }

    return index - startIndex;
}
| | | 211 | | |
// Measures a quoted-string (text enclosed in double quotes) beginning at 'startIndex'.
// Delegates to GetExpressionLength with '"' as both the opening and the closing delimiter
// and with nesting switched off; 'length' and the result come straight from that call.
internal static HttpParseResult GetQuotedStringLength(StringSegment input, int startIndex, out int length)
{
    const char quote = '"';

    // GetExpressionLength needs a by-ref depth counter even when nesting is disabled.
    int depth = 0;
    return GetExpressionLength(input, startIndex, quote, quote, false, ref depth, out length);
}
| | | 217 | | |
// quoted-pair = "\" CHAR
// CHAR        = <any US-ASCII character (octets 0 - 127)>
//
// Checks whether a quoted-pair begins at 'startIndex'.
//   NotParsed     - the character at 'startIndex' is not a backslash; 'length' is 0.
//   InvalidFormat - the backslash is the last character, or the character after it is
//                   outside the 0..127 range; 'length' is 0.
//   Parsed        - a backslash followed by an ASCII character; 'length' is 2.
// 'startIndex' must be a valid index into 'input'.
internal static HttpParseResult GetQuotedPairLength(StringSegment input, int startIndex, out int length)
{
    Contract.Requires(input != null);
    Contract.Requires((startIndex >= 0) && (startIndex < input.Length));
    Contract.Ensures((Contract.ValueAtReturn(out length) >= 0) &&
        (Contract.ValueAtReturn(out length) <= (input.Length - startIndex)));

    length = 0;

    if (input[startIndex] != '\\')
    {
        return HttpParseResult.NotParsed;
    }

    // The escaped character has to exist and has to be ASCII; which ASCII character it is
    // does not matter.
    int escapedIndex = startIndex + 1;
    bool escapedCharPresent = escapedIndex < input.Length;
    if (!escapedCharPresent || (input[escapedIndex] > 127))
    {
        return HttpParseResult.InvalidFormat;
    }

    length = 2;
    return HttpParseResult.Parsed;
}
| | | 245 | | |
// Parses 'input' as a date by matching it against the patterns in DateFormats.
// Input handling is deliberately lenient: a wide variety of common formats is accepted,
// surrounding/inner white space is allowed, and a value without zone information is
// assumed to be universal time. Returns true and sets 'result' when a pattern matches.
internal static bool TryStringToDate(StringSegment input, out DateTimeOffset result)
{
    const DateTimeStyles styles = DateTimeStyles.AllowWhiteSpaces | DateTimeStyles.AssumeUniversal;

    string text = input.ToString();
    return DateTimeOffset.TryParseExact(text, DateFormats, DateTimeFormatInfo.InvariantInfo, styles, out result);
}
| | | 251 | | |
// TEXT = <any OCTET except CTLs, but including LWS>
// LWS  = [CRLF] 1*( SP | HT )
// CTL  = <any US-ASCII control character (octets 0 - 31) and DEL (127)>
//
// Since we don't really care about the content of a quoted string or comment, we're more tolerant and
// allow these characters. We only want to find the delimiters ('"' for quoted string and '(', ')' for comment).
//
// Measures a delimited expression that begins at 'startIndex'.
//   input           - text being parsed.
//   startIndex      - index of the expected opening delimiter; must be a valid index into 'input'.
//   openChar        - opening delimiter.
//   closeChar       - closing delimiter.
//   supportsNesting - when true, an 'openChar' inside the expression starts a nested expression
//                     that is parsed recursively.
//   nestedCount     - current nesting depth, shared across the recursion. It is incremented before
//                     a nested expression is parsed and always decremented again afterwards.
//   length          - on Parsed, the number of characters from the opening through the closing
//                     delimiter (inclusive); otherwise 0.
// Returns NotParsed when the character at 'startIndex' is not 'openChar', Parsed when the matching
// 'closeChar' was found, and InvalidFormat when the expression is unterminated, a nested expression
// is invalid, or nesting goes deeper than MaxNestedCount.
//
// 'nestedCount': Comments can be nested. We allow a depth of up to 5 nested comments, i.e. something like
// "(((((comment)))))". Without a limit an attacker could send a comment with hundreds of nested
// comments, resulting in a stack overflow exception. In addition, having more than 1 nested comment (if any)
// is unusual.
private static HttpParseResult GetExpressionLength(
    StringSegment input,
    int startIndex,
    char openChar,
    char closeChar,
    bool supportsNesting,
    ref int nestedCount,
    out int length)
{
    Contract.Requires(input != null);
    Contract.Requires((startIndex >= 0) && (startIndex < input.Length));
    Contract.Ensures((Contract.Result<HttpParseResult>() != HttpParseResult.Parsed) ||
        (Contract.ValueAtReturn<int>(out length) > 0));

    length = 0;

    if (input[startIndex] != openChar)
    {
        return HttpParseResult.NotParsed;
    }

    var current = startIndex + 1; // Start parsing with the character next to the first open-char
    while (current < input.Length)
    {
        // Only check whether we have a quoted char, if we have at least 3 characters left to read (i.e.
        // quoted char + closing char). Otherwise the closing char may be considered part of the quoted char.
        var quotedPairLength = 0;
        if ((current + 2 < input.Length) &&
            (GetQuotedPairLength(input, current, out quotedPairLength) == HttpParseResult.Parsed))
        {
            // We ignore invalid quoted-pairs. Invalid quoted-pairs may mean that it looked like a quoted pair,
            // but we actually have a quoted-string: e.g. "\ü" ('\' followed by a char >127 - quoted-pair only
            // allows ASCII chars after '\'; qdtext allows both '\' and >127 chars).
            current += quotedPairLength;
            continue;
        }

        // If we support nested expressions and we find an open-char, then parse the nested expressions.
        if (supportsNesting && (input[current] == openChar))
        {
            nestedCount++;
            try
            {
                // Check if we exceeded the number of nested calls.
                if (nestedCount > MaxNestedCount)
                {
                    return HttpParseResult.InvalidFormat;
                }

                var nestedLength = 0;
                HttpParseResult nestedResult = GetExpressionLength(input, current, openChar, closeChar,
                    supportsNesting, ref nestedCount, out nestedLength);

                switch (nestedResult)
                {
                    case HttpParseResult.Parsed:
                        // Skip over the nested expression and restart the loop. 'current' may now be
                        // equal to input.Length (the nested expression ended the input), so the bounds
                        // check in the loop condition has to run before anything is indexed, and the
                        // next character must go through the quoted-pair / nesting checks above
                        // instead of being treated as plain text.
                        current += nestedLength;
                        continue;

                    case HttpParseResult.NotParsed:
                        Contract.Assert(false, "'NotParsed' is unexpected: We started nested expression " +
                            "parsing, because we found the open-char. So either it's a valid nested " +
                            "expression or it has invalid format.");
                        break;

                    case HttpParseResult.InvalidFormat:
                        // If the nested expression is invalid, we can't continue, so we fail with invalid format.
                        return HttpParseResult.InvalidFormat;

                    default:
                        Contract.Assert(false, "Unknown enum result: " + nestedResult);
                        break;
                }
            }
            finally
            {
                // Runs on every exit path of the try block (return, continue, fall-through).
                nestedCount--;
            }
        }

        if (input[current] == closeChar)
        {
            length = current - startIndex + 1;
            return HttpParseResult.Parsed;
        }
        current++;
    }

    // We didn't see the closing delimiter, therefore we have an invalid expression string.
    return HttpParseResult.InvalidFormat;
}
| | | 354 | | } |
| | | 355 | | } |