| | 1 | | // Copyright (c) Microsoft Corporation. All rights reserved. |
| | 2 | | // Licensed under the MIT License. |
| | 3 | |
|
| | 4 | | using System; |
| | 5 | | using System.IO; |
| | 6 | | using Azure.AI.FormRecognizer.Models; |
| | 7 | |
|
| | 8 | | namespace Azure.AI.FormRecognizer |
| | 9 | | { |
/// <summary>
/// Extension methods that inspect the leading bytes of a <see cref="Stream"/> to recognize
/// well-known file signatures.
/// </summary>
internal static class StreamExtensions
{
    /// <summary>The set of bytes expected to be present at the start of PDF files.</summary>
    private static readonly byte[] PdfHeader = { 0x25, 0x50, 0x44, 0x46 };

    /// <summary>The set of bytes expected to be present at the start of PNG files.</summary>
    private static readonly byte[] PngHeader = { 0x89, 0x50, 0x4E, 0x47 };

    /// <summary>The set of bytes expected to be present at the start of JPEG files.</summary>
    private static readonly byte[] JpegHeader = { 0xFF, 0xD8 };

    /// <summary>The set of bytes expected to be present at the start of TIFF (little-endian) files.</summary>
    private static readonly byte[] TiffLeHeader = { 0x49, 0x49, 0x2A, 0x00 };

    /// <summary>The set of bytes expected to be present at the start of TIFF (big-endian) files.</summary>
    private static readonly byte[] TiffBeHeader = { 0x4D, 0x4D, 0x00, 0x2A };

    /// <summary>
    /// Attempts to detect the <see cref="FormContentType"/> of a stream of bytes. The algorithm reads the
    /// bytes at the stream's current position and compares them to well-known file signatures. The stream's
    /// position is restored before this method returns.
    /// </summary>
    /// <param name="stream">The stream on which the content type detection attempt will be performed.</param>
    /// <param name="contentType">If the detection is successful, outputs the detected content type. Otherwise, <c>default</c>.</param>
    /// <returns><c>true</c> if the detection was successful. Otherwise, <c>false</c>.</returns>
    /// <exception cref="NotSupportedException">Thrown when <paramref name="stream"/> is not seekable or readable.</exception>
    public static bool TryGetContentType(this Stream stream, out FormContentType contentType)
    {
        if (stream.BeginsWithHeader(PdfHeader))
        {
            contentType = FormContentType.Pdf;
            return true;
        }

        if (stream.BeginsWithHeader(PngHeader))
        {
            contentType = FormContentType.Png;
            return true;
        }

        if (stream.BeginsWithHeader(JpegHeader))
        {
            contentType = FormContentType.Jpeg;
            return true;
        }

        // TIFF files may be written in either byte order; both map to the same content type.
        if (stream.BeginsWithHeader(TiffLeHeader) || stream.BeginsWithHeader(TiffBeHeader))
        {
            contentType = FormContentType.Tiff;
            return true;
        }

        contentType = default;
        return false;
    }

    /// <summary>
    /// Determines whether a stream begins, at its current position, with a specified sequence of bytes.
    /// The stream's position is left unchanged.
    /// </summary>
    /// <param name="stream">The stream to be verified.</param>
    /// <param name="header">The sequence of bytes expected to be at the start of <paramref name="stream"/>.</param>
    /// <returns><c>true</c> if the <paramref name="stream"/> begins with the specified <paramref name="header"/>. Otherwise, <c>false</c>.</returns>
    private static bool BeginsWithHeader(this Stream stream, byte[] header)
    {
        long originalPosition = stream.Position;

        // Fewer bytes remain than the signature needs, so it cannot match. Nothing has been read yet,
        // so there is no position to restore.
        if (stream.Length - originalPosition < header.Length)
        {
            return false;
        }

        try
        {
            foreach (byte headerByte in header)
            {
                // Compare as int: ReadByte returns -1 at end of stream, and narrowing that to a byte
                // would produce 0xFF, which is a legitimate signature byte (e.g. the first JPEG byte).
                if (stream.ReadByte() != headerByte)
                {
                    return false;
                }
            }

            return true;
        }
        finally
        {
            // Rewind on every exit path, including when a read throws, so callers can consume the
            // stream from where they left it.
            stream.Position = originalPosition;
        }
    }
}
| | 92 | | } |