| | 1 | | // Copyright (c) Microsoft Corporation. All rights reserved. |
| | 2 | | // Licensed under the MIT License. |
| | 3 | |
|
| | 4 | | using System.Collections.Generic; |
| | 5 | |
|
| | 6 | | namespace Azure.AI.FormRecognizer.Models |
| | 7 | | { |
/// <summary>
/// A single page recognized from the input document, exposing its lines of text, its tables
/// and the page-level metadata (page number, text angle, dimensions and unit).
/// </summary>
public class FormPage
{
    /// <summary>
    /// Builds a <see cref="FormPage"/> from the read result found at <paramref name="pageIndex"/>
    /// and, when available, the tables of the matching page result.
    /// </summary>
    /// <param name="pageResult">The page result supplying the tables. May be <c>null</c>, in which
    /// case <see cref="Tables"/> is an empty list.</param>
    /// <param name="readResults">The read results of the operation; the entry at
    /// <paramref name="pageIndex"/> supplies the metadata and lines of this page.</param>
    /// <param name="pageIndex">The index into <paramref name="readResults"/> for this page.</param>
    internal FormPage(PageResult pageResult, IReadOnlyList<ReadResult> readResults, int pageIndex)
    {
        ReadResult pageRead = readResults[pageIndex];

        PageNumber = pageRead.Page;
        Width = pageRead.Width;
        Height = pageRead.Height;
        Unit = pageRead.Unit;

        // Workaround because the service can sometimes return angles between 180 and 360 (bug).
        // Currently tracked by: https://github.com/Azure/azure-sdk-for-net/issues/12319
        // Values above 180 are shifted down by a full turn so they land in (-180, 180].
        float angle = pageRead.Angle;
        if (angle > 180.0f)
        {
            angle -= 360.0f;
        }

        TextAngle = angle;

        // Missing lines are surfaced as an empty list rather than null.
        if (pageRead.Lines == null)
        {
            Lines = new List<FormLine>();
        }
        else
        {
            Lines = ConvertLines(pageRead.Lines, pageRead.Page);
        }

        // A missing page result, or one without tables, is surfaced as an empty list rather than null.
        if (pageResult == null || pageResult.Tables == null)
        {
            Tables = new List<FormTable>();
        }
        else
        {
            Tables = ConvertTables(pageResult, readResults, pageIndex);
        }
    }

    /// <summary>
    /// The 1-based page number in the input document.
    /// </summary>
    public int PageNumber { get; }

    /// <summary>
    /// The general orientation of the text in clockwise direction, measured in degrees between (-180, 180].
    /// </summary>
    public float TextAngle { get; }

    /// <summary>
    /// The width of the image/PDF in pixels/inches, respectively.
    /// </summary>
    public float Width { get; }

    /// <summary>
    /// The height of the image/PDF in pixels/inches, respectively.
    /// </summary>
    public float Height { get; }

    /// <summary>
    /// The unit used by the width, height and <see cref="BoundingBox"/> properties. For images, the unit is
    /// "pixel". For PDF, the unit is "inch".
    /// </summary>
    public LengthUnit Unit { get; }

    /// <summary>
    /// When <c>IncludeFieldElements</c> is set to <c>true</c>, a list of recognized lines of text.
    /// An empty list otherwise. For calls to recognize content, this list is always populated. The maximum
    /// number of lines returned is 300 per page. The lines are sorted top to bottom, left to right, although
    /// in certain cases proximity is treated with higher priority. As the sorting order depends on the
    /// detected text, it may change across images and OCR version updates. Thus, business logic should be
    /// built upon the actual line location instead of order.
    /// </summary>
    public IReadOnlyList<FormLine> Lines { get; }

    /// <summary>
    /// A list of extracted tables contained in a page.
    /// </summary>
    public IReadOnlyList<FormTable> Tables { get; }

    /// <summary>
    /// Wraps every <see cref="TextLine"/> in a <see cref="FormLine"/> tagged with the given page number,
    /// preserving the input order.
    /// </summary>
    /// <param name="textLines">The text lines to convert.</param>
    /// <param name="pageNumber">The page number passed to each created <see cref="FormLine"/>.</param>
    /// <returns>A list with one <see cref="FormLine"/> per input line.</returns>
    private static IReadOnlyList<FormLine> ConvertLines(IReadOnlyList<TextLine> textLines, int pageNumber)
    {
        int lineCount = textLines.Count;
        var converted = new List<FormLine>(lineCount);

        for (int i = 0; i < lineCount; i++)
        {
            converted.Add(new FormLine(textLines[i], pageNumber));
        }

        return converted;
    }

    /// <summary>
    /// Wraps every table of <paramref name="pageResult"/> in a <see cref="FormTable"/>, preserving the
    /// input order.
    /// </summary>
    /// <param name="pageResult">The page result whose tables are converted. Its tables must not be <c>null</c>.</param>
    /// <param name="readResults">The read results forwarded to each created <see cref="FormTable"/>.</param>
    /// <param name="pageIndex">The page index forwarded to each created <see cref="FormTable"/>.</param>
    /// <returns>A list with one <see cref="FormTable"/> per input table.</returns>
    private static IReadOnlyList<FormTable> ConvertTables(PageResult pageResult, IReadOnlyList<ReadResult> readResults, int pageIndex)
    {
        var converted = new List<FormTable>();

        foreach (var rawTable in pageResult.Tables)
        {
            FormTable formTable = new FormTable(rawTable, readResults, pageIndex);
            converted.Add(formTable);
        }

        return converted;
    }
}
| | 98 | | } |