| | 1 | | // Copyright (c) Microsoft Corporation. All rights reserved. |
| | 2 | | // Licensed under the MIT License. |
| | 3 | |
|
| | 4 | | using System.Collections.Generic; |
| | 5 | | using System.Linq; |
| | 6 | |
|
| | 7 | | namespace Azure.AI.FormRecognizer.Models |
| | 8 | | { |
| | 9 | | /// <summary> |
| | 10 | | /// Represents a form that has been recognized by a trained model. |
| | 11 | | /// </summary> |
| | 12 | | public class RecognizedForm |
| | 13 | | { |
/// <summary>
/// Builds a <see cref="RecognizedForm"/> from a single page recognized by a model trained without labels.
/// </summary>
/// <param name="pageResult">Page-level result supplying the cluster ID, the page number and the key/value pairs.</param>
/// <param name="readResults">Read results forwarded to the field conversion and to the <see cref="FormPage"/> constructor.</param>
/// <param name="pageIndex">Index forwarded to the <see cref="FormPage"/> constructor.</param>
internal RecognizedForm(PageResult pageResult, IReadOnlyList<ReadResult> readResults, int pageIndex)
{
    // The form type of an unlabeled form is derived from the cluster ID of its page.
    FormType = $"form-{pageResult.ClusterId}";

    // For models trained without labels, the service treats every page as a separate form,
    // so the range starts and ends on the same page.
    PageRange = new FormPageRange(pageResult.Page, pageResult.Page);
    Fields = ConvertUnsupervisedFields(pageResult.Page, pageResult.KeyValuePairs, readResults);

    // For the same reason, the form holds exactly one page.
    var onlyPage = new FormPage(pageResult, readResults, pageIndex);
    var pages = new List<FormPage>();
    pages.Add(onlyPage);
    Pages = pages;
}
| | 26 | |
|
/// <summary>
/// Builds a <see cref="RecognizedForm"/> from a document recognized by a model trained with labels.
/// </summary>
/// <param name="documentResult">Document-level result supplying the document type, the page range and the labeled fields.</param>
/// <param name="pageResults">Page-level results forwarded to the page conversion.</param>
/// <param name="readResults">Read results forwarded to the field and page conversions.</param>
internal RecognizedForm(DocumentResult documentResult, IReadOnlyList<PageResult> pageResults, IReadOnlyList<ReadResult> readResults)
{
    FormType = documentResult.DocType;

    // TODO: validate that PageRange.Length == 2.
    // https://github.com/Azure/azure-sdk-for-net/issues/10547
    var range = documentResult.PageRange;
    PageRange = new FormPageRange(range[0], range[1]);

    // According to the swagger file, documentResult.Fields is required and not null, but it is
    // not present when a blank receipt is submitted for recognition. Expose an empty dictionary
    // in that case.
    if (documentResult.Fields == null)
    {
        Fields = new Dictionary<string, FormField>();
    }
    else
    {
        Fields = ConvertSupervisedFields(documentResult.Fields, readResults);
    }

    // Must run after PageRange is assigned: the page conversion filters on it.
    Pages = ConvertSupervisedPages(pageResults, readResults);
}
| | 45 | |
|
/// <summary>
/// The type of form the model identified the submitted form to be. For models trained
/// without labels, this is "form-" followed by the cluster ID of the page (ex. "form-0").
/// For models trained with labels, it is the document type (DocType) of the recognized
/// document.
/// </summary>
public string FormType { get; }
| | 51 | |
|
/// <summary>
/// The range of pages this form spans. For models trained without labels, every page is
/// treated as a separate form, so the first and last page numbers are the same.
/// </summary>
public FormPageRange PageRange { get; }
| | 56 | |
|
/// <summary>
/// A dictionary of the fields recognized from the input document. The key is
/// the <see cref="FormField.Name"/> of the field. For models trained with labels,
/// this is the training-time label of the field. For models trained with forms
/// only, a unique name is generated for each field.
/// </summary>
/// <remarks>
/// For models trained with labels, the dictionary is empty (not null) when the recognized
/// document carries no fields, and the entry for a label is null when no value was
/// returned for it.
/// </remarks>
public IReadOnlyDictionary<string, FormField> Fields { get; }
| | 64 | |
|
/// <summary>
/// A list of pages describing the recognized form elements present in the input
/// document. For models trained without labels, the list holds exactly one page. For
/// models trained with labels, it holds the pages whose number falls within
/// <see cref="PageRange"/>.
/// </summary>
public IReadOnlyList<FormPage> Pages { get; }
| | 70 | |
|
| | 71 | | private static IReadOnlyDictionary<string, FormField> ConvertUnsupervisedFields(int pageNumber, IReadOnlyList<Ke |
| | 72 | | { |
| 44 | 73 | | Dictionary<string, FormField> fieldDictionary = new Dictionary<string, FormField>(); |
| | 74 | |
|
| 44 | 75 | | int i = 0; |
| 1240 | 76 | | foreach (var keyValuePair in keyValuePairs) |
| | 77 | | { |
| 576 | 78 | | var fieldName = keyValuePair.Label ?? $"field-{i++}"; |
| 576 | 79 | | fieldDictionary[fieldName] = new FormField(fieldName, pageNumber, keyValuePair, readResults); |
| | 80 | | } |
| | 81 | |
|
| 44 | 82 | | return fieldDictionary; |
| | 83 | | } |
| | 84 | |
|
| | 85 | | private static IReadOnlyDictionary<string, FormField> ConvertSupervisedFields(IReadOnlyDictionary<string, FieldV |
| | 86 | | { |
| 92 | 87 | | Dictionary<string, FormField> fieldDictionary = new Dictionary<string, FormField>(); |
| | 88 | |
|
| 1752 | 89 | | foreach (var field in fields) |
| | 90 | | { |
| 784 | 91 | | fieldDictionary[field.Key] = field.Value == null |
| 784 | 92 | | ? null |
| 784 | 93 | | : new FormField(field.Key, field.Value, readResults); |
| | 94 | | } |
| | 95 | |
|
| 92 | 96 | | return fieldDictionary; |
| | 97 | | } |
| | 98 | |
|
/// <summary>
/// Builds the <see cref="FormPage"/> list of a form recognized by a model trained with labels,
/// keeping only the pages whose number falls within <see cref="PageRange"/>.
/// </summary>
/// <param name="pageResults">
/// Page-level results, paired with <paramref name="readResults"/> by index. May be null, empty or
/// shorter than <paramref name="readResults"/>; a page without a matching entry is built with a
/// null page result.
/// </param>
/// <param name="readResults">Read results; one <see cref="FormPage"/> is produced per entry that is in range.</param>
/// <returns>The pages that belong to this form, in the order of <paramref name="readResults"/>.</returns>
/// <remarks>Reads <see cref="PageRange"/>, so it must be called after that property is assigned.</remarks>
private IReadOnlyList<FormPage> ConvertSupervisedPages(IReadOnlyList<PageResult> pageResults, IReadOnlyList<ReadResult> readResults)
{
    List<FormPage> pages = new List<FormPage>();

    // Number of page results that can safely be indexed. Checking only for "any" is not enough:
    // a non-empty list shorter than readResults would make pageResults[i] throw.
    int pageResultCount = pageResults?.Count ?? 0;

    for (int i = 0; i < readResults.Count; i++)
    {
        // Check range here so only pages that are part of this form are added. Avoid "pageNumber = i + 1"
        // because it is not safe to assume the pages will always be in order.
        var pageNumber = readResults[i].Page;

        if (pageNumber >= PageRange.FirstPageNumber && pageNumber <= PageRange.LastPageNumber)
        {
            PageResult pageResult = i < pageResultCount ? pageResults[i] : null;
            pages.Add(new FormPage(pageResult, readResults, i));
        }
    }

    return pages;
}
| | 118 | | } |
| | 119 | | } |