| | 1 | | // Copyright (c) Microsoft Corporation. All rights reserved. |
| | 2 | | // Licensed under the MIT License. |
| | 3 | |
|
| | 4 | | using System; |
| | 5 | | using System.Collections.Generic; |
| | 6 | | using System.Globalization; |
| | 7 | | using System.Text.RegularExpressions; |
| | 8 | |
|
| | 9 | | namespace Azure.AI.FormRecognizer.Models |
| | 10 | | { |
| | 11 | | /// <summary> |
| | 12 | | /// Represents a field recognized in an input form. |
| | 13 | | /// </summary> |
| | 14 | | public class FormField |
| | 15 | | { |
/// <summary>
/// Initializes a new instance of the <see cref="FormField"/> class from a recognized
/// key/value pair, populating both <see cref="LabelData"/> and <see cref="ValueData"/>.
/// </summary>
/// <param name="name">The name assigned to this field.</param>
/// <param name="pageNumber">The page number recorded on both the label data and the value data.</param>
/// <param name="field">The key/value pair supplying the confidence, texts, bounding boxes and element references.</param>
/// <param name="readResults">The read results against which element references are resolved.</param>
internal FormField(string name, int pageNumber, KeyValuePair field, IReadOnlyList<ReadResult> readResults)
{
    Confidence = field.Confidence;
    Name = name;

    // Label (key) side. A missing bounding box is left at its default value and
    // missing element references produce an empty element list.
    BoundingBox labelBoundingBox = default;
    if (field.Key.BoundingBox != null)
    {
        labelBoundingBox = new BoundingBox(field.Key.BoundingBox);
    }

    IReadOnlyList<FormElement> labelElements;
    if (field.Key.Elements == null)
    {
        labelElements = new List<FormElement>();
    }
    else
    {
        labelElements = ConvertTextReferences(field.Key.Elements, readResults);
    }

    LabelData = new FieldData(field.Key.Text, pageNumber, labelBoundingBox, labelElements);

    // Value side, handled exactly like the label side.
    BoundingBox valueBoundingBox = default;
    if (field.Value.BoundingBox != null)
    {
        valueBoundingBox = new BoundingBox(field.Value.BoundingBox);
    }

    IReadOnlyList<FormElement> valueElements;
    if (field.Value.Elements == null)
    {
        valueElements = new List<FormElement>();
    }
    else
    {
        valueElements = ConvertTextReferences(field.Value.Elements, readResults);
    }

    ValueData = new FieldData(field.Value.Text, pageNumber, valueBoundingBox, valueElements);

    // The typed value is built from the raw value text only.
    Value = new FieldValue(new FieldValue_internal(field.Value.Text), readResults);
}
| | 35 | |
|
/// <summary>
/// Initializes a new instance of the <see cref="FormField"/> class from a field value
/// returned by the service. <see cref="LabelData"/> is always <c>null</c> for fields
/// created this way.
/// </summary>
/// <param name="name">The name assigned to this field.</param>
/// <param name="fieldValue">The field value supplying the confidence, text, page, bounding box and element references.</param>
/// <param name="readResults">The read results against which element references are resolved.</param>
internal FormField(string name, FieldValue_internal fieldValue, IReadOnlyList<ReadResult> readResults)
{
    Confidence = fieldValue.Confidence ?? Constants.DefaultConfidenceValue;
    Name = name;
    LabelData = null;

    // Bounding box, page and text are not returned by the service in two scenarios:
    //   - When this field is global and not associated with a specific page (e.g. ReceiptType).
    //   - When this field is a collection, such as a list or dictionary.
    //
    // In these scenarios we do not set a ValueData.

    // A null bounding box is treated the same as an empty one, so a missing list
    // cannot cause a NullReferenceException here.
    bool hasBoundingBox = fieldValue.BoundingBox != null && fieldValue.BoundingBox.Count > 0;

    if (!hasBoundingBox && fieldValue.Page == null && fieldValue.Text == null)
    {
        ValueData = null;
    }
    else
    {
        // Same null handling as the key/value-pair constructor: missing element
        // references become an empty list instead of failing during enumeration.
        IReadOnlyList<FormElement> valueElements = fieldValue.Elements != null
            ? ConvertTextReferences(fieldValue.Elements, readResults)
            : new List<FormElement>();

        // TODO: FormEnum<T> ?
        BoundingBox boundingBox = fieldValue.BoundingBox == null
            ? default
            : new BoundingBox(fieldValue.BoundingBox);

        // NOTE(review): Page.Value still throws if the page is missing while text or a
        // bounding box is present; no fallback page number is evident from this file.
        ValueData = new FieldData(fieldValue.Text, fieldValue.Page.Value, boundingBox, valueElements);
    }

    Value = new FieldValue(fieldValue, readResults);
}
| | 64 | |
|
/// <summary>
/// The canonical name of the field, which uniquely identifies it within the form.
/// </summary>
public string Name { get; }

/// <summary>
/// The text, bounding box and content of the field's label as it appears in the form.
/// This is <c>null</c> when the field was created from a service field value rather
/// than from a key/value pair.
/// </summary>
public FieldData LabelData { get; }

/// <summary>
/// The text, bounding box and content of the field's value as it appears in the form.
/// This is <c>null</c> when the service returned no bounding box, page or text for the field.
/// </summary>
public FieldData ValueData { get; }

/// <summary>
/// The strongly-typed value of this <see cref="FormField"/>.
/// </summary>
public FieldValue Value { get; }

/// <summary>
/// The degree of certainty of the recognition result, in the range [0.0, 1.0].
/// When a service field value carries no confidence, <c>Constants.DefaultConfidenceValue</c> is used.
/// </summary>
public float Confidence { get; }
| | 89 | |
|
/// <summary>
/// Resolves each element reference to the <see cref="FormElement"/> it points at.
/// </summary>
/// <param name="references">The element references to resolve. A <c>null</c> list is treated as empty.</param>
/// <param name="readResults">The read results against which the references are resolved.</param>
/// <returns>
/// A new list holding one element per reference, in the same order as <paramref name="references"/>;
/// an empty list when <paramref name="references"/> is <c>null</c>.
/// </returns>
internal static IReadOnlyList<FormElement> ConvertTextReferences(IReadOnlyList<string> references, IReadOnlyList<ReadResult> readResults)
{
    // The constructors in this class show that element lists can be absent, so a null
    // list yields an empty result rather than a NullReferenceException in the loop.
    if (references == null)
    {
        return new List<FormElement>();
    }

    // The final size is known up front, so allocate the backing array once.
    List<FormElement> formElements = new List<FormElement>(references.Count);
    foreach (string reference in references)
    {
        formElements.Add(ResolveTextReference(readResults, reference));
    }

    return formElements;
}
| | 99 | |
|
// Pattern for a word reference: ".../readResults/{pageIndex}/lines/{lineIndex}/words/...".
// ResolveTextReference reads the named groups pageIndex, lineIndex and wordIndex from its matches.
// NOTE(review): the end of this initializer (pattern tail and Regex options) is cut off in this view - confirm against the real source.
| 2 | 100 | | private static Regex _wordRegex = new Regex(@"/readResults/(?<pageIndex>\d*)/lines/(?<lineIndex>\d*)/words/(?<wo |
// Pattern for a line reference: ".../readResults/{pageIndex}/lines/{lineIndex}", anchored at the end of the input,
// so a word reference (which continues with "/words/...") does not match it.
// NOTE(review): the Regex options are cut off in this view; both fields are also candidates for 'readonly' - confirm.
| 2 | 101 | | private static Regex _lineRegex = new Regex(@"/readResults/(?<pageIndex>\d*)/lines/(?<lineIndex>\d*)$", RegexOpt |
| | 102 | |
|
/// <summary>
/// Resolves a single element reference to the word or line it points at in
/// <paramref name="readResults"/>.
/// </summary>
/// <param name="readResults">The read results, indexed by the zero-based page index found in the reference.</param>
/// <param name="reference">
/// The reference to resolve, e.g. "#/readResults/3/lines/7/words/12" for a word
/// or "#/readResults/3/lines/7" for a line.
/// </param>
/// <returns>
/// A <see cref="FormWord"/> for a word reference or a <see cref="FormLine"/> for a line reference.
/// The page number given to the element is the zero-based page index plus one.
/// </returns>
/// <exception cref="InvalidOperationException">
/// The reference matches neither the word nor the line pattern, or one of its indices
/// is empty, not made of ASCII digits, or too large for an <see cref="int"/>.
/// </exception>
private static FormElement ResolveTextReference(IReadOnlyList<ReadResult> readResults, string reference)
{
    // TODO: Add additional validations here.
    // https://github.com/Azure/azure-sdk-for-net/issues/10363

    // Example: the following should result in PageIndex = 3, LineIndex = 7, WordIndex = 12
    //   "#/analyzeResult/readResults/3/lines/7/words/12" from DocumentResult
    //   "#/readResults/3/lines/7/words/12" from PageResult

    // Word Reference
    Match wordMatch = _wordRegex.Match(reference);
    if (wordMatch.Success && wordMatch.Groups.Count == 4)
    {
        if (TryReadIndex(wordMatch, "pageIndex", out int pageIndex)
            && TryReadIndex(wordMatch, "lineIndex", out int lineIndex)
            && TryReadIndex(wordMatch, "wordIndex", out int wordIndex))
        {
            return new FormWord(readResults[pageIndex].Lines[lineIndex].Words[wordIndex], pageIndex + 1);
        }
    }

    // Line Reference
    Match lineMatch = _lineRegex.Match(reference);
    if (lineMatch.Success && lineMatch.Groups.Count == 3)
    {
        if (TryReadIndex(lineMatch, "pageIndex", out int pageIndex)
            && TryReadIndex(lineMatch, "lineIndex", out int lineIndex))
        {
            return new FormLine(readResults[pageIndex].Lines[lineIndex], pageIndex + 1);
        }
    }

    // Reached when no pattern matched, and also when a pattern matched but an index
    // could not be parsed: "\d*" accepts an empty string and digit runs of any length.
    throw new InvalidOperationException($"Failed to parse element reference: {reference}");
}

/// <summary>
/// Reads the named group of <paramref name="match"/> as a non-negative, ASCII-digit-only integer.
/// </summary>
/// <returns><c>true</c> when the group's text parsed successfully; otherwise <c>false</c>.</returns>
private static bool TryReadIndex(Match match, string groupName, out int index)
{
    // NumberStyles.None allows digits only: no sign, no surrounding whitespace.
    return int.TryParse(match.Groups[groupName].Value, NumberStyles.None, CultureInfo.InvariantCulture, out index);
}
| | 135 | | } |
| | 136 | | } |