// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See License.txt in the project root for
// license information.

namespace Microsoft.Azure.Search.Models
{
    using System;
    using Microsoft.Azure.Search.Common;
    using Newtonsoft.Json;
    using Serialization;

    /// <summary>
    /// Defines the names of all tokenizers supported by Azure Cognitive Search.
    /// For more information, see
    /// <see href="https://docs.microsoft.com/azure/search/index-add-custom-analyzers">Add custom analyzers</see>.
    /// </summary>
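    /// <example>
    /// A minimal sketch of the intended JSON behavior (values are expected to
    /// serialize as plain strings via the converter declared below):
    /// <code>
    /// string json = JsonConvert.SerializeObject(TokenizerName.Classic);  // "classic"
    /// </code>
    /// </example>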
    [JsonConverter(typeof(ExtensibleEnumConverter<TokenizerName>))]
    public struct TokenizerName : IEquatable<TokenizerName>
    {
        private readonly string _value;

        // MAINTENANCE NOTE: Keep these ordered the same as the table on this page:
        // https://docs.microsoft.com/rest/api/searchservice/Custom-analyzers-in-Azure-Search

        /// <summary>
        /// Grammar-based tokenizer that is suitable for processing most
        /// European-language documents.
        /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/standard/ClassicTokenizer.html" />
        /// </summary>
        public static readonly TokenizerName Classic = new TokenizerName("classic");

        /// <summary>
        /// Tokenizes the input from an edge into n-grams of the given size(s).
        /// <see href="https://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.html" />
        /// </summary>
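        /// <example>
        /// For illustration only (the grams are produced by the search service,
        /// not by this class): with min/max gram sizes of 1 and 3, the input
        /// "quick" yields "q", "qu", and "qui".
        /// </example>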
        public static readonly TokenizerName EdgeNGram = new TokenizerName("edgeNGram");

        /// <summary>
        /// Emits the entire input as a single token.
        /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/KeywordTokenizer.html" />
        /// </summary>
        public static readonly TokenizerName Keyword = new TokenizerName("keyword_v2");

        /// <summary>
        /// Divides text at non-letters.
        /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/LetterTokenizer.html" />
        /// </summary>
        public static readonly TokenizerName Letter = new TokenizerName("letter");

        /// <summary>
        /// Divides text at non-letters and converts the resulting tokens to lower case.
        /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/LowerCaseTokenizer.html" />
        /// </summary>
        public static readonly TokenizerName Lowercase = new TokenizerName("lowercase");

        /// <summary>
        /// Divides text using language-specific rules.
        /// </summary>
        public static readonly TokenizerName MicrosoftLanguageTokenizer = new TokenizerName("microsoft_language_tokenizer");

        /// <summary>
        /// Divides text using language-specific rules and reduces words to their base forms.
        /// </summary>
        public static readonly TokenizerName MicrosoftLanguageStemmingTokenizer = new TokenizerName("microsoft_language_stemming_tokenizer");

        /// <summary>
        /// Tokenizes the input into n-grams of the given size(s).
        /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/ngram/NGramTokenizer.html" />
        /// </summary>
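        /// <example>
        /// For illustration only: with min/max gram sizes of 1 and 2, the input
        /// "ab" yields "a", "ab", and "b".
        /// </example>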
        public static readonly TokenizerName NGram = new TokenizerName("nGram");

        /// <summary>
        /// Tokenizer for path-like hierarchies.
        /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/path/PathHierarchyTokenizer.html" />
        /// </summary>
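        /// <example>
        /// For illustration only: "/usr/local/bin" yields "/usr", "/usr/local",
        /// and "/usr/local/bin".
        /// </example>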
        public static readonly TokenizerName PathHierarchy = new TokenizerName("path_hierarchy_v2");

        /// <summary>
        /// Tokenizer that uses regex pattern matching to construct distinct tokens.
        /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/pattern/PatternTokenizer.html" />
        /// </summary>
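        /// <example>
        /// For illustration only, assuming the tokenizer is configured with the
        /// separator pattern ",": the input "a,b,c" yields "a", "b", and "c".
        /// </example>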
        public static readonly TokenizerName Pattern = new TokenizerName("pattern");

        /// <summary>
        /// Breaks text following the Unicode Text Segmentation rules.
        /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/standard/StandardTokenizer.html" />
        /// </summary>
        public static readonly TokenizerName Standard = new TokenizerName("standard_v2");

        /// <summary>
        /// Tokenizes URLs and emails as one token.
        /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.html" />
        /// </summary>
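        /// <example>
        /// For illustration only: "email admin@example.com" yields the tokens
        /// "email" and "admin@example.com".
        /// </example>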
        public static readonly TokenizerName UaxUrlEmail = new TokenizerName("uax_url_email");

        /// <summary>
        /// Divides text at whitespace.
        /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/WhitespaceTokenizer.html" />
        /// </summary>
        public static readonly TokenizerName Whitespace = new TokenizerName("whitespace");

        private TokenizerName(string name)
        {
            Throw.IfArgumentNull(name, nameof(name));
            _value = name;
        }

        /// <summary>
        /// Defines implicit conversion from string to TokenizerName.
        /// </summary>
        /// <param name="name">string to convert.</param>
        /// <returns>The string as a TokenizerName.</returns>
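        /// <example>
        /// A minimal usage sketch; "my_tokenizer" is a hypothetical custom
        /// tokenizer name:
        /// <code>
        /// TokenizerName name = "my_tokenizer";
        /// </code>
        /// </example>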
        public static implicit operator TokenizerName(string name) => new TokenizerName(name);

        /// <summary>
        /// Defines explicit conversion from TokenizerName to string.
        /// </summary>
        /// <param name="name">TokenizerName to convert.</param>
        /// <returns>The TokenizerName as a string.</returns>
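        /// <example>
        /// <code>
        /// string value = (string)TokenizerName.Classic;  // "classic"
        /// </code>
        /// </example>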
        public static explicit operator string(TokenizerName name) => name.ToString();

        /// <summary>
        /// Compares two TokenizerName values for equality.
        /// </summary>
        /// <param name="lhs">The first TokenizerName to compare.</param>
        /// <param name="rhs">The second TokenizerName to compare.</param>
        /// <returns>true if the TokenizerName values are equal; false otherwise.</returns>
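        /// <example>
        /// <code>
        /// bool equal = TokenizerName.Classic == "classic";  // true, via the implicit conversion
        /// </code>
        /// </example>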
        public static bool operator ==(TokenizerName lhs, TokenizerName rhs) => Equals(lhs, rhs);

        /// <summary>
        /// Compares two TokenizerName values for inequality.
        /// </summary>
        /// <param name="lhs">The first TokenizerName to compare.</param>
        /// <param name="rhs">The second TokenizerName to compare.</param>
        /// <returns>true if the TokenizerName values are not equal; false otherwise.</returns>
        public static bool operator !=(TokenizerName lhs, TokenizerName rhs) => !Equals(lhs, rhs);

        /// <summary>
        /// Compares the TokenizerName for equality with another TokenizerName.
        /// </summary>
        /// <param name="other">The TokenizerName with which to compare.</param>
        /// <returns><c>true</c> if the TokenizerName objects are equal; otherwise, <c>false</c>.</returns>
        public bool Equals(TokenizerName other) => _value == other._value;

        /// <summary>
        /// Determines whether the specified object is equal to the current object.
        /// </summary>
        /// <param name="obj">The object to compare with the current object.</param>
        /// <returns><c>true</c> if the specified object is equal to the current object; otherwise, <c>false</c>.</returns>
        public override bool Equals(object obj) => obj is TokenizerName other && Equals(other);

        /// <summary>
        /// Serves as the default hash function.
        /// </summary>
        /// <returns>A hash code for the current object.</returns>
        public override int GetHashCode() => _value?.GetHashCode() ?? 0;  // _value is null for default(TokenizerName)

        /// <summary>
        /// Returns a string representation of the TokenizerName.
        /// </summary>
        /// <returns>The TokenizerName as a string.</returns>
        public override string ToString() => _value;
    }
}