| | 1 | | // Copyright (c) Microsoft Corporation. All rights reserved. |
| | 2 | | // Licensed under the MIT License. See License.txt in the project root for |
| | 3 | | // license information. |
| | 4 | |
|
| | 5 | | namespace Microsoft.Azure.Search.Models |
| | 6 | | { |
| | 7 | | using System; |
| | 8 | | using Microsoft.Azure.Search.Common; |
| | 9 | | using Newtonsoft.Json; |
| | 10 | | using Serialization; |
| | 11 | |
|
| | 12 | | /// <summary> |
| | 13 | | /// Defines the names of all token filters supported by Azure Cognitive Search. |
| | 14 | | /// For more information, see <see href="https://docs.microsoft.com/azure/search/index-add-custom-analyzers">Add cus |
| | 15 | | /// </summary> |
| | 16 | | [JsonConverter(typeof(ExtensibleEnumConverter<TokenFilterName>))] |
| | 17 | | public struct TokenFilterName : IEquatable<TokenFilterName> |
| | 18 | | { |
| | 19 | | private readonly string _value; |
| | 20 | |
|
| | 21 | | // MAINTENANCE NOTE: Keep these ordered the same as the table on this page: |
| | 22 | | // https://docs.microsoft.com/rest/api/searchservice/Custom-analyzers-in-Azure-Search |
| | 23 | |
|
| | 24 | | /// <summary> |
| | 25 | | /// A token filter that applies the Arabic normalizer to normalize the orthography. |
| | 26 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/ar/ArabicNormali |
| | 27 | | /// </summary> |
| 2 | 28 | | public static readonly TokenFilterName ArabicNormalization = new TokenFilterName("arabic_normalization"); |
| | 29 | |
|
| | 30 | | /// <summary> |
| | 31 | | /// Strips all characters after an apostrophe (including the apostrophe itself). |
| | 32 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/tr/ApostropheFil |
| | 33 | | /// </summary> |
| 2 | 34 | | public static readonly TokenFilterName Apostrophe = new TokenFilterName("apostrophe"); |
| | 35 | |
|
| | 36 | | /// <summary> |
| | 37 | | /// Converts alphabetic, numeric, and symbolic Unicode characters which |
| | 38 | | /// are not in the first 127 ASCII characters (the "Basic Latin" Unicode |
| | 39 | | /// block) into their ASCII equivalents, if such equivalents exist. |
| | 40 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/AS |
| | 41 | | /// </summary> |
| 2 | 42 | | public static readonly TokenFilterName AsciiFolding = new TokenFilterName("asciifolding"); |
| | 43 | |
|
| | 44 | | /// <summary> |
| | 45 | | /// Forms bigrams of CJK terms that are generated from StandardTokenizer. |
| | 46 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/cjk/CJKBigramFil |
| | 47 | | /// </summary> |
| 2 | 48 | | public static readonly TokenFilterName CjkBigram = new TokenFilterName("cjk_bigram"); |
| | 49 | |
|
| | 50 | | /// <summary> |
| | 51 | | /// Normalizes CJK width differences. Folds fullwidth ASCII variants into |
| | 52 | | /// the equivalent basic Latin, and half-width Katakana variants into the |
| | 53 | | /// equivalent Kana. |
| | 54 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/cjk/CJKWidthFilt |
| | 55 | | /// </summary> |
| 2 | 56 | | public static readonly TokenFilterName CjkWidth = new TokenFilterName("cjk_width"); |
| | 57 | |
|
| | 58 | | /// <summary> |
| | 59 | | /// Removes English possessives, and dots from acronyms. |
| | 60 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/standard/Classic |
| | 61 | | /// </summary> |
| 2 | 62 | | public static readonly TokenFilterName Classic = new TokenFilterName("classic"); |
| | 63 | |
|
| | 64 | | /// <summary> |
| | 65 | | /// Construct bigrams for frequently occurring terms while indexing. |
| | 66 | | /// Single terms are still indexed too, with bigrams overlaid. |
| | 67 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/commongrams/Comm |
| | 68 | | /// </summary> |
| 2 | 69 | | public static readonly TokenFilterName CommonGram = new TokenFilterName("common_grams"); |
| | 70 | |
|
| | 71 | | /// <summary> |
| | 72 | | /// Generates n-grams of the given size(s) starting from the front or the |
| | 73 | | /// back of an input token. |
| | 74 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/ngram/EdgeNGramT |
| | 75 | | /// </summary> |
| 2 | 76 | | public static readonly TokenFilterName EdgeNGram = new TokenFilterName("edgeNGram_v2"); |
| | 77 | |
|
| | 78 | | /// <summary> |
| | 79 | | /// Removes elisions. For example, "l'avion" (the plane) will be converted |
| | 80 | | /// to "avion" (plane). |
| | 81 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/util/ElisionFilt |
| | 82 | | /// </summary> |
| 2 | 83 | | public static readonly TokenFilterName Elision = new TokenFilterName("elision"); |
| | 84 | |
|
| | 85 | | /// <summary> |
| | 86 | | /// Normalizes German characters according to the heuristics of the |
| | 87 | | /// German2 snowball algorithm. |
| | 88 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/de/GermanNormali |
| | 89 | | /// </summary> |
| 2 | 90 | | public static readonly TokenFilterName GermanNormalization = new TokenFilterName("german_normalization"); |
| | 91 | |
|
| | 92 | | /// <summary> |
| | 93 | | /// Normalizes text in Hindi to remove some differences in spelling |
| | 94 | | /// variations. |
| | 95 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/hi/HindiNormaliz |
| | 96 | | /// </summary> |
| 2 | 97 | | public static readonly TokenFilterName HindiNormalization = new TokenFilterName("hindi_normalization"); |
| | 98 | |
|
| | 99 | | /// <summary> |
| | 100 | | /// Normalizes the Unicode representation of text in Indian languages. |
| | 101 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/in/IndicNormaliz |
| | 102 | | /// </summary> |
| 2 | 103 | | public static readonly TokenFilterName IndicNormalization = new TokenFilterName("indic_normalization"); |
| | 104 | |
|
| | 105 | | /// <summary> |
| | 106 | | /// Emits each incoming token twice, once as keyword and once as |
| | 107 | | /// non-keyword. |
| | 108 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/Ke |
| | 109 | | /// </summary> |
| 2 | 110 | | public static readonly TokenFilterName KeywordRepeat = new TokenFilterName("keyword_repeat"); |
| | 111 | |
|
| | 112 | | /// <summary> |
| | 113 | | /// A high-performance kstem filter for English. |
| | 114 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/en/KStemFilter.h |
| | 115 | | /// </summary> |
| 2 | 116 | | public static readonly TokenFilterName KStem = new TokenFilterName("kstem"); |
| | 117 | |
|
| | 118 | | /// <summary> |
| | 119 | | /// Removes words that are too long or too short. |
| | 120 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/Le |
| | 121 | | /// </summary> |
| 2 | 122 | | public static readonly TokenFilterName Length = new TokenFilterName("length"); |
| | 123 | |
|
| | 124 | | /// <summary> |
| | 125 | | /// Limits the number of tokens while indexing. |
| | 126 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/Li |
| | 127 | | /// </summary> |
| 2 | 128 | | public static readonly TokenFilterName Limit = new TokenFilterName("limit"); |
| | 129 | |
|
| | 130 | | /// <summary> |
| | 131 | | /// Normalizes token text to lower case. |
| | 132 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/LowerCaseFi |
| | 133 | | /// </summary> |
| 2 | 134 | | public static readonly TokenFilterName Lowercase = new TokenFilterName("lowercase"); |
| | 135 | |
|
| | 136 | | /// <summary> |
| | 137 | | /// Generates n-grams of the given size(s). |
| | 138 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/ngram/NGramToken |
| | 139 | | /// </summary> |
| 2 | 140 | | public static readonly TokenFilterName NGram = new TokenFilterName("nGram_v2"); |
| | 141 | |
|
| | 142 | | /// <summary> |
| | 143 | | /// Applies normalization for Persian. |
| | 144 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/fa/PersianNormal |
| | 145 | | /// </summary> |
| 2 | 146 | | public static readonly TokenFilterName PersianNormalization = new TokenFilterName("persian_normalization"); |
| | 147 | |
|
| | 148 | | /// <summary> |
| | 149 | | /// Create tokens for phonetic matches. |
| | 150 | | /// <see href="https://lucene.apache.org/core/4_10_3/analyzers-phonetic/org/apache/lucene/analysis/phonetic/pack |
| | 151 | | /// </summary> |
| 2 | 152 | | public static readonly TokenFilterName Phonetic = new TokenFilterName("phonetic"); |
| | 153 | |
|
| | 154 | | /// <summary> |
| | 155 | | /// Uses the Porter stemming algorithm to transform the token stream. |
| | 156 | | /// <see href="http://tartarus.org/~martin/PorterStemmer/" /> |
| | 157 | | /// </summary> |
| 2 | 158 | | public static readonly TokenFilterName PorterStem = new TokenFilterName("porter_stem"); |
| | 159 | |
|
| | 160 | | /// <summary> |
| | 161 | | /// Reverses the token string. |
| | 162 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/reverse/ReverseS |
| | 163 | | /// </summary> |
| 2 | 164 | | public static readonly TokenFilterName Reverse = new TokenFilterName("reverse"); |
| | 165 | |
|
| | 166 | | /// <summary> |
| | 167 | | /// Normalizes use of the interchangeable Scandinavian characters. |
| | 168 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/Sc |
| | 169 | | /// </summary> |
| 2 | 170 | | public static readonly TokenFilterName ScandinavianNormalization = new TokenFilterName("scandinavian_normalizati |
| | 171 | |
|
| | 172 | | /// <summary> |
| | 173 | | /// Folds Scandinavian characters åÅäæÄÆ->a and öÖøØ->o. It also |
| | 174 | | /// discriminates against use of double vowels aa, ae, ao, oe and oo, |
| | 175 | | /// leaving just the first one. |
| | 176 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/Sc |
| | 177 | | /// </summary> |
| 2 | 178 | | public static readonly TokenFilterName ScandinavianFoldingNormalization = new TokenFilterName("scandinavian_fold |
| | 179 | |
|
| | 180 | | /// <summary> |
| | 181 | | /// Creates combinations of tokens as a single token. |
| | 182 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/shingle/ShingleF |
| | 183 | | /// </summary> |
| 2 | 184 | | public static readonly TokenFilterName Shingle = new TokenFilterName("shingle"); |
| | 185 | |
|
| | 186 | | /// <summary> |
| | 187 | | /// A filter that stems words using a Snowball-generated stemmer. |
| | 188 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/snowball/Snowbal |
| | 189 | | /// </summary> |
| 2 | 190 | | public static readonly TokenFilterName Snowball = new TokenFilterName("snowball"); |
| | 191 | |
|
| | 192 | | /// <summary> |
| | 193 | | /// Normalizes the Unicode representation of Sorani text. |
| | 194 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/ckb/SoraniNormal |
| | 195 | | /// </summary> |
| 2 | 196 | | public static readonly TokenFilterName SoraniNormalization = new TokenFilterName("sorani_normalization"); |
| | 197 | |
|
| | 198 | | /// <summary> |
| | 199 | | /// Language specific stemming filter. |
| | 200 | | /// <see href="https://docs.microsoft.com/rest/api/searchservice/Custom-analyzers-in-Azure-Search#TokenFilters" |
| | 201 | | /// </summary> |
| 2 | 202 | | public static readonly TokenFilterName Stemmer = new TokenFilterName("stemmer"); |
| | 203 | |
|
| | 204 | | /// <summary> |
| | 205 | | /// Removes stop words from a token stream. |
| | 206 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/StopFilter. |
| | 207 | | /// </summary> |
| 2 | 208 | | public static readonly TokenFilterName Stopwords = new TokenFilterName("stopwords"); |
| | 209 | |
|
| | 210 | | /// <summary> |
| | 211 | | /// Trims leading and trailing whitespace from tokens. |
| | 212 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/Tr |
| | 213 | | /// </summary> |
| 2 | 214 | | public static readonly TokenFilterName Trim = new TokenFilterName("trim"); |
| | 215 | |
|
| | 216 | | /// <summary> |
| | 217 | | /// Truncates the terms to a specific length. |
| | 218 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/Tr |
| | 219 | | /// </summary> |
| 2 | 220 | | public static readonly TokenFilterName Truncate = new TokenFilterName("truncate"); |
| | 221 | |
|
| | 222 | | /// <summary> |
| | 223 | | /// Filters out tokens with same text as the previous token. |
| | 224 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/Re |
| | 225 | | /// </summary> |
| 2 | 226 | | public static readonly TokenFilterName Unique = new TokenFilterName("unique"); |
| | 227 | |
|
| | 228 | | /// <summary> |
| | 229 | | /// Normalizes token text to upper case. |
| | 230 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/UpperCaseFi |
| | 231 | | /// </summary> |
| 2 | 232 | | public static readonly TokenFilterName Uppercase = new TokenFilterName("uppercase"); |
| | 233 | |
|
| | 234 | | /// <summary> |
| | 235 | | /// Splits words into subwords and performs optional transformations on |
| | 236 | | /// subword groups. |
| | 237 | | /// </summary> |
| 2 | 238 | | public static readonly TokenFilterName WordDelimiter = new TokenFilterName("word_delimiter"); |
| | 239 | |
|
| | 240 | | private TokenFilterName(string name) /* Private: values are created by the static fields of this struct or by the implicit string conversion. */ |
| | 241 | | { |
| 140 | 242 | | Throw.IfArgumentNull(name, nameof(name)); /* Null guard via the project's Throw helper; presumably raises ArgumentNullException - TODO confirm. */ |
| 140 | 243 | | _value = name; /* Stored verbatim; no trimming or case normalization is applied here. */ |
| 140 | 244 | | } |
| | 245 | |
|
| | 246 | | /// <summary> |
| | 247 | | /// Implicitly wraps an arbitrary string in a TokenFilterName, so names other than the predefined static fields can be supplied. |
| | 248 | | /// </summary> |
| | 249 | | /// <param name="name">The filter name to wrap; it is passed through the private constructor's null check.</param> |
| | 250 | | /// <returns>A TokenFilterName whose string value is <paramref name="name"/>.</returns> |
| 2 | 251 | | public static implicit operator TokenFilterName(string name) => new TokenFilterName(name); |
| | 252 | |
|
| | 253 | | /// <summary> |
| | 254 | | /// Explicitly unwraps a TokenFilterName into the string it carries. |
| | 255 | | /// </summary> |
| | 256 | | /// <param name="name">The TokenFilterName whose underlying string is requested.</param> |
| | 257 | | /// <returns>The same string that <c>ToString()</c> returns for <paramref name="name"/>.</returns> |
| 0 | 258 | | public static explicit operator string(TokenFilterName name) => name._value; |
| | 259 | |
|
| | 260 | | /// <summary> |
| | 261 | | /// Compares two TokenFilterName values for equality. Calls the typed Equals overload directly so neither operand is boxed. |
| | 262 | | /// </summary> |
| | 263 | | /// <param name="lhs">The left-hand TokenFilterName.</param> |
| | 264 | | /// <param name="rhs">The right-hand TokenFilterName.</param> |
| | 265 | | /// <returns>true if both values wrap the same string (two default values also compare equal); false otherwise.</returns> |
| 0 | 266 | | public static bool operator ==(TokenFilterName lhs, TokenFilterName rhs) => lhs.Equals(rhs); |
| | 267 | |
|
| | 268 | | /// <summary> |
| | 269 | | /// Compares two TokenFilterName values for inequality. Calls the typed Equals overload directly so neither operand is boxed. |
| | 270 | | /// </summary> |
| | 271 | | /// <param name="lhs">The left-hand TokenFilterName.</param> |
| | 272 | | /// <param name="rhs">The right-hand TokenFilterName.</param> |
| | 273 | | /// <returns>true if the two values wrap different strings; false otherwise.</returns> |
| 0 | 274 | | public static bool operator !=(TokenFilterName lhs, TokenFilterName rhs) => !lhs.Equals(rhs); |
| | 275 | |
|
| | 276 | | /// <summary> |
| | 277 | | /// Indicates whether this TokenFilterName wraps the same string as another TokenFilterName. |
| | 278 | | /// </summary> |
| | 279 | | /// <param name="other">The TokenFilterName to compare against.</param> |
| | 280 | | /// <returns><c>true</c> when both underlying strings match under ordinal comparison; otherwise, <c>false</c>.</returns> |
| 70 | 281 | | public bool Equals(TokenFilterName other) => string.Equals(_value, other._value, StringComparison.Ordinal); |
| | 282 | |
|
| | 283 | | /// <summary> |
| | 284 | | /// Indicates whether the given object is a TokenFilterName equal to this one. |
| | 285 | | /// </summary> |
| | 286 | | /// <param name="obj">The object to test; may be null or of any type.</param> |
| | 287 | | /// <returns><c>true</c> if <paramref name="obj"/> is a TokenFilterName equal to this value; otherwise, <c>false</c>.</returns> |
| 0 | 288 | | public override bool Equals(object obj) => obj is TokenFilterName && Equals((TokenFilterName)obj); |
| | 289 | |
|
| | 290 | | /// <summary> |
| | 291 | | /// Returns a hash code derived from the underlying string. A default (uninitialized) value has no string and hashes to 0 instead of throwing. |
| | 292 | | /// </summary> |
| | 293 | | /// <returns>A hash code that is equal for any two values the typed Equals overload treats as equal.</returns> |
| 0 | 294 | | public override int GetHashCode() => _value?.GetHashCode() ?? 0; |
| | 295 | |
|
| | 296 | | /// <summary> |
| | 297 | | /// Gets the string held by this TokenFilterName. |
| | 298 | | /// </summary> |
| | 299 | | /// <returns>The underlying string value, exactly as it was supplied at construction.</returns> |
| 140 | 300 | | public override string ToString() => this._value; |
| | 301 | | } |
| | 302 | | } |