// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See License.txt in the project root for
// license information.

namespace Microsoft.Azure.Search.Models
{
    using System;
    using Microsoft.Azure.Search.Common;
    using Newtonsoft.Json;
    using Serialization;
| | | 12 | | /// <summary> |
| | | 13 | | /// Defines the names of all token filters supported by Azure Cognitive Search. |
| | | 14 | | /// For more information, see <see href="https://docs.microsoft.com/azure/search/index-add-custom-analyzers">Add cus |
| | | 15 | | /// </summary> |
| | | 16 | | [JsonConverter(typeof(ExtensibleEnumConverter<TokenFilterName>))] |
| | | 17 | | public struct TokenFilterName : IEquatable<TokenFilterName> |
| | | 18 | | { |
| | | 19 | | private readonly string _value; |
| | | 20 | | |
| | | 21 | | // MAINTENANCE NOTE: Keep these ordered the same as the table on this page: |
| | | 22 | | // https://docs.microsoft.com/rest/api/searchservice/Custom-analyzers-in-Azure-Search |
| | | 23 | | |
| | | 24 | | /// <summary> |
| | | 25 | | /// A token filter that applies the Arabic normalizer to normalize the orthography. |
| | | 26 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/ar/ArabicNormali |
| | | 27 | | /// </summary> |
| | 2 | 28 | | public static readonly TokenFilterName ArabicNormalization = new TokenFilterName("arabic_normalization"); |
| | | 29 | | |
| | | 30 | | /// <summary> |
| | | 31 | | /// Strips all characters after an apostrophe (including the apostrophe itself). |
| | | 32 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/tr/ApostropheFil |
| | | 33 | | /// </summary> |
| | 2 | 34 | | public static readonly TokenFilterName Apostrophe = new TokenFilterName("apostrophe"); |
| | | 35 | | |
| | | 36 | | /// <summary> |
| | | 37 | | /// Converts alphabetic, numeric, and symbolic Unicode characters which |
| | | 38 | | /// are not in the first 127 ASCII characters (the "Basic Latin" Unicode |
| | | 39 | | /// block) into their ASCII equivalents, if such equivalents exist. |
| | | 40 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/AS |
| | | 41 | | /// </summary> |
| | 2 | 42 | | public static readonly TokenFilterName AsciiFolding = new TokenFilterName("asciifolding"); |
| | | 43 | | |
| | | 44 | | /// <summary> |
| | | 45 | | /// Forms bigrams of CJK terms that are generated from StandardTokenizer. |
| | | 46 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/cjk/CJKBigramFil |
| | | 47 | | /// </summary> |
| | 2 | 48 | | public static readonly TokenFilterName CjkBigram = new TokenFilterName("cjk_bigram"); |
| | | 49 | | |
| | | 50 | | /// <summary> |
| | | 51 | | /// Normalizes CJK width differences. Folds fullwidth ASCII variants into |
| | | 52 | | /// the equivalent basic Latin, and half-width Katakana variants into the |
| | | 53 | | /// equivalent Kana. |
| | | 54 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/cjk/CJKWidthFilt |
| | | 55 | | /// </summary> |
| | 2 | 56 | | public static readonly TokenFilterName CjkWidth = new TokenFilterName("cjk_width"); |
| | | 57 | | |
| | | 58 | | /// <summary> |
| | | 59 | | /// Removes English possessives, and dots from acronyms. |
| | | 60 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/standard/Classic |
| | | 61 | | /// </summary> |
| | 2 | 62 | | public static readonly TokenFilterName Classic = new TokenFilterName("classic"); |
| | | 63 | | |
| | | 64 | | /// <summary> |
| | | 65 | | /// Construct bigrams for frequently occurring terms while indexing. |
| | | 66 | | /// Single terms are still indexed too, with bigrams overlaid. |
| | | 67 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/commongrams/Comm |
| | | 68 | | /// </summary> |
| | 2 | 69 | | public static readonly TokenFilterName CommonGram = new TokenFilterName("common_grams"); |
| | | 70 | | |
| | | 71 | | /// <summary> |
| | | 72 | | /// Generates n-grams of the given size(s) starting from the front or the |
| | | 73 | | /// back of an input token. |
| | | 74 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/ngram/EdgeNGramT |
| | | 75 | | /// </summary> |
| | 2 | 76 | | public static readonly TokenFilterName EdgeNGram = new TokenFilterName("edgeNGram_v2"); |
| | | 77 | | |
| | | 78 | | /// <summary> |
| | | 79 | | /// Removes elisions. For example, "l'avion" (the plane) will be converted |
| | | 80 | | /// to "avion" (plane). |
| | | 81 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/util/ElisionFilt |
| | | 82 | | /// </summary> |
| | 2 | 83 | | public static readonly TokenFilterName Elision = new TokenFilterName("elision"); |
| | | 84 | | |
| | | 85 | | /// <summary> |
| | | 86 | | /// Normalizes German characters according to the heuristics of the |
| | | 87 | | /// German2 snowball algorithm. |
| | | 88 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/de/GermanNormali |
| | | 89 | | /// </summary> |
| | 2 | 90 | | public static readonly TokenFilterName GermanNormalization = new TokenFilterName("german_normalization"); |
| | | 91 | | |
| | | 92 | | /// <summary> |
| | | 93 | | /// Normalizes text in Hindi to remove some differences in spelling |
| | | 94 | | /// variations. |
| | | 95 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/hi/HindiNormaliz |
| | | 96 | | /// </summary> |
| | 2 | 97 | | public static readonly TokenFilterName HindiNormalization = new TokenFilterName("hindi_normalization"); |
| | | 98 | | |
| | | 99 | | /// <summary> |
| | | 100 | | /// Normalizes the Unicode representation of text in Indian languages. |
| | | 101 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/in/IndicNormaliz |
| | | 102 | | /// </summary> |
| | 2 | 103 | | public static readonly TokenFilterName IndicNormalization = new TokenFilterName("indic_normalization"); |
| | | 104 | | |
| | | 105 | | /// <summary> |
| | | 106 | | /// Emits each incoming token twice, once as keyword and once as |
| | | 107 | | /// non-keyword. |
| | | 108 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/Ke |
| | | 109 | | /// </summary> |
| | 2 | 110 | | public static readonly TokenFilterName KeywordRepeat = new TokenFilterName("keyword_repeat"); |
| | | 111 | | |
| | | 112 | | /// <summary> |
| | | 113 | | /// A high-performance kstem filter for English. |
| | | 114 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/en/KStemFilter.h |
| | | 115 | | /// </summary> |
| | 2 | 116 | | public static readonly TokenFilterName KStem = new TokenFilterName("kstem"); |
| | | 117 | | |
| | | 118 | | /// <summary> |
| | | 119 | | /// Removes words that are too long or too short. |
| | | 120 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/Le |
| | | 121 | | /// </summary> |
| | 2 | 122 | | public static readonly TokenFilterName Length = new TokenFilterName("length"); |
| | | 123 | | |
| | | 124 | | /// <summary> |
| | | 125 | | /// Limits the number of tokens while indexing. |
| | | 126 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/Li |
| | | 127 | | /// </summary> |
| | 2 | 128 | | public static readonly TokenFilterName Limit = new TokenFilterName("limit"); |
| | | 129 | | |
| | | 130 | | /// <summary> |
| | | 131 | | /// Normalizes token text to lower case. |
| | | 132 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/LowerCaseFi |
| | | 133 | | /// </summary> |
| | 2 | 134 | | public static readonly TokenFilterName Lowercase = new TokenFilterName("lowercase"); |
| | | 135 | | |
| | | 136 | | /// <summary> |
| | | 137 | | /// Generates n-grams of the given size(s). |
| | | 138 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/ngram/NGramToken |
| | | 139 | | /// </summary> |
| | 2 | 140 | | public static readonly TokenFilterName NGram = new TokenFilterName("nGram_v2"); |
| | | 141 | | |
| | | 142 | | /// <summary> |
| | | 143 | | /// Applies normalization for Persian. |
| | | 144 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/fa/PersianNormal |
| | | 145 | | /// </summary> |
| | 2 | 146 | | public static readonly TokenFilterName PersianNormalization = new TokenFilterName("persian_normalization"); |
| | | 147 | | |
| | | 148 | | /// <summary> |
| | | 149 | | /// Create tokens for phonetic matches. |
| | | 150 | | /// <see href="https://lucene.apache.org/core/4_10_3/analyzers-phonetic/org/apache/lucene/analysis/phonetic/pack |
| | | 151 | | /// </summary> |
| | 2 | 152 | | public static readonly TokenFilterName Phonetic = new TokenFilterName("phonetic"); |
| | | 153 | | |
| | | 154 | | /// <summary> |
| | | 155 | | /// Uses the Porter stemming algorithm to transform the token stream. |
| | | 156 | | /// <see href="http://tartarus.org/~martin/PorterStemmer/" /> |
| | | 157 | | /// </summary> |
| | 2 | 158 | | public static readonly TokenFilterName PorterStem = new TokenFilterName("porter_stem"); |
| | | 159 | | |
| | | 160 | | /// <summary> |
| | | 161 | | /// Reverses the token string. |
| | | 162 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/reverse/ReverseS |
| | | 163 | | /// </summary> |
| | 2 | 164 | | public static readonly TokenFilterName Reverse = new TokenFilterName("reverse"); |
| | | 165 | | |
| | | 166 | | /// <summary> |
| | | 167 | | /// Normalizes use of the interchangeable Scandinavian characters. |
| | | 168 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/Sc |
| | | 169 | | /// </summary> |
| | 2 | 170 | | public static readonly TokenFilterName ScandinavianNormalization = new TokenFilterName("scandinavian_normalizati |
| | | 171 | | |
| | | 172 | | /// <summary> |
| | | 173 | | /// Folds Scandinavian characters åÅäæÄÆ->a and öÖøØ->o. It also |
| | | 174 | | /// discriminates against use of double vowels aa, ae, ao, oe and oo, |
| | | 175 | | /// leaving just the first one. |
| | | 176 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/Sc |
| | | 177 | | /// </summary> |
| | 2 | 178 | | public static readonly TokenFilterName ScandinavianFoldingNormalization = new TokenFilterName("scandinavian_fold |
| | | 179 | | |
| | | 180 | | /// <summary> |
| | | 181 | | /// Creates combinations of tokens as a single token. |
| | | 182 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/shingle/ShingleF |
| | | 183 | | /// </summary> |
| | 2 | 184 | | public static readonly TokenFilterName Shingle = new TokenFilterName("shingle"); |
| | | 185 | | |
| | | 186 | | /// <summary> |
| | | 187 | | /// A filter that stems words using a Snowball-generated stemmer. |
| | | 188 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/snowball/Snowbal |
| | | 189 | | /// </summary> |
| | 2 | 190 | | public static readonly TokenFilterName Snowball = new TokenFilterName("snowball"); |
| | | 191 | | |
| | | 192 | | /// <summary> |
| | | 193 | | /// Normalizes the Unicode representation of Sorani text. |
| | | 194 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/ckb/SoraniNormal |
| | | 195 | | /// </summary> |
| | 2 | 196 | | public static readonly TokenFilterName SoraniNormalization = new TokenFilterName("sorani_normalization"); |
| | | 197 | | |
| | | 198 | | /// <summary> |
| | | 199 | | /// Language specific stemming filter. |
| | | 200 | | /// <see href="https://docs.microsoft.com/rest/api/searchservice/Custom-analyzers-in-Azure-Search#TokenFilters" |
| | | 201 | | /// </summary> |
| | 2 | 202 | | public static readonly TokenFilterName Stemmer = new TokenFilterName("stemmer"); |
| | | 203 | | |
| | | 204 | | /// <summary> |
| | | 205 | | /// Removes stop words from a token stream. |
| | | 206 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/StopFilter. |
| | | 207 | | /// </summary> |
| | 2 | 208 | | public static readonly TokenFilterName Stopwords = new TokenFilterName("stopwords"); |
| | | 209 | | |
| | | 210 | | /// <summary> |
| | | 211 | | /// Trims leading and trailing whitespace from tokens. |
| | | 212 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/Tr |
| | | 213 | | /// </summary> |
| | 2 | 214 | | public static readonly TokenFilterName Trim = new TokenFilterName("trim"); |
| | | 215 | | |
| | | 216 | | /// <summary> |
| | | 217 | | /// Truncates the terms to a specific length. |
| | | 218 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/Tr |
| | | 219 | | /// </summary> |
| | 2 | 220 | | public static readonly TokenFilterName Truncate = new TokenFilterName("truncate"); |
| | | 221 | | |
| | | 222 | | /// <summary> |
| | | 223 | | /// Filters out tokens with same text as the previous token. |
| | | 224 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/Re |
| | | 225 | | /// </summary> |
| | 2 | 226 | | public static readonly TokenFilterName Unique = new TokenFilterName("unique"); |
| | | 227 | | |
| | | 228 | | /// <summary> |
| | | 229 | | /// Normalizes token text to upper case. |
| | | 230 | | /// <see href="http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/UpperCaseFi |
| | | 231 | | /// </summary> |
| | 2 | 232 | | public static readonly TokenFilterName Uppercase = new TokenFilterName("uppercase"); |
| | | 233 | | |
| | | 234 | | /// <summary> |
| | | 235 | | /// Splits words into subwords and performs optional transformations on |
| | | 236 | | /// subword groups. |
| | | 237 | | /// </summary> |
| | 2 | 238 | | public static readonly TokenFilterName WordDelimiter = new TokenFilterName("word_delimiter"); |
| | | 239 | | |
| | | 240 | | private TokenFilterName(string name) |
| | | 241 | | { |
| | 140 | 242 | | Throw.IfArgumentNull(name, nameof(name)); |
| | 140 | 243 | | _value = name; |
| | 140 | 244 | | } |
| | | 245 | | |
| | | 246 | | /// <summary> |
| | | 247 | | /// Defines implicit conversion from string to TokenFilterName. |
| | | 248 | | /// </summary> |
| | | 249 | | /// <param name="name">string to convert.</param> |
| | | 250 | | /// <returns>The string as a TokenFilterName.</returns> |
| | 2 | 251 | | public static implicit operator TokenFilterName(string name) => new TokenFilterName(name); |
| | | 252 | | |
| | | 253 | | /// <summary> |
| | | 254 | | /// Defines explicit conversion from TokenFilterName to string. |
| | | 255 | | /// </summary> |
| | | 256 | | /// <param name="name">TokenFilterName to convert.</param> |
| | | 257 | | /// <returns>The TokenFilterName as a string.</returns> |
| | 0 | 258 | | public static explicit operator string(TokenFilterName name) => name.ToString(); |
| | | 259 | | |
| | | 260 | | /// <summary> |
| | | 261 | | /// Compares two TokenFilterName values for equality. |
| | | 262 | | /// </summary> |
| | | 263 | | /// <param name="lhs">The first TokenFilterName to compare.</param> |
| | | 264 | | /// <param name="rhs">The second TokenFilterName to compare.</param> |
| | | 265 | | /// <returns>true if the TokenFilterName objects are equal or are both null; false otherwise.</returns> |
| | 0 | 266 | | public static bool operator ==(TokenFilterName lhs, TokenFilterName rhs) => Equals(lhs, rhs); |
| | | 267 | | |
| | | 268 | | /// <summary> |
| | | 269 | | /// Compares two TokenFilterName values for inequality. |
| | | 270 | | /// </summary> |
| | | 271 | | /// <param name="lhs">The first TokenFilterName to compare.</param> |
| | | 272 | | /// <param name="rhs">The second TokenFilterName to compare.</param> |
| | | 273 | | /// <returns>true if the TokenFilterName objects are not equal; false otherwise.</returns> |
| | 0 | 274 | | public static bool operator !=(TokenFilterName lhs, TokenFilterName rhs) => !Equals(lhs, rhs); |
| | | 275 | | |
| | | 276 | | /// <summary> |
| | | 277 | | /// Compares the TokenFilterName for equality with another TokenFilterName. |
| | | 278 | | /// </summary> |
| | | 279 | | /// <param name="other">The TokenFilterName with which to compare.</param> |
| | | 280 | | /// <returns><c>true</c> if the TokenFilterName objects are equal; otherwise, <c>false</c>.</returns> |
| | 70 | 281 | | public bool Equals(TokenFilterName other) => _value == other._value; |
| | | 282 | | |
| | | 283 | | /// <summary> |
| | | 284 | | /// Determines whether the specified object is equal to the current object. |
| | | 285 | | /// </summary> |
| | | 286 | | /// <param name="obj">The object to compare with the current object.</param> |
| | | 287 | | /// <returns><c>true</c> if the specified object is equal to the current object; otherwise, <c>false</c>.</retur |
| | 0 | 288 | | public override bool Equals(object obj) => obj is TokenFilterName ? Equals((TokenFilterName)obj) : false; |
| | | 289 | | |
| | | 290 | | /// <summary> |
| | | 291 | | /// Serves as the default hash function. |
| | | 292 | | /// </summary> |
| | | 293 | | /// <returns>A hash code for the current object.</returns> |
| | 0 | 294 | | public override int GetHashCode() => _value.GetHashCode(); |
| | | 295 | | |
| | | 296 | | /// <summary> |
| | | 297 | | /// Returns a string representation of the TokenFilterName. |
| | | 298 | | /// </summary> |
| | | 299 | | /// <returns>The TokenFilterName as a string.</returns> |
| | 140 | 300 | | public override string ToString() => _value; |
| | | 301 | | } |
}