MicrosoftLanguageTokenizer.java

  1. // Copyright (c) Microsoft Corporation. All rights reserved.
  2. // Licensed under the MIT License.

  3. package com.azure.search.documents.indexes.models;

  4. import com.azure.core.annotation.Fluent;
  5. import com.fasterxml.jackson.annotation.JsonProperty;
  6. import com.fasterxml.jackson.annotation.JsonTypeInfo;
  7. import com.fasterxml.jackson.annotation.JsonTypeName;

  8. /**
  9.  * Divides text using language-specific rules.
  10.  */
  11. @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.PROPERTY, property = "@odata.type")
  12. @JsonTypeName("#Microsoft.Azure.Search.MicrosoftLanguageTokenizer")
  13. @Fluent
  14. public final class MicrosoftLanguageTokenizer extends LexicalTokenizer {
  15.     /*
  16.      * The maximum token length. Tokens longer than the maximum length are
  17.      * split. Maximum token length that can be used is 300 characters. Tokens
  18.      * longer than 300 characters are first split into tokens of length 300 and
  19.      * then each of those tokens is split based on the max token length set.
  20.      * Default is 255.
  21.      */
  22.     @JsonProperty(value = "maxTokenLength")
  23.     private Integer maxTokenLength;

  24.     /*
  25.      * A value indicating how the tokenizer is used. Set to true if used as the
  26.      * search tokenizer, set to false if used as the indexing tokenizer.
  27.      * Default is false.
  28.      */
  29.     @JsonProperty(value = "isSearchTokenizer")
  30.     private Boolean isSearchTokenizer;

  31.     /*
  32.      * The language to use. The default is English. Possible values include:
  33.      * 'Bangla', 'Bulgarian', 'Catalan', 'ChineseSimplified',
  34.      * 'ChineseTraditional', 'Croatian', 'Czech', 'Danish', 'Dutch', 'English',
  35.      * 'French', 'German', 'Greek', 'Gujarati', 'Hindi', 'Icelandic',
  36.      * 'Indonesian', 'Italian', 'Japanese', 'Kannada', 'Korean', 'Malay',
  37.      * 'Malayalam', 'Marathi', 'NorwegianBokmaal', 'Polish', 'Portuguese',
  38.      * 'PortugueseBrazilian', 'Punjabi', 'Romanian', 'Russian',
  39.      * 'SerbianCyrillic', 'SerbianLatin', 'Slovenian', 'Spanish', 'Swedish',
  40.      * 'Tamil', 'Telugu', 'Thai', 'Ukrainian', 'Urdu', 'Vietnamese'
  41.      */
  42.     @JsonProperty(value = "language")
  43.     private MicrosoftTokenizerLanguage language;

  44.     /**
  45.      * Constructor of {@link MicrosoftLanguageTokenizer}.
  46.      *
  47.      * @param name The name of the tokenizer. It must only contain letters, digits, spaces,
  48.      * dashes or underscores, can only start and end with alphanumeric
  49.      * characters, and is limited to 128 characters.
  50.      */
  51.     public MicrosoftLanguageTokenizer(String name) {
  52.         super(name);
  53.     }

  54.     /**
  55.      * Get the maxTokenLength property: The maximum token length. Tokens longer
  56.      * than the maximum length are split. Maximum token length that can be used
  57.      * is 300 characters. Tokens longer than 300 characters are first split
  58.      * into tokens of length 300 and then each of those tokens is split based
  59.      * on the max token length set. Default is 255.
  60.      *
  61.      * @return the maxTokenLength value.
  62.      */
  63.     public Integer getMaxTokenLength() {
  64.         return this.maxTokenLength;
  65.     }

  66.     /**
  67.      * Set the maxTokenLength property: The maximum token length. Tokens longer
  68.      * than the maximum length are split. Maximum token length that can be used
  69.      * is 300 characters. Tokens longer than 300 characters are first split
  70.      * into tokens of length 300 and then each of those tokens is split based
  71.      * on the max token length set. Default is 255.
  72.      *
  73.      * @param maxTokenLength the maxTokenLength value to set.
  74.      * @return the MicrosoftLanguageTokenizer object itself.
  75.      */
  76.     public MicrosoftLanguageTokenizer setMaxTokenLength(Integer maxTokenLength) {
  77.         this.maxTokenLength = maxTokenLength;
  78.         return this;
  79.     }

  80.     /**
  81.      * Get the isSearchTokenizer property: A value indicating how the tokenizer
  82.      * is used. Set to true if used as the search tokenizer, set to false if
  83.      * used as the indexing tokenizer. Default is false.
  84.      *
  85.      * @return the isSearchTokenizer value.
  86.      */
  87.     public Boolean isSearchTokenizer() {
  88.         return this.isSearchTokenizer;
  89.     }

  90.     /**
  91.      * Set the isSearchTokenizer property: A value indicating how the tokenizer
  92.      * is used. Set to true if used as the search tokenizer, set to false if
  93.      * used as the indexing tokenizer. Default is false.
  94.      *
  95.      * @param isSearchTokenizer the isSearchTokenizer value to set.
  96.      * @return the MicrosoftLanguageTokenizer object itself.
  97.      */
  98.     public MicrosoftLanguageTokenizer setIsSearchTokenizer(Boolean isSearchTokenizer) {
  99.         this.isSearchTokenizer = isSearchTokenizer;
  100.         return this;
  101.     }

  102.     /**
  103.      * Get the language property: The language to use. The default is English.
  104.      * Possible values include: 'Bangla', 'Bulgarian', 'Catalan',
  105.      * 'ChineseSimplified', 'ChineseTraditional', 'Croatian', 'Czech',
  106.      * 'Danish', 'Dutch', 'English', 'French', 'German', 'Greek', 'Gujarati',
  107.      * 'Hindi', 'Icelandic', 'Indonesian', 'Italian', 'Japanese', 'Kannada',
  108.      * 'Korean', 'Malay', 'Malayalam', 'Marathi', 'NorwegianBokmaal', 'Polish',
  109.      * 'Portuguese', 'PortugueseBrazilian', 'Punjabi', 'Romanian', 'Russian',
  110.      * 'SerbianCyrillic', 'SerbianLatin', 'Slovenian', 'Spanish', 'Swedish',
  111.      * 'Tamil', 'Telugu', 'Thai', 'Ukrainian', 'Urdu', 'Vietnamese'.
  112.      *
  113.      * @return the language value.
  114.      */
  115.     public MicrosoftTokenizerLanguage getLanguage() {
  116.         return this.language;
  117.     }

  118.     /**
  119.      * Set the language property: The language to use. The default is English.
  120.      * Possible values include: 'Bangla', 'Bulgarian', 'Catalan',
  121.      * 'ChineseSimplified', 'ChineseTraditional', 'Croatian', 'Czech',
  122.      * 'Danish', 'Dutch', 'English', 'French', 'German', 'Greek', 'Gujarati',
  123.      * 'Hindi', 'Icelandic', 'Indonesian', 'Italian', 'Japanese', 'Kannada',
  124.      * 'Korean', 'Malay', 'Malayalam', 'Marathi', 'NorwegianBokmaal', 'Polish',
  125.      * 'Portuguese', 'PortugueseBrazilian', 'Punjabi', 'Romanian', 'Russian',
  126.      * 'SerbianCyrillic', 'SerbianLatin', 'Slovenian', 'Spanish', 'Swedish',
  127.      * 'Tamil', 'Telugu', 'Thai', 'Ukrainian', 'Urdu', 'Vietnamese'.
  128.      *
  129.      * @param language the language value to set.
  130.      * @return the MicrosoftLanguageTokenizer object itself.
  131.      */
  132.     public MicrosoftLanguageTokenizer setLanguage(MicrosoftTokenizerLanguage language) {
  133.         this.language = language;
  134.         return this;
  135.     }
  136. }