EdgeNGramTokenizer.java

  1. // Copyright (c) Microsoft Corporation. All rights reserved.
  2. // Licensed under the MIT License.

  3. package com.azure.search.documents.indexes.models;

  4. import com.azure.core.annotation.Fluent;
  5. import com.fasterxml.jackson.annotation.JsonProperty;
  6. import com.fasterxml.jackson.annotation.JsonSetter;
  7. import com.fasterxml.jackson.annotation.JsonTypeInfo;
  8. import com.fasterxml.jackson.annotation.JsonTypeName;

  9. import java.util.Arrays;
  10. import java.util.List;

  11. /**
  12.  * Tokenizes the input from an edge into n-grams of the given size(s). This
  13.  * tokenizer is implemented using Apache Lucene.
  14.  */
  15. @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.PROPERTY, property = "@odata.type")
  16. @JsonTypeName("#Microsoft.Azure.Search.EdgeNGramTokenizer")
  17. @Fluent
  18. public final class EdgeNGramTokenizer extends LexicalTokenizer {
  19.     /*
  20.      * The minimum n-gram length. Default is 1. Maximum is 300. Must be less
  21.      * than the value of maxGram.
  22.      */
  23.     @JsonProperty(value = "minGram")
  24.     private Integer minGram;

  25.     /*
  26.      * The maximum n-gram length. Default is 2. Maximum is 300.
  27.      */
  28.     @JsonProperty(value = "maxGram")
  29.     private Integer maxGram;

  30.     /*
  31.      * Character classes to keep in the tokens.
  32.      */
  33.     @JsonProperty(value = "tokenChars")
  34.     private List<TokenCharacterKind> tokenChars;

  35.     /**
  36.      * Constructor of {@link LexicalTokenizer}.
  37.      *
  38.      * @param name The name of the tokenizer. It must only contain letters, digits, spaces,
  39.      * dashes or underscores, can only start and end with alphanumeric
  40.      * characters, and is limited to 128 characters.
  41.      */
  42.     public EdgeNGramTokenizer(String name) {
  43.         super(name);
  44.     }

  45.     /**
  46.      * Get the minGram property: The minimum n-gram length. Default is 1.
  47.      * Maximum is 300. Must be less than the value of maxGram.
  48.      *
  49.      * @return the minGram value.
  50.      */
  51.     public Integer getMinGram() {
  52.         return this.minGram;
  53.     }

  54.     /**
  55.      * Set the minGram property: The minimum n-gram length. Default is 1.
  56.      * Maximum is 300. Must be less than the value of maxGram.
  57.      *
  58.      * @param minGram the minGram value to set.
  59.      * @return the EdgeNGramTokenizer object itself.
  60.      */
  61.     public EdgeNGramTokenizer setMinGram(Integer minGram) {
  62.         this.minGram = minGram;
  63.         return this;
  64.     }

  65.     /**
  66.      * Get the maxGram property: The maximum n-gram length. Default is 2.
  67.      * Maximum is 300.
  68.      *
  69.      * @return the maxGram value.
  70.      */
  71.     public Integer getMaxGram() {
  72.         return this.maxGram;
  73.     }

  74.     /**
  75.      * Set the maxGram property: The maximum n-gram length. Default is 2.
  76.      * Maximum is 300.
  77.      *
  78.      * @param maxGram the maxGram value to set.
  79.      * @return the EdgeNGramTokenizer object itself.
  80.      */
  81.     public EdgeNGramTokenizer setMaxGram(Integer maxGram) {
  82.         this.maxGram = maxGram;
  83.         return this;
  84.     }

  85.     /**
  86.      * Get the tokenChars property: Character classes to keep in the tokens.
  87.      *
  88.      * @return the tokenChars value.
  89.      */
  90.     public List<TokenCharacterKind> getTokenChars() {
  91.         return this.tokenChars;
  92.     }

  93.     /**
  94.      * Set the tokenChars property: Character classes to keep in the tokens.
  95.      *
  96.      * @param tokenChars the tokenChars value to set.
  97.      * @return the EdgeNGramTokenizer object itself.
  98.      */
  99.     public EdgeNGramTokenizer setTokenChars(TokenCharacterKind... tokenChars) {
  100.         this.tokenChars = (tokenChars == null) ? null : Arrays.asList(tokenChars);
  101.         return this;
  102.     }

  103.     /**
  104.      * Set the tokenChars property: Character classes to keep in the tokens.
  105.      *
  106.      * @param tokenChars the tokenChars value to set.
  107.      * @return the EdgeNGramTokenizer object itself.
  108.      */
  109.     @JsonSetter
  110.     public EdgeNGramTokenizer setTokenChars(List<TokenCharacterKind> tokenChars) {
  111.         this.tokenChars = tokenChars;
  112.         return this;
  113.     }
  114. }