PatternTokenizer.java

  1. // Copyright (c) Microsoft Corporation. All rights reserved.
  2. // Licensed under the MIT License.

  3. package com.azure.search.documents.indexes.models;

  4. import com.azure.core.annotation.Fluent;
  5. import com.fasterxml.jackson.annotation.JsonProperty;
  6. import com.fasterxml.jackson.annotation.JsonSetter;
  7. import com.fasterxml.jackson.annotation.JsonTypeInfo;
  8. import com.fasterxml.jackson.annotation.JsonTypeName;

  9. import java.util.Arrays;
  10. import java.util.List;

  11. /**
  12.  * Tokenizer that uses regex pattern matching to construct distinct tokens.
  13.  * This tokenizer is implemented using Apache Lucene.
  14.  */
  15. @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.PROPERTY, property = "@odata.type")
  16. @JsonTypeName("#Microsoft.Azure.Search.PatternTokenizer")
  17. @Fluent
  18. public final class PatternTokenizer extends LexicalTokenizer {
  19.     /*
  20.      * A regular expression pattern to match token separators. Default is an
  21.      * expression that matches one or more non-word characters.
  22.      */
  23.     @JsonProperty(value = "pattern")
  24.     private String pattern;

  25.     /*
  26.      * Regular expression flags.
  27.      */
  28.     @JsonProperty(value = "flags")
  29.     private List<RegexFlags> flags;

  30.     /*
  31.      * The zero-based ordinal of the matching group in the regular expression
  32.      * pattern to extract into tokens. Use -1 if you want to use the entire
  33.      * pattern to split the input into tokens, irrespective of matching groups.
  34.      * Default is -1.
  35.      */
  36.     @JsonProperty(value = "group")
  37.     private Integer group;

  38.     /**
  39.      * Constructor of {@link PatternTokenizer}.
  40.      *
  41.      * @param name The name of the tokenizer. It must only contain letters, digits, spaces,
  42.      * dashes or underscores, can only start and end with alphanumeric
  43.      * characters, and is limited to 128 characters.
  44.      */
  45.     public PatternTokenizer(String name) {
  46.         super(name);
  47.     }

  48.     /**
  49.      * Get the pattern property: A regular expression pattern to match token
  50.      * separators. Default is an expression that matches one or more non-word
  51.      * characters.
  52.      *
  53.      * @return the pattern value.
  54.      */
  55.     public String getPattern() {
  56.         return this.pattern;
  57.     }

  58.     /**
  59.      * Set the pattern property: A regular expression pattern to match token
  60.      * separators. Default is an expression that matches one or more non-word
  61.      * characters.
  62.      *
  63.      * @param pattern the pattern value to set.
  64.      * @return the PatternTokenizer object itself.
  65.      */
  66.     public PatternTokenizer setPattern(String pattern) {
  67.         this.pattern = pattern;
  68.         return this;
  69.     }

  70.     /**
  71.      * Get the flags property: Regular expression flags.
  72.      *
  73.      * @return the flags value.
  74.      */
  75.     public List<RegexFlags> getFlags() {
  76.         return this.flags;
  77.     }

  78.     /**
  79.      * Set the flags property: Regular expression flags.
  80.      *
  81.      * @param flags the flags value to set.
  82.      * @return the PatternTokenizer object itself.
  83.      */
  84.     public PatternTokenizer setFlags(RegexFlags... flags) {
  85.         this.flags = (flags == null) ? null : Arrays.asList(flags);
  86.         return this;
  87.     }

  88.     /**
  89.      * Set the flags property: Regular expression flags.
  90.      *
  91.      * @param flags the flags value to set.
  92.      * @return the PatternTokenizer object itself.
  93.      */
  94.     @JsonSetter
  95.     public PatternTokenizer setFlags(List<RegexFlags> flags) {
  96.         this.flags = flags;
  97.         return this;
  98.     }

  99.     /**
  100.      * Get the group property: The zero-based ordinal of the matching group in
  101.      * the regular expression pattern to extract into tokens. Use -1 if you
  102.      * want to use the entire pattern to split the input into tokens,
  103.      * irrespective of matching groups. Default is -1.
  104.      *
  105.      * @return the group value.
  106.      */
  107.     public Integer getGroup() {
  108.         return this.group;
  109.     }

  110.     /**
  111.      * Set the group property: The zero-based ordinal of the matching group in
  112.      * the regular expression pattern to extract into tokens. Use -1 if you
  113.      * want to use the entire pattern to split the input into tokens,
  114.      * irrespective of matching groups. Default is -1.
  115.      *
  116.      * @param group the group value to set.
  117.      * @return the PatternTokenizer object itself.
  118.      */
  119.     public PatternTokenizer setGroup(Integer group) {
  120.         this.group = group;
  121.         return this;
  122.     }
  123. }