PatternTokenizer.java
- // Copyright (c) Microsoft Corporation. All rights reserved.
- // Licensed under the MIT License.
- package com.azure.search.documents.indexes.models;
- import com.azure.core.annotation.Fluent;
- import com.fasterxml.jackson.annotation.JsonProperty;
- import com.fasterxml.jackson.annotation.JsonSetter;
- import com.fasterxml.jackson.annotation.JsonTypeInfo;
- import com.fasterxml.jackson.annotation.JsonTypeName;
- import java.util.Arrays;
- import java.util.List;
- /**
-  * Tokenizer that uses regex pattern matching to construct distinct tokens.
-  * This tokenizer is implemented using Apache Lucene.
-  *
-  * <p>Instances are serialized with the {@code @odata.type} discriminator
-  * {@code #Microsoft.Azure.Search.PatternTokenizer}. Every setter returns this
-  * instance so that calls can be chained.</p>
-  */
- @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.PROPERTY, property = "@odata.type")
- @JsonTypeName("#Microsoft.Azure.Search.PatternTokenizer")
- @Fluent
- public final class PatternTokenizer extends LexicalTokenizer {
-     /*
-      * A regular expression pattern to match token separators. Default is an
-      * expression that matches one or more non-word characters.
-      */
-     @JsonProperty(value = "pattern")
-     private String pattern;
-     /*
-      * Regular expression flags.
-      */
-     @JsonProperty(value = "flags")
-     private List<RegexFlags> flags;
-     /*
-      * The zero-based ordinal of the matching group in the regular expression
-      * pattern to extract into tokens. Use -1 if you want to use the entire
-      * pattern to split the input into tokens, irrespective of matching groups.
-      * Default is -1.
-      */
-     @JsonProperty(value = "group")
-     private Integer group;
-     /**
-      * Constructor of {@link PatternTokenizer}.
-      *
-      * @param name The name of the tokenizer. It must only contain letters, digits, spaces,
-      * dashes or underscores, can only start and end with alphanumeric
-      * characters, and is limited to 128 characters.
-      */
-     public PatternTokenizer(String name) {
-         super(name);
-     }
-     /**
-      * Get the pattern property: A regular expression pattern to match token
-      * separators. Default is an expression that matches one or more non-word
-      * characters.
-      *
-      * @return the pattern value, or {@code null} if none has been set on this object.
-      */
-     public String getPattern() {
-         return this.pattern;
-     }
-     /**
-      * Set the pattern property: A regular expression pattern to match token
-      * separators. Default is an expression that matches one or more non-word
-      * characters.
-      *
-      * @param pattern the pattern value to set.
-      * @return the PatternTokenizer object itself.
-      */
-     public PatternTokenizer setPattern(String pattern) {
-         this.pattern = pattern;
-         return this;
-     }
-     /**
-      * Get the flags property: Regular expression flags.
-      *
-      * <p>The stored list itself is returned, not a copy.</p>
-      *
-      * @return the flags value, or {@code null} if none has been set on this object.
-      */
-     public List<RegexFlags> getFlags() {
-         return this.flags;
-     }
-     /**
-      * Set the flags property: Regular expression flags.
-      *
-      * <p>The supplied array is copied before it is wrapped, so later changes
-      * to the caller's array do not affect this tokenizer. The stored list is
-      * the fixed-size list produced by {@code Arrays.asList}.</p>
-      *
-      * @param flags the flags value to set; passing a {@code null} array sets
-      * the property to {@code null}.
-      * @return the PatternTokenizer object itself.
-      */
-     public PatternTokenizer setFlags(RegexFlags... flags) {
-         // Arrays.asList returns a view backed by its argument; wrap a private
-         // copy so the stored list does not alias an array the caller still holds.
-         this.flags = (flags == null) ? null : Arrays.asList(flags.clone());
-         return this;
-     }
-     /**
-      * Set the flags property: Regular expression flags.
-      *
-      * <p>The given list reference is stored as-is (no copy is made). This
-      * overload is the one marked with {@code @JsonSetter}.</p>
-      *
-      * @param flags the flags value to set.
-      * @return the PatternTokenizer object itself.
-      */
-     @JsonSetter
-     public PatternTokenizer setFlags(List<RegexFlags> flags) {
-         this.flags = flags;
-         return this;
-     }
-     /**
-      * Get the group property: The zero-based ordinal of the matching group in
-      * the regular expression pattern to extract into tokens. Use -1 if you
-      * want to use the entire pattern to split the input into tokens,
-      * irrespective of matching groups. Default is -1.
-      *
-      * @return the group value, or {@code null} if none has been set on this object.
-      */
-     public Integer getGroup() {
-         return this.group;
-     }
-     /**
-      * Set the group property: The zero-based ordinal of the matching group in
-      * the regular expression pattern to extract into tokens. Use -1 if you
-      * want to use the entire pattern to split the input into tokens,
-      * irrespective of matching groups. Default is -1.
-      *
-      * @param group the group value to set.
-      * @return the PatternTokenizer object itself.
-      */
-     public PatternTokenizer setGroup(Integer group) {
-         this.group = group;
-         return this;
-     }
- }