PatternTokenizer.java

// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

package com.azure.search.documents.indexes.models;

import com.azure.core.annotation.Fluent;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonSetter;
import com.fasterxml.jackson.annotation.JsonTypeInfo;
import com.fasterxml.jackson.annotation.JsonTypeName;

import java.util.Arrays;
import java.util.List;

/**
 * Tokenizer that uses regex pattern matching to construct distinct tokens.
 * This tokenizer is implemented using Apache Lucene.
 */
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.PROPERTY, property = "@odata.type")
@JsonTypeName("#Microsoft.Azure.Search.PatternTokenizer")
@Fluent
public final class PatternTokenizer extends LexicalTokenizer {
    /*
     * A regular expression pattern to match token separators. Default is an
     * expression that matches one or more non-word characters.
     */
    @JsonProperty(value = "pattern")
    private String pattern;

    /*
     * Regular expression flags.
     */
    @JsonProperty(value = "flags")
    private List<RegexFlags> flags;

    /*
     * The zero-based ordinal of the matching group in the regular expression
     * pattern to extract into tokens. Use -1 if you want to use the entire
     * pattern to split the input into tokens, irrespective of matching groups.
     * Default is -1.
     */
    @JsonProperty(value = "group")
    private Integer group;

    /**
     * Constructor of {@link PatternTokenizer}.
     *
     * @param name The name of the tokenizer. It must only contain letters, digits, spaces,
     * dashes or underscores, can only start and end with alphanumeric
     * characters, and is limited to 128 characters.
     */
    public PatternTokenizer(String name) {
        super(name);
    }

    /**
     * Get the pattern property: A regular expression pattern to match token
     * separators. Default is an expression that matches one or more non-word
     * characters.
     *
     * @return the pattern value.
     */
    public String getPattern() {
        return this.pattern;
    }

    /**
     * Set the pattern property: A regular expression pattern to match token
     * separators. Default is an expression that matches one or more non-word
     * characters.
     *
     * @param pattern the pattern value to set.
     * @return the PatternTokenizer object itself.
     */
    public PatternTokenizer setPattern(String pattern) {
        this.pattern = pattern;
        return this;
    }

    /**
     * Get the flags property: Regular expression flags.
     *
     * @return the flags value.
     */
    public List<RegexFlags> getFlags() {
        return this.flags;
    }

    /**
     * Set the flags property: Regular expression flags.
     *
     * @param flags the flags value to set.
     * @return the PatternTokenizer object itself.
     */
    public PatternTokenizer setFlags(RegexFlags... flags) {
        this.flags = (flags == null) ? null : Arrays.asList(flags);
        return this;
    }

    /**
     * Set the flags property: Regular expression flags.
     *
     * @param flags the flags value to set.
     * @return the PatternTokenizer object itself.
     */
    @JsonSetter
    public PatternTokenizer setFlags(List<RegexFlags> flags) {
        this.flags = flags;
        return this;
    }

    /**
     * Get the group property: The zero-based ordinal of the matching group in
     * the regular expression pattern to extract into tokens. Use -1 if you
     * want to use the entire pattern to split the input into tokens,
     * irrespective of matching groups. Default is -1.
     *
     * @return the group value.
     */
    public Integer getGroup() {
        return this.group;
    }

    /**
     * Set the group property: The zero-based ordinal of the matching group in
     * the regular expression pattern to extract into tokens. Use -1 if you
     * want to use the entire pattern to split the input into tokens,
     * irrespective of matching groups. Default is -1.
     *
     * @param group the group value to set.
     * @return the PatternTokenizer object itself.
     */
    public PatternTokenizer setGroup(Integer group) {
        this.group = group;
        return this;
    }
}