LuceneStandardTokenizer.java

  1. // Copyright (c) Microsoft Corporation. All rights reserved.
  2. // Licensed under the MIT License.

  3. package com.azure.search.documents.indexes.models;

  4. import com.azure.core.annotation.Fluent;
  5. import com.azure.search.documents.implementation.converters.LuceneStandardTokenizerHelper;
  6. import com.fasterxml.jackson.annotation.JsonCreator;
  7. import com.fasterxml.jackson.annotation.JsonProperty;

  8. /**
  9.  * Breaks text following the Unicode Text Segmentation rules. This tokenizer is
  10.  * implemented using Apache Lucene.
  11.  */
  12. @Fluent
  13. public final class LuceneStandardTokenizer extends LexicalTokenizer {
  14.     private String odataType;

  15.     /*
  16.      * The maximum token length. Default is 255. Tokens longer than the maximum
  17.      * length are split.
  18.      */
  19.     @JsonProperty(value = "maxTokenLength")
  20.     private Integer maxTokenLength;

  21.     static {
  22.         LuceneStandardTokenizerHelper.setAccessor(new LuceneStandardTokenizerHelper.LuceneStandardTokenizerAccessor() {
  23.             @Override
  24.             public void setODataType(LuceneStandardTokenizer tokenizer, String odataType) {
  25.                 tokenizer.setODataType(odataType);
  26.             }

  27.             @Override
  28.             public String getODataType(LuceneStandardTokenizer standardTokenizer) {
  29.                 return standardTokenizer.getODataType();
  30.             }
  31.         });
  32.     }

  33.     /**
  34.      * Constructor of {@link LuceneStandardTokenizer}.
  35.      *
  36.      * @param name The name of the tokenizer. It must only contain letters, digits, spaces,
  37.      * dashes or underscores, can only start and end with alphanumeric
  38.      * characters, and is limited to 128 characters.
  39.      */
  40.     @JsonCreator
  41.     public LuceneStandardTokenizer(@JsonProperty(value = "name") String name) {
  42.         super(name);
  43.         odataType = "#Microsoft.Azure.Search.LuceneStandardTokenizerV2";
  44.     }

  45.     /**
  46.      * Get the maxTokenLength property: The maximum token length. Default is
  47.      * 255. Tokens longer than the maximum length are split.
  48.      *
  49.      * @return the maxTokenLength value.
  50.      */
  51.     public Integer getMaxTokenLength() {
  52.         return this.maxTokenLength;
  53.     }

  54.     /**
  55.      * Set the maxTokenLength property: The maximum token length. Default is
  56.      * 255. Tokens longer than the maximum length are split.
  57.      *
  58.      * @param maxTokenLength the maxTokenLength value to set.
  59.      * @return the LuceneStandardTokenizer object itself.
  60.      */
  61.     public LuceneStandardTokenizer setMaxTokenLength(Integer maxTokenLength) {
  62.         this.maxTokenLength = maxTokenLength;
  63.         return this;
  64.     }

  65.     /**
  66.      * The private setter to set the odataType property
  67.      * via {@link LuceneStandardTokenizerHelper.LuceneStandardTokenizerAccessor}.
  68.      *
  69.      * @param odataType The OData type.
  70.      */
  71.     private void setODataType(String odataType) {
  72.         this.odataType = odataType;
  73.     }

  74.     /**
  75.      * The private getter to get the odataType property
  76.      * via {@link LuceneStandardTokenizerHelper.LuceneStandardTokenizerAccessor}.
  77.      *
  78.      * @return The OData type.
  79.      */
  80.     private String getODataType() {
  81.         return this.odataType;
  82.     }
  83. }