Class CompoundWordTokenFilterBase

    • Field Detail

      • DEFAULT_MIN_WORD_SIZE

        public static final int DEFAULT_MIN_WORD_SIZE
        The default minimum length a word must have in order to be decomposed.
        See Also:
        Constant Field Values
      • DEFAULT_MIN_SUBWORD_SIZE

        public static final int DEFAULT_MIN_SUBWORD_SIZE
        The default minimum length of subwords that are propagated to the output of this filter.
        See Also:
        Constant Field Values
      • DEFAULT_MAX_SUBWORD_SIZE

        public static final int DEFAULT_MAX_SUBWORD_SIZE
        The default maximum length of subwords that are propagated to the output of this filter.
        See Also:
        Constant Field Values
      • minWordSize

        protected final int minWordSize
      • minSubwordSize

        protected final int minSubwordSize
      • maxSubwordSize

        protected final int maxSubwordSize
      • onlyLongestMatch

        protected final boolean onlyLongestMatch
    • Constructor Detail

      • CompoundWordTokenFilterBase

        protected CompoundWordTokenFilterBase(TokenStream input,
                                              CharArraySet dictionary,
                                              boolean onlyLongestMatch)
      • CompoundWordTokenFilterBase

        protected CompoundWordTokenFilterBase(TokenStream input,
                                              CharArraySet dictionary)
      • CompoundWordTokenFilterBase

        protected CompoundWordTokenFilterBase(TokenStream input,
                                              CharArraySet dictionary,
                                              int minWordSize,
                                              int minSubwordSize,
                                              int maxSubwordSize,
                                              boolean onlyLongestMatch)