|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object org.apache.lucene.util.AttributeSource org.apache.lucene.analysis.TokenStream org.apache.lucene.analysis.TokenFilter org.apache.lucene.analysis.compound.CompoundWordTokenFilterBase
public abstract class CompoundWordTokenFilterBase
Base class for decomposition token filters.
You must specify the required Version
compatibility when creating
CompoundWordTokenFilterBase:
Nested Class Summary |
---|
Nested classes/interfaces inherited from class org.apache.lucene.util.AttributeSource |
---|
org.apache.lucene.util.AttributeSource.AttributeFactory, org.apache.lucene.util.AttributeSource.State |
Field Summary | |
---|---|
static int |
DEFAULT_MAX_SUBWORD_SIZE
The default for maximal length of subwords that get propagated to the output of this filter |
static int |
DEFAULT_MIN_SUBWORD_SIZE
The default for minimal length of subwords that get propagated to the output of this filter |
static int |
DEFAULT_MIN_WORD_SIZE
The default for minimal word length that gets decomposed |
protected org.apache.lucene.analysis.CharArraySet |
dictionary
|
protected int |
maxSubwordSize
|
protected int |
minSubwordSize
|
protected int |
minWordSize
|
protected boolean |
onlyLongestMatch
|
protected LinkedList<org.apache.lucene.analysis.Token> |
tokens
|
Fields inherited from class org.apache.lucene.analysis.TokenFilter |
---|
input |
Constructor Summary | |
---|---|
protected |
CompoundWordTokenFilterBase(org.apache.lucene.analysis.TokenStream input,
Set<?> dictionary)
Deprecated. use CompoundWordTokenFilterBase(Version, TokenStream, Set) instead |
protected |
CompoundWordTokenFilterBase(org.apache.lucene.analysis.TokenStream input,
Set<?> dictionary,
boolean onlyLongestMatch)
Deprecated. use CompoundWordTokenFilterBase(Version, TokenStream, Set, boolean) instead |
protected |
CompoundWordTokenFilterBase(org.apache.lucene.analysis.TokenStream input,
Set<?> dictionary,
int minWordSize,
int minSubwordSize,
int maxSubwordSize,
boolean onlyLongestMatch)
Deprecated. use CompoundWordTokenFilterBase(Version, TokenStream, Set, int, int, int, boolean) instead |
protected |
CompoundWordTokenFilterBase(org.apache.lucene.analysis.TokenStream input,
String[] dictionary)
Deprecated. use CompoundWordTokenFilterBase(Version, TokenStream, String[]) instead |
protected |
CompoundWordTokenFilterBase(org.apache.lucene.analysis.TokenStream input,
String[] dictionary,
boolean onlyLongestMatch)
Deprecated. use CompoundWordTokenFilterBase(Version, TokenStream, String[], boolean) instead |
protected |
CompoundWordTokenFilterBase(org.apache.lucene.analysis.TokenStream input,
String[] dictionary,
int minWordSize,
int minSubwordSize,
int maxSubwordSize,
boolean onlyLongestMatch)
Deprecated. use CompoundWordTokenFilterBase(Version, TokenStream, String[], int, int, int, boolean) instead |
protected |
CompoundWordTokenFilterBase(org.apache.lucene.util.Version matchVersion,
org.apache.lucene.analysis.TokenStream input,
Set<?> dictionary)
|
protected |
CompoundWordTokenFilterBase(org.apache.lucene.util.Version matchVersion,
org.apache.lucene.analysis.TokenStream input,
Set<?> dictionary,
boolean onlyLongestMatch)
|
protected |
CompoundWordTokenFilterBase(org.apache.lucene.util.Version matchVersion,
org.apache.lucene.analysis.TokenStream input,
Set<?> dictionary,
int minWordSize,
int minSubwordSize,
int maxSubwordSize,
boolean onlyLongestMatch)
|
protected |
CompoundWordTokenFilterBase(org.apache.lucene.util.Version matchVersion,
org.apache.lucene.analysis.TokenStream input,
String[] dictionary)
|
protected |
CompoundWordTokenFilterBase(org.apache.lucene.util.Version matchVersion,
org.apache.lucene.analysis.TokenStream input,
String[] dictionary,
boolean onlyLongestMatch)
|
protected |
CompoundWordTokenFilterBase(org.apache.lucene.util.Version matchVersion,
org.apache.lucene.analysis.TokenStream input,
String[] dictionary,
int minWordSize,
int minSubwordSize,
int maxSubwordSize,
boolean onlyLongestMatch)
|
Method Summary | |
---|---|
protected static void |
addAllLowerCase(org.apache.lucene.analysis.CharArraySet target,
Collection<?> col)
|
protected org.apache.lucene.analysis.Token |
createToken(int offset,
int length,
org.apache.lucene.analysis.Token prototype)
|
protected void |
decompose(org.apache.lucene.analysis.Token token)
|
protected abstract void |
decomposeInternal(org.apache.lucene.analysis.Token token)
|
boolean |
incrementToken()
|
static Set<?> |
makeDictionary(String[] dictionary)
Create a set of words from an array. The resulting Set does case-insensitive matching. TODO: We should look for a faster dictionary lookup approach. |
static Set<?> |
makeDictionary(org.apache.lucene.util.Version matchVersion,
String[] dictionary)
|
protected static char[] |
makeLowerCaseCopy(char[] buffer)
|
void |
reset()
|
Methods inherited from class org.apache.lucene.analysis.TokenFilter |
---|
close, end |
Methods inherited from class org.apache.lucene.util.AttributeSource |
---|
addAttribute, addAttributeImpl, captureState, clearAttributes, cloneAttributes, copyTo, equals, getAttribute, getAttributeClassesIterator, getAttributeFactory, getAttributeImplsIterator, hasAttribute, hasAttributes, hashCode, reflectAsString, reflectWith, restoreState, toString |
Methods inherited from class java.lang.Object |
---|
clone, finalize, getClass, notify, notifyAll, wait, wait, wait |
Field Detail |
---|
public static final int DEFAULT_MIN_WORD_SIZE
public static final int DEFAULT_MIN_SUBWORD_SIZE
public static final int DEFAULT_MAX_SUBWORD_SIZE
protected final org.apache.lucene.analysis.CharArraySet dictionary
protected final LinkedList<org.apache.lucene.analysis.Token> tokens
protected final int minWordSize
protected final int minSubwordSize
protected final int maxSubwordSize
protected final boolean onlyLongestMatch
Constructor Detail |
---|
@Deprecated protected CompoundWordTokenFilterBase(org.apache.lucene.analysis.TokenStream input, String[] dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch)
Deprecated. Use CompoundWordTokenFilterBase(Version, TokenStream, String[], int, int, int, boolean) instead.
@Deprecated protected CompoundWordTokenFilterBase(org.apache.lucene.analysis.TokenStream input, String[] dictionary, boolean onlyLongestMatch)
Deprecated. Use CompoundWordTokenFilterBase(Version, TokenStream, String[], boolean) instead.
@Deprecated protected CompoundWordTokenFilterBase(org.apache.lucene.analysis.TokenStream input, Set<?> dictionary, boolean onlyLongestMatch)
Deprecated. Use CompoundWordTokenFilterBase(Version, TokenStream, Set, boolean) instead.
@Deprecated protected CompoundWordTokenFilterBase(org.apache.lucene.analysis.TokenStream input, String[] dictionary)
Deprecated. Use CompoundWordTokenFilterBase(Version, TokenStream, String[]) instead.
@Deprecated protected CompoundWordTokenFilterBase(org.apache.lucene.analysis.TokenStream input, Set<?> dictionary)
Deprecated. Use CompoundWordTokenFilterBase(Version, TokenStream, Set) instead.
@Deprecated protected CompoundWordTokenFilterBase(org.apache.lucene.analysis.TokenStream input, Set<?> dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch)
Deprecated. Use CompoundWordTokenFilterBase(Version, TokenStream, Set, int, int, int, boolean) instead.
protected CompoundWordTokenFilterBase(org.apache.lucene.util.Version matchVersion, org.apache.lucene.analysis.TokenStream input, String[] dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch)
protected CompoundWordTokenFilterBase(org.apache.lucene.util.Version matchVersion, org.apache.lucene.analysis.TokenStream input, String[] dictionary, boolean onlyLongestMatch)
protected CompoundWordTokenFilterBase(org.apache.lucene.util.Version matchVersion, org.apache.lucene.analysis.TokenStream input, Set<?> dictionary, boolean onlyLongestMatch)
protected CompoundWordTokenFilterBase(org.apache.lucene.util.Version matchVersion, org.apache.lucene.analysis.TokenStream input, String[] dictionary)
protected CompoundWordTokenFilterBase(org.apache.lucene.util.Version matchVersion, org.apache.lucene.analysis.TokenStream input, Set<?> dictionary)
protected CompoundWordTokenFilterBase(org.apache.lucene.util.Version matchVersion, org.apache.lucene.analysis.TokenStream input, Set<?> dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch)
Method Detail |
---|
public static final Set<?> makeDictionary(String[] dictionary)
Parameters: dictionary -
Returns: Set of lowercased terms
public static final Set<?> makeDictionary(org.apache.lucene.util.Version matchVersion, String[] dictionary)
public final boolean incrementToken() throws IOException
Specified by: incrementToken in class org.apache.lucene.analysis.TokenStream
Throws: IOException
protected static final void addAllLowerCase(org.apache.lucene.analysis.CharArraySet target, Collection<?> col)
protected static char[] makeLowerCaseCopy(char[] buffer)
protected final org.apache.lucene.analysis.Token createToken(int offset, int length, org.apache.lucene.analysis.Token prototype)
protected void decompose(org.apache.lucene.analysis.Token token)
protected abstract void decomposeInternal(org.apache.lucene.analysis.Token token)
public void reset() throws IOException
Overrides: reset in class org.apache.lucene.analysis.TokenFilter
Throws: IOException
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |