java.lang.Object
  org.apache.lucene.util.AttributeSource
    org.apache.lucene.analysis.TokenStream
      org.apache.lucene.analysis.TokenFilter
        org.apache.lucene.analysis.compound.CompoundWordTokenFilterBase
public abstract class CompoundWordTokenFilterBase extends org.apache.lucene.analysis.TokenFilter
Base class for decomposition token filters.

You must specify the required Version compatibility when creating CompoundWordTokenFilterBase.
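A minimal construction sketch (not taken from this page) showing how the matchVersion argument is supplied to a concrete decomposition filter. DictionaryCompoundWordTokenFilter is assumed to be the dictionary-based subclass in this package; the tokenizer, Version constant, and sample dictionary are illustrative assumptions.

```java
import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.compound.DictionaryCompoundWordTokenFilter;
import org.apache.lucene.util.Version;

public class CompoundFilterConstructionSketch {
  public static TokenStream build() {
    // The Version argument carries the required compatibility setting.
    TokenStream source = new WhitespaceTokenizer(new StringReader("Donaudampfschiff"));
    return new DictionaryCompoundWordTokenFilter(
        Version.LUCENE_30,                             // assumed Version constant
        source,
        new String[] { "Donau", "dampf", "schiff" });  // illustrative dictionary
  }
}
```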
| Nested Class Summary |
|---|
| Nested classes/interfaces inherited from class org.apache.lucene.util.AttributeSource |
|---|
| org.apache.lucene.util.AttributeSource.AttributeFactory, org.apache.lucene.util.AttributeSource.State |
| Field Summary | |
|---|---|
| static int | DEFAULT_MAX_SUBWORD_SIZE - The default for maximal length of subwords that get propagated to the output of this filter |
| static int | DEFAULT_MIN_SUBWORD_SIZE - The default for minimal length of subwords that get propagated to the output of this filter |
| static int | DEFAULT_MIN_WORD_SIZE - The default for minimal word length that gets decomposed |
| protected org.apache.lucene.analysis.CharArraySet | dictionary |
| protected int | maxSubwordSize |
| protected int | minSubwordSize |
| protected int | minWordSize |
| protected boolean | onlyLongestMatch |
| protected LinkedList<org.apache.lucene.analysis.Token> | tokens |
| Fields inherited from class org.apache.lucene.analysis.TokenFilter |
|---|
| input |
| Constructor Summary | |
|---|---|
| protected | CompoundWordTokenFilterBase(org.apache.lucene.analysis.TokenStream input, Set<?> dictionary) - Deprecated. use CompoundWordTokenFilterBase(Version, TokenStream, Set) instead |
| protected | CompoundWordTokenFilterBase(org.apache.lucene.analysis.TokenStream input, Set<?> dictionary, boolean onlyLongestMatch) - Deprecated. use CompoundWordTokenFilterBase(Version, TokenStream, Set, boolean) instead |
| protected | CompoundWordTokenFilterBase(org.apache.lucene.analysis.TokenStream input, Set<?> dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) - Deprecated. use CompoundWordTokenFilterBase(Version, TokenStream, Set, int, int, int, boolean) instead |
| protected | CompoundWordTokenFilterBase(org.apache.lucene.analysis.TokenStream input, String[] dictionary) - Deprecated. use CompoundWordTokenFilterBase(Version, TokenStream, String[]) instead |
| protected | CompoundWordTokenFilterBase(org.apache.lucene.analysis.TokenStream input, String[] dictionary, boolean onlyLongestMatch) - Deprecated. use CompoundWordTokenFilterBase(Version, TokenStream, String[], boolean) instead |
| protected | CompoundWordTokenFilterBase(org.apache.lucene.analysis.TokenStream input, String[] dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) - Deprecated. use CompoundWordTokenFilterBase(Version, TokenStream, String[], int, int, int, boolean) instead |
| protected | CompoundWordTokenFilterBase(org.apache.lucene.util.Version matchVersion, org.apache.lucene.analysis.TokenStream input, Set<?> dictionary) |
| protected | CompoundWordTokenFilterBase(org.apache.lucene.util.Version matchVersion, org.apache.lucene.analysis.TokenStream input, Set<?> dictionary, boolean onlyLongestMatch) |
| protected | CompoundWordTokenFilterBase(org.apache.lucene.util.Version matchVersion, org.apache.lucene.analysis.TokenStream input, Set<?> dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) |
| protected | CompoundWordTokenFilterBase(org.apache.lucene.util.Version matchVersion, org.apache.lucene.analysis.TokenStream input, String[] dictionary) |
| protected | CompoundWordTokenFilterBase(org.apache.lucene.util.Version matchVersion, org.apache.lucene.analysis.TokenStream input, String[] dictionary, boolean onlyLongestMatch) |
| protected | CompoundWordTokenFilterBase(org.apache.lucene.util.Version matchVersion, org.apache.lucene.analysis.TokenStream input, String[] dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) |
| Method Summary | |
|---|---|
| protected static void | addAllLowerCase(org.apache.lucene.analysis.CharArraySet target, Collection<?> col) |
| protected org.apache.lucene.analysis.Token | createToken(int offset, int length, org.apache.lucene.analysis.Token prototype) |
| protected void | decompose(org.apache.lucene.analysis.Token token) |
| protected abstract void | decomposeInternal(org.apache.lucene.analysis.Token token) |
| boolean | incrementToken() |
| static Set<?> | makeDictionary(String[] dictionary) - Create a set of words from an array. The resulting Set does case-insensitive matching. TODO: We should look for a faster dictionary lookup approach. |
| static Set<?> | makeDictionary(org.apache.lucene.util.Version matchVersion, String[] dictionary) |
| protected static char[] | makeLowerCaseCopy(char[] buffer) |
| void | reset() |
| Methods inherited from class org.apache.lucene.analysis.TokenFilter |
|---|
| close, end |
| Methods inherited from class org.apache.lucene.util.AttributeSource |
|---|
| addAttribute, addAttributeImpl, captureState, clearAttributes, cloneAttributes, copyTo, equals, getAttribute, getAttributeClassesIterator, getAttributeFactory, getAttributeImplsIterator, hasAttribute, hasAttributes, hashCode, reflectAsString, reflectWith, restoreState, toString |
| Methods inherited from class java.lang.Object |
|---|
| clone, finalize, getClass, notify, notifyAll, wait, wait, wait |
| Field Detail |
|---|
public static final int DEFAULT_MIN_WORD_SIZE
public static final int DEFAULT_MIN_SUBWORD_SIZE
public static final int DEFAULT_MAX_SUBWORD_SIZE
protected final org.apache.lucene.analysis.CharArraySet dictionary
protected final LinkedList<org.apache.lucene.analysis.Token> tokens
protected final int minWordSize
protected final int minSubwordSize
protected final int maxSubwordSize
protected final boolean onlyLongestMatch
| Constructor Detail |
|---|
@Deprecated
protected CompoundWordTokenFilterBase(org.apache.lucene.analysis.TokenStream input,
String[] dictionary,
int minWordSize,
int minSubwordSize,
int maxSubwordSize,
boolean onlyLongestMatch)
Deprecated. use CompoundWordTokenFilterBase(Version, TokenStream, String[], int, int, int, boolean) instead
@Deprecated
protected CompoundWordTokenFilterBase(org.apache.lucene.analysis.TokenStream input,
String[] dictionary,
boolean onlyLongestMatch)
Deprecated. use CompoundWordTokenFilterBase(Version, TokenStream, String[], boolean) instead
@Deprecated
protected CompoundWordTokenFilterBase(org.apache.lucene.analysis.TokenStream input,
Set<?> dictionary,
boolean onlyLongestMatch)
Deprecated. use CompoundWordTokenFilterBase(Version, TokenStream, Set, boolean) instead
@Deprecated
protected CompoundWordTokenFilterBase(org.apache.lucene.analysis.TokenStream input,
String[] dictionary)
Deprecated. use CompoundWordTokenFilterBase(Version, TokenStream, String[]) instead
@Deprecated
protected CompoundWordTokenFilterBase(org.apache.lucene.analysis.TokenStream input,
Set<?> dictionary)
Deprecated. use CompoundWordTokenFilterBase(Version, TokenStream, Set) instead
@Deprecated
protected CompoundWordTokenFilterBase(org.apache.lucene.analysis.TokenStream input,
Set<?> dictionary,
int minWordSize,
int minSubwordSize,
int maxSubwordSize,
boolean onlyLongestMatch)
Deprecated. use CompoundWordTokenFilterBase(Version, TokenStream, Set, int, int, int, boolean) instead
protected CompoundWordTokenFilterBase(org.apache.lucene.util.Version matchVersion,
org.apache.lucene.analysis.TokenStream input,
String[] dictionary,
int minWordSize,
int minSubwordSize,
int maxSubwordSize,
boolean onlyLongestMatch)
protected CompoundWordTokenFilterBase(org.apache.lucene.util.Version matchVersion,
org.apache.lucene.analysis.TokenStream input,
String[] dictionary,
boolean onlyLongestMatch)
protected CompoundWordTokenFilterBase(org.apache.lucene.util.Version matchVersion,
org.apache.lucene.analysis.TokenStream input,
Set<?> dictionary,
boolean onlyLongestMatch)
protected CompoundWordTokenFilterBase(org.apache.lucene.util.Version matchVersion,
org.apache.lucene.analysis.TokenStream input,
String[] dictionary)
protected CompoundWordTokenFilterBase(org.apache.lucene.util.Version matchVersion,
org.apache.lucene.analysis.TokenStream input,
Set<?> dictionary)
protected CompoundWordTokenFilterBase(org.apache.lucene.util.Version matchVersion,
org.apache.lucene.analysis.TokenStream input,
Set<?> dictionary,
int minWordSize,
int minSubwordSize,
int maxSubwordSize,
boolean onlyLongestMatch)
| Method Detail |
|---|
public static final Set<?> makeDictionary(String[] dictionary)
Create a set of words from an array. The resulting Set does case-insensitive matching.
Parameters: dictionary
Returns: Set of lowercased terms
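A short hedged example of the helper above, building a case-insensitive dictionary that can be handed to the Set<?>-taking constructors; the sample words are illustrative.

```java
import java.util.Set;

import org.apache.lucene.analysis.compound.CompoundWordTokenFilterBase;

public class DictionaryExample {
  // Sketch: makeDictionary lowercases the words and returns a set whose
  // lookups ignore case (sample words are illustrative).
  public static final Set<?> DICT = CompoundWordTokenFilterBase.makeDictionary(
      new String[] { "Donau", "Dampf", "Schiff" });
}
```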
public static final Set<?> makeDictionary(org.apache.lucene.util.Version matchVersion,
String[] dictionary)
public final boolean incrementToken()
throws IOException
Specified by: incrementToken in class org.apache.lucene.analysis.TokenStream
Throws: IOException
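A hedged consumption sketch: draining a decomposition filter through incrementToken(). The stream parameter stands for any TokenStream built on a subclass of this filter (for example the one from the construction sketch above); TermAttribute is the 3.x term attribute.

```java
import java.io.IOException;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

public class PrintDecomposedTerms {
  // Sketch: each call to incrementToken() first drains any queued subword
  // tokens, then advances the underlying input stream.
  public static void print(TokenStream stream) throws IOException {
    TermAttribute term = stream.addAttribute(TermAttribute.class);
    while (stream.incrementToken()) {
      System.out.println(term.term());
    }
    stream.close();
  }
}
```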
protected static final void addAllLowerCase(org.apache.lucene.analysis.CharArraySet target,
Collection<?> col)
protected static char[] makeLowerCaseCopy(char[] buffer)
protected final org.apache.lucene.analysis.Token createToken(int offset,
int length,
org.apache.lucene.analysis.Token prototype)
protected void decompose(org.apache.lucene.analysis.Token token)
protected abstract void decomposeInternal(org.apache.lucene.analysis.Token token)
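A hypothetical minimal subclass (not part of Lucene) sketching how the protected members cooperate: the constructor forwards the required matchVersion to a protected super constructor, and decomposeInternal(Token) queues subword Tokens on the protected tokens list via createToken(int, int, Token), which incrementToken() then emits. The matching strategy below is an illustrative assumption, not the algorithm used by the shipped subclasses.

```java
import java.util.Set;

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.compound.CompoundWordTokenFilterBase;
import org.apache.lucene.util.Version;

// Hypothetical subclass: emit every dictionary word found inside the surface form.
public final class SimpleDecompoundingFilter extends CompoundWordTokenFilterBase {

  public SimpleDecompoundingFilter(Version matchVersion, TokenStream input, Set<?> dictionary) {
    super(matchVersion, input, dictionary); // protected base-class constructor
  }

  @Override
  protected void decomposeInternal(Token token) {
    char[] buffer = token.termBuffer();
    int length = token.termLength();
    // Try every start position and subword length within the configured bounds.
    for (int start = 0; start <= length - minSubwordSize; start++) {
      int longest = Math.min(maxSubwordSize, length - start);
      for (int len = minSubwordSize; len <= longest; len++) {
        if (dictionary.contains(buffer, start, len)) {
          // createToken copies position/offset data from the prototype token;
          // queued tokens are returned by incrementToken() one at a time.
          tokens.add(createToken(start, len, token));
        }
      }
    }
  }
}
```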
public void reset()
throws IOException
Overrides: reset in class org.apache.lucene.analysis.TokenFilter
Throws: IOException