public class MinHashFilter extends TokenFilter
AttributeSource.State
Modifier and Type | Field and Description |
---|---|
static int |
DEFAULT_BUCKET_COUNT |
static int |
DEFAULT_HASH_COUNT |
static int |
DEFAULT_HASH_SET_SIZE |
input
DEFAULT_TOKEN_ATTRIBUTE_FACTORY
Constructor and Description |
---|
MinHashFilter(TokenStream input,
int hashCount,
int bucketCount,
int hashSetSize,
boolean withRotation)
Creates a MinHash filter.
|
Modifier and Type | Method and Description |
---|---|
void |
end() |
boolean |
incrementToken() |
void |
reset() |
close
addAttribute, addAttributeImpl, captureState, clearAttributes, cloneAttributes, copyTo, endAttributes, equals, getAttribute, getAttributeClassesIterator, getAttributeFactory, getAttributeImplsIterator, hasAttribute, hasAttributes, hashCode, reflectAsString, reflectWith, removeAllAttributes, restoreState, toString
public static final int DEFAULT_HASH_COUNT
public static final int DEFAULT_HASH_SET_SIZE
public static final int DEFAULT_BUCKET_COUNT
public MinHashFilter(TokenStream input, int hashCount, int bucketCount, int hashSetSize, boolean withRotation)
input - the token stream
hashCount - the number of hashes
bucketCount - the number of buckets for hashing
hashSetSize - the number of min hashes to keep
withRotation - whether or not to rotate hashes while incrementing tokens
public final boolean incrementToken() throws IOException
incrementToken
in class TokenStream
IOException
public void end() throws IOException
end
in class TokenFilter
IOException
public void reset() throws IOException
reset
in class TokenFilter
IOException
Copyright © 2000-2019 Apache Software Foundation. All Rights Reserved.