|
|||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object org.apache.solr.update.processor.UpdateRequestProcessor org.apache.solr.update.processor.LanguageIdentifierUpdateProcessor
public abstract class LanguageIdentifierUpdateProcessor
Identifies the language of a set of input fields. Also supports mapping of field names based on detected language.
See http://wiki.apache.org/solr/LanguageDetection
Field Summary | |
---|---|
protected HashSet<String> |
allMapFieldsSet
|
protected String |
docIdField
|
protected boolean |
enabled
|
protected boolean |
enableMapping
|
protected boolean |
enforceSchema
|
protected String[] |
fallbackFields
|
protected String |
fallbackValue
|
protected String[] |
inputFields
|
protected String |
langField
|
protected Pattern |
langPattern
|
protected String |
langsField
|
protected HashSet<String> |
langWhitelist
|
protected HashMap<String,String> |
lcMap
|
protected static org.slf4j.Logger |
log
|
protected String[] |
mapFields
|
protected boolean |
mapIndividual
|
protected HashSet<String> |
mapIndividualFieldsSet
|
protected boolean |
mapKeepOrig
|
protected boolean |
mapOverwrite
|
protected Pattern |
mapPattern
|
protected String |
mapReplaceStr
|
protected boolean |
overwrite
|
protected IndexSchema |
schema
|
protected double |
threshold
|
protected Pattern |
tikaSimilarityPattern
|
Fields inherited from class org.apache.solr.update.processor.UpdateRequestProcessor |
---|
next |
Fields inherited from interface org.apache.solr.update.processor.LangIdParams |
---|
DOCID_FIELD_DEFAULT, DOCID_LANGFIELD_DEFAULT, DOCID_LANGSFIELD_DEFAULT, DOCID_PARAM, DOCID_THRESHOLD_DEFAULT, ENFORCE_SCHEMA, FALLBACK, FALLBACK_FIELDS, FIELDS_PARAM, LANG_FIELD, LANG_WHITELIST, LANGS_FIELD, LANGUAGE_ID, MAP_ENABLE, MAP_FL, MAP_INDIVIDUAL, MAP_INDIVIDUAL_FL, MAP_KEEP_ORIG, MAP_LCMAP, MAP_OVERWRITE, MAP_PATTERN, MAP_PATTERN_DEFAULT, MAP_REPLACE, MAP_REPLACE_DEFAULT, OVERWRITE, THRESHOLD |
Constructor Summary | |
---|---|
LanguageIdentifierUpdateProcessor(SolrQueryRequest req,
SolrQueryResponse rsp,
UpdateRequestProcessor next)
|
Method Summary | |
---|---|
protected String |
concatFields(SolrInputDocument doc,
String[] fields)
|
protected abstract List<DetectedLanguage> |
detectLanguage(String content)
Detects language(s) from a string. |
protected String |
getMappedField(String currentField,
String language)
Returns the name of the field to map the current contents into, so that they are properly analyzed. |
boolean |
isEnabled()
Tells if this processor is enabled or not |
protected SolrInputDocument |
process(SolrInputDocument doc)
This is the main, testable process method called from processAdd() |
void |
processAdd(AddUpdateCommand cmd)
|
protected String |
resolveLanguage(List<DetectedLanguage> languages,
String fallbackLang)
Chooses a language based on the list of candidates detected |
protected String |
resolveLanguage(String language,
String fallbackLang)
Chooses a language based on a single detected language code |
void |
setEnabled(boolean enabled)
|
Methods inherited from class org.apache.solr.update.processor.UpdateRequestProcessor |
---|
finish, processCommit, processDelete, processMergeIndexes, processRollback |
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
protected static final org.slf4j.Logger log
protected boolean enabled
protected String[] inputFields
protected String[] mapFields
protected Pattern mapPattern
protected String mapReplaceStr
protected String langField
protected String langsField
protected String docIdField
protected String fallbackValue
protected String[] fallbackFields
protected boolean enableMapping
protected boolean mapKeepOrig
protected boolean overwrite
protected boolean mapOverwrite
protected boolean mapIndividual
protected boolean enforceSchema
protected double threshold
protected HashSet<String> langWhitelist
protected HashSet<String> mapIndividualFieldsSet
protected HashSet<String> allMapFieldsSet
protected HashMap<String,String> lcMap
protected IndexSchema schema
protected final Pattern tikaSimilarityPattern
protected final Pattern langPattern
Constructor Detail |
---|
public LanguageIdentifierUpdateProcessor(SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next)
Method Detail |
---|
public void processAdd(AddUpdateCommand cmd) throws IOException
Overrides:
processAdd
in class UpdateRequestProcessor
Throws:
IOException
protected SolrInputDocument process(SolrInputDocument doc)
doc
- the SolrInputDocument to work on
protected String concatFields(SolrInputDocument doc, String[] fields)
protected abstract List<DetectedLanguage> detectLanguage(String content)
content
- The content to identify
protected String resolveLanguage(String language, String fallbackLang)
language
- language code as a string
fallbackLang
- the language code to use as a fallback
protected String resolveLanguage(List<DetectedLanguage> languages, String fallbackLang)
languages
- a List of DetectedLanguages with certainty score
fallbackLang
- the language code to use as a fallback
protected String getMappedField(String currentField, String language)
currentField
- The current field name
language
- the language code
public boolean isEnabled()
public void setEnabled(boolean enabled)
|
|||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |