public abstract class LanguageIdentifierUpdateProcessor extends UpdateRequestProcessor implements LangIdParams
Modifier and Type | Field and Description |
---|---|
protected HashSet<String> |
allMapFieldsSet |
protected String |
docIdField |
protected boolean |
enabled |
protected boolean |
enableMapping |
protected boolean |
enforceSchema |
protected String[] |
fallbackFields |
protected String |
fallbackValue |
protected String[] |
inputFields |
protected String |
langField |
protected Pattern |
langPattern |
protected String |
langsField |
protected HashSet<String> |
langWhitelist |
protected HashMap<String,String> |
lcMap |
protected static org.slf4j.Logger |
log |
protected String[] |
mapFields |
protected boolean |
mapIndividual |
protected HashSet<String> |
mapIndividualFieldsSet |
protected boolean |
mapKeepOrig |
protected boolean |
mapOverwrite |
protected Pattern |
mapPattern |
protected String |
mapReplaceStr |
protected boolean |
overwrite |
protected IndexSchema |
schema |
protected double |
threshold |
protected Pattern |
tikaSimilarityPattern |
next
DOCID_FIELD_DEFAULT, DOCID_LANGFIELD_DEFAULT, DOCID_LANGSFIELD_DEFAULT, DOCID_PARAM, DOCID_THRESHOLD_DEFAULT, ENFORCE_SCHEMA, FALLBACK, FALLBACK_FIELDS, FIELDS_PARAM, LANG_FIELD, LANG_WHITELIST, LANGS_FIELD, LANGUAGE_ID, MAP_ENABLE, MAP_FL, MAP_INDIVIDUAL, MAP_INDIVIDUAL_FL, MAP_KEEP_ORIG, MAP_LCMAP, MAP_OVERWRITE, MAP_PATTERN, MAP_PATTERN_DEFAULT, MAP_REPLACE, MAP_REPLACE_DEFAULT, OVERWRITE, THRESHOLD
Constructor and Description |
---|
LanguageIdentifierUpdateProcessor(SolrQueryRequest req,
SolrQueryResponse rsp,
UpdateRequestProcessor next) |
Modifier and Type | Method and Description |
---|---|
protected String |
concatFields(SolrInputDocument doc,
String[] fields) |
protected abstract List<DetectedLanguage> |
detectLanguage(String content)
Detects language(s) from a string.
|
protected String |
getMappedField(String currentField,
String language)
Returns the name of the field to map the current contents into, so that they are properly analyzed.
|
boolean |
isEnabled()
Tells if this processor is enabled or not
|
protected SolrInputDocument |
process(SolrInputDocument doc)
This is the main, testable process method called from processAdd()
|
void |
processAdd(AddUpdateCommand cmd) |
protected String |
resolveLanguage(List<DetectedLanguage> languages,
String fallbackLang)
Chooses a language based on the list of candidates detected
|
protected String |
resolveLanguage(String language,
String fallbackLang)
Chooses a language based on the list of candidates detected
|
void |
setEnabled(boolean enabled) |
finish, processCommit, processDelete, processMergeIndexes, processRollback
protected static final org.slf4j.Logger log
protected boolean enabled
protected String[] inputFields
protected String[] mapFields
protected Pattern mapPattern
protected String mapReplaceStr
protected String langField
protected String langsField
protected String docIdField
protected String fallbackValue
protected String[] fallbackFields
protected boolean enableMapping
protected boolean mapKeepOrig
protected boolean overwrite
protected boolean mapOverwrite
protected boolean mapIndividual
protected boolean enforceSchema
protected double threshold
protected IndexSchema schema
protected final Pattern tikaSimilarityPattern
protected final Pattern langPattern
public LanguageIdentifierUpdateProcessor(SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next)
public void processAdd(AddUpdateCommand cmd) throws IOException
Overrides: processAdd in class UpdateRequestProcessor
Throws: IOException
protected SolrInputDocument process(SolrInputDocument doc)
doc
- the SolrInputDocument to work on
protected String concatFields(SolrInputDocument doc, String[] fields)
protected abstract List<DetectedLanguage> detectLanguage(String content)
content
- The content to identify
protected String resolveLanguage(String language, String fallbackLang)
language
- language code as a string
fallbackLang
- the language code to use as a fallback
protected String resolveLanguage(List<DetectedLanguage> languages, String fallbackLang)
languages
- a List of DetectedLanguages with certainty score
fallbackLang
- the language code to use as a fallback
protected String getMappedField(String currentField, String language)
currentField
- The current field name
language
- the language code
public boolean isEnabled()
public void setEnabled(boolean enabled)