public abstract class BinaryDictionary extends Object implements Dictionary
Modifier and Type | Class and Description |
---|---|
static class |
BinaryDictionary.ResourceScheme
Used to specify where (dictionary) resources get loaded from.
|
Dictionary.Morpheme
Modifier and Type | Field and Description |
---|---|
static String |
DICT_FILENAME_SUFFIX |
static String |
DICT_HEADER |
static int |
HAS_READING
flag that the entry has reading data.
|
static int |
HAS_SINGLE_POS
flag that the entry has a single part of speech (leftPOS)
|
static String |
POSDICT_FILENAME_SUFFIX |
static String |
POSDICT_HEADER |
static String |
TARGETMAP_FILENAME_SUFFIX |
static String |
TARGETMAP_HEADER |
static int |
VERSION |
Modifier | Constructor and Description |
---|---|
protected |
BinaryDictionary() |
protected |
BinaryDictionary(BinaryDictionary.ResourceScheme resourceScheme,
String resourcePath) |
Modifier and Type | Method and Description |
---|---|
static InputStream |
getClassResource(Class<?> clazz,
String suffix) |
int |
getLeftId(int wordId)
Get left id of specified word
|
POS.Tag |
getLeftPOS(int wordId)
Get the left
POS.Tag of specified word. |
Dictionary.Morpheme[] |
getMorphemes(int wordId,
char[] surfaceForm,
int off,
int len)
Get the morphemes of specified word.
|
POS.Type |
getPOSType(int wordId)
Get the
POS.Type of specified word (morpheme, compound, inflect or pre-analysis) |
String |
getReading(int wordId)
Get the reading of specified word (mainly used for Hanja to Hangul conversion).
|
static InputStream |
getResource(BinaryDictionary.ResourceScheme scheme,
String path) |
protected InputStream |
getResource(String suffix) |
int |
getRightId(int wordId)
Get right id of specified word
|
POS.Tag |
getRightPOS(int wordId)
Get the right
POS.Tag of specified word. |
int |
getWordCost(int wordId)
Get word cost of specified word
|
void |
lookupWordIds(int sourceId,
IntsRef ref) |
public static final String TARGETMAP_FILENAME_SUFFIX
public static final String DICT_FILENAME_SUFFIX
public static final String POSDICT_FILENAME_SUFFIX
public static final String DICT_HEADER
public static final String TARGETMAP_HEADER
public static final String POSDICT_HEADER
public static final int VERSION
public static final int HAS_SINGLE_POS
public static final int HAS_READING
protected BinaryDictionary() throws IOException
IOException
protected BinaryDictionary(BinaryDictionary.ResourceScheme resourceScheme, String resourcePath) throws IOException
resourceScheme - scheme for loading resources (FILE or CLASSPATH).
resourcePath - where to load resources (dictionaries) from. If null, with CLASSPATH scheme only, use this class's name as the path.
IOException
protected final InputStream getResource(String suffix) throws IOException
IOException
public static InputStream getResource(BinaryDictionary.ResourceScheme scheme, String path) throws IOException
IOException
public static InputStream getClassResource(Class<?> clazz, String suffix) throws IOException
IOException
public void lookupWordIds(int sourceId, IntsRef ref)
public int getLeftId(int wordId)
Dictionary
getLeftId
in interface Dictionary
public int getRightId(int wordId)
Dictionary
getRightId
in interface Dictionary
public int getWordCost(int wordId)
Dictionary
getWordCost
in interface Dictionary
public POS.Type getPOSType(int wordId)
Dictionary
Get the POS.Type of specified word (morpheme, compound, inflect or pre-analysis)
getPOSType
in interface Dictionary
public POS.Tag getLeftPOS(int wordId)
Dictionary
Get the left POS.Tag of specified word.
For POS.Type.MORPHEME and POS.Type.COMPOUND the left and right POS are the same.
getLeftPOS
in interface Dictionary
public POS.Tag getRightPOS(int wordId)
Dictionary
Get the right POS.Tag of specified word.
For POS.Type.MORPHEME and POS.Type.COMPOUND the left and right POS are the same.
getRightPOS
in interface Dictionary
public String getReading(int wordId)
Dictionary
getReading
in interface Dictionary
public Dictionary.Morpheme[] getMorphemes(int wordId, char[] surfaceForm, int off, int len)
Dictionary
getMorphemes
in interface Dictionary
Copyright © 2000-2021 Apache Software Foundation. All Rights Reserved.