public abstract class LanguageIdentifier extends Object
Modifier and Type | Class and Description |
---|---|
protected static class | LanguageIdentifier.ParsedLanguageLists |
Modifier and Type | Field and Description |
---|---|
protected static CommonWordsDetector | COMMON_WORDS_LANG_IDENTIFIER |
protected static int | CONSIDER_ONLY_PREFERRED_THRESHOLD |
protected int | maxLength |
protected static List<String> | NON_LATIN_CHARS_LANGUAGES |
protected static com.optimaize.langdetect.text.TextFilter | REMOVE_EMAIL_SIGNATURE_FILTER |
protected static com.optimaize.langdetect.text.TextFilter | REMOVE_MENTION_FILTER |
protected static com.optimaize.langdetect.text.TextFilter | REMOVE_NON_BREAKING_SPACES_FILTER |
protected static com.optimaize.langdetect.text.TextFilter | REMOVE_URL_FILTER |
protected static float | SCORE_THRESHOLD |
protected static UnicodeBasedDetector | UNICODE_BASED_LANG_IDENTIFIER |
Constructor and Description |
---|
LanguageIdentifier(int maxLength) |
Modifier and Type | Method and Description |
---|---|
String | cleanAndShortenText(String text) |
abstract Language | detectLanguage(String cleanText) |
abstract DetectedLanguage | detectLanguage(String cleanText, List<String> noopLangsTmp, List<String> preferredLangsTmp) |
abstract DetectedLanguage | detectLanguage(String cleanText, List<String> noopLangsTmp, List<String> preferredLangsTmp, boolean limitOnPreferredLangs) |
protected Map.Entry<String,Double> | getHighestScoringResult(Map<String,Double> probs) |
protected LanguageIdentifier.ParsedLanguageLists | prepareDetectLanguage(String text, List<String> noopLangsTmp, List<String> preferredLangsTmp) |
protected static final float SCORE_THRESHOLD
protected static final int CONSIDER_ONLY_PREFERRED_THRESHOLD
protected static final com.optimaize.langdetect.text.TextFilter REMOVE_EMAIL_SIGNATURE_FILTER
protected static final com.optimaize.langdetect.text.TextFilter REMOVE_MENTION_FILTER
protected static final com.optimaize.langdetect.text.TextFilter REMOVE_NON_BREAKING_SPACES_FILTER
protected static final com.optimaize.langdetect.text.TextFilter REMOVE_URL_FILTER
protected static final UnicodeBasedDetector UNICODE_BASED_LANG_IDENTIFIER
protected static final CommonWordsDetector COMMON_WORDS_LANG_IDENTIFIER
protected int maxLength
@Nullable public abstract DetectedLanguage detectLanguage(String cleanText, List<String> noopLangsTmp, List<String> preferredLangsTmp)
cleanText
- a cleanText as returned by cleanAndShortenText(String)
noopLangsTmp
- list of codes that are detected but will lead to the NoopLanguage that has no rules
Returns: null if language could not be identified
@Nullable public abstract DetectedLanguage detectLanguage(String cleanText, List<String> noopLangsTmp, List<String> preferredLangsTmp, boolean limitOnPreferredLangs)
@Nullable public abstract Language detectLanguage(String cleanText)
cleanText
- a cleanText as returned by cleanAndShortenText(String)
Returns: null if language could not be identified
protected LanguageIdentifier.ParsedLanguageLists prepareDetectLanguage(String text, List<String> noopLangsTmp, List<String> preferredLangsTmp)