public abstract class LanguageIdentifier extends Object
| Modifier and Type | Class and Description |
|---|---|
| protected static class | LanguageIdentifier.ParsedLanguageLists |
| Modifier and Type | Field and Description |
|---|---|
| protected static CommonWordsDetector | COMMON_WORDS_LANG_IDENTIFIER |
| protected static int | CONSIDER_ONLY_PREFERRED_THRESHOLD |
| protected int | maxLength |
| protected static List<String> | NON_LATIN_CHARS_LANGUAGES |
| protected static com.optimaize.langdetect.text.TextFilter | REMOVE_EMAIL_SIGNATURE_FILTER |
| protected static com.optimaize.langdetect.text.TextFilter | REMOVE_MENTION_FILTER |
| protected static com.optimaize.langdetect.text.TextFilter | REMOVE_NON_BREAKING_SPACES_FILTER |
| protected static com.optimaize.langdetect.text.TextFilter | REMOVE_URL_FILTER |
| protected static float | SCORE_THRESHOLD |
| protected static UnicodeBasedDetector | UNICODE_BASED_LANG_IDENTIFIER |
| Constructor and Description |
|---|
| LanguageIdentifier(int maxLength) |
| Modifier and Type | Method and Description |
|---|---|
| String | cleanAndShortenText(String text) |
| abstract Language | detectLanguage(String cleanText) |
| abstract DetectedLanguage | detectLanguage(String cleanText, List<String> noopLangsTmp, List<String> preferredLangsTmp) |
| abstract DetectedLanguage | detectLanguage(String cleanText, List<String> noopLangsTmp, List<String> preferredLangsTmp, boolean limitOnPreferredLangs) |
| protected Map.Entry<String,Double> | getHighestScoringResult(Map<String,Double> probs) |
| protected LanguageIdentifier.ParsedLanguageLists | prepareDetectLanguage(String text, List<String> noopLangsTmp, List<String> preferredLangsTmp) |
protected static final float SCORE_THRESHOLD
protected static final int CONSIDER_ONLY_PREFERRED_THRESHOLD
protected static final com.optimaize.langdetect.text.TextFilter REMOVE_EMAIL_SIGNATURE_FILTER
protected static final com.optimaize.langdetect.text.TextFilter REMOVE_MENTION_FILTER
protected static final com.optimaize.langdetect.text.TextFilter REMOVE_NON_BREAKING_SPACES_FILTER
protected static final com.optimaize.langdetect.text.TextFilter REMOVE_URL_FILTER
protected static final UnicodeBasedDetector UNICODE_BASED_LANG_IDENTIFIER
protected static final CommonWordsDetector COMMON_WORDS_LANG_IDENTIFIER
protected int maxLength
@Nullable public abstract DetectedLanguage detectLanguage(String cleanText, List<String> noopLangsTmp, List<String> preferredLangsTmp)
Parameters:
cleanText - a cleanText as returned by cleanAndShortenText(String)
noopLangsTmp - list of codes that are detected but will lead to the NoopLanguage that has no rules
Returns:
null if language could not be identified
@Nullable public abstract DetectedLanguage detectLanguage(String cleanText, List<String> noopLangsTmp, List<String> preferredLangsTmp, boolean limitOnPreferredLangs)
@Nullable public abstract Language detectLanguage(String cleanText)
Parameters:
cleanText - a cleanText as returned by cleanAndShortenText(String)
Returns:
null if language could not be identified
protected LanguageIdentifier.ParsedLanguageLists prepareDetectLanguage(String text, List<String> noopLangsTmp, List<String> preferredLangsTmp)