package org.languagetool.language;

import com.google.common.base.Optional;
import com.optimaize.langdetect.LanguageDetector;
import com.optimaize.langdetect.LanguageDetectorBuilder;
import com.optimaize.langdetect.i18n.LdLocale;
import com.optimaize.langdetect.ngram.NgramExtractors;
import com.optimaize.langdetect.profiles.LanguageProfile;
import com.optimaize.langdetect.profiles.LanguageProfileReader;
import com.optimaize.langdetect.text.RemoveMinorityScriptsTextFilter;
import com.optimaize.langdetect.text.TextFilter;
import com.optimaize.langdetect.text.TextObjectFactory;
import com.optimaize.langdetect.text.TextObjectFactoryBuilder;
import com.optimaize.langdetect.text.UrlTextFilter;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.jetbrains.annotations.Nullable;
import org.languagetool.JLanguageTool;
import org.languagetool.Language;
import org.languagetool.Languages;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/languagetool/language/LanguageIdentifier.class */
/**
 * Identifies the language of a text.
 *
 * <p>By default an n-gram based {@link LanguageDetector} is used. After a successful call to
 * {@link #enableFasttext(File, File)}, an external fasttext process is asked first; if that
 * process ever fails, it is destroyed and the n-gram detector is used from then on.
 *
 * <p>Only languages known to {@link Languages} are ever returned.
 */
public class LanguageIdentifier {
    private static final Logger logger = LoggerFactory.getLogger(LanguageIdentifier.class);

    /** Minimal confidence passed to the n-gram detector builder. */
    private static final double MINIMAL_CONFIDENCE = 0.9d;
    /** Number of predictions requested from fasttext per input line. */
    private static final int K_HIGHEST_SCORES = 5;
    /** Value passed to the n-gram detector builder's {@code shortTextAlgorithm} setting. */
    private static final int SHORT_ALGO_THRESHOLD = 50;
    /** Matches a line consisting of "-- " and everything after it, up to the end of the text. */
    private static final Pattern SIGNATURE = Pattern.compile("\n-- \n.*", Pattern.DOTALL);
    /** Language codes that are never offered to the n-gram detector. */
    private static final List<String> ignoreLangCodes = Arrays.asList("ast", "gl");
    /** Language codes whose profiles are loaded from the resource directory instead of the detector library. */
    private static final List<String> externalLangCodes = Arrays.asList("eo");

    private final LanguageDetector languageDetector;
    private final TextObjectFactory textObjectFactory;
    private final int maxLength;

    // Read without a lock in detectLanguage() while runFasttext() is synchronized, i.e. concurrent
    // callers are expected; volatile makes a disable/enable visible to all of them.
    private volatile boolean fasttextEnabled;
    private Process fasttextProcess;
    private BufferedReader fasttextIn;
    private BufferedWriter fasttextOut;

    /**
     * Text filter that cuts off everything from the first {@link #SIGNATURE} match onwards.
     */
    class RemoveEMailSignatureFilter implements TextFilter {
        @Override
        public String filter(CharSequence charSequence) {
            return SIGNATURE.matcher(charSequence.toString()).replaceFirst("");
        }
    }

    /**
     * Creates an identifier that considers at most the first 1000 characters of a text.
     */
    public LanguageIdentifier() {
        this(1000);
    }

    /**
     * Creates an identifier.
     *
     * @param maxLength maximum number of leading characters of a text that are considered,
     *                  must be at least 10
     * @throws IllegalArgumentException if {@code maxLength} is smaller than 10
     * @throws RuntimeException if the language profiles cannot be loaded
     */
    public LanguageIdentifier(int maxLength) {
        // Validate first so that an invalid argument fails before the expensive profile loading.
        if (maxLength < 10) {
            throw new IllegalArgumentException("maxLength must be >= 10 (but values > 100 are recommended): " + maxLength);
        }
        this.maxLength = maxLength;
        try {
            this.languageDetector = LanguageDetectorBuilder.create(NgramExtractors.standard())
                    .minimalConfidence(MINIMAL_CONFIDENCE)
                    .shortTextAlgorithm(SHORT_ALGO_THRESHOLD)
                    .withProfiles(loadProfiles(getLanguageCodes()))
                    .build();
            this.textObjectFactory = new TextObjectFactoryBuilder()
                    .maxTextLength(10000)
                    .withTextFilter(UrlTextFilter.getInstance())
                    .withTextFilter(RemoveMinorityScriptsTextFilter.forThreshold(0.3d))
                    .withTextFilter(new RemoveEMailSignatureFilter())
                    .build();
        } catch (IOException e) {
            throw new RuntimeException("Could not set up language identifier", e);
        }
    }

    /**
     * Starts a fasttext process and uses it for subsequent detections. Does nothing if either
     * argument is {@code null}.
     *
     * @param fasttextBinary the fasttext executable
     * @param fasttextModel the model file handed to the executable
     * @throws RuntimeException if the process cannot be started
     */
    public void enableFasttext(File fasttextBinary, File fasttextModel) {
        if (fasttextBinary == null || fasttextModel == null) {
            return;
        }
        try {
            startFasttext(fasttextModel, fasttextBinary);
            logger.info("Started fasttext process for language identification: Binary {} with model @ {}",
                    fasttextBinary, fasttextModel);
            fasttextEnabled = true;
        } catch (IOException e) {
            fasttextEnabled = false;
            throw new RuntimeException("Could not start fasttext process for language identification @ "
                    + fasttextBinary + " with model @ " + fasttextModel, e);
        }
    }

    /**
     * Collects the codes of all non-variant languages that are neither ignored nor loaded
     * externally. "zh" is expanded to "zh-CN" and "zh-TW".
     */
    private static List<String> getLanguageCodes() {
        List<String> codes = new ArrayList<>();
        for (Language language : Languages.get()) {
            String code = language.getShortCode();
            if (language.isVariant() || ignoreLangCodes.contains(code) || externalLangCodes.contains(code)) {
                continue;
            }
            if ("zh".equals(code)) {
                codes.add("zh-CN");
                codes.add("zh-TW");
            } else {
                codes.add(code);
            }
        }
        return codes;
    }

    /**
     * Loads the profiles for the given codes and appends the profiles of
     * {@link #externalLangCodes} that exist in the resource directory.
     *
     * @param langCodes codes of the profiles to load via {@link LanguageProfileReader}
     * @return the loaded profiles
     * @throws IOException if a profile cannot be read
     */
    private List<LanguageProfile> loadProfiles(List<String> langCodes) throws IOException {
        List<LanguageProfile> profiles = new LanguageProfileReader().read(langCodes);
        for (String langCode : externalLangCodes) {
            String profilePath = "/" + langCode + "/" + langCode + ".profile";
            if (JLanguageTool.getDataBroker().resourceExists(profilePath)) {
                try (InputStream profileStream = JLanguageTool.getDataBroker().getFromResourceDirAsStream(profilePath)) {
                    profiles.add(new LanguageProfileReader().read(profileStream));
                }
            }
        }
        return profiles;
    }

    /**
     * Detects the language of the given text. Only the first {@code maxLength} characters are
     * considered, and the configured text filters are applied before detection.
     *
     * @param text the text to analyse
     * @return the detected language, or {@code null} if no language was detected or the
     *         detected language is not supported
     */
    @Nullable
    public Language detectLanguage(String text) {
        String truncated = text.length() > maxLength ? text.substring(0, maxLength) : text;
        String shortText = textObjectFactory.forText(truncated).toString();
        String languageCode = null;
        if (fasttextEnabled) {
            try {
                languageCode = getHighestScoringResult(runFasttext(shortText));
            } catch (Exception e) {
                // Any failure while talking to the external process disables it for good.
                fasttextEnabled = false;
                logger.error("Disabling fasttext language identification, got error for text: " + text, e);
                fasttextProcess.destroy();
            }
        }
        // Deliberately not an 'else': the flag may have been cleared in the catch block above.
        if (!fasttextEnabled) {
            languageCode = detectLanguageCode(shortText);
        }
        if (languageCode == null || !Languages.isLanguageSupported(languageCode)) {
            return null;
        }
        return Languages.getLanguageForShortCode(languageCode);
    }

    /**
     * Launches the fasttext binary in {@code predict-prob} mode and connects UTF-8 reader and
     * writer to its standard output and standard input.
     */
    private void startFasttext(File modelPath, File binaryPath) throws IOException {
        fasttextProcess = new ProcessBuilder(binaryPath.getPath(), "predict-prob", modelPath.getPath(),
                "-", String.valueOf(K_HIGHEST_SCORES)).start();
        fasttextIn = new BufferedReader(new InputStreamReader(fasttextProcess.getInputStream(), StandardCharsets.UTF_8));
        fasttextOut = new BufferedWriter(new OutputStreamWriter(fasttextProcess.getOutputStream(), StandardCharsets.UTF_8));
    }

    /**
     * Returns the key with the highest value, or {@code null} if the map is empty.
     */
    private String getHighestScoringResult(Map<String, Double> scores) {
        String bestCode = null;
        double bestScore = -1.0d;
        for (Map.Entry<String, Double> entry : scores.entrySet()) {
            double score = entry.getValue();
            if (score > bestScore) {
                bestScore = score;
                bestCode = entry.getKey();
            }
        }
        return bestCode;
    }

    /**
     * Sends the text as a single line to the fasttext process and parses the answer line, which
     * is expected to consist of space-separated label/probability pairs.
     *
     * @param text the text to classify; line feeds are replaced by spaces
     * @return probabilities by language code, restricted to supported languages
     * @throws IOException if communication fails or the process has closed its output
     * @throws RuntimeException if the answer does not consist of label/probability pairs
     */
    private synchronized Map<String, Double> runFasttext(String text) throws IOException {
        // One request is one line, so the text itself must not contain line feeds.
        fasttextOut.write(text.replace("\n", " "));
        fasttextOut.newLine();
        fasttextOut.flush();
        String answer = fasttextIn.readLine();
        if (answer == null) {
            // End of stream: fail with a clear message instead of a NullPointerException below.
            throw new IOException("fasttext process returned no output (end of stream)");
        }
        String[] tokens = answer.split(" ");
        if (tokens.length % 2 != 0) {
            throw new RuntimeException("Error while parsing fasttext output: " + answer);
        }
        Map<String, Double> scores = new HashMap<>();
        for (int i = 0; i < tokens.length; i += 2) {
            String label = tokens[i];
            // The language code is whatever follows the last "__" of the label.
            String langCode = label.substring(label.lastIndexOf("__") + 2);
            double probability = Double.parseDouble(tokens[i + 1]);
            if (Languages.isLanguageSupported(langCode)) {
                scores.put(langCode, probability);
            }
        }
        return scores;
    }

    /**
     * Runs the n-gram detector.
     *
     * @return the detected language code, or {@code null} if the detector returned no result
     */
    @Nullable
    private String detectLanguageCode(String text) {
        Optional<LdLocale> detected = languageDetector.detect(text);
        return detected.isPresent() ? detected.get().getLanguage() : null;
    }
}
