package org.languagetool.dev.eval;

import com.ibm.icu.text.PluralRules;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import opennlp.tools.parser.Parse;
import org.apache.commons.io.IOUtils;
import org.languagetool.JLanguageTool;
import org.languagetool.language.BritishEnglish;
import org.languagetool.language.English;
import org.languagetool.languagemodel.LanguageModel;
import org.languagetool.languagemodel.LuceneLanguageModel;
import org.languagetool.rules.ConfusionSet;
import org.languagetool.rules.ConfusionSetLoader;
import org.languagetool.rules.Rule;
import org.languagetool.rules.RuleMatch;
import org.languagetool.rules.en.EnglishConfusionProbabilityRule;
import org.languagetool.rules.ngrams.ConfusionProbabilityRule;

/* loaded from: input_file:org/languagetool/dev/eval/RealWordFalseAlarmEvaluator.class */
/**
 * Runs the English confusion-probability rule over directories of plain-text sentence files
 * and reports how many rule matches it produces.
 *
 * <p>Each {@code xx.txt} file in the sentence directory is checked with the first confusion set
 * registered under the key {@code xx} in {@code /en/confusion_sets.txt}; all other active rules
 * of the {@link JLanguageTool} instance are disabled. Judging by the class name, the input
 * sentences are presumably correct text, so every match counts as a false alarm.
 *
 * <p>Instances hold an open language model and must be released with {@link #close()}.
 */
class RealWordFalseAlarmEvaluator {
    // EVAL_MODE, MIN_SENTENCES and MAX_ERROR_RATE are not referenced anywhere in this class;
    // they are kept so the set of declared constants stays unchanged.
    private static final boolean EVAL_MODE = true;
    /** Upper bound on the number of sentences checked per input file. */
    private static final int MAX_SENTENCES = 1000;
    /** Matching sentences are only printed while the per-file match count is at most this value. */
    private static final int MAX_ERROR_DISPLAY = 50;
    private static final int MIN_SENTENCES = 0;
    private static final float MAX_ERROR_RATE = 10.0f;
    private final JLanguageTool langTool;
    private final ConfusionProbabilityRule confusionRule;
    private final Map<String, List<ConfusionSet>> confusionSets;
    private final LanguageModel languageModel;
    /** Number of sentences checked across all files processed so far. */
    private int globalSentenceCount;
    /** Number of rule matches found across all files processed so far. */
    private int globalRuleMatches;

    /**
     * Loads the English confusion sets and sets up a checker that runs only the
     * confusion-probability rule.
     *
     * @param file location handed to {@link LuceneLanguageModel}; per the usage text of
     *             {@link #main(String[])} this is a Lucene index with ngram frequency information
     * @throws IOException if the confusion set resource or the language model cannot be read
     */
    RealWordFalseAlarmEvaluator(File file) throws IOException {
        Map<String, List<ConfusionSet>> loadedSets;
        try (InputStream confusionStream =
                 JLanguageTool.getDataBroker().getFromResourceDirAsStream("/en/confusion_sets.txt")) {
            loadedSets = new ConfusionSetLoader().loadConfusionSet(confusionStream);
        }
        this.confusionSets = loadedSets;
        this.langTool = new JLanguageTool(new BritishEnglish());
        // Switch off everything that is active by default so that only the rule added below fires.
        for (Rule activeRule : this.langTool.getAllActiveRules()) {
            this.langTool.disableRule(activeRule.getId());
        }
        this.languageModel = new LuceneLanguageModel(file);
        this.confusionRule = new EnglishConfusionProbabilityRule(
                JLanguageTool.getMessageBundle(), this.languageModel, new English());
        this.langTool.addRule(this.confusionRule);
    }

    /** Closes the underlying language model. */
    void close() {
        if (this.languageModel != null) {
            this.languageModel.close();
        }
    }

    /**
     * Checks every {@code *.txt} file directly inside the given directory and prints per-file
     * and overall statistics to standard output.
     *
     * @param file directory containing the sentence files; files not ending in {@code .txt}
     *             are reported and skipped
     * @throws IOException if the directory cannot be listed or a file cannot be read
     */
    void run(File file) throws IOException {
        System.out.println("Running in eval mode, no 'DATA' lines will be printed, only a subset of the homophones will be used.");
        File[] candidates = file.listFiles();
        if (candidates == null) {
            // listFiles() yields null for non-directories and on I/O errors.
            throw new IOException("Cannot list files in directory: " + file);
        }
        int fileNumber = 1;
        for (File candidate : candidates) {
            String fileName = candidate.getName();
            if (!fileName.endsWith(".txt")) {
                System.out.println("Ignoring " + candidate + ", does not match *.txt");
                continue;
            }
            try (FileInputStream sentenceStream = new FileInputStream(candidate)) {
                System.out.println("===== Working on " + fileName + " (" + fileNumber + "/" + candidates.length + ") =====");
                // NOTE(review): readLines(InputStream) decodes with the platform default charset;
                // confirm the encoding of the sentence files and pass it explicitly if known.
                checkLines(IOUtils.readLines(sentenceStream), fileName.replace(".txt", ""));
                fileNumber++;
            }
        }
        System.out.println("==============================");
        System.out.println(this.globalSentenceCount + " sentences checked");
        System.out.println(this.globalRuleMatches + " errors found");
        System.out.printf("%.2f%% of sentences have a match\n",
                percentage(this.globalRuleMatches, this.globalSentenceCount));
    }

    /**
     * Checks the given sentences with the first confusion set registered for {@code str} and
     * prints the matching sentences plus a per-file summary.
     *
     * @param list sentences to check, one per element
     * @param str  key used to look up the confusion set (the file name without {@code .txt})
     * @throws IOException if the underlying check fails
     */
    private void checkLines(List<String> list, String str) throws IOException {
        List<ConfusionSet> setsForWord = this.confusionSets.get(str);
        if (setsForWord == null) {
            System.out.println("Skipping '" + str + "', homophone not loaded");
            return;
        }
        if (setsForWord.size() > 1) {
            System.err.println("WARN: will only use first confusion set of " + setsForWord.size() + ": " + setsForWord.get(0));
        }
        this.confusionRule.setConfusionSet(setsForWord.get(0));
        int sentenceCount = 0;
        int matchCount = 0;
        for (String sentence : list) {
            List<RuleMatch> matches = this.langTool.check(sentence);
            sentenceCount++;
            this.globalSentenceCount++;
            if (!matches.isEmpty()) {
                HashSet<String> suggestions = new HashSet<>();
                for (RuleMatch match : matches) {
                    suggestions.addAll(match.getSuggestedReplacements());
                    matchCount++;
                    this.globalRuleMatches++;
                }
                // Keep the output readable: stop echoing sentences once enough matches were seen.
                if (matchCount <= MAX_ERROR_DISPLAY) {
                    System.out.println("[" + str + "] " + sentence + " => " + suggestions);
                }
            }
            // '>=' so that exactly MAX_SENTENCES sentences are checked, not MAX_SENTENCES + 1.
            if (sentenceCount >= MAX_SENTENCES) {
                System.out.println("Max sentences (" + MAX_SENTENCES + ") reached, stopping");
                break;
            }
        }
        System.out.println(sentenceCount + " sentences checked");
        System.out.println(matchCount + " errors found");
        System.out.printf("%.2f%% of sentences have a match\n", percentage(matchCount, sentenceCount));
    }

    /**
     * Returns {@code matches} relative to {@code sentences} as a percentage, using floating-point
     * division. Yields {@code 0} when no sentences were checked instead of dividing by zero.
     * The value can exceed 100 because one sentence may produce several matches.
     */
    private static float percentage(int matches, int sentences) {
        if (sentences == 0) {
            return 0.0f;
        }
        return (float) matches / sentences * 100.0f;
    }

    /**
     * Command-line entry point.
     *
     * @param strArr exactly two arguments: the language model location and the sentence directory
     * @throws IOException if the model, the confusion sets or a sentence file cannot be read
     */
    public static void main(String[] strArr) throws IOException {
        if (strArr.length != 2) {
            System.out.println("Usage: " + RealWordFalseAlarmEvaluator.class.getSimpleName() + " <languageModel> <sentenceDirectory>");
            System.out.println("   <languageModel> is a Lucene index with ngram frequency information");
            System.out.println("   <sentenceDirectory> is a directory with filenames like 'xx.txt' where 'xx' is the homophone");
            System.exit(1);
        }
        // Validate the cheap argument first so no language model is opened needlessly.
        File sentenceDir = new File(strArr[1]);
        if (!sentenceDir.isDirectory()) {
            throw new RuntimeException("Not a directory: " + sentenceDir);
        }
        RealWordFalseAlarmEvaluator evaluator = new RealWordFalseAlarmEvaluator(new File(strArr[0]));
        try {
            evaluator.run(sentenceDir);
        } finally {
            // Release the language model even when the evaluation fails.
            evaluator.close();
        }
    }
}
