package org.languagetool.dev.bigdata;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.lang3.StringUtils;
import org.languagetool.JLanguageTool;
import org.languagetool.Language;
import org.languagetool.Languages;
import org.languagetool.dev.bigdata.ConfusionRuleEvaluator;
import org.languagetool.dev.eval.FMeasure;
import org.languagetool.languagemodel.LuceneLanguageModel;
import org.languagetool.rules.ConfusionSet;
import org.languagetool.rules.ConfusionSetLoader;
import org.languagetool.rules.ConfusionString;

/* loaded from: input_file:org/languagetool/dev/bigdata/AllConfusionRulesEvaluator.class */
/**
 * Command-line tool that runs {@link ConfusionRuleEvaluator} over every two-word
 * confusion set loaded from {@code /en/confusion_sets.txt} and prints one line per
 * evaluated set (the words, the factor, and the evaluation summary), followed by the
 * average weighted f-measure over all evaluated sets.
 */
final class AllConfusionRulesEvaluator {
    /** Sentence limit handed to {@link ConfusionRuleEvaluator} for each evaluated word pair. */
    private static final int MAX_SENTENCES = 1000;

    /** Column to which each rule line is padded before the {@code "# <summary>"} suffix. */
    private static final int SUMMARY_COLUMN = 82;

    private AllConfusionRulesEvaluator() {
    }

    /**
     * Entry point.
     *
     * @param strArr {@code <langCode> <languageModelTopDir> <input> [<input2>]}; exactly
     *               three or four arguments are accepted, otherwise usage is printed and
     *               the JVM exits with status 1
     * @throws IOException if reading the confusion sets or running the evaluation fails
     */
    public static void main(String[] strArr) throws IOException {
        if (strArr.length < 3 || strArr.length > 4) {
            // Report this class's own name (the original printed ConfusionRuleEvaluator's).
            System.err.println("Usage: " + AllConfusionRulesEvaluator.class.getSimpleName() + " <langCode> <languageModelTopDir> <wikipediaXml|tatoebaFile|dir>...");
            System.err.println("   <languageModelTopDir> is a directory with sub-directories '1grams', '2grams', and '3grams' with Lucene indexes");
            System.err.println("   <wikipediaXml|tatoebaFile|dir> either a Wikipedia XML dump, or a Tatoeba file or");
            System.err.println("                      a directory with example sentences (where <word>.txt contains only the sentences for <word>).");
            System.err.println("                      You can specify both a Wikipedia file and a Tatoeba file.");
            System.exit(1);
        }
        String langCode = strArr[0];
        Language language = "en".equals(langCode)
                ? new ConfusionRuleEvaluator.EnglishLight()
                : Languages.getLanguageForShortCode(langCode);
        LuceneLanguageModel languageModel = new LuceneLanguageModel(new File(strArr[1]));

        // One mandatory input plus an optional second one.
        List<String> inputs = new ArrayList<>();
        inputs.add(strArr[2]);
        if (strArr.length >= 4) {
            inputs.add(strArr[3]);
        }

        ConfusionRuleEvaluator evaluator = new ConfusionRuleEvaluator(language, languageModel, false);
        evaluator.setVerboseMode(false);

        // NOTE(review): the English confusion sets are loaded regardless of <langCode>;
        // confirm whether that is intended before parameterizing the path.
        Map<String, List<ConfusionSet>> confusionSetsByWord;
        try (InputStream stream = JLanguageTool.getDataBroker().getFromResourceDirAsStream("/en/confusion_sets.txt")) {
            confusionSetsByWord = new ConfusionSetLoader().loadConfusionSet(stream);
        }

        Set<String> evaluatedPairs = new HashSet<>();
        int evaluatedCount = 0;
        float fMeasureTotal = 0.0f;
        for (List<ConfusionSet> confusionSets : confusionSetsByWord.values()) {
            for (ConfusionSet confusionSet : confusionSets) {
                Set<ConfusionString> members = confusionSet.getSet();
                if (members.size() != 2) {
                    System.out.println("Skipping confusion set with size != 2: " + confusionSet);
                    continue;
                }
                Iterator<ConfusionString> memberIterator = members.iterator();
                ConfusionString first = memberIterator.next();
                ConfusionString second = memberIterator.next();
                String word1 = first.getString();
                String word2 = second.getString();
                // Skip pairs that were already evaluated; add() returns false for a known key.
                if (!evaluatedPairs.add(word1 + " " + word2)) {
                    continue;
                }
                List<Long> factors = Arrays.asList(Long.valueOf(confusionSet.getFactor()));
                RuleEvalResult result = evaluator.run(inputs, word1, word2, MAX_SENTENCES, factors)
                        .values().iterator().next();
                String label1 = describe(first);
                String label2 = describe(second);
                // Emit the two labels in lexicographic order so the output is stable.
                String ruleLine = label1.compareTo(label2) < 0
                        ? label1 + "; " + label2 + "; " + confusionSet.getFactor()
                        : label2 + "; " + label1 + "; " + confusionSet.getFactor();
                // StringUtils.repeat yields "" for non-positive counts, so long lines are safe.
                System.out.println(ruleLine + StringUtils.repeat(" ", SUMMARY_COLUMN - ruleLine.length()) + "# " + result.getSummary());
                evaluatedCount++;
                fMeasureTotal = (float) (fMeasureTotal + FMeasure.getWeightedFMeasure(result.getPrecision(), result.getRecall()));
            }
        }
        if (evaluatedCount == 0) {
            // Avoid printing "NaN" from a 0/0 division when nothing was evaluated.
            System.out.println("Average f-measure: n/a (no confusion sets were evaluated)");
        } else {
            System.out.println("Average f-measure: " + (fMeasureTotal / evaluatedCount));
        }
    }

    /** Returns the word, followed by {@code "|" + description} when a description is present. */
    private static String describe(ConfusionString confusionString) {
        String word = confusionString.getString();
        String description = confusionString.getDescription();
        return description != null ? word + "|" + description : word;
    }
}
