package org.languagetool.dev.bigdata;

import java.io.File;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.Arrays;
import java.util.regex.Pattern;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;
import org.languagetool.JLanguageTool;
import org.languagetool.languagemodel.LuceneLanguageModel;
import org.languagetool.rules.spelling.hunspell.Hunspell;
import org.languagetool.tools.StringTools;

/**
 * Development tool that iterates over every term of the {@code ngram} field in a German
 * 2-gram Lucene index and prints a "Meinten Sie ...?" line for word pairs whose joined
 * spelling occurs much more often than the split spelling.
 *
 * <p>A pair is only considered when the second word does not start with an uppercase
 * letter, when the joined word and both parts are accepted by the German Hunspell
 * dictionary, and when every part is longer than one character and consists solely of
 * the letters matched by {@link #WORD_PATTERN}.
 *
 * <p>The index locations are hard-coded absolute paths, so this is meant to be run
 * manually on a machine that has them.
 */
public class SplitWordStats {
    private static final float MIN = 0.1f;
    private static final float MAX = 100.0f;

    /** Matches tokens made up only of ASCII letters, German umlauts and sharp s. */
    private static final Pattern WORD_PATTERN = Pattern.compile("[a-zA-ZöäüÖÄÜß]+");

    /** File extension stripped from the dictionary resource path. */
    private static final String DICT_SUFFIX = ".dic";

    /**
     * Walks the 2-gram index and reports join candidates via {@link #countPair}.
     *
     * @throws IOException if the index or the dictionary cannot be opened or read
     */
    private void run() throws IOException {
        File ngramDir = new File("/home/dnaber/data/google-ngram-index/de");
        File twoGramDir = new File("/home/dnaber/data/google-ngram-index/de/2grams");
        Hunspell.Dictionary dictionary = Hunspell.getInstance().getDictionary(getDictionaryPath("/de/hunspell/de_DE.dic"));
        // try-with-resources closes the language model, the reader and the directory exactly
        // once (in reverse order) and attaches close failures as suppressed exceptions.
        try (LuceneLanguageModel languageModel = new LuceneLanguageModel(ngramDir);
             FSDirectory directory = FSDirectory.open(twoGramDir.toPath());
             DirectoryReader reader = DirectoryReader.open(directory)) {
            TermsEnum termsEnum = MultiFields.getFields(reader).terms("ngram").iterator();
            BytesRef term;
            while ((term = termsEnum.next()) != null) {
                String ngram = term.utf8ToString();
                String[] words = ngram.split(" ");
                // Skip terms without a second token instead of failing on words[1].
                if (words.length < 2 || StringTools.startsWithUppercase(words[1])) {
                    continue;
                }
                String joined = ngram.replace(" ", "");
                boolean allKnown = !dictionary.misspelled(joined)
                        && !dictionary.misspelled(words[0])
                        && !dictionary.misspelled(words[1]);
                if (allKnown && Arrays.stream(words).allMatch(SplitWordStats::isPlainWord)) {
                    countPair(languageModel, ngram, joined);
                }
            }
        }
    }

    /**
     * Tells whether a token has more than one character and consists only of letters
     * matched by {@link #WORD_PATTERN}.
     */
    private static boolean isPlainWord(String word) {
        return word.length() > 1 && WORD_PATTERN.matcher(word).matches();
    }

    /**
     * Resolves a dictionary resource to an absolute file path with the trailing
     * {@code .dic} extension removed.
     *
     * @param str resource path relative to the resource directory; the last four
     *            characters are cut off unconditionally, so it is expected to end in
     *            {@code .dic}
     * @return absolute path of the resource without its {@code .dic} extension
     * @throws RuntimeException if the resource URL cannot be converted to a URI
     */
    private String getDictionaryPath(String str) throws IOException {
        try {
            File dictFile = new File(JLanguageTool.getDataBroker().getFromResourceDirAsUrl(str).toURI());
            String absolutePath = dictFile.getAbsolutePath();
            return absolutePath.substring(0, absolutePath.length() - DICT_SUFFIX.length());
        } catch (URISyntaxException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Prints a suggestion for the joined word when it is sufficiently more frequent than
     * the split form.
     *
     * @param luceneLanguageModel language model used to look up occurrence counts
     * @param str the space-separated n-gram as stored in the index
     * @param str2 the same n-gram with all spaces removed
     */
    private static void countPair(LuceneLanguageModel luceneLanguageModel, String str, String str2) {
        float joinedCount = (float) luceneLanguageModel.getCount(str2);
        float splitCount = (float) luceneLanguageModel.getCount(Arrays.asList(str.split(" ")));
        float factor = joinedCount / splitCount;
        // NOTE(review): since MIN < MAX the MIN comparison is implied by the MAX comparison;
        // kept unchanged to preserve the current output - confirm the intended thresholds.
        if (factor >= MIN && factor > MAX) {
            System.out.println("#2 Meinten Sie " + str2 + "?");
        }
    }

    /**
     * Command-line entry point; takes no arguments.
     *
     * @throws IOException if the index or the dictionary cannot be read
     */
    public static void main(String[] strArr) throws IOException {
        new SplitWordStats().run();
    }
}
