package org.languagetool.dev.bigdata;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import joptsimple.internal.Strings;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;
import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.JLanguageTool;
import org.languagetool.Languages;
import org.languagetool.tools.StringTools;

/* loaded from: input_file:org/languagetool/dev/bigdata/GermanUppercasePhraseFinder.class */
/**
 * Command-line tool that iterates over every term of the {@code ngram} field of a
 * Lucene index and prints those phrases whose variant with each word's first
 * character uppercased is stored with a higher {@code count} than the phrase itself.
 *
 * <p>Each reported line has the form
 * {@code <ratio> <uppercaseCount> <uppercasePhrase> <originalCount> <originalPhrase>}.
 */
final class GermanUppercasePhraseFinder {

    /** Every space-separated word of a phrase must have at least this many characters. */
    private static final long MIN_TERM_LEN = 4;

    /** A phrase is only reported if it or its uppercased variant occurs more often than this. */
    private static final long LIMIT = 500;

    /** Name of the index field that holds the phrases. */
    private static final String NGRAM_FIELD = "ngram";

    /** Name of the stored field that holds a phrase's occurrence count. */
    private static final String COUNT_FIELD = "count";

    /** A progress line is written to stderr when a candidate's term number is a multiple of this. */
    private static final int PROGRESS_INTERVAL = 10000;

    private GermanUppercasePhraseFinder() {
    }

    /**
     * Entry point.
     *
     * @param strArr exactly one element: the directory of the ngram Lucene index
     * @throws IOException if the index cannot be opened or read, or text analysis fails
     */
    public static void main(String[] strArr) throws IOException {
        if (strArr.length != 1) {
            System.out.println("Usage: " + GermanUppercasePhraseFinder.class.getSimpleName() + " <ngramIndexDir>");
            System.exit(1);
        }
        JLanguageTool languageTool = new JLanguageTool(Languages.getLanguageForShortCode("de"));
        // Both the directory and the reader hold index resources; close them
        // deterministically even when an exception aborts the scan.
        try (FSDirectory directory = FSDirectory.open(new File(strArr[0]).toPath());
             DirectoryReader reader = DirectoryReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TermsEnum termsEnum = MultiFields.getFields(reader).terms(NGRAM_FIELD).iterator();
            int termNumber = 0;
            BytesRef termBytes;
            while ((termBytes = termsEnum.next()) != null) {
                String phrase = termBytes.utf8ToString();
                termNumber++;
                String uppercasePhrase = toUppercaseCandidate(phrase);
                if (uppercasePhrase == null) {
                    continue;
                }
                long phraseCount = getOccurrenceCount(reader, searcher, phrase);
                long uppercaseCount = getOccurrenceCount(reader, searcher, uppercasePhrase);
                // Progress is only reported for terms that passed the candidate filter.
                if (termNumber % PROGRESS_INTERVAL == 0) {
                    System.err.println(termNumber + " @ " + phrase);
                }
                boolean frequentEnough = phraseCount > LIMIT || uppercaseCount > LIMIT;
                if (frequentEnough && uppercaseCount > phraseCount && isRelevant(languageTool, phrase)) {
                    float ratio = ((float) uppercaseCount) / ((float) phraseCount);
                    // Phrases come from the index and may contain '%', so they must be passed
                    // as arguments rather than spliced into the format string. The counts use
                    // %s so they are rendered exactly like Long.toString().
                    System.out.printf("%.2f %s %s %s %s\n", ratio, uppercaseCount, uppercasePhrase, phraseCount, phrase);
                }
            }
        }
    }

    /**
     * Builds the variant of {@code phrase} in which the first character of every
     * space-separated word is uppercased, provided the phrase qualifies as a candidate.
     *
     * @param phrase the phrase as stored in the index
     * @return the uppercased variant, or {@code null} if any word is shorter than
     *         {@link #MIN_TERM_LEN}, if the number of words changed by uppercasing is
     *         0 or 2, or if the variant equals the phrase
     */
    private static String toUppercaseCandidate(String phrase) {
        List<String> uppercasedWords = new ArrayList<>();
        int changedWords = 0;
        for (String word : phrase.split(" ")) {
            if (word.length() < MIN_TERM_LEN) {
                return null;
            }
            String uppercased = StringTools.uppercaseFirstChar(word);
            if (!word.equals(uppercased)) {
                changedWords++;
            }
            uppercasedWords.add(uppercased);
        }
        if (changedWords == 0 || changedWords == 2) {
            return null;
        }
        String uppercasePhrase = Strings.join(uppercasedWords, " ");
        return phrase.equals(uppercasePhrase) ? null : uppercasePhrase;
    }

    /**
     * Checks the part-of-speech pattern of a phrase.
     *
     * @param jLanguageTool the analyzer to run on the phrase
     * @param str the phrase to analyze
     * @return {@code true} if the first analyzed sentence has exactly three non-whitespace
     *         tokens, of which the second has a POS tag containing {@code ADJ:} and the
     *         third one containing {@code SUB:}
     * @throws IOException if the analysis fails
     */
    private static boolean isRelevant(JLanguageTool jLanguageTool, String str) throws IOException {
        AnalyzedTokenReadings[] tokens = jLanguageTool.analyzeText(str).get(0).getTokensWithoutWhitespace();
        if (tokens.length != 3) {
            return false;
        }
        return tokens[1].hasPartialPosTag("ADJ:") && tokens[2].hasPartialPosTag("SUB:");
    }

    /**
     * Looks up the stored occurrence count of a phrase.
     *
     * @param indexReader reader used to load the matching document
     * @param indexSearcher searcher used to find the phrase
     * @param str the exact phrase to look up in the {@code ngram} field
     * @return the {@code count} value of the first matching document, or 0 if nothing matches
     * @throws IOException if the index cannot be read
     */
    private static long getOccurrenceCount(IndexReader indexReader, IndexSearcher indexSearcher, String str) throws IOException {
        TopDocs hits = indexSearcher.search(new TermQuery(new Term(NGRAM_FIELD, str)), 5);
        if (hits.totalHits == 0) {
            return 0L;
        }
        int docId = hits.scoreDocs[0].doc;
        return Long.parseLong(indexReader.document(docId).get(COUNT_FIELD));
    }
}
