package org.languagetool.rules.ngrams;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.ResourceBundle;
import java.util.Set;
import org.languagetool.AnalyzedSentence;
import org.languagetool.AnalyzedToken;
import org.languagetool.Experimental;
import org.languagetool.Language;
import org.languagetool.languagemodel.LanguageModel;
import org.languagetool.rules.Categories;
import org.languagetool.rules.ITSIssueType;
import org.languagetool.rules.Rule;
import org.languagetool.rules.RuleMatch;
import org.languagetool.rules.patterns.PatternRule;
import org.languagetool.rules.patterns.PatternToken;
import org.languagetool.synthesis.Synthesizer;
import org.languagetool.tokenizers.Tokenizer;

/**
 * Rule that reports word trigrams whose pseudo-probability in the configured
 * {@link LanguageModel} falls below a threshold (see {@link #setMinProbability(double)}).
 *
 * <p>For every reported trigram the rule tries to synthesize alternative forms of the
 * middle word (driven by the POS-tag pairs in {@code REPLACEMENTS}) and suggests those
 * whose trigram is at least as probable as the original one. If alternatives could be
 * considered but none of them qualified, the match is dropped.
 */
@Experimental
public class NgramProbabilityRule extends Rule {
    public static final String RULE_ID = "NGRAM_RULE";

    /** Compile-time switch for the diagnostic output written by {@link #debug(String, Object...)}. */
    private static final boolean DEBUG = false;

    /** POS-tag substitutions tried on the middle token of a rare trigram. */
    private static final List<Replacement> REPLACEMENTS = Collections.unmodifiableList(Arrays.asList(
            new Replacement("VBG", "VB"),
            new Replacement("VBG", "VBN"),
            new Replacement("VB", "VBG"),
            new Replacement("VB", "VBZ"),
            new Replacement("VB", "VBN"),
            new Replacement("VBZ", "VB"),
            new Replacement("VBZ", "VBP"),
            new Replacement("NNS", "NN"),
            new Replacement("NN", "NNS")));

    /** Pattern-based replacements; currently none are configured. */
    private static final List<AdvancedReplacement> ADV_REPLACEMENTS = Collections.emptyList();

    private final LanguageModel lm;
    private final Language language;

    /** Trigrams with a pseudo-probability strictly below this value are reported. */
    private double minProbability = 1.0E-14d;

    /**
     * A replacement that applies where a token pattern matches: the text
     * {@code alternativeText} (with {@code $1} standing for the current word) is tried
     * in place of the current word.
     */
    public static class AdvancedReplacement {
        final List<PatternToken> patternTokens;
        final String alternativeText;

        AdvancedReplacement(List<PatternToken> list, String str) {
            this.patternTokens = list;
            this.alternativeText = str;
        }
    }

    /** A candidate replacement for the middle token together with the probability of the resulting ngram. */
    public class Alternative {
        final String token;
        final Probability p;

        Alternative(String str, Probability probability) {
            this.token = str;
            this.p = probability;
        }
    }

    /**
     * Result of the search for alternatives: the accepted candidates plus a flag telling
     * whether any candidate could be evaluated at all.
     */
    public class Alternatives {
        final List<Alternative> alternatives;
        final boolean alternativesConsidered;

        Alternatives(List<Alternative> list, boolean z) {
            this.alternatives = list;
            this.alternativesConsidered = z;
        }
    }

    /** Maps a POS tag (matched as a regular expression) to the tag used to synthesize an alternative form. */
    public static class Replacement {
        final String tagRegex;
        final String alternativeTag;

        Replacement(String str, String str2) {
            this.tagRegex = str;
            this.alternativeTag = str2;
        }
    }

    /**
     * @param resourceBundle message bundle passed to the {@link Rule} base class and used to resolve the category
     * @param languageModel ngram model queried for trigram probabilities; must not be {@code null}
     * @param language language providing the synthesizer and word tokenizer; must not be {@code null}
     * @throws NullPointerException if {@code languageModel} or {@code language} is {@code null}
     */
    public NgramProbabilityRule(ResourceBundle resourceBundle, LanguageModel languageModel, Language language) {
        super(resourceBundle);
        setCategory(Categories.TYPOS.getCategory(resourceBundle));
        setLocQualityIssueType(ITSIssueType.NonConformance);
        this.lm = Objects.requireNonNull(languageModel);
        this.language = Objects.requireNonNull(language);
    }

    @Override
    public String getId() {
        return RULE_ID;
    }

    /**
     * Sets the threshold below which a trigram's pseudo-probability triggers a match.
     *
     * @param d the new minimum probability
     */
    @Experimental
    public void setMinProbability(double d) {
        this.minProbability = d;
    }

    /**
     * Slides over the sentence's tokens and reports each trigram (previous, current, next)
     * whose pseudo-probability is below the configured minimum.
     *
     * @param analyzedSentence the sentence to check
     * @return the accepted matches, possibly empty
     * @throws IOException declared because the language model, synthesizer and pattern-rule calls may throw it
     */
    @Override
    public RuleMatch[] match(AnalyzedSentence analyzedSentence) throws IOException {
        List<GoogleToken> tokens = GoogleToken.getGoogleTokens(analyzedSentence, true, getGoogleStyleWordTokenizer());
        List<RuleMatch> matches = new ArrayList<>();
        GoogleToken prevPrevToken = null;
        GoogleToken prevToken = null;
        int index = 0;
        for (GoogleToken current : tokens) {
            String token = current.token;
            // A trigram is only evaluated once two tokens precede the current one and one follows it.
            if (prevPrevToken != null && prevToken != null && index < tokens.size() - 1) {
                GoogleToken next = tokens.get(index + 1);
                Probability p = this.lm.getPseudoProbability(Arrays.asList(prevToken.token, token, next.token));
                if (p.getProb() < this.minProbability) {
                    // Use the token text explicitly so the message does not depend on GoogleToken.toString().
                    String ngram = prevToken.token + " " + token + " " + next.token;
                    Alternatives found = getBetterAlternatives(prevToken, token, next, current, p, analyzedSentence);
                    // Report when no alternative could be evaluated, or when at least one qualified.
                    if (!found.alternativesConsidered || !found.alternatives.isEmpty()) {
                        String message = "The phrase '" + ngram + "' rarely occurs in the reference corpus (" + p.getOccurrences() + " times)";
                        RuleMatch ruleMatch = new RuleMatch(this, analyzedSentence, prevToken.startPos, next.endPos, message);
                        List<String> suggestions = new ArrayList<>();
                        for (Alternative alternative : found.alternatives) {
                            suggestions.add(prevToken.token + " " + alternative.token + " " + next.token);
                        }
                        ruleMatch.setSuggestedReplacements(suggestions);
                        if (acceptMatch(ruleMatch, p, analyzedSentence)) {
                            matches.add(ruleMatch);
                        }
                    } else {
                        debug("Ignoring match as all alternatives are less probable: '%s' in '%s'\n", ngram, analyzedSentence.getText());
                    }
                }
            }
            prevPrevToken = prevToken;
            prevToken = current;
            index++;
        }
        return matches.toArray(new RuleMatch[0]);
    }

    /**
     * Hook that lets subclasses veto a match; this implementation accepts every match.
     *
     * @param ruleMatch the candidate match
     * @param probability the probability computed for the matched trigram
     * @param analyzedSentence the sentence being checked
     * @return {@code true} to keep the match
     */
    protected boolean acceptMatch(RuleMatch ruleMatch, Probability probability, AnalyzedSentence analyzedSentence) {
        return true;
    }

    /**
     * Collects alternatives for the middle word from all simple and advanced replacements.
     *
     * @param prevToken token before the current one
     * @param tokenText text of the current token
     * @param next token after the current one
     * @param current the current token
     * @param p probability of the original trigram
     * @param sentence sentence being checked (needed to evaluate the advanced replacement patterns)
     * @return the accepted alternatives and whether any replacement was applicable
     */
    private Alternatives getBetterAlternatives(GoogleToken prevToken, String tokenText, GoogleToken next, GoogleToken current, Probability p, AnalyzedSentence sentence) throws IOException {
        List<Alternative> better = new ArrayList<>();
        boolean considered = false;
        for (Replacement replacement : REPLACEMENTS) {
            Optional<List<Alternative>> candidates = getBetterAlternatives(replacement, prevToken, current, next, p);
            if (candidates.isPresent()) {
                better.addAll(candidates.get());
                considered = true;
            }
        }
        for (AdvancedReplacement advanced : ADV_REPLACEMENTS) {
            PatternRule patternRule = new PatternRule("tmpId", this.language, advanced.patternTokens, "unused_description", "unused_message", "unused_shortMessage");
            for (RuleMatch patternMatch : patternRule.match(sentence)) {
                boolean strictlyInside = current.startPos > patternMatch.getFromPos() && current.endPos < patternMatch.getToPos();
                if (!strictlyInside) {
                    continue;
                }
                String replacementText = advanced.alternativeText.replace("$1", tokenText);
                List<String> ngram = new ArrayList<>();
                ngram.add(prevToken.token);
                Collections.addAll(ngram, replacementText.split(" "));
                ngram.add(next.token);
                Probability replacementProb = this.lm.getPseudoProbability(ngram);
                if (replacementProb.getProb() * 1000000.0d > p.getProb()) {
                    better.add(new Alternative(replacementText, replacementProb));
                    debug("More probable: %s\n", replacementText);
                } else {
                    debug("Less probable: %s\n", replacementText);
                }
                considered = true;
            }
        }
        return new Alternatives(better, considered);
    }

    /**
     * Synthesizes forms of {@code token} for one tag replacement and keeps those whose
     * trigram is at least as probable as the original.
     *
     * @return an empty {@code Optional} if the token has no reading matching the replacement's
     *         tag regex or the language has no synthesizer; otherwise the (possibly empty) list
     *         of accepted alternatives
     */
    private Optional<List<Alternative>> getBetterAlternatives(Replacement replacement, GoogleToken prevToken, GoogleToken token, GoogleToken next, Probability p) throws IOException {
        Optional<AnalyzedToken> reading = getByPosTag(token.getPosTags(), replacement.tagRegex);
        if (!reading.isPresent()) {
            return Optional.empty();
        }
        Synthesizer synthesizer = this.language.getSynthesizer();
        if (synthesizer == null) {
            return Optional.empty();
        }
        List<Alternative> better = new ArrayList<>();
        List<String> originalNgram = Arrays.asList(prevToken.token, token.token, next.token);
        AnalyzedToken lemmaToken = new AnalyzedToken(token.token, "not_used", reading.get().getLemma());
        for (String form : synthesizer.synthesize(lemmaToken, replacement.alternativeTag)) {
            // Skip the form identical to the original word: it is not an alternative. The comparison
            // must be against the token text; comparing a String with the GoogleToken is never true.
            if (form.equals(token.token)) {
                continue;
            }
            List<String> alternativeNgram = Arrays.asList(prevToken.token, form, next.token);
            Probability alternativeProb = this.lm.getPseudoProbability(alternativeNgram);
            if (alternativeProb.getProb() >= p.getProb()) {
                debug("More probable alternative to '%s': %s\n", originalNgram, alternativeNgram);
                better.add(new Alternative(form, alternativeProb));
            } else {
                debug("Less probable alternative to '%s': %s\n", originalNgram, alternativeNgram);
            }
        }
        return Optional.of(better);
    }

    /**
     * Returns the first reading whose POS tag is non-null and fully matches {@code tagRegex}.
     */
    private Optional<AnalyzedToken> getByPosTag(Set<AnalyzedToken> readings, String tagRegex) {
        for (AnalyzedToken reading : readings) {
            String posTag = reading.getPOSTag();
            if (posTag != null && posTag.matches(tagRegex)) {
                return Optional.of(reading);
            }
        }
        return Optional.empty();
    }

    @Override
    public String getDescription() {
        return "Assume errors for phrases (ngrams) that occur rarely in a reference index";
    }

    /**
     * Returns the tokenizer used to split the sentence into ngram tokens; by default the
     * language's word tokenizer. Subclasses may override this.
     */
    protected Tokenizer getGoogleStyleWordTokenizer() {
        return this.language.getWordTokenizer();
    }

    /** Prints a formatted diagnostic message to standard output when {@code DEBUG} is enabled; otherwise does nothing. */
    private void debug(String format, Object... args) {
        if (DEBUG) {
            System.out.printf(format, args);
        }
    }
}
