package edu.berkeley.nlp.lm.io;

import edu.berkeley.nlp.lm.ArrayEncodedNgramLanguageModel;
import edu.berkeley.nlp.lm.ArrayEncodedProbBackoffLm;
import edu.berkeley.nlp.lm.ConfigOptions;
import edu.berkeley.nlp.lm.ContextEncodedProbBackoffLm;
import edu.berkeley.nlp.lm.NgramLanguageModel;
import edu.berkeley.nlp.lm.StringWordIndexer;
import edu.berkeley.nlp.lm.StupidBackoffLm;
import edu.berkeley.nlp.lm.WordIndexer;
import edu.berkeley.nlp.lm.array.LongArray;
import edu.berkeley.nlp.lm.collections.LongRepresentable;
import edu.berkeley.nlp.lm.map.AbstractNgramMap;
import edu.berkeley.nlp.lm.map.CompressedNgramMap;
import edu.berkeley.nlp.lm.map.ContextEncodedNgramMap;
import edu.berkeley.nlp.lm.map.HashNgramMap;
import edu.berkeley.nlp.lm.map.NgramMap;
import edu.berkeley.nlp.lm.map.NgramMapWrapper;
import edu.berkeley.nlp.lm.util.Logger;
import edu.berkeley.nlp.lm.util.LongRef;
import edu.berkeley.nlp.lm.values.CompressibleProbBackoffValueContainer;
import edu.berkeley.nlp.lm.values.CompressibleValueContainer;
import edu.berkeley.nlp.lm.values.CountValueContainer;
import edu.berkeley.nlp.lm.values.ProbBackoffPair;
import edu.berkeley.nlp.lm.values.UncompressedProbBackoffValueContainer;
import edu.berkeley.nlp.lm.values.UnrankedUncompressedProbBackoffValueContainer;
import edu.berkeley.nlp.lm.values.ValueContainer;
import java.io.File;
import java.io.IOException;
import java.util.List;

/* loaded from: input_file:edu/berkeley/nlp/lm/io/LmReaders.class */
/**
 * Static entry points for building n-gram language models and n-gram maps
 * from several sources, and for (de)serializing them:
 * <ul>
 * <li>readers driven by {@link ArpaLmReader} ({@code read*FromArpa}),</li>
 * <li>readers driven by {@link GoogleLmReader} ({@code read*FromGoogleNgramDir}),</li>
 * <li>Kneser-Ney estimation from text files via
 * {@link KneserNeyLmReaderCallback} ({@code *KneserNeyLmFromTextFile(s)}),</li>
 * <li>binary objects read/written through {@link IOUtils}.</li>
 * </ul>
 * Construction from a reader is done in passes: a first pass collects counts
 * in a {@link FirstPassCallback}, a second pass fills the n-gram map, and an
 * optional further pass re-adds n-grams the second pass reported as failures.
 */
public class LmReaders {
    /**
     * Reads a context-encoded LM using a fresh {@link StringWordIndexer}.
     *
     * @param str path handed to {@link ArpaLmReader}
     * @return the loaded model
     */
    public static ContextEncodedProbBackoffLm<String> readContextEncodedLmFromArpa(String str) {
        return readContextEncodedLmFromArpa(str, new StringWordIndexer());
    }

    /**
     * Reads a context-encoded LM with default {@link ConfigOptions} and
     * {@code Integer.MAX_VALUE} as the reader's integer limit.
     *
     * @param str path handed to {@link ArpaLmReader}
     * @param wordIndexer indexer used by the reader and stored in the model
     * @return the loaded model
     */
    public static <W> ContextEncodedProbBackoffLm<W> readContextEncodedLmFromArpa(String str, WordIndexer<W> wordIndexer) {
        return readContextEncodedLmFromArpa(str, wordIndexer, new ConfigOptions(), Integer.MAX_VALUE);
    }

    /**
     * Reads a context-encoded LM through a new {@link ArpaLmReader}.
     *
     * @param str path handed to {@link ArpaLmReader}
     * @param wordIndexer indexer used by the reader and stored in the model
     * @param configOptions options forwarded to map and model construction
     * @param i third {@link ArpaLmReader} constructor argument; presumably the
     *        maximum n-gram order to read (TODO confirm against ArpaLmReader)
     * @return the loaded model
     */
    public static <W> ContextEncodedProbBackoffLm<W> readContextEncodedLmFromArpa(String str, WordIndexer<W> wordIndexer, ConfigOptions configOptions, int i) {
        return readContextEncodedLmFromArpa(new ArpaLmReader(str, wordIndexer, i), wordIndexer, configOptions);
    }

    /**
     * Builds a context-encoded LM from an arbitrary prob/backoff reader. The
     * reader is parsed once for counting and at least once more to fill the
     * map, so it must support repeated {@code parse} calls.
     *
     * @param lmReader source of n-grams with {@link ProbBackoffPair} values
     * @param wordIndexer indexer stored in the resulting model
     * @param configOptions options forwarded to map and model construction
     * @return the loaded model
     */
    public static <W> ContextEncodedProbBackoffLm<W> readContextEncodedLmFromArpa(LmReader<ProbBackoffPair, ArpaLmReaderCallback<ProbBackoffPair>> lmReader, WordIndexer<W> wordIndexer, ConfigOptions configOptions) {
        // Counting pass is created with flag=false here (the array-encoded path uses true).
        FirstPassCallback<ProbBackoffPair> firstPass = firstPassArpa(lmReader, false);
        return secondPassContextEncoded(configOptions, lmReader, wordIndexer, firstPass, firstPass.getNumNgramsForEachWord());
    }

    /**
     * Reads an array-encoded LM using a fresh {@link StringWordIndexer}.
     *
     * @param str path handed to {@link ArpaLmReader}
     * @param z whether to build a compressed n-gram map
     * @return the loaded model
     */
    public static ArrayEncodedProbBackoffLm<String> readArrayEncodedLmFromArpa(String str, boolean z) {
        return readArrayEncodedLmFromArpa(str, z, new StringWordIndexer());
    }

    /**
     * Reads an array-encoded LM with default {@link ConfigOptions} and
     * {@code Integer.MAX_VALUE} as the reader's integer limit.
     *
     * @param str path handed to {@link ArpaLmReader}
     * @param z whether to build a compressed n-gram map
     * @param wordIndexer indexer used by the reader and stored in the model
     * @return the loaded model
     */
    public static <W> ArrayEncodedProbBackoffLm<W> readArrayEncodedLmFromArpa(String str, boolean z, WordIndexer<W> wordIndexer) {
        return readArrayEncodedLmFromArpa(str, z, wordIndexer, new ConfigOptions(), Integer.MAX_VALUE);
    }

    /**
     * Reads an array-encoded LM through a new {@link ArpaLmReader}.
     *
     * @param str path handed to {@link ArpaLmReader}
     * @param z whether to build a compressed n-gram map
     * @param wordIndexer indexer used by the reader and stored in the model
     * @param configOptions options forwarded to map and model construction
     * @param i third {@link ArpaLmReader} constructor argument; presumably the
     *        maximum n-gram order to read (TODO confirm against ArpaLmReader)
     * @return the loaded model
     */
    public static <W> ArrayEncodedProbBackoffLm<W> readArrayEncodedLmFromArpa(String str, boolean z, WordIndexer<W> wordIndexer, ConfigOptions configOptions, int i) {
        return readArrayEncodedLmFromArpa(new ArpaLmReader(str, wordIndexer, i), z, wordIndexer, configOptions);
    }

    /**
     * Builds an array-encoded LM from an arbitrary prob/backoff reader. The
     * reader is parsed once for counting and at least once more to fill the
     * map, so it must support repeated {@code parse} calls.
     *
     * @param lmReader source of n-grams with {@link ProbBackoffPair} values
     * @param z whether to build a compressed n-gram map
     * @param wordIndexer indexer stored in the resulting model
     * @param configOptions options forwarded to map and model construction
     * @return the loaded model
     */
    public static <W> ArrayEncodedProbBackoffLm<W> readArrayEncodedLmFromArpa(LmReader<ProbBackoffPair, ArpaLmReaderCallback<ProbBackoffPair>> lmReader, boolean z, WordIndexer<W> wordIndexer, ConfigOptions configOptions) {
        // Both the counting pass and the map are created with flag=true on this path.
        FirstPassCallback<ProbBackoffPair> firstPass = firstPassArpa(lmReader, true);
        return secondPassArrayEncoded(configOptions, lmReader, wordIndexer, firstPass, firstPass.getNumNgramsForEachWord(), true, z);
    }

    /**
     * Reads count n-grams via {@link GoogleLmReader} into a map wrapper, using
     * a fresh {@link StringWordIndexer}.
     *
     * @param str location handed to {@link GoogleLmReader}
     * @param z whether to build a compressed n-gram map
     * @return wrapper around the loaded n-gram map
     */
    public static NgramMapWrapper<String, LongRef> readNgramMapFromGoogleNgramDir(String str, boolean z) {
        return readNgramMapFromGoogleNgramDir(str, z, new StringWordIndexer());
    }

    /**
     * Reads count n-grams via {@link GoogleLmReader} and wraps the resulting
     * map together with the model's word indexer.
     *
     * @param str location handed to {@link GoogleLmReader}
     * @param z whether to build a compressed n-gram map
     * @param wordIndexer indexer used while reading
     * @return wrapper around the loaded n-gram map
     */
    public static <W> NgramMapWrapper<W, LongRef> readNgramMapFromGoogleNgramDir(String str, boolean z, WordIndexer<W> wordIndexer) {
        // With the Kneser-Ney flag false, readLmFromGoogleNgramDir returns the StupidBackoffLm built by secondPassGoogle.
        StupidBackoffLm stupidBackoffLm = (StupidBackoffLm) readLmFromGoogleNgramDir(str, z, false, wordIndexer, new ConfigOptions());
        return new NgramMapWrapper<>(stupidBackoffLm.getNgramMap(), stupidBackoffLm.getWordIndexer());
    }

    /**
     * Loads a serialized n-gram map using a fresh {@link StringWordIndexer}.
     *
     * @param str file read with {@link IOUtils#readObjFileHard}
     * @param str2 vocabulary source passed to {@link GoogleLmReader#addToIndexer}
     * @return wrapper around the deserialized map
     */
    public static NgramMapWrapper<String, LongRef> readNgramMapFromBinary(String str, String str2) {
        return readNgramMapFromBinary(str, str2, new StringWordIndexer());
    }

    /**
     * Loads a serialized n-gram map. The indexer is first populated from
     * {@code str2} and locked, then the map object is read from {@code str}.
     *
     * @param str file read with {@link IOUtils#readObjFileHard}
     * @param str2 vocabulary source passed to {@link GoogleLmReader#addToIndexer}
     * @param wordIndexer indexer to populate; it is trimmed and locked here
     * @return wrapper around the deserialized map
     */
    public static <W> NgramMapWrapper<W, LongRef> readNgramMapFromBinary(String str, String str2, WordIndexer<W> wordIndexer) {
        GoogleLmReader.addToIndexer(wordIndexer, str2);
        wordIndexer.trimAndLock();
        return new NgramMapWrapper<>((NgramMap) IOUtils.readObjFileHard(str), wordIndexer);
    }

    /**
     * Builds an LM via {@link GoogleLmReader} with a fresh
     * {@link StringWordIndexer} and default {@link ConfigOptions}.
     *
     * @param str location handed to {@link GoogleLmReader}
     * @param z whether to build a compressed n-gram map
     * @param z2 {@code true} to estimate a Kneser-Ney model from the counts,
     *        {@code false} to build a {@link StupidBackoffLm} over raw counts
     * @return the loaded model
     */
    public static ArrayEncodedNgramLanguageModel<String> readLmFromGoogleNgramDir(String str, boolean z, boolean z2) {
        return readLmFromGoogleNgramDir(str, z, z2, new StringWordIndexer(), new ConfigOptions());
    }

    /**
     * Builds an LM via {@link GoogleLmReader}.
     *
     * @param str location handed to {@link GoogleLmReader}
     * @param z whether to build a compressed n-gram map
     * @param z2 {@code true} to feed the counts through
     *        {@link KneserNeyLmReaderCallback} and build an array-encoded
     *        prob/backoff model; {@code false} to build a
     *        {@link StupidBackoffLm} over raw counts
     * @param wordIndexer indexer used while reading and stored in the model
     * @param configOptions options forwarded to readers, maps and the model
     * @return the loaded model
     */
    public static <W> ArrayEncodedNgramLanguageModel<W> readLmFromGoogleNgramDir(String str, boolean z, boolean z2, WordIndexer<W> wordIndexer, ConfigOptions configOptions) {
        if (z2) {
            // NOTE(review): the literal 5 sits where createKneserNeyLmFromTextFiles passes its
            // caller-supplied int; presumably the maximum n-gram order. Confirm before changing.
            KneserNeyLmReaderCallback kneserNeyLmReaderCallback = new KneserNeyLmReaderCallback(wordIndexer, 5, configOptions);
            new GoogleLmReader(str, wordIndexer, configOptions).parse((NgramOrderedLmReaderCallback<LongRef>) kneserNeyLmReaderCallback);
            // The callback is then used as the reader for the ARPA-style loading path.
            return readArrayEncodedLmFromArpa(kneserNeyLmReaderCallback, z, wordIndexer, configOptions);
        }
        FirstPassCallback<LongRef> firstPass = firstPassGoogle(str, wordIndexer, configOptions);
        return secondPassGoogle(configOptions, new GoogleLmReader(str, wordIndexer, configOptions), wordIndexer, firstPass, firstPass.getNumNgramsForEachWord(), z);
    }

    /**
     * Estimates a Kneser-Ney model from text files and loads it as a
     * context-encoded LM, writing the intermediate model to a temporary file
     * that is registered for deletion on JVM exit.
     *
     * @param list input passed to {@link TextReader}
     * @param wordIndexer indexer used while reading and stored in the model
     * @param i integer passed to both {@link KneserNeyLmReaderCallback} and
     *        {@link ArpaLmReader}; presumably the n-gram order (TODO confirm)
     * @param configOptions options forwarded to estimation and loading
     * @return the loaded model
     */
    public static <W> ContextEncodedProbBackoffLm<W> readContextEncodedKneserNeyLmFromTextFile(List<String> list, WordIndexer<W> wordIndexer, int i, ConfigOptions configOptions) {
        return readContextEncodedKneserNeyLmFromTextFile(list, wordIndexer, i, configOptions, getTempFile());
    }

    /**
     * Estimates a Kneser-Ney model from text files and loads it as an
     * array-encoded LM, writing the intermediate model to a temporary file
     * that is registered for deletion on JVM exit.
     *
     * @param list input passed to {@link TextReader}
     * @param wordIndexer indexer used while reading and stored in the model
     * @param i integer passed to both {@link KneserNeyLmReaderCallback} and
     *        {@link ArpaLmReader}; presumably the n-gram order (TODO confirm)
     * @param configOptions options forwarded to estimation and loading
     * @param z whether to build a compressed n-gram map
     * @return the loaded model
     */
    public static <W> ArrayEncodedProbBackoffLm<W> readKneserNeyLmFromTextFile(List<String> list, WordIndexer<W> wordIndexer, int i, ConfigOptions configOptions, boolean z) {
        return readKneserNeyLmFromTextFile(list, wordIndexer, i, z, configOptions, getTempFile());
    }

    /**
     * Estimates a Kneser-Ney model from text files into {@code file}, then
     * loads that file as a context-encoded LM.
     *
     * @param list input passed to {@link TextReader}
     * @param wordIndexer indexer used while reading and stored in the model
     * @param i integer passed to both {@link KneserNeyLmReaderCallback} and
     *        {@link ArpaLmReader}; presumably the n-gram order (TODO confirm)
     * @param configOptions options forwarded to estimation and loading
     * @param file destination of the intermediate model; it is not deleted here
     * @return the loaded model
     */
    public static <W> ContextEncodedProbBackoffLm<W> readContextEncodedKneserNeyLmFromTextFile(List<String> list, WordIndexer<W> wordIndexer, int i, ConfigOptions configOptions, File file) {
        createKneserNeyLmFromTextFiles(list, wordIndexer, i, file, configOptions);
        return readContextEncodedLmFromArpa(file.getPath(), wordIndexer, configOptions, i);
    }

    /**
     * Estimates a Kneser-Ney model from text files into {@code file}, then
     * loads that file as an array-encoded LM.
     *
     * @param list input passed to {@link TextReader}
     * @param wordIndexer indexer used while reading and stored in the model
     * @param i integer passed to both {@link KneserNeyLmReaderCallback} and
     *        {@link ArpaLmReader}; presumably the n-gram order (TODO confirm)
     * @param z whether to build a compressed n-gram map
     * @param configOptions options forwarded to estimation and loading
     * @param file destination of the intermediate model; it is not deleted here
     * @return the loaded model
     */
    public static <W> ArrayEncodedProbBackoffLm<W> readKneserNeyLmFromTextFile(List<String> list, WordIndexer<W> wordIndexer, int i, boolean z, ConfigOptions configOptions, File file) {
        createKneserNeyLmFromTextFiles(list, wordIndexer, i, file, configOptions);
        return readArrayEncodedLmFromArpa(file.getPath(), z, wordIndexer, configOptions, i);
    }

    /**
     * Runs Kneser-Ney estimation over text input and writes the result to
     * {@code file} through {@link KneserNeyFileWritingLmReaderCallback}.
     *
     * @param list input passed to {@link TextReader}
     * @param wordIndexer indexer shared by the text reader and the callbacks
     * @param i second {@link KneserNeyLmReaderCallback} constructor argument;
     *        presumably the n-gram order (TODO confirm)
     * @param file destination file
     * @param configOptions options forwarded to the Kneser-Ney callback
     */
    public static <W> void createKneserNeyLmFromTextFiles(List<String> list, WordIndexer<W> wordIndexer, int i, File file, ConfigOptions configOptions) {
        TextReader textReader = new TextReader(list, (WordIndexer) wordIndexer);
        KneserNeyLmReaderCallback kneserNeyLmReaderCallback = new KneserNeyLmReaderCallback(wordIndexer, i, configOptions);
        // First collect statistics from the text, then replay them into the file writer.
        textReader.parse(kneserNeyLmReaderCallback);
        kneserNeyLmReaderCallback.parse((ArpaLmReaderCallback<ProbBackoffPair>) new KneserNeyFileWritingLmReaderCallback(file, wordIndexer));
    }

    /**
     * Loads a serialized n-gram map as a {@link StupidBackoffLm}, using a
     * fresh {@link StringWordIndexer}.
     *
     * @param str file read with {@link IOUtils#readObjFileHard}
     * @param str2 vocabulary source passed to {@link GoogleLmReader#addToIndexer}
     * @return the loaded model
     */
    public static StupidBackoffLm<String> readGoogleLmBinary(String str, String str2) {
        return readGoogleLmBinary(str, new StringWordIndexer(), str2);
    }

    /**
     * Loads a serialized n-gram map as a {@link StupidBackoffLm} with default
     * {@link ConfigOptions}. The indexer is populated from {@code str2} and
     * locked before the map is read.
     *
     * @param str file read with {@link IOUtils#readObjFileHard}
     * @param wordIndexer indexer to populate; it is trimmed and locked here
     * @param str2 vocabulary source passed to {@link GoogleLmReader#addToIndexer}
     * @return the loaded model
     */
    public static <W> StupidBackoffLm<W> readGoogleLmBinary(String str, WordIndexer<W> wordIndexer, String str2) {
        GoogleLmReader.addToIndexer(wordIndexer, str2);
        wordIndexer.trimAndLock();
        NgramMap ngramMap = (NgramMap) IOUtils.readObjFileHard(str);
        return new StupidBackoffLm<>(ngramMap.getMaxNgramOrder(), wordIndexer, ngramMap, new ConfigOptions());
    }

    /**
     * Reads a previously written language model object.
     *
     * @param str file read with {@link IOUtils#readObjFileHard}
     * @return the object cast (unchecked) to {@link NgramLanguageModel}
     */
    public static <W> NgramLanguageModel<W> readLmBinary(String str) {
        // NOTE(review): if readObjFileHard uses Java native serialization, only load files
        // from trusted sources; the cast below does not validate the element type W.
        return (NgramLanguageModel) IOUtils.readObjFileHard(str);
    }

    /**
     * Writes a language model object with {@link IOUtils#writeObjFileHard}.
     *
     * @param ngramLanguageModel model to write
     * @param str destination file
     */
    public static <W> void writeLmBinary(NgramLanguageModel<W> ngramLanguageModel, String str) {
        IOUtils.writeObjFileHard(str, ngramLanguageModel);
    }

    /**
     * Creates a temporary file for an intermediate model and registers it for
     * deletion when the JVM exits.
     *
     * @return the new temporary file
     * @throws RuntimeException wrapping the {@link IOException} if creation fails
     */
    private static File getTempFile() {
        try {
            // The suffix needs its own leading dot; createTempFile does not insert one.
            File tempFile = File.createTempFile("berkeleylm", ".arpa");
            tempFile.deleteOnExit();
            return tempFile;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Second pass for the context-encoded path: fills the map (context-encoded
     * flag true, other flags false) and wraps it in the model.
     */
    private static <W> ContextEncodedProbBackoffLm<W> secondPassContextEncoded(ConfigOptions configOptions, LmReader<ProbBackoffPair, ArpaLmReaderCallback<ProbBackoffPair>> lmReader, WordIndexer<W> wordIndexer, FirstPassCallback<ProbBackoffPair> firstPassCallback, LongArray[] longArrayArr) {
        NgramMap<ProbBackoffPair> ngramMap = buildMapArpa(configOptions, lmReader, wordIndexer, firstPassCallback, longArrayArr, true, false, false);
        return new ContextEncodedProbBackoffLm<>(ngramMap.getMaxNgramOrder(), wordIndexer, (ContextEncodedNgramMap) ngramMap, configOptions);
    }

    /**
     * Second pass for the array-encoded path: fills the map (context-encoded
     * flag false) and wraps it in the model.
     *
     * @param z forwarded as {@code buildMapArpa}'s second flag
     * @param z2 whether to build a compressed n-gram map
     */
    private static <W> ArrayEncodedProbBackoffLm<W> secondPassArrayEncoded(ConfigOptions configOptions, LmReader<ProbBackoffPair, ArpaLmReaderCallback<ProbBackoffPair>> lmReader, WordIndexer<W> wordIndexer, FirstPassCallback<ProbBackoffPair> firstPassCallback, LongArray[] longArrayArr, boolean z, boolean z2) {
        NgramMap<ProbBackoffPair> ngramMap = buildMapArpa(configOptions, lmReader, wordIndexer, firstPassCallback, longArrayArr, false, z, z2);
        return new ArrayEncodedProbBackoffLm<>(ngramMap.getMaxNgramOrder(), wordIndexer, ngramMap, configOptions);
    }

    /**
     * Second pass for count-based input: stores values in a
     * {@link CountValueContainer} and wraps the map in a
     * {@link StupidBackoffLm} whose order is {@code longArrayArr.length}.
     *
     * @param z whether to build a compressed n-gram map
     */
    private static <W> StupidBackoffLm<W> secondPassGoogle(ConfigOptions configOptions, LmReader<LongRef, NgramOrderedLmReaderCallback<LongRef>> lmReader, WordIndexer<W> wordIndexer, FirstPassCallback<LongRef> firstPassCallback, LongArray[] longArrayArr, boolean z) {
        return new StupidBackoffLm<>(longArrayArr.length, wordIndexer, buildMapCommon(configOptions, wordIndexer, longArrayArr, firstPassCallback.getNumNgramsForEachOrder(), true, lmReader, new CountValueContainer(firstPassCallback.getValueCounter(), configOptions.valueRadix, false, new long[longArrayArr.length]), z), configOptions);
    }

    /**
     * Chooses the prob/backoff value container and builds the n-gram map.
     *
     * @param z whether the map is for a context-encoded LM
     * @param z2 forwarded to {@code buildMapCommon} as its word-position flag
     * @param z3 whether to build a compressed n-gram map
     * @throws RuntimeException if both {@code z} and {@code z3} are set
     */
    private static <W> NgramMap<ProbBackoffPair> buildMapArpa(ConfigOptions configOptions, LmReader<ProbBackoffPair, ArpaLmReaderCallback<ProbBackoffPair>> lmReader, WordIndexer<W> wordIndexer, FirstPassCallback<ProbBackoffPair> firstPassCallback, LongArray[] longArrayArr, boolean z, boolean z2, boolean z3) {
        final boolean contextEncoded = z;
        final boolean compress = z3;
        // Fail fast: reject the unsupported combination before allocating any value storage.
        if (contextEncoded && compress) {
            throw new RuntimeException("Compression is not supported by context-encoded LMs");
        }
        ValueContainer values;
        if (compress) {
            values = new CompressibleProbBackoffValueContainer(firstPassCallback.getValueCounter(), configOptions.valueRadix, contextEncoded, firstPassCallback.getNumNgramsForEachOrder());
        } else if (configOptions.storeRankedProbBackoffs) {
            values = new UncompressedProbBackoffValueContainer(firstPassCallback.getValueCounter(), configOptions.valueRadix, contextEncoded, firstPassCallback.getNumNgramsForEachOrder());
        } else {
            values = new UnrankedUncompressedProbBackoffValueContainer(contextEncoded, firstPassCallback.getNumNgramsForEachOrder());
        }
        return buildMapCommon(configOptions, wordIndexer, longArrayArr, firstPassCallback.getNumNgramsForEachOrder(), z2, lmReader, values, compress);
    }

    /**
     * Fills an n-gram map from {@code lmReader}. If the first fill reports
     * failed n-grams, the per-order and per-word counts are bumped for each
     * failure (mutating {@code jArr} and {@code longArrayArr}), storage is
     * cleared, a fresh map is created and the reader is parsed again with the
     * failure list handed to the callback.
     *
     * @param longArrayArr per-order, per-word n-gram counts
     * @param jArr number of n-grams for each order
     * @param z when {@code true} a failed n-gram is counted under its first
     *        word, otherwise under its last; also forwarded to map creation
     * @param z2 whether to build a compressed n-gram map
     * @return the populated map
     */
    private static <W, V extends Comparable<V>> NgramMap<V> buildMapCommon(ConfigOptions configOptions, WordIndexer<W> wordIndexer, LongArray[] longArrayArr, long[] jArr, boolean z, LmReader<V, ? super NgramMapAddingCallback<V>> lmReader, ValueContainer<V> valueContainer, boolean z2) {
        AbstractNgramMap ngramMap;
        List<int[]> failures;
        Logger.startTrack("Adding n-grams", new Object[0]);
        try {
            ngramMap = createNgramMap(configOptions, longArrayArr, jArr, z, valueContainer, z2);
            failures = tryBuildingNgramMap(configOptions, wordIndexer, lmReader, ngramMap);
        } finally {
            // Close the logging track even when parsing throws.
            Logger.endTrack();
        }
        if (!failures.isEmpty()) {
            Logger.startTrack(failures.size() + " missing suffixes or prefixes were found, doing another pass to add n-grams", new Object[0]);
            try {
                // Make room for every n-gram that could not be added the first time.
                for (int[] ngram : failures) {
                    int orderIndex = ngram.length - 1;
                    int word = ngram[z ? 0 : orderIndex];
                    jArr[orderIndex] = jArr[orderIndex] + 1;
                    longArrayArr[orderIndex].incrementCount(word, 1L);
                }
                // Release the old storage before allocating the resized replacement.
                for (int order = 0; order < jArr.length; order++) {
                    valueContainer.clearStorageForOrder(order);
                }
                // NOTE(review): "createFreshValues2" looks like a decompiler-renamed method; confirm the real name.
                ValueContainer<V> freshValues = valueContainer.createFreshValues2(jArr);
                ngramMap.clearStorage();
                ngramMap = createNgramMap(configOptions, longArrayArr, jArr, z, freshValues, z2);
                lmReader.parse(new NgramMapAddingCallback(ngramMap, failures));
            } finally {
                Logger.endTrack();
            }
        }
        return ngramMap;
    }

    /**
     * Parses {@code lmReader} into {@code ngramMap} once, locks the word
     * indexer when {@code configOptions.lockIndexer} is set, and returns the
     * n-grams the adding callback reported as failures.
     */
    private static <V, W> List<int[]> tryBuildingNgramMap(ConfigOptions configOptions, WordIndexer<W> wordIndexer, LmReader<V, ? super NgramMapAddingCallback<V>> lmReader, NgramMap<V> ngramMap) {
        // No failure list is supplied on this first attempt.
        NgramMapAddingCallback addingCallback = new NgramMapAddingCallback(ngramMap, null);
        lmReader.parse(addingCallback);
        if (configOptions.lockIndexer) {
            wordIndexer.trimAndLock();
        }
        return addingCallback.getFailures();
    }

    /**
     * Creates an empty map: a {@link CompressedNgramMap} when {@code z2} is
     * set, otherwise an implicit-word {@link HashNgramMap}.
     *
     * @param z forwarded to the hash map factory; unused for compressed maps
     * @param z2 whether to build a compressed n-gram map
     */
    private static <V> AbstractNgramMap<V> createNgramMap(ConfigOptions configOptions, LongArray[] longArrayArr, long[] jArr, boolean z, ValueContainer<V> valueContainer, boolean z2) {
        return z2 ? new CompressedNgramMap((CompressibleValueContainer) valueContainer, jArr, configOptions) : HashNgramMap.createImplicitWordHashNgramMap(valueContainer, configOptions, longArrayArr, z);
    }

    /**
     * Counting pass over a prob/backoff reader.
     *
     * @param z flag forwarded to the {@link FirstPassCallback} constructor
     */
    private static <W> FirstPassCallback<ProbBackoffPair> firstPassArpa(LmReader<ProbBackoffPair, ArpaLmReaderCallback<ProbBackoffPair>> lmReader, boolean z) {
        return firstPassCommon(lmReader, z);
    }

    /**
     * Counting pass over a new {@link GoogleLmReader}; the
     * {@link FirstPassCallback} flag is always {@code true} here.
     */
    private static <W> FirstPassCallback<LongRef> firstPassGoogle(String str, WordIndexer<W> wordIndexer, ConfigOptions configOptions) {
        return firstPassCommon(new GoogleLmReader(str, wordIndexer, configOptions), true);
    }

    /**
     * Parses {@code lmReader} into a new {@link FirstPassCallback} inside a
     * "Counting values" logging track.
     *
     * @param z flag forwarded to the {@link FirstPassCallback} constructor
     * @return the callback holding the collected counts
     */
    private static <V extends LongRepresentable<V>> FirstPassCallback<V> firstPassCommon(LmReader<V, ? super FirstPassCallback<V>> lmReader, boolean z) {
        Logger.startTrack("Counting values", new Object[0]);
        try {
            FirstPassCallback<V> firstPassCallback = new FirstPassCallback<>(z);
            lmReader.parse(firstPassCallback);
            return firstPassCallback;
        } finally {
            // Close the logging track even when parsing throws.
            Logger.endTrack();
        }
    }
}
