package edu.berkeley.nlp.lm.io;

import edu.berkeley.nlp.lm.WordIndexer;
import edu.berkeley.nlp.lm.util.Logger;
import edu.berkeley.nlp.lm.values.ProbBackoffPair;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.compress.compressors.bzip2.BZip2Constants;

/* loaded from: input_file:edu/berkeley/nlp/lm/io/ArpaLmReader.class */
/**
 * Reads a language model in ARPA text format and feeds every n-gram to an
 * {@link ArpaLmReaderCallback}.
 *
 * <p>The reader first scans the header for {@code ngram N=COUNT} lines, then walks the
 * n-gram sections. Each n-gram line is expected to be
 * {@code logProb<TAB>word1 word2 ...[<TAB>backoff]}; words are mapped to integer ids
 * through the supplied {@link WordIndexer}. Orders above {@code maxOrder} are not read.
 *
 * <p>Instances keep parsing state in fields, so a single instance must not be used for
 * concurrent {@link #parse} calls.
 *
 * @param <W> word type managed by the {@link WordIndexer}
 */
public class ArpaLmReader<W> implements LmReader<ProbBackoffPair, ArpaLmReaderCallback<ProbBackoffPair>> {
    public static final String START_SYMBOL = "<s>";
    public static final String END_SYMBOL = "</s>";
    public static final String UNK_SYMBOL = "<unk>";

    /** A progress message is logged every time this many lines have been read. */
    private static final int PROGRESS_LOG_INTERVAL = 100000;

    private BufferedReader reader;
    private int currentNGramLength = 1;
    int currentNGramCount = 0;
    /** Starts at 1 and is incremented by every {@link #readLine()} call. */
    private int lineNumber = 1;
    private final WordIndexer<W> wordIndexer;
    private final int maxOrder;
    private final String file;

    /**
     * Reads the next line from the underlying reader and advances the line counter.
     *
     * @return the next line, or {@code null} at end of input
     * @throws IOException if the underlying reader fails
     */
    protected String readLine() throws IOException {
        this.lineNumber++;
        return this.reader.readLine();
    }

    /**
     * @param file        path of the ARPA file; it is opened lazily by {@link #parse}
     * @param wordIndexer indexer used to map word strings to integer ids
     * @param maxOrder    highest n-gram order to read; higher-order sections are skipped
     */
    public ArpaLmReader(String file, WordIndexer<W> wordIndexer, int maxOrder) {
        this.file = file;
        this.wordIndexer = wordIndexer;
        this.maxOrder = maxOrder;
    }

    /**
     * Parses the whole file, reporting header counts and n-grams to {@code callback}, and
     * finally registers the start, end and unknown-word symbols with the word indexer.
     *
     * <p>The file is always closed before this method returns, including when parsing
     * stops early at {@code maxOrder} or fails with an exception.
     *
     * @param callback receiver of the parsed header counts and n-grams
     * @throws RuntimeException on I/O failure or malformed input
     */
    @Override // edu.berkeley.nlp.lm.io.LmReader
    public void parse(ArpaLmReaderCallback<ProbBackoffPair> callback) {
        this.currentNGramLength = 1;
        this.currentNGramCount = 0;
        this.lineNumber = 1;
        this.reader = IOUtils.openInHard(this.file);
        try {
            Logger.startTrack("Parsing ARPA language model file", new Object[0]);
            callback.initWithLengths(parseHeader());
            parseNGrams(callback);
            Logger.endTrack();
            callback.cleanup();
        } finally {
            // parseNGrams only closes the reader when it reaches end of file; make sure the
            // early-exit and failure paths do not leak the file handle.
            closeReaderQuietly();
        }
        this.wordIndexer.setStartSymbol(this.wordIndexer.getWord(this.wordIndexer.getOrAddIndexFromString(START_SYMBOL)));
        this.wordIndexer.setEndSymbol(this.wordIndexer.getWord(this.wordIndexer.getOrAddIndexFromString(END_SYMBOL)));
        this.wordIndexer.setUnkSymbol(this.wordIndexer.getWord(this.wordIndexer.getOrAddIndexFromString(UNK_SYMBOL)));
    }

    /**
     * Reads header lines up to and including the one containing {@code \1-grams:} and
     * collects the counts announced by {@code ngram N=COUNT} lines.
     *
     * @return the announced counts in file order, truncated to at most {@code maxOrder} entries
     * @throws RuntimeException if the input ends before the first n-gram section, a
     *                          {@code ngram} line has no {@code =}, a count is not a valid
     *                          number, or reading fails
     */
    protected List<Long> parseHeader() {
        List<Long> counts = new ArrayList<Long>();
        try {
            String line;
            do {
                line = readLine();
                if (line == null) {
                    throw new RuntimeException("Unexpected end of file while reading ARPA header of " + this.file);
                }
                if (line.startsWith("ngram ")) {
                    int equalsPos = line.indexOf('=');
                    if (equalsPos < 0) {
                        throw new RuntimeException("Bad ARPA header line " + line);
                    }
                    long count = Long.parseLong(line.substring(equalsPos + 1).trim());
                    // Counts for orders that will not be read are parsed (validated) but dropped.
                    if (counts.size() < this.maxOrder) {
                        counts.add(Long.valueOf(count));
                    }
                }
            } while (!line.contains("\\1-grams:"));
        } catch (IOException e) {
            throw new RuntimeException(e);
        } catch (NumberFormatException e) {
            throw new RuntimeException(e);
        }
        return counts;
    }

    /**
     * Reads the n-gram sections that follow the header.
     *
     * <p>Blank lines and lines starting with {@code \end} are ignored. Any other line
     * starting with a backslash is treated as the start of the next order's section: the
     * current order is reported as finished and, unless the next order exceeds
     * {@code maxOrder}, the next one is started. All remaining lines are parsed as n-grams
     * of the current order. At end of input the reader is closed and the current order is
     * reported as finished.
     *
     * @param callback receiver of order start/finish notifications and n-grams
     * @throws RuntimeException on I/O failure or malformed n-gram lines
     */
    protected void parseNGrams(ArpaLmReaderCallback<ProbBackoffPair> callback) {
        int linesRead = 0;
        Logger.startTrack("Reading 1-grams", new Object[0]);
        callback.handleNgramOrderStarted(this.currentNGramLength);
        try {
            // Reused for every line of the current order; reallocated when the order changes.
            int[] ngram = new int[this.currentNGramLength];
            String line;
            while ((line = readLine()) != null) {
                if (linesRead % PROGRESS_LOG_INTERVAL == 0) {
                    Logger.logs("Read " + linesRead + " lines");
                }
                linesRead++;
                if (line.length() == 0) {
                    continue;
                }
                if (line.charAt(0) != '\\') {
                    parseLine(callback, line, ngram);
                    continue;
                }
                if (line.startsWith("\\end")) {
                    continue;
                }
                // Any other backslash line marks the beginning of the next order's section.
                Logger.logs(this.currentNGramCount + " " + this.currentNGramLength + "-gram read.");
                Logger.endTrack();
                callback.handleNgramOrderFinished(this.currentNGramLength);
                this.currentNGramLength++;
                if (this.currentNGramLength > this.maxOrder) {
                    // Higher orders were not requested; the caller (parse) closes the reader.
                    return;
                }
                ngram = new int[this.currentNGramLength];
                this.currentNGramCount = 0;
                callback.handleNgramOrderStarted(this.currentNGramLength);
                Logger.startTrack("Reading " + this.currentNGramLength + "-grams", new Object[0]);
            }
            this.reader.close();
            Logger.endTrack();
            callback.handleNgramOrderFinished(this.currentNGramLength);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Parses one n-gram line of the form {@code logProb<TAB>words[<TAB>backoff]} and hands
     * the result to the callback. A missing back-off field is reported as {@code 0.0f}.
     *
     * <p>The numeric fields are validated before any word is added to the word indexer, so
     * a line that fails those checks adds nothing to the indexer.
     *
     * @param callback receiver of the parsed n-gram
     * @param line     the raw line (non-empty, not starting with a backslash)
     * @param ngram    scratch array whose length is the expected number of words
     * @throws RuntimeException      if the line has no tab, a positive log-probability, or
     *                               the wrong number of words
     * @throws NumberFormatException if a numeric field cannot be parsed
     */
    private void parseLine(ArpaLmReaderCallback<ProbBackoffPair> callback, String line, int[] ngram) {
        int firstTab = line.indexOf('\t');
        if (firstTab < 0) {
            throw new RuntimeException("Bad ARPA line " + line);
        }
        int secondTab = line.indexOf('\t', firstTab + 1);
        boolean hasBackoff = secondTab >= 0;
        int length = line.length();

        float logProb = Float.parseFloat(line.substring(0, firstTab));
        float backoff = 0.0f;
        if (hasBackoff) {
            backoff = Float.parseFloat(line.substring(secondTab + 1, length));
        }
        if (logProb > 0.0d) {
            throw new RuntimeException("Bad ARPA line " + line);
        }

        parseNGram(line, firstTab + 1, hasBackoff ? secondTab : length, ngram);
        callback.call(ngram, 0, ngram.length, new ProbBackoffPair(logProb, backoff), line);
        this.currentNGramCount++;
    }

    /**
     * Splits {@code line[start, end)} on single spaces and stores the word id of every
     * token into {@code ngram}.
     *
     * @param line  the raw line
     * @param start index of the first character of the word field
     * @param end   index one past the last character of the word field
     * @param ngram destination array; must be filled exactly
     * @throws RuntimeException if the number of words differs from {@code ngram.length}
     */
    private void parseNGram(String line, int start, int end, int[] ngram) {
        int wordCount = 0;
        int wordStart = start;
        while (true) {
            int space = line.indexOf(' ', wordStart);
            // A space at or beyond 'end' belongs to a later field, not to the word list.
            boolean lastWord = space < 0 || space >= end;
            int wordEnd = lastWord ? end : space;
            if (wordCount >= ngram.length) {
                throw new RuntimeException(
                        "Bad ARPA line " + line + ": more than " + ngram.length + " words");
            }
            ngram[wordCount++] = this.wordIndexer.getOrAddIndexFromString(line.substring(wordStart, wordEnd));
            if (lastWord) {
                break;
            }
            wordStart = space + 1;
        }
        if (wordCount != ngram.length) {
            // Without this check the tail of the reused array would still hold ids from a previous line.
            throw new RuntimeException(
                    "Bad ARPA line " + line + ": expected " + ngram.length + " words but found " + wordCount);
        }
    }

    /** Closes the reader if one is open, ignoring failures so that a pending exception is not masked. */
    private void closeReaderQuietly() {
        if (this.reader == null) {
            return;
        }
        try {
            this.reader.close();
        } catch (IOException ignored) {
            // Best effort: the file was only read, so nothing is lost if close fails.
        }
    }
}
