package org.apache.joshua.decoder.ff.tm.hash_based;

import cern.colt.matrix.impl.AbstractFormatter;
import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import org.apache.joshua.corpus.Vocabulary;
import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.ff.FeatureFunction;
import org.apache.joshua.decoder.ff.tm.AbstractGrammar;
import org.apache.joshua.decoder.ff.tm.GrammarReader;
import org.apache.joshua.decoder.ff.tm.OwnerMap;
import org.apache.joshua.decoder.ff.tm.Rule;
import org.apache.joshua.decoder.ff.tm.Trie;
import org.apache.joshua.decoder.ff.tm.format.HieroFormatReader;
import org.apache.joshua.decoder.ff.tm.format.MosesFormatReader;
import org.apache.joshua.util.FormatUtils;
import org.apache.lucene.util.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:joshua-incubating-6.1.jar:org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.class */
/**
 * A grammar whose rules are kept in memory in a trie of {@link MemoryBasedTrie} nodes keyed by
 * the integer symbols returned by {@link Rule#getFrench()}.
 *
 * <p>Rules are inserted with {@link #addRule(Rule)}; every rule that shares the same symbol
 * sequence ends up in the same {@link MemoryBasedRuleBin} at the trie node reached by that
 * sequence. The class keeps running counts of the rules added and of the rule bins created.
 */
public class MemoryBasedBatchGrammar extends AbstractGrammar {
    private static final Logger LOG = LoggerFactory.getLogger(MemoryBasedBatchGrammar.class);

    /** Number of rules passed to {@link #addRule(Rule)} so far. */
    private int qtyRulesRead = 0;

    /** Number of rule bins created so far, i.e. distinct symbol sequences that hold rules. */
    private int qtyRuleBins = 0;

    /** Dense-feature count taken from an added rule; stays 0 until a rule reports a non-zero size. */
    private int numDenseFeatures = 0;

    /** Root of the trie that holds every rule of this grammar. */
    private final MemoryBasedTrie root = new MemoryBasedTrie();

    /** File the grammar was read from and that {@link #save()} writes to; may be {@code null}. */
    private String grammarFile;

    /** Reader that supplies the rules; may be {@code null} when no reader was given or created. */
    private GrammarReader<Rule> modelReader;

    /**
     * Creates an empty grammar.
     *
     * @param str value forwarded to the superclass constructor (presumably the grammar owner —
     *     confirm against {@code AbstractGrammar})
     * @param joshuaConfiguration decoder configuration
     * @param i value forwarded to the superclass constructor (presumably the span limit — confirm)
     */
    public MemoryBasedBatchGrammar(String str, JoshuaConfiguration joshuaConfiguration, int i) {
        this(null, str, joshuaConfiguration, i);
    }

    /**
     * Creates an empty grammar.
     *
     * @param str unused by this constructor
     * @param str2 value forwarded to the superclass constructor (presumably the grammar owner —
     *     confirm against {@code AbstractGrammar})
     * @param joshuaConfiguration decoder configuration
     * @param i value forwarded to the superclass constructor (presumably the span limit — confirm)
     */
    public MemoryBasedBatchGrammar(String str, String str2, JoshuaConfiguration joshuaConfiguration, int i) {
        super(str2, joshuaConfiguration, i);
    }

    /**
     * Creates an empty grammar that remembers the given reader. No rules are read from it here.
     *
     * @param grammarReader reader stored as this grammar's model reader
     * @param joshuaConfiguration decoder configuration
     * @param i value forwarded to the superclass constructor (presumably the span limit — confirm)
     */
    public MemoryBasedBatchGrammar(GrammarReader<Rule> grammarReader, JoshuaConfiguration joshuaConfiguration, int i) {
        super(OwnerMap.UNKNOWN_OWNER, joshuaConfiguration, i);
        this.modelReader = grammarReader;
    }

    /**
     * Creates a grammar and loads every rule that a reader for the given file yields.
     *
     * <p>An {@link IOException} raised while creating the reader is logged and the grammar is left
     * empty; an unknown format still raises the {@link RuntimeException} thrown by
     * {@link #createReader(String, String)}.
     *
     * @param str grammar format keyword, passed to {@link #createReader(String, String)}
     * @param str2 path of the grammar file; also remembered as the target of {@link #save()}
     * @param str3 value forwarded to the superclass constructor (presumably the grammar owner —
     *     confirm against {@code AbstractGrammar})
     * @param str4 symbol that is registered with {@link Vocabulary#id(String)} before loading
     * @param i value forwarded to the superclass constructor (presumably the span limit — confirm)
     * @param joshuaConfiguration decoder configuration
     * @throws IOException declared for compatibility with existing callers
     */
    public MemoryBasedBatchGrammar(String str, String str2, String str3, String str4, int i, JoshuaConfiguration joshuaConfiguration) throws IOException {
        super(str3, joshuaConfiguration, i);
        // Called only for its side effect of registering the symbol in the vocabulary.
        Vocabulary.id(str4);
        this.grammarFile = str2;
        try {
            this.modelReader = createReader(str, str2);
        } catch (IOException e) {
            // Trailing throwable makes SLF4J record the cause instead of dropping it.
            LOG.warn("Couldn't load a '{}' type grammar from file '{}'", str, str2, e);
        }
        if (this.modelReader != null) {
            Iterator<Rule> rules = this.modelReader.iterator();
            while (rules.hasNext()) {
                Rule rule = rules.next();
                // Null entries coming from the reader are skipped.
                if (rule != null) {
                    addRule(rule);
                }
            }
        } else {
            LOG.info("Couldn't create a GrammarReader for file {} with format {}", str2, str);
        }
        LOG.info("MemoryBasedBatchGrammar: Read {} rules with {} distinct source sides from '{}'",
                this.qtyRulesRead, this.qtyRuleBins, str2);
    }

    /**
     * Creates the reader that matches a grammar format keyword.
     *
     * @param str format keyword: {@code "hiero"}, {@code "thrax"} and {@code "phrase"} select a
     *     {@link HieroFormatReader}; {@code "moses"} selects a {@link MosesFormatReader}
     * @param str2 path of the grammar file
     * @return a reader for the file, or {@code null} when {@code str2} is {@code null}
     * @throws IOException if the reader cannot be created
     * @throws RuntimeException if the format keyword is not recognised
     */
    protected GrammarReader<Rule> createReader(String str, String str2) throws IOException {
        if (str2 == null) {
            return null;
        }
        if ("hiero".equals(str) || "thrax".equals(str) || "phrase".equals(str)) {
            return new HieroFormatReader(str2);
        }
        if ("moses".equals(str)) {
            return new MosesFormatReader(str2);
        }
        throw new RuntimeException(String.format("* FATAL: unknown grammar format '%s'", str));
    }

    /**
     * Returns the number of rules added to this grammar so far.
     *
     * @return count of {@link #addRule(Rule)} invocations
     */
    @Override
    public int getNumRules() {
        return this.qtyRulesRead;
    }

    /**
     * Tells whether this grammar may apply to a span.
     *
     * <p>With a span limit of {@code -1} only a first argument of {@code 0} is accepted; otherwise
     * the third argument must not exceed the span limit. The second argument is not consulted.
     *
     * @param i first span argument (presumably the start index — confirm against {@code Grammar})
     * @param i2 second span argument; unused here
     * @param i3 third span argument (presumably the span or path length — confirm)
     * @return {@code true} if rules of this grammar may be applied to the span
     */
    @Override
    public boolean hasRuleForSpan(int i, int i2, int i3) {
        if (this.spanLimit == -1) {
            return i == 0;
        }
        return i3 <= this.spanLimit;
    }

    /**
     * Returns the root node of the rule trie.
     *
     * @return the trie root; never {@code null}
     */
    @Override
    public Trie getTrieRoot() {
        return this.root;
    }

    /**
     * Adds a rule to the trie, creating any missing nodes along the rule's symbol sequence.
     *
     * <p>The rule's owner is set to this grammar's owner, and the rule count, the maximum
     * source phrase length and (when a new bin is needed) the bin count are updated.
     *
     * @param rule the rule to add; must not be {@code null}
     */
    @Override
    public void addRule(Rule rule) {
        this.qtyRulesRead++;
        rule.setOwner(this.owner);
        if (this.numDenseFeatures == 0) {
            this.numDenseFeatures = rule.getFeatureVector().getDenseFeatures().size();
        }

        int[] french = rule.getFrench();
        this.maxSourcePhraseLength = Math.max(this.maxSourcePhraseLength, french.length);

        // Walk down the trie one symbol at a time, growing it where no child exists yet.
        MemoryBasedTrie node = this.root;
        for (int symbol : french) {
            MemoryBasedTrie child = (MemoryBasedTrie) node.match(symbol);
            if (child == null) {
                child = new MemoryBasedTrie();
                if (!node.hasExtensions()) {
                    node.childrenTbl = new HashMap<>();
                }
                node.childrenTbl.put(symbol, child);
            }
            node = child;
        }

        if (!node.hasRules()) {
            node.ruleBin = new MemoryBasedRuleBin(rule.getArity(), french);
            this.qtyRuleBins++;
        }
        node.ruleBin.addRule(rule);
    }

    /**
     * Adds rules that translate an out-of-vocabulary word.
     *
     * <p>The target side is the word itself, or the word with {@code "_OOV"} appended when
     * {@code mark_oovs} is set in the configuration. One rule is added per entry of the
     * configured OOV list; when that list is {@code null} or empty a single rule is added
     * under the configured default non-terminal.
     *
     * @param i vocabulary id of the out-of-vocabulary word
     * @param list feature functions used to estimate the cost of each new rule
     */
    @Override
    public void addOOVRules(int i, List<FeatureFunction> list) {
        int[] source = {i};
        int targetWord = this.joshuaConfiguration.mark_oovs
                ? Vocabulary.id(Vocabulary.word(i) + "_OOV")
                : i;
        int[] target = {targetWord};

        if (this.joshuaConfiguration.oovList == null || this.joshuaConfiguration.oovList.isEmpty()) {
            addOOVRule(Vocabulary.id(this.joshuaConfiguration.default_non_terminal), source, target, list);
            return;
        }
        for (JoshuaConfiguration.OOVItem item : this.joshuaConfiguration.oovList) {
            addOOVRule(Vocabulary.id(item.label), source, target, list);
        }
    }

    /** Builds one OOV rule with the given left-hand side, adds it, then estimates its cost. */
    private void addOOVRule(int lhs, int[] source, int[] target, List<FeatureFunction> features) {
        Rule rule = new Rule(lhs, source, target, "", 0, "0-0");
        addRule(rule);
        rule.estimateRuleCost(features);
    }

    /**
     * Writes every rule of this grammar to the grammar file, one {@link Rule#textFormat()} string
     * per record, visiting the trie breadth-first from the root.
     *
     * <p>Each rule is also logged at INFO level. Nothing is written when no grammar file is
     * associated with this grammar. I/O failures are logged and not propagated.
     */
    @Override
    public void save() {
        if (this.grammarFile == null) {
            // Grammars built without a file have nowhere to be saved to.
            LOG.warn("Cannot save grammar: no grammar file is associated with it");
            return;
        }
        LOG.info("Saving custom grammar to file '{}'", this.grammarFile);
        // try-with-resources closes both streams on every exit path, including write failures.
        try (FileOutputStream fileStream = new FileOutputStream(this.grammarFile);
                BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fileStream, IOUtils.UTF_8))) {
            Deque<Trie> pending = new ArrayDeque<>();
            pending.add(this.root);
            while (!pending.isEmpty()) {
                Trie node = pending.poll();
                if (node.hasRules()) {
                    for (Rule rule : node.getRuleCollection().getRules()) {
                        String text = rule.textFormat();
                        LOG.info("  rule: {}", text);
                        // NOTE(review): separator constant kept from the original; presumably a
                        // newline — confirm.
                        writer.write(text + AbstractFormatter.DEFAULT_ROW_SEPARATOR);
                    }
                }
                if (node.hasExtensions()) {
                    pending.addAll(node.getExtensions());
                }
            }
        } catch (IOException e) {
            LOG.error("Failed to save grammar to file '{}'", this.grammarFile, e);
        }
    }

    /**
     * Adds three glue rules built from the configured goal symbol and default non-terminal: one
     * that rewrites the goal symbol as {@code <s>}, one that extends the goal symbol with the
     * default non-terminal, and one that closes the goal symbol with {@code </s>}.
     *
     * @param arrayList feature functions used to estimate the cost of each glue rule
     */
    public void addGlueRules(ArrayList<FeatureFunction> arrayList) {
        HieroFormatReader reader = new HieroFormatReader();
        String goal = FormatUtils.cleanNonTerminal(this.joshuaConfiguration.goal_symbol);
        String defaultNT = FormatUtils.cleanNonTerminal(this.joshuaConfiguration.default_non_terminal);

        String[] ruleStrings = {
            String.format("[%s] ||| %s ||| %s ||| 0", goal, "<s>", "<s>"),
            String.format("[%s] ||| [%s,1] [%s,2] ||| [%s,1] [%s,2] ||| -1", goal, goal, defaultNT, goal, defaultNT),
            String.format("[%s] ||| [%s,1] %s ||| [%s,1] %s ||| 0", goal, goal, "</s>", goal, "</s>")
        };

        for (String ruleString : ruleStrings) {
            Rule rule = reader.parseLine(ruleString);
            addRule(rule);
            rule.estimateRuleCost(arrayList);
        }
    }

    /**
     * Returns the dense-feature count recorded from the rules added so far.
     *
     * @return the recorded count, or {@code 0} if no rule has reported a non-zero count
     */
    @Override
    public int getNumDenseFeatures() {
        return this.numDenseFeatures;
    }
}
