package opennlp.tools.namefind;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import opennlp.tools.ml.BeamSearch;
import opennlp.tools.ml.TrainerFactory;
import opennlp.tools.ml.model.MaxentModel;
import opennlp.tools.ml.model.SequenceClassificationModel;
import opennlp.tools.ml.perceptron.PerceptronTrainer;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.Sequence;
import opennlp.tools.util.SequenceCodec;
import opennlp.tools.util.SequenceValidator;
import opennlp.tools.util.Span;
import opennlp.tools.util.TrainingParameters;
import opennlp.tools.util.featuregen.AdaptiveFeatureGenerator;
import opennlp.tools.util.featuregen.AdditionalContextFeatureGenerator;
import opennlp.tools.util.featuregen.GeneratorFactory;
import opennlp.tools.util.featuregen.WindowFeatureGenerator;

/* loaded from: input_file:opennlp/tools/namefind/NameFinderME.class */
/**
 * Name finder that tags token sequences with a {@link SequenceClassificationModel}
 * and decodes the predicted outcomes into {@link Span}s via a {@link SequenceCodec}.
 *
 * <p>An instance keeps the sequence produced by the most recent
 * {@link #find(String[], String[][])} call in a field, and the {@code probs} methods
 * read that field. Instances therefore carry per-call state; sharing one instance
 * between threads without external synchronization looks unsafe.
 */
public class NameFinderME implements TokenNameFinder {
    /** Beam size used by {@link #train} when the training parameters do not specify one. */
    public static final int DEFAULT_BEAM_SIZE = 3;
    /** Outcome label constant; not referenced inside this class. */
    public static final String START = "start";
    /** Outcome label constant; not referenced inside this class. */
    public static final String CONTINUE = "cont";
    /** Outcome label constant; not referenced inside this class. */
    public static final String OTHER = "other";

    /** Decodes predicted outcomes into spans and supplies the sequence validator. */
    private final SequenceCodec<String> seqCodec;
    protected SequenceClassificationModel<String> model;
    protected NameContextGenerator contextGenerator;
    /** Result of the most recent {@code find} call; {@code null} until {@code find} has run. */
    private Sequence bestSequence;
    /** Carries the caller-supplied additional context of the current {@code find} call. */
    private final AdditionalContextFeatureGenerator additionalContextFeatureGenerator =
            new AdditionalContextFeatureGenerator();
    private final SequenceValidator<String> sequenceValidator;

    /** Shared "no additional context" argument for {@link #find(String[])}. */
    private static final String[][] EMPTY = new String[0][0];

    /**
     * Matches outcomes of the form {@code <prefix>-<word chars>}; group 1 is the prefix.
     * The prefix is matched greedily, so it extends up to the last hyphen that is
     * followed only by word characters.
     */
    private static final Pattern typedOutcomePattern = Pattern.compile("(.+)-\\w+");

    /**
     * Builds a name finder from a trained model.
     *
     * <p>The sequence codec and context generator are obtained from the model's factory.
     *
     * @param tokenNameFinderModel the model to take the factory and sequence model from
     */
    public NameFinderME(TokenNameFinderModel tokenNameFinderModel) {
        TokenNameFinderFactory factory = tokenNameFinderModel.getFactory();
        this.seqCodec = factory.createSequenceCodec();
        // NOTE(review): method name kept exactly as it appears in this (decompiled) source;
        // confirm against the SequenceCodec interface actually on the classpath.
        this.sequenceValidator = this.seqCodec.createSequenceValidator2();
        this.model = tokenNameFinderModel.getNameFinderSequenceModel();
        this.contextGenerator = factory.createContextGenerator();
        // Presumably 8 is the window size before and after the current token -- TODO confirm
        // against WindowFeatureGenerator's constructor contract.
        this.contextGenerator.addFeatureGenerator(
                new WindowFeatureGenerator(this.additionalContextFeatureGenerator, 8, 8));
    }

    /**
     * Creates a feature generator from a serialized descriptor. Not referenced inside
     * this class.
     *
     * @param generatorDescriptor descriptor bytes, or {@code null}
     * @param resources lookup table for resources requested by the descriptor; may be
     *     {@code null}, in which case every lookup yields {@code null}
     * @return the generator, or {@code null} when {@code generatorDescriptor} is {@code null}
     * @throws IOException declared for {@code GeneratorFactory.create}
     */
    private static AdaptiveFeatureGenerator createFeatureGenerator(
            byte[] generatorDescriptor, Map<String, Object> resources) throws IOException {
        if (generatorDescriptor == null) {
            return null;
        }
        return GeneratorFactory.create(
                new ByteArrayInputStream(generatorDescriptor),
                key -> resources != null ? resources.get(key) : null);
    }

    /**
     * Finds names in the given tokens without any additional context.
     *
     * @param strArr the tokens to tag
     * @return the detected spans, each carrying its probability
     */
    @Override // opennlp.tools.namefind.TokenNameFinder
    public Span[] find(String[] strArr) {
        return find(strArr, EMPTY);
    }

    /**
     * Finds names in the given tokens.
     *
     * <p>Side effects: stores the best sequence for later {@code probs} calls and feeds
     * the tokens and predicted outcomes back into the context generator's adaptive data.
     *
     * @param strArr the tokens to tag
     * @param strArr2 additional context handed to the model and to the additional-context
     *     feature generator
     * @return the decoded spans, each rebuilt with its probability
     */
    public Span[] find(String[] strArr, String[][] strArr2) {
        this.additionalContextFeatureGenerator.setCurrentContext(strArr2);
        this.bestSequence =
                this.model.bestSequence(strArr, strArr2, this.contextGenerator, this.sequenceValidator);
        List<String> outcomes = this.bestSequence.getOutcomes();
        String[] outcomeArray = outcomes.toArray(new String[outcomes.size()]);
        this.contextGenerator.updateAdaptiveData(strArr, outcomeArray);
        Span[] decoded = this.seqCodec.decode(outcomes);
        return setProbs(decoded);
    }

    /** Clears the adaptive data held by the context generator. */
    @Override // opennlp.tools.namefind.TokenNameFinder
    public void clearAdaptiveData() {
        this.contextGenerator.clearAdaptiveData();
    }

    /**
     * Passes {@code dArr} to the last sequence's {@code getProbs(double[])}
     * (presumably filling it with the per-token probabilities -- confirm against
     * {@code Sequence}).
     *
     * <p>Requires a prior {@code find} call; before that the stored sequence is
     * {@code null} and this method throws {@link NullPointerException}.
     *
     * @param dArr the array handed to the sequence
     */
    public void probs(double[] dArr) {
        this.bestSequence.getProbs(dArr);
    }

    /**
     * Returns the probabilities reported by the last sequence.
     *
     * <p>Requires a prior {@code find} call; before that the stored sequence is
     * {@code null} and this method throws {@link NullPointerException}.
     *
     * @return the value of the last sequence's {@code getProbs()}
     */
    public double[] probs() {
        return this.bestSequence.getProbs();
    }

    /**
     * Replaces each span in place with a copy that carries its probability.
     *
     * @param spans the spans to annotate; modified in place
     * @return the same array instance
     */
    private Span[] setProbs(Span[] spans) {
        double[] spanProbs = probs(spans);
        // probs(Span[]) in this class never returns null, but it is public and overridable,
        // so the guard is kept.
        if (spanProbs != null) {
            for (int i = 0; i < spanProbs.length; i++) {
                spans[i] = new Span(spans[i], spanProbs[i]);
            }
        }
        return spans;
    }

    /**
     * Computes one probability per span: the sum of the last sequence's probabilities
     * over the indices {@code [start, end)} of the span, divided by the span's length.
     *
     * <p>Requires a prior {@code find} call (otherwise the stored sequence is {@code null}).
     *
     * @param spanArr the spans to score; indices refer to the last tagged token sequence
     * @return an array of the same length as {@code spanArr}
     */
    public double[] probs(Span[] spanArr) {
        double[] spanProbs = new double[spanArr.length];
        double[] tokenProbs = this.bestSequence.getProbs();
        for (int i = 0; i < spanArr.length; i++) {
            Span span = spanArr[i];
            double sum = 0.0d;
            for (int tokenIndex = span.getStart(); tokenIndex < span.getEnd(); tokenIndex++) {
                sum += tokenProbs[tokenIndex];
            }
            spanProbs[i] = sum / span.length();
        }
        return spanProbs;
    }

    /**
     * Trains a name finder model.
     *
     * <p>The given {@code trainingParameters} object is modified: the entries
     * {@code "Algorithm"} (perceptron), {@code "Cutoff"} (0) and {@code "Iterations"} (300)
     * are added when absent. The trainer type derived from the parameters selects an event
     * trainer, an event-model sequence trainer, or a sequence trainer.
     *
     * @param str first constructor argument of the resulting model (presumably the
     *     language code -- confirm against {@code TokenNameFinderModel})
     * @param str2 passed to the event stream (presumably the name type); only used on the
     *     event-trainer path
     * @param objectStream the training samples
     * @param trainingParameters the training configuration; see the defaults above
     * @param tokenNameFinderFactory supplies context generator, sequence codec, feature
     *     generator descriptor and resources
     * @return the trained model
     * @throws IOException if training or reading the samples fails
     * @throws IllegalStateException if the trainer type is none of the three handled types
     */
    public static TokenNameFinderModel train(String str, String str2, ObjectStream<NameSample> objectStream, TrainingParameters trainingParameters, TokenNameFinderFactory tokenNameFinderFactory) throws IOException {
        trainingParameters.putIfAbsent("Algorithm", PerceptronTrainer.PERCEPTRON_VALUE);
        trainingParameters.putIfAbsent("Cutoff", 0);
        trainingParameters.putIfAbsent("Iterations", 300);
        int beamSize = trainingParameters.getIntParameter(BeamSearch.BEAM_SIZE_PARAMETER, DEFAULT_BEAM_SIZE);

        // Handed to the trainer and then stored in the resulting model.
        Map<String, String> manifestInfoEntries = new HashMap<>();
        MaxentModel eventModel = null;
        SequenceClassificationModel<String> sequenceModel = null;

        TrainerFactory.TrainerType trainerType = TrainerFactory.getTrainerType(trainingParameters);
        if (TrainerFactory.TrainerType.EVENT_MODEL_TRAINER.equals(trainerType)) {
            eventModel = TrainerFactory.getEventTrainer(trainingParameters, manifestInfoEntries).train(new NameFinderEventStream(objectStream, str2, tokenNameFinderFactory.createContextGenerator(), tokenNameFinderFactory.createSequenceCodec()));
        } else if (TrainerFactory.TrainerType.EVENT_MODEL_SEQUENCE_TRAINER.equals(trainerType)) {
            eventModel = TrainerFactory.getEventModelSequenceTrainer(trainingParameters, manifestInfoEntries).train(new NameSampleSequenceStream(objectStream, tokenNameFinderFactory.createContextGenerator()));
        } else if (TrainerFactory.TrainerType.SEQUENCE_TRAINER.equals(trainerType)) {
            sequenceModel = TrainerFactory.getSequenceModelTrainer(trainingParameters, manifestInfoEntries).train(new NameSampleSequenceStream(objectStream, tokenNameFinderFactory.createContextGenerator(), false));
        } else {
            throw new IllegalStateException("Unexpected trainer type!");
        }

        if (sequenceModel != null) {
            return new TokenNameFinderModel(str, sequenceModel, tokenNameFinderFactory.getFeatureGenerator(), tokenNameFinderFactory.getResources(), manifestInfoEntries, tokenNameFinderFactory.getSequenceCodec(), tokenNameFinderFactory);
        }
        // Only the event-model variant takes a beam size.
        return new TokenNameFinderModel(str, eventModel, beamSize, tokenNameFinderFactory.getFeatureGenerator(), tokenNameFinderFactory.getResources(), manifestInfoEntries, tokenNameFinderFactory.getSequenceCodec(), tokenNameFinderFactory);
    }

    /**
     * Extracts the name type from a typed outcome such as {@code "person-start"}.
     *
     * <p>NOTE(review): a decompiler annotation on this method stated that its access was
     * changed from package-private; it is left {@code public} so existing callers of this
     * file keep compiling.
     *
     * @param str the outcome to inspect; must not be {@code null}
     * @return the part before the final {@code -<word chars>} suffix, or {@code null} when
     *     the outcome does not have that form
     */
    public static String extractNameType(String str) {
        Matcher matcher = typedOutcomePattern.matcher(str);
        return matcher.matches() ? matcher.group(1) : null;
    }

    /**
     * Returns the given spans in sorted order with overlapping spans removed.
     *
     * <p>The spans are sorted by their natural ordering; walking that order, a span is
     * dropped when it intersects the most recently kept span. The input array is not
     * modified.
     *
     * @param spanArr the spans to filter
     * @return a new array containing the kept spans in sorted order
     */
    public static Span[] dropOverlappingSpans(Span[] spanArr) {
        List<Span> sortedSpans = new ArrayList<>(spanArr.length);
        Collections.addAll(sortedSpans, spanArr);
        Collections.sort(sortedSpans);

        Span lastKept = null;
        for (Iterator<Span> it = sortedSpans.iterator(); it.hasNext(); ) {
            Span candidate = it.next();
            if (lastKept != null && lastKept.intersects(candidate)) {
                // Overlaps the previously kept span: drop it and keep comparing against
                // the same kept span.
                it.remove();
            } else {
                lastKept = candidate;
            }
        }
        return sortedSpans.toArray(new Span[sortedSpans.size()]);
    }
}
