package com.johnsnowlabs.nlp.training;

import com.johnsnowlabs.ml.crf.CrfDataset;
import com.johnsnowlabs.ml.crf.DatasetMetadata;
import com.johnsnowlabs.ml.crf.TextSentenceLabels;
import com.johnsnowlabs.nlp.annotators.common.TaggedSentence;
import com.johnsnowlabs.nlp.annotators.common.WordpieceEmbeddingsSentence;
import com.johnsnowlabs.nlp.annotators.ner.crf.DictionaryFeatures$;
import com.johnsnowlabs.nlp.annotators.ner.crf.FeatureGenerator;
import com.johnsnowlabs.nlp.embeddings.WordEmbeddingsBinaryIndexer$;
import com.johnsnowlabs.nlp.embeddings.WordEmbeddingsReader;
import com.johnsnowlabs.nlp.embeddings.WordEmbeddingsTextIndexer$;
import com.johnsnowlabs.nlp.embeddings.WordEmbeddingsWriter;
import com.johnsnowlabs.nlp.util.io.ExternalResource;
import com.johnsnowlabs.nlp.util.io.ReadAs$;
import com.johnsnowlabs.storage.RocksDBConnection;
import com.johnsnowlabs.storage.RocksDBConnection$;
import java.io.File;
import java.util.Objects;
import scala.Enumeration;
import scala.MatchError;
import scala.None$;
import scala.Option;
import scala.Predef$;
import scala.Tuple3;
import scala.collection.GenIterable;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.TraversableLike;
import scala.collection.mutable.StringBuilder;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxedUnit;

/* compiled from: CoNLL2003NerReader.scala */
@ScalaSignature(bytes = "\u0006\u0001\u00055e\u0001B\u0001\u0003\u0001-\u0011!cQ8O\u00192\u0013\u0004\u0007M\u001aOKJ\u0014V-\u00193fe*\u00111\u0001B\u0001\tiJ\f\u0017N\\5oO*\u0011QAB\u0001\u0004]2\u0004(BA\u0004\t\u00031Qw\u000e\u001b8t]><H.\u00192t\u0015\u0005I\u0011aA2p[\u000e\u00011C\u0001\u0001\r!\ti\u0001#D\u0001\u000f\u0015\u0005y\u0011!B:dC2\f\u0017BA\t\u000f\u0005\u0019\te.\u001f*fM\"A1\u0003\u0001B\u0001B\u0003%A#\u0001\nx_J$W)\u001c2fI\u0012LgnZ:GS2,\u0007CA\u000b\u0019\u001d\tia#\u0003\u0002\u0018\u001d\u00051\u0001K]3eK\u001aL!!\u0007\u000e\u0003\rM#(/\u001b8h\u0015\t9b\u0002\u0003\u0005\u001d\u0001\t\u0005\t\u0015!\u0003\u001e\u0003M9xN\u001d3F[\n,G\rZ5oONtE)[7t!\tia$\u0003\u0002 \u001d\t\u0019\u0011J\u001c;\t\u0011\u0005\u0002!\u0011!Q\u0001\n\t\n\u0011B\\8s[\u0006d\u0017N_3\u0011\u00055\u0019\u0013B\u0001\u0013\u000f\u0005\u001d\u0011un\u001c7fC:D\u0001B\n\u0001\u0003\u0002\u0003\u0006IaJ\u0001\u0011K6\u0014W\r\u001a3j]\u001e\u001chi\u001c:nCR\u0004\"\u0001\u000b\u0019\u000f\u0005%rS\"\u0001\u0016\u000b\u0005-b\u0013AA5p\u0015\tiC!\u0001\u0003vi&d\u0017BA\u0018+\u0003\u0019\u0011V-\u00193Bg&\u0011\u0011G\r\u0002\u0006-\u0006dW/Z\u0005\u0003g9\u00111\"\u00128v[\u0016\u0014\u0018\r^5p]\"AQ\u0007\u0001B\u0001B\u0003%a'\u0001\u000eq_N\u001c\u0018N\u00197f\u000bb$XM\u001d8bY\u0012K7\r^5p]\u0006\u0014\u0018\u0010E\u0002\u000eoeJ!\u0001\u000f\b\u0003\r=\u0003H/[8o!\tI#(\u0003\u0002<U\t\u0001R\t\u001f;fe:\fGNU3t_V\u00148-\u001a\u0005\u0006{\u0001!\tAP\u0001\u0007y%t\u0017\u000e\u001e 
\u0015\r}\n%i\u0011#F!\t\u0001\u0005!D\u0001\u0003\u0011\u0015\u0019B\b1\u0001\u0015\u0011\u0015aB\b1\u0001\u001e\u0011\u0015\tC\b1\u0001#\u0011\u00151C\b1\u0001(\u0011\u0015)D\b1\u00017\u0011\u001d9\u0005A1A\u0005\n!\u000b\u0011B\\3s%\u0016\fG-\u001a:\u0016\u0003%\u0003\"\u0001\u0011&\n\u0005-\u0013!!B\"p\u001d2c\u0005BB'\u0001A\u0003%\u0011*\u0001\u0006oKJ\u0014V-\u00193fe\u0002B\u0011b\u0014\u0001A\u0002\u0003\u0007I\u0011\u0002)\u0002\u001d]|'\u000fZ#nE\u0016$G-\u001b8hgV\t\u0011\u000b\u0005\u0002S+6\t1K\u0003\u0002U\t\u0005QQ-\u001c2fI\u0012LgnZ:\n\u0005Y\u001b&\u0001F,pe\u0012,UNY3eI&twm\u001d*fC\u0012,'\u000fC\u0005Y\u0001\u0001\u0007\t\u0019!C\u00053\u0006\u0011ro\u001c:e\u000b6\u0014W\r\u001a3j]\u001e\u001cx\fJ3r)\tQV\f\u0005\u0002\u000e7&\u0011AL\u0004\u0002\u0005+:LG\u000fC\u0004_/\u0006\u0005\t\u0019A)\u0002\u0007a$\u0013\u0007\u0003\u0004a\u0001\u0001\u0006K!U\u0001\u0010o>\u0014H-R7cK\u0012$\u0017N\\4tA!9!\r\u0001b\u0001\n\u0013\u0019\u0017A\u00014h+\u0005!\u0007CA3m\u001b\u00051'BA4i\u0003\r\u0019'O\u001a\u0006\u0003S*\f1A\\3s\u0015\tYG!\u0001\u0006b]:|G/\u0019;peNL!!\u001c4\u0003!\u0019+\u0017\r^;sK\u001e+g.\u001a:bi>\u0014\bBB8\u0001A\u0003%A-A\u0002gO\u0002BQ!\u001d\u0001\u0005\nI\f\u0011C]3t_24X-R7cK\u0012$\u0017N\\4t)\r\u0019\u00181\u0002\t\u0004ir|hBA;{\u001d\t1\u00180D\u0001x\u0015\tA(\"\u0001\u0004=e>|GOP\u0005\u0002\u001f%\u00111PD\u0001\ba\u0006\u001c7.Y4f\u0013\tihPA\u0002TKFT!a\u001f\b\u0011\t\u0005\u0005\u0011qA\u0007\u0003\u0003\u0007Q1!!\u0002k\u0003\u0019\u0019w.\\7p]&!\u0011\u0011BA\u0002\u0005m9vN\u001d3qS\u0016\u001cW-R7cK\u0012$\u0017N\\4t'\u0016tG/\u001a8dK\"9\u0011Q\u00029A\u0002\u0005=\u0011!C:f]R,gnY3t!\u0011!H0!\u0005\u0011\t\u0005M\u0011Q\u0006\b\u0005\u0003+\tIC\u0004\u0003\u0002\u0018\u0005\u001db\u0002BA\r\u0003KqA!a\u0007\u0002$9!\u0011QDA\u0011\u001d\r1\u0018qD\u0005\u0002\u0013%\u0011q\u0001C\u0005\u0003\u000b\u0019I!a\u001b\u0003\n\u0007\u0005\u0015!.\u0003\u0003\u0002,\u0005\r\u0011!C!o]>$\u0018\r^3e\u0013\u0011\ty#!\
r\u0003#A{7\u000fV1hO\u0016$7+\u001a8uK:\u001cWM\u0003\u0003\u0002,\u0005\r\u0001bBA\u001b\u0001\u0011%\u0011qG\u0001\fe\u0016\fG\rR1uCN,G\u000f\u0006\u0003\u0002:\u0005U\u0003\u0003\u0002;}\u0003w\u0001\u0002\"DA\u001f\u0003\u0003\nye`\u0005\u0004\u0003\u007fq!A\u0002+va2,7\u0007\u0005\u0003\u0002D\u0005-SBAA#\u0015\r9\u0017q\t\u0006\u0004\u0003\u00132\u0011AA7m\u0013\u0011\ti%!\u0012\u0003%Q+\u0007\u0010^*f]R,gnY3MC\n,Gn\u001d\t\u0005\u0003\u0003\t\t&\u0003\u0003\u0002T\u0005\r!A\u0004+bO\u001e,GmU3oi\u0016t7-\u001a\u0005\b\u0003/\n\u0019\u00041\u0001:\u0003\t)'\u000fC\u0004\u0002\\\u0001!\t!!\u0018\u0002\u001dI,\u0017\r\u001a(fe\u0012\u000bG/Y:fiR1\u0011qLA3\u0003O\u0002B!a\u0011\u0002b%!\u00111MA#\u0005)\u0019%O\u001a#bi\u0006\u001cX\r\u001e\u0005\b\u0003/\nI\u00061\u0001:\u0011)\tI'!\u0017\u0011\u0002\u0003\u0007\u00111N\u0001\t[\u0016$\u0018\rZ1uCB!QbNA7!\u0011\t\u0019%a\u001c\n\t\u0005E\u0014Q\t\u0002\u0010\t\u0006$\u0018m]3u\u001b\u0016$\u0018\rZ1uC\"I\u0011Q\u000f\u0001\u0012\u0002\u0013\u0005\u0011qO\u0001\u0019e\u0016\fGMT3s\t\u0006$\u0018m]3uI\u0011,g-Y;mi\u0012\u0012TCAA=U\u0011\tY'a\u001f,\u0005\u0005u\u0004\u0003BA@\u0003\u0013k!!!!\u000b\t\u0005\r\u0015QQ\u0001\nk:\u001c\u0007.Z2lK\u0012T1!a\"\u000f\u0003)\tgN\\8uCRLwN\\\u0005\u0005\u0003\u0017\u000b\tIA\tv]\u000eDWmY6fIZ\u000b'/[1oG\u0016\u0004")
/* loaded from: input_file:com/johnsnowlabs/nlp/training/CoNLL2003NerReader.class */
public class CoNLL2003NerReader {
    public final int com$johnsnowlabs$nlp$training$CoNLL2003NerReader$$wordEmbeddingsNDims;
    private final CoNLL nerReader = new CoNLL("document", "sentence", "token", "pos", CoNLL$.MODULE$.apply$default$5(), CoNLL$.MODULE$.apply$default$6(), CoNLL$.MODULE$.apply$default$7(), CoNLL$.MODULE$.apply$default$8(), CoNLL$.MODULE$.apply$default$9());
    private WordEmbeddingsReader com$johnsnowlabs$nlp$training$CoNLL2003NerReader$$wordEmbeddings;
    private final FeatureGenerator com$johnsnowlabs$nlp$training$CoNLL2003NerReader$$fg;

    private CoNLL nerReader() {
        return this.nerReader;
    }

    public WordEmbeddingsReader com$johnsnowlabs$nlp$training$CoNLL2003NerReader$$wordEmbeddings() {
        return this.com$johnsnowlabs$nlp$training$CoNLL2003NerReader$$wordEmbeddings;
    }

    private void com$johnsnowlabs$nlp$training$CoNLL2003NerReader$$wordEmbeddings_$eq(WordEmbeddingsReader wordEmbeddingsReader) {
        this.com$johnsnowlabs$nlp$training$CoNLL2003NerReader$$wordEmbeddings = wordEmbeddingsReader;
    }

    public FeatureGenerator com$johnsnowlabs$nlp$training$CoNLL2003NerReader$$fg() {
        return this.com$johnsnowlabs$nlp$training$CoNLL2003NerReader$$fg;
    }

    private Seq<WordpieceEmbeddingsSentence> resolveEmbeddings(Seq<TaggedSentence> seq) {
        return (Seq) ((TraversableLike) seq.zipWithIndex(Seq$.MODULE$.canBuildFrom())).map(new CoNLL2003NerReader$$anonfun$resolveEmbeddings$1(this), Seq$.MODULE$.canBuildFrom());
    }

    private Seq<Tuple3<TextSentenceLabels, TaggedSentence, WordpieceEmbeddingsSentence>> readDataset(ExternalResource externalResource) {
        Seq<CoNLLDocument> readDocs = nerReader().readDocs(externalResource);
        Seq seq = (Seq) ((TraversableLike) readDocs.flatMap(new CoNLL2003NerReader$$anonfun$2(this), Seq$.MODULE$.canBuildFrom())).map(new CoNLL2003NerReader$$anonfun$3(this), Seq$.MODULE$.canBuildFrom());
        Seq<TaggedSentence> seq2 = (Seq) readDocs.flatMap(new CoNLL2003NerReader$$anonfun$4(this), Seq$.MODULE$.canBuildFrom());
        return (Seq) ((TraversableLike) seq.zip((GenIterable) seq2.zip(resolveEmbeddings(seq2), Seq$.MODULE$.canBuildFrom()), Seq$.MODULE$.canBuildFrom())).map(new CoNLL2003NerReader$$anonfun$readDataset$1(this), Seq$.MODULE$.canBuildFrom());
    }

    public CrfDataset readNerDataset(ExternalResource externalResource, Option<DatasetMetadata> option) {
        Seq<Tuple3<TextSentenceLabels, TaggedSentence, WordpieceEmbeddingsSentence>> readDataset = readDataset(externalResource);
        return option.isEmpty() ? com$johnsnowlabs$nlp$training$CoNLL2003NerReader$$fg().generateDataset(readDataset) : new CrfDataset((Seq) readDataset.map(new CoNLL2003NerReader$$anonfun$5(this, option), Seq$.MODULE$.canBuildFrom()), (DatasetMetadata) option.get());
    }

    public Option<DatasetMetadata> readNerDataset$default$2() {
        return None$.MODULE$;
    }

    public CoNLL2003NerReader(String str, int i, boolean z, Enumeration.Value value, Option<ExternalResource> option) {
        this.com$johnsnowlabs$nlp$training$CoNLL2003NerReader$$wordEmbeddingsNDims = i;
        if (str != null) {
            Predef$.MODULE$.require(new File(str).exists());
            String stringBuilder = new StringBuilder().append(str).append(".db").toString();
            RocksDBConnection orCreate = RocksDBConnection$.MODULE$.getOrCreate(stringBuilder);
            if (!new File(stringBuilder).exists()) {
                Enumeration.Value TEXT = ReadAs$.MODULE$.TEXT();
                if (TEXT != null ? !TEXT.equals(value) : value != null) {
                    Enumeration.Value BINARY = ReadAs$.MODULE$.BINARY();
                    if (BINARY != null ? !BINARY.equals(value) : value != null) {
                        throw new MatchError(value);
                    }
                    WordEmbeddingsBinaryIndexer$.MODULE$.index(str, new WordEmbeddingsWriter(orCreate, false, i, 5000, 5000));
                    BoxedUnit boxedUnit = BoxedUnit.UNIT;
                } else {
                    WordEmbeddingsTextIndexer$.MODULE$.index(str, new WordEmbeddingsWriter(orCreate, false, i, 5000, 5000));
                    BoxedUnit boxedUnit2 = BoxedUnit.UNIT;
                }
            }
            if (new File(stringBuilder).exists()) {
                com$johnsnowlabs$nlp$training$CoNLL2003NerReader$$wordEmbeddings_$eq(new WordEmbeddingsReader(orCreate, z, i, 1000));
            }
        }
        this.com$johnsnowlabs$nlp$training$CoNLL2003NerReader$$fg = new FeatureGenerator(DictionaryFeatures$.MODULE$.read(option));
    }
}
