package com.johnsnowlabs.nlp.training;

import com.johnsnowlabs.nlp.AnnotatorType$;
import com.johnsnowlabs.nlp.DocumentAssembler;
import com.johnsnowlabs.nlp.Finisher;
import com.johnsnowlabs.nlp.HasOutputAnnotationCol;
import com.johnsnowlabs.nlp.annotator.package$PerceptronModel$;
import com.johnsnowlabs.nlp.annotators.Tokenizer;
import com.johnsnowlabs.nlp.annotators.pos.perceptron.PerceptronModel;
import com.johnsnowlabs.nlp.annotators.sbd.pragmatic.SentenceDetector;
import org.apache.spark.ml.Pipeline;
import org.apache.spark.ml.PipelineStage;
import org.apache.spark.ml.Transformer;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.MetadataBuilder;
import scala.Predef$;
import scala.Tuple2;
import scala.Tuple6;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.immutable.List$;
import scala.collection.immutable.Nil$;
import scala.reflect.ClassTag$;
import scala.reflect.api.Mirror;
import scala.reflect.api.TypeCreator;
import scala.reflect.api.Types;
import scala.reflect.api.Universe;
import scala.reflect.runtime.package$;
import scala.runtime.ScalaRunTime$;

/* compiled from: PubTator.scala */
/* loaded from: input_file:com/johnsnowlabs/nlp/training/PubTator$.class */
/**
 * Decompiled singleton backing the Scala object {@code PubTator}.
 *
 * <p>Exposes {@link #readDataset(SparkSession, String)}, which loads a text file through Spark,
 * runs Spark NLP annotators over it and returns a finished dataset with a
 * {@code finished_ner} column.
 *
 * <p>The single instance is published through {@link #MODULE$}, following the convention the
 * Scala compiler uses for objects.
 */
public final class PubTator$ {
    /**
     * The one and only instance. Initialized eagerly in the declaration: a {@code final} field
     * cannot legally be assigned from the constructor in Java source, and leaving it as
     * {@code null} would break every {@code PubTator$.MODULE$} caller.
     */
    public static final PubTator$ MODULE$ = new PubTator$();

    /**
     * Reads the file at {@code str} and turns it into an annotated training dataset.
     *
     * <p>Steps, as visible in the code:
     * <ol>
     *   <li>Load the file as an RDD of lines using the default second argument of
     *       {@code SparkContext.textFile}.</li>
     *   <li>Build a {@code (doc_id, text)} data frame from one filtered view of the lines and run
     *       DocumentAssembler, SentenceDetector and Tokenizer over it.</li>
     *   <li>Build a {@code (doc_id, chunk)} data frame from a second filtered view of the lines
     *       and tag the {@code chunk} column with annotator type CHUNK.</li>
     *   <li>Join both on {@code doc_id}, derive a {@code label} column from {@code token} and
     *       {@code chunk} through a UDF, tagged with annotator type NAMED_ENTITY.</li>
     *   <li>Run a pretrained PerceptronModel (POS) and a Finisher, then rename
     *       {@code finished_label} to {@code finished_ner}.</li>
     * </ol>
     *
     * <p>NOTE(review): the line filters, parsers and the labelling UDF live in the
     * {@code PubTator$$anonfun$N} classes, which are not part of this file; their exact
     * behaviour should be confirmed there.
     *
     * @param sparkSession session used to read the file and create data frames
     * @param str path handed to {@code SparkContext.textFile}
     * @return the transformed dataset produced by the final pipeline, with
     *     {@code finished_label} renamed to {@code finished_ner}
     */
    public Dataset<Row> readDataset(SparkSession sparkSession, String str) {
        // Raw lines of the input file; reused below for both the text and the annotation views.
        RDD textFile = sparkSession.sparkContext().textFile(str, sparkSession.sparkContext().textFile$default$2());
        // First view: filter -> split into String[] -> group by a String key -> two Tuple2 maps.
        // The TypeCreator describes the element type as scala.Tuple2[Int, String]; the columns
        // are then named "doc_id" and "text".
        // NOTE(review): presumably these are the title/abstract lines of each document - confirm
        // in PubTator$$anonfun$1..5.
        Dataset df = sparkSession.createDataFrame(textFile.filter(new PubTator$$anonfun$1()).map(new PubTator$$anonfun$2(), ClassTag$.MODULE$.apply(ScalaRunTime$.MODULE$.arrayClass(String.class))).groupBy(new PubTator$$anonfun$3(), ClassTag$.MODULE$.apply(String.class)).map(new PubTator$$anonfun$4(), ClassTag$.MODULE$.apply(Tuple2.class)).map(new PubTator$$anonfun$5(), ClassTag$.MODULE$.apply(Tuple2.class)), package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: com.johnsnowlabs.nlp.training.PubTator$$typecreator1$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                Universe universe = mirror.universe();
                return universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala").asModule().moduleClass()), mirror.staticClass("scala.Tuple2"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{mirror.staticClass("scala.Int").asType().toTypeConstructor(), universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala").asModule().moduleClass()), mirror.staticModule("scala.Predef")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.Predef").asModule().moduleClass(), "String"), Nil$.MODULE$)})));
            }
        })).toDF(Predef$.MODULE$.wrapRefArray(new String[]{"doc_id", "text"}));
        // Base NLP pipeline: text -> document -> sentence -> token, fitted and applied on df.
        Dataset transform = new Pipeline().setStages(new PipelineStage[]{(DocumentAssembler) new DocumentAssembler().setInputCol("text").setOutputCol("document"), (SentenceDetector) ((HasOutputAnnotationCol) new SentenceDetector().setInputCols((Seq<String>) Predef$.MODULE$.wrapRefArray(new String[]{"document"}))).setOutputCol("sentence"), (Tokenizer) ((HasOutputAnnotationCol) new Tokenizer().setInputCols((Seq<String>) Predef$.MODULE$.wrapRefArray(new String[]{"sentence"}))).setOutputCol("token")}).fit(df).transform(df);
        // Second view of the same lines: filter -> String[] -> Tuple6 -> group by a String key
        // -> two Tuple2 maps.
        // NOTE(review): presumably the six-field entity annotation lines - confirm in
        // PubTator$$anonfun$6..11.
        RDD map = textFile.filter(new PubTator$$anonfun$6()).map(new PubTator$$anonfun$7(), ClassTag$.MODULE$.apply(ScalaRunTime$.MODULE$.arrayClass(String.class))).map(new PubTator$$anonfun$8(), ClassTag$.MODULE$.apply(Tuple6.class)).groupBy(new PubTator$$anonfun$9(), ClassTag$.MODULE$.apply(String.class)).map(new PubTator$$anonfun$10(), ClassTag$.MODULE$.apply(Tuple2.class)).map(new PubTator$$anonfun$11(), ClassTag$.MODULE$.apply(Tuple2.class));
        // Column metadata that marks the "chunk" column with annotator type CHUNK.
        Metadata build = new MetadataBuilder().putString("annotatorType", AnnotatorType$.MODULE$.CHUNK()).build();
        // One expression that:
        //  - turns the second RDD into a (doc_id, chunk) frame typed as Tuple2[Int, List[Annotation]],
        //  - re-aliases "chunk" with the CHUNK metadata,
        //  - joins it to the tokenized frame on "doc_id" and keeps doc_id/sentence/token/chunk,
        //  - adds "label" by applying the PubTator$$anonfun$12 UDF to (token, chunk), tagged with
        //    annotator type NAMED_ENTITY.
        // The three TypeCreators passed to udf(...) describe Seq[Annotation], Seq[Row] and
        // Seq[Row] respectively (presumably the result type followed by the two argument types).
        Dataset withColumn = transform.join(sparkSession.createDataFrame(map, package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: com.johnsnowlabs.nlp.training.PubTator$$typecreator2$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                Universe universe = mirror.universe();
                return universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala").asModule().moduleClass()), mirror.staticClass("scala.Tuple2"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{mirror.staticClass("scala.Int").asType().toTypeConstructor(), universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala").asModule().moduleClass()), mirror.staticModule("scala.package")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.package").asModule().moduleClass(), "List"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{mirror.staticClass("com.johnsnowlabs.nlp.Annotation").asType().toTypeConstructor()})))})));
            }
        })).toDF(Predef$.MODULE$.wrapRefArray(new String[]{"doc_id", "chunk"})).withColumn("chunk", functions$.MODULE$.col("chunk").as("chunk", build)), Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new String[]{"doc_id"}))).selectExpr(Predef$.MODULE$.wrapRefArray(new String[]{"doc_id", "sentence", "token", "chunk"})).withColumn("label", functions$.MODULE$.udf(new PubTator$$anonfun$12(), package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: com.johnsnowlabs.nlp.training.PubTator$$typecreator3$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                Universe universe = mirror.universe();
                return universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala.collection").asModule().moduleClass()), mirror.staticClass("scala.collection.Seq"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{mirror.staticClass("com.johnsnowlabs.nlp.Annotation").asType().toTypeConstructor()})));
            }
        }), package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: com.johnsnowlabs.nlp.training.PubTator$$typecreator4$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                Universe universe = mirror.universe();
                return universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().thisPrefix(mirror.RootClass()), mirror.staticPackage("scala")), mirror.staticModule("scala.package")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.package").asModule().moduleClass(), "Seq"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{mirror.staticClass("org.apache.spark.sql.Row").asType().toTypeConstructor()})));
            }
        }), package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: com.johnsnowlabs.nlp.training.PubTator$$typecreator5$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                Universe universe = mirror.universe();
                return universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().SingleType(universe.internal().reificationSupport().thisPrefix(mirror.RootClass()), mirror.staticPackage("scala")), mirror.staticModule("scala.package")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.package").asModule().moduleClass(), "Seq"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{mirror.staticClass("org.apache.spark.sql.Row").asType().toTypeConstructor()})));
            }
        })).apply(Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.col("token"), functions$.MODULE$.col("chunk")})).as("label", new MetadataBuilder().putString("annotatorType", AnnotatorType$.MODULE$.NAMED_ENTITY()).build()));
        // Final pipeline: pretrained POS tagger over (sentence, token) -> "pos", then a Finisher
        // over token/pos/label that also emits metadata. "finished_label" is exposed to callers
        // as "finished_ner".
        // NOTE(review): "mo250pretrained" looks like a decompiler-generated alias (likely for
        // pretrained()) - confirm before compiling against the real library.
        return new Pipeline().setStages(new Transformer[]{(PerceptronModel) ((HasOutputAnnotationCol) package$PerceptronModel$.MODULE$.mo250pretrained().setInputCols(new String[]{"sentence", "token"})).setOutputCol("pos"), new Finisher().setInputCols((Seq<String>) Predef$.MODULE$.wrapRefArray(new String[]{"token", "pos", "label"})).setIncludeMetadata(true)}).fit(withColumn).transform(withColumn).withColumnRenamed("finished_label", "finished_ner");
    }

    /** Private: the only instance is the one stored in {@link #MODULE$}. */
    private PubTator$() {
    }
}
