package com.johnsnowlabs.ml.crf;

import java.io.FileInputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import scala.Array$;
import scala.MatchError;
import scala.None$;
import scala.Option;
import scala.Option$;
import scala.Predef$;
import scala.Some;
import scala.Tuple2;
import scala.collection.Iterator;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.SeqLike;
import scala.collection.TraversableLike;
import scala.collection.TraversableOnce;
import scala.collection.TraversableOnce$;
import scala.collection.generic.GenericTraversableTemplate;
import scala.collection.mutable.ArrayBuffer;
import scala.collection.mutable.ArrayOps;
import scala.io.Codec$;
import scala.io.Source;
import scala.io.Source$;
import scala.math.Ordering$Int$;
import scala.reflect.ClassTag$;
import scala.runtime.BoxesRunTime;
import scala.runtime.ObjectRef;

/* compiled from: DatasetReader.scala */
/* loaded from: input_file:com/johnsnowlabs/ml/crf/DatasetReader$.class */
/**
 * Singleton holder (Scala {@code object DatasetReader}) that reads a tab-separated,
 * sentence-per-paragraph dataset file and encodes it into CRF instances.
 *
 * <p>File layout as consumed by {@link #readWithLabels(String, int)}: each token line has
 * the label in the first tab-separated column and {@code name=value} attributes in the
 * remaining columns; a line with at most one column terminates the current sentence.
 */
public final class DatasetReader$ {
    /** Singleton instance; assigned by the private constructor, which the static initializer runs. */
    public static DatasetReader$ MODULE$;

    static {
        new DatasetReader$();
    }

    /**
     * Opens {@code str} as a character source using the fallback system codec.
     * Paths ending in {@code .gz} are read through a gzip decompressor.
     *
     * @param str path of the file to open
     * @return an open source; the caller is responsible for closing it
     */
    private Source getSource(String str) {
        if (!str.endsWith(".gz")) {
            return Source$.MODULE$.fromFile(str, Codec$.MODULE$.fallbackSystemCodec());
        }
        return Source$.MODULE$.fromInputStream(new GzipCompressorInputStream(new FileInputStream(str)), Codec$.MODULE$.fallbackSystemCodec());
    }

    /**
     * Lazily reads sentences (labels plus per-token attributes) from {@code str}.
     *
     * <p>The returned traversable is single-pass and keeps its parsing state in two shared
     * buffers, so it must be consumed sequentially. Once every line of the file has been
     * consumed the underlying source is closed and a final empty line is injected, so a
     * sentence that is not followed by a blank line in the file is still emitted. If the
     * result is abandoned before it is exhausted, the source stays open.
     *
     * @param str path of the dataset file (see {@link #getSource(String)})
     * @param i   number of leading lines to skip
     * @return one (labels, attributes) pair per sentence, in file order
     */
    private TraversableOnce<Tuple2<TextSentenceLabels, TextSentenceAttrs>> readWithLabels(String str, int i) {
        Source source = getSource(str);
        Iterator fileLines = source.getLines().drop(i);
        // The tail is evaluated only after fileLines is exhausted: release the file handle,
        // then feed one empty line that acts as a sentence terminator for the last sentence.
        Iterator lines = fileLines.$plus$plus(() -> {
            source.close();
            return scala.collection.Iterator$.MODULE$.single("");
        });
        ObjectRef labels = ObjectRef.create(new ArrayBuffer());
        ObjectRef words = ObjectRef.create(new ArrayBuffer());
        return lines.flatMap(line -> {
            String[] columns = line.split("\t");
            if (columns.length <= 1) {
                // Sentence boundary: emit the buffered sentence, if any.
                return Option$.MODULE$.option2Iterable(addToResultIfExists$1(words, labels));
            }
            Object[] attrColumns = (Object[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(columns)).drop(1);
            Tuple2[] attrPairs = (Tuple2[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(attrColumns)).map(attr -> {
                String[] nameAndValue = attr.split("=");
                // A column without a value part maps to the empty string.
                return new Tuple2(nameAndValue[0], nameAndValue.length == 1 ? "" : nameAndValue[1]);
            }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class)));
            WordAttrs word = new WordAttrs(Predef$.MODULE$.wrapRefArray(attrPairs), WordAttrs$.MODULE$.apply$default$2());
            ((ArrayBuffer) words.elem).append(Predef$.MODULE$.wrapRefArray(new WordAttrs[]{word}));
            // columns.length > 1 here, so the first column (the label) always exists.
            ((ArrayBuffer) labels.elem).append(Predef$.MODULE$.wrapRefArray(new String[]{columns[0]}));
            return Option$.MODULE$.option2Iterable(None$.MODULE$);
        });
    }

    /** Compiler-generated default for the second parameter of {@code readWithLabels}: skip no lines. */
    private int readWithLabels$default$2() {
        return 0;
    }

    /**
     * Encodes text sentences into a {@link CrfDataset}, building the metadata with a fresh
     * {@link DatasetEncoder} as it goes.
     *
     * @param traversableOnce sentences as (labels, attributes) pairs; consumed once
     * @return the encoded instances together with the encoder's metadata
     */
    public CrfDataset encodeDataset(TraversableOnce<Tuple2<TextSentenceLabels, TextSentenceAttrs>> traversableOnce) {
        DatasetEncoder datasetEncoder = new DatasetEncoder(DatasetEncoder$.MODULE$.$lessinit$greater$default$1());
        return new CrfDataset(Predef$.MODULE$.wrapRefArray((Tuple2[]) TraversableOnce$.MODULE$.MonadOps(traversableOnce).map(sentence -> {
            if (sentence == null) {
                throw new MatchError(sentence);
            }
            TextSentenceLabels textSentenceLabels = (TextSentenceLabels) sentence._1();
            TextSentenceAttrs textSentenceAttrs = (TextSentenceAttrs) sentence._2();
            // Label of the previous token; every sentence starts from the encoder's start label.
            ObjectRef previousLabel = ObjectRef.create(datasetEncoder.startLabel());
            Tuple2 unzip = ((GenericTraversableTemplate) ((TraversableLike) textSentenceLabels.labels().zip(textSentenceAttrs.words(), Seq$.MODULE$.canBuildFrom())).map(labelAndWord -> {
                if (labelAndWord == null) {
                    throw new MatchError(labelAndWord);
                }
                String str = (String) labelAndWord._1();
                WordAttrs wordAttrs = (WordAttrs) labelAndWord._2();
                Tuple2<Object, SparseArray> features = datasetEncoder.getFeatures((String) previousLabel.elem, str, (Seq) wordAttrs.strAttrs().map(attr -> {
                    return new StringBuilder(1).append((String) attr._1()).append("=").append(attr._2()).toString();
                }, Seq$.MODULE$.canBuildFrom()), Predef$.MODULE$.wrapFloatArray(wordAttrs.numAttrs()));
                if (features == null) {
                    throw new MatchError(features);
                }
                int labelId = features._1$mcI$sp();
                SparseArray sparseArray = (SparseArray) features._2();
                previousLabel.elem = str;
                return new Tuple2(BoxesRunTime.boxToInteger(labelId), sparseArray);
            }, Seq$.MODULE$.canBuildFrom())).unzip(Predef$.MODULE$.$conforms());
            if (unzip == null) {
                throw new MatchError(unzip);
            }
            return new Tuple2(new InstanceLabels((Seq) unzip._1()), new Instance((Seq) unzip._2()));
        }).toArray(ClassTag$.MODULE$.apply(Tuple2.class))), datasetEncoder.getMetadata());
    }

    /**
     * Maps each text label to its id in {@code datasetMetadata.label2Id()}.
     * Labels that are not present in the mapping are encoded as {@code -1}.
     */
    private InstanceLabels encodeLabels(TextSentenceLabels textSentenceLabels, DatasetMetadata datasetMetadata) {
        return new InstanceLabels((Seq) textSentenceLabels.labels().map(str -> {
            return BoxesRunTime.boxToInteger($anonfun$encodeLabels$1(datasetMetadata, str));
        }, Seq$.MODULE$.canBuildFrom()));
    }

    /**
     * Encodes a sentence against existing metadata, one {@link SparseArray} per word.
     *
     * <p>For each word: string attributes are looked up as {@code name=value} in
     * {@code attr2Id} and get the value {@code 1.0f}; numeric attributes are looked up as
     * {@code num<index>} and keep their own value. Attributes missing from {@code attr2Id}
     * are skipped. The resulting (id, value) pairs are sorted by id and exact duplicates
     * are removed.
     *
     * @param textSentenceAttrs the words of the sentence with their attributes
     * @param datasetMetadata   metadata providing the attribute-to-id mapping
     * @return the encoded sentence
     */
    public Instance encodeSentence(TextSentenceAttrs textSentenceAttrs, DatasetMetadata datasetMetadata) {
        return new Instance((Seq) textSentenceAttrs.words().map(wordAttrs -> {
            // Ids of the string attributes that are known to the metadata.
            TraversableLike knownAttrIds = (TraversableLike) wordAttrs.strAttrs().flatMap(tuple2 -> {
                if (tuple2 == null) {
                    throw new MatchError(tuple2);
                }
                String str = (String) tuple2._1();
                return Option$.MODULE$.option2Iterable(datasetMetadata.attr2Id().get(new StringBuilder(1).append(str).append("=").append((String) tuple2._2()).toString()));
            }, Seq$.MODULE$.canBuildFrom());
            Seq strFeatures = (Seq) knownAttrIds.map(obj -> {
                return $anonfun$encodeSentence$3(BoxesRunTime.unboxToInt(obj));
            }, Seq$.MODULE$.canBuildFrom());

            // Numeric attributes, paired with their position so they can be keyed as num<index>.
            Object[] indexedNums = (Object[]) new ArrayOps.ofFloat(Predef$.MODULE$.floatArrayOps(wordAttrs.numAttrs())).zipWithIndex(Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class)));
            Tuple2[] numFeatures = (Tuple2[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(indexedNums)).flatMap(tuple22 -> {
                if (tuple22 == null) {
                    throw new MatchError(tuple22);
                }
                float unboxToFloat = BoxesRunTime.unboxToFloat(tuple22._1());
                return Option$.MODULE$.option2Iterable(datasetMetadata.attr2Id().get(new StringBuilder(3).append("num").append(tuple22._2$mcI$sp()).toString()).map(obj2 -> {
                    return $anonfun$encodeSentence$5(unboxToFloat, BoxesRunTime.unboxToInt(obj2));
                }));
            }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class)));

            Seq allFeatures = (Seq) strFeatures.$plus$plus(new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(numFeatures)), Seq$.MODULE$.canBuildFrom());
            SeqLike sortedFeatures = (SeqLike) allFeatures.sortBy(tuple23 -> {
                return BoxesRunTime.boxToInteger(tuple23._1$mcI$sp());
            }, Ordering$Int$.MODULE$);
            TraversableOnce uniqueFeatures = (TraversableOnce) sortedFeatures.distinct();
            return new SparseArray((Tuple2[]) uniqueFeatures.toArray(ClassTag$.MODULE$.apply(Tuple2.class)));
        }, Seq$.MODULE$.canBuildFrom()));
    }

    /**
     * Reads the file at {@code str}, skipping the first {@code i} lines, and encodes it into
     * a new dataset with freshly built metadata.
     */
    public CrfDataset readAndEncode(String str, int i) {
        return encodeDataset(readWithLabels(str, i));
    }

    /**
     * Reads the file at {@code str}, skipping the first {@code i} lines, and encodes every
     * sentence against the supplied metadata.
     *
     * @return a single-pass traversable of (encoded labels, encoded sentence) pairs
     */
    public TraversableOnce<Tuple2<InstanceLabels, Instance>> readAndEncode(String str, int i, DatasetMetadata datasetMetadata) {
        return TraversableOnce$.MODULE$.MonadOps(readWithLabels(str, i)).map(tuple2 -> {
            if (tuple2 == null) {
                throw new MatchError(tuple2);
            }
            return new Tuple2(MODULE$.encodeLabels((TextSentenceLabels) tuple2._1(), datasetMetadata), MODULE$.encodeSentence((TextSentenceAttrs) tuple2._2(), datasetMetadata));
        });
    }

    /**
     * Emits the buffered sentence, if any, and resets both buffers.
     *
     * @param objectRef  holder of the buffered word attributes
     * @param objectRef2 holder of the buffered labels
     * @return {@code Some((labels, attributes))} when at least one word is buffered,
     *         otherwise {@code None} (the buffers are then left untouched)
     */
    private static final Option addToResultIfExists$1(ObjectRef objectRef, ObjectRef objectRef2) {
        if (!((ArrayBuffer) objectRef.elem).nonEmpty()) {
            return None$.MODULE$;
        }
        Tuple2 tuple2 = new Tuple2(new TextSentenceLabels((ArrayBuffer) objectRef2.elem), new TextSentenceAttrs((ArrayBuffer) objectRef.elem));
        // Install fresh buffers instead of clearing: the emitted sentence keeps the old ones.
        objectRef2.elem = new ArrayBuffer();
        objectRef.elem = new ArrayBuffer();
        return new Some(tuple2);
    }

    /** Lambda body of {@code encodeLabels}: id of {@code str} in {@code label2Id}, or -1 if absent. */
    public static final /* synthetic */ int $anonfun$encodeLabels$1(DatasetMetadata datasetMetadata, String str) {
        return BoxesRunTime.unboxToInt(datasetMetadata.label2Id().getOrElse(str, () -> {
            return -1;
        }));
    }

    /** Lambda body of {@code encodeSentence}: pairs a string-attribute id with the value 1.0f. */
    public static final /* synthetic */ Tuple2 $anonfun$encodeSentence$3(int i) {
        return new Tuple2(BoxesRunTime.boxToInteger(i), BoxesRunTime.boxToFloat(1.0f));
    }

    /** Lambda body of {@code encodeSentence}: pairs a numeric-attribute id {@code i} with its value {@code f}. */
    public static final /* synthetic */ Tuple2 $anonfun$encodeSentence$5(float f, int i) {
        return new Tuple2(BoxesRunTime.boxToInteger(i), BoxesRunTime.boxToFloat(f));
    }

    /** Publishes the singleton through {@link #MODULE$}. */
    private DatasetReader$() {
        MODULE$ = this;
    }
}
