package com.johnsnowlabs.nlp.annotators.ws;

import com.johnsnowlabs.nlp.Annotation;
import com.johnsnowlabs.nlp.AnnotatorModel;
import com.johnsnowlabs.nlp.AnnotatorType$;
import com.johnsnowlabs.nlp.annotators.common.IndexedTaggedWord;
import com.johnsnowlabs.nlp.annotators.common.Sentence;
import com.johnsnowlabs.nlp.annotators.common.SentenceSplit$;
import com.johnsnowlabs.nlp.annotators.common.TaggedSentence;
import com.johnsnowlabs.nlp.annotators.common.TokenizedSentence;
import com.johnsnowlabs.nlp.annotators.common.TokenizedWithSentence$;
import com.johnsnowlabs.nlp.annotators.pos.perceptron.AveragedPerceptron;
import com.johnsnowlabs.nlp.annotators.pos.perceptron.PerceptronPredictionUtils;
import com.johnsnowlabs.nlp.annotators.pos.perceptron.PerceptronUtils;
import com.johnsnowlabs.nlp.serialization.StructFeature;
import org.apache.spark.ml.util.Identifiable$;
import org.apache.spark.ml.util.MLReader;
import org.apache.spark.sql.SparkSession;
import scala.Array$;
import scala.Function2;
import scala.Function3;
import scala.Predef$;
import scala.Predef$DummyImplicit$;
import scala.Some;
import scala.StringContext;
import scala.Tuple2;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.SeqLike;
import scala.collection.immutable.List;
import scala.collection.immutable.List$;
import scala.collection.immutable.Map;
import scala.collection.immutable.StringOps;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxedUnit;

/* compiled from: WordSegmenterModel.scala */
@ScalaSignature(bytes = "\u0006\u0001\u00055f\u0001B\u0001\u0003\u00015\u0011!cV8sIN+w-\\3oi\u0016\u0014Xj\u001c3fY*\u00111\u0001B\u0001\u0003oNT!!\u0002\u0004\u0002\u0015\u0005tgn\u001c;bi>\u00148O\u0003\u0002\b\u0011\u0005\u0019a\u000e\u001c9\u000b\u0005%Q\u0011\u0001\u00046pQ:\u001chn\\<mC\n\u001c(\"A\u0006\u0002\u0007\r|Wn\u0001\u0001\u0014\u0007\u0001qA\u0003E\u0002\u0010!Ii\u0011AB\u0005\u0003#\u0019\u0011a\"\u00118o_R\fGo\u001c:N_\u0012,G\u000e\u0005\u0002\u0014\u00015\t!\u0001\u0005\u0002\u001655\taC\u0003\u0002\u00181\u0005Q\u0001/\u001a:dKB$(o\u001c8\u000b\u0005e!\u0011a\u00019pg&\u00111D\u0006\u0002\u001a!\u0016\u00148-\u001a9ue>t\u0007K]3eS\u000e$\u0018n\u001c8Vi&d7\u000f\u0003\u0005\u001e\u0001\t\u0015\r\u0011\"\u0011\u001f\u0003\r)\u0018\u000eZ\u000b\u0002?A\u0011\u0001E\n\b\u0003C\u0011j\u0011A\t\u0006\u0002G\u0005)1oY1mC&\u0011QEI\u0001\u0007!J,G-\u001a4\n\u0005\u001dB#AB*ue&twM\u0003\u0002&E!A!\u0006\u0001B\u0001B\u0003%q$\u0001\u0003vS\u0012\u0004\u0003\"\u0002\u0017\u0001\t\u0003i\u0013A\u0002\u001fj]&$h\b\u0006\u0002\u0013]!)Qd\u000ba\u0001?!)A\u0006\u0001C\u0001aQ\t!\u0003C\u00043\u0001\t\u0007I\u0011A\u001a\u0002\u000b5|G-\u001a7\u0016\u0003Q\u00022!\u000e\u001d;\u001b\u00051$BA\u001c\u0007\u00035\u0019XM]5bY&T\u0018\r^5p]&\u0011\u0011H\u000e\u0002\u000e'R\u0014Xo\u0019;GK\u0006$XO]3\u0011\u0005UY\u0014B\u0001\u001f\u0017\u0005I\te/\u001a:bO\u0016$\u0007+\u001a:dKB$(o\u001c8\t\ry\u0002\u0001\u0015!\u00035\u0003\u0019iw\u000eZ3mA!)\u0001\t\u0001C\u0001\u0003\u0006Aq-\u001a;N_\u0012,G.F\u0001;\u0011\u0015\u0019\u0005\u0001\"\u0001E\u0003!\u0019X\r^'pI\u0016dGCA#G\u001b\u0005\u0001\u0001\"B$C\u0001\u0004Q\u0014a\u0003;be\u001e,G/T8eK2DQ!\u0013\u0001\u0005B)\u000b\u0001\"\u00198o_R\fG/\u001a\u000b\u0003\u0017j\u00032\u0001\u0014+X\u001d\ti%K\u0004\u0002O#6\tqJ\u0003\u0002Q\u0019\u00051AH]8pizJ\u0011aI\u0005\u0003'\n\nq\u0001]1dW\u0006<W-\u0003\u0002V-\n\u00191+Z9\u000b\u0005M\u0013\u0003CA\bY\u0013\tIfA\u0001\u0006B]:|G/\u0019;j_:DQa\u0017%A\u0002-
\u000b1\"\u00198o_R\fG/[8og\")Q\f\u0001C\u0005=\u0006\u0019r-\u001a;U_.,g.\u00118o_R\fG/[8ogR\u00111j\u0018\u0005\u0006Ar\u0003\r!Y\u0001\u000bC:tw\u000e^1uS>t\u0007c\u0001'UEB\u00111MZ\u0007\u0002I*\u0011Q\rB\u0001\u0007G>lWn\u001c8\n\u0005\u001d$'\u0001C*f]R,gnY3\t\u000b%\u0004A\u0011\u00016\u0002#\t,\u0018\u000e\u001c3X_J$7+Z4nK:$8\u000f\u0006\u0002LW\")A\u000e\u001ba\u0001[\u0006yA/Y4hK\u0012\u001cVM\u001c;f]\u000e,7\u000fE\u0002\"]BL!a\u001c\u0012\u0003\u000b\u0005\u0013(/Y=\u0011\u0005\r\f\u0018B\u0001:e\u00059!\u0016mZ4fIN+g\u000e^3oG\u0016DQ\u0001\u001e\u0001\u0005\nU\fQdZ3u/>\u0014H-\u00138eKb,7OQ=NCR\u001c\u0007.\u001a3He>,\bo\u001d\u000b\u0003mv\u00042\u0001T<z\u0013\tAhK\u0001\u0003MSN$\bc\u0001'xuB\u00111c_\u0005\u0003y\n\u0011QBU3hKb$\u0016mZ:J]\u001a|\u0007\"\u0002@t\u0001\u0004y\u0012\u0001\u0004;bON\u001cVM\u001c;f]\u000e,\u0007bBA\u0001\u0001\u0011%\u00111A\u0001\u0015C:tw\u000e^1uKN+w-\\3oi^{'\u000fZ:\u0015\u000f-\u000b)!!\u0003\u0002\u000e!1\u0011qA@A\u0002Y\f!d^8sI&sG-\u001a=fg\nKX*\u0019;dQ\u0016$wI]8vaNDa!a\u0003��\u0001\u0004\u0001\u0018A\u0004;bO\u001e,GmU3oi\u0016t7-\u001a\u0005\b\u0003\u001fy\b\u0019AA\t\u00035\u0019XM\u001c;f]\u000e,\u0017J\u001c3fqB\u0019\u0011%a\u0005\n\u0007\u0005U!EA\u0002J]RDq!!\u0007\u0001\t\u0013\tY\"A\u000ehKR\u001c\u0016N\\4mK&sG-\u001a=fIR\u000bwmZ3e/>\u0014Hm\u001d\u000b\u0007\u0003;\t)#a\n\u0011\t1;\u0018q\u0004\t\u0004G\u0006\u0005\u0012bAA\u0012I\n\t\u0012J\u001c3fq\u0016$G+Y4hK\u0012<vN\u001d3\t\u000f\u0005\u001d\u0011q\u0003a\u0001m\"9\u00111BA\f\u0001\u0004\u0001\bbBA\u0016\u0001\u0011%\u0011QF\u0001\u000eSNl\u0015\r^2iK\u0012<vN\u001d3\u0015\r\u0005=\u0012QGA\u001d!\r\t\u0013\u0011G\u0005\u0004\u0003g\u0011#a\u0002\"p_2,\u0017M\u001c\u0005\t\u0003o\tI\u00031\u0001\u0002 
\u0005\t\u0012N\u001c3fq\u0016$G+Y4hK\u0012<vN\u001d3\t\u000f\u0005m\u0012\u0011\u0006a\u0001m\u0006\t\"/Z4fqR\u000bwm]%oM>d\u0015n\u001d;\t\u000f\u0005}\u0002\u0001\"\u0003\u0002B\u00051r-\u001a;Nk2$\u0018\u000e\u001d7f)\u0006<w-\u001a3X_J$7\u000f\u0006\u0004\u0002\u001e\u0005\r\u0013Q\t\u0005\b\u0003\u000f\ti\u00041\u0001w\u0011\u001d\tY!!\u0010A\u0002AD\u0011\"!\u0013\u0001\u0005\u0004%I!a\u0013\u0002\u0017A\u0014xnY3tgR\u000bwm]\u000b\u0003\u0003\u001b\u0002\u0012\"IA(\u0003?\ty\"a\b\n\u0007\u0005E#EA\u0005Gk:\u001cG/[8oe!A\u0011Q\u000b\u0001!\u0002\u0013\ti%\u0001\u0007qe>\u001cWm]:UC\u001e\u001c\b\u0005C\u0005\u0002Z\u0001\u0011\r\u0011\"\u0011\u0002\\\u0005\u0019r.\u001e;qkR\feN\\8uCR|'\u000fV=qKV\u0011\u0011Q\f\t\u0004\u000b\u0006}\u0013\u0002BA1\u0003G\u0012Q\"\u00118o_R\fGo\u001c:UsB,\u0017bAA3\r\t1\u0002*Y:PkR\u0004X\u000f^!o]>$\u0018\r^8s)f\u0004X\r\u0003\u0005\u0002j\u0001\u0001\u000b\u0011BA/\u0003QyW\u000f\u001e9vi\u0006sgn\u001c;bi>\u0014H+\u001f9fA!I\u0011Q\u000e\u0001C\u0002\u0013\u0005\u0013qN\u0001\u0014S:\u0004X\u000f^!o]>$\u0018\r^8s)f\u0004Xm]\u000b\u0003\u0003c\u00022!\t8 \u0011!\t)\b\u0001Q\u0001\n\u0005E\u0014\u0001F5oaV$\u0018I\u001c8pi\u0006$xN\u001d+za\u0016\u001c\beB\u0004\u0002z\tA\t!a\u001f\u0002%]{'\u000fZ*fO6,g\u000e^3s\u001b>$W\r\u001c\t\u0004'\u0005udAB\u0001\u0003\u0011\u0003\tyh\u0005\u0005\u0002~\u0005\u0005\u0015qQAG!\r\t\u00131Q\u0005\u0004\u0003\u000b\u0013#AB!osJ+g\rE\u0002\u0014\u0003\u0013K1!a#\u0003\u0005}\u0011V-\u00193bE2,\u0007K]3ue\u0006Lg.\u001a3X_J$7+Z4nK:$XM\u001d\t\u0004C\u0005=\u0015bAAIE\ta1+\u001a:jC2L'0\u00192mK\"9A&! \u0005\u0002\u0005UECAA>\u0011)\tI*! \u0002\u0002\u0013%\u00111T\u0001\fe\u0016\fGMU3t_24X\r\u0006\u0002\u0002\u001eB!\u0011qTAU\u001b\t\t\tK\u0003\u0003\u0002$\u0006\u0015\u0016\u0001\u00027b]\u001eT!!a*\u0002\t)\fg/Y\u0005\u0005\u0003W\u000b\tK\u0001\u0004PE*,7\r\u001e")
/* loaded from: input_file:com/johnsnowlabs/nlp/annotators/ws/WordSegmenterModel.class */
/*
 * Java view of the Scala class compiled from WordSegmenterModel.scala.
 *
 * The model wraps an AveragedPerceptron (stored in the "POS Model" struct feature), consumes
 * DOCUMENT annotations and emits TOKEN annotations: sentences are unpacked from the input,
 * tagged by the perceptron, and the resulting tag sequence is turned into word segments.
 *
 * Much of the per-element logic lives in synthetic WordSegmenterModel$$anonfun$... function
 * classes that are not part of this file; comments below only describe what is visible here.
 */
public class WordSegmenterModel extends AnnotatorModel<WordSegmenterModel> implements PerceptronPredictionUtils {
    /** Identifier of this instance, supplied to the constructor. */
    private final String uid;
    /** Feature holding the perceptron used by {@link #annotate(Seq)}. */
    private final StructFeature<AveragedPerceptron> model;
    /** Binary function over tagged words; its body is in WordSegmenterModel$$anonfun$13 (not visible here). */
    private final Function2<IndexedTaggedWord, IndexedTaggedWord, IndexedTaggedWord> com$johnsnowlabs$nlp$annotators$ws$WordSegmenterModel$$processTags;
    /** Annotation type produced by this model (AnnotatorType.TOKEN). */
    private final String outputAnnotatorType;
    /** Annotation types required as input (a single entry: AnnotatorType.DOCUMENT). */
    private final String[] inputAnnotatorTypes;
    // START and END are deliberately NOT final: they are assigned through the PerceptronUtils
    // "_setter_" methods below instead of in a constructor, which javac rejects for final fields.
    private String[] START;
    private String[] END;

    /** Forwards to {@code load} on the {@code WordSegmenterModel$} singleton. */
    public static Object load(String str) {
        return WordSegmenterModel$.MODULE$.load(str);
    }

    /** Forwards to {@code read} on the {@code WordSegmenterModel$} singleton. */
    public static MLReader<WordSegmenterModel> read() {
        return WordSegmenterModel$.MODULE$.read();
    }

    /** Forwards to {@code addReader} on the {@code WordSegmenterModel$} singleton. */
    public static void addReader(Function3<WordSegmenterModel, String, SparkSession, BoxedUnit> function3) {
        WordSegmenterModel$.MODULE$.addReader(function3);
    }

    /** Forwards to {@code defaultLoc} on the {@code WordSegmenterModel$} singleton. */
    public static String defaultLoc() {
        return WordSegmenterModel$.MODULE$.defaultLoc();
    }

    /** Three-argument {@code pretrained} overload; forwards to the {@code WordSegmenterModel$} singleton. */
    public static WordSegmenterModel pretrained(String str, String str2, String str3) {
        return WordSegmenterModel$.MODULE$.mo247pretrained(str, str2, str3);
    }

    /** Two-argument {@code pretrained} overload; forwards to the {@code WordSegmenterModel$} singleton. */
    public static WordSegmenterModel pretrained(String str, String str2) {
        return WordSegmenterModel$.MODULE$.mo248pretrained(str, str2);
    }

    /** One-argument {@code pretrained} overload; forwards to the {@code WordSegmenterModel$} singleton. */
    public static WordSegmenterModel pretrained(String str) {
        return WordSegmenterModel$.MODULE$.mo249pretrained(str);
    }

    /** No-argument {@code pretrained} overload; forwards to the {@code WordSegmenterModel$} singleton. */
    public static WordSegmenterModel pretrained() {
        return WordSegmenterModel$.MODULE$.mo250pretrained();
    }

    /** Forwards to {@code defaultLang} on the {@code WordSegmenterModel$} singleton. */
    public static String defaultLang() {
        return WordSegmenterModel$.MODULE$.defaultLang();
    }

    /** Forwards to {@code defaultModelName} on the {@code WordSegmenterModel$} singleton. */
    public static Some<String> defaultModelName() {
        return WordSegmenterModel$.MODULE$.mo251defaultModelName();
    }

    /**
     * Tags the given tokenized sentences with the given perceptron by delegating to the
     * trait implementation in {@code PerceptronPredictionUtils.Cclass}.
     */
    @Override // com.johnsnowlabs.nlp.annotators.pos.perceptron.PerceptronPredictionUtils
    public TaggedSentence[] tag(AveragedPerceptron averagedPerceptron, TokenizedSentence[] tokenizedSentenceArr) {
        return PerceptronPredictionUtils.Cclass.tag(this, averagedPerceptron, tokenizedSentenceArr);
    }

    /** Returns the START array stored via the PerceptronUtils setter. */
    @Override // com.johnsnowlabs.nlp.annotators.pos.perceptron.PerceptronUtils
    public String[] START() {
        return this.START;
    }

    /** Returns the END array stored via the PerceptronUtils setter. */
    @Override // com.johnsnowlabs.nlp.annotators.pos.perceptron.PerceptronUtils
    public String[] END() {
        return this.END;
    }

    /**
     * Trait-field setter for START.
     * NOTE(review): presumably invoked from PerceptronUtils.Cclass.$init$ during construction - confirm.
     */
    @Override // com.johnsnowlabs.nlp.annotators.pos.perceptron.PerceptronUtils
    public void com$johnsnowlabs$nlp$annotators$pos$perceptron$PerceptronUtils$_setter_$START_$eq(String[] strArr) {
        this.START = strArr;
    }

    /**
     * Trait-field setter for END.
     * NOTE(review): presumably invoked from PerceptronUtils.Cclass.$init$ during construction - confirm.
     */
    @Override // com.johnsnowlabs.nlp.annotators.pos.perceptron.PerceptronUtils
    public void com$johnsnowlabs$nlp$annotators$pos$perceptron$PerceptronUtils$_setter_$END_$eq(String[] strArr) {
        this.END = strArr;
    }

    /** Delegates to the trait implementation in {@code PerceptronUtils.Cclass}. */
    @Override // com.johnsnowlabs.nlp.annotators.pos.perceptron.PerceptronUtils
    public String normalized(String str) {
        return PerceptronUtils.Cclass.normalized(this, str);
    }

    /** Delegates to the trait implementation in {@code PerceptronUtils.Cclass}. */
    @Override // com.johnsnowlabs.nlp.annotators.pos.perceptron.PerceptronUtils
    public Map<String, Object> getFeatures(int i, String str, String[] strArr, String str2, String str3) {
        return PerceptronUtils.Cclass.getFeatures(this, i, str, strArr, str2, str3);
    }

    /** Returns the identifier this instance was constructed with. */
    public String uid() {
        return this.uid;
    }

    /** Returns the struct feature that stores the perceptron. */
    public StructFeature<AveragedPerceptron> model() {
        return this.model;
    }

    /** Returns the perceptron currently stored in the {@link #model()} feature. */
    public AveragedPerceptron getModel() {
        return (AveragedPerceptron) $$(model());
    }

    /**
     * Stores the given perceptron in the {@link #model()} feature.
     *
     * @param averagedPerceptron perceptron to store
     * @return the result of {@code set}, cast to this class
     */
    public WordSegmenterModel setModel(AveragedPerceptron averagedPerceptron) {
        // The decompiler emitted casts between inconvertible types here (the feature to
        // StructFeature<StructFeature<...>> and the perceptron to StructFeature<...>);
        // the feature and its value are passed with their declared types instead.
        return (WordSegmenterModel) set(model(), averagedPerceptron);
    }

    /**
     * Produces word-segment annotations for the given input annotations.
     *
     * Steps, innermost first: unpack sentences from the input, derive token annotations from
     * them, append those to the input, unpack tokenized sentences from the combined sequence,
     * tag them with the stored perceptron, and build word segments from the tagged sentences.
     */
    @Override // com.johnsnowlabs.nlp.AnnotatorModel
    public Seq<Annotation> annotate(Seq<Annotation> seq) {
        return buildWordSegments(tag((AveragedPerceptron) $$(model()), (TokenizedSentence[]) TokenizedWithSentence$.MODULE$.unpack((Seq) seq.$plus$plus(getTokenAnnotations(SentenceSplit$.MODULE$.unpack(seq)), Seq$.MODULE$.canBuildFrom())).toArray(ClassTag$.MODULE$.apply(TokenizedSentence.class))));
    }

    /**
     * Flat-maps each sentence to annotations; the per-sentence mapping is implemented in
     * WordSegmenterModel$$anonfun$1 (not visible here).
     */
    private Seq<Annotation> getTokenAnnotations(Seq<Sentence> seq) {
        return (Seq) seq.flatMap(new WordSegmenterModel$$anonfun$1(this), Seq$.MODULE$.canBuildFrom());
    }

    /**
     * Pairs every tagged sentence with its position in the array and flat-maps the pairs to
     * annotations; the per-pair mapping is implemented in
     * WordSegmenterModel$$anonfun$buildWordSegments$1 (not visible here).
     */
    public Seq<Annotation> buildWordSegments(TaggedSentence[] taggedSentenceArr) {
        return (Seq) Predef$.MODULE$.refArrayOps((Object[]) Predef$.MODULE$.refArrayOps(taggedSentenceArr).zipWithIndex(Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class)))).flatMap(new WordSegmenterModel$$anonfun$buildWordSegments$1(this), Array$.MODULE$.fallbackCanBuildFrom(Predef$DummyImplicit$.MODULE$.dummyImplicit()));
    }

    /**
     * Finds every match of the regex {@code (LEFT_BOUNDARY)(MIDDLE*)*(RIGHT_BOUNDARY+)} in the
     * given string, where the three placeholders are the corresponding TagsType values, and maps
     * each match to a list of RegexTagsInfo (mapping implemented in a synthetic function class
     * not visible here).
     */
    public List<List<RegexTagsInfo>> com$johnsnowlabs$nlp$annotators$ws$WordSegmenterModel$$getWordIndexesByMatchedGroups(String str) {
        return new StringOps(Predef$.MODULE$.augmentString(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"(", ")(", "*)*(", "+)"})).s(Predef$.MODULE$.genericWrapArray(new Object[]{TagsType$.MODULE$.LEFT_BOUNDARY(), TagsType$.MODULE$.MIDDLE(), TagsType$.MODULE$.RIGHT_BOUNDARY()})))).r().findAllMatchIn(str).map(new WordSegmenterModel$$anonfun$com$johnsnowlabs$nlp$annotators$ws$WordSegmenterModel$$getWordIndexesByMatchedGroups$1(this)).toList();
    }

    /**
     * Concatenates the single-word and multi-word results for a sentence, sorts them with the
     * comparator in WordSegmenterModel$$anonfun$2, and maps each entry to an annotation using a
     * function that also receives {@code i}.
     * NOTE(review): {@code i} is presumably the sentence index - confirm against the anonfun.
     */
    public Seq<Annotation> com$johnsnowlabs$nlp$annotators$ws$WordSegmenterModel$$annotateSegmentWords(List<List<RegexTagsInfo>> list, TaggedSentence taggedSentence, int i) {
        return (Seq) ((List) ((SeqLike) getSingleIndexedTaggedWords(list, taggedSentence).$plus$plus(getMultipleTaggedWords(list, taggedSentence), List$.MODULE$.canBuildFrom())).sortWith(new WordSegmenterModel$$anonfun$2(this))).map(new WordSegmenterModel$$anonfun$com$johnsnowlabs$nlp$annotators$ws$WordSegmenterModel$$annotateSegmentWords$1(this, i), List$.MODULE$.canBuildFrom());
    }

    /**
     * Pipeline over the sentence's indexed tagged words: zip with index, filter (anonfun$4, given
     * the flattened result of anonfun$3 over {@code list}), map to IndexedTaggedWord (anonfun$5),
     * filter again (anonfun$6, given {@code list}), and collect to a List. The predicates and the
     * mapping are defined in synthetic classes not visible here.
     */
    private List<IndexedTaggedWord> getSingleIndexedTaggedWords(List<List<RegexTagsInfo>> list, TaggedSentence taggedSentence) {
        return Predef$.MODULE$.refArrayOps((IndexedTaggedWord[]) Predef$.MODULE$.refArrayOps((IndexedTaggedWord[]) Predef$.MODULE$.refArrayOps((Object[]) Predef$.MODULE$.refArrayOps((Object[]) Predef$.MODULE$.refArrayOps(taggedSentence.indexedTaggedWords()).zipWithIndex(Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class)))).filter(new WordSegmenterModel$$anonfun$4(this, (List) list.flatMap(new WordSegmenterModel$$anonfun$3(this), List$.MODULE$.canBuildFrom())))).map(new WordSegmenterModel$$anonfun$5(this), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(IndexedTaggedWord.class)))).filter(new WordSegmenterModel$$anonfun$6(this, list))).toList();
    }

    /**
     * Reads the word's "index" metadata entry (default supplied by anonfun$7), parses it as an
     * int, flat-maps {@code list} through anonfun$8 with the word and that index, and reports
     * whether the flattened result contains the string "matched".
     */
    public boolean com$johnsnowlabs$nlp$annotators$ws$WordSegmenterModel$$isMatchedWord(IndexedTaggedWord indexedTaggedWord, List<List<RegexTagsInfo>> list) {
        return ((List) list.flatMap(new WordSegmenterModel$$anonfun$8(this, indexedTaggedWord, new StringOps(Predef$.MODULE$.augmentString((String) indexedTaggedWord.metadata().getOrElse("index", new WordSegmenterModel$$anonfun$7(this)))).toInt()), List$.MODULE$.canBuildFrom())).contains("matched");
    }

    /**
     * Flat-maps each group in {@code list} to tagged words for the given sentence; the mapping
     * is implemented in WordSegmenterModel$$anonfun$getMultipleTaggedWords$1 (not visible here).
     */
    private List<IndexedTaggedWord> getMultipleTaggedWords(List<List<RegexTagsInfo>> list, TaggedSentence taggedSentence) {
        return (List) list.flatMap(new WordSegmenterModel$$anonfun$getMultipleTaggedWords$1(this, taggedSentence), List$.MODULE$.canBuildFrom());
    }

    /** Accessor for the processTags function created in the constructor. */
    public Function2<IndexedTaggedWord, IndexedTaggedWord, IndexedTaggedWord> com$johnsnowlabs$nlp$annotators$ws$WordSegmenterModel$$processTags() {
        return this.com$johnsnowlabs$nlp$annotators$ws$WordSegmenterModel$$processTags;
    }

    /** Returns the output annotation type set in the constructor (AnnotatorType.TOKEN). */
    @Override // com.johnsnowlabs.nlp.HasOutputAnnotatorType
    public String outputAnnotatorType() {
        return this.outputAnnotatorType;
    }

    /** Returns the input annotation types set in the constructor (AnnotatorType.DOCUMENT only). */
    @Override // com.johnsnowlabs.nlp.HasInputAnnotationCols
    public String[] inputAnnotatorTypes() {
        return this.inputAnnotatorTypes;
    }

    /**
     * Creates a model with the given identifier.
     *
     * Runs the PerceptronUtils and PerceptronPredictionUtils trait initializers, then creates the
     * "POS Model" struct feature, the processTags function, and the input/output annotation types.
     *
     * NOTE(review): Java runs the implicit superclass constructor before {@code this.uid} is
     * assigned - confirm the superclass constructor does not depend on {@link #uid()}.
     *
     * @param str identifier for this instance
     */
    public WordSegmenterModel(String str) {
        this.uid = str;
        PerceptronUtils.Cclass.$init$(this);
        PerceptronPredictionUtils.Cclass.$init$(this);
        this.model = new StructFeature<>(this, "POS Model", ClassTag$.MODULE$.apply(AveragedPerceptron.class));
        this.com$johnsnowlabs$nlp$annotators$ws$WordSegmenterModel$$processTags = new WordSegmenterModel$$anonfun$13(this);
        this.outputAnnotatorType = AnnotatorType$.MODULE$.TOKEN();
        this.inputAnnotatorTypes = new String[]{AnnotatorType$.MODULE$.DOCUMENT()};
    }

    /** Creates a model with a random identifier generated from the "WORD_SEGMENTER" prefix. */
    public WordSegmenterModel() {
        this(Identifiable$.MODULE$.randomUID("WORD_SEGMENTER"));
    }
}
