package com.johnsnowlabs.nlp.annotators;

import com.johnsnowlabs.nlp.Annotation;
import com.johnsnowlabs.nlp.AnnotatorModel;
import com.johnsnowlabs.nlp.AnnotatorType$;
import com.johnsnowlabs.nlp.HasSimpleAnnotate;
import com.johnsnowlabs.nlp.annotators.common.IndexedToken;
import com.johnsnowlabs.nlp.annotators.common.Sentence;
import com.johnsnowlabs.nlp.annotators.common.SentenceSplit$;
import com.johnsnowlabs.nlp.annotators.common.TokenizedSentence;
import com.johnsnowlabs.nlp.annotators.common.TokenizedWithSentence$;
import com.johnsnowlabs.nlp.serialization.StructFeature;
import com.johnsnowlabs.nlp.util.regex.RuleFactory;
import org.apache.spark.ml.param.BooleanParam;
import org.apache.spark.ml.param.IntParam;
import org.apache.spark.ml.param.Param;
import org.apache.spark.ml.param.ParamPair;
import org.apache.spark.ml.param.StringArrayParam;
import org.apache.spark.ml.util.Identifiable$;
import org.apache.spark.ml.util.MLReader;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.expressions.UserDefinedFunction;
import scala.Array$;
import scala.Function3;
import scala.Predef$;
import scala.Some;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.SeqLike;
import scala.collection.immutable.IndexedSeq;
import scala.collection.immutable.IndexedSeq$;
import scala.collection.immutable.StringOps;
import scala.collection.immutable.StringOps$;
import scala.collection.mutable.ArrayOps;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.IntRef;
import scala.runtime.LazyRef;
import scala.runtime.RichInt$;
import scala.util.matching.Regex;

/* compiled from: TokenizerModel.scala */
@ScalaSignature(bytes = "\u0006\u0001\tub\u0001\u0002\u001c8\u0001\u0001C\u0001B\u0013\u0001\u0003\u0006\u0004%\te\u0013\u0005\t3\u0002\u0011\t\u0011)A\u0005\u0019\")!\f\u0001C\u00017\"9Q\f\u0001b\u0001\n\u0003q\u0006BB7\u0001A\u0003%q\fC\u0004o\u0001\t\u0007I\u0011A8\t\ry\u0004\u0001\u0015!\u0003q\u0011!y\bA1A\u0005\u0002\u0005\u0005\u0001\u0002CA\u0005\u0001\u0001\u0006I!a\u0001\t\u0013\u0005-\u0001A1A\u0005\u0002\u00055\u0001\u0002CA\u000b\u0001\u0001\u0006I!a\u0004\t\u0013\u0005]\u0001A1A\u0005\u0002\u0005e\u0001\u0002CA\u0011\u0001\u0001\u0006I!a\u0007\t\u0013\u0005\r\u0002A1A\u0005\u0002\u0005e\u0001\u0002CA\u0013\u0001\u0001\u0006I!a\u0007\t\u0011\u0005\u001d\u0002A1A\u0005\u0002=Dq!!\u000b\u0001A\u0003%\u0001\u000fC\u0005\u0002,\u0001\u0011\r\u0011\"\u0001\u0002\u000e!A\u0011Q\u0006\u0001!\u0002\u0013\ty\u0001C\u0005\u00020\u0001\u0011\r\u0011\"\u0011\u00022!A\u0011q\b\u0001!\u0002\u0013\t\u0019\u0004C\u0005\u0002B\u0001\u0011\r\u0011\"\u0011\u0002D!A\u0011Q\n\u0001!\u0002\u0013\t)\u0005\u0003\u0004[\u0001\u0011\u0005\u0011q\n\u0005\b\u0003#\u0002A\u0011AA*\u0011\u0019\tI\u0006\u0001C\u0001\u0017\"9\u00111\f\u0001\u0005\u0002\u0005u\u0003BBA1\u0001\u0011\u00051\nC\u0004\u0002d\u0001!\t!!\u001a\t\u000f\u0005-\u0004\u0001\"\u0001\u0002n!9\u0011q\u000e\u0001\u0005\u0002\u0005E\u0004bBA<\u0001\u0011\u0005\u0011\u0011\u0010\u0005\b\u0003\u0007\u0003A\u0011AAC\u0011\u001d\tI\t\u0001C\u0001\u0003\u0017Cq!!&\u0001\t\u0003\t9\nC\u0004\u0002\u001c\u0002!\t!!(\t\u000f\u0005\u0005\u0006\u0001\"\u0001\u0002$\"9\u0011q\u0015\u0001\u0005\u0002\u0005%\u0006bBAX\u0001\u0011\u0005\u0011\u0011\u0017\u0005\b\u0003k\u0003A\u0011AA7\u0011%\t9\f\u0001b\u0001\n\u0013\tI\f\u0003\u0005\u0002J\u0002\u0001\u000b\u0011BA^\u0011%\tY\r\u0001b\u0001\n\u0013\tI\f\u0003\u0005\u0002N\u0002\u0001\u000b\u0011BA^\u0011)\ty\r\u0001EC\u0002\u0013%\u0011\u0011\u0018\u0005\u000b\u0003#\u0004\u0001R1A\u0005\n\u0005e\u0006bBAj\u0001\u0011%\u0011Q\u001b\u0005\b\u00037\u0004A\u0011AAo\u0011\u001d\u0011I
\u0001\u0001C!\u0005\u00179qA!\u00078\u0011\u0003\u0011YB\u0002\u00047o!\u0005!Q\u0004\u0005\u00075N\"\tA!\r\t\u0013\tM2'!A\u0005\n\tU\"A\u0004+pW\u0016t\u0017N_3s\u001b>$W\r\u001c\u0006\u0003qe\n!\"\u00198o_R\fGo\u001c:t\u0015\tQ4(A\u0002oYBT!\u0001P\u001f\u0002\u0019)|\u0007N\\:o_^d\u0017MY:\u000b\u0003y\n1aY8n\u0007\u0001\u00192\u0001A!H!\r\u00115)R\u0007\u0002s%\u0011A)\u000f\u0002\u000f\u0003:tw\u000e^1u_Jlu\u000eZ3m!\t1\u0005!D\u00018!\r\u0011\u0005*R\u0005\u0003\u0013f\u0012\u0011\u0003S1t'&l\u0007\u000f\\3B]:|G/\u0019;f\u0003\r)\u0018\u000eZ\u000b\u0002\u0019B\u0011QJ\u0016\b\u0003\u001dR\u0003\"a\u0014*\u000e\u0003AS!!U \u0002\rq\u0012xn\u001c;?\u0015\u0005\u0019\u0016!B:dC2\f\u0017BA+S\u0003\u0019\u0001&/\u001a3fM&\u0011q\u000b\u0017\u0002\u0007'R\u0014\u0018N\\4\u000b\u0005U\u0013\u0016\u0001B;jI\u0002\na\u0001P5oSRtDCA#]\u0011\u0015Q5\u00011\u0001M\u0003\u0015\u0011X\u000f\\3t+\u0005y\u0006c\u00011dK6\t\u0011M\u0003\u0002cs\u0005i1/\u001a:jC2L'0\u0019;j_:L!\u0001Z1\u0003\u001bM#(/^2u\r\u0016\fG/\u001e:f!\t17.D\u0001h\u0015\tA\u0017.A\u0003sK\u001e,\u0007P\u0003\u0002ks\u0005!Q\u000f^5m\u0013\tawMA\u0006Sk2,g)Y2u_JL\u0018A\u0002:vY\u0016\u001c\b%\u0001\u0006fq\u000e,\u0007\u000f^5p]N,\u0012\u0001\u001d\t\u0003crl\u0011A\u001d\u0006\u0003gR\fQ\u0001]1sC6T!!\u001e<\u0002\u00055d'BA<y\u0003\u0015\u0019\b/\u0019:l\u0015\tI(0\u0001\u0004ba\u0006\u001c\u0007.\u001a\u0006\u0002w\u0006\u0019qN]4\n\u0005u\u0014(\u0001E*ue&tw-\u0011:sCf\u0004\u0016M]1n\u0003-)\u0007pY3qi&|gn\u001d\u0011\u0002/\r\f7/Z*f]NLG/\u001b<f\u000bb\u001cW\r\u001d;j_:\u001cXCAA\u0002!\r\t\u0018QA\u0005\u0004\u0003\u000f\u0011(\u0001\u0004\"p_2,\u0017M\u001c)be\u0006l\u0017\u0001G2bg\u0016\u001cVM\\:ji&4X-\u0012=dKB$\u0018n\u001c8tA\u0005iA/\u0019:hKR\u0004\u0016\r\u001e;fe:,\"!a\u0004\u0011\tE\f\t\u0002T\u0005\u0004\u0003'\u0011(!\u0002)be\u0006l\u0017A\u0004;be\u001e,G\u000fU1ui\u0016\u0014h\u000eI\u0001\n[&tG*\u001a8hi\",\"!a\u0007\u0011\u0007E\fi\"C\u0002\u0002 
I\u0014\u0001\"\u00138u!\u0006\u0014\u0018-\\\u0001\u000b[&tG*\u001a8hi\"\u0004\u0013!C7bq2+gn\u001a;i\u0003)i\u0017\r\u001f'f]\u001e$\b\u000eI\u0001\u000bgBd\u0017\u000e^\"iCJ\u001c\u0018aC:qY&$8\t[1sg\u0002\nAb\u001d9mSR\u0004\u0016\r\u001e;fe:\fQb\u001d9mSR\u0004\u0016\r\u001e;fe:\u0004\u0013aE8viB,H/\u00118o_R\fGo\u001c:UsB,WCAA\u001a!\u0011\t)$a\u000e\u000e\u0003\u0001IA!!\u000f\u0002<\ti\u0011I\u001c8pi\u0006$xN\u001d+za\u0016L1!!\u0010:\u0005YA\u0015m](viB,H/\u00118o_R\fGo\u001c:UsB,\u0017\u0001F8viB,H/\u00118o_R\fGo\u001c:UsB,\u0007%A\nj]B,H/\u00118o_R\fGo\u001c:UsB,7/\u0006\u0002\u0002FA1\u0011qIA%\u0003gi\u0011AU\u0005\u0004\u0003\u0017\u0012&!B!se\u0006L\u0018\u0001F5oaV$\u0018I\u001c8pi\u0006$xN\u001d+za\u0016\u001c\b\u0005F\u0001F\u0003A\u0019X\r\u001e+be\u001e,G\u000fU1ui\u0016\u0014h\u000e\u0006\u0003\u00026\u0005U\u0003BBA,3\u0001\u0007A*A\u0003wC2,X-\u0001\thKR$\u0016M]4fiB\u000bG\u000f^3s]\u0006y1/\u001a;Ta2LG\u000fU1ui\u0016\u0014h\u000e\u0006\u0003\u00026\u0005}\u0003BBA,7\u0001\u0007A*A\bhKR\u001c\u0006\u000f\\5u!\u0006$H/\u001a:o\u00035\u0019X\r^#yG\u0016\u0004H/[8ogR!\u0011QGA4\u0011\u001d\t9&\ba\u0001\u0003S\u0002R!a\u0012\u0002J1\u000bQbZ3u\u000bb\u001cW\r\u001d;j_:\u001cXCAA5\u0003!\u0019X\r\u001e*vY\u0016\u001cH\u0003BA\u001b\u0003gBa!!\u001e \u0001\u0004)\u0017a\u0003:vY\u00164\u0015m\u0019;pef\f!d]3u\u0007\u0006\u001cXmU3og&$\u0018N^3Fq\u000e,\u0007\u000f^5p]N$B!!\u000e\u0002|!9\u0011q\u000b\u0011A\u0002\u0005u\u0004\u0003BA$\u0003\u007fJ1!!!S\u0005\u001d\u0011un\u001c7fC:\f!dZ3u\u0007\u0006\u001cXmU3og&$\u0018N^3Fq\u000e,\u0007\u000f^5p]N$B!! 
\u0002\b\"9\u0011qK\u0011A\u0002\u0005u\u0014\u0001D:fi6Kg\u000eT3oORDG\u0003BA\u001b\u0003\u001bCq!a\u0016#\u0001\u0004\ty\t\u0005\u0003\u0002H\u0005E\u0015bAAJ%\n\u0019\u0011J\u001c;\u0002\u0019\u001d,G/T5o\u0019\u0016tw\r\u001e5\u0015\t\u0005=\u0015\u0011\u0014\u0005\b\u0003/\u001a\u0003\u0019AAH\u00031\u0019X\r^'bq2+gn\u001a;i)\u0011\t)$a(\t\u000f\u0005]C\u00051\u0001\u0002\u0010\u0006aq-\u001a;NCbdUM\\4uQR!\u0011qRAS\u0011\u001d\t9&\na\u0001\u0003\u001f\u000bQb]3u'Bd\u0017\u000e^\"iCJ\u001cH\u0003BA\u001b\u0003WCq!!,'\u0001\u0004\tI'A\u0001w\u00035\tG\rZ*qY&$8\t[1sgR!\u0011QGAZ\u0011\u0019\tik\na\u0001\u0019\u0006iq-\u001a;Ta2LGo\u00115beN\fA\u0002\u0015*P)\u0016\u001bEkX\"I\u0003J+\"!a/\u0011\t\u0005u\u0016qY\u0007\u0003\u0003\u007fSA!!1\u0002D\u0006!A.\u00198h\u0015\t\t)-\u0001\u0003kCZ\f\u0017bA,\u0002@\u0006i\u0001KU(U\u000b\u000e#vl\u0011%B%\u0002\n!B\u0011*F\u0003.{6\tS!S\u0003-\u0011%+R!L?\u000eC\u0015I\u0015\u0011\u0002\u001b\t\u0013V)Q&`!\u0006#F+\u0012*O\u00035\u0019\u0006\u000bT%U?B\u000bE\u000bV#S\u001d\u0006\u00012-Y:fI6\u000bGo\u00195Fq&\u001cHo\u001d\u000b\u0005\u0003{\n9\u000e\u0003\u0004\u0002Z>\u0002\r\u0001T\u0001\u0011G\u0006tG-\u001b3bi\u0016l\u0015\r^2iK\u0012\f1\u0001^1h)\u0011\ty.!@\u0011\r\u0005\u0005\u00181^Ay\u001d\u0011\t\u0019/a:\u000f\u0007=\u000b)/C\u0001T\u0013\r\tIOU\u0001\ba\u0006\u001c7.Y4f\u0013\u0011\ti/a<\u0003\u0007M+\u0017OC\u0002\u0002jJ\u0003B!a=\u0002z6\u0011\u0011Q\u001f\u0006\u0004\u0003o<\u0014AB2p[6|g.\u0003\u0003\u0002|\u0006U(!\u0005+pW\u0016t\u0017N_3e'\u0016tG/\u001a8dK\"9\u0011q 
\u0019A\u0002\t\u0005\u0011!C:f]R,gnY3t!\u0019\t\t/a;\u0003\u0004A!\u00111\u001fB\u0003\u0013\u0011\u00119!!>\u0003\u0011M+g\u000e^3oG\u0016\f\u0001\"\u00198o_R\fG/\u001a\u000b\u0005\u0005\u001b\u0011)\u0002\u0005\u0004\u0002b\u0006-(q\u0002\t\u0004\u0005\nE\u0011b\u0001B\ns\tQ\u0011I\u001c8pi\u0006$\u0018n\u001c8\t\u000f\t]\u0011\u00071\u0001\u0003\u000e\u0005Y\u0011M\u001c8pi\u0006$\u0018n\u001c8t\u00039!vn[3oSj,'/T8eK2\u0004\"AR\u001a\u0014\u000fM\u0012yB!\n\u0003,A!\u0011q\tB\u0011\u0013\r\u0011\u0019C\u0015\u0002\u0007\u0003:L(+\u001a4\u0011\u0007\u0019\u00139#C\u0002\u0003*]\u00121DU3bI\u0006\u0014G.\u001a)sKR\u0014\u0018-\u001b8fIR{7.\u001a8ju\u0016\u0014\b\u0003BA$\u0005[I1Aa\fS\u00051\u0019VM]5bY&T\u0018M\u00197f)\t\u0011Y\"A\u0006sK\u0006$'+Z:pYZ,GC\u0001B\u001c!\u0011\tiL!\u000f\n\t\tm\u0012q\u0018\u0002\u0007\u001f\nTWm\u0019;")
/* loaded from: input_file:com/johnsnowlabs/nlp/annotators/TokenizerModel.class */
public class TokenizerModel extends AnnotatorModel<TokenizerModel> implements HasSimpleAnnotate<TokenizerModel> {
    // Lazily computed regex strings; assigned by BREAK_PATTERN$lzycompute() / SPLIT_PATTERN$lzycompute().
    private String BREAK_PATTERN;
    private String SPLIT_PATTERN;
    // Identifier handed to the constructor.
    private final String uid;
    // Feature named "rules" holding the RuleFactory queried by tag().
    private final StructFeature<RuleFactory> rules;
    // Param "exceptions": words that won't be affected by tokenization rules.
    private final StringArrayParam exceptions;
    // Param "caseSensitiveExceptions": whether to care for case sensitiveness in exceptions (default true).
    private final BooleanParam caseSensitiveExceptions;
    // Param "targetPattern": pattern to grab from text as token candidates (default "\\S+").
    private final Param<String> targetPattern;
    // Params "minLength" / "maxLength": minimum / maximum allowed length for each token.
    private final IntParam minLength;
    private final IntParam maxLength;
    // Param "splitChars": character list used to separate from the inside of tokens.
    private final StringArrayParam splitChars;
    // Param "splitPattern": pattern to separate from the inside of tokens; takes priority over splitChars.
    private final Param<String> splitPattern;
    // Set from AnnotatorType$.MODULE$.TOKEN() in the constructor.
    private final String outputAnnotatorType;
    // Single-element array holding AnnotatorType$.MODULE$.DOCUMENT().
    private final String[] inputAnnotatorTypes;
    // Marker characters ("ↈ" and "ↇ") substituted into the text while tokenizing.
    private final String PROTECT_CHAR;
    private final String BREAK_CHAR;
    // Lazy-init flags: bit 0 (value 1) = BREAK_PATTERN computed, bit 1 (value 2) = SPLIT_PATTERN computed.
    private volatile byte bitmap$0;

    /**
     * Static forwarder to the three-argument {@code pretrained} method on the
     * {@code TokenizerModel$} singleton; all arguments are passed through unchanged.
     */
    public static TokenizerModel pretrained(String str, String str2, String str3) {
        TokenizerModel$ companion = TokenizerModel$.MODULE$;
        return companion.mo115pretrained(str, str2, str3);
    }

    /**
     * Static forwarder to the two-argument {@code pretrained} method on the
     * {@code TokenizerModel$} singleton; both arguments are passed through unchanged.
     */
    public static TokenizerModel pretrained(String str, String str2) {
        TokenizerModel$ companion = TokenizerModel$.MODULE$;
        return companion.mo116pretrained(str, str2);
    }

    /**
     * Static forwarder to the one-argument {@code pretrained} method on the
     * {@code TokenizerModel$} singleton.
     */
    public static TokenizerModel pretrained(String str) {
        TokenizerModel$ companion = TokenizerModel$.MODULE$;
        return companion.mo117pretrained(str);
    }

    /**
     * Static forwarder to the no-argument {@code pretrained} method on the
     * {@code TokenizerModel$} singleton.
     */
    public static TokenizerModel pretrained() {
        TokenizerModel$ companion = TokenizerModel$.MODULE$;
        return companion.mo118pretrained();
    }

    /**
     * Static forwarder returning whatever the {@code TokenizerModel$} singleton
     * reports as its default model name.
     */
    public static Some<String> defaultModelName() {
        TokenizerModel$ companion = TokenizerModel$.MODULE$;
        return companion.mo119defaultModelName();
    }

    /** Static forwarder to {@code defaultLoc()} on the {@code TokenizerModel$} singleton. */
    public static String defaultLoc() {
        TokenizerModel$ companion = TokenizerModel$.MODULE$;
        return companion.defaultLoc();
    }

    /** Static forwarder to {@code defaultLang()} on the {@code TokenizerModel$} singleton. */
    public static String defaultLang() {
        TokenizerModel$ companion = TokenizerModel$.MODULE$;
        return companion.defaultLang();
    }

    /** Static forwarder returning the {@link MLReader} supplied by the {@code TokenizerModel$} singleton. */
    public static MLReader<TokenizerModel> read() {
        TokenizerModel$ companion = TokenizerModel$.MODULE$;
        return companion.read();
    }

    /**
     * Static forwarder that hands the given reader callback to {@code addReader}
     * on the {@code TokenizerModel$} singleton.
     */
    public static void addReader(Function3<TokenizerModel, String, SparkSession, BoxedUnit> function3) {
        TokenizerModel$ companion = TokenizerModel$.MODULE$;
        companion.addReader(function3);
    }

    /**
     * Static forwarder to {@code load(String)} on the {@code TokenizerModel$} singleton.
     * The result is returned untyped, exactly as the companion yields it.
     */
    public static Object load(String str) {
        TokenizerModel$ companion = TokenizerModel$.MODULE$;
        return companion.load(str);
    }

    /**
     * Returns the annotation UDF provided by {@link HasSimpleAnnotate}.
     *
     * <p>This is the trait forwarder: it must invoke the interface's own
     * implementation. The previous body called {@code dfAnnotate()} on itself,
     * which recurses without bound.
     * NOTE(review): assumes {@code HasSimpleAnnotate} exposes {@code dfAnnotate}
     * as a default method (Scala 2.12 trait encoding) — confirm against the interface.
     */
    @Override // com.johnsnowlabs.nlp.HasSimpleAnnotate
    public UserDefinedFunction dfAnnotate() {
        return HasSimpleAnnotate.super.dfAnnotate();
    }

    /** Returns the identifier this instance was constructed with. */
    public String uid() {
        return uid;
    }

    /** Returns the {@code "rules"} feature that stores the {@link RuleFactory}. */
    public StructFeature<RuleFactory> rules() {
        return rules;
    }

    /** Returns the {@code "exceptions"} param (words that won't be affected by tokenization rules). */
    public StringArrayParam exceptions() {
        return exceptions;
    }

    /** Returns the {@code "caseSensitiveExceptions"} param (whether exceptions are matched case-sensitively). */
    public BooleanParam caseSensitiveExceptions() {
        return caseSensitiveExceptions;
    }

    /** Returns the {@code "targetPattern"} param (pattern to grab from text as token candidates). */
    public Param<String> targetPattern() {
        return targetPattern;
    }

    /** Returns the {@code "minLength"} param (minimum allowed length for each token). */
    public IntParam minLength() {
        return minLength;
    }

    /** Returns the {@code "maxLength"} param (maximum allowed length for each token). */
    public IntParam maxLength() {
        return maxLength;
    }

    /** Returns the {@code "splitChars"} param (character list used to separate from the inside of tokens). */
    public StringArrayParam splitChars() {
        return splitChars;
    }

    /** Returns the {@code "splitPattern"} param (pattern to separate from the inside of tokens). */
    public Param<String> splitPattern() {
        return splitPattern;
    }

    /** Returns the output annotator type assigned in the constructor ({@code AnnotatorType$.MODULE$.TOKEN()}). */
    public String outputAnnotatorType() {
        return outputAnnotatorType;
    }

    /** Returns the input annotator types array assigned in the constructor (the internal array itself, not a copy). */
    public String[] inputAnnotatorTypes() {
        return inputAnnotatorTypes;
    }

    /**
     * Stores {@code str} as the value of the {@code targetPattern} param.
     *
     * @return the result of {@code set(...)}, cast to {@code TokenizerModel}
     */
    public TokenizerModel setTargetPattern(String str) {
        Param<String> param = targetPattern();
        return (TokenizerModel) set(param, str);
    }

    /** Returns the value resolved by {@code $(...)} for the {@code targetPattern} param. */
    public String getTargetPattern() {
        Object value = $(targetPattern());
        return (String) value;
    }

    /**
     * Stores {@code str} as the value of the {@code splitPattern} param.
     *
     * @return the result of {@code set(...)}, cast to {@code TokenizerModel}
     */
    public TokenizerModel setSplitPattern(String str) {
        Param<String> param = splitPattern();
        return (TokenizerModel) set(param, str);
    }

    /** Returns the value resolved by {@code $(...)} for the {@code splitPattern} param. */
    public String getSplitPattern() {
        Object value = $(splitPattern());
        return (String) value;
    }

    /**
     * Stores {@code strArr} as the value of the {@code exceptions} param.
     *
     * @return the result of {@code set(...)}, cast to {@code TokenizerModel}
     */
    public TokenizerModel setExceptions(String[] strArr) {
        Param param = exceptions();
        Object value = strArr;
        return (TokenizerModel) set(param, value);
    }

    /** Returns the value resolved by {@code $(...)} for the {@code exceptions} param. */
    public String[] getExceptions() {
        Object value = $(exceptions());
        return (String[]) value;
    }

    /**
     * Stores {@code ruleFactory} in the {@code rules} feature.
     *
     * <p>The feature and the value are passed with their declared types. The
     * previous casts treated the {@link RuleFactory} as a
     * {@code StructFeature<RuleFactory>} and the feature as a
     * {@code StructFeature<StructFeature<RuleFactory>>}, neither of which matches
     * the objects involved.
     * NOTE(review): relies on a {@code set(StructFeature<T>, T)} overload being
     * inherited — confirm against the features trait.
     *
     * @param ruleFactory rule factory to store
     * @return the result of {@code set(...)}, cast to {@code TokenizerModel}
     */
    public TokenizerModel setRules(RuleFactory ruleFactory) {
        return (TokenizerModel) set(rules(), ruleFactory);
    }

    /**
     * Stores {@code z} (boxed) as the value of the {@code caseSensitiveExceptions} param.
     *
     * @return the result of {@code set(...)}, cast to {@code TokenizerModel}
     */
    public TokenizerModel setCaseSensitiveExceptions(boolean z) {
        Object boxed = BoxesRunTime.boxToBoolean(z);
        return (TokenizerModel) set((Param) caseSensitiveExceptions(), boxed);
    }

    /**
     * Returns the value resolved by {@code $(...)} for the
     * {@code caseSensitiveExceptions} param.
     */
    public boolean getCaseSensitiveExceptions() {
        return BoxesRunTime.unboxToBoolean($(caseSensitiveExceptions()));
    }

    /**
     * Legacy getter signature retained so existing callers keep compiling.
     * Prefer {@link #getCaseSensitiveExceptions()}.
     *
     * @param z ignored; it has never influenced the result
     * @return the value of the {@code caseSensitiveExceptions} param
     */
    public boolean getCaseSensitiveExceptions(boolean z) {
        return getCaseSensitiveExceptions();
    }

    /**
     * Stores {@code i} (boxed) as the value of the {@code minLength} param.
     *
     * @return the result of {@code set(...)}, cast to {@code TokenizerModel}
     */
    public TokenizerModel setMinLength(int i) {
        Object boxed = BoxesRunTime.boxToInteger(i);
        return (TokenizerModel) set((Param) minLength(), boxed);
    }

    /** Returns the value resolved by {@code $(...)} for the {@code minLength} param. */
    public int getMinLength() {
        return BoxesRunTime.unboxToInt($(minLength()));
    }

    /**
     * Legacy getter signature retained so existing callers keep compiling.
     * Prefer {@link #getMinLength()}.
     *
     * @param i ignored; it has never influenced the result
     * @return the value of the {@code minLength} param
     */
    public int getMinLength(int i) {
        return getMinLength();
    }

    /**
     * Stores {@code i} (boxed) as the value of the {@code maxLength} param.
     *
     * @return the result of {@code set(...)}, cast to {@code TokenizerModel}
     */
    public TokenizerModel setMaxLength(int i) {
        Object boxed = BoxesRunTime.boxToInteger(i);
        return (TokenizerModel) set((Param) maxLength(), boxed);
    }

    /** Returns the value resolved by {@code $(...)} for the {@code maxLength} param. */
    public int getMaxLength() {
        return BoxesRunTime.unboxToInt($(maxLength()));
    }

    /**
     * Legacy getter signature retained so existing callers keep compiling.
     * Prefer {@link #getMaxLength()}.
     *
     * @param i ignored; it has never influenced the result
     * @return the value of the {@code maxLength} param
     */
    public int getMaxLength(int i) {
        return getMaxLength();
    }

    /**
     * Validates every entry of {@code strArr} with {@code $anonfun$setSplitChars$1}
     * and, if all pass, stores the array as the {@code splitChars} param.
     * Validation failure is reported through Scala's {@code Predef.require}.
     *
     * @return the result of {@code set(...)}, cast to {@code TokenizerModel}
     */
    public TokenizerModel setSplitChars(String[] strArr) {
        boolean everyEntryValid = new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(strArr)).forall(str -> BoxesRunTime.boxToBoolean($anonfun$setSplitChars$1(str)));
        Predef$.MODULE$.require(everyEntryValid, () -> "All elements in context chars must have length == 1");
        Object value = strArr;
        return (TokenizerModel) set((Param) splitChars(), value);
    }

    /**
     * Appends one split character to the {@code splitChars} param.
     *
     * <p>An entry is accepted when it is a single character, or two characters
     * whose first is a backslash (an escaped character). Anything else is
     * rejected through Scala's {@code Predef.require}. The previous body read
     * the validity flag without assigning it on the two-character path and had
     * lost the backslash comparison altogether.
     *
     * @param str split character to add
     * @return the result of {@code set(...)}, cast to {@code TokenizerModel}
     */
    public TokenizerModel addSplitChars(String str) {
        boolean valid = str.length() == 1 || (str.length() == 2 && str.charAt(0) == '\\');
        Predef$.MODULE$.require(valid, () -> {
            return "Context char must have length == 1";
        });
        // Start from the current value, or an empty array when the param is unset, then append.
        return (TokenizerModel) set((Param) splitChars(), new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[]) get((Param) splitChars()).getOrElse(() -> {
            return (String[]) Array$.MODULE$.empty(ClassTag$.MODULE$.apply(String.class));
        }))).$colon$plus(str, ClassTag$.MODULE$.apply(String.class)));
    }

    /** Returns the value resolved by {@code $(...)} for the {@code splitChars} param. */
    public String[] getSplitChars() {
        Object value = $(splitChars());
        return (String[]) value;
    }

    /** Returns the marker string stored in {@code PROTECT_CHAR}. */
    private String PROTECT_CHAR() {
        return PROTECT_CHAR;
    }

    /** Returns the marker string stored in {@code BREAK_CHAR}. */
    private String BREAK_CHAR() {
        return BREAK_CHAR;
    }

    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r0v0 */
    /* JADX WARN: Type inference failed for: r0v1, types: [java.lang.Throwable] */
    /* JADX WARN: Type inference failed for: r0v10, types: [com.johnsnowlabs.nlp.annotators.TokenizerModel] */
    private String BREAK_PATTERN$lzycompute() {
        ?? r0 = this;
        synchronized (r0) {
            if (((byte) (this.bitmap$0 & 1)) == 0) {
                this.BREAK_PATTERN = new StringBuilder(7).append("[^(?:").append($(targetPattern())).append(")").append(PROTECT_CHAR()).append("]").toString();
                r0 = this;
                r0.bitmap$0 = (byte) (this.bitmap$0 | 1);
            }
        }
        return this.BREAK_PATTERN;
    }

    /**
     * Returns {@code BREAK_PATTERN}, going through {@code BREAK_PATTERN$lzycompute()}
     * while bit 0 of {@code bitmap$0} is still clear.
     */
    private String BREAK_PATTERN() {
        boolean alreadyComputed = ((byte) (this.bitmap$0 & 1)) != 0;
        if (alreadyComputed) {
            return this.BREAK_PATTERN;
        }
        return BREAK_PATTERN$lzycompute();
    }

    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r0v0 */
    /* JADX WARN: Type inference failed for: r0v1, types: [java.lang.Throwable] */
    /* JADX WARN: Type inference failed for: r0v10, types: [com.johnsnowlabs.nlp.annotators.TokenizerModel] */
    private String SPLIT_PATTERN$lzycompute() {
        ?? r0 = this;
        synchronized (r0) {
            if (((byte) (this.bitmap$0 & 2)) == 0) {
                this.SPLIT_PATTERN = new StringBuilder(4).append("[^").append(BREAK_CHAR()).append("]+").toString();
                r0 = this;
                r0.bitmap$0 = (byte) (this.bitmap$0 | 2);
            }
        }
        return this.SPLIT_PATTERN;
    }

    /**
     * Returns {@code SPLIT_PATTERN}, going through {@code SPLIT_PATTERN$lzycompute()}
     * while bit 1 of {@code bitmap$0} is still clear.
     */
    private String SPLIT_PATTERN() {
        boolean alreadyComputed = ((byte) (this.bitmap$0 & 2)) != 0;
        if (alreadyComputed) {
            return this.SPLIT_PATTERN;
        }
        return SPLIT_PATTERN$lzycompute();
    }

    /**
     * Reports whether any entry of the {@code exceptions} param, used as a regex,
     * finds a match inside {@code str}. When {@code caseSensitiveExceptions} is
     * false the case-insensitive helper (which prefixes {@code (?i)}) is used.
     */
    private boolean casedMatchExists(String str) {
        boolean caseSensitive = BoxesRunTime.unboxToBoolean($(caseSensitiveExceptions()));
        if (caseSensitive) {
            return new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[]) $(exceptions()))).exists(str2 -> {
                return BoxesRunTime.boxToBoolean($anonfun$casedMatchExists$1(str, str2));
            });
        }
        return new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[]) $(exceptions()))).exists(str3 -> {
            return BoxesRunTime.boxToBoolean($anonfun$casedMatchExists$2(str, str3));
        });
    }

    /**
     * Tokenizes each sentence and returns one {@link TokenizedSentence} per input,
     * carrying the input's {@code index()}.
     *
     * <p>Steps, per sentence:
     * <ol>
     *   <li>If the {@code exceptions} param is set, each exception is applied as a regex
     *       (prefixed with {@code (?i)} when {@code caseSensitiveExceptions} is false) via
     *       {@code foldRight} over the sentence content; inside every match, text matching
     *       {@code BREAK_PATTERN} is replaced by {@code PROTECT_CHAR}. Otherwise the raw
     *       content is used.</li>
     *   <li>In the resulting text, everything matching {@code BREAK_PATTERN} is replaced by
     *       {@code BREAK_CHAR}, and {@code SPLIT_PATTERN} is used to find the candidates.</li>
     *   <li>A candidate that contains {@code PROTECT_CHAR} or satisfies
     *       {@code casedMatchExists} (only when exceptions are set) becomes a single token
     *       sliced from the original sentence content at the candidate's start/end.</li>
     *   <li>Any other candidate is passed to {@code rules.findMatchFirstOnly}; on a match,
     *       groups {@code 1..groupCount} are expanded by {@code $anonfun$tag$9}; with no
     *       match the candidate itself becomes the token.</li>
     *   <li>Tokens are finally filtered by {@code $anonfun$tag$13}.</li>
     * </ol>
     * Token offsets are {@code sentence.start()} plus the position inside the sentence.
     *
     * @param seq sentences to tokenize
     * @return the tokenized sentences, in the order produced by {@code seq.map}
     */
    public Seq<TokenizedSentence> tag(Seq<Sentence> seq) {
        // One lazy holder per call; handed to $anonfun$tag$9, which reads splitCharsExists through it.
        LazyRef lazyRef = new LazyRef();
        return (Seq) seq.map(sentence -> {
            return new TokenizedSentence((IndexedToken[]) new StringOps(Predef$.MODULE$.augmentString(this.SPLIT_PATTERN())).r().findAllMatchIn(((String) this.get((Param) this.exceptions()).map(strArr -> {
                return (String) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(strArr)).foldRight(sentence.content(), (str, str2) -> {
                    return new StringOps(Predef$.MODULE$.augmentString(BoxesRunTime.unboxToBoolean(this.$(this.caseSensitiveExceptions())) ? str : new StringBuilder(4).append("(?i)").append(str).toString())).r().replaceAllIn(str2, match -> {
                        return match.matched().replaceAll(this.BREAK_PATTERN(), this.PROTECT_CHAR());
                    });
                });
            }).getOrElse(() -> {
                return sentence.content();
            })).replaceAll(this.BREAK_PATTERN(), this.BREAK_CHAR())).flatMap(match -> {
                return (this.get((Param) this.exceptions()).isDefined() && (match.matched().contains(this.PROTECT_CHAR()) || this.casedMatchExists(match.matched()))) ? Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new IndexedToken[]{new IndexedToken(StringOps$.MODULE$.slice$extension(Predef$.MODULE$.augmentString(sentence.content()), match.start(), match.end()), sentence.start() + match.start(), (sentence.start() + match.end()) - 1)})) : (Seq) ((RuleFactory) this.$$(this.rules())).findMatchFirstOnly(match.matched()).map(ruleMatch -> {
                    // Running offset inside the candidate, shared across all groups of this rule match.
                    IntRef create = IntRef.create(ruleMatch.content().start());
                    return (IndexedSeq) RichInt$.MODULE$.to$extension0(Predef$.MODULE$.intWrapper(1), ruleMatch.content().groupCount()).flatMap(obj -> {
                        return $anonfun$tag$9(this, ruleMatch, sentence, match, create, lazyRef, BoxesRunTime.unboxToInt(obj));
                    }, IndexedSeq$.MODULE$.canBuildFrom());
                }).getOrElse(() -> {
                    return Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new IndexedToken[]{new IndexedToken(match.matched(), sentence.start() + match.start(), (sentence.start() + match.end()) - 1)}));
                });
            }).filter(indexedToken -> {
                return BoxesRunTime.boxToBoolean($anonfun$tag$13(this, indexedToken));
            }).toArray(ClassTag$.MODULE$.apply(IndexedToken.class)), sentence.index());
        }, Seq$.MODULE$.canBuildFrom());
    }

    /**
     * Unpacks the incoming annotations into sentences with {@code SentenceSplit$},
     * tokenizes them with {@link #tag(Seq)}, and packs the result back into
     * annotations with {@code TokenizedWithSentence$}.
     */
    public Seq<Annotation> annotate(Seq<Annotation> seq) {
        Seq<Sentence> sentences = SentenceSplit$.MODULE$.unpack(seq);
        Seq<TokenizedSentence> tokenized = tag(sentences);
        return TokenizedWithSentence$.MODULE$.pack(tokenized);
    }

    /**
     * Validation predicate used by {@code setSplitChars}: accepts a single
     * character, or a two-character string whose first character is a backslash
     * (an escaped character).
     *
     * <p>The previous body compared the string literal {@code "\\"} with
     * {@code 0}, which is not valid Java, and its two-character branch fell
     * through to {@code return false}, so escaped entries were always rejected.
     *
     * @param str candidate split-char entry
     * @return {@code true} when the entry has an accepted shape
     */
    public static final /* synthetic */ boolean $anonfun$setSplitChars$1(String str) {
        if (str.length() == 1) {
            return true;
        }
        return str.length() == 2 && str.charAt(0) == '\\';
    }

    /**
     * Case-sensitive check: compiles {@code str2} as a regex and reports whether
     * it finds a match anywhere in {@code str}.
     */
    public static final /* synthetic */ boolean $anonfun$casedMatchExists$1(String str, String str2) {
        Regex exceptionRegex = new StringOps(Predef$.MODULE$.augmentString(str2)).r();
        return exceptionRegex.findFirstIn(str).isDefined();
    }

    /**
     * Case-insensitive check: prefixes {@code str2} with {@code (?i)}, compiles
     * it as a regex and reports whether it finds a match anywhere in {@code str}.
     */
    public static final /* synthetic */ boolean $anonfun$casedMatchExists$2(String str, String str2) {
        String insensitivePattern = new StringBuilder(4).append("(?i)").append(str2).toString();
        Regex exceptionRegex = new StringOps(Predef$.MODULE$.augmentString(insensitivePattern)).r();
        return exceptionRegex.findFirstIn(str).isDefined();
    }

    /**
     * Slow path for {@code splitCharsExists$1}: under the holder's monitor,
     * returns the stored value if one exists, otherwise maps every
     * {@code splitChars} entry to its last character (as a string), stores that
     * array in the holder and returns it.
     */
    private final /* synthetic */ String[] splitCharsExists$lzycompute$1(LazyRef lazyRef) {
        synchronized (lazyRef) {
            if (lazyRef.initialized()) {
                return (String[]) lazyRef.value();
            }
            return (String[]) lazyRef.initialize(new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[]) $(splitChars()))).map(str -> {
                return new StringOps(Predef$.MODULE$.augmentString(str)).last().toString();
            }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class))));
        }
    }

    /**
     * Returns the array cached in {@code lazyRef}, computing it through
     * {@code splitCharsExists$lzycompute$1} when the holder is not yet initialized.
     */
    private final String[] splitCharsExists$1(LazyRef lazyRef) {
        if (lazyRef.initialized()) {
            return (String[]) lazyRef.value();
        }
        return splitCharsExists$lzycompute$1(lazyRef);
    }

    /**
     * Expands capture group {@code i} of a rule match into one or more tokens.
     *
     * <p>{@code intRef.elem} is the running offset inside the candidate; every token's
     * begin is {@code sentence.start() + match.start() + intRef.elem}, and the offset is
     * advanced as tokens are produced, so calls for successive groups must share the same
     * {@code intRef}.
     *
     * @param tokenizerModel the enclosing model (params are read from it)
     * @param ruleMatch      rule match whose groups are being expanded
     * @param sentence       sentence the candidate came from
     * @param match          candidate match inside the sentence
     * @param intRef         mutable running offset, updated by this call
     * @param lazyRef        holder for the lazily computed split-char array
     * @param i              capture group number
     * @return the tokens produced for this group
     */
    public static final /* synthetic */ SeqLike $anonfun$tag$9(TokenizerModel tokenizerModel, RuleFactory.RuleMatch ruleMatch, Sentence sentence, Regex.Match match, IntRef intRef, LazyRef lazyRef, int i) {
        String group = ruleMatch.content().group(i);
        // z: splitPattern is set and splitting the group by it yields more than one piece.
        boolean z = tokenizerModel.isSet(tokenizerModel.splitPattern()) && new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(group.split((String) tokenizerModel.$(tokenizerModel.splitPattern())))).size() > 1;
        // z2: splitChars is set and the group contains at least one of the cached split characters.
        boolean z2 = tokenizerModel.isSet(tokenizerModel.splitChars()) && new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(tokenizerModel.splitCharsExists$1(lazyRef))).exists(charSequence -> {
            return BoxesRunTime.boxToBoolean(group.contains(charSequence));
        });
        if (new StringOps(Predef$.MODULE$.augmentString(group)).nonEmpty() && (z || z2)) {
            try {
                // splitPattern wins when it applies; otherwise split on the splitChars entries joined with "|".
                return new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(z ? group.split((String) tokenizerModel.$(tokenizerModel.splitPattern())) : group.split(new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[]) tokenizerModel.$(tokenizerModel.splitChars()))).mkString("|")))).map(str -> {
                    try {
                        return new IndexedToken(str, sentence.start() + match.start() + intRef.elem, (((sentence.start() + match.start()) + intRef.elem) + str.length()) - 1);
                    } finally {
                        // After each piece: advance past the piece plus one extra position.
                        intRef.elem += str.length() + 1;
                    }
                }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(IndexedToken.class)))));
            } finally {
                // After the whole group: take back the extra position added after the last piece.
                intRef.elem--;
            }
        }
        // No split applies: the whole group is a single token and the offset advances by its length.
        IndexedToken indexedToken = new IndexedToken(group, sentence.start() + match.start() + intRef.elem, (((sentence.start() + match.start()) + intRef.elem) + group.length()) - 1);
        intRef.elem += group.length();
        return Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new IndexedToken[]{indexedToken}));
    }

    /**
     * Token filter used by {@code tag}: keeps a token only when its text is
     * non-empty, at least {@code minLength} long, and — if {@code maxLength} is
     * set — no longer than {@code maxLength}.
     */
    public static final /* synthetic */ boolean $anonfun$tag$13(TokenizerModel tokenizerModel, IndexedToken indexedToken) {
        if (!new StringOps(Predef$.MODULE$.augmentString(indexedToken.token())).nonEmpty()) {
            return false;
        }
        if (indexedToken.token().length() < BoxesRunTime.unboxToInt(tokenizerModel.$(tokenizerModel.minLength()))) {
            return false;
        }
        return tokenizerModel.get((Param) tokenizerModel.maxLength()).forall(i -> {
            return indexedToken.token().length() <= i;
        });
    }

    /**
     * Creates a model with the given identifier, declares its feature and params,
     * and registers defaults for {@code targetPattern} ({@code "\\S+"}) and
     * {@code caseSensitiveExceptions} ({@code true}). No other param receives a
     * default here.
     *
     * @param str identifier stored as {@code uid}
     */
    public TokenizerModel(String str) {
        this.uid = str;
        // Trait initialiser for HasSimpleAnnotate.
        HasSimpleAnnotate.$init$(this);
        this.rules = new StructFeature<>(this, "rules", ClassTag$.MODULE$.apply(RuleFactory.class));
        this.exceptions = new StringArrayParam(this, "exceptions", "Words that won't be affected by tokenization rules");
        this.caseSensitiveExceptions = new BooleanParam(this, "caseSensitiveExceptions", "Whether to care for case sensitiveness in exceptions");
        this.targetPattern = new Param<>(this, "targetPattern", "pattern to grab from text as token candidates. Defaults \\S+");
        this.minLength = new IntParam(this, "minLength", "Set the minimum allowed length for each token");
        this.maxLength = new IntParam(this, "maxLength", "Set the maximum allowed length for each token");
        this.splitChars = new StringArrayParam(this, "splitChars", "character list used to separate from the inside of tokens");
        this.splitPattern = new Param<>(this, "splitPattern", "pattern to separate from the inside of tokens. takes priority over splitChars.");
        // Must run after the param fields above are assigned, since it reads them through their accessors.
        setDefault(Predef$.MODULE$.wrapRefArray(new ParamPair[]{targetPattern().$minus$greater("\\S+"), caseSensitiveExceptions().$minus$greater(BoxesRunTime.boxToBoolean(true))}));
        this.outputAnnotatorType = AnnotatorType$.MODULE$.TOKEN();
        this.inputAnnotatorTypes = new String[]{AnnotatorType$.MODULE$.DOCUMENT()};
        // Marker characters substituted into the text during tokenization (see BREAK_PATTERN / SPLIT_PATTERN).
        this.PROTECT_CHAR = "ↈ";
        this.BREAK_CHAR = "ↇ";
    }

    /**
     * Creates a model whose identifier is generated by
     * {@code Identifiable$.MODULE$.randomUID} with the prefix {@code "REGEX_TOKENIZER"}.
     */
    public TokenizerModel() {
        this(Identifiable$.MODULE$.randomUID("REGEX_TOKENIZER"));
    }
}
