package com.johnsnowlabs.nlp.annotators.tokenizer.wordpiece;

import com.johnsnowlabs.nlp.annotators.common.IndexedToken;
import com.johnsnowlabs.nlp.annotators.common.TokenPiece;
import scala.Option;
import scala.Predef$;
import scala.collection.immutable.Map;
import scala.collection.immutable.Nil$;
import scala.collection.mutable.ArrayBuffer;
import scala.collection.mutable.ArrayBuffer$;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxesRunTime;

/* compiled from: WordpieceEncoder.scala */
@ScalaSignature(bytes = "\u0006\u0001\r4Q!\u0004\b\u0001)iA\u0001\"\t\u0001\u0003\u0002\u0003\u0006Ia\t\u0005\ti\u0001\u0011\t\u0011)A\u0005]!AQ\u0007\u0001B\u0001B\u0003%\u0011\u0007\u0003\u00057\u0001\t\u0005\t\u0015!\u0003/\u0011\u00159\u0004\u0001\"\u00019\u0011\u0015y\u0004\u0001\"\u0001A\u000f!ye\"!A\t\u0002Q\u0001f\u0001C\u0007\u000f\u0003\u0003E\t\u0001F)\t\u000b]BA\u0011\u0001*\t\u000fMC\u0011\u0013!C\u0001)\"9q\fCI\u0001\n\u0003\u0001\u0007b\u00022\t#\u0003%\t\u0001\u0016\u0002\u0011/>\u0014H\r]5fG\u0016,enY8eKJT!a\u0004\t\u0002\u0013]|'\u000f\u001a9jK\u000e,'BA\t\u0013\u0003%!xn[3oSj,'O\u0003\u0002\u0014)\u0005Q\u0011M\u001c8pi\u0006$xN]:\u000b\u0005U1\u0012a\u00018ma*\u0011q\u0003G\u0001\rU>Dgn\u001d8po2\f'm\u001d\u0006\u00023\u0005\u00191m\\7\u0014\u0005\u0001Y\u0002C\u0001\u000f \u001b\u0005i\"\"\u0001\u0010\u0002\u000bM\u001c\u0017\r\\1\n\u0005\u0001j\"AB!osJ+g-\u0001\u0006w_\u000e\f'-\u001e7bef\u001c\u0001\u0001\u0005\u0003%W9\ndBA\u0013*!\t1S$D\u0001(\u0015\tA#%\u0001\u0004=e>|GOP\u0005\u0003Uu\ta\u0001\u0015:fI\u00164\u0017B\u0001\u0017.\u0005\ri\u0015\r\u001d\u0006\u0003Uu\u0001\"\u0001J\u0018\n\u0005Aj#AB*ue&tw\r\u0005\u0002\u001de%\u00111'\b\u0002\u0004\u0013:$\u0018\u0001C;oWR{7.\u001a8\u0002)5\f\u00070\u00138qkR\u001c\u0005.\u0019:t!\u0016\u0014xk\u001c:e\u0003)\u0001\u0018M\u001d;Qe\u00164\u0017\u000e_\u0001\u0007y%t\u0017\u000e\u001e \u0015\u000beZD(\u0010 
\u0011\u0005i\u0002Q\"\u0001\b\t\u000b\u0005*\u0001\u0019A\u0012\t\u000fQ*\u0001\u0013!a\u0001]!9Q'\u0002I\u0001\u0002\u0004\t\u0004b\u0002\u001c\u0006!\u0003\u0005\rAL\u0001\u0007K:\u001cw\u000eZ3\u0015\u0005\u0005S\u0005c\u0001\u000fC\t&\u00111)\b\u0002\u0006\u0003J\u0014\u0018-\u001f\t\u0003\u000b\"k\u0011A\u0012\u0006\u0003\u000fJ\taaY8n[>t\u0017BA%G\u0005)!vn[3o!&,7-\u001a\u0005\u0006\u0017\u001a\u0001\r\u0001T\u0001\u0006i>\\WM\u001c\t\u0003\u000b6K!A\u0014$\u0003\u0019%sG-\u001a=fIR{7.\u001a8\u0002!]{'\u000f\u001a9jK\u000e,WI\\2pI\u0016\u0014\bC\u0001\u001e\t'\tA1\u0004F\u0001Q\u0003m!C.Z:tS:LG\u000fJ4sK\u0006$XM\u001d\u0013eK\u001a\fW\u000f\u001c;%eU\tQK\u000b\u0002/-.\nq\u000b\u0005\u0002Y;6\t\u0011L\u0003\u0002[7\u0006IQO\\2iK\u000e\\W\r\u001a\u0006\u00039v\t!\"\u00198o_R\fG/[8o\u0013\tq\u0016LA\tv]\u000eDWmY6fIZ\u000b'/[1oG\u0016\f1\u0004\n7fgNLg.\u001b;%OJ,\u0017\r^3sI\u0011,g-Y;mi\u0012\u001aT#A1+\u0005E2\u0016a\u0007\u0013mKN\u001c\u0018N\\5uI\u001d\u0014X-\u0019;fe\u0012\"WMZ1vYR$C\u0007")
/* loaded from: input_file:com/johnsnowlabs/nlp/annotators/tokenizer/wordpiece/WordpieceEncoder.class */
/**
 * Splits a single token into vocabulary word pieces using a greedy
 * longest-match-first search.
 *
 * <p>Starting at the beginning of the token, the longest substring present in
 * the vocabulary is taken as the next piece; the search then restarts right
 * after that piece. Every piece that does not start at offset 0 is looked up
 * with {@code partPrefix} prepended. If some position has no matching
 * substring at all, the whole token is reported as a single unknown piece.
 */
public class WordpieceEncoder {
    /** Maps a piece string to its id (values are boxed integers). */
    private final Map<String, Object> vocabulary;
    /** Piece emitted when a token cannot be encoded; must be a key of {@link #vocabulary}. */
    private final String unkToken;
    /** Tokens longer than this many chars are mapped to the unknown piece without searching. */
    private final int maxInputCharsPerWord;
    /** Prefix added to every candidate piece that does not start at the token's first char. */
    private final String partPrefix;

    /**
     * Encodes one token into its word pieces.
     *
     * @param indexedToken the token to split, together with its begin/end offsets
     * @return the pieces in left-to-right order; a single unknown piece when the token
     *         is longer than {@code maxInputCharsPerWord} or cannot be fully covered by
     *         vocabulary entries; an empty array when the token text is empty
     */
    public TokenPiece[] encode(IndexedToken indexedToken) {
        // Resolved up front, before any other work, exactly once per call.
        final int unkId = BoxesRunTime.unboxToInt(this.vocabulary.apply(this.unkToken));

        if (indexedToken.token().length() > this.maxInputCharsPerWord) {
            return unknownPiece(indexedToken, unkId);
        }

        ArrayBuffer pieces = ArrayBuffer$.MODULE$.apply(Nil$.MODULE$);
        final String text = indexedToken.token();
        int start = 0;
        int end = text.length();

        while (end > start && start < text.length()) {
            final String fragment = text.substring(start, end);
            final String candidate;
            if (start > 0) {
                candidate = this.partPrefix + fragment;
            } else {
                candidate = fragment;
            }

            Option<Object> match = this.vocabulary.get(candidate);
            if (match.isEmpty()) {
                // Shrink the window from the right and retry at the same start.
                end--;
                if (end == start) {
                    // Not even a one-char piece matched here: give up on the whole token.
                    return unknownPiece(indexedToken, unkId);
                }
                continue;
            }

            final boolean isWordStart = start == 0;
            final int pieceBegin = indexedToken.begin() + start;
            // The piece's end offset is inclusive, hence the -1.
            final int pieceEnd = (indexedToken.begin() + end) - 1;
            TokenPiece piece = new TokenPiece(
                    candidate,
                    indexedToken.token(),
                    BoxesRunTime.unboxToInt(match.get()),
                    isWordStart,
                    pieceBegin,
                    pieceEnd);
            pieces.append(Predef$.MODULE$.wrapRefArray(new TokenPiece[]{piece}));

            // Continue after the matched piece, again trying the longest remainder first.
            start = end;
            end = text.length();
        }

        return (TokenPiece[]) pieces.toArray(ClassTag$.MODULE$.apply(TokenPiece.class));
    }

    /**
     * Builds the one-element result used whenever a token cannot be encoded:
     * the unknown piece, flagged as a word start and spanning the token's own offsets.
     */
    private TokenPiece[] unknownPiece(IndexedToken indexedToken, int unkId) {
        TokenPiece unknown = new TokenPiece(
                this.unkToken,
                indexedToken.token(),
                unkId,
                true,
                indexedToken.begin(),
                indexedToken.end());
        return new TokenPiece[]{unknown};
    }

    /**
     * Creates an encoder over the given vocabulary.
     *
     * @param map  piece string to piece id
     * @param str  the unknown-token piece; the vocabulary is required to contain it
     * @param i    maximum token length (in chars) that will be searched
     * @param str2 prefix for pieces that continue a word
     */
    public WordpieceEncoder(Map<String, Object> map, String str, int i, String str2) {
        this.vocabulary = map;
        this.unkToken = str;
        this.maxInputCharsPerWord = i;
        this.partPrefix = str2;
        // The message is built lazily, only if the requirement fails.
        Predef$.MODULE$.require(
                map.contains(str),
                () -> "token " + this.unkToken + " not found in vocabulary");
    }
}
