package cc.factorie.app.nlp.segment;

import cc.factorie.app.nlp.Document;
import cc.factorie.app.nlp.DocumentAnnotator;
import cc.factorie.app.nlp.Token;
import cc.factorie.app.nlp.coref.Mention;
import cc.factorie.app.nlp.phrase.Phrase;
import scala.Predef$;
import scala.StringContext;
import scala.collection.Iterable;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.TraversableOnce;
import scala.collection.immutable.List$;
import scala.collection.immutable.Nil$;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ArrayBuffer;
import scala.collection.mutable.StringBuilder;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.util.matching.Regex;

/* compiled from: DeterministicTokenizer.scala */
@ScalaSignature(bytes = "\u0006\u0001\r\u001de\u0001B\u0001\u0003\u00015\u0011a\u0003R3uKJl\u0017N\\5ti&\u001cGk\\6f]&TXM\u001d\u0006\u0003\u0007\u0011\tqa]3h[\u0016tGO\u0003\u0002\u0006\r\u0005\u0019a\u000e\u001c9\u000b\u0005\u001dA\u0011aA1qa*\u0011\u0011BC\u0001\tM\u0006\u001cGo\u001c:jK*\t1\"\u0001\u0002dG\u000e\u00011c\u0001\u0001\u000f)A\u0011qBE\u0007\u0002!)\t\u0011#A\u0003tG\u0006d\u0017-\u0003\u0002\u0014!\t1\u0011I\\=SK\u001a\u0004\"!\u0006\f\u000e\u0003\u0011I!a\u0006\u0003\u0003#\u0011{7-^7f]R\feN\\8uCR|'\u000f\u0003\u0005\u001a\u0001\t\u0005\t\u0015!\u0003\u001b\u00035\u0019\u0017m]3TK:\u001c\u0018\u000e^5wKB\u0011qbG\u0005\u00039A\u0011qAQ8pY\u0016\fg\u000e\u0003\u0005\u001f\u0001\t\u0005\t\u0015!\u0003\u001b\u00031!xn[3oSj,7kZ7m\u0011!\u0001\u0003A!A!\u0002\u0013Q\u0012a\u0004;pW\u0016t\u0017N_3OK^d\u0017N\\3\t\u0011\t\u0002!\u0011!Q\u0001\ni\ta\u0003^8lK:L'0Z!mY\u0012\u000b7\u000f[3e/>\u0014Hm\u001d\u0005\tI\u0001\u0011\t\u0011)A\u00055\u00059\u0012M\u00192sKZ\u0004&/Z2fK\u0012\u001cHj\\<fe\u000e\f7/\u001a\u0005\u0006M\u0001!\taJ\u0001\u0007y%t\u0017\u000e\u001e 
\u0015\r!R3\u0006L\u0017/!\tI\u0003!D\u0001\u0003\u0011\u001dIR\u0005%AA\u0002iAqAH\u0013\u0011\u0002\u0003\u0007!\u0004C\u0004!KA\u0005\t\u0019\u0001\u000e\t\u000f\t*\u0003\u0013!a\u00015!9A%\nI\u0001\u0002\u0004Q\u0002\"\u0002\u0019\u0001\t\u0003\t\u0014!\u0006;pW\u0016t\u0017I\u001c8pi\u0006$\u0018n\u001c8TiJLgn\u001a\u000b\u0003ei\u0002\"a\r\u001d\u000e\u0003QR!!\u000e\u001c\u0002\t1\fgn\u001a\u0006\u0002o\u0005!!.\u0019<b\u0013\tIDG\u0001\u0004TiJLgn\u001a\u0005\u0006w=\u0002\r\u0001P\u0001\u0006i>\\WM\u001c\t\u0003+uJ!A\u0010\u0003\u0003\u000bQ{7.\u001a8\t\u000f\u0001\u0003!\u0019!C\u0001\u0003\u0006A\u0001/\u0019;uKJt7/F\u0001C!\r\u0019\u0005JS\u0007\u0002\t*\u0011QIR\u0001\b[V$\u0018M\u00197f\u0015\t9\u0005#\u0001\u0006d_2dWm\u0019;j_:L!!\u0013#\u0003\u0017\u0005\u0013(/Y=Ck\u001a4WM\u001d\t\u0003\u0017:s!a\u0004'\n\u00055\u0003\u0012A\u0002)sK\u0012,g-\u0003\u0002:\u001f*\u0011Q\n\u0005\u0005\u0007#\u0002\u0001\u000b\u0011\u0002\"\u0002\u0013A\fG\u000f^3s]N\u0004\u0003bB*\u0001\u0005\u0004%\t\u0001V\u0001\u0005QRlG.F\u00013\u0011\u00191\u0006\u0001)A\u0005e\u0005)\u0001\u000e^7mA!9\u0001\f\u0001b\u0001\n\u0003!\u0016a\u00035u[2\u001cu.\\7f]RDaA\u0017\u0001!\u0002\u0013\u0011\u0014\u0001\u00045u[2\u001cu.\\7f]R\u0004\u0003b\u0002/\u0001\u0005\u0004%\t\u0001V\u0001\u0006g\u001elGN\r\u0005\u0007=\u0002\u0001\u000b\u0011\u0002\u001a\u0002\rM<W\u000e\u001c\u001a!\u0011\u001d\u0001\u0007A1A\u0005\u0002Q\u000bAa]4nY\"1!\r\u0001Q\u0001\nI\nQa]4nY\u0002Bq\u0001\u001a\u0001C\u0002\u0013\u0005A+\u0001\u0006ii6d7+_7c_2DaA\u001a\u0001!\u0002\u0013\u0011\u0014a\u00035u[2\u001c\u00160\u001c2pY\u0002Bq\u0001\u001b\u0001C\u0002\u0013\u0005A+A\u0002ve2DaA\u001b\u0001!\u0002\u0013\u0011\u0014\u0001B;sY\u0002Bq\u0001\u001c\u0001C\u0002\u0013\u0005A+\u0001\u0003ve2\u0014\u0004B\u00028\u0001A\u0003%!'A\u0003ve2\u0014\u0004\u0005C\u0004q\u0001\t\u0007I\u0011\u0001+\u0002\tU\u0014Hn\r\u0005\u0007e\u0002\u0001\u000b\u0011\u0002\u001a\u0002\u000bU\u0014Hn\r\u0011\t\u000fQ\u0004!\u00
19!C\u0001)\u0006)Q-\\1jY\"1a\u000f\u0001Q\u0001\nI\na!Z7bS2\u0004\u0003b\u0002=\u0001\u0005\u0004%\t\u0001V\u0001\bkN\u0004\bn\u001c8f\u0011\u0019Q\b\u0001)A\u0005e\u0005AQo\u001d9i_:,\u0007\u0005C\u0004}\u0001\t\u0007I\u0011\u0001+\u0002\u000f\u0019\u0014\b\u000f[8oK\"1a\u0010\u0001Q\u0001\nI\n\u0001B\u001a:qQ>tW\r\t\u0005\t\u0003\u0003\u0001!\u0019!C\u0001)\u0006!A-\u0019;f\u0011\u001d\t)\u0001\u0001Q\u0001\nI\nQ\u0001Z1uK\u0002B\u0001\"!\u0003\u0001\u0005\u0004%\t\u0001V\u0001\u0007I\u0016\u001c\u0017\rZ3\t\u000f\u00055\u0001\u0001)A\u0005e\u00059A-Z2bI\u0016\u0004\u0003\u0002CA\t\u0001\t\u0007I\u0011\u0001+\u0002\u0011\r,(O]3oGfDq!!\u0006\u0001A\u0003%!'A\u0005dkJ\u0014XM\\2zA!A\u0011\u0011\u0004\u0001C\u0002\u0013\u0005A+A\u0004iCNDG/Y4\t\u000f\u0005u\u0001\u0001)A\u0005e\u0005A\u0001.Y:ii\u0006<\u0007\u0005\u0003\u0005\u0002\"\u0001\u0011\r\u0011\"\u0001U\u0003\u0019\tG/^:fe\"9\u0011Q\u0005\u0001!\u0002\u0013\u0011\u0014aB1ukN,'\u000f\t\u0005\t\u0003S\u0001!\u0019!C\u0001)\u0006AQ-\\8uS\u000e|g\u000eC\u0004\u0002.\u0001\u0001\u000b\u0011\u0002\u001a\u0002\u0013\u0015lw\u000e^5d_:\u0004\u0003\u0002CA\u0019\u0001\t\u0007I\u0011\u0001+\u0002\u0011\u0019LG.\u001a8b[\u0016Dq!!\u000e\u0001A\u0003%!'A\u0005gS2,g.Y7fA!A\u0011\u0011\b\u0001C\u0002\u0013\u0005A+A\nd_:\u001cxN\\1oi:{g.\u00112ce\u001648\u000fC\u0004\u0002>\u0001\u0001\u000b\u0011\u0002\u001a\u0002)\r|gn]8oC:$hj\u001c8BE\n\u0014XM^:!\u0011!\t\t\u0005\u0001b\u0001\n\u0003!\u0016!B7p]RD\u0007bBA#\u0001\u0001\u0006IAM\u0001\u0007[>tG\u000f\u001b\u0011\t\u0011\u0005%\u0003A1A\u0005\u0002Q\u000b1\u0001Z1z\u0011\u001d\ti\u0005\u0001Q\u0001\nI\nA\u0001Z1zA!A\u0011\u0011\u000b\u0001C\u0002\u0013\u0005A+A\u0003ti\u0006$X\rC\u0004\u0002V\u0001\u0001\u000b\u0011\u0002\u001a\u0002\rM$\u0018\r^3!\u0011!\tI\u0006\u0001b\u0001\n\u0003!\u0016AB:uCR,'\u0007C\u0004\u0002^\u0001\u0001\u000b\u0011\u0002\u001a\u0002\u000fM$\u0018\r^33A!A\u0011\u0011\r\u0001C\u0002\u0013\u0005A+A\u0005i_:|'/\u001b4jG\"9\u0011Q\r\u0001!\u0002\u
0013\u0011\u0014A\u00035p]>\u0014\u0018NZ5dA!A\u0011\u0011\u000e\u0001C\u0002\u0013\u0005A+\u0001\u0004tk\u001a4\u0017\u000e\u001f\u0005\b\u0003[\u0002\u0001\u0015!\u00033\u0003\u001d\u0019XO\u001a4jq\u0002B\u0001\"!\u001d\u0001\u0005\u0004%\t\u0001V\u0001\u0006a2\f7-\u001a\u0005\b\u0003k\u0002\u0001\u0015!\u00033\u0003\u0019\u0001H.Y2fA!A\u0011\u0011\u0010\u0001C\u0002\u0013\u0005A+A\u0003v]&$8\u000fC\u0004\u0002~\u0001\u0001\u000b\u0011\u0002\u001a\u0002\rUt\u0017\u000e^:!\u0011!\t\t\t\u0001b\u0001\n\u0003!\u0016aA8sO\"9\u0011Q\u0011\u0001!\u0002\u0013\u0011\u0014\u0001B8sO\u0002B\u0001\"!#\u0001\u0005\u0004%\t\u0001V\u0001\u0006Y\u0006$\u0018N\u001c\u0005\b\u0003\u001b\u0003\u0001\u0015!\u00033\u0003\u0019a\u0017\r^5oA!A\u0011\u0011\u0013\u0001C\u0002\u0013\u0005A+\u0001\u0004bE\n\u0014XM\u001e\u0005\b\u0003+\u0003\u0001\u0015!\u00033\u0003\u001d\t'M\u0019:fm\u0002B\u0011\"!'\u0001\u0005\u0004%\t!a'\u0002\u000f\u0005\u0014'M]3wgV\t!\nC\u0004\u0002 \u0002\u0001\u000b\u0011\u0002&\u0002\u0011\u0005\u0014'M]3wg\u0002B\u0001\"a)\u0001\u0005\u0004%\t\u0001V\u0001\t]>\f%M\u0019:fm\"9\u0011q\u0015\u0001!\u0002\u0013\u0011\u0014!\u00038p\u0003\n\u0014'/\u001a<!\u0011!\tY\u000b\u0001b\u0001\n\u0003!\u0016A\u00027bi&t'\u0007C\u0004\u00020\u0002\u0001\u000b\u0011\u0002\u001a\u0002\u000f1\fG/\u001b83A!A\u00111\u0017\u0001C\u0002\u0013\u0005A+\u0001\nii6d\u0017iY2f]R,G\rT3ui\u0016\u0014\bbBA\\\u0001\u0001\u0006IAM\u0001\u0014QRlG.Q2dK:$X\r\u001a'fiR,'\u000f\t\u0005\n\u0003w\u0003!\u0019!C\u0001\u00037\u000ba\u0001\\3ui\u0016\u0014\bbBA`\u0001\u0001\u0006IAS\u0001\bY\u0016$H/\u001a:!\u0011!\t\u0019\r\u0001b\u0001\n\u0003!\u0016AA1q\u0011\u001d\t9\r\u0001Q\u0001\nI\n1!\u00199!\u0011%\tY\r\u0001b\u0001\n\u0003\tY*A\u0002baJBq!a4\u0001A\u0003%!*\u0001\u0003baJ\u0002\u0003\u0002CAj\u0001\t\u0007I\u0011\u0001+\u0002\u0019\r|g\u000e\u001e:bGRLwN\u001c\u001a\t\u000f\u0005]\u0007\u0001)A\u0005e\u0005i1m\u001c8ue\u0006\u001cG/[8oe\u0001B\u0011\"a7\u0001\u0005\u0004%\t!a'\u0002\u0017\r|
g\u000e\u001e:bGRLwN\u001c\u0005\b\u0003?\u0004\u0001\u0015!\u0003K\u00031\u0019wN\u001c;sC\u000e$\u0018n\u001c8!\u0011%\t\u0019\u000f\u0001b\u0001\n\u0003\tY*\u0001\u0004ba^|'\u000f\u001a\u0005\b\u0003O\u0004\u0001\u0015!\u0003K\u0003\u001d\t\u0007o^8sI\u0002B\u0001\"a;\u0001\u0005\u0004%\t\u0001V\u0001\nS:LG/[1mgJBq!a<\u0001A\u0003%!'\u0001\u0006j]&$\u0018.\u00197te\u0001B\u0001\"a=\u0001\u0005\u0004%\t\u0001V\u0001\tS:LG/[1mg\"9\u0011q\u001f\u0001!\u0002\u0013\u0011\u0014!C5oSRL\u0017\r\\:!\u0011!\tY\u0010\u0001b\u0001\n\u0003!\u0016\u0001C8sI&t\u0017\r\\:\t\u000f\u0005}\b\u0001)A\u0005e\u0005IqN\u001d3j]\u0006d7\u000f\t\u0005\t\u0005\u0007\u0001!\u0019!C\u0001)\u0006)\u0011/^8uK\"9!q\u0001\u0001!\u0002\u0013\u0011\u0014AB9v_R,\u0007\u0005\u0003\u0005\u0003\f\u0001\u0011\r\u0011\"\u0001U\u00039!\u0017m\u001d5fIB\u0013XMZ5yKNDqAa\u0004\u0001A\u0003%!'A\beCNDW\r\u001a)sK\u001aL\u00070Z:!\u0011!\u0011\u0019\u0002\u0001b\u0001\n\u0003!\u0016A\u00043bg\",GmU;gM&DXm\u001d\u0005\b\u0005/\u0001\u0001\u0015!\u00033\u0003=!\u0017m\u001d5fIN+hMZ5yKN\u0004\u0003\u0002\u0003B\u000e\u0001\t\u0007I\u0011\u0001+\u0002!\u0011\f7\u000f[3e!J,g-\u001b=X_J$\u0007b\u0002B\u0010\u0001\u0001\u0006IAM\u0001\u0012I\u0006\u001c\b.\u001a3Qe\u00164\u0017\u000e_,pe\u0012\u0004\u0003\u0002\u0003B\u0012\u0001\t\u0007I\u0011\u0001+\u0002!\u0011\f7\u000f[3e'V4g-\u001b=X_J$\u0007b\u0002B\u0014\u0001\u0001\u0006IAM\u0001\u0012I\u0006\u001c\b.\u001a3Tk\u001a4\u0017\u000e_,pe\u0012\u0004\u0003\u0002\u0003B\u0016\u0001\t\u0007I\u0011\u0001+\u0002\u0011\u0019\u0014\u0018m\u0019;j_:DqAa\f\u0001A\u0003%!'A\u0005ge\u0006\u001cG/[8oA!I!1\u0007\u0001C\u0002\u0013\u0005\u00111T\u0001\u000fG>tGO]1di\u0016$wk\u001c:e\u0011\u001d\u00119\u0004\u0001Q\u0001\n)\u000bqbY8oiJ\f7\r^3e/>\u0014H\r\t\u0005\n\u0005w\u0001!\u0019!C\u0001\u00037\u000bAaY1qg\"9!q\b\u0001!\u0002\u0013Q\u0015!B2baN\u0004\u0003\"\u0003B\"\u0001\t\u0007I\u0011AAN\u0003\u00119xN\u001d3\t\u000f\t\u001d\u0003\u0001)A\u0005\u0015\u0006)qo\u001c:eA!
I!1\n\u0001C\u0002\u0013\u0005\u00111T\u0001\u0007]Vl'-\u001a:\t\u000f\t=\u0003\u0001)A\u0005\u0015\u00069a.^7cKJ\u0004\u0003\u0002\u0003B*\u0001\t\u0007I\u0011\u0001+\u0002\u000f9,XNY3se!9!q\u000b\u0001!\u0002\u0013\u0011\u0014\u0001\u00038v[\n,'O\r\u0011\t\u0011\tm\u0003A1A\u0005\u0002Q\u000b\u0001\"\u001a7mSB\u001c\u0018n\u001d\u0005\b\u0005?\u0002\u0001\u0015!\u00033\u0003%)G\u000e\\5qg&\u001c\b\u0005\u0003\u0005\u0003d\u0001\u0011\r\u0011\"\u0001U\u00031\u0011X\r]3bi\u0016$\u0007+\u001e8d\u0011\u001d\u00119\u0007\u0001Q\u0001\nI\nQB]3qK\u0006$X\r\u001a)v]\u000e\u0004\u0003\u0002\u0003B6\u0001\t\u0007I\u0011\u0001+\u0002\u000b5$\u0017m\u001d5\t\u000f\t=\u0004\u0001)A\u0005e\u00051Q\u000eZ1tQ\u0002B\u0001Ba\u001d\u0001\u0005\u0004%\t\u0001V\u0001\u0005I\u0006\u001c\b\u000eC\u0004\u0003x\u0001\u0001\u000b\u0011\u0002\u001a\u0002\u000b\u0011\f7\u000f\u001b\u0011\t\u0011\tm\u0004A1A\u0005\u0002Q\u000bA\u0001];oG\"9!q\u0010\u0001!\u0002\u0013\u0011\u0014!\u00029v]\u000e\u0004\u0003\u0002\u0003BB\u0001\t\u0007I\u0011\u0001+\u0002\rMLXNY8m\u0011\u001d\u00119\t\u0001Q\u0001\nI\nqa]=nE>d\u0007\u0005\u0003\u0005\u0003\f\u0002\u0011\r\u0011\"\u0001U\u0003!AG/\u001c7DQ\u0006\u0014\bb\u0002BH\u0001\u0001\u0006IAM\u0001\nQRlGn\u00115be\u0002B\u0001Ba%\u0001\u0005\u0004%\t\u0001V\u0001\tG\u0006$8\r[!mY\"9!q\u0013\u0001!\u0002\u0013\u0011\u0014!C2bi\u000eD\u0017\t\u001c7!\u0011!\u0011Y\n\u0001b\u0001\n\u0003!\u0016a\u00028fo2Lg.\u001a\u0005\b\u0005?\u0003\u0001\u0015!\u00033\u0003!qWm\u001e7j]\u0016\u0004\u0003\u0002\u0003BR\u0001\t\u0007I\u0011\u0001+\u0002\u000bM\u0004\u0018mY3\t\u000f\t\u001d\u0006\u0001)A\u0005e\u000511\u000f]1dK\u0002B\u0011Ba+\u0001\u0005\u0004%\t!a'\u0002!Q|7.\u001a8SK\u001e,\u0007p\u0015;sS:<\u0007b\u0002BX\u0001\u0001\u0006IAS\u0001\u0012i>\\WM\u001c*fO\u0016D8\u000b\u001e:j]\u001e\u0004\u0003\"\u0003BZ\u0001\t\u0007I\u0011\u0001B[\u0003)!xn[3o%\u0016<W\r_\u000b\u0003\u0005o\u0003BA!/\u0003D6\u0011!1\u0018\u0006\u0005\u0005{\u0013y,\u0001\u0005nCR\u001
c\u0007.\u001b8h\u0015\r\u0011\t\rE\u0001\u0005kRLG.\u0003\u0003\u0003F\nm&!\u0002*fO\u0016D\b\u0002\u0003Be\u0001\u0001\u0006IAa.\u0002\u0017Q|7.\u001a8SK\u001e,\u0007\u0010\t\u0005\b\u0005\u001b\u0004A\u0011\u0001Bh\u0003\u001d\u0001(o\\2fgN$BA!5\u0003XB\u0019QCa5\n\u0007\tUGA\u0001\u0005E_\u000e,X.\u001a8u\u0011!\u0011INa3A\u0002\tE\u0017\u0001\u00033pGVlWM\u001c;\t\u000f\tu\u0007\u0001\"\u0001\u0003`\u0006Y\u0001O]3sKF\fE\u000f\u001e:t+\t\u0011\t\u000f\u0005\u0004\u0003d\nM(\u0011 \b\u0005\u0005K\u0014yO\u0004\u0003\u0003h\n5XB\u0001Bu\u0015\r\u0011Y\u000fD\u0001\u0007yI|w\u000e\u001e \n\u0003EI1A!=\u0011\u0003\u001d\u0001\u0018mY6bO\u0016LAA!>\u0003x\nA\u0011\n^3sC\ndWMC\u0002\u0003rB\u0001DAa?\u0004\u0006A)1J!@\u0004\u0002%\u0019!q`(\u0003\u000b\rc\u0017m]:\u0011\t\r\r1Q\u0001\u0007\u0001\t1\u00199Aa7\u0002\u0002\u0003\u0005)\u0011AB\u0005\u0005\ryF%M\t\u0005\u0007\u0017\u0019\t\u0002E\u0002\u0010\u0007\u001bI1aa\u0004\u0011\u0005\u001dqu\u000e\u001e5j]\u001e\u00042aDB\n\u0013\r\u0019)\u0002\u0005\u0002\u0004\u0003:L\bbBB\r\u0001\u0011\u000511D\u0001\na>\u001cH/\u0011;ueN,\"a!\b\u0011\r\t\r(1_B\u0010a\u0011\u0019\tc!\n\u0011\u000b-\u0013ipa\t\u0011\t\r\r1Q\u0005\u0003\r\u0007O\u00199\"!A\u0001\u0002\u000b\u00051\u0011\u0002\u0002\u0004?\u0012\u0012\u0004bBB\u0016\u0001\u0011\u00051QF\u0001\u0006CB\u0004H.\u001f\u000b\u0005\u0007_\u0019)\u0004E\u0003\u0003d\u000eE\"*\u0003\u0003\u00044\t](aA*fc\"91qGB\u0015\u0001\u0004Q\u0015!A:\b\u000f\rm\"\u0001#\u0001\u0004>\u00051B)\u001a;fe6Lg.[:uS\u000e$vn[3oSj,'\u000fE\u0002*\u0007\u007f1a!\u0001\u0002\t\u0002\r\u00053cAB 
Q!9aea\u0010\u0005\u0002\r\u0015CCAB\u001f\u0011!\u0019Iea\u0010\u0005\u0002\r-\u0013\u0001B7bS:$Ba!\u0014\u0004TA\u0019qba\u0014\n\u0007\rE\u0003C\u0001\u0003V]&$\b\u0002CB+\u0007\u000f\u0002\raa\u0016\u0002\t\u0005\u0014xm\u001d\t\u0005\u001f\re#*C\u0002\u0004\\A\u0011Q!\u0011:sCfD!ba\u0018\u0004@E\u0005I\u0011AB1\u0003m!C.Z:tS:LG\u000fJ4sK\u0006$XM\u001d\u0013eK\u001a\fW\u000f\u001c;%cU\u001111\r\u0016\u00045\r\u00154FAB4!\u0011\u0019Iga\u001d\u000e\u0005\r-$\u0002BB7\u0007_\n\u0011\"\u001e8dQ\u0016\u001c7.\u001a3\u000b\u0007\rE\u0004#\u0001\u0006b]:|G/\u0019;j_:LAa!\u001e\u0004l\t\tRO\\2iK\u000e\\W\r\u001a,be&\fgnY3\t\u0015\re4qHI\u0001\n\u0003\u0019\t'A\u000e%Y\u0016\u001c8/\u001b8ji\u0012:'/Z1uKJ$C-\u001a4bk2$HE\r\u0005\u000b\u0007{\u001ay$%A\u0005\u0002\r\u0005\u0014a\u0007\u0013mKN\u001c\u0018N\\5uI\u001d\u0014X-\u0019;fe\u0012\"WMZ1vYR$3\u0007\u0003\u0006\u0004\u0002\u000e}\u0012\u0013!C\u0001\u0007C\n1\u0004\n7fgNLg.\u001b;%OJ,\u0017\r^3sI\u0011,g-Y;mi\u0012\"\u0004BCBC\u0007\u007f\t\n\u0011\"\u0001\u0004b\u0005YB\u0005\\3tg&t\u0017\u000e\u001e\u0013he\u0016\fG/\u001a:%I\u00164\u0017-\u001e7uIU\u0002")
/* loaded from: input_file:cc/factorie/app/nlp/segment/DeterministicTokenizer.class */
public class DeterministicTokenizer implements DocumentAnnotator {
    // Constructor flags retained as fields. The constructor stores its 2nd, 3rd and 5th
    // boolean arguments here, in that order.
    public final boolean cc$factorie$app$nlp$segment$DeterministicTokenizer$$tokenizeSgml;
    public final boolean cc$factorie$app$nlp$segment$DeterministicTokenizer$$tokenizeNewline;
    public final boolean cc$factorie$app$nlp$segment$DeterministicTokenizer$$abbrevPreceedsLowercase;
    // Growable buffer that the constructor fills with regex fragments via patterns().$plus$eq(...).
    private final ArrayBuffer<String> patterns;
    // Individual regex fragments. Each is assigned exactly once in the constructor and exposed
    // through a same-named accessor method. Some are appended to `patterns` directly; others
    // (for example month, day, state) only serve as building blocks for composite fragments.
    private final String html;
    private final String htmlComment;
    private final String sgml2;
    private final String sgml;
    private final String htmlSymbol;
    private final String url;
    private final String url2;
    private final String url3;
    private final String email;
    private final String usphone;
    private final String frphone;
    private final String date;
    private final String decade;
    private final String currency;
    private final String hashtag;
    private final String atuser;
    private final String emoticon;
    private final String filename;
    private final String consonantNonAbbrevs;
    private final String month;
    private final String day;
    private final String state;
    private final String state2;
    private final String honorific;
    private final String suffix;
    private final String place;
    private final String units;
    private final String org;
    private final String latin;
    private final String abbrev;
    private final String abbrevs;
    private final String noAbbrev;
    private final String latin2;
    private final String htmlAccentedLetter;
    private final String letter;
    private final String ap;
    private final String ap2;
    private final String contraction2;
    private final String contraction;
    private final String apword;
    private final String initials2;
    private final String initials;
    private final String ordinals;
    private final String quote;
    private final String dashedPrefixes;
    private final String dashedSuffixes;
    private final String dashedPrefixWord;
    private final String dashedSuffixWord;
    private final String fraction;
    private final String contractedWord;
    private final String caps;
    private final String word;
    private final String number;
    private final String number2;
    private final String ellipsis;
    private final String repeatedPunc;
    private final String mdash;
    private final String dash;
    private final String punc;
    private final String symbol;
    private final String htmlChar;
    private final String catchAll;
    private final String newline;
    private final String space;
    // NOTE(review): presumably the combined pattern text and its compiled form, judging by the
    // names and types -- confirm against the constructor.
    private final String tokenRegexString;
    private final Regex tokenRegex;

    /**
     * Static entry point that forwards to the {@code main} method of the Scala companion
     * object singleton ({@code DeterministicTokenizer$.MODULE$}).
     *
     * @param strArr command-line arguments, passed through unchanged
     */
    public static void main(String[] strArr) {
        final DeterministicTokenizer$ companion = DeterministicTokenizer$.MODULE$;
        companion.main(strArr);
    }

    /**
     * Forwards to the shared implementation {@code DocumentAnnotator.Cclass.processSequential},
     * passing this annotator as the receiver.
     *
     * @param iterable the documents to hand to the shared implementation
     * @return whatever the shared implementation returns
     */
    @Override // cc.factorie.app.nlp.DocumentAnnotator
    public Iterable<Document> processSequential(Iterable<Document> iterable) {
        final Iterable<Document> result = DocumentAnnotator.Cclass.processSequential(this, iterable);
        return result;
    }

    /**
     * Forwards to the shared implementation {@code DocumentAnnotator.Cclass.processParallel},
     * passing this annotator as the receiver.
     *
     * @param iterable the documents to hand to the shared implementation
     * @param i        integer argument passed through unchanged (presumably a thread or task
     *                 count -- confirm against {@code DocumentAnnotator})
     * @return whatever the shared implementation returns
     */
    @Override // cc.factorie.app.nlp.DocumentAnnotator
    public Iterable<Document> processParallel(Iterable<Document> iterable, int i) {
        final Iterable<Document> result = DocumentAnnotator.Cclass.processParallel(this, iterable, i);
        return result;
    }

    /**
     * Forwards to the shared implementation
     * {@code DocumentAnnotator.Cclass.documentAnnotationString}; this class adds no
     * document-level formatting of its own.
     *
     * @param document the document to describe
     * @return the string produced by the shared implementation
     */
    @Override // cc.factorie.app.nlp.DocumentAnnotator
    public String documentAnnotationString(Document document) {
        final String rendered = DocumentAnnotator.Cclass.documentAnnotationString(this, document);
        return rendered;
    }

    /**
     * Forwards to the shared implementation
     * {@code DocumentAnnotator.Cclass.phraseAnnotationString}; this class adds no
     * phrase-level formatting of its own.
     *
     * @param phrase the phrase to describe
     * @return the string produced by the shared implementation
     */
    @Override // cc.factorie.app.nlp.DocumentAnnotator
    public String phraseAnnotationString(Phrase phrase) {
        final String rendered = DocumentAnnotator.Cclass.phraseAnnotationString(this, phrase);
        return rendered;
    }

    /**
     * Forwards to the shared implementation
     * {@code DocumentAnnotator.Cclass.mentionAnnotationString}; this class adds no
     * mention-level formatting of its own.
     *
     * @param mention the coreference mention to describe
     * @return the string produced by the shared implementation
     */
    @Override // cc.factorie.app.nlp.DocumentAnnotator
    public String mentionAnnotationString(Mention mention) {
        final String rendered = DocumentAnnotator.Cclass.mentionAnnotationString(this, mention);
        return rendered;
    }

    /**
     * Supplies the default value for the second parameter of
     * {@link #processParallel(Iterable, int)} by asking
     * {@code DocumentAnnotator.Cclass.processParallel$default$2}.
     *
     * @return the default chosen by the shared implementation
     */
    @Override // cc.factorie.app.nlp.DocumentAnnotator
    public int processParallel$default$2() {
        final int defaultValue = DocumentAnnotator.Cclass.processParallel$default$2(this);
        return defaultValue;
    }

    /**
     * Renders a token as its start offset and end offset separated by a single tab character.
     *
     * @param token the token whose {@code stringStart()} and {@code stringEnd()} are printed
     * @return the text {@code <stringStart>TAB<stringEnd>}
     */
    @Override // cc.factorie.app.nlp.DocumentAnnotator
    /* renamed from: tokenAnnotationString */
    public String mo339tokenAnnotationString(Token token) {
        // Operands are evaluated left to right, so stringStart() is still read before stringEnd().
        return token.stringStart() + "\t" + token.stringEnd();
    }

    /**
     * Returns the mutable buffer of regex fragments. The constructor creates it empty and
     * appends fragments to it one at a time; the same instance is returned on every call.
     *
     * @return the live (not copied) pattern buffer
     */
    public ArrayBuffer<String> patterns() {
        return patterns;
    }

    /**
     * Returns the regex fragment covering whole {@code <script>...</script>} and
     * {@code <style>...</style>} elements. The constructor appends it to {@link #patterns()}
     * only when its second boolean argument (the tokenizeSgml flag) is false.
     *
     * @return the script/style element pattern text
     */
    public String html() {
        return html;
    }

    /**
     * Returns the regex fragment for HTML comments, accepting the angle brackets either
     * literally or as the entities {@code &lt;} and {@code &gt;}.
     *
     * @return the HTML comment pattern text
     */
    public String htmlComment() {
        return htmlComment;
    }

    /**
     * Returns the regex fragment for blocks delimited by {@code <%} and {@code %>}.
     *
     * @return the percent-delimited block pattern text
     */
    public String sgml2() {
        return sgml2;
    }

    /**
     * Returns the regex fragment for an opening or closing markup tag: a left angle bracket,
     * an optional slash, an ASCII letter or {@code !}, then a reluctant run up to a right angle
     * bracket that is not preceded by {@code %}.
     *
     * @return the markup tag pattern text
     */
    public String sgml() {
        return sgml;
    }

    /**
     * Returns the regex fragment for a fixed set of named entities (such as {@code &amp;} and
     * {@code &nbsp;}) and for {@code &#...;} character references.
     *
     * @return the entity pattern text
     */
    public String htmlSymbol() {
        return htmlSymbol;
    }

    /**
     * Returns the regex fragment for URLs that start with {@code http://} or {@code https://}.
     *
     * @return the scheme-prefixed URL pattern text
     */
    public String url() {
        return url;
    }

    /**
     * Returns the regex fragment for scheme-less URLs: either a {@code www.} host, or a dotted
     * host ending in one of a fixed list of top-level domains, each with an optional path.
     *
     * @return the scheme-less URL pattern text
     */
    public String url2() {
        return url2;
    }

    /**
     * Returns the regex fragment for a single host label followed by one of a fixed list of
     * domain suffixes (for example {@code .com} or {@code .co.uk}).
     *
     * @return the short host-name pattern text
     */
    public String url3() {
        return url3;
    }

    /**
     * Returns the regex fragment for e-mail addresses, with an optional leading
     * {@code mailto:}.
     *
     * @return the e-mail pattern text
     */
    public String email() {
        return email;
    }

    /**
     * Returns the regex fragment for phone numbers written as 3-3-4 digit groups with an
     * optional leading {@code 1} or {@code +1}.
     *
     * @return the US-style phone number pattern text
     */
    public String usphone() {
        return usphone;
    }

    /**
     * Returns the regex fragment for phone numbers written as a digit followed by three
     * two-digit groups, with an optional leading {@code +33}.
     *
     * @return the French-style phone number pattern text
     */
    public String frphone() {
        return frphone;
    }

    /**
     * Returns the regex fragment for numeric dates whose three parts are separated by
     * {@code -} or {@code /}, with the year either first or last, and not followed by a digit.
     *
     * @return the numeric date pattern text
     */
    public String date() {
        return date;
    }

    /**
     * Returns the regex fragment for decades such as {@code 1990s} or {@code 90s}.
     *
     * @return the decade pattern text
     */
    public String decade() {
        return decade;
    }

    /**
     * Returns the regex fragment for currency markers: a dollar sign with an optional country
     * prefix, the entities for euro, cent and pound, any Unicode currency symbol, and a fixed
     * list of uppercase codes such as {@code USD} not followed by another capital letter.
     *
     * @return the currency pattern text
     */
    public String currency() {
        return currency;
    }

    /**
     * Returns the regex fragment for {@code #} followed by an ASCII letter and at least one
     * more ASCII letter or digit.
     *
     * @return the hashtag pattern text
     */
    public String hashtag() {
        return hashtag;
    }

    /**
     * Returns the regex fragment for {@code @} followed by an ASCII letter and at least one
     * more ASCII letter or digit.
     *
     * @return the at-mention pattern text
     */
    public String atuser() {
        return atuser;
    }

    /**
     * Returns the regex fragment for emoticon-like punctuation sequences that are not followed
     * by a non-whitespace character, plus the literal three-character face {@code '.'}.
     *
     * @return the emoticon pattern text
     */
    public String emoticon() {
        return emoticon;
    }

    /**
     * Returns the regex fragment for a run of non-whitespace characters ending in a dot and
     * one of a fixed list of file extensions.
     *
     * @return the file-name pattern text
     */
    public String filename() {
        return filename;
    }

    /**
     * Returns the regex fragment for the words {@code Ng}, {@code cwm}, {@code nth} and
     * {@code pm} when immediately followed by a period (the period itself is only looked at,
     * not consumed).
     *
     * @return the consonant-only non-abbreviation pattern text
     */
    public String consonantNonAbbrevs() {
        return consonantNonAbbrevs;
    }

    /**
     * Returns the {@code |}-separated list of abbreviated month names. The constructor uses it
     * as one of the inputs from which {@link #abbrevs()} is built.
     *
     * @return the month abbreviation alternation
     */
    public String month() {
        return month;
    }

    /**
     * Returns the {@code |}-separated list of abbreviated weekday names. The constructor uses
     * it as one of the inputs from which {@link #abbrevs()} is built.
     *
     * @return the weekday abbreviation alternation
     */
    public String day() {
        return day;
    }

    /**
     * Returns the {@code |}-separated list of longer state-name abbreviations (for example
     * {@code Calif}, {@code Mass}). The constructor uses it as one of the inputs from which
     * {@link #abbrevs()} is built.
     *
     * @return the state abbreviation alternation
     */
    public String state() {
        return state;
    }

    /**
     * Returns the {@code |}-separated list of two-letter, capitalised state-style codes (for
     * example {@code Ca}, {@code Tx}). The constructor uses it as one of the inputs from which
     * {@link #abbrevs()} is built.
     *
     * @return the two-letter code alternation
     */
    public String state2() {
        return state2;
    }

    /**
     * Returns the {@code |}-separated list of title abbreviations (for example {@code Mr},
     * {@code Drs?}, {@code Profs?}). The constructor uses it as one of the inputs from which
     * {@link #abbrevs()} is built.
     *
     * @return the honorific alternation
     */
    public String honorific() {
        return honorific;
    }

    /**
     * Returns the {@code |}-separated list of name-suffix abbreviations (for example
     * {@code Jr}, {@code Sr}, {@code Esq}). The constructor uses it as one of the inputs from
     * which {@link #abbrevs()} is built.
     *
     * @return the name-suffix alternation
     */
    public String suffix() {
        return suffix;
    }

    /**
     * Returns the {@code |}-separated list of street and place abbreviations (for example
     * {@code Blvd}, {@code Rd}, {@code Ave?}). The constructor uses it as one of the inputs
     * from which {@link #abbrevs()} is built.
     *
     * @return the place abbreviation alternation
     */
    public String place() {
        return place;
    }

    /**
     * Returns the {@code |}-separated list of unit abbreviations (for example {@code oz},
     * {@code mi}). The constructor uses it as one of the inputs from which {@link #abbrevs()}
     * is built.
     *
     * @return the unit abbreviation alternation
     */
    public String units() {
        return units;
    }

    /**
     * Returns the {@code |}-separated list of organisation-related abbreviations (for example
     * {@code Inc}, {@code Ltd}, {@code Corps?}). The constructor uses it as one of the inputs
     * from which {@link #abbrevs()} is built.
     *
     * @return the organisation abbreviation alternation
     */
    public String org() {
        return org;
    }

    /**
     * Returns the alternation {@code e.g|i.e}. The dots are unescaped, so as a regex each
     * matches any character. The constructor uses this string as one of the inputs from which
     * {@link #abbrevs()} is built.
     *
     * @return the Latin abbreviation alternation
     */
    public String latin() {
        return latin;
    }

    /**
     * Returns the alternation of miscellaneous abbreviations ({@code etc}, {@code vol}, ...)
     * together with a generic rule: one capital consonant followed by one or more lowercase
     * consonants. The constructor uses it as one of the inputs from which {@link #abbrevs()}
     * is built.
     *
     * @return the miscellaneous abbreviation alternation
     */
    public String abbrev() {
        return abbrev;
    }

    /**
     * Returns the combined abbreviation pattern. The constructor builds it by passing the
     * month, day, state, state2, honorific, suffix, place, units, org, latin and abbrev strings
     * through a {@code flatMap} closure (defined in a separate class) and joining the results
     * with {@code |}; the result is appended to {@link #patterns()}.
     *
     * @return the combined abbreviation pattern text
     */
    public String abbrevs() {
        return abbrevs;
    }

    /**
     * Returns the regex fragment for {@code No.} or {@code no.} when followed, after optional
     * separator characters, by a decimal digit.
     *
     * @return the "number" abbreviation pattern text
     */
    public String noAbbrev() {
        return noAbbrev;
    }

    /**
     * Returns the regex fragment for {@code i.e} or {@code e.g} (dots unescaped) when not
     * followed by a letter.
     *
     * @return the standalone Latin abbreviation pattern text
     */
    public String latin2() {
        return latin2;
    }

    /**
     * Returns the regex fragment for entities that encode an accented letter: an ampersand, one
     * base letter, an accent name such as {@code acute} or {@code uml}, and a semicolon.
     *
     * @return the accented-letter entity pattern text
     */
    public String htmlAccentedLetter() {
        return htmlAccentedLetter;
    }

    /**
     * Returns the regex fragment for a single "letter": either one Unicode letter or mark
     * character, or an accented-letter entity as defined by {@link #htmlAccentedLetter()}.
     * The constructor uses it as a building block for other fragments.
     *
     * @return the single-letter pattern text
     */
    public String letter() {
        return letter;
    }

    /**
     * Returns the regex fragment for an apostrophe: the ASCII or typographic character, or one
     * of several entity spellings (for example {@code &apos;} or {@code &rsquo;}).
     *
     * @return the apostrophe pattern text
     */
    public String ap() {
        return ap;
    }

    /**
     * Returns the wider apostrophe fragment: everything {@link #ap()} accepts, plus
     * {@code &lsquo;}, the backtick, and left single-quote characters.
     *
     * @return the extended apostrophe pattern text
     */
    public String ap2() {
        return ap2;
    }

    /**
     * Returns the regex fragment that matches {@code what} when followed by {@code cha} and
     * {@code wan} when followed by {@code na}; the following part is only looked at, not
     * consumed.
     *
     * @return the fused-contraction pattern text
     */
    public String contraction2() {
        return contraction2;
    }

    /**
     * Returns the regex fragment for contraction endings: {@code n} + apostrophe + {@code t},
     * or, after a letter, an apostrophe followed by one of d, s, m, re, ve, ll (lower or upper
     * case) that is not followed by another letter. The apostrophe part comes from
     * {@link #ap()}.
     *
     * @return the contraction-ending pattern text
     */
    public String contraction() {
        return contraction;
    }

    /**
     * Returns the regex fragment for whole words that contain an apostrophe (for example
     * o'clock, ma'am, 'em, 'cause), assembled in the constructor from {@link #ap()},
     * {@link #ap2()} and {@link #letter()}.
     *
     * @return the apostrophe-word pattern text
     */
    public String apword() {
        return apword;
    }

    /**
     * Returns the regex fragment for letters separated by periods with no trailing period
     * (shape {@code A.B.C}), not followed by punctuation.
     *
     * @return the unterminated-initials pattern text
     */
    public String initials2() {
        return initials2;
    }

    /**
     * Returns the regex fragment for one or more letter-period pairs (shape {@code A.B.}),
     * rejected when followed by two of {@code . ! ?} or by a period and a letter.
     *
     * @return the initials pattern text
     */
    public String initials() {
        return initials;
    }

    /**
     * Returns the regex fragment for one to four ASCII digits followed by {@code st},
     * {@code nd}, {@code rd} or {@code th}.
     *
     * @return the ordinal-number pattern text
     */
    public String ordinals() {
        return ordinals;
    }

    /**
     * Returns the regex fragment for quotation marks: doubled ASCII apostrophes or backticks,
     * one or two typographic quote characters, the ASCII double quote, quote entities, or two
     * consecutive {@link #ap2()} matches.
     *
     * @return the quotation-mark pattern text
     */
    public String quote() {
        return quote;
    }

    /**
     * Returns the case-insensitive alternation of word prefixes (anti, co, pre, semi, ...).
     * The constructor uses it to build {@link #dashedPrefixWord()}.
     *
     * @return the prefix alternation
     */
    public String dashedPrefixes() {
        return dashedPrefixes;
    }

    /**
     * Returns the case-insensitive alternation of word suffixes (able, esque, less, ...).
     * The constructor uses it to build {@link #dashedSuffixWord()}.
     *
     * @return the suffix alternation
     */
    public String dashedSuffixes() {
        return dashedSuffixes;
    }

    /**
     * Returns the regex fragment for a known prefix from {@link #dashedPrefixes()}, a hyphen,
     * and a following word that starts with a letter or mark character.
     *
     * @return the hyphenated prefix-word pattern text
     */
    public String dashedPrefixWord() {
        return dashedPrefixWord;
    }

    /**
     * Returns the regex fragment for a run of letters, marks or numbers, a hyphen, and a known
     * suffix from {@link #dashedSuffixes()} that is not followed by a {@link #letter()} match.
     *
     * @return the hyphenated suffix-word pattern text
     */
    public String dashedSuffixWord() {
        return dashedSuffixWord;
    }

    /**
     * Returns the regex fragment for fractions: single fraction characters, the entities
     * {@code &frac14;}, {@code &frac12;} and {@code &frac34;}, or numerator/denominator written
     * with a slash, optionally preceded by a whole-number part.
     *
     * @return the fraction pattern text
     */
    public String fraction() {
        return fraction;
    }

    /**
     * Returns the regex fragment for a run of letters or marks that is immediately followed by
     * a {@link #contraction()} match; the contraction itself is only looked at, not consumed.
     *
     * @return the contracted-word stem pattern text
     */
    public String contractedWord() {
        return contractedWord;
    }

    /**
     * Returns the regex fragment for runs of uppercase letters joined by {@code &} or
     * {@code +}, where the joining character must not begin an {@link #htmlSymbol()} or
     * {@link #htmlAccentedLetter()} match.
     *
     * @return the joined-capitals pattern text
     */
    public String caps() {
        return caps;
    }

    /**
     * Returns the stored {@code word} fragment.
     * NOTE(review): presumably the general word pattern, judging by the name -- confirm against
     * the constructor assignment.
     *
     * @return the value of the {@code word} field
     */
    public String word() {
        return word;
    }

    /**
     * Returns the stored {@code number} fragment.
     * NOTE(review): presumably a numeric-token pattern, judging by the name -- confirm against
     * the constructor assignment.
     *
     * @return the value of the {@code number} field
     */
    public String number() {
        return number;
    }

    /**
     * Returns the stored {@code number2} fragment.
     * NOTE(review): presumably a second numeric-token pattern, judging by the name -- confirm
     * against the constructor assignment.
     *
     * @return the value of the {@code number2} field
     */
    public String number2() {
        return number2;
    }

    /**
     * Returns the stored {@code ellipsis} fragment.
     * NOTE(review): presumably the ellipsis pattern, judging by the name -- confirm against the
     * constructor assignment.
     *
     * @return the value of the {@code ellipsis} field
     */
    public String ellipsis() {
        return ellipsis;
    }

    /**
     * Returns the stored {@code repeatedPunc} fragment.
     * NOTE(review): presumably a pattern for repeated punctuation, judging by the name --
     * confirm against the constructor assignment.
     *
     * @return the value of the {@code repeatedPunc} field
     */
    public String repeatedPunc() {
        return repeatedPunc;
    }

    /**
     * Returns the stored {@code mdash} fragment.
     * NOTE(review): presumably the long-dash pattern, judging by the name -- confirm against
     * the constructor assignment.
     *
     * @return the value of the {@code mdash} field
     */
    public String mdash() {
        return mdash;
    }

    /**
     * Returns the stored {@code dash} fragment.
     * NOTE(review): presumably the hyphen/dash pattern, judging by the name -- confirm against
     * the constructor assignment.
     *
     * @return the value of the {@code dash} field
     */
    public String dash() {
        return dash;
    }

    /**
     * Returns the stored {@code punc} fragment.
     * NOTE(review): presumably the punctuation pattern, judging by the name -- confirm against
     * the constructor assignment.
     *
     * @return the value of the {@code punc} field
     */
    public String punc() {
        return punc;
    }

    /**
     * Returns the stored {@code symbol} fragment.
     * NOTE(review): presumably the symbol-character pattern, judging by the name -- confirm
     * against the constructor assignment.
     *
     * @return the value of the {@code symbol} field
     */
    public String symbol() {
        return symbol;
    }

    /**
     * Returns the stored {@code htmlChar} fragment.
     * NOTE(review): presumably a pattern for HTML character entities, judging by the name --
     * confirm against the constructor assignment.
     *
     * @return the value of the {@code htmlChar} field
     */
    public String htmlChar() {
        return htmlChar;
    }

    /**
     * Returns the stored {@code catchAll} fragment.
     * NOTE(review): presumably the fallback pattern for anything not otherwise matched, judging
     * by the name -- confirm against the constructor assignment.
     *
     * @return the value of the {@code catchAll} field
     */
    public String catchAll() {
        return catchAll;
    }

    /**
     * Returns the stored {@code newline} fragment.
     * NOTE(review): presumably the line-break pattern, possibly tied to the tokenizeNewline
     * flag -- confirm against the constructor assignment.
     *
     * @return the value of the {@code newline} field
     */
    public String newline() {
        return newline;
    }

    /**
     * Returns the stored {@code space} fragment.
     * NOTE(review): presumably the whitespace pattern, judging by the name -- confirm against
     * the constructor assignment.
     *
     * @return the value of the {@code space} field
     */
    public String space() {
        return space;
    }

    /**
     * Returns the stored {@code tokenRegexString} value.
     * NOTE(review): presumably the full pattern text assembled from {@link #patterns()} --
     * confirm against the constructor assignment.
     *
     * @return the value of the {@code tokenRegexString} field
     */
    public String tokenRegexString() {
        return tokenRegexString;
    }

    /**
     * Returns the stored Scala {@code Regex} instance.
     * NOTE(review): presumably compiled from {@link #tokenRegexString()} -- confirm against the
     * constructor assignment.
     *
     * @return the value of the {@code tokenRegex} field
     */
    public Regex tokenRegex() {
        return tokenRegex;
    }

    /**
     * Applies the per-section closure {@code DeterministicTokenizer$$anonfun$process$1} to
     * every section of the document and returns the same document instance.
     *
     * @param document the document whose sections are visited
     * @return {@code document} itself, after all sections have been visited
     */
    @Override // cc.factorie.app.nlp.DocumentAnnotator
    public Document process(Document document) {
        // The closure captures both this tokenizer and the document; its body lives in a
        // separate generated class.
        final DeterministicTokenizer$$anonfun$process$1 perSection =
                new DeterministicTokenizer$$anonfun$process$1(this, document);
        document.sections().foreach(perSection);
        return document;
    }

    /**
     * Reports the attribute classes this annotator requires beforehand: none. The method
     * returns Scala's empty-list singleton {@code Nil}.
     *
     * @return the empty Scala list
     */
    @Override // cc.factorie.app.nlp.DocumentAnnotator
    /* renamed from: prereqAttrs */
    public Iterable<Class<?>> mo305prereqAttrs() {
        return Nil$.MODULE$;
    }

    /**
     * Reports the attribute classes this annotator declares as its output: a one-element Scala
     * list containing {@code Token.class}.
     *
     * @return a Scala {@code List} holding only {@code Token.class}
     */
    @Override // cc.factorie.app.nlp.DocumentAnnotator
    /* renamed from: postAttrs */
    public Iterable<Class<?>> mo304postAttrs() {
        final Class[] produced = {Token.class};
        return List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(produced));
    }

    /**
     * Convenience entry point: wraps the text in a new {@code Document}, runs
     * {@link #process(Document)} on it, and maps every resulting token through the closure
     * {@code DeterministicTokenizer$$anonfun$apply$4}.
     *
     * @param str the raw text to wrap in a {@code Document}
     * @return the mapped tokens as a Scala {@code Seq} (declared as {@code Seq<String>})
     */
    public Seq<String> apply(String str) {
        final Document annotated = process(new Document(str));
        return (Seq) annotated.tokens().toSeq().map(new DeterministicTokenizer$$anonfun$apply$4(this), Seq$.MODULE$.canBuildFrom());
    }

    public DeterministicTokenizer(boolean z, boolean z2, boolean z3, boolean z4, boolean z5) {
        this.cc$factorie$app$nlp$segment$DeterministicTokenizer$$tokenizeSgml = z2;
        this.cc$factorie$app$nlp$segment$DeterministicTokenizer$$tokenizeNewline = z3;
        this.cc$factorie$app$nlp$segment$DeterministicTokenizer$$abbrevPreceedsLowercase = z5;
        DocumentAnnotator.Cclass.$init$(this);
        this.patterns = new ArrayBuffer<>();
        this.html = "(?:<script[^>]*>(?:[^��](?!</script>))*[^��]?</script>)|(?:<style[^>]*>(?:[^��](?!</style>))*[^��]?</style>)";
        if (z2) {
            BoxedUnit boxedUnit = BoxedUnit.UNIT;
        } else {
            patterns().$plus$eq(html());
        }
        this.htmlComment = "(?:<|&lt;)!--(?:[^��](?!-->))*[^��]?--(?:>|&gt;)";
        patterns().$plus$eq(htmlComment());
        this.sgml2 = "<%(?:[^��](?!%>))*[^��]?%>";
        patterns().$plus$eq(sgml2());
        this.sgml = "</?[A-Za-z!].*?(?<!%)>";
        patterns().$plus$eq(sgml());
        this.htmlSymbol = "&(?:HT|TL|UR|LR|QC|QL|QR|amp|copy|reg|trade|odq|nbsp|cdq|lt|gt|#[0-9A-Za-z]+);";
        patterns().$plus$eq(htmlSymbol());
        this.url = "https?://[^ \t\n\f\r\"<>|()]+[^ \t\n\f\r\"<>|.!?(){},-]";
        patterns().$plus$eq(url());
        this.url2 = "(?:(?:www\\.(?:[^ \t\n\f\r\"<>|.!?(){},]+\\.)+[a-zA-Z]{2,4})|(?:(?:[^ \t\n\f\r\"`'<>|.!?(){},-_$]+\\.)+(?:com|org|net|edu|gov|cc|info|uk|de|fr|ca)))(?:/[^ \t\n\f\r\"<>|()]+[^ \t\n\f\r\"<>|.!?(){},-])?";
        patterns().$plus$eq(url2());
        this.url3 = "[A-Z]*[a-z0-9]+\\.(?:com|org|net|edu|gov|co\\.uk|ac\\.uk|de|fr|ca)";
        patterns().$plus$eq(url3());
        this.email = "(?:mailto:)?\\w+[-\\+\\.'\\w]*@(?:\\w+[-\\.\\+\\w]*\\.)*\\w+";
        patterns().$plus$eq(email());
        this.usphone = "(?:\\+?1[-\\.  ]?)?(?:\\(?:[0-9]{3}\\)[  ]?|[0-9]{3}[-  \\.])[0-9]{3}[\\-  \\.][0-9]{4}";
        patterns().$plus$eq(usphone());
        this.frphone = "(?:\\+33)?(?:\\s[012345][-\\. ])?[0-9](?:[-\\. ][0-9]{2}){3}";
        patterns().$plus$eq(frphone());
        this.date = "(?:(?:(?:(?:19|20)?[0-9]{2}[\\-/][0-3]?[0-9][\\-/][0-3]?[0-9])|(?:[0-3]?[0-9][\\-/][0-3]?[0-9][\\-/](?:19|20)?[0-9]{2}))(?![0-9]))";
        patterns().$plus$eq(date());
        this.decade = "(?:19|20)?[0-9]0s";
        patterns().$plus$eq(decade());
        this.currency = "(?:US|AU|NZ|C|CA|FJ|JY|HK|JM|KY|LR|NA|SB|SG|NT|BB|XC|BM|BN|BS|BZ|ZB|B)?\\$|&(?:euro|cent|pound);|\\p{Sc}|(?:USD|EUR|JPY|GBP|CHF|CAD|KPW|RMB|CNY|AD|GMT)(?![A-Z])";
        patterns().$plus$eq(currency());
        this.hashtag = "#[A-Za-z][A-Za-z0-9]+";
        patterns().$plus$eq(hashtag());
        this.atuser = "@[A-Za-z][A-Za-z0-9]+";
        patterns().$plus$eq(atuser());
        this.emoticon = "[#<%\\*]?[:;!#\\$%@=\\|][-\\+\\*=o^<]{0,4}[\\(\\)oODPQX\\*3{}\\[\\]]{1,5}[#><\\)\\(]?(?!\\S)|'\\.'";
        patterns().$plus$eq(emoticon());
        this.filename = "\\S+\\.(?:3gp|7z|ace|ai(?:f){0,2}|amr|asf|asp(?:x)?|asx|avi|bat|bin|bmp|bup|cab|cbr|cd(?:a|l|r)|chm|dat|divx|dll|dmg|doc|dss|dvf|dwg|eml|eps|exe|fl(?:a|v)|gif|gz|hqx|(?:s)?htm(?:l)?|ifo|indd|iso|jar|jsp|jp(?:e)?g|key|lnk|log|m4(?:a|b|p|v)|mcd|mdb|mid|mov|mp(?:2|3|4)|mp(?:e)?g|ms(?:i|wmm)|numbers|ogg|pages|pdf|php|png|pps|ppt|ps(?:d|t)?|Penn|pub|qb(?:b|w)|qxd|ra(?:m|r)|rm(?:vb)?|rtf|se(?:a|s)|sit(?:x)?|sql|ss|swf|tgz|tif|torrent|ttf|txt|vcd|vob|wav|wm(?:a|v)|wp(?:d|s)|xls|xml|xtm|zip)";
        patterns().$plus$eq(filename());
        this.consonantNonAbbrevs = "(?:Ng|cwm|nth|pm)(?=\\.)";
        patterns().$plus$eq(consonantNonAbbrevs());
        this.month = "Jan|Feb|Mar|Apr|Jun|Jul|Aug|Sep|Sept|Oct|Nov|Dec";
        this.day = "Mon|Tue|Tues|Wed|Thu|Thurs|Fri";
        this.state = "Ala|Alab|Ariz|Ark|Calif|Colo|Conn|Del|Fla|Ill|Ind|Kans|Kan|Ken|Kent|Mass|Mich|Minn|Miss|Mont|Nebr|Neb|Nev|Dak|Okla|Oreg|Tenn|Tex|Virg|Wash|Wis|Wyo";
        this.state2 = "Ak|As|Az|Ar|Ca|Co|Ct|De|Fm|Fl|Ga|Gu|Hi|Id|Il|Ia|Ks|Ky|La|Mh|Md|Ma|Mi|Mn|Ms|Mo|Mt|Ne|Nv|Mp|Pw|Pa|Pr|Tn|Tx|Ut|Vt|Vi|Va|Wa|Wi|Wy";
        this.honorific = "Adm|Attys?|Brig|Capts?|Cols?|Comms?|Co?mdrs?|Cpls?|Cpts?|Det|Drs?|Hon|Gens?|Govs?|Lieuts?|Lts?|Majs?|Miss|Messrs|Mr|Mrs|Ms|Pfc|Pres|Profs?|Pvts?|Reps?|Revs?|Sens?|Sgts?|Spc|Supts?";
        this.suffix = "Bros?|Esq|Jr|Ph\\.?[Dd]|Sr";
        this.place = "Aly|Anx|Ave?|Avn|Blvd|Boul|Byp|Cir|Cor|Rd|Squ|Sta|Ste?|Str|Ln|Mt";
        this.units = "in|fur|mi|lea|drc|oz|qtr|cwt";
        this.org = "Alt|Assns?|Bancorp|Bhd|Cos?|Comm|Comp|Corps?|Depts?|Elec|Inc|Inst|Intl|Lib|Ltd|M[ft]g|Mus|Natl|Plc|Pty|Sci|Ser|Sys|Univ";
        this.latin = "e.g|i.e";
        this.abbrev = "etc|vol|rev|dea|div|ests?|exp|exts?|gal|[BCDFGHJKLMNPQRSTVWX][bcdfghjklmnpqrstvwx]+";
        this.abbrevs = ((TraversableOnce) Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new String[]{month(), day(), state(), state2(), honorific(), suffix(), place(), units(), org(), latin(), abbrev()})).flatMap(new DeterministicTokenizer$$anonfun$1(this), Seq$.MODULE$.canBuildFrom())).mkString("|");
        patterns().$plus$eq(abbrevs());
        this.noAbbrev = "[Nn]o\\.(?=\\p{Z}*\\p{Nd})";
        patterns().$plus$eq(noAbbrev());
        this.latin2 = "(?:i.e|e.g)(?!\\p{L})";
        patterns().$plus$eq(latin2());
        this.htmlAccentedLetter = "(?:&[aeiouyntlAEIOUYNTL](?:acute|grave|uml|circ|orn|tilde|ring);)";
        patterns().$plus$eq(htmlAccentedLetter());
        this.letter = new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"(?:[\\\\p{L}\\\\p{M}]|", ")"})).s(Predef$.MODULE$.genericWrapArray(new Object[]{htmlAccentedLetter()}));
        this.ap = "(?:['\u0092’]|&(?:apos|rsquo|#00?39|#00?92|#2019);)";
        this.ap2 = new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"(?:", "|&lsquo;|[`\u0091‘‛])"})).s(Predef$.MODULE$.genericWrapArray(new Object[]{ap()}));
        this.contraction2 = "what(?=cha)|wan(?=na)";
        patterns().$plus$eq(contraction2());
        this.contraction = new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"(?:[nN]", "[tT]|(?<=\\\\p{L})", "(?:d|D|s|S|m|M|re|RE|ve|VE|ll|LL)(?!\\\\p{L}))"})).s(Predef$.MODULE$.genericWrapArray(new Object[]{ap(), ap()}));
        patterns().$plus$eq(contraction());
        this.apword = new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"", "nt|", "n(?:", ")?|", "em|[OoDdLl]", "", "+|[Oo]", "clock|ma", "am|add", "l|[Cc]", "mon|", "cause|", "till?|ol", "|Dunkin", "|", "[1-9]0s|N", "|\\\\p{L}\\\\p{Ll}*[aeiou]", "[aeiou]\\\\p{Ll}*"})).s(Predef$.MODULE$.genericWrapArray(new Object[]{ap(), ap(), ap(), ap2(), ap(), letter(), ap2(), ap2(), ap2(), ap2(), ap2(), ap(), ap(), ap(), ap(), ap(), ap()}));
        patterns().$plus$eq(apword());
        this.initials2 = "\\p{L}(?:\\.\\p{L})+(?!\\p{P})";
        patterns().$plus$eq(initials2());
        this.initials = "(?:\\p{L}\\.)+(?![\\.!\\?]{2}|\\.\\p{L})";
        patterns().$plus$eq(initials());
        this.ordinals = "[0-9]{1,4}(?:st|nd|rd|th)";
        patterns().$plus$eq(ordinals());
        this.quote = new StringBuilder().append("''|``|[‘’‚‛“”\u0091\u0092\u0093\u0094‚„‟‹›«»]{1,2}|[\"“”\\p{Pf}]|&(?:quot|[rl][ad]quo);|").append(ap2()).append("{2}").toString();
        patterns().$plus$eq(quote());
        if (z4) {
            patterns().$plus$eq(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"(?:", ")(?:[\\\\p{L}\\\\p{M}\\\\p{Nd}_]*(?:-[\\\\p{L}\\\\p{M}\\\\p{Nd}_]*)*)"})).s(Predef$.MODULE$.genericWrapArray(new Object[]{letter()})));
        } else {
            BoxedUnit boxedUnit2 = BoxedUnit.UNIT;
        }
        this.dashedPrefixes = "(?i:a|anti|arch|be|co|counter|cross|de|dis|e|en|em|ex|fore|hi|hind|mal|mid|midi|mini|mis|o|out|over|part|post|pre|pro|re|self|step|t|trans|twi|un|under|up|with|Afro|ambi|amphi|an|ana|Anglo|ante|apo|astro|auto|bi|bio|circum|cis|con|com|col|cor|contra|cryo|crypto|de|demi|demo|deutero|deuter|di|dia|dis|dif|du|duo|eco|electro|e|en|epi|Euro|ex|extra|fin|Franco|geo|gyro|hetero|hemi|homo|hydro|hyper|hypo|ideo|idio|in|Indo|in|infra|inter|intra|iso|macro|maxi|mega|meta|micro|mono|multi|neo|non|omni|ortho|paleo|pan|para|ped|per|peri|photo|pod|poly|post|pre|preter|pro|pros|proto|pseudo|pyro|quasi|retro|semi|socio|sub|sup|super|supra|sur|syn|tele|trans|tri|uh|ultra|uni|vice|x)";
        this.dashedSuffixes = "(?i:able|ahol|aholic|ation|centric|cracy|crat|dom|e-\\p{L}+|er|ery|esque|ette|fest|fi|fold|ful|gate|gon|hood|ian|ible|ing|isation|ise|ising|ism|ist|itis|ization|ize|izing|less|logist|logy|ly|most|o-torium|rama|ise)";
        this.dashedPrefixWord = new StringBuilder().append(dashedPrefixes()).append("-[\\p{L}\\p{M}][\\p{L}\\p{M}\\p{Nd}]*").toString();
        patterns().$plus$eq(dashedPrefixWord());
        this.dashedSuffixWord = new StringBuilder().append("[\\p{L}\\p{M}\\p{N}]+-").append(dashedSuffixes()).append(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"(?!", ")"})).s(Predef$.MODULE$.genericWrapArray(new Object[]{letter()}))).toString();
        patterns().$plus$eq(dashedSuffixWord());
        this.fraction = "[¼½¾⅓⅔]|&(?:frac14|frac12|frac34);|(?:\\p{N}{1,4}[-  ])?\\p{N}{1,4}(?:\\\\?/|⁄)\\p{N}{1,4}";
        patterns().$plus$eq(fraction());
        this.contractedWord = new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"[\\\\p{L}\\\\p{M}]+(?=(?:", "))"})).s(Predef$.MODULE$.genericWrapArray(new Object[]{contraction()}));
        patterns().$plus$eq(contractedWord());
        this.caps = new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"\\\\p{Lu}+(?:[&+](?!(?:", "|", "))(?:\\\\p{Lu}(?!\\\\p{Ll}))+)+"})).s(Predef$.MODULE$.genericWrapArray(new Object[]{htmlSymbol(), htmlAccentedLetter()}));
        patterns().$plus$eq(caps());
        this.word = new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"", "(?:[\\\\p{L}\\\\p{M}\\\\p{Nd}_]|", ")*+"})).s(Predef$.MODULE$.genericWrapArray(new Object[]{letter(), letter()}));
        patterns().$plus$eq(word());
        this.number = new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"(?<![\\\\p{Nd}])[-\\\\+\\\\.,]?(?!", ")\\\\p{Nd}+(?:[\\\\.:,]\\\\p{Nd}+)*"})).s(Predef$.MODULE$.genericWrapArray(new Object[]{date()}));
        patterns().$plus$eq(number());
        this.number2 = new StringBuilder().append(ap()).append("\\p{Nd}{2}").toString();
        patterns().$plus$eq(number2());
        patterns().$plus$eq(ap2());
        this.ellipsis = "\\.{2,5}(?![!\\?])|(?:\\.[  ]){2,4}\\.|[\u0085…]";
        patterns().$plus$eq(ellipsis());
        this.repeatedPunc = "[,~\\*=\\+\\.\\?!#]+|-{4,}";
        patterns().$plus$eq(repeatedPunc());
        this.mdash = "-{2,3}|&(?:mdash|MD);|[—―]";
        patterns().$plus$eq(mdash());
        this.dash = "&(?:ndash);|[-\u0096\u0097\\p{Pd}]";
        patterns().$plus$eq(dash());
        this.punc = "\\p{P}";
        patterns().$plus$eq(punc());
        this.symbol = "\\p{S}|&(?:degree|plusmn|times|divide|infin);";
        patterns().$plus$eq(symbol());
        this.htmlChar = "&[a-z]{3,6};";
        patterns().$plus$eq(htmlChar());
        this.catchAll = "\\S";
        patterns().$plus$eq(catchAll());
        this.newline = "\n";
        if (z3) {
            patterns().$plus$eq(newline());
        } else {
            BoxedUnit boxedUnit3 = BoxedUnit.UNIT;
        }
        this.space = "(?:\\p{Z}|&nbsp;)+";
        this.tokenRegexString = ((TraversableOnce) patterns().filter(new DeterministicTokenizer$$anonfun$2(this))).mkString("|");
        this.tokenRegex = z ? new StringOps(Predef$.MODULE$.augmentString(tokenRegexString())).r() : new StringOps(Predef$.MODULE$.augmentString(new StringBuilder().append("(?i)").append(tokenRegexString()).toString())).r();
    }
}
