package org.languagetool.tokenizers.uk;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.languagetool.tokenizers.Tokenizer;

/* loaded from: input_file:org/languagetool/tokenizers/uk/UkrainianWordTokenizer.class */
public class UkrainianWordTokenizer implements Tokenizer {
    // Single-character stand-ins taken from the Unicode private-use area. tokenize() maps each
    // of these code points back to the ordinary character noted on its line once the text has
    // been split, so the ordinary character can be hidden from the splitting regex meanwhile.
    // U+E001, restored to ','
    private static final char DECIMAL_COMMA_SUBST = 57345;
    // U+E002, restored to ' '
    private static final char NON_BREAKING_SPACE_SUBST = 57346;
    // U+E003, restored to '.'
    private static final char NON_BREAKING_DOT_SUBST = 57347;
    // U+E004, restored to ':'
    private static final char NON_BREAKING_COLON_SUBST = 57348;
    // U+E005, restored to '('
    private static final char LEFT_BRACE_SUBST = 57349;
    // U+E006, restored to ')'
    private static final char RIGHT_BRACE_SUBST = 57350;
    // U+E007, restored to '/'
    private static final char NON_BREAKING_SLASH_SUBST = 57351;
    // U+E008, restored to '<'
    private static final char LEFT_ANGLE_SUBST = 57352;
    // U+E009, restored to '>'
    private static final char RIGHT_ANGLE_SUBST = 57353;
    // U+E010, restored to '/'
    private static final char SLASH_SUBST = 57360;
    // U+E109: the quote alternative of SPLIT_CHARS is guarded by a negative look-behind for this
    // character; tokenize() strips it from every token.
    private static final String NON_BREAKING_PLACEHOLDER = "\ue109";
    // U+E110: listed among the split characters of SPLIT_CHARS; tokenize() discards any token
    // that consists of exactly this string, so it forces a split without producing a token.
    private static final String BREAKING_PLACEHOLDER = "\ue110";
    // U+E120: SPLIT_CHARS ends with a negative look-ahead for this character, so a split
    // character directly followed by it is not matched; tokenize() strips it from every token.
    private static final String NON_BREAKING_PLACEHOLDER2 = "\ue120";
    // Replacement templates for Matcher.replaceAll(); "$n" refers to capture group n of the
    // pattern the template is used with.
    private static final String DECIMAL_COMMA_REPL = "$1\ue001$2";
    private static final String DASH_NUMBERS_REPL = "$1\ue110$2\ue110$3";
    private static final String N_DASH_SPACE_REPL = "$1\ue110$2";
    private static final String COLON_NUMBERS_REPL = "$1\ue004$2";
    private static final String DATE_PATTERN_REPL = "$1\ue003$2\ue003$3";
    private static final String INITIALS_DOT_REPL_SP_2 = "$1\ue003\ue110$2\ue003\ue110$3";
    private static final String INITIALS_DOT_REPL_SP_1 = "$1\ue003\ue110$2";
    private static final String INITIALS_DOT_REPL_RSP_2 = "$1\ue110$2\ue003\ue110$3\ue003\ue110";
    private static final String INITIALS_DOT_REPL_RSP_1 = "$1\ue110$2\ue003\ue110";
    private static final String ABBR_DOT_2_SMALL_LETTERS_REPL = "$1\ue003\ue110$2\ue003\ue110";
    private static final String ONE_DOT_TWO_REPL = "$1\ue003\ue110$2";
    // Soft hyphen (U+00AD) followed by a line feed, and the single character (U+E103) that
    // replaces that pair while splitting; tokenize() turns U+E103 back into the pair.
    private static final String SOFT_HYPHEN_WRAP = "\u00ad\n";
    private static final String SOFT_HYPHEN_WRAP_SUBST = "\ue103";
    // U+E300: first placeholder character handed out for a URL / e-mail match; each further
    // match in the same text gets the next code point.
    private static final int URL_START_REPLACE_CHAR = 58112;
    private static final String SPLIT_CHARS = "(!{2,3}|\\?{2,3}|\\.{3}|[!?][!?.]{1,2}|[  \\n\\r\\t,.;!?—:()\\[\\]{}<>/|\\\\…°$€₴=№§¿¡~]|%(?![-–][а-яіїєґ])|(?<!\ue109)[\"«»„”“]|(?<=[а-яіїєґА-ЯІЇЄҐ])[¹²⁰-⁹]|(?<![а-яіїєґА-ЯІЇЄҐa-zA-Z])[_*]+|[_*]+(?![а-яіїєґА-ЯІЇЄҐa-zA-Z0-9])|[\u2000-\u200f‚†- ‰-\u206f␀-⟿" + String.valueOf(Character.toChars(126976)) + "-" + String.valueOf(Character.toChars(131071)) + "\uf000-\uffff\ue110])(?!\ue120)";
    private static final Pattern SPLIT_CHARS_REGEX = Pattern.compile(SPLIT_CHARS);
    private static final Pattern WEIRD_APOSTROPH_PATTERN = Pattern.compile("([бвджзклмнпрстфхш])([\"”‟`´])([єїюя])", 66);
    public static final Pattern WORDS_WITH_BRACKETS_PATTERN = Pattern.compile("([а-яіїєґ])\\[([а-яіїєґ]+)\\]", 66);
    private static final Pattern DECIMAL_COMMA_PATTERN = Pattern.compile("([\\d]),([\\d])", 66);
    private static final Pattern DECIMAL_SPACE_PATTERN = Pattern.compile("(?<=^|[\\h\\v(])\\d{1,3}([\\h][\\d]{3})+(?=[\\h\\v(]|$)", 66);
    private static final Pattern DASH_NUMBERS_PATTERN = Pattern.compile("([IVXІХ]+)([–-])([IVXІХ]+)");
    private static final Pattern N_DASH_SPACE_PATTERN = Pattern.compile("([а-яіїєґa-z0-9])(–\\h)(?!(та|чи|і|й)[\\h\\v])", 66);
    private static final Pattern N_DASH_SPACE_PATTERN2 = Pattern.compile("([\\h.,;!?]–)([а-яіїєґa-z])", 66);
    private static final Pattern DOTTED_NUMBERS_PATTERN = Pattern.compile("([\\d])\\.([\\d])");
    private static final Pattern DOTTED_NUMBERS_PATTERN3 = Pattern.compile("([\\d])\\.([\\d]+)\\.([\\d])");
    private static final Pattern COLON_NUMBERS_PATTERN = Pattern.compile("([\\d]):([\\d])");
    private static final Pattern DATE_PATTERN = Pattern.compile("([\\d]{2})\\.([\\d]{2})\\.([\\d]{4})|([\\d]{4})\\.([\\d]{2})\\.([\\d]{2})|([\\d]{4})-([\\d]{2})-([\\d]{2})", 66);
    private static final Pattern BRACE_IN_WORD_PATTERN = Pattern.compile("([а-яіїєґ])\\(([а-яіїєґ']+)\\)", 66);
    private static final Pattern XML_TAG_PATTERN = Pattern.compile("<(/?[a-z_]+/?)>", 2);
    private static final Pattern INITIALS_DOT_PATTERN_SP_2 = Pattern.compile("([А-ЯІЇЄҐ])\\.([\\h\\v]{0,5}[А-ЯІЇЄҐ])\\.([\\h\\v]{0,5}[А-ЯІЇЄҐ][а-яіїєґ']+)");
    private static final Pattern INITIALS_DOT_PATTERN_SP_1 = Pattern.compile("([А-ЯІЇЄҐ])\\.([\\h\\v]{0,5}[А-ЯІЇЄҐ][а-яіїєґ']+)");
    private static final Pattern INITIALS_DOT_PATTERN_RSP_2 = Pattern.compile("([А-ЯІЇЄҐ][а-яіїєґ']+)([\\h\\v]?[А-ЯІЇЄҐ])\\.([\\h\\v]?[А-ЯІЇЄҐ])\\.");
    private static final Pattern INITIALS_DOT_PATTERN_RSP_1 = Pattern.compile("([А-ЯІЇЄҐ][а-яіїєґ']+)([\\h\\v]?[А-ЯІЇЄҐ])\\.");
    private static final Pattern ABBR_DOT_VO_PATTERN1 = Pattern.compile("([вВу])\\.([\\h\\v]*о)\\.");
    private static final Pattern ABBR_DOT_VO_PATTERN2 = Pattern.compile("(к)\\.([\\h\\v]*с)\\.");
    private static final Pattern ABBR_DOT_VO_PATTERN3 = Pattern.compile("(ч|ст)\\.([\\h\\v]*л)\\.");
    private static final Pattern ABBR_DOT_TYS_PATTERN1 = Pattern.compile("([0-9IІ][\\h\\v]+)(тис|арт)\\.");
    private static final Pattern ABBR_DOT_TYS_PATTERN2 = Pattern.compile("(тис|арт)\\.([\\h\\v]+[а-яіїєґ0-9])");
    private static final Pattern ABBR_DOT_ART_PATTERN = Pattern.compile("([Аа]рт|[Мм]ал|[Рр]ис)\\.([\\h]*[0-9])");
    private static final Pattern ABBR_DOT_MAN_PATTERN = Pattern.compile("(Ман)\\.([\\h]*(Сіті|[Юю]н))");
    private static final Pattern ABBR_DOT_LAT_PATTERN = Pattern.compile("([^а-яіїєґА-ЯІЇЄҐ'́-]лат)\\.([\\h\\v]+[a-zA-Z])");
    private static final Pattern ABBR_DOT_PROF_PATTERN = Pattern.compile("(?<![а-яіїєґА-ЯІЇЄҐ'́-])([Аа]кад|[Пп]роф|[Дд]оц|[Аа]сист|[Аа]рх|ап|тов|вул|о|р|ім|упоряд|др|[Пп]реп|Ів|Дж)\\.([\\h\\v]+[А-ЯІЇЄҐа-яіїєґ])");
    private static final Pattern ABBR_DOT_GUB_PATTERN = Pattern.compile("(.[А-ЯІЇЄҐ][а-яіїєґ'-]+[\\h\\v]+губ)\\.");
    private static final Pattern ABBR_DOT_DASH_PATTERN = Pattern.compile("\\b([А-ЯІЇЄҐ]ж?)\\.([-–]([А-ЯІЇЄҐ][а-яіїєґ']{2}|[А-ЯІЇЄҐ]\\.))");
    private static final Pattern ABBR_DOT_KUB_SM_PATTERN = Pattern.compile("(кв|куб)\\.([\\h\\v]*(?:[смкд]|мк)?м)");
    private static final Pattern ABBR_DOT_S_G_PATTERN = Pattern.compile("(с)\\.(-г)\\.");
    private static final Pattern ABBR_DOT_CHL_KOR_PATTERN = Pattern.compile("(чл)\\.(-кор)\\.");
    private static final Pattern ABBR_DOT_PN_ZAH_PATTERN = Pattern.compile("(пн|пд)\\.(-(зах|сх))\\.");
    private static final Pattern INVALID_MLN_DOT_PATTERN = Pattern.compile("(млн|млрд)\\.( [а-яіїєґ])");
    private static final Pattern ABBR_DOT_2_SMALL_LETTERS_PATTERN = Pattern.compile("([^а-яіїєґА-ЯІЇЄҐ'́-][векнпрстцч]{1,2})\\.(\\h*(?![смкд]?м\\.)[екмнпрстч]{1,2})\\.");
    private static final Pattern ABBR_DOT_NON_ENDING_PATTERN = Pattern.compile("(?<![а-яіїєґА-ЯІЇЄҐ'́-])(абз|австрал|ам|амер|англ|акад(ем)?|арк|ауд|біол|бл(?:изьк)?|буд|в(?!\\.+)|вип|вірм|грец(?:ьк)?|держ|див|дир|діал|дод|дол|досл|доц|доп|екон|ел|жін|зав|заст|зах|зб|зв|зневажл?|зовн|ім|івр|інж|ісп|іст|італ|к|каб|каф|канд|кв|[1-9]-кімн|кімн|кін|кл|кн|коеф|латин|мал|моб|н|[Нн]апр|нац|нпр|образн|оз|оп|оф|п|пен|перекл|перен|пл|пол|пов|пор|порівн|поч|пп|прибл|прикм|прим|присл|пров|пром|просп|[Рр]ед|[Рр]еж|розд|розм|рос|рт|рум|с|санскр|[Сс]вв?|скор|соц|співавт|[сС]т|стор|сх|табл|тт|[тТ]ел|техн|укр|філол|фр|франц|худ|цит|ч|чайн|част|ц|яп)\\.(?!\ue120|\\.+[\\h\\v]*$)");
    private static final Pattern ABBR_DOT_NON_ENDING_PATTERN_2 = Pattern.compile("([^а-яіїєґА-ЯІЇЄҐ'-]м\\.)([\\h\\v]*[А-ЯІЇЄҐ])");
    private static final Pattern ABBR_DOT_NAR_PATTERN_1 = Pattern.compile("(([0-9]|рік|[рp]\\.|[-–—])[\\h\\v]+нар)\\.");
    private static final Pattern ABBR_DOT_NAR_PATTERN_2 = Pattern.compile("\\b(нар)\\.([\\h\\v]+[0-9а-яіїєґ])");
    private static final Pattern ABBR_DOT_ENDING_PATTERN = Pattern.compile("([^а-яіїєґА-ЯІЇЄҐ'́-]((та|й|і) (інш?|под)|атм|відс|гр|коп|обл|р|рр|РР|руб|ст|стст|стол|стор|чол|шт))\\.(?!\ue120)");
    private static final Pattern ABBR_DOT_I_T_P_PATTERN = Pattern.compile("([ій][\\h\\v]+т\\.)([\\h\\v]*(д|п|ін)\\.)");
    private static final Pattern ABBR_DOT_I_T_CH_PATTERN = Pattern.compile("([ву][\\h\\v]+т\\.)([\\h\\v]*ч\\.)");
    private static final Pattern ABBR_DOT_T_ZV_PATTERN = Pattern.compile("([\\h\\v\\(]+т\\.)([\\h\\v]*зв\\.)");
    private static final Pattern ABBR_AT_THE_END = Pattern.compile("(?<![а-яіїєґА-ЯІЇЄҐ'́])(тис|губ|[А-ЯІЇЄҐ])\\.[\\h\\v]*$");
    private static final Pattern APOSTROPHE_BEGIN_PATTERN = Pattern.compile("(^|[\\h\\v(„«\"'])'(?!дно)(\\p{L})");
    private static final Pattern APOSTROPHE_END_PATTER = Pattern.compile("(\\p{L})(?<!\\b(?:мо|тре|тра|чо|нічо|бо|зара|пра))'([^\\p{L}-]|$)", 66);
    private static final Pattern YEAR_WITH_R = Pattern.compile("((?:[12][0-9]{3}[—–-])?[12][0-9]{3})(рр?\\.)");
    private static final Pattern COMPOUND_WITH_QUOTES1 = Pattern.compile("([а-яіїє]-)([«\"„])([а-яіїєґ'-]+)([»\"“])", 66);
    private static final Pattern COMPOUND_WITH_QUOTES2 = Pattern.compile("([«\"„])([а-яіїєґ0-9'-]+)([»\\\"“])(-[а-яіїє])", 66);
    private static final Pattern ABBR_DOT_RED_AVT_PATTERN = Pattern.compile("([\\h\\v]+(?:[Рр]ед|[Аа]вт))\\.([\\)\\]])");
    private static final Pattern URL_PATTERN = Pattern.compile("((https?|ftp)://|www\\.)[^\\h\\v/$.?#),]+\\.[^\\h\\v),\">]*|(mailto:)?[\\p{L}\\d._-]+@[\\p{L}\\d_-]+(\\.[\\p{L}\\d_-]+)+", 2);
    private static final Pattern LEADING_DASH_PATTERN = Pattern.compile("^([—–])([а-яіїєґА-ЯІЇЄҐA-Z])");
    private static final Pattern LEADING_DASH_PATTERN_2 = Pattern.compile("^(-)([А-ЯІЇЄҐA-Z])");
    private static final Pattern NUMBER_MISSING_SPACE = Pattern.compile("((?:[\\h\\v\ue110]|^)[а-яїієґА-ЯІЇЄҐ'-]*[а-яїієґ']?[а-яїієґ])([0-9]+(?![а-яіїєґА-ЯІЇЄҐa-zA-Z»\"“]))");
    private static final Pattern WEB_ENTITIES = Pattern.compile("([а-яіїєґ])\\.(НЕТ|net|Інфо|Info|City|Life|UA|юа|лі|media|com|фм|ru|Ру)\\b", 66);
    private static final Pattern WEB_ENTITIES2 = Pattern.compile("\\.([a-z_-]+)\\.(ua)", 66);

    /**
     * Splits {@code str} into tokens; delimiters matched by {@code SPLIT_CHARS_REGEX} are
     * returned as tokens of their own.
     *
     * <p>Unless the text is blank after {@code trim()}, it is first rewritten by
     * {@link #adjustTextForTokenizing}, which hides characters that must not split behind
     * private-use substitutes and stashes URL / e-mail matches in a map. After splitting,
     * every piece has those substitutes mapped back, the non-breaking placeholders removed
     * and the stashed URLs restored. Pieces equal to {@code BREAKING_PLACEHOLDER} are dropped.
     *
     * @param str text to tokenize; must not be {@code null}
     * @return the tokens in text order, in a new mutable list
     */
    public List<String> tokenize(String str) {
        HashMap<String, String> stashedUrls = new HashMap<>();
        String prepared = str;
        if (!str.trim().isEmpty()) {
            prepared = adjustTextForTokenizing(str, stashedUrls);
        }

        List<String> tokens = new ArrayList<>();
        for (String piece : splitWithDelimiters(prepared, SPLIT_CHARS_REGEX)) {
            if (piece.equals(BREAKING_PLACEHOLDER)) {
                // Pure split marker inserted during preparation; it carries no text.
                continue;
            }
            String token = piece
                    .replace(DECIMAL_COMMA_SUBST, ',')
                    .replace(NON_BREAKING_SLASH_SUBST, '/')
                    .replace(NON_BREAKING_COLON_SUBST, ':')
                    .replace(NON_BREAKING_SPACE_SUBST, ' ')
                    .replace(LEFT_BRACE_SUBST, '(')
                    .replace(RIGHT_BRACE_SUBST, ')')
                    .replace(LEFT_ANGLE_SUBST, '<')
                    .replace(RIGHT_ANGLE_SUBST, '>')
                    .replace(SLASH_SUBST, '/')
                    .replace(NON_BREAKING_DOT_SUBST, '.')
                    .replace(SOFT_HYPHEN_WRAP_SUBST, SOFT_HYPHEN_WRAP)
                    .replace(NON_BREAKING_PLACEHOLDER, "")
                    .replace(NON_BREAKING_PLACEHOLDER2, "");
            // Put every stashed URL / e-mail back in place of its one-character placeholder.
            for (Map.Entry<String, String> url : stashedUrls.entrySet()) {
                token = token.replace(url.getKey(), url.getValue());
            }
            tokens.add(token);
        }
        return tokens;
    }

    private String adjustTextForTokenizing(String str, HashMap<String, String> hashMap) {
        String cleanup = cleanup(str);
        if ("—–-".indexOf(cleanup.charAt(0)) >= 0) {
            Matcher matcher = LEADING_DASH_PATTERN.matcher(cleanup);
            if (matcher.find()) {
                cleanup = matcher.replaceFirst(N_DASH_SPACE_REPL);
            } else {
                Matcher matcher2 = LEADING_DASH_PATTERN_2.matcher(cleanup);
                if (matcher2.find()) {
                    cleanup = matcher2.replaceFirst(N_DASH_SPACE_REPL);
                }
            }
        }
        if (cleanup.contains(",")) {
            cleanup = DECIMAL_COMMA_PATTERN.matcher(cleanup).replaceAll(DECIMAL_COMMA_REPL);
        }
        if (cleanup.contains("http") || cleanup.contains("www") || cleanup.contains("@") || cleanup.contains("ftp")) {
            Matcher matcher3 = URL_PATTERN.matcher(cleanup);
            int i = URL_START_REPLACE_CHAR;
            while (matcher3.find()) {
                String group = matcher3.group();
                String valueOf = String.valueOf((char) i);
                hashMap.put(valueOf, group);
                cleanup = matcher3.replaceFirst(valueOf);
                i++;
                matcher3 = URL_PATTERN.matcher(cleanup);
            }
        }
        if (cleanup.indexOf(8212) != -1) {
            cleanup = cleanup.replaceAll("—([\\h\\v])", "\ue110—$1");
        }
        boolean z = cleanup.indexOf(8211) != -1;
        if (cleanup.indexOf(45) != -1 || z) {
            cleanup = DASH_NUMBERS_PATTERN.matcher(cleanup).replaceAll(DASH_NUMBERS_REPL);
            if (z) {
                cleanup = N_DASH_SPACE_PATTERN2.matcher(N_DASH_SPACE_PATTERN.matcher(cleanup).replaceAll(N_DASH_SPACE_REPL)).replaceAll(N_DASH_SPACE_REPL);
            }
        }
        if (cleanup.indexOf("с/г") != -1) {
            cleanup = cleanup.replaceAll("с/г", "с\ue007г");
        }
        if (cleanup.indexOf("Л/ДНР") != -1) {
            cleanup = cleanup.replaceAll("Л/ДНР", "Л\ue007ДНР");
        }
        if (cleanup.indexOf("р.") != -1) {
            Matcher matcher4 = YEAR_WITH_R.matcher(cleanup);
            if (matcher4.find()) {
                cleanup = matcher4.replaceAll(N_DASH_SPACE_REPL);
            }
        }
        String replace = cleanup.replace("#", "\ue110#");
        if (replace.indexOf(37) >= 0) {
            replace = replace.replaceAll("%([^-])", "%\ue110$1");
        }
        String replaceAll = COMPOUND_WITH_QUOTES2.matcher(COMPOUND_WITH_QUOTES1.matcher(replace).replaceAll("$1$2\ue120$3\ue120$4\ue120")).replaceAll("$1\ue120$2\ue120$3\ue120$4");
        if (replaceAll.indexOf(91) != -1) {
            replaceAll = WORDS_WITH_BRACKETS_PATTERN.matcher(replaceAll).replaceAll("$1\\[\ue120$2\\]\ue120");
        }
        int indexOf = replaceAll.indexOf(46);
        String replaceFirst = replaceAll.replaceFirst("[\\h\\v]*$", "");
        boolean z2 = indexOf >= 0 && indexOf < replaceFirst.length() - 1;
        if (z2 || (indexOf == replaceFirst.length() - 1 && ABBR_AT_THE_END.matcher(replaceAll).find())) {
            replaceAll = INVALID_MLN_DOT_PATTERN.matcher(ABBR_DOT_NON_ENDING_PATTERN_2.matcher(ABBR_DOT_NON_ENDING_PATTERN.matcher(ABBR_DOT_RED_AVT_PATTERN.matcher(ABBR_DOT_T_ZV_PATTERN.matcher(ABBR_DOT_I_T_CH_PATTERN.matcher(ABBR_DOT_I_T_P_PATTERN.matcher(ABBR_DOT_PN_ZAH_PATTERN.matcher(ABBR_DOT_CHL_KOR_PATTERN.matcher(ABBR_DOT_S_G_PATTERN.matcher(ABBR_DOT_KUB_SM_PATTERN.matcher(INITIALS_DOT_PATTERN_RSP_1.matcher(INITIALS_DOT_PATTERN_RSP_2.matcher(INITIALS_DOT_PATTERN_SP_1.matcher(INITIALS_DOT_PATTERN_SP_2.matcher(ABBR_DOT_DASH_PATTERN.matcher(ABBR_DOT_GUB_PATTERN.matcher(ABBR_DOT_PROF_PATTERN.matcher(ABBR_DOT_LAT_PATTERN.matcher(ABBR_DOT_TYS_PATTERN2.matcher(ABBR_DOT_TYS_PATTERN1.matcher(ABBR_DOT_MAN_PATTERN.matcher(ABBR_DOT_ART_PATTERN.matcher(ABBR_DOT_VO_PATTERN3.matcher(ABBR_DOT_VO_PATTERN2.matcher(ABBR_DOT_VO_PATTERN1.matcher(ABBR_DOT_2_SMALL_LETTERS_PATTERN.matcher(ABBR_DOT_NAR_PATTERN_2.matcher(ABBR_DOT_NAR_PATTERN_1.matcher(DOTTED_NUMBERS_PATTERN.matcher(DOTTED_NUMBERS_PATTERN3.matcher(DATE_PATTERN.matcher(replaceAll).replaceAll(DATE_PATTERN_REPL)).replaceAll("$1.\ue120$2.\ue120$3")).replaceAll("$1.\ue120$2")).replaceAll("$1.\ue120\ue110")).replaceAll("$1.\ue120\ue110$2")).replaceAll("$1.\ue120\ue110$2.\ue120\ue110")).replaceAll(ABBR_DOT_2_SMALL_LETTERS_REPL)).replaceAll(ABBR_DOT_2_SMALL_LETTERS_REPL)).replaceAll(ABBR_DOT_2_SMALL_LETTERS_REPL)).replaceAll("$1\ue003\ue110$2")).replaceAll("$1\ue003\ue110$2")).replaceAll("$1$2\ue003\ue110")).replaceAll("$1\ue003\ue110$2")).replaceAll("$1\ue003\ue110$2")).replaceAll("$1\ue003\ue110$2")).replaceAll("$1\ue003\ue110")).replaceAll("$1\ue003$2")).replaceAll(INITIALS_DOT_REPL_SP_2)).replaceAll("$1\ue003\ue110$2")).replaceAll(INITIALS_DOT_REPL_RSP_2)).replaceAll(INITIALS_DOT_REPL_RSP_1)).replaceAll("$1.\ue120\ue110$2")).replaceAll("$1\ue003$2\ue003\ue110")).replaceAll("$1.\ue120$2.\ue120\ue110")).replaceAll("$1.\ue120\ue110$2.\ue120\ue110")).replaceAll("$1\ue120\ue110$2\ue120\ue110")).replaceAll("$1\ue120\ue110$2\ue12
0\ue110")).replaceAll("$1\ue120\ue110$2\ue120\ue110")).replaceAll("$1.\ue120\ue110$2")).replaceAll("$1.\ue120\ue110")).replaceAll("$1\ue120\ue110$2")).replaceAll("$1.\ue120\ue110$2");
        }
        if (z2) {
            replaceAll = WEB_ENTITIES2.matcher(WEB_ENTITIES.matcher(replaceAll).replaceAll("$1.\ue120$2")).replaceAll(".\ue120$1.\ue120$2");
        }
        String replaceAll2 = ABBR_DOT_ENDING_PATTERN.matcher(replaceAll).replaceAll("$1.\ue120\ue110");
        Matcher matcher5 = DECIMAL_SPACE_PATTERN.matcher(replaceAll2);
        if (matcher5.find()) {
            StringBuffer stringBuffer = new StringBuffer();
            do {
                matcher5.appendReplacement(stringBuffer, matcher5.group(0).replace(' ', (char) 57346).replace((char) 160, (char) 57346).replace((char) 8239, (char) 57346));
            } while (matcher5.find());
            matcher5.appendTail(stringBuffer);
            replaceAll2 = stringBuffer.toString();
        }
        if (replaceAll2.contains(":")) {
            replaceAll2 = COLON_NUMBERS_PATTERN.matcher(replaceAll2).replaceAll(COLON_NUMBERS_REPL);
        }
        if (replaceAll2.contains("(")) {
            replaceAll2 = BRACE_IN_WORD_PATTERN.matcher(replaceAll2).replaceAll("$1\ue005$2\ue006");
        }
        if (replaceAll2.contains("<")) {
            replaceAll2 = XML_TAG_PATTERN.matcher(replaceAll2).replaceAll("\ue110\ue008$1\ue009\ue110").replace("\ue008/", "\ue008\ue010").replace("/\ue009", "\ue010\ue009");
        }
        if (replaceAll2.contains("-")) {
            replaceAll2 = replaceAll2.replaceAll("([а-яіїєґА-ЯІЇЄҐ])([»\"-]+-)", N_DASH_SPACE_REPL).replaceAll("([»\"-]+-)([а-яіїєґА-ЯІЇЄҐ])", N_DASH_SPACE_REPL);
        }
        if (replaceAll2.contains(SOFT_HYPHEN_WRAP)) {
            replaceAll2 = replaceAll2.replaceAll("(?<!\\s)\u00ad\n", SOFT_HYPHEN_WRAP_SUBST);
        }
        if (replaceAll2.indexOf(39) >= 0) {
            replaceAll2 = APOSTROPHE_END_PATTER.matcher(APOSTROPHE_BEGIN_PATTERN.matcher(replaceAll2).replaceAll("$1'\ue110$2")).replaceAll("$1\ue110'$2");
        }
        if (replaceAll2.contains("+")) {
            replaceAll2 = replaceAll2.replaceAll("\\+(?=[а-яіїєґА-ЯІЇЄҐ0-9])", "\ue110+\ue110");
        }
        if (replaceAll2.length() > 1 && (replaceAll2.contains("-") || replaceAll2.contains("–"))) {
            replaceAll2 = replaceAll2.replaceAll("(?<=(^|[\\h\\v]))([-–])(?=[0-9])", "$2\ue110");
        }
        return NUMBER_MISSING_SPACE.matcher(replaceAll2).replaceAll(N_DASH_SPACE_REPL);
    }

    /**
     * Normalises look-alike punctuation and guards "weird" apostrophes.
     *
     * <p>U+2019, U+02BC and U+2018 become an ASCII apostrophe, U+201A becomes a comma and
     * U+2011 becomes an ASCII hyphen. Afterwards every {@code WEIRD_APOSTROPH_PATTERN} match
     * gets U+E120 inserted on both sides of its middle (second) group.
     *
     * @param str text to normalise
     * @return the normalised text
     */
    private static String cleanup(String str) {
        String normalized = str.replace('\u2019', '\'');
        normalized = normalized.replace('\u02bc', '\'');
        normalized = normalized.replace('\u2018', '\'');
        normalized = normalized.replace('\u201a', ',');
        normalized = normalized.replace('\u2011', '-');
        Matcher weirdApostrophe = WEIRD_APOSTROPH_PATTERN.matcher(normalized);
        return weirdApostrophe.replaceAll("$1\ue120$2\ue120$3");
    }

    /**
     * Splits {@code str} at every match of {@code pattern}, keeping the matches.
     *
     * <p>The result alternates, in text order, between the non-empty stretches of text that
     * lie between matches and the matched delimiters themselves. Empty stretches (two
     * adjacent matches, or a match at the very start or end) produce no entry.
     *
     * @param str     text to split
     * @param pattern delimiter pattern
     * @return the pieces and delimiters in order, in a new mutable list
     */
    private static List<String> splitWithDelimiters(String str, Pattern pattern) {
        List<String> pieces = new ArrayList<>();
        Matcher delimiter = pattern.matcher(str);
        // Index just past the previous delimiter, i.e. where the next text stretch begins.
        int stretchStart = 0;
        while (delimiter.find()) {
            int delimiterStart = delimiter.start();
            if (delimiterStart != stretchStart) {
                pieces.add(str.substring(stretchStart, delimiterStart));
            }
            pieces.add(delimiter.group());
            stretchStart = delimiter.end();
        }
        if (stretchStart != str.length()) {
            pieces.add(str.substring(stretchStart));
        }
        return pieces;
    }
}
