package org.languagetool.tagging.disambiguation.uk;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.languagetool.AnalyzedSentence;
import org.languagetool.AnalyzedToken;
import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.JLanguageTool;

/* loaded from: input_file:org/languagetool/tagging/disambiguation/uk/SimpleDisambiguator.class */
/**
 * Removes unwanted readings from tokens based on two resource files:
 * a "remove" table (token -> lemma/tag rules) and a "dups" table
 * (lemma -> lemmas that should be dropped when that lemma is present).
 */
class SimpleDisambiguator {
    /** Token ending in a hyphen followed by one of the listed particles; compiled once instead of per token. */
    private static final Pattern PARTICLE_SUFFIX = Pattern.compile(".*-(то|от|таки|бо|но)");

    /** Token text -> matcher selecting the readings to remove for that token. */
    final Map<String, TokenMatcher> DISAMBIG_REMOVE_MAP = loadMap("/uk/disambig_remove.txt");
    /** Lemma -> lemmas whose readings are removed when the key lemma is among a token's readings. */
    final Map<String, List<String>> DISAMBIG_DUPS_MAP = loadMapDups("/uk/disambig_dups.txt");

    /**
     * A single removal rule: a lemma (or {@code "*"} for any lemma) plus a regular
     * expression that must match the reading's whole POS tag.
     */
    public static class MatcherEntry {
        private final String lemma;
        private final Pattern tagRegex;

        /**
         * @param str  lemma to match, or {@code "*"} to accept any lemma
         * @param str2 regular expression applied to the full POS tag
         */
        public MatcherEntry(String str, String str2) {
            this.lemma = str;
            this.tagRegex = Pattern.compile(str2);
        }

        /**
         * @return {@code true} if the reading has a tag, its lemma is accepted by this
         *         entry, and its POS tag fully matches the tag regex
         */
        public boolean matches(AnalyzedToken analyzedToken) {
            boolean lemmaAccepted = "*".equals(this.lemma) || this.lemma.equals(analyzedToken.getLemma());
            if (!lemmaAccepted || analyzedToken.hasNoTag()) {
                return false;
            }
            return this.tagRegex.matcher(analyzedToken.getPOSTag()).matches();
        }

        @Override
        public String toString() {
            return "MatcherEntry [lemma=" + this.lemma + ", tagRegex=" + this.tagRegex + "]";
        }
    }

    /** A list of {@link MatcherEntry} rules; a reading matches if any rule matches. */
    public static class TokenMatcher {
        private final List<MatcherEntry> matchers;

        /** @param list rules to try in order; the list is stored as-is, not copied */
        public TokenMatcher(List<MatcherEntry> list) {
            this.matchers = list;
        }

        /** @return {@code true} if at least one rule matches the given reading */
        public boolean matches(AnalyzedToken analyzedToken) {
            for (MatcherEntry entry : this.matchers) {
                if (entry.matches(analyzedToken)) {
                    return true;
                }
            }
            return false;
        }

        @Override
        public String toString() {
            return "TokenMatcher " + this.matchers;
        }
    }

    /**
     * Drops a trailing {@code #} comment (with the spaces before it) and surrounding
     * whitespace. Returns an empty string for blank and comment-only lines, including
     * comment lines that are indented.
     */
    private static String stripComment(String line) {
        return line.replaceFirst(" *#.*", "").trim();
    }

    /**
     * Loads the removal table. Each data line has the form
     * {@code token lemma tagRegex|lemma tagRegex|...}; blank lines and {@code #}
     * comments are ignored.
     *
     * @param path resource path passed to the data broker
     * @return map from token text to its matcher
     * @throws IllegalArgumentException if a data line has no rules or a rule lacks a tag regex
     */
    private static Map<String, TokenMatcher> loadMap(String path) {
        Map<String, TokenMatcher> result = new HashMap<>();
        for (String rawLine : JLanguageTool.getDataBroker().getFromResourceDirAsLines(path)) {
            String line = stripComment(rawLine);
            if (line.isEmpty()) {
                continue;
            }
            String[] keyAndRules = line.split(" +", 2);
            if (keyAndRules.length < 2) {
                throw new IllegalArgumentException("No rules for token in " + path + ": '" + rawLine + "'");
            }
            List<MatcherEntry> entries = new ArrayList<>();
            for (String rule : keyAndRules[1].split("\\|")) {
                // Trim so that spaces around '|' do not yield an empty lemma.
                String[] lemmaAndTag = rule.trim().split(" +");
                if (lemmaAndTag.length < 2) {
                    throw new IllegalArgumentException(
                        "Expected 'lemma tagRegex' but got '" + rule + "' in " + path + ": '" + rawLine + "'");
                }
                entries.add(new MatcherEntry(lemmaAndTag[0], lemmaAndTag[1]));
            }
            result.put(keyAndRules[0], new TokenMatcher(entries));
        }
        return result;
    }

    /**
     * Loads the duplicates table. Each data line is a space-separated list whose first
     * item is the key lemma and whose remaining items are the lemmas to drop; blank
     * lines and {@code #} comments are ignored.
     *
     * @param path resource path passed to the data broker
     * @return map from key lemma to the (possibly empty) list of lemmas to drop
     */
    private static Map<String, List<String>> loadMapDups(String path) {
        Map<String, List<String>> result = new HashMap<>();
        for (String rawLine : JLanguageTool.getDataBroker().getFromResourceDirAsLines(path)) {
            String line = stripComment(rawLine);
            if (line.isEmpty()) {
                continue;
            }
            String[] parts = line.split(" +");
            result.put(parts[0], Arrays.asList(parts).subList(1, parts.length));
        }
        return result;
    }

    /**
     * Looks up the removal matcher for a token: exact text first, then lower-cased
     * text, then - for tokens ending in a hyphenated particle - the part before the
     * last hyphen.
     *
     * @return the matcher, or {@code null} if none applies
     */
    private TokenMatcher findRemoveMatcher(String cleanToken) {
        TokenMatcher matcher = this.DISAMBIG_REMOVE_MAP.get(cleanToken);
        if (matcher != null) {
            return matcher;
        }
        matcher = this.DISAMBIG_REMOVE_MAP.get(cleanToken.toLowerCase());
        if (matcher != null) {
            return matcher;
        }
        int hyphenPos = cleanToken.lastIndexOf('-');
        if (hyphenPos > 0 && PARTICLE_SUFFIX.matcher(cleanToken).matches()) {
            return this.DISAMBIG_REMOVE_MAP.get(cleanToken.substring(0, hyphenPos));
        }
        return null;
    }

    /**
     * For every token of the sentence except the one at index 0, removes readings
     * selected by the removal table (reason {@code "dis_remove_rare"}) and then
     * readings whose lemma is listed as a duplicate of another lemma present on the
     * same token (reason {@code "dis_remove_dups"}).
     *
     * @param analyzedSentence sentence whose token readings are modified in place
     */
    public void removeRareForms(AnalyzedSentence analyzedSentence) {
        AnalyzedTokenReadings[] tokens = analyzedSentence.getTokensWithoutWhitespace();
        // Index 0 is skipped - presumably the sentence-start marker; confirm against the tokenizer.
        for (int i = 1; i < tokens.length; i++) {
            String cleanToken = tokens[i].getCleanToken();
            if (StringUtils.isEmpty(cleanToken)) {
                continue;
            }
            // Tokens starting with a lower-case letter are normalised to all lower case.
            if (Character.isLowerCase(cleanToken.charAt(0))) {
                cleanToken = cleanToken.toLowerCase();
            }
            TokenMatcher matcher = findRemoveMatcher(cleanToken);

            // NOTE(review): the readings list is fetched once, before any removal, exactly as
            // in the original; whether it reflects later removeReading() calls depends on
            // AnalyzedTokenReadings - verify before reordering.
            List<AnalyzedToken> readings = tokens[i].getReadings();
            if (matcher != null) {
                for (int r = readings.size() - 1; r >= 0; r--) {
                    AnalyzedToken reading = readings.get(r);
                    if (matcher.matches(reading)) {
                        tokens[i].removeReading(reading, "dis_remove_rare");
                    }
                }
            }

            // Lemmas on this token that have an entry in the duplicates table.
            Set<String> keyLemmas = readings.stream()
                .map(AnalyzedToken::getLemma)
                .filter(lemma -> lemma != null)
                .collect(Collectors.toSet());
            keyLemmas.retainAll(this.DISAMBIG_DUPS_MAP.keySet());
            if (keyLemmas.isEmpty()) {
                continue;
            }
            Set<String> lemmasToDrop = keyLemmas.stream()
                .map(this.DISAMBIG_DUPS_MAP::get)
                .flatMap(List::stream)
                .collect(Collectors.toSet());
            for (int r = readings.size() - 1; r >= 0; r--) {
                AnalyzedToken reading = readings.get(r);
                if (lemmasToDrop.contains(reading.getLemma())) {
                    tokens[i].removeReading(reading, "dis_remove_dups");
                }
            }
        }
    }
}
