package org.apache.lucene.analysis.uk;

import java.io.IOException;
import java.io.Reader;
import java.io.UncheckedIOException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import morfologik.stemming.Dictionary;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.charfilter.MappingCharFilter;
import org.apache.lucene.analysis.charfilter.NormalizeCharMap;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.morfologik.MorfologikFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.IOUtils;

/* loaded from: input_file:org/apache/lucene/analysis/uk/UkrainianMorfologikAnalyzer.class */
public final class UkrainianMorfologikAnalyzer extends StopwordAnalyzerBase {
    private final CharArraySet stemExclusionSet;
    public static final String DEFAULT_STOPWORD_FILE = "stopwords.txt";
    private static final NormalizeCharMap NORMALIZER_MAP;

    /* loaded from: input_file:org/apache/lucene/analysis/uk/UkrainianMorfologikAnalyzer$DefaultSetHolder.class */
    private static class DefaultSetHolder {
        static final CharArraySet DEFAULT_STOP_SET;
        static final Dictionary DICTIONARY;

        private DefaultSetHolder() {
        }

        static {
            try {
                DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(UkrainianMorfologikAnalyzer.class, UkrainianMorfologikAnalyzer.DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
                DICTIONARY = Dictionary.read(UkrainianMorfologikAnalyzer.class.getClassLoader().getResource("ua/net/nlp/ukrainian.dict"));
            } catch (IOException e) {
                throw new UncheckedIOException("Unable to load analyzer resources", e);
            }
        }
    }

    public static CharArraySet getDefaultStopSet() {
        return DefaultSetHolder.DEFAULT_STOP_SET;
    }

    public UkrainianMorfologikAnalyzer() {
        this(DefaultSetHolder.DEFAULT_STOP_SET);
    }

    public UkrainianMorfologikAnalyzer(CharArraySet charArraySet) {
        this(charArraySet, CharArraySet.EMPTY_SET);
    }

    public UkrainianMorfologikAnalyzer(CharArraySet charArraySet, CharArraySet charArraySet2) {
        super(charArraySet);
        this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(charArraySet2));
    }

    protected Reader initReader(String str, Reader reader) {
        return new MappingCharFilter(NORMALIZER_MAP, reader);
    }

    protected Analyzer.TokenStreamComponents createComponents(String str) {
        StandardTokenizer standardTokenizer = new StandardTokenizer();
        TokenStream stopFilter = new StopFilter(new LowerCaseFilter(standardTokenizer), this.stopwords);
        if (!this.stemExclusionSet.isEmpty()) {
            stopFilter = new SetKeywordMarkerFilter(stopFilter, this.stemExclusionSet);
        }
        return new Analyzer.TokenStreamComponents(standardTokenizer, new MorfologikFilter(stopFilter, DefaultSetHolder.DICTIONARY));
    }

    static {
        NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
        builder.add("’", "'");
        builder.add("‘", "'");
        builder.add("ʼ", "'");
        builder.add("`", "'");
        builder.add("´", "'");
        builder.add("́", "");
        builder.add("\u00ad", "");
        builder.add("ґ", "г");
        builder.add("Ґ", "Г");
        NORMALIZER_MAP = builder.build();
    }
}
