package com.yahoo.language.opennlp;

import com.huaban.analysis.jieba.JiebaSegmenter;
import com.huaban.analysis.jieba.SegToken;
import com.yahoo.language.Language;
import com.yahoo.language.LinguisticsCase;
import com.yahoo.language.process.Normalizer;
import com.yahoo.language.process.SpecialTokenRegistry;
import com.yahoo.language.process.StemMode;
import com.yahoo.language.process.Token;
import com.yahoo.language.process.Tokenizer;
import com.yahoo.language.process.Transformer;
import com.yahoo.language.simple.SimpleNormalizer;
import com.yahoo.language.simple.SimpleToken;
import com.yahoo.language.simple.SimpleTokenType;
import com.yahoo.language.simple.SimpleTokenizer;
import com.yahoo.language.simple.SimpleTransformer;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import opennlp.tools.stemmer.Stemmer;
import opennlp.tools.stemmer.snowball.SnowballStemmer;

/* loaded from: input_file:com/yahoo/language/opennlp/OpenNlpTokenizer.class */
/**
 * Tokenizer which delegates splitting to {@link SimpleTokenizer} and, for languages that have a
 * Snowball algorithm, post-processes every token (normalize, lowercase, optional accent drop,
 * stem) using OpenNLP's {@link SnowballStemmer}.
 *
 * <p>When a Jieba segmenter is configured, Chinese text (simplified or traditional) is segmented
 * by Jieba instead and bypasses the normal tokenize/stem path entirely.
 */
public class OpenNlpTokenizer implements Tokenizer {

    /**
     * Whether the tokenizer is used at indexing or at query time. In this class the mode is only
     * consulted when choosing the Jieba segmentation mode for Chinese text.
     */
    public enum Mode {
        index,
        query
    }

    private final Mode mode;
    private final boolean snowballStemmingForEnglish;
    private final boolean createCjkGrams;
    private final Normalizer normalizer;
    private final Transformer transformer;
    private final Optional<JiebaSegmenter> chineseSegmenter;
    private final SimpleTokenizer simpleTokenizer;
    private final SpecialTokenRegistry specialTokenRegistry;

    /** Creates a query-mode tokenizer using a {@link SimpleNormalizer} and a {@link SimpleTransformer}. */
    public OpenNlpTokenizer() {
        this(new SimpleNormalizer(), new SimpleTransformer());
    }

    /**
     * Creates a query-mode tokenizer with Snowball stemming for English, Chinese segmentation and
     * CJK grams all switched off.
     */
    public OpenNlpTokenizer(Normalizer normalizer, Transformer transformer) {
        this(Mode.query, normalizer, transformer, false, false, false);
    }

    /** Same as the seven-argument boolean constructor, with an empty special token registry. */
    public OpenNlpTokenizer(Mode mode, Normalizer normalizer, Transformer transformer,
                            boolean snowballStemmingForEnglish, boolean cjk, boolean createCjkGrams) {
        this(mode, normalizer, transformer, snowballStemmingForEnglish, cjk, createCjkGrams,
             new SpecialTokenRegistry(List.of()));
    }

    /**
     * Creates a tokenizer, instantiating a new {@link JiebaSegmenter} when {@code cjk} is true.
     *
     * @param mode                       index or query mode
     * @param normalizer                 normalizer applied to each token before lowercasing
     * @param transformer                transformer used for accent dropping
     * @param snowballStemmingForEnglish whether English is stemmed by Snowball; when false English
     *                                   is handled by the wrapped {@link SimpleTokenizer}
     * @param cjk                        whether to segment Chinese text with Jieba
     * @param createCjkGrams             together with {@link Mode#index}, selects Jieba's INDEX
     *                                   segmentation mode instead of SEARCH
     * @param specialTokenRegistry       registry handed to the wrapped {@link SimpleTokenizer}
     */
    public OpenNlpTokenizer(Mode mode, Normalizer normalizer, Transformer transformer,
                            boolean snowballStemmingForEnglish, boolean cjk, boolean createCjkGrams,
                            SpecialTokenRegistry specialTokenRegistry) {
        this(mode, normalizer, transformer, snowballStemmingForEnglish,
             cjk ? Optional.of(new JiebaSegmenter()) : Optional.<JiebaSegmenter>empty(),
             createCjkGrams, specialTokenRegistry);
    }

    /**
     * Creates a tokenizer around an already constructed (or absent) Jieba segmenter.
     *
     * @param chineseSegmenter the segmenter to use for Chinese text, or empty to tokenize Chinese
     *                         like any other language
     */
    public OpenNlpTokenizer(Mode mode, Normalizer normalizer, Transformer transformer,
                            boolean snowballStemmingForEnglish, Optional<JiebaSegmenter> chineseSegmenter,
                            boolean createCjkGrams, SpecialTokenRegistry specialTokenRegistry) {
        this.mode = mode;
        this.normalizer = normalizer;
        this.transformer = transformer;
        this.snowballStemmingForEnglish = snowballStemmingForEnglish;
        this.chineseSegmenter = chineseSegmenter;
        this.createCjkGrams = createCjkGrams;
        this.specialTokenRegistry = specialTokenRegistry;
        this.simpleTokenizer = new SimpleTokenizer(normalizer, transformer, specialTokenRegistry);
    }

    /**
     * Tokenizes the input.
     *
     * <p>Chinese input goes to Jieba when a segmenter is configured. Otherwise, if a Snowball
     * stemmer is available for the language and stem mode, tokens are processed by this class;
     * if not, the call is delegated unchanged to the wrapped {@link SimpleTokenizer}.
     *
     * @param input         the text to tokenize
     * @param language      the language of the text, may be null (no Snowball stemming then)
     * @param stemMode      the stemming mode; {@link StemMode#NONE} disables Snowball stemming
     * @param removeAccents whether accents are dropped from each token
     * @return the resulting tokens
     */
    @Override
    public Iterable<Token> tokenize(String input, Language language, StemMode stemMode, boolean removeAccents) {
        boolean chinese = language == Language.CHINESE_SIMPLIFIED || language == Language.CHINESE_TRADITIONAL;
        if (chineseSegmenter.isPresent() && chinese) {
            return segmentChinese(input);
        }
        Stemmer stemmer = stemmerFor(language, stemMode);
        if (stemmer == null) {
            return simpleTokenizer.tokenize(input, language, stemMode, removeAccents);
        }
        return simpleTokenizer.tokenize(input,
                                        token -> processToken(token, language, stemMode, removeAccents, stemmer));
    }

    /**
     * Segments Chinese text with Jieba. Only called when {@link #chineseSegmenter} is present.
     * Each segment becomes a token whose original text is the matching slice of the input, whose
     * type is derived from the first code point of the segment word, and whose token string is
     * the segment word as returned by Jieba.
     */
    private Iterable<Token> segmentChinese(String input) {
        if (input.isEmpty()) {
            return List.of();
        }
        JiebaSegmenter.SegMode segMode = (mode == Mode.index && createCjkGrams)
                ? JiebaSegmenter.SegMode.INDEX
                : JiebaSegmenter.SegMode.SEARCH;
        List<Token> tokens = new ArrayList<>();
        for (SegToken segment : chineseSegmenter.get().process(input, segMode)) {
            int start = segment.startOffset;
            String original = input.substring(start, start + segment.word.length());
            tokens.add(new SimpleToken(original)
                               .setOffset(start)
                               .setType(SimpleTokenType.valueOf(segment.word.codePointAt(0)))
                               .setTokenString(segment.word));
        }
        return tokens;
    }

    /**
     * Turns a raw token into its indexable form: normalize, lowercase, optionally drop accents,
     * then stem unless the stem mode is {@link StemMode#NONE}.
     */
    private String processToken(String token, Language language, StemMode stemMode,
                                boolean removeAccents, Stemmer stemmer) {
        String processed = LinguisticsCase.toLowerCase(normalizer.normalize(token));
        if (removeAccents) {
            processed = transformer.accentDrop(processed, language);
        }
        if (stemMode != StemMode.NONE) {
            processed = stemmer.stem(processed).toString();
        }
        return processed;
    }

    /**
     * Returns a new Snowball stemmer for the language, or null when none should be used: the
     * language is null, stemming is disabled, the language is English while Snowball stemming
     * for English is off, or there is no Snowball algorithm for the language.
     */
    private Stemmer stemmerFor(Language language, StemMode stemMode) {
        if (language == null || stemMode == StemMode.NONE) {
            return null;
        }
        if (language == Language.ENGLISH && !snowballStemmingForEnglish) {
            return null;
        }
        SnowballStemmer.ALGORITHM algorithm = algorithmFor(language);
        return algorithm == null ? null : new SnowballStemmer(algorithm);
    }

    /** Maps a language to its Snowball algorithm, or null if there is none for that language. */
    private SnowballStemmer.ALGORITHM algorithmFor(Language language) {
        switch (language) {
            case ARABIC:
                return SnowballStemmer.ALGORITHM.ARABIC;
            case CATALAN:
                return SnowballStemmer.ALGORITHM.CATALAN;
            case DANISH:
                return SnowballStemmer.ALGORITHM.DANISH;
            case DUTCH:
                return SnowballStemmer.ALGORITHM.DUTCH;
            case ENGLISH:
                return SnowballStemmer.ALGORITHM.ENGLISH;
            case FINNISH:
                return SnowballStemmer.ALGORITHM.FINNISH;
            case FRENCH:
                return SnowballStemmer.ALGORITHM.FRENCH;
            case GERMAN:
                return SnowballStemmer.ALGORITHM.GERMAN;
            case GREEK:
                return SnowballStemmer.ALGORITHM.GREEK;
            case HUNGARIAN:
                return SnowballStemmer.ALGORITHM.HUNGARIAN;
            case INDONESIAN:
                return SnowballStemmer.ALGORITHM.INDONESIAN;
            case IRISH:
                return SnowballStemmer.ALGORITHM.IRISH;
            case ITALIAN:
                return SnowballStemmer.ALGORITHM.ITALIAN;
            case NORWEGIAN_BOKMAL:
            case NORWEGIAN_NYNORSK:
                // Both written standards share the single Norwegian Snowball algorithm.
                return SnowballStemmer.ALGORITHM.NORWEGIAN;
            case PORTUGUESE:
                return SnowballStemmer.ALGORITHM.PORTUGUESE;
            case ROMANIAN:
                return SnowballStemmer.ALGORITHM.ROMANIAN;
            case RUSSIAN:
                return SnowballStemmer.ALGORITHM.RUSSIAN;
            case SPANISH:
                return SnowballStemmer.ALGORITHM.SPANISH;
            case SWEDISH:
                return SnowballStemmer.ALGORITHM.SWEDISH;
            case TURKISH:
                return SnowballStemmer.ALGORITHM.TURKISH;
            default:
                return null;
        }
    }

    /**
     * Returns a new tokenizer using the given mode, with every other setting copied from this
     * one. The Jieba segmenter instance, if any, is shared with the returned tokenizer.
     */
    public OpenNlpTokenizer withMode(Mode mode) {
        return new OpenNlpTokenizer(mode, normalizer, transformer, snowballStemmingForEnglish,
                                    chineseSegmenter, createCjkGrams, specialTokenRegistry);
    }
}
