/*
 * Decompiled with CFR 0.152.
 */
package com.hankcs.lucene;

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.corpus.io.IOUtil;
import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.tokenizer.TraditionalChineseTokenizer;
import com.hankcs.lucene.HanLPTokenizer;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeFactory;

public class HanLPTokenizerFactory
extends TokenizerFactory {
    private boolean enableIndexMode;
    private boolean enablePorterStemming;
    private boolean enableNumberQuantifierRecognize;
    private boolean enableCustomDictionary;
    private boolean enableCustomDictionaryForcing;
    private boolean enableTranslatedNameRecognize;
    private boolean enableJapaneseNameRecognize;
    private boolean enableOrganizationRecognize;
    private boolean enablePlaceRecognize;
    private boolean enableNameRecognize;
    private boolean enableTraditionalChineseMode;
    private String algorithm;
    private Set<String> stopWordDictionary;

    public HanLPTokenizerFactory(Map<String, String> args) {
        super(args);
        String stopWordDictionaryPath;
        this.enableIndexMode = this.getBoolean(args, "enableIndexMode", true);
        this.enablePorterStemming = this.getBoolean(args, "enablePorterStemming", false);
        this.enableNumberQuantifierRecognize = this.getBoolean(args, "enableNumberQuantifierRecognize", false);
        this.enableCustomDictionary = this.getBoolean(args, "enableCustomDictionary", true);
        this.enableCustomDictionaryForcing = this.getBoolean(args, "enableCustomDictionaryForcing", true);
        this.enableTranslatedNameRecognize = this.getBoolean(args, "enableTranslatedNameRecognize", false);
        this.enableJapaneseNameRecognize = this.getBoolean(args, "enableJapaneseNameRecognize", false);
        this.enableOrganizationRecognize = this.getBoolean(args, "enableOrganizationRecognize", false);
        this.enableNameRecognize = this.getBoolean(args, "enableNameRecognize", false);
        this.enablePlaceRecognize = this.getBoolean(args, "enablePlaceRecognize", false);
        this.enableTraditionalChineseMode = this.getBoolean(args, "enableTraditionalChineseMode", false);
        HanLP.Config.Normalization = this.getBoolean(args, "enableNormalization", HanLP.Config.Normalization);
        this.algorithm = this.getString(args, "algorithm", "viterbi");
        Set customDictionaryPathSet = this.getSet(args, "customDictionaryPath");
        if (customDictionaryPathSet != null) {
            HanLP.Config.CustomDictionaryPath = customDictionaryPathSet.toArray(new String[0]);
        }
        if ((stopWordDictionaryPath = this.get(args, "stopWordDictionaryPath")) != null) {
            this.stopWordDictionary = new TreeSet<String>();
            this.stopWordDictionary.addAll(IOUtil.readLineListWithLessMemory((String)stopWordDictionaryPath));
        }
        if (this.getBoolean(args, "enableDebug", false)) {
            HanLP.Config.enableDebug();
        }
    }

    protected final String getString(Map<String, String> args, String name, String defaultVal) {
        String s = args.remove(name);
        return s == null ? defaultVal : s;
    }

    public Tokenizer create(AttributeFactory factory) {
        Segment segment = HanLP.newSegment((String)this.algorithm).enableOffset(true).enableIndexMode(this.enableIndexMode).enableNameRecognize(this.enableNameRecognize).enableNumberQuantifierRecognize(this.enableNumberQuantifierRecognize).enableCustomDictionary(this.enableCustomDictionary).enableCustomDictionaryForcing(this.enableCustomDictionaryForcing).enableTranslatedNameRecognize(this.enableTranslatedNameRecognize).enableJapaneseNameRecognize(this.enableJapaneseNameRecognize).enableOrganizationRecognize(this.enableOrganizationRecognize).enablePlaceRecognize(this.enablePlaceRecognize);
        if (this.enableTraditionalChineseMode) {
            Segment inner;
            segment.enableIndexMode(false);
            TraditionalChineseTokenizer.SEGMENT = inner = segment;
            segment = new Segment(){

                protected List<Term> segSentence(char[] sentence) {
                    List termList = TraditionalChineseTokenizer.segment((String)new String(sentence));
                    return termList;
                }
            };
        }
        return new HanLPTokenizer(segment, this.stopWordDictionary, this.enablePorterStemming);
    }
}

