package org.apdplat.word.tagging;

import java.util.List;
import org.apdplat.word.WordSegmenter;
import org.apdplat.word.recognition.RecognitionTool;
import org.apdplat.word.segmentation.PartOfSpeech;
import org.apdplat.word.segmentation.Word;
import org.apdplat.word.util.AutoDetector;
import org.apdplat.word.util.GenericTrie;
import org.apdplat.word.util.ResourceLoader;
import org.apdplat.word.util.WordConfTools;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apdplat/word/tagging/PartOfSpeechTagging.class */
/**
 * Static utility that assigns a part-of-speech tag to each {@link Word} in a list.
 *
 * <p>Tags come from a dictionary held in a {@link GenericTrie} keyed by word text. The
 * dictionary is read from the resource configured under {@code part.of.speech.dic.path}
 * (default {@code classpath:part_of_speech_dic.txt}); each line is expected to look like
 * {@code word:tag}. When a word is not in the dictionary, a tag is derived from the
 * {@link RecognitionTool} predicates instead.
 *
 * <p>The dictionary is loaded once when this class is initialized (see the static
 * initializer at the bottom of the class).
 */
public class PartOfSpeechTagging {
    private static final Logger LOGGER = LoggerFactory.getLogger(PartOfSpeechTagging.class);

    /** Dictionary: word text -> part-of-speech tag string. */
    private static final GenericTrie<String> GENERIC_TRIE = new GenericTrie<>();

    /** Separator between the word and its tag in a dictionary line. */
    private static final String SEPARATOR = ":";

    private PartOfSpeechTagging() {
    }

    /**
     * Registers a {@link ResourceLoader} for the part-of-speech dictionary with
     * {@link AutoDetector#loadAndWatch}. The loader's callbacks clear, fill, extend and shrink
     * the in-memory dictionary. Malformed lines are logged and skipped, never propagated.
     */
    public static void reload() {
        AutoDetector.loadAndWatch(new ResourceLoader() {
            @Override
            public void clear() {
                GENERIC_TRIE.clear();
            }

            @Override
            public void load(List<String> list) {
                LOGGER.info("初始化词性标注器");
                int loaded = 0;
                for (String line : list) {
                    if (PartOfSpeechTagging.putEntry(line)) {
                        loaded++;
                    }
                }
                LOGGER.info("词性标注器初始化完毕，词性数据条数：{}", loaded);
            }

            @Override
            public void add(String str) {
                PartOfSpeechTagging.putEntry(str);
            }

            @Override
            public void remove(String str) {
                PartOfSpeechTagging.removeEntry(str);
            }
        }, WordConfTools.get("part.of.speech.dic.path", "classpath:part_of_speech_dic.txt"));
    }

    /**
     * Parses one {@code word:tag} dictionary line and stores it in the dictionary.
     *
     * <p>A line is accepted when splitting it on {@code ':'} yields at least two fields; the
     * first is the word and the second is the tag (any further fields are ignored). Note that
     * {@link String#split(String)} drops trailing empty fields, so {@code "word:"} is rejected.
     *
     * @param line raw dictionary line, may be {@code null}
     * @return {@code true} if the entry was stored, {@code false} if the line was rejected
     */
    private static boolean putEntry(String line) {
        if (line == null) {
            LOGGER.error("错误的词性数据：{}", line);
            return false;
        }
        String[] fields = line.split(SEPARATOR);
        if (fields.length < 2) {
            LOGGER.error("错误的词性数据：{}", line);
            return false;
        }
        try {
            GENERIC_TRIE.put(fields[0], fields[1]);
            return true;
        } catch (Exception e) {
            // Best-effort loading: one bad entry must not abort the whole dictionary,
            // but keep the cause so a real trie failure is diagnosable.
            LOGGER.error("错误的词性数据：" + line, e);
            return false;
        }
    }

    /**
     * Removes the entry whose word is the first {@code ':'}-separated field of {@code line}.
     *
     * @param line raw dictionary line ({@code word:tag} or just {@code word}), may be {@code null}
     */
    private static void removeEntry(String line) {
        if (line == null) {
            LOGGER.error("错误的词性数据：{}", line);
            return;
        }
        String[] fields = line.split(SEPARATOR);
        if (fields.length == 0) {
            // Happens when the line consists only of separators, e.g. ":".
            LOGGER.error("错误的词性数据：{}", line);
            return;
        }
        try {
            GENERIC_TRIE.remove(fields[0]);
        } catch (Exception e) {
            LOGGER.error("错误的词性数据：" + line, e);
        }
    }

    /**
     * Tags every word in {@code list} that does not already carry a part of speech.
     * Words are processed through a parallel stream and modified in place.
     *
     * @param list words to tag; {@code null} or empty is a no-op
     */
    public static void process(List<Word> list) {
        if (list == null || list.isEmpty()) {
            return;
        }
        list.parallelStream().forEach(PartOfSpeechTagging::tag);
    }

    /** Tags a single word: dictionary lookup first, heuristic fallback second. */
    private static void tag(Word word) {
        if (word.getPartOfSpeech() != null) {
            // Parameterized logging: the message is only formatted when debug is enabled.
            LOGGER.debug("忽略已经标注过的词：{}", word);
            return;
        }
        String text = word.getText();
        String pos = GENERIC_TRIE.get(text);
        if (pos == null) {
            pos = inferPartOfSpeech(text);
        }
        // pos can still be null when neither the dictionary nor any heuristic matched.
        // NOTE(review): it is handed to PartOfSpeech.valueOf unchanged, as before —
        // confirm valueOf tolerates null.
        word.setPartOfSpeech(PartOfSpeech.valueOf(pos));
    }

    /**
     * Derives a tag for a word that is missing from the dictionary.
     *
     * <p>The checks run in a fixed order and a later match overrides an earlier one, so the
     * quantifier check has the final say.
     *
     * @param text the word's text
     * @return the derived tag, or {@code null} if no heuristic matched
     */
    private static String inferPartOfSpeech(String text) {
        String pos = null;
        if (RecognitionTool.isEnglish(text)) {
            pos = "w";
        }
        if (RecognitionTool.isNumber(text)) {
            pos = "m";
        }
        if (RecognitionTool.isChineseNumber(text)) {
            pos = "mh";
        }
        if (RecognitionTool.isFraction(text)) {
            if (containsAny(text, ".", "．", "·")) {
                pos = "mx";
            }
            // A slash wins over a dot when both are present.
            if (containsAny(text, "/", "／")) {
                pos = "mf";
            }
        }
        if (RecognitionTool.isQuantifier(text)) {
            if (containsAny(text, "‰", "%", "％")) {
                pos = "mf";
            } else if (containsAny(text, "时", "分", "秒")) {
                pos = "tq";
            } else if (containsAny(text, "年", "月", "日", "天", "号")) {
                pos = "tdq";
            } else {
                pos = "mq";
            }
        }
        return pos;
    }

    /** Returns {@code true} if {@code text} contains at least one of {@code needles}. */
    private static boolean containsAny(String text, String... needles) {
        for (String needle : needles) {
            if (text.contains(needle)) {
                return true;
            }
        }
        return false;
    }

    /** Small demo: segments a sample sentence and prints it before and after tagging. */
    public static void main(String[] strArr) {
        List<Word> words = WordSegmenter.segWithStopWords("我爱中国，我爱杨尚川");
        System.out.println("未标注词性：" + words);
        process(words);
        System.out.println("标注词性：" + words);
    }

    // Load the dictionary as soon as the class is initialized. This block must stay below
    // the static field declarations so LOGGER and GENERIC_TRIE are set before reload() runs.
    static {
        reload();
    }
}
