package org.apdplat.word.corpus;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apdplat.word.segmentation.Word;
import org.apdplat.word.util.AutoDetector;
import org.apdplat.word.util.DoubleArrayGenericTrie;
import org.apdplat.word.util.ResourceLoader;
import org.apdplat.word.util.WordConfTools;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apdplat/word/corpus/Bigram.class */
/**
 * Bigram (two-word) language model backed by a double-array trie.
 *
 * <p>Bigram data is loaded from the resource configured under {@code bigram.path}
 * (default {@code classpath:bigram.txt}). Each line is split on whitespace; the first
 * token is stored as the trie key and the second token is parsed as its integer
 * frequency. Lookups use the key form {@code first:second}.
 *
 * <p>Scores are frequencies normalised by the largest frequency seen while loading.
 */
public class Bigram {
    private static final Logger LOGGER = LoggerFactory.getLogger(Bigram.class);
    private static final DoubleArrayGenericTrie DOUBLE_ARRAY_GENERIC_TRIE = new DoubleArrayGenericTrie(WordConfTools.getInt("bigram.double.array.trie.size", 5300000));
    /** Largest frequency among the lines loaded since the last clear; 0 when nothing is loaded. */
    private static int maxFrequency = 0;

    /**
     * Registers a resource loader with {@link AutoDetector#loadAndWatch} for the configured
     * bigram path. The loader fills the trie from the supplied lines and tracks the
     * maximum frequency.
     */
    public static void reload() {
        AutoDetector.loadAndWatch(new ResourceLoader() {
            @Override
            public void clear() {
                Bigram.DOUBLE_ARRAY_GENERIC_TRIE.clear();
                // The maximum is derived from the data being discarded; keeping it would
                // normalise freshly loaded frequencies against a stale value.
                Bigram.maxFrequency = 0;
            }

            @Override
            public void load(List<String> list) {
                Bigram.LOGGER.info("初始化bigram");
                Map<String, Integer> frequencies = new HashMap<String, Integer>();
                for (String line : list) {
                    try {
                        addLine(line, frequencies);
                    } catch (Exception e) {
                        // Best effort: a malformed line is reported (with its cause) and skipped.
                        Bigram.LOGGER.error("错误的bigram数据：" + line, e);
                    }
                }
                int size = frequencies.size();
                Bigram.DOUBLE_ARRAY_GENERIC_TRIE.putAll(frequencies);
                Bigram.LOGGER.info("bigram初始化完毕，bigram数据条数：" + size);
            }

            @Override
            public void add(String str) {
                throw new UnsupportedOperationException("not yet support menthod!");
            }

            /**
             * Parses one data line ({@code key frequency}, whitespace separated), records it
             * in {@code map} and raises the class-wide maximum frequency if needed.
             *
             * @throws RuntimeException if the line has fewer than two tokens or the
             *         frequency is not an integer (handled by the caller in {@code load})
             */
            private void addLine(String str, Map<String, Integer> map) {
                String[] tokens = str.split("\\s+");
                int frequency = Integer.parseInt(tokens[1]);
                if (frequency > Bigram.maxFrequency) {
                    Bigram.maxFrequency = frequency;
                }
                map.put(tokens[0], Integer.valueOf(frequency));
            }

            @Override
            public void remove(String str) {
                throw new UnsupportedOperationException("not yet support menthod!");
            }
        }, WordConfTools.get("bigram.path", "classpath:bigram.txt"));
    }

    /**
     * @return the largest frequency seen while loading bigram data, or 0 if none was loaded
     */
    public static int getMaxFrequency() {
        return maxFrequency;
    }

    /**
     * Scores several candidate word sequences at once.
     *
     * <p>Each distinct sequence first receives the sum of the positive bigram scores of its
     * adjacent word pairs. While doing so, the concatenated text of every scoring pair
     * (and of every chain of two consecutive scoring pairs) is remembered together with
     * its score. In a second pass, every word of every sequence whose text equals one of
     * those remembered concatenations adds that remembered score to its sequence.
     *
     * @param listArr candidate word sequences; duplicates are scored once
     * @return a map from each distinct sequence to its accumulated score
     */
    public static Map<List<Word>, Float> bigram(List<Word>... listArr) {
        Map<List<Word>, Float> sequenceScores = new HashMap<List<Word>, Float>();
        // Concatenated text of a scoring pair -> its score.
        Map<String, Float> pairScores = new HashMap<String, Float>();
        // Concatenated text of two chained scoring pairs -> sum of both scores.
        Map<String, Float> chainScores = new HashMap<String, Float>();
        for (List<Word> list : listArr) {
            if (sequenceScores.get(list) != null) {
                continue; // already scored an equal sequence
            }
            float total = 0.0f;
            if (list.size() > 1) {
                String lastPair = "";
                for (int i = 0; i < list.size() - 1; i++) {
                    String first = list.get(i).getText();
                    String second = list.get(i + 1).getText();
                    float score = getScore(first, second);
                    if (score > 0.0f) {
                        if (lastPair.endsWith(first)) {
                            // lastPair may still be "" (matching an empty word text), in which
                            // case no score has been recorded for it yet.
                            Float previous = pairScores.get(lastPair);
                            if (previous != null) {
                                chainScores.put(lastPair + second, Float.valueOf(previous.floatValue() + score));
                            }
                        }
                        lastPair = first + second;
                        pairScores.put(lastPair, Float.valueOf(score));
                        total += score;
                    }
                }
            }
            sequenceScores.put(list, Float.valueOf(total));
        }
        if (pairScores.size() > 0 || chainScores.size() > 0) {
            for (Map.Entry<List<Word>, Float> entry : sequenceScores.entrySet()) {
                for (Word word : entry.getKey()) {
                    String text = word.getText();
                    Float[] bonuses = new Float[]{pairScores.get(text), chainScores.get(text)};
                    for (Float bonus : bonuses) {
                        if (bonus != null && bonus.floatValue() > 0.0f) {
                            if (LOGGER.isDebugEnabled()) {
                                LOGGER.debug(text + " 获得分值：" + bonus);
                            }
                            // Updating through the entry avoids touching the map while iterating.
                            entry.setValue(Float.valueOf(entry.getValue().floatValue() + bonus.floatValue()));
                        }
                    }
                }
            }
        }
        return sequenceScores;
    }

    /**
     * Sums the bigram scores of all adjacent word pairs in one sequence.
     *
     * @param list the word sequence
     * @return the summed score, or 0 when the sequence has fewer than two words
     */
    public static float bigram(List<Word> list) {
        if (list.size() <= 1) {
            return 0.0f;
        }
        float total = 0.0f;
        for (int i = 0; i < list.size() - 1; i++) {
            total += getScore(list.get(i).getText(), list.get(i + 1).getText());
        }
        return total;
    }

    /**
     * Returns the frequency of the pair normalised by the maximum loaded frequency.
     *
     * @param str  text of the first word
     * @param str2 text of the second word
     * @return {@code frequency / maxFrequency} as a float; 0 when no positive maximum
     *         frequency is known
     */
    public static float getScore(String str, String str2) {
        int max = maxFrequency;
        if (max <= 0) {
            // Nothing loaded (or just cleared): avoid dividing by zero.
            return 0.0f;
        }
        // Divide in floating point; int / int would truncate every partial score to 0.
        float score = getFrequency(str, str2) / (float) max;
        if (LOGGER.isDebugEnabled() && score > 0.0f) {
            LOGGER.debug("二元模型 " + str + ":" + str2 + " 获得分值：" + score);
        }
        return score;
    }

    /**
     * Looks up the stored frequency for the key {@code str + ":" + str2}.
     *
     * @param str  text of the first word
     * @param str2 text of the second word
     * @return the stored frequency, or 0 when the lookup yields no value or a negative one
     */
    public static int getFrequency(String str, String str2) {
        Integer value = Integer.valueOf(DOUBLE_ARRAY_GENERIC_TRIE.get(str + ":" + str2));
        // NOTE(review): the trie presumably reports a missing key with a negative sentinel
        // — confirm against DoubleArrayGenericTrie.get. A frequency is never negative, so
        // such values are treated as "not found".
        if (value == null || value.intValue() < 0) {
            return 0;
        }
        return value.intValue();
    }

    static {
        reload();
    }
}
