package org.lionsoul.jcseg.segmenter;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import org.lionsoul.jcseg.IChunk;
import org.lionsoul.jcseg.ISegment;
import org.lionsoul.jcseg.IWord;
import org.lionsoul.jcseg.dic.ADictionary;
import org.lionsoul.jcseg.util.IPushbackReader;
import org.lionsoul.jcseg.util.IStringBuffer;
import org.lionsoul.jcseg.util.IntArrayList;
import org.lionsoul.jcseg.util.NumericUtil;
import org.lionsoul.jcseg.util.StringUtil;

/* loaded from: input_file:org/lionsoul/jcseg/segmenter/Segmenter.class */
public abstract class Segmenter implements ISegment {
    /**
     * Offset of the most recently consumed input character. Set to -1 by
     * {@link #reset(Reader)}, incremented by {@link #readNext()} and decremented
     * whenever characters are pushed back.
     */
    protected int idx;
    /** Dictionary consulted for every lexicon lookup made by this segmenter. */
    public final ADictionary dic;
    /** Segmentation settings supplied at construction time. */
    public final SegmenterConfig config;
    /** Push-back reader wrapping the current input; installed by {@link #reset(Reader)}. */
    protected IPushbackReader reader = null;
    /**
     * Latin string read right behind a CJK sentence while probing for a mixed
     * word (see getNextMixedWord). next() pushes it back onto the reader when
     * it is still set after the CJK sentence has been processed.
     */
    protected String behindLatin = null;
    /**
     * Bit flags exchanged between the reading helpers. Within this class, bit 1
     * is set by nextCJKSentence, bit 4 by nextLatinWord and bit 8 by
     * nextLatinString; bit 2 is tested in getNextCJKWord.
     * NOTE(review): bit 2 is not set anywhere in the visible code, presumably
     * nextCNNumeric sets it for Chinese fractions - confirm.
     */
    protected int ctrlMask = 0;
    /** Words already produced but not yet handed out by next(). */
    protected final LinkedList<IWord> wordPool = new LinkedList<>();
    /** Scratch list that receives the sub-words of a Latin word in getNextLatinWord. */
    protected final LinkedList<IWord> subWordPool = new LinkedList<>();
    /** Shared character buffer reused by the character-reading helpers. */
    protected final IStringBuffer isb = new IStringBuffer(64);
    /** Records look-ahead characters so that the unmatched ones can be pushed back. */
    protected final IntArrayList iaList = new IntArrayList(15);

    /**
     * Creates a segmenter bound to the given settings and dictionary.
     *
     * @param segmenterConfig segmentation settings, stored in {@link #config}
     * @param aDictionary     dictionary, stored in {@link #dic}
     */
    public Segmenter(SegmenterConfig segmenterConfig, ADictionary aDictionary) {
        config = segmenterConfig;
        dic = aDictionary;
    }

    /**
     * Points the segmenter at a new input and rewinds the stream offset.
     *
     * @param reader the new input; when {@code null} the current reader is kept
     *               and only the offset is rewound
     * @throws IOException declared by the interface
     */
    @Override // org.lionsoul.jcseg.ISegment
    public void reset(Reader reader) throws IOException {
        if (reader != null) {
            final BufferedReader buffered = new BufferedReader(reader);
            this.reader = new IPushbackReader(buffered);
        }
        // -1 means "nothing consumed yet"; readNext() moves it to 0 on the first char.
        this.idx = -1;
    }

    /**
     * Reads the next character from the input and advances {@link #idx}.
     *
     * @return the character read, or -1 at end of input (the offset is then
     *         left untouched)
     * @throws IOException if the underlying reader fails
     */
    public int readNext() throws IOException {
        final int ch = this.reader.read();
        if (ch == -1) {
            return -1;
        }
        this.idx++;
        return ch;
    }

    /**
     * Returns a single character to the reader and moves the stream offset
     * one step back.
     *
     * @param i the character to un-read
     */
    public void pushBack(int i) {
        this.reader.unread(i);
        this.idx -= 1;
    }

    /**
     * Returns a whole string to the reader and moves the stream offset back by
     * its length. Characters are un-read last-to-first so that they come out
     * of the reader again in their original order.
     *
     * @param str the text to un-read
     */
    protected void pushBack(String str) {
        final int count = str.length();
        for (int k = count - 1; k >= 0; k--) {
            this.reader.unread(str.charAt(k));
        }
        this.idx -= count;
    }

    /**
     * @return the number of characters consumed so far, i.e. the offset of the
     *         last consumed character plus one
     */
    @Override // org.lionsoul.jcseg.ISegment
    public int getStreamPosition() {
        return 1 + this.idx;
    }

    /** @return the dictionary this segmenter was constructed with */
    public ADictionary getDict() {
        return dic;
    }

    /** @return the settings this segmenter was constructed with */
    public SegmenterConfig getConfig() {
        return config;
    }

    /**
     * Produces the next word of the input.
     *
     * <p>Words queued in {@link #wordPool} by an earlier call are handed out
     * first. Otherwise characters are read (whitespace is skipped) and the
     * first non-whitespace character decides which helper builds the word:
     * CJK text, Latin text, paired punctuation, letter numbers, other numbers,
     * CJK punctuation or, when enabled, unrecognized characters. A word found
     * in lexicon 7 is dropped when {@code CLEAR_STOPWORD} is on.
     *
     * @return the next word, or {@code null} once the input is exhausted
     * @throws IOException if reading the input fails
     */
    @Override // org.lionsoul.jcseg.ISegment
    public IWord next() throws IOException {
        if (!this.wordPool.isEmpty()) {
            return this.wordPool.remove();
        }

        int ch;
        while ((ch = readNext()) != -1) {
            if (StringUtil.isWhitespace(ch)) {
                continue;
            }

            // Offset of the character that starts the word.
            final int start = this.idx;
            IWord word = null;

            if (StringUtil.isCJKChar(ch)) {
                this.behindLatin = null;
                word = getNextCJKWord(ch, start);
                // A Latin string read while probing for a mixed word but not
                // consumed goes back to the reader.
                if (this.behindLatin != null) {
                    pushBack(this.behindLatin);
                }
            } else if (StringUtil.isEnChar(ch)) {
                word = getNextLatinWord(ch, start);
            } else if (this.config.PPT_MAX_LENGTH > 0 && StringUtil.isPairPunctuation((char) ch)) {
                word = getNextPunctuationPairWord(ch, start);
            } else if (StringUtil.isLetterNumber(ch)) {
                final String text = nextLetterNumber(ch);
                if (!(this.config.CLEAR_STOPWORD && this.dic.match(7, text))) {
                    word = new Word(text, 7);
                    word.setPartSpeechForNull(IWord.NUMERIC_POSPEECH);
                    word.setPosition(start);
                }
            } else if (StringUtil.isOtherNumber(ch)) {
                final String text = nextOtherNumber(ch);
                if (!(this.config.CLEAR_STOPWORD && this.dic.match(7, text))) {
                    word = new Word(text, 7);
                    word.setPartSpeechForNull(IWord.NUMERIC_POSPEECH);
                    word.setPosition(start);
                }
            } else if (StringUtil.isCnPunctuation(ch)) {
                final String text = String.valueOf((char) ch);
                if (!(this.config.CLEAR_STOPWORD && this.dic.match(7, text))) {
                    word = new Word(text, 10);
                    word.setPartSpeechForNull(IWord.PUNCTUATION);
                    word.setPosition(start);
                }
            } else if (this.config.KEEP_UNREG_WORDS) {
                final String text = String.valueOf((char) ch);
                if (!(this.config.CLEAR_STOPWORD && this.dic.match(7, text))) {
                    word = new Word(text, 11);
                    word.setPartSpeechForNull(IWord.UNRECOGNIZE);
                    word.setPosition(start);
                }
            }

            if (word != null) {
                return word;
            }
            // The helper may have produced nothing itself but still queued words.
            if (!this.wordPool.isEmpty()) {
                return this.wordPool.removeFirst();
            }
        }
        return null;
    }

    /**
     * Segments the CJK sentence that starts with {@code i} and queues every
     * resulting word in {@link #wordPool}.
     *
     * <p>For each position of the sentence the method first tries to read a
     * Chinese numeric (a fraction, or a numeric optionally merged with a
     * dictionary word or followed by a unit character). When no numeric word is
     * produced it takes the first word of the best chunk, optionally extended
     * to a Chinese name or replaced by a CJK/Latin mixed word.
     *
     * @param i  first character of the sentence (already consumed by the caller)
     * @param i2 stream offset of that character; word positions are relative to it
     * @return the first queued word, or {@code null} when nothing was queued
     * @throws IOException if reading the input fails
     */
    protected IWord getNextCJKWord(int i, int i2) throws IOException {
        final char[] chars = nextCJKSentence(i);
        int offset = 0;
        while (offset < chars.length) {
            IWord word = null;

            // A Chinese numeric is only looked for when it is not the last char.
            if (offset + 1 < chars.length && NumericUtil.isCNNumeric(chars[offset]) > -1) {
                String numeric = nextCNNumeric(chars, offset);
                final int numericLength = numeric.length();
                if ((this.ctrlMask & 2) != 0) {
                    // Flag 2 marks the numeric as a fraction ("X分之Y").
                    word = new Word(numeric, 9);
                    word.setPosition(i2 + offset);
                    word.setPartSpeechForNull(IWord.NUMERIC_POSPEECH);
                    this.wordPool.add(word);
                    if (this.config.CNFRA_TO_ARABIC) {
                        final String[] parts = numeric.split("分之");
                        final Word fraction = new Word(
                                NumericUtil.cnNumericToArabic(parts[1], true) + "/" + NumericUtil.cnNumericToArabic(parts[0], true), 9);
                        fraction.setPosition(word.getPosition());
                        fraction.setPartSpeechForNull(IWord.NUMERIC_POSPEECH);
                        this.wordPool.add(fraction);
                    }
                } else if (NumericUtil.isCNNumeric(chars[offset + 1]) > -1 || this.dic.match(1, chars[offset + 1] + "")) {
                    // Keep the bare numeric so it can be restored below.
                    final String plainNumeric = numeric;
                    final StringBuilder candidate = new StringBuilder();
                    candidate.append(numeric);
                    boolean matched = false;

                    // Look for the longest dictionary word that starts with the numeric.
                    for (int len = numericLength; offset + len < chars.length && len < this.config.MAX_LENGTH; len++) {
                        candidate.append(chars[offset + len]);
                        final String text = candidate.toString();
                        if (this.dic.match(0, text)) {
                            word = this.dic.get(0, text);
                            numeric = text;
                            matched = true;
                        }
                    }

                    // The match is just "numeric + one lexicon-1 char": fall back
                    // to the bare numeric so the Arabic conversion below still
                    // runs on a pure numeric.
                    if (matched && numeric.length() - numericLength == 1 && this.dic.match(1, numeric.substring(numericLength))) {
                        numeric = plainNumeric;
                        matched = false;
                    }

                    Word arabicWord = null;
                    if (!matched && this.config.CNNUM_TO_ARABIC) {
                        String arabic = NumericUtil.cnNumericToArabic(numeric, true) + "";
                        if (offset + numeric.length() < chars.length && this.dic.match(1, chars[offset + numeric.length()] + "")) {
                            final char unit = chars[offset + numeric.length()];
                            numeric = numeric + unit;
                            arabic = arabic + unit;
                        }
                        arabicWord = new Word(arabic, 9);
                        arabicWord.setPosition(i2 + offset);
                        arabicWord.setLength(numeric.length());
                        arabicWord.setPartSpeechForNull(IWord.NUMERIC_POSPEECH);
                    }

                    if (this.config.CLEAR_STOPWORD && this.dic.match(7, numeric)) {
                        // Dropped as a stop word: skip it and restart at the next
                        // position so the cursor is not advanced a second time.
                        offset += numeric.length();
                        continue;
                    }

                    if (word == null) {
                        word = new Word(numeric, 9);
                        word.setPartSpeechForNull(IWord.NUMERIC_POSPEECH);
                    } else {
                        // Never set a position on the shared dictionary entry.
                        word = word.m9clone();
                    }
                    word.setPosition(i2 + offset);
                    this.wordPool.add(word);
                    if (arabicWord != null) {
                        this.wordPool.add(arabicWord);
                    }
                }

                if (word != null) {
                    offset += word.getLength();
                    appendCJKWordFeatures(word);
                    // Re-check the loop guard: the numeric word may have consumed
                    // the rest of the sentence.
                    continue;
                }
            }

            final IChunk bestChunk = getBestChunk(chars, offset, this.config.MAX_LENGTH);
            IWord first = bestChunk.getWords()[0];
            final String firstPos = first.getPartSpeech() == null ? null : first.getPartSpeech()[0];

            // Chinese name detection; nameType stays -1 when no name was found.
            int nameType = -1;
            if (this.config.I_CN_NAME && !"nr".equals(firstPos) && first.getLength() <= 2 && bestChunk.getWords().length > 1) {
                final StringBuilder name = new StringBuilder();
                name.append(first.getValue());
                String givenName;
                // The chunk starts at 'offset', so that is the base index for
                // findCHName's look-ahead into the sentence.
                if (this.dic.match(2, first.getValue()) && (givenName = findCHName(chars, offset, bestChunk)) != null) {
                    nameType = 3;
                    name.append(givenName);
                } else if (this.dic.match(6, first.getValue()) && bestChunk.getWords()[1].getLength() <= 2
                        && this.dic.match(2, bestChunk.getWords()[1].getValue())) {
                    nameType = 4;
                    name.append(bestChunk.getWords()[1].getValue());
                }
                if (nameType != -1) {
                    first = new Word(name.toString(), nameType);
                    first.setPartSpeechForNull(IWord.NAME_POSPEECH);
                }
            }

            if (this.config.CLEAR_STOPWORD && this.dic.match(7, first.getValue())) {
                offset += first.getLength();
            } else {
                // Flag 1: the sentence is directly followed by a Latin letter or
                // digit, so its tail may be the prefix of a mixed word.
                IWord mixed = null;
                if ((this.ctrlMask & 1) != 0 && chars.length - offset <= this.dic.mixPrefixLength) {
                    mixed = getNextMixedWord(chars, offset);
                    if (mixed != null) {
                        nameType = -1;
                    }
                }
                if (mixed != null) {
                    first = mixed.m9clone();
                } else if (nameType == -1) {
                    // Dictionary entry: clone before setting the position.
                    first = first.m9clone();
                }
                first.setPosition(i2 + offset);
                this.wordPool.add(first);
                offset += first.getLength();
                if (nameType == -1) {
                    appendCJKWordFeatures(first);
                }
            }
        }

        if (this.wordPool.size() == 0) {
            return null;
        }
        return this.wordPool.remove();
    }

    /**
     * Builds the word that starts with the Latin character {@code i}.
     *
     * <p>An English punctuation character becomes a single punctuation word.
     * Anything else is read with {@link #nextLatinWord(int, int)} and, when
     * secondary segmentation applies, split into sub-words: the first one is
     * returned and the others are queued in {@link #wordPool}.
     *
     * @param i  first character of the word (already consumed by the caller)
     * @param i2 stream offset of that character
     * @return the word, or {@code null} when it was dropped as a stop word
     * @throws IOException if reading the input fails
     */
    protected IWord getNextLatinWord(int i, int i2) throws IOException {
        if (StringUtil.isEnPunctuation(i)) {
            final String mark = String.valueOf((char) i);
            if (this.config.CLEAR_STOPWORD && this.dic.match(7, mark)) {
                return null;
            }
            final Word punctuation = new Word(mark, 10);
            punctuation.setPosition(i2);
            punctuation.setPartSpeechForNull(IWord.PUNCTUATION);
            return punctuation;
        }

        final IWord latin = nextLatinWord(i, i2);
        latin.setPosition(i2);
        if (this.config.CLEAR_STOPWORD && this.dic.match(7, latin.getValue())) {
            return null;
        }

        final boolean splittable = this.config.EN_SECOND_SEG && enSecondSegFilter(latin);
        if (!splittable) {
            appendCJKWordFeatures(latin);
            return latin;
        }

        this.subWordPool.clear();
        if ((this.ctrlMask & 4) != 0) {
            // Flag 4 is raised by nextLatinWord for words made of several char types.
            enSecondSeg(latin, this.subWordPool);
        } else if (this.config.EN_WORD_SEG) {
            enWordSeg(latin, this.subWordPool);
        } else {
            appendCJKWordFeatures(latin);
            return latin;
        }

        if (this.subWordPool.isEmpty()) {
            appendLatinWordFeatures(latin);
            return latin;
        }

        final IWord head = this.subWordPool.removeFirst();
        appendLatinWordFeatures(head);
        for (final IWord sub : this.subWordPool) {
            this.wordPool.add(sub);
            appendLatinWordFeatures(sub);
        }
        return head;
    }

    /**
     * Tries to build a mixed word from the tail of a CJK sentence, the Latin
     * string that follows it and, possibly, a few more characters after that.
     *
     * <p>Characters read ahead that do not end up in the longest dictionary
     * match are pushed back. {@link #behindLatin} keeps the Latin string that
     * was read unless a word was found, in which case it is cleared.
     *
     * @param cArr the CJK sentence
     * @param i    offset in {@code cArr} where the candidate prefix starts
     * @return the dictionary entry of the mixed word, or {@code null}
     * @throws IOException if reading the input fails
     */
    public IWord getNextMixedWord(char[] cArr, int i) throws IOException {
        final IStringBuffer buffer = new IStringBuffer();
        buffer.clear().append(cArr, i);
        if (!this.dic.match(8, buffer.toString())) {
            return null;
        }

        if (this.behindLatin == null) {
            this.behindLatin = nextLatinString(readNext());
        }
        buffer.append(this.behindLatin);

        final String mixed = buffer.toString();
        IWord found = null;
        if (this.dic.match(0, mixed)) {
            found = this.dic.get(0, mixed);
        }

        if ((this.ctrlMask & 8) != 0 || this.dic.match(8, mixed)) {
            this.iaList.clear();
            int consumed = 0; // look-ahead chars covered by the longest match
            int read = 0;     // look-ahead chars read so far
            while (read < this.dic.mixSuffixLength) {
                final int ch = readNext();
                if (ch == -1) {
                    break;
                }
                buffer.append((char) ch);
                this.iaList.add(ch);
                final String candidate = buffer.toString();
                if (this.dic.match(0, candidate)) {
                    found = this.dic.get(0, candidate);
                    consumed = read + 1;
                }
                read++;
            }
            // Give back, last first, everything read beyond the longest match.
            for (int k = read - 1; k >= consumed; k--) {
                pushBack(this.iaList.get(k));
            }
        }

        buffer.clear();
        if (found != null) {
            this.behindLatin = null;
        }
        return found;
    }

    /**
     * Handles an opening pair punctuation: emits the punctuation itself and
     * the text it encloses, each unless dropped as a stop word.
     *
     * @param i  the opening punctuation character
     * @param i2 stream offset of that character
     * @return the punctuation word, or the enclosed-text word when the
     *         punctuation was dropped, or {@code null} when both were dropped
     * @throws IOException if reading the input fails
     */
    protected IWord getNextPunctuationPairWord(int i, int i2) throws IOException {
        final String enclosed = getPairPunctuationText(i);
        final String mark = String.valueOf((char) i);

        Word result = null;
        if (!(this.config.CLEAR_STOPWORD && this.dic.match(7, mark))) {
            result = new Word(mark, 10);
            result.setPartSpeechForNull(IWord.PUNCTUATION);
            result.setPosition(i2);
        }

        final boolean hasText = enclosed != null && enclosed.length() > 0;
        if (hasText && !(this.config.CLEAR_STOPWORD && this.dic.match(7, enclosed))) {
            final Word text = new Word(enclosed, 0);
            text.setPartSpeechForNull(IWord.PPT_POSPEECH);
            text.setPosition(i2 + 1);
            if (result == null) {
                result = text;
            } else {
                // The punctuation is returned first; the text follows from the pool.
                this.wordPool.add(text);
            }
        }
        return result;
    }

    /**
     * Queues the pinyin and the synonyms of a word, each only when both its
     * load and append switches are on and the word actually carries the data.
     *
     * @param iWord the word whose features are appended to {@link #wordPool}
     */
    public void appendCJKWordFeatures(IWord iWord) {
        final boolean pinyinEnabled = this.config.APPEND_CJK_PINYIN && this.config.LOAD_CJK_PINYIN;
        if (pinyinEnabled && iWord.getPinyin() != null) {
            SegKit.appendPinyin(this.config, this.wordPool, iWord);
        }

        final boolean synonymsEnabled = this.config.APPEND_CJK_SYN && this.config.LOAD_CJK_SYN;
        if (synonymsEnabled && iWord.getSyn() != null) {
            SegKit.appendSynonyms(this.config, this.wordPool, iWord);
        }
    }

    /**
     * Queues the synonyms and the pinyin of a Latin word.
     *
     * <p>When both features are enabled and the word carries neither, the
     * features are taken from the dictionary entry with the same value, if one
     * exists. That entry is cloned before the word's position is copied onto
     * it, so the shared dictionary object is never modified.
     *
     * @param iWord the Latin word whose features are appended to {@link #wordPool}
     */
    protected void appendLatinWordFeatures(IWord iWord) {
        final boolean appendSyn = this.config.LOAD_CJK_SYN && this.config.APPEND_CJK_SYN;
        final boolean appendPinyin = this.config.LOAD_CJK_PINYIN && this.config.APPEND_CJK_PINYIN;

        IWord source = iWord;
        if (appendSyn && iWord.getSyn() == null && appendPinyin && iWord.getPinyin() == null) {
            final IWord entry = this.dic.get(0, iWord.getValue());
            if (entry != null) {
                // Work on a private copy: setting the position on the entry
                // itself would leak this word's offset into the dictionary.
                source = entry.m9clone();
                source.setPosition(iWord.getPosition());
            }
        }

        if (appendSyn && source.getSyn() != null) {
            SegKit.appendSynonyms(this.config, this.wordPool, source);
        }
        if (appendPinyin && source.getPinyin() != null) {
            SegKit.appendPinyin(this.config, this.wordPool, source);
        }
    }

    /**
     * Decides whether a Latin word may go through secondary segmentation.
     *
     * @param iWord the candidate word
     * @return {@code true} unless the word's type code is 2
     */
    protected boolean enSecondSegFilter(IWord iWord) {
        final int type = iWord.getType();
        return type != 2;
    }

    /**
     * Splits a Latin word into runs of characters of the same English char
     * type (for example letters versus digits) and appends the runs to a list.
     *
     * <p>Punctuation characters only separate runs; no word is emitted for
     * them. Runs shorter than {@code EN_SEC_MIN_LEN} and runs dropped as stop
     * words are skipped. Runs of char type 0 are further split with
     * {@link #enWordSeg(IWord, LinkedList)} when {@code EN_WORD_SEG} is on.
     *
     * @param iWord      the word to split; its position is the base offset of the runs
     * @param linkedList the list to append to, or {@code null} to create a new one
     * @return the list the sub-words were appended to
     */
    protected LinkedList<IWord> enSecondSeg(IWord iWord, LinkedList<IWord> linkedList) {
        final char[] chars = iWord.getValue().toCharArray();
        final int position = iWord.getPosition();
        if (linkedList == null) {
            linkedList = new LinkedList<>();
        }
        if (this.config.isKeepEnSecOriginalWord()) {
            linkedList.add(iWord);
        }

        int i = 0;
        while (i < chars.length) {
            final int runType = StringUtil.getEnCharType(chars[i]);
            if (runType == 2) {
                // Punctuation is a separator only.
                i++;
                continue;
            }

            // Collect the run of characters sharing the type of chars[i].
            this.isb.clear().append(chars[i]);
            for (int j = i + 1; j < chars.length && StringUtil.getEnCharType(chars[j]) == runType; j++) {
                this.isb.append(chars[j]);
            }
            // Remember the length now: the shared buffer must not be relied on
            // after other helpers have run.
            final int runLength = this.isb.length();

            if (runLength >= this.config.EN_SEC_MIN_LEN) {
                final String run = this.isb.toString();
                if (!this.config.CLEAR_STOPWORD || !this.dic.match(7, run)) {
                    final Word sub = new Word(run, iWord.getType());
                    sub.setPartSpeechForNull(iWord.getPartSpeech());
                    sub.setPosition(position + i);
                    if (this.config.EN_WORD_SEG && runType == 0) {
                        enWordSeg(sub, linkedList);
                    } else {
                        linkedList.addLast(sub);
                    }
                }
            }
            i += runLength;
        }
        return linkedList;
    }

    /**
     * Splits a Latin word into dictionary words by repeatedly taking the first
     * word of the best chunk at the current offset.
     *
     * @param iWord      the word to split; its position is the base offset of the pieces
     * @param linkedList the list the pieces are appended to
     * @return {@code linkedList}
     */
    protected LinkedList<IWord> enWordSeg(IWord iWord, LinkedList<IWord> linkedList) {
        final char[] chars = iWord.getValue().toCharArray();
        final int base = iWord.getPosition();
        for (int offset = 0; offset < chars.length; ) {
            final IChunk chunk = getBestChunk(chars, offset, this.config.EN_MAX_LEN);
            // Clone so the position is not written to a shared entry.
            final IWord piece = chunk.getWords()[0].m9clone();
            piece.setPosition(base + offset);
            linkedList.add(piece);
            offset += piece.getValue().length();
        }
        return linkedList;
    }

    /**
     * Collects every dictionary word (lexicon 0) that starts at {@code i2} and
     * is at most {@code i} characters long, shortest first.
     *
     * <p>When nothing matches, the result holds a single new word of type 17
     * made of the character at {@code i2}. The working list is cleared before
     * returning, so a list passed in by the caller comes back empty.
     *
     * @param i    maximum word length to try
     * @param cArr the text to match against
     * @param i2   offset in {@code cArr} where the words must start
     * @param list working list to reuse, or {@code null} to allocate one
     * @return the matched words, never empty
     */
    public IWord[] getNextMatch(int i, char[] cArr, int i2, List<IWord> list) {
        final IStringBuffer buffer = new IStringBuffer(i);
        buffer.clear().append(cArr[i2]);
        if (list == null) {
            list = new ArrayList<>(8);
        }

        final String firstChar = buffer.toString();
        if (this.dic.match(0, firstChar)) {
            list.add(this.dic.get(0, firstChar));
        }
        // Grow the candidate one character at a time.
        for (int len = 1; len < i && len + i2 < cArr.length; len++) {
            buffer.append(cArr[len + i2]);
            final String candidate = buffer.toString();
            if (this.dic.match(0, candidate)) {
                list.add(this.dic.get(0, candidate));
            }
        }
        if (list.isEmpty()) {
            list.add(new Word(firstChar, 17));
        }

        final IWord[] matches = list.toArray(new IWord[list.size()]);
        list.clear();
        return matches;
    }

    /**
     * Looks for the given-name part of a Chinese name in the words that follow
     * the first word of a chunk.
     *
     * <p>Lexicon 3 is consulted for single-character given names and lexicons
     * 4 and 5 for the first and second character of a two-character given
     * name. Where a name character is taken out of a longer word, the
     * remainder must be frequent enough ({@code NAME_SINGLE_THRESHOLD}).
     *
     * @param cArr   the sentence the chunk was taken from
     * @param i      base offset added to the first word's length when looking
     *               ahead in {@code cArr}
     * @param iChunk the chunk; it must hold at least two words
     * @return the given name, or {@code null} when none was recognized
     */
    public String findCHName(char[] cArr, int i, IChunk iChunk) {
        final StringBuilder name = new StringBuilder();

        if (iChunk.getWords().length == 2) {
            final IWord next = iChunk.getWords()[1];
            final int nextLength = next.getLength();
            if (nextLength == 1) {
                if (this.dic.match(3, next.getValue())) {
                    name.append(next.getValue());
                    return name.toString();
                }
                return null;
            }
            if (nextLength != 2 && nextLength != 3) {
                return null;
            }

            final String c1 = next.getValue().charAt(0) + "";
            final String c2 = next.getValue().charAt(1) + "";
            if (this.dic.match(4, c1) && this.dic.match(5, c2)) {
                name.append(c1);
                name.append(c2);
                return name.toString();
            }
            // Single-character name whose following character is a frequent word.
            if (this.dic.match(3, c1)) {
                final IWord tail = this.dic.get(0, c2);
                if (tail != null && tail.getFrequency() >= this.config.NAME_SINGLE_THRESHOLD) {
                    name.append(c1);
                    return name.toString();
                }
            }
            return null;
        }

        final IWord second = iChunk.getWords()[1];
        final IWord third = iChunk.getWords()[2];
        final int secondLength = second.getLength();

        if (secondLength == 1) {
            if (this.dic.match(4, second.getValue())) {
                if (third.getLength() == 1) {
                    if (this.dic.match(5, third.getValue())) {
                        name.append(second.getValue());
                        name.append(third.getValue());
                        return name.toString();
                    }
                } else {
                    // Borrow the first char of the third word, provided the
                    // text after it still forms words.
                    final String thirdHead = third.getValue().charAt(0) + "";
                    final IWord[] rest = getNextMatch(
                            this.config.MAX_LENGTH, cArr, i + iChunk.getWords()[0].getLength() + 2, null);
                    if (this.dic.match(5, thirdHead)
                            && (rest.length > 1 || rest[0].getFrequency() >= this.config.NAME_SINGLE_THRESHOLD)) {
                        name.append(second.getValue());
                        name.append(thirdHead);
                        return name.toString();
                    }
                }
            }
            // Fall back to a single-character given name.
            if (this.dic.match(3, second.getValue())) {
                name.append(second.getValue());
                return name.toString();
            }
            return null;
        }

        if (secondLength == 2) {
            final String c1 = second.getValue().charAt(0) + "";
            final String c2 = second.getValue().charAt(1) + "";
            if (this.dic.match(4, c1) && this.dic.match(5, c2)) {
                name.append(second.getValue());
                return name.toString();
            }
            if (this.dic.match(3, c1)) {
                final IWord tail = this.dic.get(0, c2);
                if (tail != null && tail.getFrequency() >= this.config.NAME_SINGLE_THRESHOLD) {
                    name.append(c1);
                    return name.toString();
                }
            }
            return null;
        }

        if (secondLength == 3) {
            final String c1 = second.getValue().charAt(0) + "";
            final String c2 = second.getValue().charAt(1) + "";
            final IWord tail = this.dic.get(0, second.getValue().charAt(2) + "");
            if (!this.dic.match(4, c1) || !this.dic.match(5, c2)) {
                return null;
            }
            if (tail != null && tail.getFrequency() < this.config.NAME_SINGLE_THRESHOLD) {
                return null;
            }
            name.append(c1);
            name.append(c2);
            return name.toString();
        }

        return null;
    }

    /**
     * Reads the run of CJK characters that starts with {@code i}.
     *
     * <p>Reading stops at end of input or at the first character that is not a
     * CJK character; that character is pushed back so the caller sees it again.
     * Bit 1 of {@link #ctrlMask} is cleared first and set again when the
     * terminating character is an English letter or digit.
     *
     * @param i first character of the sentence (already consumed by the caller)
     * @return the characters of the sentence
     * @throws IOException if reading the input fails
     */
    public char[] nextCJKSentence(int i) throws IOException {
        this.isb.clear();
        this.isb.append((char) i);
        this.ctrlMask &= -2;

        int ch;
        while ((ch = readNext()) != -1) {
            if (StringUtil.isWhitespace(ch)) {
                pushBack(ch);
                break;
            }
            if (!StringUtil.isCJKChar(ch)) {
                pushBack(ch);
                if (StringUtil.isEnLetter(ch) || StringUtil.isEnNumeric(ch)) {
                    this.ctrlMask |= 1;
                }
                // The sentence ends here. Without this break the pushed-back
                // character would be read again forever.
                break;
            }
            this.isb.append((char) ch);
        }
        return this.isb.toString().toCharArray();
    }

    /**
     * Reads the Latin word that starts with {@code i} and turns it into a word
     * object.
     *
     * <p>Full-width characters are converted to their half-width form and
     * upper-case letters to lower case. Trailing English punctuation is given
     * back to the reader unless the text including it is a dictionary word. A
     * plain number directly followed by a lexicon-1 character is merged with
     * it. When a CJK character follows the Latin text, the longest dictionary
     * word made of the Latin text plus up to {@code mixSuffixLength} following
     * characters is preferred. Bit 4 of {@link #ctrlMask} is cleared first and
     * set again when the word should go through secondary segmentation.
     *
     * @param i  first character of the word (already consumed by the caller)
     * @param i2 stream offset of that character (not used by this method)
     * @return the word, never {@code null}
     * @throws IOException if reading the input fails
     */
    protected IWord nextLatinWord(int i, int i2) throws IOException {
        this.isb.clear();
        // Full-width form -> half-width form.
        if (i > 65280) {
            i -= 65248;
        }
        // 'A'..'Z' -> lower case.
        if (i >= 65 && i <= 90) {
            i += 32;
        }
        this.isb.append((char) i);

        boolean cjkFollows = false;
        boolean stoppedAtType3 = false;
        int typeRuns = 1; // number of runs of same-typed characters in the word
        int lastType = StringUtil.getEnCharType(i);
        this.ctrlMask &= -5;

        int ch;
        while ((ch = readNext()) != -1) {
            if (ch > 65280) {
                ch -= 65248;
            }
            final int type = StringUtil.getEnCharType(ch);
            if (type == 3) {
                // Type 3 (presumably whitespace - confirm) ends the word and is
                // consumed rather than pushed back.
                stoppedAtType3 = true;
                break;
            }
            if (type == 2 && !this.config.isKeepPunctuation((char) ch)) {
                pushBack(ch);
                break;
            }
            if (type == -1) {
                // Not an English character: the word ends here. Without this
                // break the pushed-back character would be read again forever.
                pushBack(ch);
                if (StringUtil.isCJKChar(ch)) {
                    cjkFollows = true;
                }
                break;
            }
            if (ch >= 65 && ch <= 90) {
                ch += 32;
            }
            this.isb.append((char) ch);
            if (type != lastType) {
                typeRuns++;
                lastType = type;
            }
            if (this.isb.length() > this.config.MAX_LATIN_LENGTH) {
                break;
            }
        }

        String latin = this.isb.toString();
        IWord word = null;
        boolean checkUnits = true;
        boolean runDropped = false;

        // Strip trailing punctuation ('%' is kept) until the remaining text is
        // a dictionary word or no trailing punctuation is left.
        for (int t = this.isb.length() - 1;
                t > 0 && this.isb.charAt(t) != '%' && StringUtil.isEnPunctuation(this.isb.charAt(t));
                t--) {
            if (this.dic.match(0, latin)) {
                word = this.dic.get(0, latin).m9clone();
                word.setType(2);
                word.setPartSpeechForNull(IWord.EN_POSPEECH);
                checkUnits = false;
                break;
            }
            pushBack(this.isb.charAt(t));
            this.isb.deleteCharAt(t);
            latin = this.isb.toString();
            // The stripped punctuation formed one run; count it out once.
            if (!runDropped) {
                typeRuns--;
                runDropped = true;
            }
        }

        final boolean multipleRuns = typeRuns > 1;

        // Case 1: the word ended at end of input or at a type-3 character.
        if (ch == -1 || stoppedAtType3) {
            if (word == null) {
                word = wordNewOrClone(0, latin, 5);
                word.setPartSpeechForNull(IWord.EN_POSPEECH);
            }
            if (multipleRuns) {
                this.ctrlMask |= 4;
            }
            return word;
        }

        // Case 2: no CJK character follows.
        if (!cjkFollows) {
            if (checkUnits && (StringUtil.isDigit(latin) || StringUtil.isDecimal(latin))) {
                final int unit = readNext();
                if (this.dic.match(1, ((char) unit) + "")) {
                    word = wordNewOrClone(0, new String(latin + ((char) unit)), 2);
                    word.setPartSpeechForNull(IWord.NUMERIC_POSPEECH);
                } else {
                    pushBack(unit);
                }
            }
            if (word == null) {
                word = wordNewOrClone(0, latin, 5);
                word.setPartSpeechForNull(IWord.EN_POSPEECH);
                if (multipleRuns) {
                    this.ctrlMask |= 4;
                }
            }
            return word;
        }

        // Case 3: a CJK character follows - look for a Latin/CJK mixed word.
        final IStringBuffer mixBuffer = new IStringBuffer();
        mixBuffer.append(latin);
        int matchedAhead = 0; // look-ahead chars covered by the longest match
        int readAhead = 0;    // look-ahead chars read so far
        this.iaList.clear();
        while (readAhead < this.dic.mixSuffixLength && mixBuffer.length() < this.config.MAX_LENGTH) {
            final int next = readNext();
            if (next == -1) {
                break;
            }
            if (StringUtil.isWhitespace(next)) {
                pushBack(next);
                break;
            }
            mixBuffer.append((char) next);
            this.iaList.add(next);
            final String candidate = mixBuffer.toString();
            if (this.dic.match(0, candidate)) {
                word = this.dic.get(0, candidate);
                word.setType(2);
                this.ctrlMask |= 4;
                matchedAhead = readAhead + 1;
            }
            readAhead++;
        }
        mixBuffer.clear();
        // Give back, last first, everything read beyond the longest match.
        for (int k = readAhead - 1; k >= matchedAhead; k--) {
            pushBack(this.iaList.get(k));
        }

        if (checkUnits && matchedAhead == 0 && (StringUtil.isDigit(latin) || StringUtil.isDecimal(latin))) {
            final int unit = readNext();
            if (this.dic.match(1, ((char) unit) + "")) {
                word = wordNewOrClone(0, new String(latin + ((char) unit)), 2);
                word.setPartSpeechForNull(IWord.NUMERIC_POSPEECH);
            } else {
                pushBack(unit);
            }
        }

        if (word == null) {
            word = wordNewOrClone(0, latin, 5);
            word.setPartSpeechForNull(IWord.EN_POSPEECH);
            if (multipleRuns) {
                this.ctrlMask |= 4;
            }
        } else if (matchedAhead > 0) {
            // The mixed word came straight from the dictionary: hand out a copy.
            word = word.m9clone();
        }
        return word;
    }

    /**
     * Reads the Latin string that starts with {@code i}.
     *
     * <p>Full-width characters are converted to their half-width form and
     * upper-case letters to lower case. Every '.' after the first character is
     * removed from the result. Bit 8 of {@link #ctrlMask} is cleared first and
     * set again when the string is directly followed by a CJK character.
     *
     * @param i first character of the string (already consumed by the caller)
     * @return the normalized Latin string
     * @throws IOException if reading the input fails
     */
    protected String nextLatinString(int i) throws IOException {
        this.isb.clear();
        // Full-width form -> half-width form.
        if (i > 65280) {
            i -= 65248;
        }
        // 'A'..'Z' -> lower case.
        if (i >= 65 && i <= 90) {
            i += 32;
        }
        this.isb.append((char) i);
        this.ctrlMask &= -9;

        int ch;
        while ((ch = readNext()) != -1) {
            if (ch > 65280) {
                ch -= 65248;
            }
            final int type = StringUtil.getEnCharType(ch);
            if (type == 3) {
                // Type 3 (presumably whitespace - confirm) ends the string and
                // is consumed rather than pushed back.
                break;
            }
            if (type == 2 && !this.config.isKeepPunctuation((char) ch)) {
                pushBack(ch);
                break;
            }
            if (type == -1) {
                // Not an English character: the string ends here. Without this
                // break the pushed-back character would be read again forever.
                pushBack(ch);
                if (StringUtil.isCJKChar(ch)) {
                    this.ctrlMask |= 8;
                }
                break;
            }
            if (ch >= 65 && ch <= 90) {
                ch += 32;
            }
            this.isb.append((char) ch);
            if (this.isb.length() > this.config.MAX_LATIN_LENGTH) {
                break;
            }
        }

        // Walk backwards so deletions do not shift the indices still to visit.
        for (int k = this.isb.length() - 1; k > 0; k--) {
            if (this.isb.charAt(k) == '.') {
                this.isb.deleteCharAt(k);
            }
        }
        return this.isb.toString();
    }

    /**
     * Collects a run of letter-number characters that begins with {@code i}.
     *
     * <p>Characters are read until the input ends, or until a character is either
     * whitespace or rejected by {@code StringUtil.isLetterNumber}; that terminating
     * character is pushed back onto the reader.
     *
     * @param i the first character of the run, already read by the caller
     * @return the collected characters, including {@code i}
     * @throws IOException if reading from the underlying reader fails
     */
    protected String nextLetterNumber(int i) throws IOException {
        this.isb.clear();
        this.isb.append((char) i);
        for (int ch = readNext(); ch != -1; ch = readNext()) {
            if (StringUtil.isWhitespace(ch) || !StringUtil.isLetterNumber(ch)) {
                // Not part of the run: hand the character back to the reader.
                pushBack(ch);
                break;
            }
            this.isb.append((char) ch);
        }
        return this.isb.toString();
    }

    /**
     * Collects a run of "other number" characters that begins with {@code i}.
     *
     * <p>Characters are read until the input ends, or until a character is either
     * whitespace or rejected by {@code StringUtil.isOtherNumber}; that terminating
     * character is pushed back onto the reader.
     *
     * @param i the first character of the run, already read by the caller
     * @return the collected characters, including {@code i}
     * @throws IOException if reading from the underlying reader fails
     */
    protected String nextOtherNumber(int i) throws IOException {
        this.isb.clear();
        this.isb.append((char) i);
        for (int ch = readNext(); ch != -1; ch = readNext()) {
            if (StringUtil.isWhitespace(ch) || !StringUtil.isOtherNumber(ch)) {
                // Not part of the run: hand the character back to the reader.
                pushBack(ch);
                break;
            }
            this.isb.append((char) ch);
        }
        return this.isb.toString();
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Extracts a Chinese numeric string from {@code cArr}, starting at index {@code i}.
     *
     * <p>The character at {@code i} is always taken. Following characters are appended
     * while {@code NumericUtil.isCNNumeric} accepts them. The two-character sequence
     * U+5206 U+4E4B is also accepted when it is immediately followed by another Chinese
     * numeric character; in that case all three characters are appended and bit 2 of
     * {@code ctrlMask} is set. Bit 2 is cleared on entry.
     *
     * @param cArr the characters to scan
     * @param i    index of the first character of the numeric
     * @return the extracted numeric string
     * @throws IOException declared by the signature; nothing in this body raises it directly
     */
    public String nextCNNumeric(char[] cArr, int i) throws IOException {
        this.isb.clear();
        this.isb.append(cArr[i]);
        this.ctrlMask &= ~2;

        int pos = i + 1;
        while (pos < cArr.length) {
            if (NumericUtil.isCNNumeric(cArr[pos]) != -1) {
                this.isb.append(cArr[pos]);
                pos++;
                continue;
            }

            // A fraction marker needs two more characters after the current one.
            boolean fractionMarker = pos + 2 < cArr.length
                    && cArr[pos] == '\u5206'
                    && cArr[pos + 1] == '\u4e4b'
                    && NumericUtil.isCNNumeric(cArr[pos + 2]) != -1;
            if (!fractionMarker) {
                break;
            }

            this.isb.append(cArr[pos]);
            this.isb.append(cArr[pos + 1]);
            this.isb.append(cArr[pos + 2]);
            this.ctrlMask |= 2;
            pos += 3;
        }
        return this.isb.toString();
    }

    /**
     * Reads the text enclosed by a paired punctuation mark.
     *
     * <p>{@code i} is the opening mark; its partner is obtained from
     * {@code StringUtil.getPunctuationPair}. At most {@code config.PPT_MAX_LENGTH}
     * characters are read while looking for the partner. When it is found it is pushed
     * back onto the reader and the text read before it is returned. Otherwise every
     * character read here is pushed back, last one first, and {@code null} is returned.
     *
     * @param i the opening punctuation character
     * @return the enclosed text, or {@code null} if the closing mark was not found
     * @throws IOException if reading from the underlying reader fails
     */
    protected String getPairPunctuationText(int i) throws IOException {
        this.isb.clear();
        final char closing = StringUtil.getPunctuationPair((char) i);
        this.iaList.clear();

        int consumed = 0;
        while (consumed < this.config.PPT_MAX_LENGTH) {
            int ch = readNext();
            if (ch == -1) {
                break;
            }
            if (ch == closing) {
                // Leave the closing mark in the stream for the caller to handle.
                pushBack(ch);
                return this.isb.toString();
            }
            this.isb.append((char) ch);
            this.iaList.add(ch);
            consumed++;
        }

        // No closing mark: undo the look-ahead in reverse read order.
        while (consumed > 0) {
            consumed--;
            pushBack(this.iaList.get(consumed));
        }
        return null;
    }

    /**
     * Returns a word for {@code str}: a clone of the dictionary entry when one exists,
     * otherwise a newly created {@code Word}.
     *
     * @param i   first argument passed to {@code dic.match} and {@code dic.get}
     *            (presumably the lexicon index - confirm)
     * @param str the word text to look up
     * @param i2  second constructor argument of the new {@code Word} when there is no
     *            dictionary entry (presumably the word type - confirm)
     * @return a cloned dictionary word or a fresh {@code Word}
     */
    public IWord wordNewOrClone(int i, String str, int i2) {
        if (!this.dic.match(i, str)) {
            return new Word(str, i2);
        }
        // Clone so callers can modify the result without touching the dictionary entry.
        return this.dic.get(i, str).m9clone();
    }

    /**
     * Chunk selection hook. This base implementation ignores its arguments and always
     * returns {@code null}.
     *
     * <p>NOTE(review): presumably meant to be overridden by concrete segmenters that
     * select a chunk from {@code cArr} - confirm against the subclasses.
     *
     * @param cArr the characters being segmented (unused here)
     * @param i    unused here; presumably a start index - confirm
     * @param i2   unused here; meaning not visible from this file
     * @return always {@code null} in this implementation
     */
    protected IChunk getBestChunk(char[] cArr, int i, int i2) {
        return null;
    }
}
