package com.yahoo.prelude.query.parser;

import com.yahoo.language.Linguistics;
import com.yahoo.language.process.CharacterClasses;
import com.yahoo.language.process.SpecialTokens;
import com.yahoo.prelude.Index;
import com.yahoo.prelude.IndexFacts;
import com.yahoo.prelude.query.Substring;
import com.yahoo.prelude.query.parser.Token;
import com.yahoo.search.query.parser.ParserEnvironment;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;

/* loaded from: input_file:com/yahoo/prelude/query/parser/Tokenizer.class */
/**
 * Splits a query string into a flat list of {@link Token}s.
 *
 * <p>An instance keeps the text being tokenized and the produced token list as
 * mutable fields, and the list returned from {@code tokenize} is that same
 * internal list: it is cleared and refilled by the next {@code tokenize} call.</p>
 */
public final class Tokenizer {

    /** Fullwidth asterisk (U+FF0A), treated like an ASCII star. */
    private static final int FULLWIDTH_STAR = 0xFF0A;

    /** The text being tokenized; set at the start of a tokenize call and nulled at the end. */
    private String source;

    private final CharacterClasses characterClasses;

    /** The tokens produced by the current/last tokenize call. */
    private final List<Token> tokens = new ArrayList<>();

    private SpecialTokens specialTokens = null;

    /** Whether special tokens are also looked for at every position inside a word. */
    private boolean substringSpecialTokens = false;

    private ParserEnvironment.ParserSettings parserSettings = new ParserEnvironment.ParserSettings();

    /** Number of currently unmatched left parentheses seen by the main tokenize loop. */
    private int parensToEat = 0;

    /**
     * The value of {@code tokens.size()} at the time the last colon switched to a named index,
     * i.e. the list position the COLON token was subsequently added at.
     */
    private int indexLastExplicitlyChangedAt = 0;

    /**
     * Creates a tokenizer using the character classes of the given linguistics.
     *
     * @param linguistics the provider of the character classification used while tokenizing
     */
    public Tokenizer(Linguistics linguistics) {
        this.characterClasses = linguistics.getCharacterClasses();
    }

    /**
     * Sets the special tokens to recognize, or null to recognize none.
     *
     * @param specialTokens the special tokens to use, may be null
     */
    public void setSpecialTokens(SpecialTokens specialTokens) {
        this.specialTokens = specialTokens;
    }

    /**
     * Sets whether special tokens should be looked up at every position while consuming a word.
     *
     * @param z true to also look for special tokens inside words
     */
    public void setSubstringSpecialTokens(boolean z) {
        this.substringSpecialTokens = z;
    }

    /**
     * Sets the parser settings consulted during tokenization.
     *
     * @param parserSettings the settings to use
     */
    public void setParserSettings(ParserEnvironment.ParserSettings parserSettings) {
        this.parserSettings = parserSettings;
    }

    /**
     * Tokenizes the string using a session created from an empty {@link IndexFacts}.
     *
     * @param str the text to tokenize
     * @return the internal token list, ending with an EOF token
     */
    public List<Token> tokenize(String str) {
        return tokenize(str, new IndexFacts().newSession(Set.of(), Set.of()));
    }

    /**
     * Tokenizes the string with {@code "default"} as the default index name.
     *
     * @param str the text to tokenize
     * @param session the index information to look indexes up in
     * @return the internal token list, ending with an EOF token
     */
    public List<Token> tokenize(String str, IndexFacts.Session session) {
        return tokenize(str, "default", session);
    }

    /**
     * Tokenizes the string.
     *
     * @param str the text to tokenize
     * @param str2 the name of the default index; when null, {@code Index.nullIndex} is used as
     *             the top level index
     * @param session the index information to look indexes up in
     * @return the internal token list, ending with an EOF token; the list is reused by later calls
     */
    public List<Token> tokenize(String str, String str2, IndexFacts.Session session) {
        this.source = str;
        this.tokens.clear();
        // Reset all per-call state so a reused instance behaves like a fresh one.
        this.parensToEat = 0;
        this.indexLastExplicitlyChangedAt = 0;

        Index defaultIndex = session.getIndex(str2);
        Index topLevelIndex = (str2 != null) ? defaultIndex : Index.nullIndex;
        Index currentIndex = topLevelIndex;

        for (int i = 0; i < source.length(); i++) {
            if (currentIndex.isExact()) {
                i = consumeExact(i, currentIndex);
                // The exact index only applies to the term just consumed.
                currentIndex = topLevelIndex;
            } else {
                i = consumeSpecialToken(i);
            }
            if (i >= source.length()) {
                break;
            }

            int c = source.codePointAt(i);
            if (characterClasses.isSymbol(c)) {
                // Each symbol becomes its own word. The token span is always one char wide,
                // also when the code point occupies two chars.
                addCharToken(Token.Kind.WORD, Character.toString(c), i);
            } else if (characterClasses.isLetterOrDigit(c)
                       || (c == '\'' && acceptApostropheAsWordCharacter(currentIndex))) {
                i = consumeWordOrNumber(i, currentIndex);
            } else if (Character.isWhitespace(c)) {
                addCharToken(Token.Kind.SPACE, " ", i);
            } else if ((c == 0x3001 || c == 0x3002) && !parserSettings.keepIdeographicPunctuation()) {
                // Ideographic comma (U+3001) and full stop (U+3002) are treated as spaces.
                addCharToken(Token.Kind.SPACE, " ", i);
            } else if (isQuote(c)) {
                addCharToken(Token.Kind.QUOTE, "\"", i);
            } else if (c == '-' || c == 0xFF0D) {
                addCharToken(Token.Kind.MINUS, "-", i);
            } else if (c == '+' || c == 0xFF0B) {
                addCharToken(Token.Kind.PLUS, "+", i);
            } else if (c == '.' || c == 0xFF0E) {
                addCharToken(Token.Kind.DOT, ".", i);
            } else if (c == ',' || c == 0xFF0C) {
                addCharToken(Token.Kind.COMMA, ",", i);
            } else if (c == ':' || c == 0xFF1A) {
                // Must run before the COLON token is added: it inspects the preceding tokens
                // and records the position the COLON token is about to get.
                currentIndex = determineCurrentIndex(defaultIndex, session);
                addCharToken(Token.Kind.COLON, ":", i);
            } else if (c == '(' || c == 0xFF08) {
                addCharToken(Token.Kind.LBRACE, "(", i);
                parensToEat++;
            } else if (c == ')' || c == 0xFF09) {
                addCharToken(Token.Kind.RBRACE, ")", i);
                parensToEat = Math.max(0, parensToEat - 1);
            } else if (c == '[' || c == 0xFF3B) {
                addCharToken(Token.Kind.LSQUAREBRACKET, "[", i);
            } else if (c == ']' || c == 0xFF3D) {
                addCharToken(Token.Kind.RSQUAREBRACKET, "]", i);
            } else if (c == '{') {
                addCharToken(Token.Kind.LCURLYBRACKET, "{", i);
            } else if (c == '}') {
                addCharToken(Token.Kind.RCURLYBRACKET, "}", i);
            } else if (c == ';' || c == 0xFF1B) {
                addCharToken(Token.Kind.SEMICOLON, ";", i);
            } else if (c == '>' || c == 0xFF1E) {
                addCharToken(Token.Kind.GREATER, ">", i);
            } else if (c == '<' || c == 0xFF1C) {
                addCharToken(Token.Kind.SMALLER, "<", i);
            } else if (c == '!' || c == 0xFF01) {
                addCharToken(Token.Kind.EXCLAMATION, "!", i);
            } else if (c == '_' || c == 0xFF3F) {
                addCharToken(Token.Kind.UNDERSCORE, "_", i);
            } else if (c == '^' || c == 0xFF3E) {
                addCharToken(Token.Kind.HAT, "^", i);
            } else if (c == '*' || c == FULLWIDTH_STAR) {
                addCharToken(Token.Kind.STAR, "*", i);
            } else if (c == '$' || c == 0xFF04) {
                addCharToken(Token.Kind.DOLLAR, "$", i);
            } else {
                addCharToken(Token.Kind.NOISE, "<NOISE>", i);
            }
        }

        addToken(Token.Kind.EOF, "<EOF>", source.length(), source.length());
        this.source = null;
        return tokens;
    }

    /** Returns whether the code point is one of the quote characters mapped to a QUOTE token. */
    private static boolean isQuote(int c) {
        switch (c) {
            case '"':
            case 0x201C:
            case 0x201D:
            case 0x201E:
            case 0x201F:
            case 0x2039:
            case 0x203A:
            case 0x00AB:
            case 0x00BB:
            case 0x301D:
            case 0x301E:
            case 0x301F:
            case 0xFF02:
                return true;
            default:
                return false;
        }
    }

    /**
     * Decides whether an apostrophe should be consumed as part of a word.
     *
     * <p>It always is, unless the index is a URI or host index and, scanning the tokens backwards
     * without passing a SPACE token, the COLON token which explicitly selected an index is found.</p>
     */
    private boolean acceptApostropheAsWordCharacter(Index currentIndex) {
        if (!currentIndex.isUriIndex() && !currentIndex.isHostIndex()) {
            return true;
        }
        for (int k = tokens.size() - 1; k >= 0; k--) {
            Token.Kind kind = tokens.get(k).kind;
            if (kind == Token.Kind.SPACE) {
                return true;
            }
            if (kind == Token.Kind.COLON && k == indexLastExplicitlyChangedAt) {
                return false;
            }
        }
        return true;
    }

    /**
     * Resolves the index named by the tokens immediately preceding a colon.
     *
     * <p>The name is the concatenated images of the trailing run of WORD, UNDERSCORE, NUMBER and
     * DOT tokens. If its canonical name resolves to a non-null index, that index is returned and
     * the current token count is remembered in {@code indexLastExplicitlyChangedAt}.</p>
     *
     * @param defaultIndex the index to return when no named index is found
     * @param session the index information to look the name up in
     * @return the named index, or {@code defaultIndex}
     */
    private Index determineCurrentIndex(Index defaultIndex, IndexFacts.Session session) {
        int nameStart = tokens.size();
        while (nameStart > 0 && isIndexNamePart(tokens.get(nameStart - 1))) {
            nameStart--;
        }

        StringBuilder name = new StringBuilder();
        for (int k = nameStart; k < tokens.size(); k++) {
            name.append(tokens.get(k).image);
        }

        String indexName = name.toString();
        if (!indexName.isEmpty()) {
            Index named = session.getIndex(session.getCanonicName(indexName));
            if (!named.isNull()) {
                indexLastExplicitlyChangedAt = tokens.size();
                return named;
            }
        }
        return defaultIndex;
    }

    /** Returns whether the token is of a kind which may be part of an index name. */
    private static boolean isIndexNamePart(Token token) {
        return token.kind == Token.Kind.WORD
               || token.kind == Token.Kind.UNDERSCORE
               || token.kind == Token.Kind.NUMBER
               || token.kind == Token.Kind.DOT;
    }

    /**
     * Adds a special token if one is found at the given position.
     *
     * @return the position following the special token, or {@code start} if none was found
     */
    private int consumeSpecialToken(int start) {
        SpecialTokens.Token specialToken = getSpecialToken(start);
        if (specialToken == null) {
            return start;
        }
        tokens.add(toToken(specialToken, start, source));
        return start + specialToken.token().length();
    }

    /** Returns the special token found at the given position, or null if none (or none configured). */
    private SpecialTokens.Token getSpecialToken(int start) {
        if (specialTokens == null) {
            return null;
        }
        return specialTokens.tokenize(source.substring(start), substringSpecialTokens);
    }

    /**
     * Consumes a term of an exact index: up to the index' terminator if it has one,
     * otherwise using the heuristics of {@link #consumeHeuristicExact(int)}.
     *
     * @return the position to continue tokenizing from
     */
    private int consumeExact(int start, Index index) {
        String terminator = index.getExactTerminator();
        if (terminator == null) {
            return consumeHeuristicExact(start);
        }
        return consumeToTerminator(start, terminator);
    }

    /**
     * Returns whether the text from {@code start} up to the next whitespace (or the end of the
     * text) consists only of things which may trail an exact term: at most one run of
     * {@code '!'} (a single {@code '!'} may be followed by digits), at most one star, and
     * no more right parentheses than there are currently unmatched left parentheses.
     */
    private boolean looksLikeExactEnd(int start) {
        int parensLeft = parensToEat;
        boolean starAllowed = true;
        boolean bangAllowed = true;
        boolean digitsAllowed = false;
        int length = source.length();
        int pos = start;
        while (pos < length) {
            char c = source.charAt(pos++);
            if (Character.isWhitespace(c)) {
                return true;
            }
            if (digitsAllowed && Character.isDigit(c)) {
                continue;
            }
            digitsAllowed = false;
            if (bangAllowed && c == '!') {
                // Swallow the whole run of '!'; digits may only follow a single one.
                boolean singleBang = true;
                while (pos < length && source.charAt(pos) == '!') {
                    pos++;
                    singleBang = false;
                }
                digitsAllowed = singleBang;
                bangAllowed = false;
            } else if (starAllowed && (c == '*' || c == FULLWIDTH_STAR)) {
                starAllowed = false;
            } else if (parensLeft > 0 && c == ')') {
                parensLeft--;
            } else {
                return false;
            }
        }
        return true;
    }

    /**
     * Consumes an exact term when the index has no terminator and adds it as a WORD token,
     * preceded by a STAR token if the term had a leading star.
     *
     * <p>Leading whitespace is skipped. The term is either the content of a double quoted
     * section whose closing quote is followed by something which looks like an end of the term,
     * or the text up to the next whitespace or trailing {@code !}, {@code *} or {@code )}.
     * If a quote is opened but never closed this way, the text is scanned again as unquoted.</p>
     *
     * @param start the position to start consuming at
     * @return the position to continue tokenizing from
     */
    private int consumeHeuristicExact(int start) {
        int endLimit = source.length();
        int actualStart = -1;
        int starPos = -1;
        boolean leadingStar = false;
        boolean isQuoted = false;
        boolean seenSome = false;
        boolean canBeQuoted = true;
        boolean insideQuote = false;
        boolean canBeLeadingStar = true;
        boolean skippingLeadingSpace = true;

        int curPos;
        for (curPos = start; curPos < endLimit; curPos++) {
            char c = source.charAt(curPos);
            if (Character.isWhitespace(c)) {
                if (skippingLeadingSpace) {
                    continue;
                }
                if (!insideQuote) {
                    break;
                }
            }
            skippingLeadingSpace = false;

            if (c == '"') {
                if (canBeQuoted) {
                    canBeQuoted = false;
                    insideQuote = true;
                    actualStart = curPos + 1;
                } else if (insideQuote && looksLikeExactEnd(curPos + 1)) {
                    seenSome = true;
                    insideQuote = false;
                    isQuoted = true;
                    break;
                }
                continue;
            }
            if (insideQuote) {
                continue; // everything up to the closing quote belongs to the term
            }

            boolean isStar = c == '*' || c == FULLWIDTH_STAR;
            if (isStar && canBeLeadingStar) {
                leadingStar = true;
                canBeLeadingStar = false;
                starPos = curPos;
                continue;
            }
            if (seenSome && (c == '!' || isStar || c == ')') && looksLikeExactEnd(curPos)) {
                break;
            }
            if (!seenSome) {
                actualStart = curPos;
                seenSome = true;
                canBeQuoted = false;
                canBeLeadingStar = false;
            }
        }

        if (insideQuote) {
            // The quote was never properly closed: start over, treating the text as unquoted.
            isQuoted = false;
            actualStart = -1;
            starPos = -1;
            leadingStar = false;
            seenSome = false;
            canBeLeadingStar = true;
            skippingLeadingSpace = true;

            for (curPos = start; curPos < endLimit; curPos++) {
                char c = source.charAt(curPos);
                if (Character.isWhitespace(c)) {
                    if (skippingLeadingSpace) {
                        continue;
                    }
                    break;
                }
                skippingLeadingSpace = false;

                boolean isStar = c == '*' || c == FULLWIDTH_STAR;
                if (isStar && canBeLeadingStar) {
                    leadingStar = true;
                    canBeLeadingStar = false;
                    starPos = curPos;
                    continue;
                }
                if (seenSome && (c == '!' || isStar || (c == ')' && parensToEat > 0))) {
                    break;
                }
                if (!seenSome) {
                    actualStart = curPos;
                    seenSome = true;
                    canBeLeadingStar = false;
                }
            }
        }

        if (!seenSome) {
            if (leadingStar) {
                // Nothing but a star: the star itself is the term.
                leadingStar = false;
                actualStart = starPos;
            } else {
                actualStart = start;
            }
        }
        if (leadingStar) {
            addToken(Token.Kind.STAR, "*", starPos, starPos + 1);
        }
        tokens.add(new Token(Token.Kind.WORD,
                             source.substring(actualStart, curPos),
                             true,
                             new Substring(actualStart, curPos, source)));
        // When quoted, curPos is at the closing quote, which is skipped.
        return isQuoted ? curPos + 1 : curPos;
    }

    /**
     * Adds everything from {@code start} up to the next occurrence of the terminator
     * (or the end of the text) as a WORD token.
     *
     * @return the end of the text if no terminator was found, otherwise the position
     *         of the terminator plus the terminator length
     */
    private int consumeToTerminator(int start, String terminator) {
        int end = start;
        while (end < source.length() && !terminatorStartsAt(end, terminator)) {
            end++;
        }
        tokens.add(new Token(Token.Kind.WORD,
                             source.substring(start, end),
                             true,
                             new Substring(start, end, source)));
        if (end >= source.length()) {
            return end;
        }
        return end + terminator.length();
    }

    /** Returns whether the complete terminator occurs in the source text at the given position. */
    private boolean terminatorStartsAt(int start, String terminator) {
        int matched = 0;
        while (start + matched < source.length()
               && source.charAt(start + matched) == terminator.charAt(matched)) {
            matched++;
            if (matched >= terminator.length()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Consumes a run of letters, latin digits and (where accepted) apostrophes and adds it as
     * one token: NOISE if it contains neither letters nor digits, NUMBER if it consists of
     * digits only, otherwise WORD. With substring special tokens enabled the run also ends where
     * a special token starts, and that special token is added as well.
     *
     * @param start the position to start consuming at
     * @param currentIndex the index in effect, used to decide whether apostrophes are accepted
     * @return the position of the last consumed char (the caller advances by one)
     */
    private int consumeWordOrNumber(int start, Index currentIndex) {
        int pos = start;
        SpecialTokens.Token substringToken = null;
        boolean digitsOnly = true;
        boolean noLettersOrDigits = true;

        while (pos < source.length()) {
            if (substringSpecialTokens) {
                substringToken = getSpecialToken(pos);
                if (substringToken != null) {
                    break;
                }
            }
            int c = source.codePointAt(pos);
            if (characterClasses.isLetter(c)) {
                digitsOnly = false;
                noLettersOrDigits = false;
            } else if (characterClasses.isLatinDigit(c)) {
                noLettersOrDigits = false;
            } else if (c == '\'' && acceptApostropheAsWordCharacter(currentIndex)) {
                digitsOnly = false;
            } else {
                break;
            }
            pos += Character.charCount(c);
        }

        if (pos > start) {
            Token.Kind kind;
            if (noLettersOrDigits) {
                kind = Token.Kind.NOISE;
            } else {
                kind = digitsOnly ? Token.Kind.NUMBER : Token.Kind.WORD;
            }
            addToken(kind, source.substring(start, pos), start, pos);
        }

        if (substringToken == null) {
            return pos - 1;
        }
        addToken(toToken(substringToken, pos, source));
        return (pos - 1) + substringToken.token().length();
    }

    /** Adds a token covering the single char at the given position. */
    private void addCharToken(Token.Kind kind, String image, int pos) {
        addToken(kind, image, pos, pos + 1);
    }

    /** Adds a token with the given image, covering {@code [start, end)} of the source text. */
    private void addToken(Token.Kind kind, String image, int start, int end) {
        addToken(new Token(kind, image, false, new Substring(start, end, source)));
    }

    private void addToken(Token token) {
        tokens.add(token);
    }

    /**
     * Converts a special token to a WORD token having the special token's replacement as image.
     *
     * @param token the special token to convert
     * @param i the position in {@code str} the special token starts at
     * @param str the text the special token was found in
     * @return the resulting token, covering the length of the special token from position {@code i}
     */
    public Token toToken(SpecialTokens.Token token, int i, String str) {
        int end = i + token.token().length();
        return new Token(Token.Kind.WORD, token.replacement(), true, new Substring(i, end, str));
    }
}
