/*
 * Decompiled with CFR 0.152.
 */
package com.yahoo.language.lucene;

import com.yahoo.component.provider.ComponentRegistry;
import com.yahoo.language.lucene.AnalyzerFactory;
import com.yahoo.language.lucene.LuceneAnalysisConfig;
import com.yahoo.language.process.LinguisticsParameters;
import com.yahoo.language.process.Token;
import com.yahoo.language.process.TokenType;
import com.yahoo.language.process.Tokenizer;
import com.yahoo.language.simple.SimpleToken;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

/**
 * {@link Tokenizer} that runs the input text through a Lucene {@link Analyzer}
 * supplied by an {@link AnalyzerFactory} and converts the terms the analyzer
 * emits into {@link Token}s.
 */
class LuceneTokenizer
implements Tokenizer {
    private static final Logger log = Logger.getLogger(LuceneTokenizer.class.getName());
    /** Field name handed to {@link Analyzer#tokenStream(String, String)}. */
    private static final String FIELD_NAME = "F";
    private final AnalyzerFactory analyzerFactory;

    /**
     * Creates a tokenizer from the given configuration, using an empty analyzer registry.
     *
     * @param config the Lucene analysis configuration passed on to the {@link AnalyzerFactory}
     */
    public LuceneTokenizer(LuceneAnalysisConfig config) {
        this(config, new ComponentRegistry<>());
    }

    /**
     * Creates a tokenizer from the given configuration and analyzer registry.
     *
     * @param config    the Lucene analysis configuration passed on to the {@link AnalyzerFactory}
     * @param analyzers the registry of analyzer components passed on to the {@link AnalyzerFactory}
     */
    public LuceneTokenizer(LuceneAnalysisConfig config, ComponentRegistry<Analyzer> analyzers) {
        this.analyzerFactory = new AnalyzerFactory(config, analyzers);
    }

    /**
     * Tokenizes {@code input} with the analyzer the factory returns for the language,
     * stem mode and accent-removal setting found in {@code parameters}.
     *
     * @param input      the text to tokenize; an empty string yields an empty result
     * @param parameters the linguistics parameters used to select the analyzer
     * @return the tokens produced by the analyzer, in the order they were emitted
     * @throws RuntimeException if the underlying token stream fails with an {@link IOException}
     */
    public Iterable<Token> tokenize(String input, LinguisticsParameters parameters) {
        if (input.isEmpty()) {
            return List.of();
        }
        Analyzer analyzer = this.analyzerFactory.getAnalyzer(
                parameters.language(), parameters.stemMode(), parameters.removeAccents());
        List<Token> tokens = this.textToTokens(input, analyzer);
        // Supplier form so the message is only built when FINEST is enabled.
        log.log(Level.FINEST, () -> "Tokenized '" + parameters.language() + "' text='" + input
                + "' into: n=" + tokens.size() + ", tokens=" + tokens);
        return tokens;
    }

    /**
     * Runs {@code text} through {@code analyzer} and collects the resulting tokens.
     *
     * <p>Each emitted term becomes a {@link SimpleToken} of type {@link TokenType#ALPHABETIC}
     * whose original form is the substring of {@code text} at the term's offsets. A term that
     * starts at the same offset as the token created last is added to that token as an
     * additional stem instead of becoming a token of its own.
     *
     * @param text     the text to analyze
     * @param analyzer the analyzer producing the token stream
     * @return the collected tokens
     * @throws RuntimeException wrapping any {@link IOException} raised while consuming
     *                          or closing the token stream
     */
    private List<Token> textToTokens(String text, Analyzer analyzer) {
        List<Token> tokens = new ArrayList<>();
        // try-with-resources guarantees close() runs on every path, and that a failure in
        // close() never masks an exception thrown while consuming the stream.
        try (TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text)) {
            CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
            OffsetAttribute offsetAttribute = tokenStream.addAttribute(OffsetAttribute.class);
            tokenStream.reset();
            SimpleToken current = null;
            while (tokenStream.incrementToken()) {
                String originalString = text.substring(offsetAttribute.startOffset(), offsetAttribute.endOffset());
                String tokenString = charTermAttribute.toString();
                if (this.isAtSamePositionAs(current, offsetAttribute)) {
                    current.addStem(tokenString);
                    continue;
                }
                current = new SimpleToken(originalString, tokenString)
                        .setType(TokenType.ALPHABETIC)
                        .setOffset((long)offsetAttribute.startOffset());
                tokens.add(current);
            }
            // end() belongs after the last incrementToken(), on the successful path only.
            tokenStream.end();
        }
        catch (IOException e) {
            throw new RuntimeException("Failed to analyze: " + text, e);
        }
        return tokens;
    }

    /**
     * Tells whether the term currently exposed by {@code offsetAttribute} starts at the
     * same offset as {@code token}.
     *
     * @param token           the token to compare against, or {@code null} if there is none yet
     * @param offsetAttribute the offset attribute of the current term
     * @return {@code true} if {@code token} is non-null and the start offsets are equal
     */
    private boolean isAtSamePositionAs(Token token, OffsetAttribute offsetAttribute) {
        if (token == null) {
            return false;
        }
        return (long)offsetAttribute.startOffset() == token.getOffset();
    }
}

