/*
 * Decompiled with CFR 0.152.
 */
package com.yahoo.language.lucene;

import com.yahoo.component.provider.ComponentRegistry;
import com.yahoo.language.Language;
import com.yahoo.language.lucene.AnalyzerFactory;
import com.yahoo.language.lucene.LuceneAnalysisConfig;
import com.yahoo.language.process.StemMode;
import com.yahoo.language.process.Token;
import com.yahoo.language.process.TokenScript;
import com.yahoo.language.process.TokenType;
import com.yahoo.language.process.Tokenizer;
import com.yahoo.language.simple.SimpleToken;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

class LuceneTokenizer
implements Tokenizer {
    private static final Logger log = Logger.getLogger(LuceneTokenizer.class.getName());
    private static final String FIELD_NAME = "F";
    private final AnalyzerFactory analyzerFactory;

    public LuceneTokenizer(LuceneAnalysisConfig config) {
        this(config, (ComponentRegistry<Analyzer>)new ComponentRegistry());
    }

    public LuceneTokenizer(LuceneAnalysisConfig config, ComponentRegistry<Analyzer> analyzers) {
        this.analyzerFactory = new AnalyzerFactory(config, analyzers);
    }

    public Iterable<Token> tokenize(String input, Language language, StemMode stemMode, boolean removeAccents) {
        if (input.isEmpty()) {
            return List.of();
        }
        List<Token> tokens = this.textToTokens(input, this.analyzerFactory.getAnalyzer(language, stemMode, removeAccents));
        log.log(Level.FINEST, () -> "Tokenized '" + language + "' text='" + input + "' into: n=" + tokens.size() + ", tokens=" + tokens);
        return tokens;
    }

    private List<Token> textToTokens(String text, Analyzer analyzer) {
        ArrayList<Token> tokens = new ArrayList<Token>();
        TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);
        CharTermAttribute charTermAttribute = (CharTermAttribute)tokenStream.addAttribute(CharTermAttribute.class);
        OffsetAttribute offsetAttribute = (OffsetAttribute)tokenStream.addAttribute(OffsetAttribute.class);
        try {
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                String originalString = text.substring(offsetAttribute.startOffset(), offsetAttribute.endOffset());
                String tokenString = charTermAttribute.toString();
                tokens.add((Token)new SimpleToken(originalString, tokenString).setType(TokenType.ALPHABETIC).setOffset((long)offsetAttribute.startOffset()).setScript(TokenScript.UNKNOWN));
            }
            tokenStream.end();
            tokenStream.close();
        }
        catch (IOException e) {
            throw new RuntimeException("Failed to analyze: " + text, e);
        }
        return tokens;
    }
}

