package org.apache.uima.conceptMapper.support.tokenizer;

import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.apache.uima.analysis_engine.ResultSpecification;
import org.apache.uima.analysis_engine.annotator.AnnotatorConfigurationException;
import org.apache.uima.analysis_engine.annotator.AnnotatorContext;
import org.apache.uima.analysis_engine.annotator.AnnotatorInitializationException;
import org.apache.uima.analysis_engine.annotator.AnnotatorProcessException;
import org.apache.uima.analysis_engine.annotator.JTextAnnotator_ImplBase;
import org.apache.uima.conceptMapper.support.stemmer.Stemmer;
import org.apache.uima.jcas.JCas;

/* loaded from: input_file:org/apache/uima/conceptMapper/support/tokenizer/OffsetTokenizer.class */
/**
 * Annotator that splits text into {@link TokenAnnotation}s on a configurable set of delimiter
 * characters, recording each token's begin/end character offsets.
 *
 * <p>Token text may optionally be case-folded and/or stemmed before it is stored on the
 * annotation; both are controlled through the configuration parameters named by
 * {@link #PARAM_CASE_MATCH} and {@link #PARAM_STEMMER_CLASS}. The delimiter set can be extended
 * through {@link #PARAM_TOKEN_DELIM}.
 *
 * <p>The tokenizer keeps its scan position in instance fields, so a single instance walks one
 * text at a time.
 */
public class OffsetTokenizer extends JTextAnnotator_ImplBase {
    /** Configuration parameter selecting the case-folding mode. */
    public static final String PARAM_CASE_MATCH = "caseMatch";
    /** Configuration parameter naming the stemmer class to instantiate. */
    public static final String PARAM_STEMMER_CLASS = "Stemmer";
    /** Configuration parameter supplying extra (non-whitespace) delimiter characters. */
    public static final String PARAM_TOKEN_DELIM = "tokenDelimiters";

    /** Matches a token made of one upper-case ASCII letter followed by lower-case ASCII letters. */
    private static final Pattern INITIAL_CAP_PATTERN = Pattern.compile("^[A-Z][a-z]+$");
    /** Matches any ASCII digit. */
    private static final Pattern DIGIT_PATTERN = Pattern.compile("[0-9]");

    /** Text currently being tokenized; may be null until {@link #setText(String)} is called. */
    private String text;
    /** Index of the next character to examine in {@link #text}. */
    private int offset;
    /** Length of {@link #text}, or 0 when there is no text. */
    private int length;
    private Stemmer stemmer = null;
    /** Whitespace delimiters that are always kept when extra delimiters are configured. */
    private String wsDelim = " \t\n\r\f";
    /** Default extra delimiters, used until {@link #setDelim(String)} replaces them. */
    private String extraDelim = ",-/();:";
    private String delim = this.wsDelim + this.extraDelim;
    private boolean caseFoldInitCap = false;
    private boolean caseFoldDigit = false;
    private boolean caseFoldAll = false;
    private boolean stemTokens = false;

    /** Creates a tokenizer with the default delimiters, no case folding and no stemming. */
    public OffsetTokenizer() {
    }

    /**
     * @return the text currently being tokenized, possibly null
     */
    public String getText() {
        return this.text;
    }

    /**
     * Sets the text to tokenize and rewinds the scan position to its start.
     *
     * @param str the text to scan; null is treated as empty, so no tokens are produced
     */
    public void setText(String str) {
        this.text = str;
        this.offset = 0;
        String current = getText();
        // A missing text must not blow up here; it simply yields zero tokens.
        this.length = (current == null) ? 0 : current.length();
    }

    /**
     * @return the stemmer applied to token text, or null if none has been set
     */
    public Stemmer getStemmer() {
        return this.stemmer;
    }

    /**
     * @param stemmer the stemmer to apply to token text
     */
    public void setStemmer(Stemmer stemmer) {
        this.stemmer = stemmer;
    }

    /**
     * Creates an empty token annotation in the given CAS.
     *
     * @param jCas the CAS the annotation belongs to
     * @return a new, not yet indexed annotation
     */
    public TokenAnnotation newToken(JCas jCas) {
        return new TokenAnnotation(jCas);
    }

    /**
     * Scans forward from the current position and returns the next token.
     *
     * <p>The token's begin/end are offsets into the text passed to {@link #setText(String)};
     * its text is the raw character run after case folding and stemming have been applied.
     * The returned annotation is not added to the CAS indexes.
     *
     * @param jCas the CAS in which to create the annotation
     * @return the next token, or null when no further token exists
     */
    public TokenAnnotation nextToken(JCas jCas) {
        if (this.offset >= this.length) {
            return null;
        }
        final String source = getText();
        final String delimiters = getDelim();

        // Skip the delimiters in front of the token.
        while (this.offset < this.length && delimiters.indexOf(source.charAt(this.offset)) >= 0) {
            this.offset++;
        }
        if (this.offset >= this.length) {
            return null;
        }

        // Consume characters up to the next delimiter or the end of the text.
        final int begin = this.offset;
        while (this.offset < this.length && delimiters.indexOf(source.charAt(this.offset)) < 0) {
            this.offset++;
        }

        TokenAnnotation token = newToken(jCas);
        token.setText(stem(foldCase(source.substring(begin, this.offset))));
        token.setBegin(begin);
        token.setEnd(this.offset);
        return token;
    }

    /**
     * Case-folds the token if the configured folding mode applies to it.
     *
     * @param str the raw token text
     * @return the folded text, or {@code str} unchanged when folding does not apply
     */
    protected String foldCase(String str) {
        return shouldFoldCase(str) ? doFoldCase(str) : str;
    }

    /**
     * Trims and lower-cases the given string. Lower-casing uses {@link String#toLowerCase()},
     * i.e. the rules of the JVM's default locale.
     *
     * @param str the text to fold
     * @return the trimmed, lower-cased text
     */
    public static String doFoldCase(String str) {
        return str.trim().toLowerCase();
    }

    /**
     * Decides whether a token should be case-folded under the current settings.
     *
     * @param str the raw token text
     * @return true if all tokens are folded, or initial-cap folding is on and the token is one
     *         upper-case letter followed by lower-case letters, or digit folding is on and the
     *         token contains a digit
     */
    public boolean shouldFoldCase(String str) {
        if (this.caseFoldAll) {
            return true;
        }
        if (this.caseFoldInitCap && INITIAL_CAP_PATTERN.matcher(str).matches()) {
            return true;
        }
        return this.caseFoldDigit && DIGIT_PATTERN.matcher(str).find();
    }

    /**
     * @return true if token text is passed through the stemmer
     */
    public boolean shouldStem() {
        return this.stemTokens;
    }

    /**
     * Replaces the extra delimiters; the whitespace delimiters are always retained.
     *
     * @param str the non-whitespace delimiter characters
     */
    protected void setDelim(String str) {
        this.delim = this.wsDelim + str;
    }

    /**
     * Replaces the complete delimiter set, including whitespace.
     *
     * @param str every character that should separate tokens
     */
    protected void overrideDelim(String str) {
        this.delim = str;
    }

    private void setStemming(boolean z) {
        this.stemTokens = z;
    }

    private void setCaseFoldInitCap(boolean z) {
        this.caseFoldInitCap = z;
    }

    private void setCaseFoldDigit(boolean z) {
        this.caseFoldDigit = z;
    }

    private void setCaseFoldAll(boolean z) {
        this.caseFoldAll = z;
    }

    /**
     * @return the characters currently treated as token delimiters
     */
    protected String getDelim() {
        return this.delim;
    }

    /**
     * @return true if stemming is enabled
     */
    protected boolean getStemming() {
        return this.stemTokens;
    }

    /**
     * @return true if initial-capital tokens are case-folded
     */
    protected boolean getCaseFoldInitCap() {
        return this.caseFoldInitCap;
    }

    /**
     * @return true if tokens containing a digit are case-folded
     */
    protected boolean getCaseFoldDigit() {
        return this.caseFoldDigit;
    }

    /**
     * @return true if every token is case-folded
     */
    protected boolean getCaseFoldAll() {
        return this.caseFoldAll;
    }

    /**
     * Reads every configuration parameter from the context, applies it via
     * {@link #processAllConfigurationParameters(String[], Object[])} and then calls
     * {@link #initTokenizer(String[], Object[])}.
     *
     * @param annotatorContext the context supplying the configuration parameters
     * @throws AnnotatorInitializationException if the superclass initialization fails
     * @throws AnnotatorConfigurationException wrapping any exception raised while reading or
     *         applying the parameters
     */
    public void initialize(AnnotatorContext annotatorContext) throws AnnotatorInitializationException, AnnotatorConfigurationException {
        super.initialize(annotatorContext);
        try {
            String[] paramNames = annotatorContext.getConfigParameterNames();
            Object[] paramValues = new Object[paramNames.length];
            for (int i = 0; i < paramValues.length; i++) {
                paramValues[i] = annotatorContext.getConfigParameterValue(paramNames[i]);
            }
            processAllConfigurationParameters(paramNames, paramValues);
            initTokenizer(paramNames, paramValues);
        } catch (Exception e) {
            throw new AnnotatorConfigurationException(e);
        }
    }

    /**
     * Applies each name/value pair through {@link #processConfigurationParameter(String, Object)}.
     *
     * @param strArr the parameter names
     * @param objArr the parameter values, parallel to {@code strArr}
     * @throws AnnotatorConfigurationException declared for overriding implementations
     */
    public void processAllConfigurationParameters(String[] strArr, Object[] objArr) throws AnnotatorConfigurationException {
        for (int i = 0; i < strArr.length; i++) {
            processConfigurationParameter(strArr[i], objArr[i]);
        }
    }

    /**
     * Tokenizes the document text of the CAS using the current delimiters and indexes the
     * resulting tokens.
     *
     * @param jCas the CAS to tokenize
     * @param resultSpecification not used by this implementation
     * @throws AnnotatorProcessException wrapping any exception raised during tokenization
     */
    public void process(JCas jCas, ResultSpecification resultSpecification) throws AnnotatorProcessException {
        try {
            doTokenization(jCas, jCas.getDocumentText(), getDelim());
        } catch (Exception e) {
            throw new AnnotatorProcessException(e);
        }
    }

    /**
     * Hook invoked at the end of initialization; does nothing in this class.
     *
     * @param strArr the configuration parameter names
     * @param objArr the configuration parameter values, parallel to {@code strArr}
     * @throws Exception declared for overriding implementations
     */
    public void initTokenizer(String[] strArr, Object[] objArr) throws Exception {
    }

    /**
     * Tokenizes {@code str} with the given delimiter set and adds every token to the CAS indexes.
     * Note that this replaces the tokenizer's delimiter set and current text.
     *
     * @param jCas the CAS receiving the token annotations
     * @param str the text to tokenize; null produces no tokens
     * @param str2 the complete set of delimiter characters
     */
    protected void doTokenization(JCas jCas, String str, String str2) {
        overrideDelim(str2);
        setText(str);
        for (TokenAnnotation token = nextToken(jCas); token != null; token = nextToken(jCas)) {
            token.addToIndexes();
        }
    }

    /**
     * Applies a single configuration parameter. Unrecognized names and null values are ignored.
     *
     * <ul>
     *   <li>{@link #PARAM_CASE_MATCH}: {@code "insensitive"} enables initial-cap folding,
     *       {@code "digitfold"} enables digit folding, {@code "ignoreall"} folds every token
     *       (values compared case-insensitively).</li>
     *   <li>{@link #PARAM_STEMMER_CLASS}: class name of a {@link Stemmer} with a no-argument
     *       constructor. Instantiation failures are reported on standard error and stemming
     *       stays disabled.</li>
     *   <li>{@link #PARAM_TOKEN_DELIM}: extra delimiter characters, see
     *       {@link #setDelim(String)}.</li>
     * </ul>
     *
     * @param str the parameter name
     * @param obj the parameter value, may be null
     */
    public void processConfigurationParameter(String str, Object obj) {
        if (str.equals(PARAM_CASE_MATCH)) {
            String mode = (String) obj;
            if (mode == null) {
                return;
            }
            if (mode.equalsIgnoreCase("insensitive")) {
                setCaseFoldInitCap(true);
            } else if (mode.equalsIgnoreCase("digitfold")) {
                setCaseFoldDigit(true);
            } else if (mode.equalsIgnoreCase("ignoreall")) {
                setCaseFoldAll(true);
            }
            return;
        }

        if (str.equals(PARAM_STEMMER_CLASS) && obj != null) {
            try {
                setStemmer((Stemmer) Class.forName((String) obj).getDeclaredConstructor().newInstance());
                setStemming(true);
            } catch (Exception e) {
                // Reflection wraps exceptions thrown by the constructor; report the real cause.
                Throwable cause = e;
                if (e instanceof java.lang.reflect.InvocationTargetException && e.getCause() != null) {
                    cause = e.getCause();
                }
                // Concatenate obj as-is: casting it here would rethrow if it is not a String.
                System.err.println("Exception trying to instantiate stemmer class: '" + obj + "', original exception:" + cause.getMessage());
                cause.printStackTrace();
            }
            return;
        }

        if (str.equals(PARAM_TOKEN_DELIM)) {
            String extraDelimiters = (String) obj;
            if (extraDelimiters != null) {
                setDelim(extraDelimiters);
            }
        }
    }

    /**
     * Stems the token text if stemming is enabled.
     *
     * @param str the (possibly case-folded) token text
     * @return the stemmed text, or {@code str} unchanged when stemming is disabled
     */
    protected String stem(String str) {
        return shouldStem() ? doStemming(str, getStemmer()) : str;
    }

    /**
     * Trims the given string and passes it to the stemmer.
     *
     * @param str the text to stem
     * @param stemmer the stemmer to use; must not be null
     * @return the stemmer's result for the trimmed text
     */
    public static String doStemming(String str, Stemmer stemmer) {
        return stemmer.stem(str.trim());
    }
}
