/*
 * Decompiled with CFR 0.152.
 */
package com.robrua.nlp.bert;

import com.google.common.io.Resources;
import com.robrua.nlp.bert.BasicTokenizer;
import com.robrua.nlp.bert.Tokenizer;
import com.robrua.nlp.bert.WordpieceTokenizer;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.stream.Stream;

/**
 * Tokenizer that chains a {@link BasicTokenizer} pass with a {@link WordpieceTokenizer} pass and
 * can translate the resulting tokens into integer IDs.
 *
 * <p>The vocabulary is read as UTF-8 text, one token per line. Each line is trimmed and mapped to
 * its zero-based line index; if the same trimmed token appears on several lines, the last
 * occurrence determines its ID.
 */
public class FullTokenizer extends Tokenizer {
    /** Value passed to {@link BasicTokenizer} when a constructor without {@code doLowerCase} is used. */
    private static final boolean DEFAULT_DO_LOWER_CASE = false;
    /** Character set used to decode vocabulary files and resources. */
    private static final Charset VOCABULARY_CHARSET = Charset.forName("UTF-8");

    private final BasicTokenizer basic;
    private final Map<String, Integer> vocabulary;
    private final WordpieceTokenizer wordpiece;

    /**
     * Reads every remaining line from {@code reader} and maps the trimmed line to its zero-based
     * line index. The reader is not closed here; the caller owns it.
     */
    private static Map<String, Integer> readVocabulary(BufferedReader reader) throws IOException {
        Map<String, Integer> vocabulary = new HashMap<>();
        int index = 0;
        String line;
        while ((line = reader.readLine()) != null) {
            // Blank lines still consume an index so that IDs stay aligned with line numbers.
            vocabulary.put(line.trim(), index++);
        }
        return vocabulary;
    }

    /**
     * Loads a vocabulary from a file on the file system.
     *
     * @throws RuntimeException wrapping the {@link IOException} if the file cannot be read
     */
    private static Map<String, Integer> loadVocabulary(Path file) {
        try (BufferedReader reader = Files.newBufferedReader(file, VOCABULARY_CHARSET)) {
            return readVocabulary(reader);
        } catch (IOException e) {
            throw new RuntimeException("Failed to read vocabulary file " + file, e);
        }
    }

    /**
     * Loads a vocabulary from a classpath resource.
     *
     * <p>The resource is read through its URL rather than being converted to a {@link Path}:
     * {@code Paths.get(URI)} throws {@code FileSystemNotFoundException} for {@code jar:} URIs
     * unless a matching zip file system is already open, which made resources packaged inside a
     * JAR unloadable.
     *
     * @throws IllegalArgumentException if the resource cannot be found (raised by
     *         {@code Resources.getResource})
     * @throws RuntimeException wrapping the {@link IOException} if the resource cannot be read
     */
    private static Map<String, Integer> loadVocabularyResource(String resource) {
        try (BufferedReader reader = Resources.asCharSource(Resources.getResource(resource), VOCABULARY_CHARSET).openBufferedStream()) {
            return readVocabulary(reader);
        } catch (IOException e) {
            throw new RuntimeException("Failed to read vocabulary resource " + resource, e);
        }
    }

    /** Shared initialisation used by every public constructor. */
    private FullTokenizer(Map<String, Integer> vocabulary, boolean doLowerCase) {
        this.vocabulary = vocabulary;
        this.basic = new BasicTokenizer(doLowerCase);
        this.wordpiece = new WordpieceTokenizer(this.vocabulary);
    }

    /**
     * Creates a tokenizer from a vocabulary file, using the default {@code doLowerCase} setting
     * ({@code false}).
     *
     * @param vocabulary the vocabulary file, one token per line
     */
    public FullTokenizer(File vocabulary) {
        this(vocabulary.toPath(), DEFAULT_DO_LOWER_CASE);
    }

    /**
     * Creates a tokenizer from a vocabulary file.
     *
     * @param vocabulary the vocabulary file, one token per line
     * @param doLowerCase flag handed to the {@link BasicTokenizer} constructor
     */
    public FullTokenizer(File vocabulary, boolean doLowerCase) {
        this(vocabulary.toPath(), doLowerCase);
    }

    /**
     * Creates a tokenizer from a vocabulary path, using the default {@code doLowerCase} setting
     * ({@code false}).
     *
     * @param vocabularyPath path of the vocabulary file, one token per line
     */
    public FullTokenizer(Path vocabularyPath) {
        this(vocabularyPath, DEFAULT_DO_LOWER_CASE);
    }

    /**
     * Creates a tokenizer from a vocabulary path.
     *
     * @param vocabularyPath path of the vocabulary file, one token per line
     * @param doLowerCase flag handed to the {@link BasicTokenizer} constructor
     */
    public FullTokenizer(Path vocabularyPath, boolean doLowerCase) {
        this(FullTokenizer.loadVocabulary(vocabularyPath), doLowerCase);
    }

    /**
     * Creates a tokenizer from a vocabulary classpath resource, using the default
     * {@code doLowerCase} setting ({@code false}).
     *
     * @param vocabularyResource name of the classpath resource holding the vocabulary
     */
    public FullTokenizer(String vocabularyResource) {
        this(FullTokenizer.loadVocabularyResource(vocabularyResource), DEFAULT_DO_LOWER_CASE);
    }

    /**
     * Creates a tokenizer from a vocabulary classpath resource.
     *
     * @param vocabularyResource name of the classpath resource holding the vocabulary
     * @param doLowerCase flag handed to the {@link BasicTokenizer} constructor
     */
    public FullTokenizer(String vocabularyResource, boolean doLowerCase) {
        this(FullTokenizer.loadVocabularyResource(vocabularyResource), doLowerCase);
    }

    /**
     * Converts tokens to their vocabulary IDs.
     *
     * @param tokens the tokens to look up
     * @return an array of the same length holding the ID of each token
     * @throws NullPointerException if {@code tokens} is {@code null} or a token is not present in
     *         the vocabulary; the exception type matches the previous behaviour, but the message
     *         now names the offending token
     */
    public int[] convert(String[] tokens) {
        int[] ids = new int[tokens.length];
        for (int i = 0; i < tokens.length; i++) {
            Integer id = this.vocabulary.get(tokens[i]);
            if (id == null) {
                throw new NullPointerException("Token at index " + i + " is not in the vocabulary: " + tokens[i]);
            }
            ids[i] = id;
        }
        return ids;
    }

    /**
     * Tokenizes a single sequence: the output of the basic tokenizer is fed to the wordpiece
     * tokenizer and the per-token results are flattened into one array.
     *
     * @param sequence the text to tokenize
     * @return the flattened tokens, in order
     */
    @Override
    public String[] tokenize(String sequence) {
        return Arrays.stream(this.wordpiece.tokenize(this.basic.tokenize(sequence)))
            .flatMap(Stream::of)
            .toArray(String[]::new);
    }

    /**
     * Tokenizes several sequences, applying the same basic-then-wordpiece pipeline to each one.
     *
     * @param sequences the texts to tokenize
     * @return one flattened token array per input sequence, in input order
     */
    @Override
    public String[][] tokenize(String ... sequences) {
        return Arrays.stream(this.basic.tokenize(sequences))
            .map(tokens -> Arrays.stream(this.wordpiece.tokenize(tokens))
                .flatMap(Stream::of)
                .toArray(String[]::new))
            .toArray(String[][]::new);
    }
}

