package com.github.tjake.jlama.safetensors.tokenizer;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;
import com.google.common.collect.ImmutableBiMap;
import com.google.common.collect.ImmutableList;
import java.text.Normalizer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.regex.Matcher;
import java.util.stream.Collectors;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/github/tjake/jlama/safetensors/tokenizer/TokenizerModel.class */
public class TokenizerModel {
    /** Logger for this class; the nested {@code NormalizerItem} also logs through it. */
    private static final Logger logger = LoggerFactory.getLogger(TokenizerModel.class);
    /** Precompiled splitting regex; within this file it is referenced only by {@code PretokenizerItem.splitGpt2}. */
    private static final java.util.regex.Pattern gpt2Pattern = java.util.regex.Pattern.compile("(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+");

    /** Value of the {@code type} JSON property. */
    @JsonProperty("type")
    public final String type;

    /** Value of the {@code unk_token} JSON property. */
    @JsonProperty("unk_token")
    public final String unkToken;

    /** Value of the {@code fuse_unk} JSON property. */
    @JsonProperty("fuse_unk")
    public final boolean fuseUnk;

    /** Value of the {@code byte_fallback} JSON property. */
    @JsonProperty("byte_fallback")
    public final boolean byteFallback;

    /** Bidirectional token-to-id map built from the {@code vocab} JSON property; {@code setAddedTokens} also inserts into it. */
    @JsonProperty("vocab")
    public final BiMap<String, Long> vocabLookup;

    /** Maps each merge rule (its two pieces joined by a single space) to its index in the input merges list. */
    @JsonProperty("merges")
    public final Map<String, Long> merges;
    // Optional components, installed after construction through the setters below.
    private PreTokenizer preTokenizer;
    private Normalizer normalizer;
    // Alternation over all added tokens; stays null until setAddedTokens receives a non-empty list.
    private java.util.regex.Pattern addedTokenPattern;
    // True only when the ignore_merges JSON property was present and true.
    private final boolean ignoreMerges;
    // Tokens registered through setAddedTokens, and the subset of them flagged "special".
    private BiMap<String, Long> addedTokens = HashBiMap.create();
    private BiMap<String, Long> specialTokens = HashBiMap.create();
    // Copied from PreTokenizer.isLegacy by setPreTokenizer, or set directly through setLegacy.
    private boolean legacy = false;
    private Optional<Map<String, String>> promptTemplates = Optional.empty();
    // Set by setPromptTemplates when any template text contains "tools" (case-insensitive).
    private boolean hasToolSupport = false;
    private String eosToken = "";
    private String bosToken = "";

    /* loaded from: input_file:com/github/tjake/jlama/safetensors/tokenizer/TokenizerModel$Normalizer.class */
    /**
     * Normalizer configuration: a {@code type} plus an ordered list of {@link NormalizerItem} steps
     * that are applied one after another.
     */
    public static class Normalizer {
        public final String type;
        public final List<NormalizerItem> normalizerItems;

        /**
         * @param str  value of the {@code type} JSON property
         * @param list value of the {@code normalizers} JSON property; {@code null} is treated as an empty list
         */
        @JsonCreator
        public Normalizer(@JsonProperty("type") String str, @JsonProperty("normalizers") List<NormalizerItem> list) {
            this.type = str;
            if (list == null) {
                this.normalizerItems = Collections.emptyList();
            } else {
                this.normalizerItems = ImmutableList.copyOf(list);
            }
        }

        /**
         * Runs every configured step over the input, feeding each step the output of the previous one.
         *
         * @param str text to normalize
         * @return the input unchanged when no steps are configured, otherwise the result of the last step
         * @throws IllegalArgumentException if steps are configured but {@code type} is not "Sequence" (case-insensitive)
         */
        public String normalize(String str) {
            if (this.normalizerItems.isEmpty()) {
                return str;
            }
            Preconditions.checkArgument(this.type.equalsIgnoreCase("Sequence"), "Invalid normalizer type: " + this.type);
            String current = str;
            for (NormalizerItem step : this.normalizerItems) {
                current = step.normalize(current);
            }
            return current;
        }
    }

    /* loaded from: input_file:com/github/tjake/jlama/safetensors/tokenizer/TokenizerModel$NormalizerItem.class */
    /**
     * A single normalization step. Supported {@code type} values are {@code Replace}, {@code Prepend}
     * and the Unicode normalization forms {@code NFC}, {@code NFD}, {@code NFKC} and {@code NFKD}.
     */
    public static class NormalizerItem {
        public final String type;
        /** Text placed in front of the input by a {@code Prepend} step. */
        public final String prepend;
        /** Pattern kind ("String" or "Regex") mapped to the pattern text, used by a {@code Replace} step. */
        public final Map<String, String> pattern;
        /** Replacement text used by a {@code Replace} step. */
        public final String content;

        @JsonCreator
        public NormalizerItem(@JsonProperty("type") String str, @JsonProperty("prepend") String str2, @JsonProperty("pattern") Map<String, String> map, @JsonProperty("content") String str3) {
            this.type = str;
            this.prepend = str2;
            this.pattern = map;
            this.content = str3;
        }

        /**
         * Applies this step to the given text.
         *
         * @param str text to normalize
         * @return the normalized text
         * @throws IllegalArgumentException if {@code type} is missing or unsupported, or if the
         *         properties the step needs were not supplied
         */
        public String normalize(String str) {
            if (this.type == null) {
                throw new IllegalArgumentException("Invalid normalizer type: " + this.type);
            }
            switch (this.type) {
                case "Replace":
                    return replace(str);
                case "Prepend":
                    return prepend(str);
                case "NFC":
                case "NFD":
                case "NFKC":
                case "NFKD":
                    return formNormalize(str);
                default:
                    throw new IllegalArgumentException("Invalid normalizer type: " + this.type);
            }
        }

        /** Applies the Unicode normalization form named by {@code type}. */
        private String formNormalize(String str) {
            // Fully qualified on purpose: inside TokenizerModel the simple name "Normalizer"
            // refers to the nested TokenizerModel.Normalizer, which has no Form type.
            return java.text.Normalizer.normalize(str, java.text.Normalizer.Form.valueOf(this.type));
        }

        /**
         * Replaces every occurrence of each configured pattern with {@code content}.
         * "String" patterns are matched literally, "Regex" patterns as regular expressions;
         * the replacement text is always inserted verbatim. Any other pattern kind is skipped
         * with a warning.
         */
        private String replace(String str) {
            if (this.pattern == null || this.content == null) {
                throw new IllegalArgumentException("Replace normalizer requires both 'pattern' and 'content'");
            }
            String result = str;
            for (Map.Entry<String, String> entry : this.pattern.entrySet()) {
                String kind = entry.getKey();
                if (kind.equalsIgnoreCase("String")) {
                    // Literal replacement: neither the pattern nor the content is regex-interpreted.
                    result = result.replace(entry.getValue(), this.content);
                } else if (kind.equalsIgnoreCase("Regex")) {
                    // Quote the replacement so '$' and '\' in the content are not read as group references.
                    result = result.replaceAll(entry.getValue(), Matcher.quoteReplacement(this.content));
                } else {
                    // Really skip it, as the message says.
                    TokenizerModel.logger.warn("Ignoring unknown pattern key: {}", kind);
                }
            }
            return result;
        }

        /** Puts the configured {@code prepend} text in front of the input. */
        private String prepend(String str) {
            if (this.prepend == null) {
                // Without this guard string concatenation would silently produce "null" + str.
                throw new IllegalArgumentException("Prepend normalizer requires 'prepend'");
            }
            return this.prepend + str;
        }
    }

    /* loaded from: input_file:com/github/tjake/jlama/safetensors/tokenizer/TokenizerModel$Pattern.class */
    /**
     * JSON wrapper around a compiled regular expression, read from the {@code Regex} property.
     * Note that inside {@code TokenizerModel} the simple name {@code Pattern} refers to this class,
     * which is why {@code java.util.regex.Pattern} is spelled out in full.
     */
    public static class Pattern {
        public final java.util.regex.Pattern regex;

        /**
         * @param str regular expression source; compiled once at construction
         */
        @JsonCreator
        public Pattern(@JsonProperty("Regex") String str) {
            final java.util.regex.Pattern compiled = java.util.regex.Pattern.compile(str);
            this.regex = compiled;
        }
    }

    /* loaded from: input_file:com/github/tjake/jlama/safetensors/tokenizer/TokenizerModel$PreTokenizer.class */
    /**
     * Pre-tokenizer configuration. It is either a "MetaSpace" pre-tokenizer (optional leading space,
     * then runs of spaces/tabs replaced by {@code replacement}) or a "Sequence" of
     * {@link PretokenizerItem} steps applied in order.
     */
    public static class PreTokenizer {
        public final String type;
        public final String replacement;
        /** Value of the {@code prepend_scheme} JSON property; {@code null} when the property is absent. */
        public final String prependScheme;
        /** True when at least one configured step has type "ByteLevel". */
        public final boolean isLegacy;
        public final List<PretokenizerItem> pretokenizers;

        @JsonCreator
        public PreTokenizer(@JsonProperty("type") String str, @JsonProperty("replacement") String str2, @JsonProperty("prepend_scheme") String str3, @JsonProperty("pretokenizers") List<PretokenizerItem> list) {
            this.type = str;
            this.replacement = str2;
            this.prependScheme = str3;
            this.pretokenizers = list == null ? Collections.emptyList() : ImmutableList.copyOf(list);
            // Constant-first equals: a step without a type must not break construction.
            this.isLegacy = this.pretokenizers.stream().anyMatch(item -> "ByteLevel".equals(item.type));
        }

        /**
         * Splits the input into pre-tokens.
         *
         * @param str text to pre-tokenize
         * @return for "MetaSpace", a single-element list holding the rewritten text; when no steps
         *         are configured, a single-element list holding the input; otherwise the pieces
         *         produced by running every step over the pieces of the previous one
         * @throws IllegalArgumentException if steps are configured but {@code type} is not "Sequence"
         */
        public List<String> pretokenize(String str) {
            if ("MetaSpace".equalsIgnoreCase(this.type)) {
                // prependScheme may be null (property absent); constant-first comparison
                // treats that as "do not prepend" instead of throwing.
                String text = "first".equalsIgnoreCase(this.prependScheme) ? " " + str : str;
                // Quote the replacement so '$' or '\' in it are inserted literally.
                return Collections.singletonList(text.replaceAll("[ \t]+", Matcher.quoteReplacement(this.replacement)));
            }
            if (this.pretokenizers.isEmpty()) {
                return Collections.singletonList(str);
            }
            Preconditions.checkArgument("Sequence".equalsIgnoreCase(this.type), "Invalid pre-tokenizer type: " + this.type);
            List<String> pieces = List.of(str);
            for (PretokenizerItem step : this.pretokenizers) {
                List<String> next = new ArrayList<>();
                for (String piece : pieces) {
                    next.addAll(step.pretokenize(piece));
                }
                pieces = next;
            }
            return pieces;
        }
    }

    /* loaded from: input_file:com/github/tjake/jlama/safetensors/tokenizer/TokenizerModel$PretokenizerItem.class */
    /**
     * A single step of a "Sequence" pre-tokenizer. Supported {@code type} values are
     * {@code Split} (regex based), {@code Digits} and {@code ByteLevel}. The remaining fields
     * are captured from the JSON as-is; the methods of this class do not consult
     * {@code behavior}, {@code invert}, {@code individual_digits}, {@code add_prefix_space},
     * {@code trim_offsets} or {@code use_regex}.
     */
    public static class PretokenizerItem {
        public final String type;
        public final Pattern pattern;
        public final String behavior;
        public final Boolean invert;
        public final Boolean individual_digits;
        public final Boolean add_prefix_space;
        public final Boolean trim_offsets;
        public final Boolean use_regex;

        @JsonCreator
        public PretokenizerItem(@JsonProperty("type") String str, @JsonProperty("pattern") Pattern pattern, @JsonProperty("behavior") String str2, @JsonProperty("invert") Boolean bool, @JsonProperty("individual_digits") Boolean bool2, @JsonProperty("add_prefix_space") Boolean bool3, @JsonProperty("trim_offsets") Boolean bool4, @JsonProperty("use_regex") Boolean bool5) {
            this.type = str;
            this.pattern = pattern;
            this.behavior = str2;
            this.invert = bool;
            this.individual_digits = bool2;
            this.add_prefix_space = bool3;
            this.trim_offsets = bool4;
            this.use_regex = bool5;
        }

        /**
         * Applies this step to one piece of text.
         *
         * @param str text to split
         * @return the resulting pieces; a {@code ByteLevel} step returns the input as its only piece
         * @throws IllegalArgumentException if {@code type} is missing or unsupported, or a
         *         {@code Split} step has no pattern
         */
        public List<String> pretokenize(String str) {
            if (this.type == null) {
                throw new IllegalArgumentException("Invalid pre-tokenizer type: " + this.type);
            }
            switch (this.type) {
                case "Split":
                    return splitRegex(str);
                case "Digits":
                    return splitDigits(str);
                case "ByteLevel":
                    return Collections.singletonList(str);
                default:
                    throw new IllegalArgumentException("Invalid pre-tokenizer type: " + this.type);
            }
        }

        /**
         * Maps every code point through {@code BPETokenizer.alteredBytes} (unmapped code points are
         * kept) and joins the result into a single string. Not called from within this class.
         */
        private List<String> byteLevel(String str) {
            return List.of(str.codePoints().map(i -> {
                return ((Integer) BPETokenizer.alteredBytes.getOrDefault(Integer.valueOf(i), Integer.valueOf(i))).intValue();
            }).mapToObj(Character::toString).collect(Collectors.joining()));
        }

        /** Splits around matches of {@code TokenizerModel.gpt2Pattern}. Not called from within this class. */
        private List<String> splitGpt2(String str) {
            return List.of(TokenizerModel.gpt2Pattern.split(str));
        }

        /**
         * Cuts the input at every match of {@code pattern}, keeping both the matched text and the
         * non-empty text between matches, in their original order.
         */
        private List<String> splitRegex(String str) {
            if (this.pattern == null) {
                throw new IllegalArgumentException("Split pre-tokenizer requires a 'pattern'");
            }
            Matcher matcher = this.pattern.regex.matcher(str);
            List<String> pieces = new ArrayList<>();
            int consumed = 0;
            while (matcher.find()) {
                if (matcher.start() > consumed) {
                    pieces.add(str.substring(consumed, matcher.start()));
                }
                pieces.add(matcher.group());
                consumed = matcher.end();
            }
            // Whatever follows the last match (or the whole input when nothing matched).
            if (consumed < str.length()) {
                pieces.add(str.substring(consumed));
            }
            return pieces;
        }

        /** Splits at every boundary between a digit and a non-digit character. */
        private List<String> splitDigits(String str) {
            return List.of(str.split("(?<=\\D)(?=\\d)|(?<=\\d)(?=\\D)"));
        }
    }

    /**
     * Builds the model from the tokenizer JSON properties.
     *
     * @param str  value of {@code type}
     * @param str2 value of {@code unk_token}
     * @param z    value of {@code fuse_unk}
     * @param z2   value of {@code byte_fallback}
     * @param map  value of {@code vocab}; copied into a bidirectional map
     * @param bool value of {@code ignore_merges}; {@code null} is treated as {@code false}
     * @param list value of {@code merges}; each element is either a string or a list whose first
     *             two elements are strings. May be {@code null}, which leaves {@code merges} empty
     * @throws IllegalArgumentException if a merge entry has neither of the accepted shapes
     */
    @JsonCreator
    public TokenizerModel(@JsonProperty("type") String str, @JsonProperty("unk_token") String str2, @JsonProperty("fuse_unk") boolean z, @JsonProperty("byte_fallback") boolean z2, @JsonProperty("vocab") Map<String, Long> map, @JsonProperty("ignore_merges") Boolean bool, @JsonProperty("merges") List<Object> list) {
        this.type = str;
        this.unkToken = str2;
        this.fuseUnk = z;
        this.byteFallback = z2;
        this.vocabLookup = HashBiMap.create(map);
        this.ignoreMerges = Boolean.TRUE.equals(bool);
        this.merges = new HashMap<>();
        if (list != null) {
            // The position of a merge in the input list is stored as its value.
            for (int i = 0; i < list.size(); i++) {
                this.merges.put(mergeKey(list.get(i)), Long.valueOf(i));
            }
        }
    }

    /**
     * Converts one raw merge entry into its map key: a string is used as-is, a list contributes
     * its first two elements joined by a single space (further elements are ignored).
     */
    private static String mergeKey(Object rawMerge) {
        if (rawMerge instanceof String) {
            return (String) rawMerge;
        }
        if (rawMerge instanceof List) {
            List<?> pair = (List<?>) rawMerge;
            // Validate up front so a malformed pair reports the same error as any other bad entry
            // instead of surfacing as IndexOutOfBounds/ClassCast.
            if (pair.size() >= 2 && pair.get(0) instanceof String && pair.get(1) instanceof String) {
                return ((String) pair.get(0)) + " " + ((String) pair.get(1));
            }
        }
        throw new IllegalArgumentException("Invalid merge format: " + String.valueOf(rawMerge));
    }

    /** Returns the configured pre-tokenizer, or {@code null} if none has been set. */
    public PreTokenizer preTokenizer() {
        return this.preTokenizer;
    }

    /**
     * Installs the pre-tokenizer and copies its {@code isLegacy} flag into this model.
     * A {@code null} argument is ignored and leaves the current state untouched.
     *
     * @param preTokenizer pre-tokenizer to install, may be {@code null}
     */
    public void setPreTokenizer(PreTokenizer preTokenizer) {
        if (preTokenizer == null) {
            return;
        }
        this.legacy = preTokenizer.isLegacy;
        this.preTokenizer = preTokenizer;
    }

    /** Returns the configured normalizer, or {@code null} if none has been set. */
    public Normalizer normalizer() {
        return this.normalizer;
    }

    /** Stores the normalizer as given; unlike {@code setPreTokenizer}, a {@code null} argument is stored too. */
    public void setNormalizer(Normalizer normalizer) {
        this.normalizer = normalizer;
    }

    /**
     * Registers added tokens. Each entry must carry a string {@code content} and a numeric
     * {@code id}; entries whose {@code special} value is {@code true} are also recorded as
     * special tokens. Every token is inserted into {@code vocabLookup} as well, and
     * {@code addedTokenPattern} is rebuilt as an alternation over all added tokens.
     * A {@code null} or empty list is a no-op. The method may be called more than once;
     * later calls add to the tokens registered earlier.
     *
     * @param list added-token descriptors, may be {@code null}
     * @throws IllegalArgumentException if an entry lacks a string {@code content} or a numeric
     *         {@code id}; the underlying bidirectional maps may also reject an id that is
     *         already bound to a different token
     */
    public void setAddedTokens(List<Map<String, Object>> list) {
        if (list == null || list.isEmpty()) {
            return;
        }
        // Work on mutable copies: after an earlier call the fields hold immutable maps,
        // so writing into them directly would fail.
        BiMap<String, Long> added = HashBiMap.create(this.addedTokens);
        BiMap<String, Long> special = HashBiMap.create(this.specialTokens);
        for (Map<String, Object> entry : list) {
            Object content = entry.get("content");
            Object id = entry.get("id");
            if (!(content instanceof String) || !(id instanceof Number)) {
                throw new IllegalArgumentException("Invalid added token: " + entry);
            }
            String token = (String) content;
            // Number rather than Integer: the JSON parser may hand back a Long for large ids.
            Long tokenId = Long.valueOf(((Number) id).longValue());
            added.put(token, tokenId);
            this.vocabLookup.put(token, tokenId);
            // Null-safe: a missing or null "special" value counts as not special.
            if (Boolean.TRUE.equals(entry.get("special"))) {
                special.put(token, tokenId);
            }
        }
        this.addedTokens = ImmutableBiMap.copyOf(added);
        this.specialTokens = ImmutableBiMap.copyOf(special);

        // Regex alternation picks the first alternative that matches, so list longer tokens
        // first; otherwise a token that is a prefix of another would always win.
        List<String> tokens = new ArrayList<>(this.addedTokens.keySet());
        tokens.sort((a, b) -> Integer.compare(b.length(), a.length()));
        StringBuilder alternation = new StringBuilder();
        for (String token : tokens) {
            if (alternation.length() > 0) {
                alternation.append("|");
            }
            alternation.append(java.util.regex.Pattern.quote(token));
        }
        this.addedTokenPattern = java.util.regex.Pattern.compile(alternation.toString());
    }

    /** Returns {@code true} only when the {@code ignore_merges} property was present and true at construction. */
    public boolean ignoreMerges() {
        return this.ignoreMerges;
    }

    /** Returns the added tokens mapped to their ids; empty until {@code setAddedTokens} registers some. */
    public Map<String, Long> addedTokens() {
        return this.addedTokens;
    }

    /**
     * Returns the regex that matches any added token, or {@code null} if
     * {@code setAddedTokens} has not yet been called with a non-empty list.
     */
    public java.util.regex.Pattern addedTokenPattern() {
        return this.addedTokenPattern;
    }

    /** Returns the legacy flag, as last set by {@code setPreTokenizer} or {@code setLegacy}. */
    public boolean isLegacy() {
        return this.legacy;
    }

    /** Overrides the legacy flag; a later non-null {@code setPreTokenizer} call overwrites it again. */
    public void setLegacy(boolean z) {
        this.legacy = z;
    }

    /** Returns the prompt templates, or an empty {@code Optional} if none have been set. */
    public Optional<Map<String, String>> promptTemplates() {
        return this.promptTemplates;
    }

    /**
     * Stores the prompt templates and derives tool support from them: the flag is set when any
     * template text contains "tools", ignoring case. A {@code null} map is ignored.
     *
     * @param map prompt templates, may be {@code null}
     */
    public void setPromptTemplates(Map<String, String> map) {
        if (map == null) {
            return;
        }
        boolean mentionsTools = false;
        for (String template : map.values()) {
            if (template.toLowerCase().contains("tools")) {
                mentionsTools = true;
                break;
            }
        }
        this.hasToolSupport = mentionsTools;
        this.promptTemplates = Optional.of(map);
    }

    /** Returns whether the last templates passed to {@code setPromptTemplates} mentioned "tools"; {@code false} by default. */
    public boolean hasToolSupport() {
        return this.hasToolSupport;
    }

    /** Sets the end-of-sequence token text. */
    public void setEosToken(String str) {
        this.eosToken = str;
    }

    /** Returns the end-of-sequence token text; the empty string until one is set. */
    public String eosToken() {
        return this.eosToken;
    }

    /** Sets the beginning-of-sequence token text. */
    public void setBosToken(String str) {
        this.bosToken = str;
    }

    /** Returns the beginning-of-sequence token text; the empty string until one is set. */
    public String bosToken() {
        return this.bosToken;
    }

    /** Returns whether the given id belongs to a token that was registered as special via {@code setAddedTokens}. */
    public boolean isSpecialToken(long j) {
        return this.specialTokens.containsValue(Long.valueOf(j));
    }

    /** Returns whether the given token text was registered as special via {@code setAddedTokens}. */
    public boolean isSpecialToken(String str) {
        return this.specialTokens.containsKey(str);
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Splits the input around matches of the pattern, optionally keeping the matched
     * delimiters as separate elements of the result.
     *
     * @param pattern      delimiter pattern
     * @param charSequence text to split
     * @param i            limit: when positive, at most that many delimiters are consumed and the
     *                     last piece holds the unsplit remainder; when zero, trailing empty
     *                     strings are dropped from the result; when negative, nothing is dropped
     * @param z            when {@code true}, each consumed delimiter is added right after the
     *                     piece that precedes it
     * @return the pieces in input order; a single element holding the whole input when no
     *         delimiter was consumed
     */
    public static String[] split(java.util.regex.Pattern pattern, CharSequence charSequence, int i, boolean z) {
        final boolean limited = i > 0;
        final List<String> pieces = new ArrayList<>();
        final Matcher matcher = pattern.matcher(charSequence);
        int consumed = 0;
        int cursor = 0;
        while (matcher.find()) {
            final int from = matcher.start();
            final int to = matcher.end();
            if (limited && consumed >= i - 1) {
                // Limit reached: the first match past the limit closes the result with the
                // whole remainder; any later match is ignored.
                if (consumed == i - 1) {
                    pieces.add(charSequence.subSequence(cursor, charSequence.length()).toString());
                    cursor = to;
                    consumed++;
                }
                continue;
            }
            // A zero-width match at the very start must not produce an empty leading piece.
            if (cursor == 0 && from == 0 && to == 0) {
                continue;
            }
            pieces.add(charSequence.subSequence(cursor, from).toString());
            cursor = to;
            if (z) {
                pieces.add(charSequence.subSequence(from, to).toString());
            }
            consumed++;
        }
        if (cursor == 0) {
            // Nothing was consumed, so the input is its own single piece.
            return new String[] {charSequence.toString()};
        }
        if (!limited || consumed < i) {
            pieces.add(charSequence.subSequence(cursor, charSequence.length()).toString());
        }
        int keep = pieces.size();
        if (i == 0) {
            while (keep > 0 && pieces.get(keep - 1).isEmpty()) {
                keep--;
            }
        }
        return pieces.subList(0, keep).toArray(new String[keep]);
    }
}
