/*
 * Decompiled with CFR 0.152.
 */
package cc.factorie.app.nlp.load;

import cc.factorie.app.nlp.Document;
import cc.factorie.app.nlp.Sentence;
import cc.factorie.app.nlp.Token;
import cc.factorie.app.nlp.UnknownDocumentAnnotator$;
import cc.factorie.app.nlp.load.BILOUChunkTag;
import cc.factorie.app.nlp.load.BILOUNestedChunkTag;
import cc.factorie.app.nlp.load.BIOChunkTag;
import cc.factorie.app.nlp.load.ChunkTag;
import cc.factorie.app.nlp.load.Load;
import cc.factorie.app.nlp.load.Load$class;
import cc.factorie.app.nlp.pos.PennPosTag;
import cc.factorie.variable.CategoricalVar;
import java.io.File;
import java.io.InputStream;
import scala.Function0;
import scala.Function1;
import scala.Function2;
import scala.Option;
import scala.Predef;
import scala.Predef$;
import scala.Serializable;
import scala.Tuple2;
import scala.collection.LinearSeqOptimized;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.immutable.Map;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.StringBuilder;
import scala.io.Source;
import scala.reflect.ClassTag$;
import scala.runtime.ObjectRef;
import scala.util.matching.Regex;

public final class LoadConll2000$
implements Load {
    // Singleton instance; the private constructor stores itself here when the static initializer below runs.
    public static final LoadConll2000$ MODULE$;
    // Pattern for a line made of exactly three whitespace-free fields separated by single spaces (built in the constructor).
    private final Regex lineSplit;
    // POS tag rewrites applied while loading: "(" -> "-LRB-" and ")" -> "-RRB-" (built in the constructor).
    private final Map<String, String> posTranslations;

    static {
        // The return value is not needed: the constructor assigns the new instance to MODULE$.
        new LoadConll2000$();
    }

    /**
     * Forwards to {@code Load$class.fromString}, passing this loader and the given text.
     *
     * @param string text handed unchanged to the shared {@code Load} implementation
     * @return whatever the shared implementation produces for {@code string}
     */
    @Override
    public Seq<Document> fromString(String string) {
        final Seq<Document> documents = Load$class.fromString(this, string);
        return documents;
    }

    /**
     * Forwards to {@code Load$class.fromStream}, passing this loader, the stream and the encoding.
     *
     * @param stream   input stream handed unchanged to the shared {@code Load} implementation
     * @param encoding encoding argument handed unchanged to the shared implementation
     * @return whatever the shared implementation produces
     */
    @Override
    public Seq<Document> fromStream(InputStream stream, String encoding) {
        final Seq<Document> documents = Load$class.fromStream(this, stream, encoding);
        return documents;
    }

    /**
     * Forwards to {@code Load$class.fromFile}, passing this loader and the given file.
     *
     * @param file file handed unchanged to the shared {@code Load} implementation
     * @return whatever the shared implementation produces for {@code file}
     */
    @Override
    public Seq<Document> fromFile(File file) {
        final Seq<Document> documents = Load$class.fromFile(this, file);
        return documents;
    }

    /**
     * Forwards to {@code Load$class.fromFilename}, passing this loader and the given name.
     *
     * @param filename name handed unchanged to the shared {@code Load} implementation
     * @return whatever the shared implementation produces for {@code filename}
     */
    @Override
    public Seq<Document> fromFilename(String filename) {
        final Seq<Document> documents = Load$class.fromFilename(this, filename);
        return documents;
    }

    /**
     * Forwards to {@code Load$class.fromStream$default$2}.
     *
     * NOTE(review): the name follows the Scala compiler's pattern for default-argument
     * accessors, so this presumably supplies the default for the second parameter of
     * {@code fromStream} -- confirm against the {@code Load} trait.
     *
     * @return the string produced by the shared {@code Load} implementation
     */
    @Override
    public String fromStream$default$2() {
        final String defaultValue = Load$class.fromStream$default$2(this);
        return defaultValue;
    }

    /**
     * Loads {@code source} using the {@code "BIO"} chunk encoding.
     *
     * @param source line source to read
     * @return the result of the two-argument {@code fromSource} called with {@code "BIO"}
     */
    @Override
    public Seq<Document> fromSource(Source source) {
        final String defaultEncoding = "BIO";
        return this.fromSource(source, defaultEncoding);
    }

    /**
     * Reads every line of {@code source} into one new {@link Document}.
     *
     * The document's annotator table gets entries for {@code Token}, {@code Sentence},
     * {@code PennPosTag} and {@code BIOChunkTag}; the {@code BIOChunkTag} entry is
     * written whatever {@code encoding} is. Each line is handed to the word-line
     * processor, which returns the sentence the following line should be added to.
     * The source is not closed by this method.
     *
     * @param source   line source to read
     * @param encoding {@code "BILOU"} yields {@code BILOUChunkTag} labels, {@code "NESTED"}
     *                 yields {@code BILOUNestedChunkTag} labels, and {@code "BIO"} or any
     *                 other value (including {@code null}) yields {@code BIOChunkTag} labels
     * @return a one-element sequence holding the populated document
     */
    public Seq<Document> fromSource(Source source, String encoding) {
        Document document = new Document();
        Class<? extends UnknownDocumentAnnotator$> unknownAnnotator = UnknownDocumentAnnotator$.MODULE$.getClass();
        document.annotators().update(Token.class, unknownAnnotator);
        document.annotators().update(Sentence.class, unknownAnnotator);
        document.annotators().update(PennPosTag.class, unknownAnnotator);
        document.annotators().update(BIOChunkTag.class, unknownAnnotator);

        // Choose the chunk-label constructor that matches the requested encoding.
        Serializable labelFactory;
        if ("BILOU".equals(encoding)) {
            labelFactory = new Serializable(){

                public final BILOUChunkTag apply(Token t, String s) {
                    return new BILOUChunkTag(t, s);
                }
            };
        } else if ("NESTED".equals(encoding)) {
            labelFactory = new Serializable(){

                public final BILOUNestedChunkTag apply(Token t, String s) {
                    return new BILOUNestedChunkTag(t, s);
                }
            };
        } else {
            // "BIO" and every unrecognised encoding share the plain BIO label.
            labelFactory = new Serializable(){

                public final BIOChunkTag apply(Token t, String s) {
                    return new BIOChunkTag(t, s);
                }
            };
        }
        @SuppressWarnings("unchecked")
        Function2<Token, String, ChunkTag> newChunkLabel = (Function2<Token, String, ChunkTag>)labelFactory;

        // The processor returns the sentence that the next line belongs to.
        Sentence currentSentence = new Sentence(document);
        scala.collection.Iterator<String> lines = source.getLines();
        while (lines.hasNext()) {
            currentSentence = this.cc$factorie$app$nlp$load$LoadConll2000$$processWordLine(document, currentSentence, lines.next(), newChunkLabel);
        }
        return (Seq)Seq$.MODULE$.apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Document[]{document}));
    }

    /**
     * @return the pattern used to split an input line into its three fields
     */
    public Regex lineSplit() {
        return lineSplit;
    }

    /**
     * @return the table of POS tag strings that are rewritten while loading
     */
    public Map<String, String> posTranslations() {
        return posTranslations;
    }

    /**
     * Consumes one input line and returns the sentence the next line should be added to.
     *
     * A line matching {@code lineSplit} with exactly three groups (token, POS tag,
     * chunk tag) becomes a new {@link Token} in {@code sent}. The token string is the
     * first field followed by a single space. The POS tag is looked up in
     * {@code posTranslations} and used as-is when there is no entry. The chunk label
     * is built with {@code newChunkLabel}. An empty line starts a new sentence in
     * {@code doc}.
     *
     * @param doc           document that receives a new sentence on an empty line
     * @param sent          sentence that receives the token on a token line
     * @param line          raw input line
     * @param newChunkLabel factory for the chunk label attached to the token
     * @return {@code sent} for a token line, a new {@link Sentence} for an empty line
     * @throws IllegalArgumentException if the line is neither a three-field token line nor
     *         empty; an unchecked subtype is used because this method declares no
     *         {@code throws} clause, and it is still caught by handlers for {@code Exception}
     */
    public Sentence cc$factorie$app$nlp$load$LoadConll2000$$processWordLine(Document doc, Sentence sent, String line, Function2<Token, String, ChunkTag> newChunkLabel) {
        Option matched = this.lineSplit().unapplySeq((CharSequence)line);
        boolean isTokenLine = !matched.isEmpty()
                && matched.get() != null
                && ((LinearSeqOptimized)matched.get()).lengthCompare(3) == 0;
        if (isTokenLine) {
            LinearSeqOptimized fields = (LinearSeqOptimized)matched.get();
            String tokenType = (String)fields.apply(0);
            String posTagString = (String)fields.apply(1);
            String chunkTagString = (String)fields.apply(2);
            Token t = new Token(sent, tokenType + " ");
            // Same result as getOrElse(posTagString, posTagString): translate when mapped, else keep the tag.
            Option<String> translated = this.posTranslations().get(posTagString);
            String posTag = translated.isEmpty() ? posTagString : (String)translated.get();
            t.attr().$plus$eq(new PennPosTag(t, posTag));
            t.attr().$plus$eq(newChunkLabel.apply(t, chunkTagString));
            return sent;
        }
        if (line.isEmpty()) {
            // A blank line separates sentences.
            return new Sentence(doc);
        }
        throw new IllegalArgumentException(String.format("Expected either a line with token pos tag chunk tag, or an empty line, received: %s", line));
    }

    /**
     * Adds a {@link BILOUChunkTag} to every token of every given sentence.
     *
     * The new label is computed by {@code BIOtoBILOU} from the token and its
     * neighbours within the same sentence; a missing neighbour is passed as
     * {@code null}. Nothing here removes the tokens' existing attributes.
     *
     * @param sentences sentences whose tokens are to be relabelled
     */
    public void convertBIOtoBILOU(Seq<Sentence> sentences) {
        sentences.foreach((Function1)new Serializable(){

            public final void apply(Sentence sentence) {
                sentence.tokens().foreach((Function1)new Serializable(){

                    public final BILOUChunkTag apply(Token token) {
                        // Neighbours are only fetched when they exist. The former extra,
                        // unconditional sentenceNext() call discarded its result and was removed.
                        Token prev = token.sentenceHasPrev() ? token.sentencePrev() : null;
                        Token next = token.sentenceHasNext() ? token.sentenceNext() : null;
                        String newLabel = LoadConll2000$.MODULE$.BIOtoBILOU(prev, token, next);
                        return token.attr().$plus$eq(new BILOUChunkTag(token, newLabel));
                    }
                });
            }
        });
    }

    /*
     * WARNING - void declaration
     */
    public String BIOtoBILOU(Token prev, Token token, Token next2) {
        void var5_5;
        Object c = ((CategoricalVar)token.attr().apply(ClassTag$.MODULE$.apply(BIOChunkTag.class))).categoryValue();
        String string = "O";
        if (!(c != null ? !c.equals(string) : string != null)) {
            return "O";
        }
        String[] ts = ((String)((CategoricalVar)token.attr().apply(ClassTag$.MODULE$.apply(BIOChunkTag.class))).categoryValue()).split("-");
        String[] ps = null;
        String[] ns = null;
        if (next2 != null) {
            ns = this.splitLabel(next2);
        }
        if (prev != null) {
            ps = this.splitLabel(prev);
        }
        if (((String)((CategoricalVar)token.attr().apply(ClassTag$.MODULE$.apply(BIOChunkTag.class))).categoryValue()).contains("B-")) {
            if (next2 != null) {
                String string2 = ns[1];
                String string3 = ts[1];
                if (!(string2 != null ? !string2.equals(string3) : string3 != null)) {
                    String string4 = ns[0];
                    String string5 = "B";
                    if (string4 == null ? string5 != null : !string4.equals(string5)) {
                        return (String)((CategoricalVar)token.attr().apply(ClassTag$.MODULE$.apply(BIOChunkTag.class))).categoryValue();
                    }
                }
            }
            return new StringBuilder().append((Object)"U-").append((Object)ts[1]).toString();
        }
        if (next2 != null) {
            String string6 = ns[1];
            String string7 = ts[1];
            if (!(string6 != null ? !string6.equals(string7) : string7 != null)) {
                String string8 = ns[0];
                String string9 = "B";
                if (string8 == null ? string9 != null : !string8.equals(string9)) {
                    return new StringBuilder().append((Object)"I-").append((Object)ts[1]).toString();
                }
            }
        }
        return new StringBuilder().append((Object)"L-").append((Object)var5_5[1]).toString();
    }

    /**
     * Splits a token's {@code BIOChunkTag} label on {@code "-"}.
     *
     * @param token token whose label is read
     * @return the split parts, or {@code {"", "O"}} when the label contains no {@code "-"}
     */
    private String[] splitLabel(Token token) {
        String label = (String)((CategoricalVar)token.attr().apply(ClassTag$.MODULE$.apply(BIOChunkTag.class))).categoryValue();
        if (!label.contains("-")) {
            return new String[]{"", "O"};
        }
        return label.split("-");
    }

    private LoadConll2000$() {
        MODULE$ = this;
        Load$class.$init$(this);
        this.lineSplit = new StringOps(Predef$.MODULE$.augmentString("([^\\s]+) ([^\\s]+) ([^\\s]+)")).r();
        this.posTranslations = (Map)Predef$.MODULE$.Map().apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Tuple2[]{Predef.ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc((Object)"("), (Object)"-LRB-"), Predef.ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc((Object)")"), (Object)"-RRB-")}));
    }
}

