001package org.hl7.fhir.utilities.turtle;
002
003/*
004  Copyright (c) 2011+, HL7, Inc.
005  All rights reserved.
006  
007  Redistribution and use in source and binary forms, with or without modification, 
008  are permitted provided that the following conditions are met:
009    
010   * Redistributions of source code must retain the above copyright notice, this 
011     list of conditions and the following disclaimer.
012   * Redistributions in binary form must reproduce the above copyright notice, 
013     this list of conditions and the following disclaimer in the documentation 
014     and/or other materials provided with the distribution.
015   * Neither the name of HL7 nor the names of its contributors may be used to 
016     endorse or promote products derived from this software without specific 
017     prior written permission.
018  
019  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
020  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
021  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
022  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
023  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
024  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
025  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
026  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
027  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
028  POSSIBILITY OF SUCH DAMAGE.
029  
030 */
031
032
033
034import java.io.IOException;
035import java.io.OutputStream;
036import java.io.OutputStreamWriter;
037import java.io.UnsupportedEncodingException;
038import java.util.ArrayList;
039import java.util.Collections;
040import java.util.HashMap;
041import java.util.HashSet;
042import java.util.List;
043import java.util.Map;
044import java.util.Set;
045import java.util.UUID;
046
047import org.hl7.fhir.exceptions.FHIRFormatError;
048import org.hl7.fhir.utilities.Utilities;
049
050public class Turtle {
051
052        public static final String GOOD_IRI_CHAR = "a-zA-Z0-9\u00A0-\uFFFE";
053
054  public static final String IRI_URL = "(([a-z])+:)*((%[0-9a-fA-F]{2})|[&'\\(\\)*+,;:@_~?!$\\/\\-\\#.\\="+GOOD_IRI_CHAR+"])+"; 
055  public static final String LANG_REGEX = "[a-z]{2}(\\-[a-zA-Z]{2})?";
056
057        // Object model
058        public abstract class Triple {
059                private String uri;
060        }
061
062        public class StringType extends Triple {
063                private String value;
064
065                public StringType(String value) {
066                        super();
067                        this.value = value;
068                }
069        }
070
071        public class Complex extends Triple {
072                protected List<Predicate> predicates = new ArrayList<Predicate>();
073
074                public Complex predicate(String predicate, String object) {
075                        predicateSet.add(predicate);
076                        objectSet.add(object);
077                        return predicate(predicate, new StringType(object));
078                }
079
080    public Complex linkedPredicate(String predicate, String object, String link, String comment) {
081      predicateSet.add(predicate);
082      objectSet.add(object);
083      return linkedPredicate(predicate, new StringType(object), link, comment);
084    }
085
086                public Complex predicate(String predicate, Triple object) {
087      Predicate p = getPredicate(predicate);
088      if (p == null) {
089        p = new Predicate();
090                        p.predicate = predicate;
091                        predicateSet.add(predicate);
092        predicates.add(p);
093      }
094                        if (object instanceof StringType)
095                                objectSet.add(((StringType) object).value);
096      p.objects.add(object);
097                        return this;
098                }
099
100    protected Predicate getPredicate(String predicate) {
101      for (Predicate p : predicates)
102        if (p.predicate.equals(predicate))
103          return p;
104      return null;
105    }
106
107    public Complex linkedPredicate(String predicate, Triple object, String link, String comment) {
108      Predicate p = getPredicate(predicate);
109      if (p == null) {
110        p = new Predicate();
111      p.predicate = predicate;
112      p.link = link;
113      p.comment = comment;
114      predicateSet.add(predicate);
115        predicates.add(p);
116      }
117      if (object instanceof StringType)
118        objectSet.add(((StringType) object).value);
119      p.objects.add(object);
120      return this;
121    }
122
123                public Complex predicate(String predicate) {
124                        predicateSet.add(predicate);
125                        Complex c = complex();
126                        predicate(predicate, c);
127                        return c;
128                }
129
130    public Complex linkedPredicate(String predicate, String link, String comment) {
131      predicateSet.add(predicate);
132      Complex c = complex();
133      linkedPredicate(predicate, c, link, comment);
134      return c;
135    }
136
137                public void prefix(String code, String url) {
138                        Turtle.this.prefix(code, url);
139                }
140        }
141
142        private class Predicate {
143                protected String predicate;
144                protected String link;
145    protected List<Triple> objects = new ArrayList<Turtle.Triple>();
146                protected String comment;
147
148                public String getPredicate() {
149                        return predicate;
150                }
151                public String makelink() {
152      if (link == null)
153        return predicate;
154      else
155        return "<a href=\""+link+"\">"+Utilities.escapeXml(predicate)+"</a>";
156    }
157                
158    public List<Triple> getObjects() {
159      return objects;
160                }
161                public String getComment() {
162                        return comment;
163                }
164        }
165
166        public class Subject extends Complex {
167                private String id;
168
169                public Predicate predicate(String predicate, Triple object, String comment) {
170      Predicate p = getPredicate(predicate);
171      if (p == null) {
172        p = new Predicate();
173                        p.predicate = predicate;
174                        predicateSet.add(predicate);
175                        predicates.add(p);
176                        p.comment = comment; 
177      }
178      if (object instanceof StringType)
179        objectSet.add(((StringType) object).value);
180      p.objects.add(object);
181                        return p;
182                }
183
184                public void comment(String comment) {
185                        if (!Utilities.noString(comment)) {
186                                predicate("rdfs:comment", literal(comment));
187                                predicate("dcterms:description", literal(comment));
188                        }
189                }
190
191                public void label(String label) {
192                        if (!Utilities.noString(label)) {
193                                predicate("rdfs:label", literal(label));
194                                predicate("dc:title", literal(label));
195                        }
196                }
197
198        }
199
200        public class Section {
201          private List<String> comments = new ArrayList<>();
202                private String name;
203                private List<Subject> subjects = new ArrayList<Subject>();
204
205                public Subject triple(String subject, String predicate, String object, String comment) {
206                        return triple(subject, predicate, new StringType(object), comment);
207                }
208
209                public Subject triple(String subject, String predicate, String object) {
210                        return triple(subject, predicate, new StringType(object));
211                }
212
213                public Subject triple(String subject, String predicate, Triple object) {
214                        return triple(subject, predicate, object, null);     
215                }
216
217                public Subject triple(String subject, String predicate, Triple object, String comment) {
218                        Subject s = subject(subject);
219                        s.predicate(predicate, object, comment);
220                        return s;
221                }
222
223                public void comment(String subject, String comment) {
224                        triple(subject, "rdfs:comment", literal(comment));
225                        triple(subject, "dcterms:description", literal(comment));
226                }
227
228                public void label(String subject, String comment) {
229                        triple(subject, "rdfs:label", literal(comment));
230                        triple(subject, "dc:title", literal(comment));
231                }
232
233                public Subject subject(String subject) {
234                        for (Subject ss : subjects) 
235                                if (ss.id.equals(subject))
236                                        return ss;
237                        Subject s = new Subject();
238                        s.id = subject;
239                        subjects.add(s);
240                        return s;
241                }
242
243    public boolean hasSubject(String subject) {
244      for (Subject ss : subjects) 
245        if (ss.id.equals(subject))
246          return true;
247      return false;
248    }
249
250    public void stringComment(String cnt) {
251      comments.add(cnt);
252    }
253        }
254
255        private List<Section> sections = new ArrayList<Section>();
256        protected Set<String> subjectSet = new HashSet<String>();
257        protected Set<String> predicateSet = new HashSet<String>();
258        protected Set<String> objectSet = new HashSet<String>();
259        protected Map<String, String> prefixes = new HashMap<String, String>();
260
261        public void prefix(String code, String url) {
262                prefixes.put(code, url);
263        }
264
265        protected boolean hasSection(String sn) {
266                for (Section s : sections)
267                        if (s.name.equals(sn))
268                                return true;
269                return false;
270
271        }
272
273        public Section section(String sn) {
274                if (hasSection(sn))
275                        throw new Error("Duplicate section name "+sn);
276                Section s = new Section();
277                s.name = sn;
278                sections.add(s);
279                return s;
280        }
281
282        protected String matches(String url, String prefixUri, String prefix) {
283                if (url.startsWith(prefixUri)) {
284                        prefixes.put(prefix, prefixUri);
285                        return prefix+":"+escape(url.substring(prefixUri.length()), false);
286                }
287                return null;
288        }
289
290        protected Complex complex() {
291                return new Complex();
292        }
293
294        private void checkPrefix(Triple object) {
295                if (object instanceof StringType)
296                        checkPrefix(((StringType) object).value);
297                else {
298                        Complex obj = (Complex) object;
299                        for (Predicate po : obj.predicates) {
300                                checkPrefix(po.getPredicate());
301        for (Triple o : po.getObjects())
302          checkPrefix(o);
303                        }
304                }
305        }
306
307        protected void checkPrefix(String pname) {
308                if (pname.startsWith("("))
309                        return;
310                if (pname.startsWith("\""))
311                        return;
312                if (pname.startsWith("<"))
313                        return;
314
315                if (pname.contains(":")) {
316                        String prefix = pname.substring(0, pname.indexOf(":"));
317                        if (!prefixes.containsKey(prefix) && !prefix.equals("http")&& !prefix.equals("urn"))
318                                throw new Error("undefined prefix "+prefix); 
319                }
320        }
321
322        protected StringType literal(String s) {
323                return new StringType("\""+escape(s, true)+"\"");
324        }
325
326  protected StringType literalTyped(String s, String t) {
327    return new StringType("\""+escape(s, true)+"\"^^xs:"+t);
328  }
329
330        public static String escape(String s, boolean string) {
331                if (s == null)
332                        return "";
333
334                StringBuilder b = new StringBuilder();
335                for (char c : s.toCharArray()) {
336                        if (c == '\r')
337                                b.append("\\r");
338                        else if (c == '\n')
339                                b.append("\\n");
340                        else if (c == '"')
341                                b.append("\\\"");
342                        else if (c == '\\')
343                                b.append("\\\\");
344                        else if (c == '/' && !string)
345                                b.append("\\/");
346                        else 
347                                b.append(c);
348                }   
349                return b.toString();
350        }
351
352        protected String pctEncode(String s) {
353                if (s == null)
354                        return "";
355
356                StringBuilder b = new StringBuilder();
357                for (char c : s.toCharArray()) {
358                        if (c >= 'A' && c <= 'Z')
359                                b.append(c);
360                        else if (c >= 'a' && c <= 'z')
361                                b.append(c);
362                        else if (c >= '0' && c <= '9')
363                                b.append(c);
364                        else if (c == '.')
365                                b.append(c);
366                        else 
367                                b.append("%"+Integer.toHexString(c));
368                }   
369                return b.toString();
370        }
371
372        protected List<String> sorted(Set<String> keys) {
373                List<String> names = new ArrayList<String>();
374                names.addAll(keys);
375                Collections.sort(names);
376                return names;
377        }
378
379        public void commit(OutputStream destination, boolean header) throws IOException {
380                LineOutputStreamWriter writer = new LineOutputStreamWriter(destination);
381                commitPrefixes(writer, header);
382                for (Section s : sections) {
383                        commitSection(writer, s);
384                }
385                writer.ln("# -------------------------------------------------------------------------------------");
386                writer.ln();
387                writer.flush();
388                writer.close();
389        }
390
391  public String asHtml() throws Exception {
392    StringBuilder b = new StringBuilder();
393    b.append("<pre class=\"rdf\" style=\"white-space: pre; overflow: hidden\"><code class=\"language-turtle\">\r\n");
394    commitPrefixes(b);
395    for (Section s : sections) {
396      commitSection(b, s);
397    }
398    b.append("</code></pre>\r\n");
399    b.append("\r\n");
400    return b.toString();
401  }
402
403        private void commitPrefixes(LineOutputStreamWriter writer, boolean header) throws IOException {
404                if (header) {
405                        writer.ln("# FHIR Sub-definitions");
406                        writer.write("# This is work in progress, and may change rapidly \r\n");
407                        writer.ln();
408                        writer.write("# A note about policy: the focus here is providing the knowledge from \r\n"); 
409                        writer.write("# the FHIR specification as a set of triples for knowledge processing. \r\n");
410                        writer.write("# Where appopriate, predicates defined external to FHIR are used. \"Where \r\n");
411                        writer.write("# appropriate\" means that the predicates are a faithful representation \r\n");
412                        writer.write("# of the FHIR semantics, and do not involve insane (or owful) syntax. \r\n");
413                        writer.ln();
414                        writer.write("# Where the community agrees on additional predicate statements (such \r\n");
415                        writer.write("# as OWL constraints) these are added in addition to the direct FHIR \r\n");
416                        writer.write("# predicates \r\n");
417                        writer.ln();
418                        writer.write("# This it not a formal ontology, though it is possible it may start to become one eventually\r\n");
419                        writer.ln();
420                        writer.write("# this file refers to concepts defined in rim.ttl and to others defined elsewhere outside HL7 \r\n");
421                        writer.ln();
422                }
423                for (String p : sorted(prefixes.keySet()))
424                        writer.ln("@prefix "+p+": <"+prefixes.get(p)+"> .");
425                writer.ln();
426                if (header) {
427                        writer.ln("# Predicates used in this file:");
428                        for (String s : sorted(predicateSet)) 
429                                writer.ln(" # "+s);
430                        writer.ln();
431                }
432        }
433
434  private void commitPrefixes(StringBuilder b) throws Exception {
435    for (String p : sorted(prefixes.keySet()))
436      b.append("@prefix "+p+": &lt;"+prefixes.get(p)+"&gt; .\r\n");
437    b.append("\r\n");
438  }
439
440        //  private String lastSubject = null;
441        //  private String lastComment = "";
442
443        private void commitSection(LineOutputStreamWriter writer, Section section) throws IOException {
444          writer.ln("# - "+section.name+" "+Utilities.padLeft("", '-', 75-section.name.length()));
445          writer.ln();
446          if (!section.comments.isEmpty()) {
447            for (String s : section.comments) {
448              writer.ln("# "+s);                  
449            }
450            writer.ln();
451          }
452                for (Subject sbj : section.subjects) {
453      if (Utilities.noString(sbj.id)) {
454        writer.write("[");
455      } else {
456                        writer.write(sbj.id);
457                        writer.write(" ");
458      }
459                        int i = 0;
460
461                        for (Predicate p : sbj.predicates) {
462                          //
463                                writer.write(p.getPredicate());
464                                writer.write(" ");
465        boolean first = true;
466        for (Triple o : p.getObjects()) {
467          if (first)
468            first = false;
469          else
470            writer.write(", ");
471          if (o instanceof StringType)
472            writer.write(((StringType) o).value);
473                                else {
474                                        writer.write("[");
475            if (write((Complex) o, writer, 4))
476                                                writer.write("\r\n  ]");
477                                        else
478                                                writer.write("]");
479                                }
480        }
481                                String comment = p.comment == null? "" : " # "+p.comment;
482                                i++;
483                                if (i < sbj.predicates.size())
484                                        writer.write(";"+comment+"\r\n  ");
485        else {
486          if (Utilities.noString(sbj.id)) 
487            writer.write("]");
488          writer.write(" ."+comment+"\r\n\r\n");
489                }
490          }
491        }
492  }
493
494  private void commitSection(StringBuilder b, Section section) throws Exception {
495    b.append("# - "+section.name+" "+Utilities.padLeft("", '-', 75-section.name.length())+"\r\n");
496    b.append("\r\n");
497    if (!section.comments.isEmpty()) {
498      for (String s : section.comments) {
499        b.append("# "+s+"\r\n");      
500      }
501      b.append("\r\n");
502    }
503    for (Subject sbj : section.subjects) {
504      b.append(Utilities.escapeXml(sbj.id));
505      b.append(" ");
506      int i = 0;
507
508      for (Predicate p : sbj.predicates) {
509        //        b.append("# test\r\n ");      
510        b.append(p.makelink());
511        b.append(" ");
512        boolean first = true;
513        for (Triple o : p.getObjects()) {
514          if (first)
515            first = false;
516          else
517            b.append(", ");
518          if (o instanceof StringType)
519            b.append(Utilities.escapeXml(((StringType) o).value));
520          else {
521            b.append("[");
522            if (write((Complex) o, b, 4))
523              b.append("\r\n  ]");
524            else
525              b.append("]");
526          }
527        }
528        String comment = p.comment == null? "" : " # "+p.comment;
529        i++;
530        if (i < sbj.predicates.size())
531          b.append(";"+Utilities.escapeXml(comment)+"\r\n  ");
532        else
533          b.append("."+Utilities.escapeXml(comment)+"\r\n\r\n");
534      }
535    }
536  }
537
538        protected class LineOutputStreamWriter extends OutputStreamWriter {
539                private LineOutputStreamWriter(OutputStream out) throws UnsupportedEncodingException {
540                        super(out, "UTF-8");
541                }
542
543                private void ln() throws IOException {
544                        write("\r\n");
545                }
546
547                private void ln(String s) throws IOException {
548                        write(s);
549                        write("\r\n");
550                }
551        }
552
553        public boolean write(Complex complex, LineOutputStreamWriter writer, int indent) throws IOException {
554                if (complex.predicates.isEmpty()) 
555                        return false;
556    if (complex.predicates.size() == 1 && complex.predicates.get(0).getObjects().size()== 1 && complex.predicates.get(0).getObjects().get(0) instanceof StringType && Utilities.noString(complex.predicates.get(0).comment)) {
557      writer.write(" "+complex.predicates.get(0).predicate+" "+((StringType) complex.predicates.get(0).getObjects().get(0)).value);
558                        return false;
559                }
560                String left = Utilities.padLeft("", ' ', indent);
561                int i = 0;
562                for (Predicate po : complex.predicates) {
563                        writer.write("\r\n");
564      boolean first = true;
565      for (Triple o : po.getObjects()) {
566        if (first) {
567          first = false;
568          writer.write(left+" "+po.getPredicate()+" ");
569        } else
570          writer.write(", ");
571        if (o instanceof StringType)
572          writer.write(((StringType) o).value);
573                        else {
574          writer.write("[");
575          if (write((Complex) o, writer, indent+2))
576            writer.write("\r\n"+left+" ]");
577                                else
578                                        writer.write(" ]");
579                        }
580      }
581                        i++;
582                        if (i < complex.predicates.size())
583                                writer.write(";");
584                        if (!Utilities.noString(po.comment)) 
585                                writer.write(" # "+escape(po.comment, false));
586                }
587                return true;      
588        }
589
590  public boolean write(Complex complex, StringBuilder b, int indent) throws Exception {
591    if (complex.predicates.isEmpty()) 
592      return false;
593    if (complex.predicates.size() == 1 && complex.predicates.get(0).getObjects().size()== 1 && complex.predicates.get(0).getObjects().get(0) instanceof StringType && Utilities.noString(complex.predicates.get(0).comment)) {
594      b.append(" "+complex.predicates.get(0).makelink()+" "+Utilities.escapeXml(((StringType) complex.predicates.get(0).getObjects().get(0)).value));
595      return false;
596    }
597    String left = Utilities.padLeft("", ' ', indent);
598    int i = 0;
599    for (Predicate po : complex.predicates) {
600      b.append("\r\n");
601      boolean first = true;
602      for (Triple o : po.getObjects()) {
603        if (first) {
604          first = false;
605          b.append(left+" "+po.makelink()+" ");
606        } else
607          b.append(", ");
608        if (o instanceof StringType)
609          b.append(Utilities.escapeXml(((StringType) o).value));
610      else {
611          b.append("[");
612          if (write((Complex) o, b, indent+2))
613          b.append(left+" ]");
614        else
615          b.append(" ]");
616      }
617      }
618      i++;
619      if (i < complex.predicates.size())
620        b.append(";");
621      if (!Utilities.noString(po.comment)) 
622        b.append(" # "+Utilities.escapeXml(escape(po.comment, false)));
623    }
624    return true;      
625  }
626
627
628  public abstract class TTLObject {
629                protected int line;
630                protected int col;
631
632    abstract public boolean hasValue(String value);
633
634    public int getLine() {
635      return line;
636    }
637
638    public int getCol() {
639      return col;
640    }
641    
642    
643        }
644
645
646        public class TTLLiteral extends TTLObject {
647
648                private String value;
649                private String type;
650                protected TTLLiteral(int line, int col) {
651                        this.line = line;
652                        this.col = col;
653                }
654    @Override
655    public boolean hasValue(String value) {
656      return value.equals(this.value);
657    }
658    public String getValue() {
659      return value;
660    }
661    public String getType() {
662      return type;
663    }
664
665        }
666
667        public class TTLURL extends TTLObject {
668                private String uri;
669
670                protected TTLURL(int line, int col) {
671                        this.line = line;
672                        this.col = col;
673                }
674
675                public String getUri() {
676                        return uri;
677                }
678
679    public void setUri(String uri) throws FHIRFormatError {
680                        if (!uri.matches(IRI_URL))
681        throw new FHIRFormatError("Illegal URI "+uri);
682                        this.uri = uri;
683                }
684
685    @Override
686    public boolean hasValue(String value) {
687      return value.equals(this.uri);
688        }
689  }
690
691  public class TTLList extends TTLObject {
692    private List<TTLObject> list = new ArrayList<Turtle.TTLObject>();
693
694    public TTLList(TTLObject obj) {
695      super();
696      list.add(obj);
697    }
698    
699    @Override
700    public boolean hasValue(String value) {
701      for (TTLObject obj : list)
702        if (obj.hasValue(value))
703          return true;
704      return false;
705    }
706
707    public List<TTLObject> getList() {
708      return list;
709    }
710    
711  }
712        public class TTLComplex extends TTLObject {
713                private Map<String, TTLObject> predicates = new HashMap<String, Turtle.TTLObject>();
714                protected TTLComplex(int line, int col) {
715                        this.line = line;
716                        this.col = col;
717                }
718    public Map<String, TTLObject> getPredicates() {
719      return predicates;
720        }
721    @Override
722    public boolean hasValue(String value) {
723      return false;
724    }
725    public void addPredicate(String uri, TTLObject obj) {
726      if (!predicates.containsKey(uri))
727        predicates.put(uri, obj);
728      else {
729        TTLObject eo = predicates.get(uri);
730        TTLList list = null; 
731        if (eo instanceof TTLList) 
732          list = (TTLList) eo; 
733        else {
734          list = new TTLList(eo);
735          predicates.put(uri, list);
736        }
737        list.list.add(obj);
738      }
739    }
740    public void addPredicates(Map<String, TTLObject> values) {
741      for (String s : values.keySet()) {
742        addPredicate(s, values.get(s));
743      }
744    }
745  }
746
747  private Map<TTLURL, TTLComplex> objects = new HashMap<TTLURL, Turtle.TTLComplex>();
748
749  private Object base;
750
751        public enum LexerTokenType {
752                TOKEN, // [, ], :, @
753                WORD, // a word 
754                URI, // a URI <>
755                LITERAL // "..."
756        }
757
758        public class Lexer {
759
760
761                private String source;
762                private LexerTokenType type;
763                private int cursor, line, col, startLine, startCol;
764                private String token;
765
766    public Lexer(String source) throws FHIRFormatError {
767                        this.source = source;
768                        cursor = 0;
769                        line = 1;
770                        col = 1;
771      readNext(false);
772                }
773
774                private void skipWhitespace() {
775                        while (cursor < source.length()) {
776                                char ch = source.charAt(cursor);
777                                if (Character.isWhitespace(ch))
778                                        grab();
779                                else if (ch == '#') {
780                                        ch = grab();
781                                        while (cursor < source.length()) {
782                                                ch = grab();
783                                                if (ch == '\r' || ch == '\n') {
784                                                        break;
785                                                }
786                                        }          
787                                } else
788                                        break;
789                        }
790                }
791
792                private char grab() {
793                        char c = source.charAt(cursor);
794                        if (c == '\n') {
795                                line++;
796                                col = 1;
797                        } else
798                                col++;
799
800                        cursor++;
801                        return c;
802                }
803
804    private void readNext(boolean postColon) throws FHIRFormatError {    
805                        token = null;
806                        type = null;
807                        skipWhitespace();
808                        if (cursor >= source.length())
809                                return;
810                        startLine = line;
811                        startCol = col;
812                        char ch = grab();
813                        StringBuilder b = new StringBuilder();
814                        switch (ch) {
815                        case '@':
816                        case '.': 
817                        case ':': 
818                        case ';': 
819                        case '^': 
820                        case ',': 
821                        case ']': 
822                        case '[': 
823                        case '(': 
824                        case ')': 
825                                type = LexerTokenType.TOKEN;
826                                b.append(ch);
827                                token = b.toString();
828                                return;
829                        case '<': 
830                                while (cursor < source.length()) {
831                                        ch = grab();
832                                        if (ch == '>')
833                                                break;
834                                        b.append(ch);
835                                }
836                                type = LexerTokenType.URI;
837                                token = unescape(b.toString(), true);
838                                return;        
839                        case '"': 
840                                b.append(ch);
841                                String end = "\"";
842                                while (cursor < source.length()) {
843                                        ch = grab();
844          if (b.length() == 2 && ch != '"' && b.equals("\"\"")) {
845                                                cursor--;
846                                                break;
847                                        }
848                                        b.append(ch);
849          if (ch == '"')
850                                        if (b.toString().equals("\"\"\""))
851                                                end = "\"\"\"";
852            else if (!b.toString().equals("\"\"") && b.toString().endsWith(end) && !b.toString().endsWith("\\"+end))
853                                                break;
854                                }
855                                type = LexerTokenType.LITERAL;
856                                token = unescape(b.toString().substring(end.length(), b.length()-end.length()), false);
857                                return;        
858                        case '\'': 
859                                b.append(ch);
860                                end = "'";
861                                while (cursor < source.length()) {
862                                        ch = grab();
863                                        if (b.equals("''") && ch != '\'') {
864                                                cursor--;
865                                                break;
866                                        }
867                                        b.append(ch);
868                                        if (b.toString().equals("'''"))
869                                                end = "'''";
870                                        else if (!b.toString().equals("''") && b.toString().endsWith(end))
871                                                break;
872                                }
873                                type = LexerTokenType.LITERAL;
874                                token = unescape(b.toString().substring(end.length(), b.length()-end.length()), false);
875                                return;        
876                        default:
877        if (Utilities.charInRange(ch, '0', '9') || Utilities.charInRange(ch, 'a', 'z') || Utilities.charInRange(ch, 'A', 'Z') || Utilities.charInSet(ch, '_', '-', '+', '%')) {
878                                        b.append(ch);
879                                        while (cursor < source.length()) {
880                                                ch = grab();
881            //                                          if (!Utilities.charInRange(ch, '0', '9') && !Utilities.charInRange(ch, 'a', 'z') && !Utilities.charInRange(ch, 'A', 'Z') && !Utilities.charInSet(ch, '_', '-', '+', '.', '\\', '#'))
882            if (Character.isWhitespace(ch) || Utilities.charInSet(ch, ';', ']', ')', '~') || (( ch == ':') && !postColon))
883                                                        break;
884                                                b.append(ch);
885                                        }
886                                        type = LexerTokenType.WORD;
887                                        token = b.toString();
888                                        cursor--;
889                                        return;        
890                                } else
891                                        throw error("unexpected lexer char "+ch);
892                        }
893                }
894
895    private String unescape(String s, boolean isUri) throws FHIRFormatError {
896                        StringBuilder b = new StringBuilder();
897                        int i = 0;
898                        while (i < s.length()) {
899                                char ch = s.charAt(i);
900                                if (ch == '\\' && i < s.length()-1) {
901                                        i++;
902                                        switch (s.charAt(i)) {
903                                        case 't': 
904                                                b.append('\t');
905                                                break;
906                                        case 'r':
907                                                b.append('\r');
908                                                break;
909                                        case 'n': 
910                                                b.append('\n');
911                                                break;
912                                        case 'f': 
913                                                b.append('\f');
914                                                break;
915                                        case '\'':
916                                                b.append('\'');
917                                                break;
918          case '\"':
919            b.append('\"');
920            break;
921                                        case '\\': 
922                                                b.append('\\');
923                                                break;
924                                        case '/': 
925                                                b.append('\\');
926                                                break;
927                                        case 'U':
928                                        case 'u':
929                                                i++;
930                                                int l = 4;
931                                                int uc = Integer.parseInt(s.substring(i, i+l), 16);
932                                                if (uc < (isUri ? 33 : 32)) {
933                                                        l = 8;
934                                                        uc = Integer.parseInt(s.substring(i, i+8), 16);
935                                                }
936                                                if (uc < (isUri ? 33 : 32) || (isUri && (uc == 0x3C || uc == 0x3E)))
937              throw new FHIRFormatError("Illegal unicode character");
938                                                b.append((char) uc);
939                                                i = i + l;
940                                                break;
941                                        default:
942            throw new FHIRFormatError("Unknown character escape \\"+s.charAt(i));
943                                        }
944                                } else {
945                                        b.append(ch);
946                                }
947        i++;
948                        }
949                        return b.toString();
950                }
951
952                public boolean done() {
953                        return type == null;
954                }
955
956    public String next(LexerTokenType type, boolean postColon) throws FHIRFormatError {
957                        if (type != null && this.type != type)
958                                throw error("Unexpected type. Found "+this.type.toString()+" looking for a "+type.toString());
959                        String res = token;
960      readNext(postColon);
961                        return res;
962                }
963
964                public String peek() throws Exception {
965                        return token;
966                }
967
968                public LexerTokenType peekType() {
969                        return type;
970                }
971
972    public void token(String token) throws FHIRFormatError {
973                        if (!token.equals(this.token))
974                                throw error("Unexpected word "+this.token+" looking for "+token);
975      next(LexerTokenType.TOKEN, token.equals(":"));
976                }
977
978                public void word(String word) throws Exception {
979                        if (!word.equals(this.token))
980                                throw error("Unexpected word "+this.token+" looking for "+word);
981      next(LexerTokenType.WORD, false);
982                }
983
984    public String word() throws FHIRFormatError {
985                        String t = token;
986      next(LexerTokenType.WORD, false);
987                        return t;
988                }
989
990    public String uri() throws FHIRFormatError {
991                        if (this.type != LexerTokenType.URI)
992                                throw error("Unexpected type. Found "+this.type.toString()+" looking for a URI");
993                        String t = token;
994      next(LexerTokenType.URI, false);
995                        return t;
996                }
997
998    public String literal() throws FHIRFormatError {
999                        if (this.type != LexerTokenType.LITERAL)
1000                                throw error("Unexpected type. Found "+this.type.toString()+" looking for a Literal");
1001                        String t = token;
1002      next(LexerTokenType.LITERAL, false);
1003                        return t;
1004                }
1005
1006                public boolean peek(LexerTokenType type, String token) {
1007                        return this.type == type && this.token.equals(token);
1008                }
1009
1010    public FHIRFormatError error(String message) {
1011      return new FHIRFormatError("Syntax Error parsing Turtle on line "+Integer.toString(line)+" col "+Integer.toString(col)+": "+message);
1012                }
1013
1014        }
1015        //      
1016        //      public void importTtl(Section sct, String ttl) throws Exception {
1017        //              if (!Utilities.noString(ttl)) {
1018        //                      //        System.out.println("import ttl: "+ttl);
1019        //                      Lexer lexer = new Lexer(ttl);
1020        //                      String subject = null;
1021        //                      String predicate = null;
1022        //                      while (!lexer.done()) {
1023        //                              if (subject == null)
1024        //                                      subject = lexer.next();
1025        //                              if (predicate == null)
1026        //                                      predicate = lexer.next();
1027        //                              if (lexer.peekType() == null) {
1028        //                                      throw new Error("Unexpected end of input parsing turtle");
1029        //                              } if (lexer.peekType() == LexerTokenType.TOKEN) {
1030        //                                      sct.triple(subject, predicate, lexer.next());
1031        //                              } else if (lexer.peek() == null) {
1032        //                                      throw new Error("Unexected - turtle lexer found no token");
1033        //                              } else if (lexer.peek().equals("[")) {
1034        //                                      sct.triple(subject, predicate, importComplex(lexer));
1035        //                              } else
1036        //                                      throw new Exception("Not done yet");
1037        //                              String n = lexer.next();
1038        //                              if (Utilities.noString(n))
1039        //                                      break;
1040        //                              if (n.equals(".")) {
1041        //                                      subject = null;
1042        //                                      predicate = null;
1043        //                              } else if (n.equals(";")) {
1044        //                                      predicate = null;
1045        //                              } else if (!n.equals(","))
1046        //                                      throw new Exception("Unexpected token "+n);          
1047        //                      }
1048        //              }
1049        //      }
1050
1051  public void parse(String source) throws FHIRFormatError {
1052                prefixes.clear();
1053                prefixes.put("_", "urn:uuid:4425b440-2c33-4488-b9fc-cf9456139995#");
1054                parse(new Lexer(source));
1055        }
1056
1057  private void parse(Lexer lexer) throws FHIRFormatError {
1058                boolean doPrefixes = true;
1059                while (!lexer.done()) {
1060      if (doPrefixes && (lexer.peek(LexerTokenType.TOKEN, "@") || lexer.peek(LexerTokenType.WORD, "PREFIX") || lexer.peek(LexerTokenType.WORD, "BASE"))) {
1061                                boolean sparqlStyle = false;
1062        boolean base = false;
1063                                if (lexer.peek(LexerTokenType.TOKEN, "@")) {
1064                                        lexer.token("@");
1065          String p = lexer.word();
1066          if (p.equals("base"))
1067            base = true;
1068          else if (!p.equals("prefix"))
1069            throw new FHIRFormatError("Unexpected token "+p);  
1070                                } else {
1071                                        sparqlStyle = true;
1072          String p = lexer.word();
1073          if (p.equals("BASE"))
1074            base = true;
1075          else if (!p.equals("PREFIX"))
1076            throw new FHIRFormatError("Unexpected token "+p);  
1077        }
1078        String prefix = null; 
1079        if (!base) {
1080          prefix = lexer.peekType() == LexerTokenType.WORD ? lexer.next(LexerTokenType.WORD, false) : null;
1081                                lexer.token(":");
1082        }
1083        String url = lexer.next(LexerTokenType.URI, false);
1084                                if (!sparqlStyle)
1085                                        lexer.token(".");
1086        if (!base)
1087                                prefix(prefix, url);
1088        else if (this.base == null)
1089          this.base = url;
1090        else
1091          throw new FHIRFormatError("Duplicate @base");  
1092                        } else if (lexer.peekType() == LexerTokenType.URI) {
1093                                doPrefixes = false;
1094                                TTLURL uri = new TTLURL(lexer.startLine, lexer.startCol);
1095                                uri.setUri(lexer.uri());
1096                                TTLComplex complex = parseComplex(lexer);
1097                                objects.put(uri, complex);
1098                                lexer.token(".");
1099                        } else if (lexer.peekType() == LexerTokenType.WORD) {
1100                                doPrefixes = false;
1101                                TTLURL uri = new TTLURL(lexer.startLine, lexer.startCol);
1102                                String pfx = lexer.word();
1103                                if (!prefixes.containsKey(pfx))
1104          throw new FHIRFormatError("Unknown prefix "+pfx);
1105                                lexer.token(":");
1106                                uri.setUri(prefixes.get(pfx)+lexer.word());
1107                                TTLComplex complex = parseComplex(lexer);
1108                                objects.put(uri, complex);
1109                                lexer.token(".");
1110                        } else if (lexer.peek(LexerTokenType.TOKEN, ":")) {
1111                                doPrefixes = false;
1112                                TTLURL uri = new TTLURL(lexer.startLine, lexer.startCol);
1113                                lexer.token(":");
1114                                if (!prefixes.containsKey(null))
1115          throw new FHIRFormatError("Unknown prefix ''");
1116                                uri.setUri(prefixes.get(null)+lexer.word());
1117                                TTLComplex complex = parseComplex(lexer);
1118                                objects.put(uri, complex);
1119                                lexer.token(".");
1120                        } else if (lexer.peek(LexerTokenType.TOKEN, "[")) {
1121                                doPrefixes = false;
1122                                lexer.token("[");
1123                                TTLComplex bnode = parseComplex(lexer);
1124                                lexer.token("]");
1125                                TTLComplex complex = null;
1126        if (!lexer.peek(LexerTokenType.TOKEN, ".")) {
1127                                        complex = parseComplex(lexer);
1128          // at this point, we collapse bnode and complex, and give bnode a fictional identity
1129          bnode.addPredicates(complex.predicates);
1130        }
1131        
1132        objects.put(anonymousId(), bnode);
1133                                lexer.token(".");
1134                        } else 
1135                                throw lexer.error("Unknown token "+lexer.token);
1136                }
1137        }
1138
1139  private TTLURL anonymousId() throws FHIRFormatError {
1140    TTLURL url = new TTLURL(-1, -1);
1141    url.setUri("urn:uuid:"+UUID.randomUUID().toString().toLowerCase());
1142    return url;
1143  }
1144
1145  private TTLComplex parseComplex(Lexer lexer) throws FHIRFormatError {
1146                TTLComplex result = new TTLComplex(lexer.startLine, lexer.startCol);
1147
1148                boolean done = lexer.peek(LexerTokenType.TOKEN, "]");
1149                while (!done) {
1150                        String uri = null;
1151                        if (lexer.peekType() == LexerTokenType.URI)
1152                                uri = lexer.uri();
1153                        else {
1154                                String t = lexer.peekType() == LexerTokenType.WORD ? lexer.word() : null;
1155                                if (lexer.type == LexerTokenType.TOKEN && lexer.token.equals(":")) {
1156                                        lexer.token(":");
1157                                        if (!prefixes.containsKey(t))
1158            throw new FHIRFormatError("unknown prefix "+t);
1159                                        uri = prefixes.get(t)+lexer.word();
1160                                } else if (t.equals("a"))
1161                                        uri = prefixes.get("rdfs")+"type";
1162                                else
1163                                        throw lexer.error("unexpected token");
1164                        }
1165
1166                        boolean inlist = false;
1167                        if (lexer.peek(LexerTokenType.TOKEN, "(")) {
1168                                inlist = true;
1169                                lexer.token("(");
1170                        }
1171
1172                        boolean rpt = false;
1173                        do {
1174                                if (lexer.peek(LexerTokenType.TOKEN, "[")) {
1175                                        lexer.token("[");
1176          result.addPredicate(uri, parseComplex(lexer));
1177                                        lexer.token("]");
1178                                } else if (lexer.peekType() == LexerTokenType.URI) {
1179                                        TTLURL u = new TTLURL(lexer.startLine, lexer.startCol);
1180                                        u.setUri(lexer.uri());
1181          result.addPredicate(uri, u);
1182                                } else if (lexer.peekType() == LexerTokenType.LITERAL) {
1183                                        TTLLiteral u = new TTLLiteral(lexer.startLine, lexer.startCol);
1184                                        u.value = lexer.literal();
1185                                        if (lexer.peek(LexerTokenType.TOKEN, "^")) {
1186                                                lexer.token("^");
1187                                                lexer.token("^");
1188                                                if (lexer.peekType() == LexerTokenType.URI) {
1189                                                        u.type = lexer.uri();
1190                                                } else {
1191                                                        String l = lexer.word();
1192                                                        lexer.token(":");
1193                                                        u.type = prefixes.get(l)+ lexer.word();
1194                                                }
1195                                        }
1196                                        if (lexer.peek(LexerTokenType.TOKEN, "@")) {
1197                                                //lang tag - skip it 
1198                                                lexer.token("@");
1199            String lang = lexer.word();
1200            if (!lang.matches(LANG_REGEX)) {
1201              throw new FHIRFormatError("Invalid Language tag "+lang);
1202            }
1203                                        }
1204          result.addPredicate(uri, u);
1205                                } else if (lexer.peekType() == LexerTokenType.WORD || lexer.peek(LexerTokenType.TOKEN, ":")) {
1206                                        int sl = lexer.startLine;
1207                                        int sc = lexer.startCol;
1208                                        String pfx = lexer.peekType() == LexerTokenType.WORD ? lexer.word() : null;
1209                                        if (Utilities.isDecimal(pfx, true, true) && !lexer.peek(LexerTokenType.TOKEN, ":")) {
1210                                                TTLLiteral u = new TTLLiteral(sl, sc);
1211                                                u.value = pfx;
1212            result.addPredicate(uri, u);                                        
1213                                        } else if (("false".equals(pfx) || "true".equals(pfx)) && !lexer.peek(LexerTokenType.TOKEN, ":")) {
1214                                                TTLLiteral u = new TTLLiteral(sl, sc);
1215                                                u.value = pfx;
1216            result.addPredicate(uri, u);                                        
1217                                        } else {
1218                                                if (!prefixes.containsKey(pfx))
1219              throw new FHIRFormatError("Unknown prefix "+(pfx == null ? "''" : pfx));                                          
1220                                                TTLURL u = new TTLURL(sl, sc);
1221                                                lexer.token(":");
1222                                                u.setUri(prefixes.get(pfx)+lexer.word());
1223            result.addPredicate(uri, u);
1224                                        } 
1225                                } else if (!lexer.peek(LexerTokenType.TOKEN, ";") && (!inlist || !lexer.peek(LexerTokenType.TOKEN, ")"))) {
1226          throw new FHIRFormatError("unexpected token "+lexer.token);
1227                                }
1228
1229                                if (inlist)
1230                                        rpt = !lexer.peek(LexerTokenType.TOKEN, ")");
1231                                else {
1232                                        rpt = lexer.peek(LexerTokenType.TOKEN, ",");
1233                                        if (rpt)
1234            lexer.readNext(false);
1235                                }
1236                        } while (rpt);
1237                        if (inlist)
1238                                lexer.token(")");
1239
1240                        if (lexer.peek(LexerTokenType.TOKEN, ";")) {
1241        while ((lexer.peek(LexerTokenType.TOKEN, ";")))
1242                                lexer.token(";");
1243        done = lexer.peek(LexerTokenType.TOKEN, ".") || lexer.peek(LexerTokenType.TOKEN, "]");
1244                        } else {
1245                                done = true;
1246                        }
1247                }
1248                return result;
1249        }
1250
1251  public Map<TTLURL, TTLComplex> getObjects() {
1252    return objects;
1253  }
1254
1255  public TTLComplex getObject(String url) {
1256    for (TTLURL t : objects.keySet()) {
1257      if (t.getUri().equals(url))
1258        return objects.get(t);
1259    }
1260    return null;
1261  }
1262
1263        //      public void parseFragment(Lexer lexer) throws Exception {
1264        //              lexer.next(); // read [
1265        //              Complex obj = new Complex();
1266        //              while (!lexer.peek().equals("]")) {
1267        //                      String predicate = lexer.next();
1268        //                      if (lexer.peekType() == LexerTokenType.TOKEN || lexer.peekType() == LexerTokenType.LITERAL) {
1269        //                              obj.predicate(predicate, lexer.next());
1270        //                      } else if (lexer.peek().equals("[")) {
1271        //                              obj.predicate(predicate, importComplex(lexer));
1272        //                      } else
1273        //                              throw new Exception("Not done yet");
1274        //                      if (lexer.peek().equals(";")) 
1275        //                              lexer.next();
1276        //              }
1277        //              lexer.next(); // read ]
1278        //              //return obj;
1279        //      }
1280        //
1281        //      public void importTtl(Section sct, String ttl) throws Exception {
1282        //              if (!Utilities.noString(ttl)) {
1283        //                      //        System.out.println("import ttl: "+ttl);
1284        //                      Lexer lexer = new Lexer(ttl);
1285        //                      String subject = null;
1286        //                      String predicate = null;
1287        //                      while (!lexer.done()) {
1288        //                              if (subject == null)
1289        //                                      subject = lexer.next();
1290        //                              if (predicate == null)
1291        //                                      predicate = lexer.next();
1292        //                              if (lexer.peekType() == null) {
1293        //                                      throw new Error("Unexpected end of input parsing turtle");
1294        //                              } if (lexer.peekType() == LexerTokenType.TOKEN) {
1295        //                                      sct.triple(subject, predicate, lexer.next());
1296        //                              } else if (lexer.peek() == null) {
1297        //                                      throw new Error("Unexected - turtle lexer found no token");
1298        //                              } else if (lexer.peek().equals("[")) {
1299        //                                      sct.triple(subject, predicate, importComplex(lexer));
1300        //                              } else
1301        //                                      throw new Exception("Not done yet");
1302        //                              String n = lexer.next();
1303        //                              if (Utilities.noString(n))
1304        //                                      break;
1305        //                              if (n.equals(".")) {
1306        //                                      subject = null;
1307        //                                      predicate = null;
1308        //                              } else if (n.equals(";")) {
1309        //                                      predicate = null;
1310        //                              } else if (!n.equals(","))
1311        //                                      throw new Exception("Unexpected token "+n);          
1312        //                      }
1313        //              }
1314        //}
1315
1316        //      private Complex importComplex(Lexer lexer) throws Exception {
1317        //              lexer.next(); // read [
1318        //              Complex obj = new Complex();
1319        //              while (!lexer.peek().equals("]")) {
1320        //                      String predicate = lexer.next();
1321        //                      if (lexer.peekType() == LexerTokenType.TOKEN || lexer.peekType() == LexerTokenType.LITERAL) {
1322        //                              obj.predicate(predicate, lexer.next());
1323        //                      } else if (lexer.peek().equals("[")) {
1324        //                              obj.predicate(predicate, importComplex(lexer));
1325        //                      } else
1326        //                              throw new Exception("Not done yet");
1327        //                      if (lexer.peek().equals(";")) 
1328        //                              lexer.next();
1329        //              }
1330        //              lexer.next(); // read ]
1331        //              return obj;
1332        //      }
1333
1334}