001package org.hl7.fhir.utilities.turtle; 002 003/* 004 Copyright (c) 2011+, HL7, Inc. 005 All rights reserved. 006 007 Redistribution and use in source and binary forms, with or without modification, 008 are permitted provided that the following conditions are met: 009 010 * Redistributions of source code must retain the above copyright notice, this 011 list of conditions and the following disclaimer. 012 * Redistributions in binary form must reproduce the above copyright notice, 013 this list of conditions and the following disclaimer in the documentation 014 and/or other materials provided with the distribution. 015 * Neither the name of HL7 nor the names of its contributors may be used to 016 endorse or promote products derived from this software without specific 017 prior written permission. 018 019 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 022 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 023 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 024 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 025 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 026 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 027 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 028 POSSIBILITY OF SUCH DAMAGE. 029 030 */ 031 032 033 034import java.io.IOException; 035import java.io.OutputStream; 036import java.io.OutputStreamWriter; 037import java.io.UnsupportedEncodingException; 038import java.util.ArrayList; 039import java.util.Collections; 040import java.util.HashMap; 041import java.util.HashSet; 042import java.util.List; 043import java.util.Map; 044import java.util.Set; 045import java.util.UUID; 046 047import org.hl7.fhir.exceptions.FHIRFormatError; 048import org.hl7.fhir.utilities.Utilities; 049 050public class Turtle { 051 052 public static final String GOOD_IRI_CHAR = "a-zA-Z0-9\u00A0-\uFFFE"; 053 054 public static final String IRI_URL = "(([a-z])+:)*((%[0-9a-fA-F]{2})|[&'\\(\\)*+,;:@_~?!$\\/\\-\\#.\\="+GOOD_IRI_CHAR+"])+"; 055 public static final String LANG_REGEX = "[a-z]{2}(\\-[a-zA-Z]{2})?"; 056 057 // Object model 058 public abstract class Triple { 059 private String uri; 060 } 061 062 public class StringType extends Triple { 063 private String value; 064 065 public StringType(String value) { 066 super(); 067 this.value = value; 068 } 069 } 070 071 public class Complex extends Triple { 072 protected List<Predicate> predicates = new ArrayList<Predicate>(); 073 074 public Complex predicate(String predicate, String object) { 075 predicateSet.add(predicate); 076 objectSet.add(object); 077 return predicate(predicate, new StringType(object)); 078 } 079 080 public Complex linkedPredicate(String predicate, String object, String link, String comment) { 081 predicateSet.add(predicate); 082 objectSet.add(object); 083 return linkedPredicate(predicate, new StringType(object), link, comment); 084 } 085 086 public Complex predicate(String predicate, Triple object) { 087 Predicate p = getPredicate(predicate); 088 if (p == null) { 089 p = new Predicate(); 090 p.predicate = predicate; 091 predicateSet.add(predicate); 092 predicates.add(p); 093 } 094 if (object instanceof StringType) 095 objectSet.add(((StringType) object).value); 096 p.objects.add(object); 097 return this; 098 } 099 100 protected Predicate getPredicate(String predicate) { 101 for (Predicate p : predicates) 102 if (p.predicate.equals(predicate)) 103 return p; 104 return null; 105 } 106 107 public Complex linkedPredicate(String predicate, Triple object, String link, String comment) { 108 Predicate p = getPredicate(predicate); 109 if (p == null) { 110 p = new Predicate(); 111 p.predicate = predicate; 112 p.link = link; 113 p.comment = comment; 114 predicateSet.add(predicate); 115 predicates.add(p); 116 } 117 if (object instanceof StringType) 118 objectSet.add(((StringType) object).value); 119 p.objects.add(object); 120 return this; 121 } 122 123 public Complex predicate(String predicate) { 124 predicateSet.add(predicate); 125 Complex c = complex(); 126 predicate(predicate, c); 127 return c; 128 } 129 130 public Complex linkedPredicate(String predicate, String link, String comment) { 131 predicateSet.add(predicate); 132 Complex c = complex(); 133 linkedPredicate(predicate, c, link, comment); 134 return c; 135 } 136 137 public void prefix(String code, String url) { 138 Turtle.this.prefix(code, url); 139 } 140 } 141 142 private class Predicate { 143 protected String predicate; 144 protected String link; 145 protected List<Triple> objects = new ArrayList<Turtle.Triple>(); 146 protected String comment; 147 148 public String getPredicate() { 149 return predicate; 150 } 151 public String makelink() { 152 if (link == null) 153 return predicate; 154 else 155 return "<a href=\""+link+"\">"+Utilities.escapeXml(predicate)+"</a>"; 156 } 157 158 public List<Triple> getObjects() { 159 return objects; 160 } 161 public String getComment() { 162 return comment; 163 } 164 } 165 166 public class Subject extends Complex { 167 private String id; 168 169 public Predicate predicate(String predicate, Triple object, String comment) { 170 Predicate p = getPredicate(predicate); 171 if (p == null) { 172 p = new Predicate(); 173 p.predicate = predicate; 174 predicateSet.add(predicate); 175 predicates.add(p); 176 p.comment = comment; 177 } 178 if (object instanceof StringType) 179 objectSet.add(((StringType) object).value); 180 p.objects.add(object); 181 return p; 182 } 183 184 public void comment(String comment) { 185 if (!Utilities.noString(comment)) { 186 predicate("rdfs:comment", literal(comment)); 187 predicate("dcterms:description", literal(comment)); 188 } 189 } 190 191 public void label(String label) { 192 if (!Utilities.noString(label)) { 193 predicate("rdfs:label", literal(label)); 194 predicate("dc:title", literal(label)); 195 } 196 } 197 198 } 199 200 public class Section { 201 private List<String> comments = new ArrayList<>(); 202 private String name; 203 private List<Subject> subjects = new ArrayList<Subject>(); 204 205 public Subject triple(String subject, String predicate, String object, String comment) { 206 return triple(subject, predicate, new StringType(object), comment); 207 } 208 209 public Subject triple(String subject, String predicate, String object) { 210 return triple(subject, predicate, new StringType(object)); 211 } 212 213 public Subject triple(String subject, String predicate, Triple object) { 214 return triple(subject, predicate, object, null); 215 } 216 217 public Subject triple(String subject, String predicate, Triple object, String comment) { 218 Subject s = subject(subject); 219 s.predicate(predicate, object, comment); 220 return s; 221 } 222 223 public void comment(String subject, String comment) { 224 triple(subject, "rdfs:comment", literal(comment)); 225 triple(subject, "dcterms:description", literal(comment)); 226 } 227 228 public void label(String subject, String comment) { 229 triple(subject, "rdfs:label", literal(comment)); 230 triple(subject, "dc:title", literal(comment)); 231 } 232 233 public Subject subject(String subject) { 234 for (Subject ss : subjects) 235 if (ss.id.equals(subject)) 236 return ss; 237 Subject s = new Subject(); 238 s.id = subject; 239 subjects.add(s); 240 return s; 241 } 242 243 public boolean hasSubject(String subject) { 244 for (Subject ss : subjects) 245 if (ss.id.equals(subject)) 246 return true; 247 return false; 248 } 249 250 public void stringComment(String cnt) { 251 comments.add(cnt); 252 } 253 } 254 255 private List<Section> sections = new ArrayList<Section>(); 256 protected Set<String> subjectSet = new HashSet<String>(); 257 protected Set<String> predicateSet = new HashSet<String>(); 258 protected Set<String> objectSet = new HashSet<String>(); 259 protected Map<String, String> prefixes = new HashMap<String, String>(); 260 261 public void prefix(String code, String url) { 262 prefixes.put(code, url); 263 } 264 265 protected boolean hasSection(String sn) { 266 for (Section s : sections) 267 if (s.name.equals(sn)) 268 return true; 269 return false; 270 271 } 272 273 public Section section(String sn) { 274 if (hasSection(sn)) 275 throw new Error("Duplicate section name "+sn); 276 Section s = new Section(); 277 s.name = sn; 278 sections.add(s); 279 return s; 280 } 281 282 protected String matches(String url, String prefixUri, String prefix) { 283 if (url.startsWith(prefixUri)) { 284 prefixes.put(prefix, prefixUri); 285 return prefix+":"+escape(url.substring(prefixUri.length()), false); 286 } 287 return null; 288 } 289 290 protected Complex complex() { 291 return new Complex(); 292 } 293 294 private void checkPrefix(Triple object) { 295 if (object instanceof StringType) 296 checkPrefix(((StringType) object).value); 297 else { 298 Complex obj = (Complex) object; 299 for (Predicate po : obj.predicates) { 300 checkPrefix(po.getPredicate()); 301 for (Triple o : po.getObjects()) 302 checkPrefix(o); 303 } 304 } 305 } 306 307 protected void checkPrefix(String pname) { 308 if (pname.startsWith("(")) 309 return; 310 if (pname.startsWith("\"")) 311 return; 312 if (pname.startsWith("<")) 313 return; 314 315 if (pname.contains(":")) { 316 String prefix = pname.substring(0, pname.indexOf(":")); 317 if (!prefixes.containsKey(prefix) && !prefix.equals("http")&& !prefix.equals("urn")) 318 throw new Error("undefined prefix "+prefix); 319 } 320 } 321 322 protected StringType literal(String s) { 323 return new StringType("\""+escape(s, true)+"\""); 324 } 325 326 protected StringType literalTyped(String s, String t) { 327 return new StringType("\""+escape(s, true)+"\"^^xs:"+t); 328 } 329 330 public static String escape(String s, boolean string) { 331 if (s == null) 332 return ""; 333 334 StringBuilder b = new StringBuilder(); 335 for (char c : s.toCharArray()) { 336 if (c == '\r') 337 b.append("\\r"); 338 else if (c == '\n') 339 b.append("\\n"); 340 else if (c == '"') 341 b.append("\\\""); 342 else if (c == '\\') 343 b.append("\\\\"); 344 else if (c == '/' && !string) 345 b.append("\\/"); 346 else 347 b.append(c); 348 } 349 return b.toString(); 350 } 351 352 protected String pctEncode(String s) { 353 if (s == null) 354 return ""; 355 356 StringBuilder b = new StringBuilder(); 357 for (char c : s.toCharArray()) { 358 if (c >= 'A' && c <= 'Z') 359 b.append(c); 360 else if (c >= 'a' && c <= 'z') 361 b.append(c); 362 else if (c >= '0' && c <= '9') 363 b.append(c); 364 else if (c == '.') 365 b.append(c); 366 else 367 b.append("%"+Integer.toHexString(c)); 368 } 369 return b.toString(); 370 } 371 372 protected List<String> sorted(Set<String> keys) { 373 List<String> names = new ArrayList<String>(); 374 names.addAll(keys); 375 Collections.sort(names); 376 return names; 377 } 378 379 public void commit(OutputStream destination, boolean header) throws IOException { 380 LineOutputStreamWriter writer = new LineOutputStreamWriter(destination); 381 commitPrefixes(writer, header); 382 for (Section s : sections) { 383 commitSection(writer, s); 384 } 385 writer.ln("# -------------------------------------------------------------------------------------"); 386 writer.ln(); 387 writer.flush(); 388 writer.close(); 389 } 390 391 public String asHtml() throws Exception { 392 StringBuilder b = new StringBuilder(); 393 b.append("<pre class=\"rdf\" style=\"white-space: pre; overflow: hidden\"><code class=\"language-turtle\">\r\n"); 394 commitPrefixes(b); 395 for (Section s : sections) { 396 commitSection(b, s); 397 } 398 b.append("</code></pre>\r\n"); 399 b.append("\r\n"); 400 return b.toString(); 401 } 402 403 private void commitPrefixes(LineOutputStreamWriter writer, boolean header) throws IOException { 404 if (header) { 405 writer.ln("# FHIR Sub-definitions"); 406 writer.write("# This is work in progress, and may change rapidly \r\n"); 407 writer.ln(); 408 writer.write("# A note about policy: the focus here is providing the knowledge from \r\n"); 409 writer.write("# the FHIR specification as a set of triples for knowledge processing. \r\n"); 410 writer.write("# Where appopriate, predicates defined external to FHIR are used. \"Where \r\n"); 411 writer.write("# appropriate\" means that the predicates are a faithful representation \r\n"); 412 writer.write("# of the FHIR semantics, and do not involve insane (or owful) syntax. \r\n"); 413 writer.ln(); 414 writer.write("# Where the community agrees on additional predicate statements (such \r\n"); 415 writer.write("# as OWL constraints) these are added in addition to the direct FHIR \r\n"); 416 writer.write("# predicates \r\n"); 417 writer.ln(); 418 writer.write("# This it not a formal ontology, though it is possible it may start to become one eventually\r\n"); 419 writer.ln(); 420 writer.write("# this file refers to concepts defined in rim.ttl and to others defined elsewhere outside HL7 \r\n"); 421 writer.ln(); 422 } 423 for (String p : sorted(prefixes.keySet())) 424 writer.ln("@prefix "+p+": <"+prefixes.get(p)+"> ."); 425 writer.ln(); 426 if (header) { 427 writer.ln("# Predicates used in this file:"); 428 for (String s : sorted(predicateSet)) 429 writer.ln(" # "+s); 430 writer.ln(); 431 } 432 } 433 434 private void commitPrefixes(StringBuilder b) throws Exception { 435 for (String p : sorted(prefixes.keySet())) 436 b.append("@prefix "+p+": <"+prefixes.get(p)+"> .\r\n"); 437 b.append("\r\n"); 438 } 439 440 // private String lastSubject = null; 441 // private String lastComment = ""; 442 443 private void commitSection(LineOutputStreamWriter writer, Section section) throws IOException { 444 writer.ln("# - "+section.name+" "+Utilities.padLeft("", '-', 75-section.name.length())); 445 writer.ln(); 446 if (!section.comments.isEmpty()) { 447 for (String s : section.comments) { 448 writer.ln("# "+s); 449 } 450 writer.ln(); 451 } 452 for (Subject sbj : section.subjects) { 453 if (Utilities.noString(sbj.id)) { 454 writer.write("["); 455 } else { 456 writer.write(sbj.id); 457 writer.write(" "); 458 } 459 int i = 0; 460 461 for (Predicate p : sbj.predicates) { 462 // 463 writer.write(p.getPredicate()); 464 writer.write(" "); 465 boolean first = true; 466 for (Triple o : p.getObjects()) { 467 if (first) 468 first = false; 469 else 470 writer.write(", "); 471 if (o instanceof StringType) 472 writer.write(((StringType) o).value); 473 else { 474 writer.write("["); 475 if (write((Complex) o, writer, 4)) 476 writer.write("\r\n ]"); 477 else 478 writer.write("]"); 479 } 480 } 481 String comment = p.comment == null? "" : " # "+p.comment; 482 i++; 483 if (i < sbj.predicates.size()) 484 writer.write(";"+comment+"\r\n "); 485 else { 486 if (Utilities.noString(sbj.id)) 487 writer.write("]"); 488 writer.write(" ."+comment+"\r\n\r\n"); 489 } 490 } 491 } 492 } 493 494 private void commitSection(StringBuilder b, Section section) throws Exception { 495 b.append("# - "+section.name+" "+Utilities.padLeft("", '-', 75-section.name.length())+"\r\n"); 496 b.append("\r\n"); 497 if (!section.comments.isEmpty()) { 498 for (String s : section.comments) { 499 b.append("# "+s+"\r\n"); 500 } 501 b.append("\r\n"); 502 } 503 for (Subject sbj : section.subjects) { 504 b.append(Utilities.escapeXml(sbj.id)); 505 b.append(" "); 506 int i = 0; 507 508 for (Predicate p : sbj.predicates) { 509 // b.append("# test\r\n "); 510 b.append(p.makelink()); 511 b.append(" "); 512 boolean first = true; 513 for (Triple o : p.getObjects()) { 514 if (first) 515 first = false; 516 else 517 b.append(", "); 518 if (o instanceof StringType) 519 b.append(Utilities.escapeXml(((StringType) o).value)); 520 else { 521 b.append("["); 522 if (write((Complex) o, b, 4)) 523 b.append("\r\n ]"); 524 else 525 b.append("]"); 526 } 527 } 528 String comment = p.comment == null? "" : " # "+p.comment; 529 i++; 530 if (i < sbj.predicates.size()) 531 b.append(";"+Utilities.escapeXml(comment)+"\r\n "); 532 else 533 b.append("."+Utilities.escapeXml(comment)+"\r\n\r\n"); 534 } 535 } 536 } 537 538 protected class LineOutputStreamWriter extends OutputStreamWriter { 539 private LineOutputStreamWriter(OutputStream out) throws UnsupportedEncodingException { 540 super(out, "UTF-8"); 541 } 542 543 private void ln() throws IOException { 544 write("\r\n"); 545 } 546 547 private void ln(String s) throws IOException { 548 write(s); 549 write("\r\n"); 550 } 551 } 552 553 public boolean write(Complex complex, LineOutputStreamWriter writer, int indent) throws IOException { 554 if (complex.predicates.isEmpty()) 555 return false; 556 if (complex.predicates.size() == 1 && complex.predicates.get(0).getObjects().size()== 1 && complex.predicates.get(0).getObjects().get(0) instanceof StringType && Utilities.noString(complex.predicates.get(0).comment)) { 557 writer.write(" "+complex.predicates.get(0).predicate+" "+((StringType) complex.predicates.get(0).getObjects().get(0)).value); 558 return false; 559 } 560 String left = Utilities.padLeft("", ' ', indent); 561 int i = 0; 562 for (Predicate po : complex.predicates) { 563 writer.write("\r\n"); 564 boolean first = true; 565 for (Triple o : po.getObjects()) { 566 if (first) { 567 first = false; 568 writer.write(left+" "+po.getPredicate()+" "); 569 } else 570 writer.write(", "); 571 if (o instanceof StringType) 572 writer.write(((StringType) o).value); 573 else { 574 writer.write("["); 575 if (write((Complex) o, writer, indent+2)) 576 writer.write("\r\n"+left+" ]"); 577 else 578 writer.write(" ]"); 579 } 580 } 581 i++; 582 if (i < complex.predicates.size()) 583 writer.write(";"); 584 if (!Utilities.noString(po.comment)) 585 writer.write(" # "+escape(po.comment, false)); 586 } 587 return true; 588 } 589 590 public boolean write(Complex complex, StringBuilder b, int indent) throws Exception { 591 if (complex.predicates.isEmpty()) 592 return false; 593 if (complex.predicates.size() == 1 && complex.predicates.get(0).getObjects().size()== 1 && complex.predicates.get(0).getObjects().get(0) instanceof StringType && Utilities.noString(complex.predicates.get(0).comment)) { 594 b.append(" "+complex.predicates.get(0).makelink()+" "+Utilities.escapeXml(((StringType) complex.predicates.get(0).getObjects().get(0)).value)); 595 return false; 596 } 597 String left = Utilities.padLeft("", ' ', indent); 598 int i = 0; 599 for (Predicate po : complex.predicates) { 600 b.append("\r\n"); 601 boolean first = true; 602 for (Triple o : po.getObjects()) { 603 if (first) { 604 first = false; 605 b.append(left+" "+po.makelink()+" "); 606 } else 607 b.append(", "); 608 if (o instanceof StringType) 609 b.append(Utilities.escapeXml(((StringType) o).value)); 610 else { 611 b.append("["); 612 if (write((Complex) o, b, indent+2)) 613 b.append(left+" ]"); 614 else 615 b.append(" ]"); 616 } 617 } 618 i++; 619 if (i < complex.predicates.size()) 620 b.append(";"); 621 if (!Utilities.noString(po.comment)) 622 b.append(" # "+Utilities.escapeXml(escape(po.comment, false))); 623 } 624 return true; 625 } 626 627 628 public abstract class TTLObject { 629 protected int line; 630 protected int col; 631 632 abstract public boolean hasValue(String value); 633 634 public int getLine() { 635 return line; 636 } 637 638 public int getCol() { 639 return col; 640 } 641 642 643 } 644 645 646 public class TTLLiteral extends TTLObject { 647 648 private String value; 649 private String type; 650 protected TTLLiteral(int line, int col) { 651 this.line = line; 652 this.col = col; 653 } 654 @Override 655 public boolean hasValue(String value) { 656 return value.equals(this.value); 657 } 658 public String getValue() { 659 return value; 660 } 661 public String getType() { 662 return type; 663 } 664 665 } 666 667 public class TTLURL extends TTLObject { 668 private String uri; 669 670 protected TTLURL(int line, int col) { 671 this.line = line; 672 this.col = col; 673 } 674 675 public String getUri() { 676 return uri; 677 } 678 679 public void setUri(String uri) throws FHIRFormatError { 680 if (!uri.matches(IRI_URL)) 681 throw new FHIRFormatError("Illegal URI "+uri); 682 this.uri = uri; 683 } 684 685 @Override 686 public boolean hasValue(String value) { 687 return value.equals(this.uri); 688 } 689 } 690 691 public class TTLList extends TTLObject { 692 private List<TTLObject> list = new ArrayList<Turtle.TTLObject>(); 693 694 public TTLList(TTLObject obj) { 695 super(); 696 list.add(obj); 697 } 698 699 @Override 700 public boolean hasValue(String value) { 701 for (TTLObject obj : list) 702 if (obj.hasValue(value)) 703 return true; 704 return false; 705 } 706 707 public List<TTLObject> getList() { 708 return list; 709 } 710 711 } 712 public class TTLComplex extends TTLObject { 713 private Map<String, TTLObject> predicates = new HashMap<String, Turtle.TTLObject>(); 714 protected TTLComplex(int line, int col) { 715 this.line = line; 716 this.col = col; 717 } 718 public Map<String, TTLObject> getPredicates() { 719 return predicates; 720 } 721 @Override 722 public boolean hasValue(String value) { 723 return false; 724 } 725 public void addPredicate(String uri, TTLObject obj) { 726 if (!predicates.containsKey(uri)) 727 predicates.put(uri, obj); 728 else { 729 TTLObject eo = predicates.get(uri); 730 TTLList list = null; 731 if (eo instanceof TTLList) 732 list = (TTLList) eo; 733 else { 734 list = new TTLList(eo); 735 predicates.put(uri, list); 736 } 737 list.list.add(obj); 738 } 739 } 740 public void addPredicates(Map<String, TTLObject> values) { 741 for (String s : values.keySet()) { 742 addPredicate(s, values.get(s)); 743 } 744 } 745 } 746 747 private Map<TTLURL, TTLComplex> objects = new HashMap<TTLURL, Turtle.TTLComplex>(); 748 749 private Object base; 750 751 public enum LexerTokenType { 752 TOKEN, // [, ], :, @ 753 WORD, // a word 754 URI, // a URI <> 755 LITERAL // "..." 756 } 757 758 public class Lexer { 759 760 761 private String source; 762 private LexerTokenType type; 763 private int cursor, line, col, startLine, startCol; 764 private String token; 765 766 public Lexer(String source) throws FHIRFormatError { 767 this.source = source; 768 cursor = 0; 769 line = 1; 770 col = 1; 771 readNext(false); 772 } 773 774 private void skipWhitespace() { 775 while (cursor < source.length()) { 776 char ch = source.charAt(cursor); 777 if (Character.isWhitespace(ch)) 778 grab(); 779 else if (ch == '#') { 780 ch = grab(); 781 while (cursor < source.length()) { 782 ch = grab(); 783 if (ch == '\r' || ch == '\n') { 784 break; 785 } 786 } 787 } else 788 break; 789 } 790 } 791 792 private char grab() { 793 char c = source.charAt(cursor); 794 if (c == '\n') { 795 line++; 796 col = 1; 797 } else 798 col++; 799 800 cursor++; 801 return c; 802 } 803 804 private void readNext(boolean postColon) throws FHIRFormatError { 805 token = null; 806 type = null; 807 skipWhitespace(); 808 if (cursor >= source.length()) 809 return; 810 startLine = line; 811 startCol = col; 812 char ch = grab(); 813 StringBuilder b = new StringBuilder(); 814 switch (ch) { 815 case '@': 816 case '.': 817 case ':': 818 case ';': 819 case '^': 820 case ',': 821 case ']': 822 case '[': 823 case '(': 824 case ')': 825 type = LexerTokenType.TOKEN; 826 b.append(ch); 827 token = b.toString(); 828 return; 829 case '<': 830 while (cursor < source.length()) { 831 ch = grab(); 832 if (ch == '>') 833 break; 834 b.append(ch); 835 } 836 type = LexerTokenType.URI; 837 token = unescape(b.toString(), true); 838 return; 839 case '"': 840 b.append(ch); 841 String end = "\""; 842 while (cursor < source.length()) { 843 ch = grab(); 844 if (b.length() == 2 && ch != '"' && b.equals("\"\"")) { 845 cursor--; 846 break; 847 } 848 b.append(ch); 849 if (ch == '"') 850 if (b.toString().equals("\"\"\"")) 851 end = "\"\"\""; 852 else if (!b.toString().equals("\"\"") && b.toString().endsWith(end) && !b.toString().endsWith("\\"+end)) 853 break; 854 } 855 type = LexerTokenType.LITERAL; 856 token = unescape(b.toString().substring(end.length(), b.length()-end.length()), false); 857 return; 858 case '\'': 859 b.append(ch); 860 end = "'"; 861 while (cursor < source.length()) { 862 ch = grab(); 863 if (b.equals("''") && ch != '\'') { 864 cursor--; 865 break; 866 } 867 b.append(ch); 868 if (b.toString().equals("'''")) 869 end = "'''"; 870 else if (!b.toString().equals("''") && b.toString().endsWith(end)) 871 break; 872 } 873 type = LexerTokenType.LITERAL; 874 token = unescape(b.toString().substring(end.length(), b.length()-end.length()), false); 875 return; 876 default: 877 if (Utilities.charInRange(ch, '0', '9') || Utilities.charInRange(ch, 'a', 'z') || Utilities.charInRange(ch, 'A', 'Z') || Utilities.charInSet(ch, '_', '-', '+', '%')) { 878 b.append(ch); 879 while (cursor < source.length()) { 880 ch = grab(); 881 // if (!Utilities.charInRange(ch, '0', '9') && !Utilities.charInRange(ch, 'a', 'z') && !Utilities.charInRange(ch, 'A', 'Z') && !Utilities.charInSet(ch, '_', '-', '+', '.', '\\', '#')) 882 if (Character.isWhitespace(ch) || Utilities.charInSet(ch, ';', ']', ')', '~') || (( ch == ':') && !postColon)) 883 break; 884 b.append(ch); 885 } 886 type = LexerTokenType.WORD; 887 token = b.toString(); 888 cursor--; 889 return; 890 } else 891 throw error("unexpected lexer char "+ch); 892 } 893 } 894 895 private String unescape(String s, boolean isUri) throws FHIRFormatError { 896 StringBuilder b = new StringBuilder(); 897 int i = 0; 898 while (i < s.length()) { 899 char ch = s.charAt(i); 900 if (ch == '\\' && i < s.length()-1) { 901 i++; 902 switch (s.charAt(i)) { 903 case 't': 904 b.append('\t'); 905 break; 906 case 'r': 907 b.append('\r'); 908 break; 909 case 'n': 910 b.append('\n'); 911 break; 912 case 'f': 913 b.append('\f'); 914 break; 915 case '\'': 916 b.append('\''); 917 break; 918 case '\"': 919 b.append('\"'); 920 break; 921 case '\\': 922 b.append('\\'); 923 break; 924 case '/': 925 b.append('\\'); 926 break; 927 case 'U': 928 case 'u': 929 i++; 930 int l = 4; 931 int uc = Integer.parseInt(s.substring(i, i+l), 16); 932 if (uc < (isUri ? 33 : 32)) { 933 l = 8; 934 uc = Integer.parseInt(s.substring(i, i+8), 16); 935 } 936 if (uc < (isUri ? 33 : 32) || (isUri && (uc == 0x3C || uc == 0x3E))) 937 throw new FHIRFormatError("Illegal unicode character"); 938 b.append((char) uc); 939 i = i + l; 940 break; 941 default: 942 throw new FHIRFormatError("Unknown character escape \\"+s.charAt(i)); 943 } 944 } else { 945 b.append(ch); 946 } 947 i++; 948 } 949 return b.toString(); 950 } 951 952 public boolean done() { 953 return type == null; 954 } 955 956 public String next(LexerTokenType type, boolean postColon) throws FHIRFormatError { 957 if (type != null && this.type != type) 958 throw error("Unexpected type. Found "+this.type.toString()+" looking for a "+type.toString()); 959 String res = token; 960 readNext(postColon); 961 return res; 962 } 963 964 public String peek() throws Exception { 965 return token; 966 } 967 968 public LexerTokenType peekType() { 969 return type; 970 } 971 972 public void token(String token) throws FHIRFormatError { 973 if (!token.equals(this.token)) 974 throw error("Unexpected word "+this.token+" looking for "+token); 975 next(LexerTokenType.TOKEN, token.equals(":")); 976 } 977 978 public void word(String word) throws Exception { 979 if (!word.equals(this.token)) 980 throw error("Unexpected word "+this.token+" looking for "+word); 981 next(LexerTokenType.WORD, false); 982 } 983 984 public String word() throws FHIRFormatError { 985 String t = token; 986 next(LexerTokenType.WORD, false); 987 return t; 988 } 989 990 public String uri() throws FHIRFormatError { 991 if (this.type != LexerTokenType.URI) 992 throw error("Unexpected type. Found "+this.type.toString()+" looking for a URI"); 993 String t = token; 994 next(LexerTokenType.URI, false); 995 return t; 996 } 997 998 public String literal() throws FHIRFormatError { 999 if (this.type != LexerTokenType.LITERAL) 1000 throw error("Unexpected type. Found "+this.type.toString()+" looking for a Literal"); 1001 String t = token; 1002 next(LexerTokenType.LITERAL, false); 1003 return t; 1004 } 1005 1006 public boolean peek(LexerTokenType type, String token) { 1007 return this.type == type && this.token.equals(token); 1008 } 1009 1010 public FHIRFormatError error(String message) { 1011 return new FHIRFormatError("Syntax Error parsing Turtle on line "+Integer.toString(line)+" col "+Integer.toString(col)+": "+message); 1012 } 1013 1014 } 1015 // 1016 // public void importTtl(Section sct, String ttl) throws Exception { 1017 // if (!Utilities.noString(ttl)) { 1018 // // System.out.println("import ttl: "+ttl); 1019 // Lexer lexer = new Lexer(ttl); 1020 // String subject = null; 1021 // String predicate = null; 1022 // while (!lexer.done()) { 1023 // if (subject == null) 1024 // subject = lexer.next(); 1025 // if (predicate == null) 1026 // predicate = lexer.next(); 1027 // if (lexer.peekType() == null) { 1028 // throw new Error("Unexpected end of input parsing turtle"); 1029 // } if (lexer.peekType() == LexerTokenType.TOKEN) { 1030 // sct.triple(subject, predicate, lexer.next()); 1031 // } else if (lexer.peek() == null) { 1032 // throw new Error("Unexected - turtle lexer found no token"); 1033 // } else if (lexer.peek().equals("[")) { 1034 // sct.triple(subject, predicate, importComplex(lexer)); 1035 // } else 1036 // throw new Exception("Not done yet"); 1037 // String n = lexer.next(); 1038 // if (Utilities.noString(n)) 1039 // break; 1040 // if (n.equals(".")) { 1041 // subject = null; 1042 // predicate = null; 1043 // } else if (n.equals(";")) { 1044 // predicate = null; 1045 // } else if (!n.equals(",")) 1046 // throw new Exception("Unexpected token "+n); 1047 // } 1048 // } 1049 // } 1050 1051 public void parse(String source) throws FHIRFormatError { 1052 prefixes.clear(); 1053 prefixes.put("_", "urn:uuid:4425b440-2c33-4488-b9fc-cf9456139995#"); 1054 parse(new Lexer(source)); 1055 } 1056 1057 private void parse(Lexer lexer) throws FHIRFormatError { 1058 boolean doPrefixes = true; 1059 while (!lexer.done()) { 1060 if (doPrefixes && (lexer.peek(LexerTokenType.TOKEN, "@") || lexer.peek(LexerTokenType.WORD, "PREFIX") || lexer.peek(LexerTokenType.WORD, "BASE"))) { 1061 boolean sparqlStyle = false; 1062 boolean base = false; 1063 if (lexer.peek(LexerTokenType.TOKEN, "@")) { 1064 lexer.token("@"); 1065 String p = lexer.word(); 1066 if (p.equals("base")) 1067 base = true; 1068 else if (!p.equals("prefix")) 1069 throw new FHIRFormatError("Unexpected token "+p); 1070 } else { 1071 sparqlStyle = true; 1072 String p = lexer.word(); 1073 if (p.equals("BASE")) 1074 base = true; 1075 else if (!p.equals("PREFIX")) 1076 throw new FHIRFormatError("Unexpected token "+p); 1077 } 1078 String prefix = null; 1079 if (!base) { 1080 prefix = lexer.peekType() == LexerTokenType.WORD ? lexer.next(LexerTokenType.WORD, false) : null; 1081 lexer.token(":"); 1082 } 1083 String url = lexer.next(LexerTokenType.URI, false); 1084 if (!sparqlStyle) 1085 lexer.token("."); 1086 if (!base) 1087 prefix(prefix, url); 1088 else if (this.base == null) 1089 this.base = url; 1090 else 1091 throw new FHIRFormatError("Duplicate @base"); 1092 } else if (lexer.peekType() == LexerTokenType.URI) { 1093 doPrefixes = false; 1094 TTLURL uri = new TTLURL(lexer.startLine, lexer.startCol); 1095 uri.setUri(lexer.uri()); 1096 TTLComplex complex = parseComplex(lexer); 1097 objects.put(uri, complex); 1098 lexer.token("."); 1099 } else if (lexer.peekType() == LexerTokenType.WORD) { 1100 doPrefixes = false; 1101 TTLURL uri = new TTLURL(lexer.startLine, lexer.startCol); 1102 String pfx = lexer.word(); 1103 if (!prefixes.containsKey(pfx)) 1104 throw new FHIRFormatError("Unknown prefix "+pfx); 1105 lexer.token(":"); 1106 uri.setUri(prefixes.get(pfx)+lexer.word()); 1107 TTLComplex complex = parseComplex(lexer); 1108 objects.put(uri, complex); 1109 lexer.token("."); 1110 } else if (lexer.peek(LexerTokenType.TOKEN, ":")) { 1111 doPrefixes = false; 1112 TTLURL uri = new TTLURL(lexer.startLine, lexer.startCol); 1113 lexer.token(":"); 1114 if (!prefixes.containsKey(null)) 1115 throw new FHIRFormatError("Unknown prefix ''"); 1116 uri.setUri(prefixes.get(null)+lexer.word()); 1117 TTLComplex complex = parseComplex(lexer); 1118 objects.put(uri, complex); 1119 lexer.token("."); 1120 } else if (lexer.peek(LexerTokenType.TOKEN, "[")) { 1121 doPrefixes = false; 1122 lexer.token("["); 1123 TTLComplex bnode = parseComplex(lexer); 1124 lexer.token("]"); 1125 TTLComplex complex = null; 1126 if (!lexer.peek(LexerTokenType.TOKEN, ".")) { 1127 complex = parseComplex(lexer); 1128 // at this point, we collapse bnode and complex, and give bnode a fictional identity 1129 bnode.addPredicates(complex.predicates); 1130 } 1131 1132 objects.put(anonymousId(), bnode); 1133 lexer.token("."); 1134 } else 1135 throw lexer.error("Unknown token "+lexer.token); 1136 } 1137 } 1138 1139 private TTLURL anonymousId() throws FHIRFormatError { 1140 TTLURL url = new TTLURL(-1, -1); 1141 url.setUri("urn:uuid:"+UUID.randomUUID().toString().toLowerCase()); 1142 return url; 1143 } 1144 1145 private TTLComplex parseComplex(Lexer lexer) throws FHIRFormatError { 1146 TTLComplex result = new TTLComplex(lexer.startLine, lexer.startCol); 1147 1148 boolean done = lexer.peek(LexerTokenType.TOKEN, "]"); 1149 while (!done) { 1150 String uri = null; 1151 if (lexer.peekType() == LexerTokenType.URI) 1152 uri = lexer.uri(); 1153 else { 1154 String t = lexer.peekType() == LexerTokenType.WORD ? lexer.word() : null; 1155 if (lexer.type == LexerTokenType.TOKEN && lexer.token.equals(":")) { 1156 lexer.token(":"); 1157 if (!prefixes.containsKey(t)) 1158 throw new FHIRFormatError("unknown prefix "+t); 1159 uri = prefixes.get(t)+lexer.word(); 1160 } else if (t.equals("a")) 1161 uri = prefixes.get("rdfs")+"type"; 1162 else 1163 throw lexer.error("unexpected token"); 1164 } 1165 1166 boolean inlist = false; 1167 if (lexer.peek(LexerTokenType.TOKEN, "(")) { 1168 inlist = true; 1169 lexer.token("("); 1170 } 1171 1172 boolean rpt = false; 1173 do { 1174 if (lexer.peek(LexerTokenType.TOKEN, "[")) { 1175 lexer.token("["); 1176 result.addPredicate(uri, parseComplex(lexer)); 1177 lexer.token("]"); 1178 } else if (lexer.peekType() == LexerTokenType.URI) { 1179 TTLURL u = new TTLURL(lexer.startLine, lexer.startCol); 1180 u.setUri(lexer.uri()); 1181 result.addPredicate(uri, u); 1182 } else if (lexer.peekType() == LexerTokenType.LITERAL) { 1183 TTLLiteral u = new TTLLiteral(lexer.startLine, lexer.startCol); 1184 u.value = lexer.literal(); 1185 if (lexer.peek(LexerTokenType.TOKEN, "^")) { 1186 lexer.token("^"); 1187 lexer.token("^"); 1188 if (lexer.peekType() == LexerTokenType.URI) { 1189 u.type = lexer.uri(); 1190 } else { 1191 String l = lexer.word(); 1192 lexer.token(":"); 1193 u.type = prefixes.get(l)+ lexer.word(); 1194 } 1195 } 1196 if (lexer.peek(LexerTokenType.TOKEN, "@")) { 1197 //lang tag - skip it 1198 lexer.token("@"); 1199 String lang = lexer.word(); 1200 if (!lang.matches(LANG_REGEX)) { 1201 throw new FHIRFormatError("Invalid Language tag "+lang); 1202 } 1203 } 1204 result.addPredicate(uri, u); 1205 } else if (lexer.peekType() == LexerTokenType.WORD || lexer.peek(LexerTokenType.TOKEN, ":")) { 1206 int sl = lexer.startLine; 1207 int sc = lexer.startCol; 1208 String pfx = lexer.peekType() == LexerTokenType.WORD ? lexer.word() : null; 1209 if (Utilities.isDecimal(pfx, true, true) && !lexer.peek(LexerTokenType.TOKEN, ":")) { 1210 TTLLiteral u = new TTLLiteral(sl, sc); 1211 u.value = pfx; 1212 result.addPredicate(uri, u); 1213 } else if (("false".equals(pfx) || "true".equals(pfx)) && !lexer.peek(LexerTokenType.TOKEN, ":")) { 1214 TTLLiteral u = new TTLLiteral(sl, sc); 1215 u.value = pfx; 1216 result.addPredicate(uri, u); 1217 } else { 1218 if (!prefixes.containsKey(pfx)) 1219 throw new FHIRFormatError("Unknown prefix "+(pfx == null ? "''" : pfx)); 1220 TTLURL u = new TTLURL(sl, sc); 1221 lexer.token(":"); 1222 u.setUri(prefixes.get(pfx)+lexer.word()); 1223 result.addPredicate(uri, u); 1224 } 1225 } else if (!lexer.peek(LexerTokenType.TOKEN, ";") && (!inlist || !lexer.peek(LexerTokenType.TOKEN, ")"))) { 1226 throw new FHIRFormatError("unexpected token "+lexer.token); 1227 } 1228 1229 if (inlist) 1230 rpt = !lexer.peek(LexerTokenType.TOKEN, ")"); 1231 else { 1232 rpt = lexer.peek(LexerTokenType.TOKEN, ","); 1233 if (rpt) 1234 lexer.readNext(false); 1235 } 1236 } while (rpt); 1237 if (inlist) 1238 lexer.token(")"); 1239 1240 if (lexer.peek(LexerTokenType.TOKEN, ";")) { 1241 while ((lexer.peek(LexerTokenType.TOKEN, ";"))) 1242 lexer.token(";"); 1243 done = lexer.peek(LexerTokenType.TOKEN, ".") || lexer.peek(LexerTokenType.TOKEN, "]"); 1244 } else { 1245 done = true; 1246 } 1247 } 1248 return result; 1249 } 1250 1251 public Map<TTLURL, TTLComplex> getObjects() { 1252 return objects; 1253 } 1254 1255 public TTLComplex getObject(String url) { 1256 for (TTLURL t : objects.keySet()) { 1257 if (t.getUri().equals(url)) 1258 return objects.get(t); 1259 } 1260 return null; 1261 } 1262 1263 // public void parseFragment(Lexer lexer) throws Exception { 1264 // lexer.next(); // read [ 1265 // Complex obj = new Complex(); 1266 // while (!lexer.peek().equals("]")) { 1267 // String predicate = lexer.next(); 1268 // if (lexer.peekType() == LexerTokenType.TOKEN || lexer.peekType() == LexerTokenType.LITERAL) { 1269 // obj.predicate(predicate, lexer.next()); 1270 // } else if (lexer.peek().equals("[")) { 1271 // obj.predicate(predicate, importComplex(lexer)); 1272 // } else 1273 // throw new Exception("Not done yet"); 1274 // if (lexer.peek().equals(";")) 1275 // lexer.next(); 1276 // } 1277 // lexer.next(); // read ] 1278 // //return obj; 1279 // } 1280 // 1281 // public void importTtl(Section sct, String ttl) throws Exception { 1282 // if (!Utilities.noString(ttl)) { 1283 // // System.out.println("import ttl: "+ttl); 1284 // Lexer lexer = new Lexer(ttl); 1285 // String subject = null; 1286 // String predicate = null; 1287 // while (!lexer.done()) { 1288 // if (subject == null) 1289 // subject = lexer.next(); 1290 // if (predicate == null) 1291 // predicate = lexer.next(); 1292 // if (lexer.peekType() == null) { 1293 // throw new Error("Unexpected end of input parsing turtle"); 1294 // } if (lexer.peekType() == LexerTokenType.TOKEN) { 1295 // sct.triple(subject, predicate, lexer.next()); 1296 // } else if (lexer.peek() == null) { 1297 // throw new Error("Unexected - turtle lexer found no token"); 1298 // } else if (lexer.peek().equals("[")) { 1299 // sct.triple(subject, predicate, importComplex(lexer)); 1300 // } else 1301 // throw new Exception("Not done yet"); 1302 // String n = lexer.next(); 1303 // if (Utilities.noString(n)) 1304 // break; 1305 // if (n.equals(".")) { 1306 // subject = null; 1307 // predicate = null; 1308 // } else if (n.equals(";")) { 1309 // predicate = null; 1310 // } else if (!n.equals(",")) 1311 // throw new Exception("Unexpected token "+n); 1312 // } 1313 // } 1314 //} 1315 1316 // private Complex importComplex(Lexer lexer) throws Exception { 1317 // lexer.next(); // read [ 1318 // Complex obj = new Complex(); 1319 // while (!lexer.peek().equals("]")) { 1320 // String predicate = lexer.next(); 1321 // if (lexer.peekType() == LexerTokenType.TOKEN || lexer.peekType() == LexerTokenType.LITERAL) { 1322 // obj.predicate(predicate, lexer.next()); 1323 // } else if (lexer.peek().equals("[")) { 1324 // obj.predicate(predicate, importComplex(lexer)); 1325 // } else 1326 // throw new Exception("Not done yet"); 1327 // if (lexer.peek().equals(";")) 1328 // lexer.next(); 1329 // } 1330 // lexer.next(); // read ] 1331 // return obj; 1332 // } 1333 1334}