package org.hl7.fhir.r5.utils;

import java.util.ArrayList;
import java.util.List;
import java.util.StringJoiner;

import org.apache.poi.xssf.model.Comments;
import org.hl7.fhir.exceptions.FHIRException;

/*
  Copyright (c) 2011+, HL7, Inc.
  All rights reserved.

  Redistribution and use in source and binary forms, with or without modification,
  are permitted provided that the following conditions are met:

   * Redistributions of source code must retain the above copyright notice, this
     list of conditions and the following disclaimer.
   * Redistributions in binary form must reproduce the above copyright notice,
     this list of conditions and the following disclaimer in the documentation
     and/or other materials provided with the distribution.
   * Neither the name of HL7 nor the names of its contributors may be used to
     endorse or promote products derived from this software without specific
     prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

 */



import org.hl7.fhir.r5.model.ExpressionNode;
import org.hl7.fhir.utilities.CommaSeparatedStringBuilder;
import org.hl7.fhir.utilities.SourceLocation;
import org.hl7.fhir.utilities.Utilities;

/**
 * Shared lexer for concrete syntaxes:
 * <ul>
 *   <li>FluentPath</li>
 *   <li>Mapping language</li>
 * </ul>
 *
 * <p>The lexer always holds one token of look-ahead in {@link #getCurrent()}; {@link #next()}
 * replaces it with the following token, or with {@code null} once the source is exhausted.
 * Comments skipped immediately before the current token are available through
 * {@link #getComments()} and related methods.
 */
public class FHIRLexer {

  /**
   * Exception raised for lexical errors; carries the source location that was current when the
   * problem was detected.
   */
  public class FHIRLexerException extends FHIRException {

    private final SourceLocation location;

    /**
     * @param message  the error description
     * @param location the location associated with the error
     */
    public FHIRLexerException(String message, SourceLocation location) {
      super(message);
      this.location = location;
    }

    /** @return the location passed at construction time */
    public SourceLocation getLocation() {
      return location;
    }

  }

  private String source;
  private int cursor;
  private int currentStart;
  private String current;
  private final List<String> comments = new ArrayList<>();
  private SourceLocation currentLocation;
  private SourceLocation currentStartLocation;
  private int id;
  private String name;
  private boolean liquidMode; // in liquid mode, || terminates the expression and hands the parser back to the host
  private SourceLocation commentLocation;

  /**
   * Creates a lexer over {@code source} and reads the first token.
   *
   * @param source the text to tokenize; {@code null} is treated as the empty string
   * @param name   a name for the source; {@code null} is replaced by {@code "??"}
   * @throws FHIRLexerException if the first token cannot be read
   */
  public FHIRLexer(String source, String name) throws FHIRLexerException {
    this.source = source == null ? "" : source;
    this.name = name == null ? "??" : name;
    currentLocation = new SourceLocation(1, 1);
    next();
  }

  /**
   * Creates a lexer over {@code source} that starts reading at offset {@code i}, and reads the
   * first token.
   *
   * @param source the text to tokenize; {@code null} is treated as the empty string
   * @param i      the character offset at which lexing starts
   * @throws FHIRLexerException if the first token cannot be read
   */
  public FHIRLexer(String source, int i) throws FHIRLexerException {
    this.source = source == null ? "" : source;
    this.name = "??";
    this.cursor = i;
    currentLocation = new SourceLocation(1, 1);
    next();
  }

  /** @return the current token, or {@code null} when the source is exhausted */
  public String getCurrent() {
    return current;
  }

  /** @return the location object the lexer updates while skipping whitespace and block comments */
  public SourceLocation getCurrentLocation() {
    return currentLocation;
  }

  /** @return the result of {@code FHIRPathConstant.isFHIRPathConstant} for the current token */
  public boolean isConstant() {
    return FHIRPathConstant.isFHIRPathConstant(current);
  }

  /** @return the result of {@code FHIRPathConstant.isFHIRPathFixedName} for the current token */
  public boolean isFixedName() {
    return FHIRPathConstant.isFHIRPathFixedName(current);
  }

  /** @return the result of {@code FHIRPathConstant.isFHIRPathStringConstant} for the current token */
  public boolean isStringConstant() {
    return FHIRPathConstant.isFHIRPathStringConstant(current);
  }

  /**
   * Returns the current token and advances to the next one.
   *
   * @return the token that was current before the call (may be {@code null} at end of input)
   * @throws FHIRLexerException if the following token cannot be read
   */
  public String take() throws FHIRLexerException {
    String taken = current;
    next();
    return taken;
  }

  /**
   * Returns the current token as an {@code int} and advances to the next one.
   *
   * @return the integer value of the token that was current before the call
   * @throws FHIRLexerException if the current token is not an integer according to
   *                            {@code Utilities.isInteger}
   */
  public int takeInt() throws FHIRLexerException {
    String taken = current;
    if (!Utilities.isInteger(taken)) {
      throw error("Found "+current+" expecting an integer");
    }
    next();
    return Integer.parseInt(taken);
  }

  /**
   * Tests whether the current token can be used as a token name: anything starting with
   * {@code $}, the literals {@code *} and {@code **}, or an ASCII letter followed only by
   * ASCII letters, digits and underscores (the same character set {@link #next()} accepts
   * when lexing an identifier).
   *
   * @return {@code true} if the current token is a valid token name
   */
  public boolean isToken() {
    if (Utilities.noString(current)) {
      return false;
    }
    if (current.startsWith("$")) {
      return true;
    }
    if (current.equals("*") || current.equals("**")) {
      return true;
    }
    if (!isAsciiLetter(current.charAt(0))) {
      return false;
    }
    // every trailing character must be checked, not just the second one
    for (int i = 1; i < current.length(); i++) {
      if (!isIdentifierPart(current.charAt(i))) {
        return false;
      }
    }
    return true;
  }

  /**
   * Builds (but does not throw) an exception for {@code msg} at the current location.
   *
   * @param msg the error description
   * @return the exception, for the caller to throw
   */
  public FHIRLexerException error(String msg) {
    return error(msg, currentLocation.toString(), currentLocation);
  }

  /**
   * Builds (but does not throw) an exception whose message is
   * {@code "Error @" + location + ": " + msg}.
   *
   * @param msg      the error description
   * @param location the textual location placed in the message
   * @param loc      the location object attached to the exception
   * @return the exception, for the caller to throw
   */
  public FHIRLexerException error(String msg, String location, SourceLocation loc) {
    return new FHIRLexerException("Error @"+location+": "+msg, loc);
  }

  /**
   * Skips whitespace and comments, then reads the next token into {@link #getCurrent()}.
   * When the source is exhausted the current token becomes {@code null}.
   *
   * @throws FHIRLexerException on an unterminated string, delimited name or block comment
   */
  public void next() throws FHIRLexerException {
    skipWhitespaceAndComments();
    current = null;
    currentStart = cursor;
    // NOTE(review): this shares the currentLocation instance rather than copying it - confirm intended
    currentStartLocation = currentLocation;
    if (cursor >= source.length()) {
      return;
    }

    char ch = source.charAt(cursor);
    if (ch == '!' || ch == '>' || ch == '<' || ch == ':' || ch == '-' || ch == '=') {
      // one- or two-character operator: second char may be '=', '~', '-', or '>' after '-'
      cursor++;
      if (cursor < source.length()) {
        char second = source.charAt(cursor);
        if (second == '=' || second == '~' || second == '-' || (ch == '-' && second == '>')) {
          cursor++;
        }
      }
    } else if (ch == '.') {
      // '.' or '..'
      cursor++;
      if (cursor < source.length() && source.charAt(cursor) == '.') {
        cursor++;
      }
    } else if (isAsciiDigit(ch)) {
      // digits with at most one embedded '.'
      cursor++;
      boolean dotted = false;
      while (cursor < source.length()
          && (isAsciiDigit(source.charAt(cursor)) || (source.charAt(cursor) == '.' && !dotted))) {
        if (source.charAt(cursor) == '.') {
          dotted = true;
        }
        cursor++;
      }
      // a trailing '.' is not part of the number; leave it for the next token
      if (source.charAt(cursor - 1) == '.') {
        cursor--;
      }
    } else if (isAsciiLetter(ch)) {
      while (cursor < source.length() && isIdentifierPart(source.charAt(cursor))) {
        cursor++;
      }
    } else if (ch == '%') {
      cursor++;
      if (cursor < source.length() && source.charAt(cursor) == '`') {
        // %`...` : everything up to the closing backtick
        cursor++;
        while (cursor < source.length() && source.charAt(cursor) != '`') {
          cursor++;
        }
        if (cursor == source.length()) {
          throw error("Unterminated string");
        }
        cursor++;
      } else {
        while (cursor < source.length() && isExternalNamePart(source.charAt(cursor))) {
          cursor++;
        }
      }
    } else if (ch == '/') {
      cursor++;
      if (cursor < source.length() && source.charAt(cursor) == '/') {
        // this is an error - a line comment should already have been skipped
        throw error("This shouldn't happen?");
      }
    } else if (ch == '$') {
      cursor++;
      while (cursor < source.length() && source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') {
        cursor++;
      }
    } else if (ch == '{') {
      // '{' or '{}'
      cursor++;
      if (cursor < source.length() && source.charAt(cursor) == '}') {
        cursor++;
      }
    } else if (ch == '"' || ch == '`' || ch == '\'') {
      readDelimited(ch);
    } else if (ch == '|' && liquidMode) {
      // '|' or '||'
      cursor++;
      if (cursor < source.length() && source.charAt(cursor) == '|') {
        cursor++;
      }
    } else if (ch == '@') {
      int start = cursor;
      cursor++;
      while (cursor < source.length() && isDateChar(source.charAt(cursor), start)) {
        cursor++;
      }
    } else { // if CharInSet(ch, ['.', ',', '(', ')', '=', '$']) then
      cursor++;
    }
    current = source.substring(currentStart, cursor);
  }

  /**
   * Advances the cursor over a token delimited by {@code delimiter}, starting on the opening
   * delimiter and finishing just past the closing one. A backslash escapes the following character.
   *
   * @param delimiter the quote character the cursor is positioned on
   * @throws FHIRLexerException if the source ends before the closing delimiter
   */
  private void readDelimited(char delimiter) throws FHIRLexerException {
    cursor++; // opening delimiter
    boolean escape = false;
    while (cursor < source.length() && (escape || source.charAt(cursor) != delimiter)) {
      escape = !escape && source.charAt(cursor) == '\\';
      cursor++;
    }
    if (cursor == source.length()) {
      throw error("Unterminated string");
    }
    cursor++; // closing delimiter
  }

  /**
   * Advances the cursor past whitespace, {@code //} line comments and block comments,
   * collecting the trimmed comment texts. Comments collected by a previous call are discarded.
   *
   * @throws FHIRLexerException if a block comment is not closed before the end of the source
   */
  private void skipWhitespaceAndComments() throws FHIRLexerException {
    comments.clear();
    // carried between SourceLocation.checkChar calls; presumably flags a preceding '\r' - TODO confirm
    boolean last13 = false;
    boolean done = false;
    while (cursor < source.length() && !done) {
      if (source.startsWith("//", cursor)) {
        commentLocation = currentLocation;
        int start = cursor + 2;
        while (cursor < source.length() && source.charAt(cursor) != '\r' && source.charAt(cursor) != '\n') {
          cursor++;
        }
        comments.add(source.substring(start, cursor).trim());
      } else if (source.startsWith("/*", cursor)) {
        commentLocation = currentLocation;
        int start = cursor + 2;
        // step over the opener first so that "/*/" is not mistaken for an already closed comment
        while (cursor < start) {
          last13 = currentLocation.checkChar(source.charAt(cursor), last13);
          cursor++;
        }
        while (cursor < source.length() - 1 && !source.startsWith("*/", cursor)) {
          last13 = currentLocation.checkChar(source.charAt(cursor), last13);
          cursor++;
        }
        if (cursor >= source.length() - 1) {
          throw error("Unfinished comment");
        }
        comments.add(source.substring(start, cursor).trim());
        cursor = cursor + 2;
      } else if (Character.isWhitespace(source.charAt(cursor))) {
        last13 = currentLocation.checkChar(source.charAt(cursor), last13);
        cursor++;
      } else {
        done = true;
      }
    }
  }

  /**
   * Tests whether {@code ch}, at the current cursor position, continues a token that started
   * with {@code @} at offset {@code start}.
   *
   * <p>A {@code '.'} is accepted only at a fixed distance from {@code start} (10 when the token
   * begins {@code @T}, otherwise 20) and only when a digit follows it.
   */
  private boolean isDateChar(char ch, int start) {
    int eot = source.charAt(start + 1) == 'T' ? 10 : 20;

    return ch == '-' || ch == ':' || ch == 'T' || ch == '+' || ch == 'Z' || Character.isDigit(ch)
        || (cursor - start == eot && ch == '.' && cursor < source.length() - 1
            && Character.isDigit(source.charAt(cursor + 1)));
  }

  /** @return {@code true} for {@code A-Z} and {@code a-z} */
  private static boolean isAsciiLetter(char ch) {
    return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
  }

  /** @return {@code true} for {@code 0-9} */
  private static boolean isAsciiDigit(char ch) {
    return ch >= '0' && ch <= '9';
  }

  /** @return {@code true} for the characters allowed after the first letter of an identifier */
  private static boolean isIdentifierPart(char ch) {
    return isAsciiLetter(ch) || isAsciiDigit(ch) || ch == '_';
  }

  /** @return {@code true} for the characters allowed in an undelimited name following {@code %} */
  private static boolean isExternalNamePart(char ch) {
    return isAsciiLetter(ch) || isAsciiDigit(ch) || ch == ':' || ch == '-';
  }

  /** @return {@code true} if {@code ExpressionNode.Operation.fromCode} recognizes the current token */
  public boolean isOp() {
    return ExpressionNode.Operation.fromCode(current) != null;
  }

  /** @return {@code true} once the start of the current token is at or past the end of the source */
  public boolean done() {
    return currentStart >= source.length();
  }

  /** @return the next value of a counter that starts at 1 and increases by one per call */
  public int nextId() {
    id++;
    return id;
  }

  /** @return the location reference recorded when the current token was read */
  public SourceLocation getCurrentStartLocation() {
    return currentStartLocation;
  }

  // special case use
  /**
   * Overwrites the current token without moving the cursor.
   *
   * @param current the replacement token
   */
  public void setCurrent(String current) {
    this.current = current;
  }

  /** @return {@code true} if at least one comment is pending */
  public boolean hasComments() {
    return !comments.isEmpty();
  }

  /** @return the live list of pending comments (not a copy) */
  public List<String> getComments() {
    return comments;
  }

  /**
   * Returns all pending comments combined by a {@code CommaSeparatedStringBuilder} created with
   * {@code "\r\n"}, and clears the pending list.
   *
   * @return the combined comment text
   */
  public String getAllComments() {
    CommaSeparatedStringBuilder b = new CommaSeparatedStringBuilder("\r\n");
    b.addAll(comments);
    comments.clear();
    return b.toString();
  }

  /**
   * Removes and returns the first pending comment.
   *
   * @return the first comment, or {@code null} if there is none
   */
  public String getFirstComment() {
    if (!hasComments()) {
      return null;
    }
    return comments.remove(0);
  }

  /**
   * @param kw the token text to compare against
   * @return {@code true} if input remains and the current token equals {@code kw}
   */
  public boolean hasToken(String kw) {
    return !done() && kw.equals(current);
  }

  /**
   * @param names candidate token texts
   * @return {@code true} if input remains and the current token equals any of {@code names}
   */
  public boolean hasToken(String... names) {
    if (done()) {
      return false;
    }
    for (String candidate : names) {
      if (candidate.equals(current)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Consumes the current token, which must equal {@code kw}.
   *
   * @param kw the expected token text
   * @throws FHIRLexerException if the current token differs from {@code kw}
   */
  public void token(String kw) throws FHIRLexerException {
    if (!kw.equals(current)) {
      throw error("Found \""+current+"\" expecting \""+kw+"\"");
    }
    next();
  }

  /**
   * Consumes the current token as a string constant and returns its unescaped content.
   *
   * @param desc description of what was expected, used in the error message
   * @return the unescaped constant
   * @throws FHIRLexerException if the current token is not a string constant
   */
  public String readConstant(String desc) throws FHIRLexerException {
    if (!isStringConstant()) {
      throw error("Found "+current+" expecting \"["+desc+"]\"");
    }
    return processConstant(take());
  }

  /**
   * Consumes the current token as a fixed name and returns its unescaped content.
   *
   * @param desc description of what was expected, used in the error message
   * @return the unescaped name
   * @throws FHIRLexerException if the current token is not a fixed name
   */
  public String readFixedName(String desc) throws FHIRLexerException {
    if (!isFixedName()) {
      throw error("Found "+current+" expecting \"["+desc+"]\"");
    }
    return processFixedName(take());
  }

  /**
   * Strips the first and last character of {@code s} and resolves backslash escapes in between.
   *
   * @param s a delimited constant, including its delimiters
   * @return the unescaped content
   * @throws FHIRLexerException on an unknown or malformed escape sequence
   */
  public String processConstant(String s) throws FHIRLexerException {
    return unescapeDelimited(s);
  }

  /**
   * Strips the first and last character of {@code s} and resolves backslash escapes in between.
   * Accepts the same escapes as {@link #processConstant(String)}, including an escaped backtick.
   *
   * @param s a delimited name, including its delimiters
   * @return the unescaped content
   * @throws FHIRLexerException on an unknown or malformed escape sequence
   */
  public String processFixedName(String s) throws FHIRLexerException {
    return unescapeDelimited(s);
  }

  /** Shared implementation of {@link #processConstant(String)} and {@link #processFixedName(String)}. */
  private String unescapeDelimited(String s) throws FHIRLexerException {
    StringBuilder b = new StringBuilder();
    int i = 1; // skip the opening delimiter
    while (i < s.length() - 1) {
      char ch = s.charAt(i);
      if (ch != '\\') {
        b.append(ch);
        i++;
        continue;
      }
      i++; // move onto the escape selector
      char selector = s.charAt(i);
      switch (selector) {
      case 't':
        b.append('\t');
        break;
      case 'r':
        b.append('\r');
        break;
      case 'n':
        b.append('\n');
        break;
      case 'f':
        b.append('\f');
        break;
      case '\'':
        b.append('\'');
        break;
      case '"':
        b.append('"');
        break;
      case '`':
        b.append('`');
        break;
      case '\\':
        b.append('\\');
        break;
      case '/':
        b.append('/');
        break;
      case 'u':
        b.append(decodeUnicodeEscape(s, i + 1));
        i = i + 4; // the four hex digits; the selector itself is consumed below
        break;
      default:
        throw new FHIRLexerException("Unknown character escape \\"+selector, currentLocation);
      }
      i++; // consume the escape selector so it is not emitted again as a literal
    }
    return b.toString();
  }

  /**
   * Decodes the four hexadecimal digits of a {@code \}{@code uXXXX} escape.
   *
   * @param s    the text containing the escape
   * @param from index of the first hex digit
   * @return the decoded character
   * @throws FHIRLexerException if fewer than four characters remain or any is not a hex digit
   */
  private char decodeUnicodeEscape(String s, int from) throws FHIRLexerException {
    if (from + 4 > s.length()) {
      throw new FHIRLexerException("Improper unicode escape \\u"+s.substring(from), currentLocation);
    }
    int value = 0;
    for (int k = from; k < from + 4; k++) {
      int digit = Character.digit(s.charAt(k), 16);
      if (digit < 0) {
        throw new FHIRLexerException("Improper unicode escape \\u"+s.substring(from, from + 4), currentLocation);
      }
      value = value * 16 + digit;
    }
    return (char) value;
  }

  /**
   * Consumes the current token if it equals {@code token}; otherwise does nothing
   * (including at end of input, where there is no current token).
   *
   * @param token the token text to skip
   * @throws FHIRLexerException if the following token cannot be read
   */
  public void skipToken(String token) throws FHIRLexerException {
    if (current != null && current.equals(token)) {
      next();
    }
  }

  /**
   * Consumes a token followed by any number of {@code "."} + token pairs and returns them
   * concatenated.
   *
   * @return the concatenated dotted token
   * @throws FHIRLexerException if a following token cannot be read
   */
  public String takeDottedToken() throws FHIRLexerException {
    StringBuilder b = new StringBuilder();
    b.append(take());
    while (!done() && ".".equals(current)) {
      b.append(take()); // the dot
      b.append(take()); // the segment after the dot
    }
    return b.toString();
  }

  /** @return the offset in the source at which the current token starts */
  public int getCurrentStart() {
    return currentStart;
  }

  /** @return the text being tokenized */
  public String getSource() {
    return source;
  }

  /** @return whether liquid mode is enabled */
  public boolean isLiquidMode() {
    return liquidMode;
  }

  /**
   * @param liquidMode when {@code true}, {@code |} and {@code ||} are lexed by a dedicated branch
   *                   in {@link #next()}
   */
  public void setLiquidMode(boolean liquidMode) {
    this.liquidMode = liquidMode;
  }

  /** @return the location reference recorded at the most recently skipped comment, or {@code null} if none */
  public SourceLocation getCommentLocation() {
    return this.commentLocation;
  }

}