001package org.hl7.fhir.r5.utils;
002
003import java.util.ArrayList;
004import java.util.List;
005import java.util.StringJoiner;
006
007import org.apache.poi.xssf.model.Comments;
008import org.hl7.fhir.exceptions.FHIRException;
009
010/*
011  Copyright (c) 2011+, HL7, Inc.
012  All rights reserved.
013  
014  Redistribution and use in source and binary forms, with or without modification, 
015  are permitted provided that the following conditions are met:
016    
017   * Redistributions of source code must retain the above copyright notice, this 
018     list of conditions and the following disclaimer.
019   * Redistributions in binary form must reproduce the above copyright notice, 
020     this list of conditions and the following disclaimer in the documentation 
021     and/or other materials provided with the distribution.
022   * Neither the name of HL7 nor the names of its contributors may be used to 
023     endorse or promote products derived from this software without specific 
024     prior written permission.
025  
026  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
027  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
028  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
029  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
030  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
031  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
032  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
033  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
034  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
035  POSSIBILITY OF SUCH DAMAGE.
036  
037 */
038
039
040
041import org.hl7.fhir.r5.model.ExpressionNode;
042import org.hl7.fhir.utilities.CommaSeparatedStringBuilder;
043import org.hl7.fhir.utilities.SourceLocation;
044import org.hl7.fhir.utilities.Utilities;
045
046// shared lexer for concrete syntaxes 
047// - FluentPath
048// - Mapping language
049
050public class FHIRLexer {
051  public class FHIRLexerException extends FHIRException {
052
053    private SourceLocation location;
054
055//    public FHIRLexerException() {
056//      super();
057//    }
058//
059//    public FHIRLexerException(String message, Throwable cause) {
060//      super(message, cause);
061//    }
062//
063//    public FHIRLexerException(String message) {
064//      super(message);
065//    }
066//
067//    public FHIRLexerException(Throwable cause) {
068//      super(cause);
069//    }
070
071    public FHIRLexerException(String message, SourceLocation location) {
072      super(message);
073      this.location = location;
074    }
075
076    public SourceLocation getLocation() {
077      return location;
078    }
079
080  }
081  private String source;
082  private int cursor;
083  private int currentStart;
084  private String current;
085  private List<String> comments = new ArrayList<>();
086  private SourceLocation currentLocation;
087  private SourceLocation currentStartLocation;
088  private int id;
089  private String name;
090  private boolean liquidMode; // in liquid mode, || terminates the expression and hands the parser back to the host
091  private SourceLocation commentLocation;
092
093  public FHIRLexer(String source, String name) throws FHIRLexerException {
094    this.source = source == null ? "" : source;
095    this.name = name == null ? "??" : name;
096    currentLocation = new SourceLocation(1, 1);
097    next();
098  }
099  public FHIRLexer(String source, int i) throws FHIRLexerException {
100    this.source = source;
101    this.cursor = i;
102    currentLocation = new SourceLocation(1, 1);
103    next();
104  }
105  public String getCurrent() {
106    return current;
107  }
108  public SourceLocation getCurrentLocation() {
109    return currentLocation;
110  }
111
112  public boolean isConstant() {
113    return FHIRPathConstant.isFHIRPathConstant(current);
114  }
115
116  public boolean isFixedName() {
117    return FHIRPathConstant.isFHIRPathFixedName(current);
118  }
119
120  public boolean isStringConstant() {
121    return FHIRPathConstant.isFHIRPathStringConstant(current);
122  }
123
124  public String take() throws FHIRLexerException {
125    String s = current;
126    next();
127    return s;
128  }
129
130  public int takeInt() throws FHIRLexerException {
131    String s = current;
132    if (!Utilities.isInteger(s))
133      throw error("Found "+current+" expecting an integer");
134    next();
135    return Integer.parseInt(s);
136  }
137
138  public boolean isToken() {
139    if (Utilities.noString(current))
140      return false;
141
142    if (current.startsWith("$"))
143      return true;
144
145    if (current.equals("*") || current.equals("**"))
146      return true;
147
148    if ((current.charAt(0) >= 'A' && current.charAt(0) <= 'Z') || (current.charAt(0) >= 'a' && current.charAt(0) <= 'z')) {
149      for (int i = 1; i < current.length(); i++) 
150        if (!( (current.charAt(1) >= 'A' && current.charAt(1) <= 'Z') || (current.charAt(1) >= 'a' && current.charAt(1) <= 'z') ||
151            (current.charAt(1) >= '0' && current.charAt(1) <= '9')))
152          return false;
153      return true;
154    }
155    return false;
156  }
157
158  public FHIRLexerException error(String msg) {
159    return error(msg, currentLocation.toString(), currentLocation);
160  }
161
162  public FHIRLexerException error(String msg, String location, SourceLocation loc) {
163    return new FHIRLexerException("Error @"+location+": "+msg, loc);
164  }
165
166  public void next() throws FHIRLexerException {
167    skipWhitespaceAndComments();
168    current = null;
169    currentStart = cursor;
170    currentStartLocation = currentLocation;
171    if (cursor < source.length()) {
172      char ch = source.charAt(cursor);
173      if (ch == '!' || ch == '>' || ch == '<' || ch == ':' || ch == '-' || ch == '=')  {
174        cursor++;
175        if (cursor < source.length() && (source.charAt(cursor) == '=' || source.charAt(cursor) == '~' || source.charAt(cursor) == '-') || (ch == '-' && source.charAt(cursor) == '>')) 
176          cursor++;
177        current = source.substring(currentStart, cursor);
178      } else if (ch == '.' ) {
179        cursor++;
180        if (cursor < source.length() && (source.charAt(cursor) == '.')) 
181          cursor++;
182        current = source.substring(currentStart, cursor);
183      } else if (ch >= '0' && ch <= '9') {
184          cursor++;
185        boolean dotted = false;
186        while (cursor < source.length() && ((source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || (source.charAt(cursor) == '.') && !dotted)) {
187          if (source.charAt(cursor) == '.')
188            dotted = true;
189          cursor++;
190        }
191        if (source.charAt(cursor-1) == '.')
192          cursor--;
193        current = source.substring(currentStart, cursor);
194      }  else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
195        while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 
196            (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == '_')) 
197          cursor++;
198        current = source.substring(currentStart, cursor);
199      } else if (ch == '%') {
200        cursor++;
201        if (cursor < source.length() && (source.charAt(cursor) == '`')) {
202          cursor++;
203          while (cursor < source.length() && (source.charAt(cursor) != '`'))
204            cursor++;
205          cursor++;
206        } else
207        while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 
208            (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == ':' || source.charAt(cursor) == '-'))
209          cursor++;
210        current = source.substring(currentStart, cursor);
211      } else if (ch == '/') {
212        cursor++;
213        if (cursor < source.length() && (source.charAt(cursor) == '/')) {
214          // this is en error - should already have been skipped
215          error("This shouldn't happen?");
216        }
217        current = source.substring(currentStart, cursor);
218      } else if (ch == '$') {
219        cursor++;
220        while (cursor < source.length() && (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z'))
221          cursor++;
222        current = source.substring(currentStart, cursor);
223      } else if (ch == '{') {
224        cursor++;
225        ch = source.charAt(cursor);
226        if (ch == '}')
227          cursor++;
228        current = source.substring(currentStart, cursor);
229      } else if (ch == '"') {
230        cursor++;
231        boolean escape = false;
232        while (cursor < source.length() && (escape || source.charAt(cursor) != '"')) {
233          if (escape)
234            escape = false;
235          else 
236            escape = (source.charAt(cursor) == '\\');
237          cursor++;
238        }
239        if (cursor == source.length())
240          throw error("Unterminated string");
241        cursor++;
242        current = "\""+source.substring(currentStart+1, cursor-1)+"\"";
243      } else if (ch == '`') {
244        cursor++;
245        boolean escape = false;
246        while (cursor < source.length() && (escape || source.charAt(cursor) != '`')) {
247          if (escape)
248            escape = false;
249          else 
250            escape = (source.charAt(cursor) == '\\');
251          cursor++;
252        }
253        if (cursor == source.length())
254          throw error("Unterminated string");
255        cursor++;
256        current = "`"+source.substring(currentStart+1, cursor-1)+"`";
257      } else if (ch == '\''){
258        cursor++;
259        char ech = ch;
260        boolean escape = false;
261        while (cursor < source.length() && (escape || source.charAt(cursor) != ech)) {
262          if (escape)
263            escape = false;
264          else 
265            escape = (source.charAt(cursor) == '\\');
266          cursor++;
267        }
268        if (cursor == source.length())
269          throw error("Unterminated string");
270        cursor++;
271        current = source.substring(currentStart, cursor);
272        if (ech == '\'')
273          current = "\'"+current.substring(1, current.length() - 1)+"\'";
274      } else if (ch == '`') {
275        cursor++;
276        boolean escape = false;
277        while (cursor < source.length() && (escape || source.charAt(cursor) != '`')) {
278          if (escape)
279            escape = false;
280          else 
281            escape = (source.charAt(cursor) == '\\');
282          cursor++;
283        }
284        if (cursor == source.length())
285          throw error("Unterminated string");
286        cursor++;
287        current = "`"+source.substring(currentStart+1, cursor-1)+"`";
288      } else if (ch == '|' && liquidMode) {
289        cursor++;
290        ch = source.charAt(cursor);
291        if (ch == '|')
292          cursor++;
293        current = source.substring(currentStart, cursor);
294      } else if (ch == '@'){
295        int start = cursor;
296        cursor++;
297        while (cursor < source.length() && isDateChar(source.charAt(cursor), start))
298          cursor++;          
299        current = source.substring(currentStart, cursor);
300      } else { // if CharInSet(ch, ['.', ',', '(', ')', '=', '$']) then
301        cursor++;
302        current = source.substring(currentStart, cursor);
303      }
304    }
305  }
306
307  private void skipWhitespaceAndComments() {
308    comments.clear();
309    boolean last13 = false;
310    boolean done = false;
311    while (cursor < source.length() && !done) {
312      if (cursor < source.length() -1 && "//".equals(source.substring(cursor, cursor+2))) {
313        commentLocation = currentLocation;
314        int start = cursor+2;
315        while (cursor < source.length() && !((source.charAt(cursor) == '\r') || source.charAt(cursor) == '\n')) { 
316          cursor++;        
317        }
318        comments.add(source.substring(start, cursor).trim());
319      } else if (cursor < source.length() - 1 && "/*".equals(source.substring(cursor, cursor+2))) {
320        commentLocation = currentLocation;
321        int start = cursor+2;
322        while (cursor < source.length() - 1 && !"*/".equals(source.substring(cursor, cursor+2))) { 
323          last13 = currentLocation.checkChar(source.charAt(cursor), last13);
324          cursor++;        
325        }
326        if (cursor >= source.length() -1) {
327          error("Unfinished comment");
328        } else {
329          comments.add(source.substring(start, cursor).trim());
330          cursor = cursor + 2;
331        }
332      } else if (Character.isWhitespace(source.charAt(cursor))) {
333        last13 = currentLocation.checkChar(source.charAt(cursor), last13);
334        cursor++;
335      } else {
336        done = true;
337      }
338    }
339  }
340  
341  private boolean isDateChar(char ch,int start) {
342    int eot = source.charAt(start+1) == 'T' ? 10 : 20;
343    
344    return ch == '-' || ch == ':' || ch == 'T' || ch == '+' || ch == 'Z' || Character.isDigit(ch) || (cursor-start == eot && ch == '.' && cursor < source.length()-1&& Character.isDigit(source.charAt(cursor+1)));
345  }
346  public boolean isOp() {
347    return ExpressionNode.Operation.fromCode(current) != null;
348  }
349  public boolean done() {
350    return currentStart >= source.length();
351  }
352  public int nextId() {
353    id++;
354    return id;
355  }
356  public SourceLocation getCurrentStartLocation() {
357    return currentStartLocation;
358  }
359  
360  // special case use
361  public void setCurrent(String current) {
362    this.current = current;
363  }
364
365  public boolean hasComments() {
366    return comments.size() > 0;
367  }
368
369  public List<String> getComments() {
370    return comments;
371  }
372
373  public String getAllComments() {
374    CommaSeparatedStringBuilder b = new CommaSeparatedStringBuilder("\r\n");
375    b.addAll(comments);
376    comments.clear();
377    return b.toString();
378  }
379
380  public String getFirstComment() {
381    if (hasComments()) {
382      String s = comments.get(0);
383      comments.remove(0);
384      return s;      
385    } else {
386      return null;
387    }
388  }
389
390  public boolean hasToken(String kw) {
391    return !done() && kw.equals(current);
392  }
393  public boolean hasToken(String... names) {
394    if (done()) 
395      return false;
396    for (String s : names)
397      if (s.equals(current))
398        return true;
399    return false;
400  }
401  
402  public void token(String kw) throws FHIRLexerException {
403    if (!kw.equals(current)) 
404      throw error("Found \""+current+"\" expecting \""+kw+"\"");
405    next();
406  }
407  
408  public String readConstant(String desc) throws FHIRLexerException {
409    if (!isStringConstant())
410      throw error("Found "+current+" expecting \"["+desc+"]\"");
411
412    return processConstant(take());
413  }
414
415  public String readFixedName(String desc) throws FHIRLexerException {
416    if (!isFixedName())
417      throw error("Found "+current+" expecting \"["+desc+"]\"");
418
419    return processFixedName(take());
420  }
421
422  public String processConstant(String s) throws FHIRLexerException {
423    StringBuilder b = new StringBuilder();
424    int i = 1;
425    while (i < s.length()-1) {
426      char ch = s.charAt(i);
427      if (ch == '\\') {
428        i++;
429        switch (s.charAt(i)) {
430        case 't': 
431          b.append('\t');
432          break;
433        case 'r':
434          b.append('\r');
435          break;
436        case 'n': 
437          b.append('\n');
438          break;
439        case 'f': 
440          b.append('\f');
441          break;
442        case '\'':
443          b.append('\'');
444          break;
445        case '"':
446          b.append('"');
447          break;
448        case '`':
449          b.append('`');
450          break;
451        case '\\': 
452          b.append('\\');
453          break;
454        case '/': 
455          b.append('/');
456          break;
457        case 'u':
458          i++;
459          int uc = Integer.parseInt(s.substring(i, i+4), 16);
460          b.append((char) uc);
461          i = i + 4;
462          break;
463        default:
464          throw new FHIRLexerException("Unknown character escape \\"+s.charAt(i), currentLocation);
465        }
466      } else {
467        b.append(ch);
468        i++;
469      }
470    }
471    return b.toString();
472  }
473  
474  public String processFixedName(String s) throws FHIRLexerException {
475    StringBuilder b = new StringBuilder();
476    int i = 1;
477    while (i < s.length()-1) {
478      char ch = s.charAt(i);
479      if (ch == '\\') {
480        i++;
481        switch (s.charAt(i)) {
482        case 't': 
483          b.append('\t');
484          break;
485        case 'r':
486          b.append('\r');
487          break;
488        case 'n': 
489          b.append('\n');
490          break;
491        case 'f': 
492          b.append('\f');
493          break;
494        case '\'':
495          b.append('\'');
496          break;
497        case '"':
498          b.append('"');
499          break;
500        case '\\': 
501          b.append('\\');
502          break;
503        case '/': 
504          b.append('/');
505          break;
506        case 'u':
507          i++;
508          int uc = Integer.parseInt(s.substring(i, i+4), 16);
509          b.append((char) uc);
510          i = i + 4;
511          break;
512        default:
513          throw new FHIRLexerException("Unknown character escape \\"+s.charAt(i), currentLocation);
514        }
515      } else {
516        b.append(ch);
517        i++;
518      }
519    }
520    return b.toString();
521  }
522
523  public void skipToken(String token) throws FHIRLexerException {
524    if (getCurrent().equals(token))
525      next();
526    
527  }
528  public String takeDottedToken() throws FHIRLexerException {
529    StringBuilder b = new StringBuilder();
530    b.append(take());
531    while (!done() && getCurrent().equals(".")) {
532      b.append(take());
533      b.append(take());
534    }
535    return b.toString();
536  }
537  
538  public int getCurrentStart() {
539    return currentStart;
540  }
541  public String getSource() {
542    return source;
543  }
544  public boolean isLiquidMode() {
545    return liquidMode;
546  }
547  public void setLiquidMode(boolean liquidMode) {
548    this.liquidMode = liquidMode;
549  }
550  public SourceLocation getCommentLocation() {
551    return this.commentLocation;
552  }
553
554}