package ca.uhn.fhir.util;

import ca.uhn.fhir.context.FhirContext;
import ca.uhn.fhir.context.RuntimeResourceDefinition;
import ca.uhn.fhir.model.primitive.IdDt;
import ca.uhn.fhir.parser.DataFormatException;
import ca.uhn.fhir.rest.api.Constants;
import ca.uhn.fhir.rest.server.exceptions.InvalidRequestException;
import com.google.common.escape.Escaper;
import com.google.common.net.PercentEscaper;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.StringSubstitutor;
import org.apache.http.NameValuePair;
import org.apache.http.client.utils.URLEncodedUtils;
import org.apache.http.message.BasicNameValuePair;
import org.hl7.fhir.instance.model.api.IPrimitiveType;

import javax.annotation.Nonnull;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.StringTokenizer;
import java.util.stream.Collectors;

import static org.apache.commons.lang3.StringUtils.defaultIfBlank;
import static org.apache.commons.lang3.StringUtils.defaultString;
import static org.apache.commons.lang3.StringUtils.endsWith;
import static org.apache.commons.lang3.StringUtils.isBlank;

/*
 * #%L
 * HAPI FHIR - Core Library
 * %%
 * Copyright (C) 2014 - 2022 Smile CDR, Inc.
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

/**
 * Static helpers for building, parsing, escaping and sanitizing URLs and
 * URL fragments.
 */
public class UrlUtil {
	private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(UrlUtil.class);

	private static final String URL_FORM_PARAMETER_OTHER_SAFE_CHARS = "-_.*";
	private static final Escaper PARAMETER_ESCAPER = new PercentEscaper(URL_FORM_PARAMETER_OTHER_SAFE_CHARS, false);

	/**
	 * Raw sequences in a match URL that must be percent-encoded before the
	 * URL is handed to the form decoder in {@link #translateMatchUrl(String)}.
	 * Longer sequences come first so that "=>=" is not consumed as "=>".
	 */
	private static final String[] MATCH_URL_SEARCH_LIST = {
		"+",
		"|",
		"=>=",
		"=<=",
		"=>",
		"=<"
	};

	/**
	 * Replacements for {@link #MATCH_URL_SEARCH_LIST}, index for index.
	 */
	private static final String[] MATCH_URL_REPLACEMENT_LIST = {
		"%2B",
		"%7C",
		"=%3E%3D",
		"=%3C%3D",
		"=%3E",
		"=%3C"
	};

	/**
	 * Mutable holder for the pieces extracted by {@link UrlUtil#parseUrl(String)}.
	 * Any piece that was not present in the parsed URL is left <code>null</code>.
	 */
	public static class UrlParts {
		private String myParams;
		private String myResourceId;
		private String myResourceType;
		private String myVersionId;

		public String getParams() {
			return myParams;
		}

		public void setParams(String theParams) {
			myParams = theParams;
		}

		public String getResourceId() {
			return myResourceId;
		}

		public void setResourceId(String theResourceId) {
			myResourceId = theResourceId;
		}

		public String getResourceType() {
			return myResourceType;
		}

		public void setResourceType(String theResourceType) {
			myResourceType = theResourceType;
		}

		public String getVersionId() {
			return myVersionId;
		}

		public void setVersionId(String theVersionId) {
			myVersionId = theVersionId;
		}
	}

	/**
	 * Resolve a relative URL - THIS METHOD WILL NOT FAIL but will log a warning and return theEndpoint if the input is invalid.
	 *
	 * @param theBase     the absolute base URL to resolve against; if <code>null</code>, theEndpoint is returned unchanged
	 * @param theEndpoint the URL to resolve; returned unchanged if it is already absolute
	 * @return the resolved URL, or <code>null</code> if theEndpoint is <code>null</code>
	 */
	public static String constructAbsoluteUrl(String theBase, String theEndpoint) {
		if (theEndpoint == null) {
			return null;
		}
		if (isAbsolute(theEndpoint)) {
			return theEndpoint;
		}
		if (theBase == null) {
			return theEndpoint;
		}

		try {
			return new URL(new URL(theBase), theEndpoint).toString();
		} catch (MalformedURLException e) {
			// Trailing throwable argument is logged as the exception, not as a placeholder value
			ourLog.warn("Failed to resolve relative URL[{}] against absolute base[{}]", theEndpoint, theBase, e);
			return theEndpoint;
		}
	}

	/**
	 * Returns the part of theExtensionUrl that follows its last "/" when
	 * everything up to that slash is identical in theParentExtensionUrl
	 * (i.e. both URLs live in the same "directory"). In every other case
	 * theExtensionUrl is returned unchanged.
	 *
	 * @param theParentExtensionUrl the parent URL; if <code>null</code>, theExtensionUrl is returned unchanged
	 * @param theExtensionUrl       the child URL; if <code>null</code> (and the parent is not), <code>null</code> is returned
	 */
	public static String constructRelativeUrl(String theParentExtensionUrl, String theExtensionUrl) {
		if (theParentExtensionUrl == null) {
			return theExtensionUrl;
		}
		if (theExtensionUrl == null) {
			return null;
		}

		int parentLastSlashIdx = theParentExtensionUrl.lastIndexOf('/');
		int childLastSlashIdx = theExtensionUrl.lastIndexOf('/');

		if (parentLastSlashIdx == -1 || childLastSlashIdx == -1) {
			return theExtensionUrl;
		}

		if (parentLastSlashIdx != childLastSlashIdx) {
			return theExtensionUrl;
		}

		// Both strings are known to be longer than parentLastSlashIdx here, so this
		// compares exactly the prefix that precedes the last slash
		if (!theParentExtensionUrl.regionMatches(0, theExtensionUrl, 0, parentLastSlashIdx)) {
			return theExtensionUrl;
		}

		return theExtensionUrl.substring(parentLastSlashIdx + 1);
	}

	/**
	 * URL encode a value according to RFC 3986
	 * <p>
	 * This method is intended to be applied to an individual parameter
	 * name or value. For example, if you are creating the URL
	 * <code>http://example.com/fhir/Patient?key=føø</code>
	 * it would be appropriate to pass the string "føø" to this method,
	 * but not appropriate to pass the entire URL since characters
	 * such as "/" and "?" would also be escaped.
	 * </p>
	 *
	 * @return the escaped value, or <code>null</code> if the input is <code>null</code>
	 */
	public static String escapeUrlParam(String theUnescaped) {
		if (theUnescaped == null) {
			return null;
		}
		return PARAMETER_ESCAPER.escape(theUnescaped);
	}

	/**
	 * Applies the same encoding as {@link #escapeUrlParam(String)} but against all
	 * values in a collection. A <code>null</code> element is passed through as <code>null</code>.
	 */
	public static List<String> escapeUrlParams(@Nonnull Collection<String> theUnescaped) {
		return theUnescaped
			.stream()
			.map(UrlUtil::escapeUrlParam)
			.collect(Collectors.toList());
	}


	/**
	 * Returns <code>true</code> if the value starts with "http://" or "https://"
	 * (compared case-insensitively). A <code>null</code> value is not absolute.
	 */
	public static boolean isAbsolute(String theValue) {
		if (theValue == null) {
			return false;
		}
		String value = theValue.toLowerCase();
		return value.startsWith("http://") || value.startsWith("https://");
	}

	/**
	 * Returns <code>true</code> if the string contains any character that
	 * {@link #sanitizeUrlPart(CharSequence)} would escape or drop: a quote,
	 * an angle bracket, or any character below the space character.
	 */
	public static boolean isNeedsSanitization(CharSequence theString) {
		if (theString != null) {
			for (int i = 0; i < theString.length(); i++) {
				char nextChar = theString.charAt(i);
				switch (nextChar) {
					case '\'':
					case '"':
					case '<':
					case '>':
					case '\n':
					case '\r':
						return true;
				}
				if (nextChar < ' ') {
					return true;
				}
			}
		}
		return false;
	}

	/**
	 * Returns <code>true</code> if the URL is at least 8 characters long and
	 * starts with "http://" or "https://" (compared case-insensitively).
	 * No further validation of the URL is performed.
	 */
	public static boolean isValid(String theUrl) {
		if (theUrl == null || theUrl.length() < 8) {
			return false;
		}

		String url = theUrl.toLowerCase();
		return url.startsWith("http://") || url.startsWith("https://");
	}

	/**
	 * Looks up the resource definition for the resource type named in a URL
	 * such as <code>Observation?param=foo</code>, <code>Observation/?param=foo</code>
	 * or <code>http://example.com/fhir/Observation?param=foo</code>. The resource
	 * name is whatever follows the last "/" in the portion of the URL before
	 * the "?" (or in the whole URL if it has no "?").
	 *
	 * @param theCtx the context used to look up the resource definition
	 * @param theUrl the URL to inspect, must not be <code>null</code>
	 */
	public static RuntimeResourceDefinition parseUrlResourceType(FhirContext theCtx, String theUrl) throws DataFormatException {
		String url = theUrl;
		int paramIndex = url.indexOf('?');

		// Change pattern of "Observation/?param=foo" into "Observation?param=foo"
		if (paramIndex > 0 && url.charAt(paramIndex - 1) == '/') {
			url = url.substring(0, paramIndex - 1) + url.substring(paramIndex);
			paramIndex--;
		}

		// With no query string, the whole URL is the path
		String resourceName = (paramIndex == -1) ? url : url.substring(0, paramIndex);
		if (resourceName.contains("/")) {
			resourceName = resourceName.substring(resourceName.lastIndexOf('/') + 1);
		}
		return theCtx.getResourceDefinition(resourceName);
	}

	/**
	 * Parses a query string (with or without a leading "?") into a map of
	 * unescaped parameter names to their unescaped values. A parameter with
	 * no "=" is given the empty string as its value.
	 */
	public static Map<String, String[]> parseQueryString(String theQueryString) {
		Map<String, List<String>> map = new HashMap<>();
		parseQueryString(theQueryString, map);
		return toQueryStringMap(map);
	}

	/**
	 * Parses one query string and accumulates its parameters into the given map.
	 */
	private static void parseQueryString(String theQueryString, Map<String, List<String>> theMap) {
		String query = defaultString(theQueryString);
		if (query.startsWith("?")) {
			query = query.substring(1);
		}

		StringTokenizer tok = new StringTokenizer(query, "&");
		while (tok.hasMoreTokens()) {
			String nextToken = tok.nextToken();
			if (isBlank(nextToken)) {
				continue;
			}

			int equalsIndex = nextToken.indexOf('=');
			String nextValue;
			String nextKey;
			if (equalsIndex == -1) {
				nextKey = nextToken;
				nextValue = "";
			} else {
				nextKey = nextToken.substring(0, equalsIndex);
				nextValue = nextToken.substring(equalsIndex + 1);
			}

			nextKey = unescape(nextKey);
			nextValue = unescape(nextValue);

			theMap.computeIfAbsent(nextKey, k -> new ArrayList<>()).add(nextValue);
		}
	}

	/**
	 * Parses several query strings and merges their parameters into one map;
	 * values for a name that appears more than once are appended in order.
	 */
	public static Map<String, String[]> parseQueryStrings(String... theQueryString) {
		Map<String, List<String>> map = new HashMap<>();
		for (String next : theQueryString) {
			parseQueryString(next, map);
		}
		return toQueryStringMap(map);
	}

	/**
	 * Normalizes canonical URLs for comparison. Trailing "/" is stripped,
	 * and any version identifiers or fragment hash is removed
	 *
	 * @return the normalized URL, or <code>null</code> if the input is <code>null</code>
	 */
	public static String normalizeCanonicalUrlForComparison(String theUrl) {
		if (theUrl == null) {
			return null;
		}

		String retVal;
		try {
			retVal = new URI(theUrl).normalize().toString();
		} catch (URISyntaxException e) {
			// Not parseable as a URI: fall back to plain string handling
			retVal = theUrl;
		}
		while (endsWith(retVal, "/")) {
			retVal = retVal.substring(0, retVal.length() - 1);
		}
		int hashOrPipeIndex = StringUtils.indexOfAny(retVal, '#', '|');
		if (hashOrPipeIndex != -1) {
			retVal = retVal.substring(0, hashOrPipeIndex);
		}
		return retVal;
	}

	/**
	 * Parse a URL in one of the following forms:
	 * <ul>
	 * <li>[Resource Type]?[Search Params]
	 * <li>[Resource Type]/[Resource ID]
	 * <li>[Resource Type]/[Resource ID]/_history/[Version ID]
	 * </ul>
	 * A URL starting with "http" is parsed as an absolute resource ID. For
	 * any other URL, a single leading "/" followed by a letter is ignored.
	 *
	 * @param theUrl the URL to parse, must not be <code>null</code>
	 * @throws InvalidRequestException if the URL has a path segment after the resource ID that is not the history token
	 */
	public static UrlParts parseUrl(String theUrl) {
		String url = theUrl;
		UrlParts retVal = new UrlParts();
		if (url.startsWith("http")) {
			int qmIdx = url.indexOf('?');
			if (qmIdx != -1) {
				retVal.setParams(defaultIfBlank(url.substring(qmIdx + 1), null));
				url = url.substring(0, qmIdx);
			}

			IdDt id = new IdDt(url);
			retVal.setResourceType(id.getResourceType());
			retVal.setResourceId(id.getIdPart());
			retVal.setVersionId(id.getVersionIdPart());
			return retVal;
		}

		// Ignore a leading slash so that "/Patient/123" and "/Patient?name=x" yield
		// the resource type "Patient" rather than "/Patient"
		if (url.length() > 2 && url.charAt(0) == '/' && Character.isLetter(url.charAt(1))) {
			url = url.substring(1);
		}

		int nextStart = 0;
		boolean nextIsHistory = false;

		for (int idx = 0; idx < url.length(); idx++) {
			char nextChar = url.charAt(idx);
			boolean atEnd = (idx + 1) == url.length();
			if (nextChar == '?' || nextChar == '/' || atEnd) {
				// At the end of the string the current character belongs to the segment, unless it is the "?"
				int endIdx = (atEnd && nextChar != '?') ? idx + 1 : idx;
				String nextSubstring = url.substring(nextStart, endIdx);
				if (retVal.getResourceType() == null) {
					retVal.setResourceType(nextSubstring);
				} else if (retVal.getResourceId() == null) {
					retVal.setResourceId(nextSubstring);
				} else if (nextIsHistory) {
					retVal.setVersionId(nextSubstring);
				} else {
					if (nextSubstring.equals(Constants.URL_TOKEN_HISTORY)) {
						nextIsHistory = true;
					} else {
						throw new InvalidRequestException("Invalid FHIR resource URL: " + url);
					}
				}
				if (nextChar == '?') {
					// Everything after the "?" is the parameter string; nothing more to parse
					if (url.length() > idx + 1) {
						retVal.setParams(url.substring(idx + 1));
					}
					break;
				}
				nextStart = idx + 1;
			}
		}

		return retVal;

	}

	/**
	 * This method specifically HTML-encodes the &quot; and
	 * &lt; characters in order to prevent injection attacks
	 *
	 * @return the sanitized string value, or <code>null</code> if the input is <code>null</code>
	 */
	public static String sanitizeUrlPart(IPrimitiveType<?> theString) {
		String retVal = null;
		if (theString != null) {
			retVal = sanitizeUrlPart(theString.getValueAsString());
		}
		return retVal;
	}

	/**
	 * This method specifically HTML-encodes the &quot; and
	 * &lt; characters in order to prevent injection attacks.
	 * <p>
	 * The following characters are escaped:
	 * <ul>
	 * <li>{@code '} becomes {@code &apos;}</li>
	 * <li>{@code "} becomes {@code &quot;}</li>
	 * <li>{@code <} becomes {@code &lt;}</li>
	 * <li>{@code >} becomes {@code &gt;}</li>
	 * <li>\n (newline) becomes {@code &#10;}</li>
	 * <li>\r (carriage return) becomes {@code &#13;}</li>
	 * </ul>
	 * Any other character below the space character is dropped.
	 *
	 * @return the sanitized string, or <code>null</code> if the input is <code>null</code>
	 */
	public static String sanitizeUrlPart(CharSequence theString) {
		if (theString == null) {
			return null;
		}

		boolean needsSanitization = isNeedsSanitization(theString);

		if (needsSanitization) {
			// Ok, we're sanitizing
			StringBuilder buffer = new StringBuilder(theString.length() + 10);
			for (int j = 0; j < theString.length(); j++) {

				char nextChar = theString.charAt(j);
				switch (nextChar) {
					/*
					 * NB: If you add a constant here, you also need to add it
					 * to isNeedsSanitization()!!
					 */
					case '\'':
						buffer.append("&apos;");
						break;
					case '"':
						buffer.append("&quot;");
						break;
					case '<':
						buffer.append("&lt;");
						break;
					case '>':
						buffer.append("&gt;");
						break;
					case '\n':
						buffer.append("&#10;");
						break;
					case '\r':
						buffer.append("&#13;");
						break;
					default:
						// Remaining control characters are silently dropped
						if (nextChar >= ' ') {
							buffer.append(nextChar);
						}
						break;
				}

			} // for build escaped string

			return buffer.toString();
		}

		return theString.toString();
	}

	/**
	 * Applies the same logic as {@link #sanitizeUrlPart(CharSequence)} but against an array, returning an array with the
	 * same strings as the input but with sanitization applied
	 *
	 * @return a new array, or <code>null</code> if the input is <code>null</code>
	 */
	public static String[] sanitizeUrlPart(String[] theParameterValues) {
		String[] retVal = null;
		if (theParameterValues != null) {
			retVal = new String[theParameterValues.length];
			for (int i = 0; i < theParameterValues.length; i++) {
				retVal[i] = sanitizeUrlPart(theParameterValues[i]);
			}
		}
		return retVal;
	}

	/**
	 * Converts the accumulated name-to-value-list map into a name-to-value-array map.
	 */
	private static Map<String, String[]> toQueryStringMap(Map<String, List<String>> theMap) {
		HashMap<String, String[]> retVal = new HashMap<>();
		for (Entry<String, List<String>> nextEntry : theMap.entrySet()) {
			retVal.put(nextEntry.getKey(), nextEntry.getValue().toArray(new String[0]));
		}
		return retVal;
	}

	/**
	 * URL-decodes the string as UTF-8. The input is returned as-is when it
	 * contains neither "%" nor "+", since there is then nothing to decode.
	 *
	 * @return the decoded string, or <code>null</code> if the input is <code>null</code>
	 */
	public static String unescape(String theString) {
		if (theString == null) {
			return null;
		}
		for (int i = 0; i < theString.length(); i++) {
			char nextChar = theString.charAt(i);
			if (nextChar == '%' || nextChar == '+') {
				try {
					// Yes it would be nice to not use a string "UTF-8" but the equivalent
					// method that takes Charset is JDK10+ only... sigh....
					return URLDecoder.decode(theString, "UTF-8");
				} catch (UnsupportedEncodingException e) {
					throw new Error("UTF-8 not supported, this shouldn't happen", e);
				}
			}
		}
		return theString;
	}

	/**
	 * Parses the parameters of a match URL (everything after the first "?",
	 * or the whole string if there is none) into name/value pairs.
	 * <p>
	 * Literal "+" and "|" characters, and the ">", "<", ">=" and "<="
	 * comparators directly following an "=", are percent-encoded before
	 * decoding so that they survive as-is in the parsed values.
	 * </p>
	 *
	 * @param theMatchUrl the match URL, must not be <code>null</code>
	 * @throws InvalidRequestException if the URL contains a space
	 */
	public static List<NameValuePair> translateMatchUrl(String theMatchUrl) {
		List<NameValuePair> parameters;
		String matchUrl = theMatchUrl;
		int questionMarkIndex = matchUrl.indexOf('?');
		if (questionMarkIndex != -1) {
			matchUrl = matchUrl.substring(questionMarkIndex + 1);
		}

		matchUrl = StringUtils.replaceEach(matchUrl, MATCH_URL_SEARCH_LIST, MATCH_URL_REPLACEMENT_LIST);
		if (matchUrl.contains(" ")) {
			throw new InvalidRequestException("Failed to parse match URL[" + theMatchUrl + "] - URL is invalid (must not contain spaces)");
		}

		parameters = URLEncodedUtils.parse((matchUrl), Constants.CHARSET_UTF8, '&');

		// One issue that has happened before is people putting a "+" sign into an email address in a match URL
		// and having that turn into a " ". Since spaces are never appropriate for email addresses, let's just
		// assume they really meant "+".
		for (int i = 0; i < parameters.size(); i++) {
			NameValuePair next = parameters.get(i);
			// The value is null for a parameter that was supplied without "="
			String nextValue = next.getValue();
			if ("email".equals(next.getName()) && nextValue != null && nextValue.contains(" ")) {
				BasicNameValuePair newPair = new BasicNameValuePair(next.getName(), nextValue.replace(' ', '+'));
				parameters.set(i, newPair);
			}
		}

		return parameters;
	}
}