001package ca.uhn.fhir.util;
002
003import ca.uhn.fhir.context.FhirContext;
004import ca.uhn.fhir.context.RuntimeResourceDefinition;
005import ca.uhn.fhir.model.primitive.IdDt;
006import ca.uhn.fhir.parser.DataFormatException;
007import ca.uhn.fhir.rest.api.Constants;
008import ca.uhn.fhir.rest.server.exceptions.InvalidRequestException;
009import com.google.common.escape.Escaper;
010import com.google.common.net.PercentEscaper;
011import org.apache.commons.lang3.StringUtils;
012import org.apache.commons.text.StringSubstitutor;
013import org.apache.http.NameValuePair;
014import org.apache.http.client.utils.URLEncodedUtils;
015import org.apache.http.message.BasicNameValuePair;
016import org.hl7.fhir.instance.model.api.IPrimitiveType;
017
018import javax.annotation.Nonnull;
019import java.io.UnsupportedEncodingException;
020import java.net.MalformedURLException;
021import java.net.URI;
022import java.net.URISyntaxException;
023import java.net.URL;
024import java.net.URLDecoder;
025import java.util.ArrayList;
026import java.util.Collection;
027import java.util.HashMap;
028import java.util.List;
029import java.util.Map;
030import java.util.Map.Entry;
031import java.util.StringTokenizer;
032import java.util.stream.Collectors;
033
034import static org.apache.commons.lang3.StringUtils.defaultIfBlank;
035import static org.apache.commons.lang3.StringUtils.defaultString;
036import static org.apache.commons.lang3.StringUtils.endsWith;
037import static org.apache.commons.lang3.StringUtils.isBlank;
038
039/*
040 * #%L
041 * HAPI FHIR - Core Library
042 * %%
043 * Copyright (C) 2014 - 2022 Smile CDR, Inc.
044 * %%
045 * Licensed under the Apache License, Version 2.0 (the "License");
046 * you may not use this file except in compliance with the License.
047 * You may obtain a copy of the License at
048 *
049 *      http://www.apache.org/licenses/LICENSE-2.0
050 *
051 * Unless required by applicable law or agreed to in writing, software
052 * distributed under the License is distributed on an "AS IS" BASIS,
053 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
054 * See the License for the specific language governing permissions and
055 * limitations under the License.
056 * #L%
057 */
058
059public class UrlUtil {
        // Logger shared by the static helpers in this class
060        private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(UrlUtil.class);
061
        // Extra characters handed to the PercentEscaper below as "safe", i.e. left unescaped
062        private static final String URL_FORM_PARAMETER_OTHER_SAFE_CHARS = "-_.*";
        // Escaper used by escapeUrlParam()/escapeUrlParams().
        // NOTE(review): the second constructor argument is Guava's "plusForSpace" flag; false presumably
        // means a space is percent-encoded rather than written as "+" - confirm against the Guava docs.
063        private static final Escaper PARAMETER_ESCAPER = new PercentEscaper(URL_FORM_PARAMETER_OTHER_SAFE_CHARS, false);
064
065        public static class UrlParts {
066                private String myParams;
067                private String myResourceId;
068                private String myResourceType;
069                private String myVersionId;
070
071                public String getParams() {
072                        return myParams;
073                }
074
075                public void setParams(String theParams) {
076                        myParams = theParams;
077                }
078
079                public String getResourceId() {
080                        return myResourceId;
081                }
082
083                public void setResourceId(String theResourceId) {
084                        myResourceId = theResourceId;
085                }
086
087                public String getResourceType() {
088                        return myResourceType;
089                }
090
091                public void setResourceType(String theResourceType) {
092                        myResourceType = theResourceType;
093                }
094
095                public String getVersionId() {
096                        return myVersionId;
097                }
098
099                public void setVersionId(String theVersionId) {
100                        myVersionId = theVersionId;
101                }
102        }
103
104        /**
105         * Resolve a relative URL - THIS METHOD WILL NOT FAIL but will log a warning and return theEndpoint if the input is invalid.
106         */
107        public static String constructAbsoluteUrl(String theBase, String theEndpoint) {
108                if (theEndpoint == null) {
109                        return null;
110                }
111                if (isAbsolute(theEndpoint)) {
112                        return theEndpoint;
113                }
114                if (theBase == null) {
115                        return theEndpoint;
116                }
117
118                try {
119                        return new URL(new URL(theBase), theEndpoint).toString();
120                } catch (MalformedURLException e) {
121                        ourLog.warn("Failed to resolve relative URL[" + theEndpoint + "] against absolute base[" + theBase + "]", e);
122                        return theEndpoint;
123                }
124        }
125
126        public static String constructRelativeUrl(String theParentExtensionUrl, String theExtensionUrl) {
127                if (theParentExtensionUrl == null) {
128                        return theExtensionUrl;
129                }
130                if (theExtensionUrl == null) {
131                        return null;
132                }
133
134                int parentLastSlashIdx = theParentExtensionUrl.lastIndexOf('/');
135                int childLastSlashIdx = theExtensionUrl.lastIndexOf('/');
136
137                if (parentLastSlashIdx == -1 || childLastSlashIdx == -1) {
138                        return theExtensionUrl;
139                }
140
141                if (parentLastSlashIdx != childLastSlashIdx) {
142                        return theExtensionUrl;
143                }
144
145                if (!theParentExtensionUrl.substring(0, parentLastSlashIdx).equals(theExtensionUrl.substring(0, parentLastSlashIdx))) {
146                        return theExtensionUrl;
147                }
148
149                if (theExtensionUrl.length() > parentLastSlashIdx) {
150                        return theExtensionUrl.substring(parentLastSlashIdx + 1);
151                }
152
153                return theExtensionUrl;
154        }
155
156        /**
157         * URL encode a value according to RFC 3986
158         * <p>
159         * This method is intended to be applied to an individual parameter
160         * name or value. For example, if you are creating the URL
161         * <code>http://example.com/fhir/Patient?key=føø</code>
162         * it would be appropriate to pass the string "føø" to this method,
163         * but not appropriate to pass the entire URL since characters
164         * such as "/" and "?" would also be escaped.
165         * </P>
166         */
167        public static String escapeUrlParam(String theUnescaped) {
168                if (theUnescaped == null) {
169                        return null;
170                }
171                return PARAMETER_ESCAPER.escape(theUnescaped);
172        }
173
174        /**
175         * Applies the same encodong as {@link #escapeUrlParam(String)} but against all
176         * values in a collection
177         */
178        public static List<String> escapeUrlParams(@Nonnull Collection<String> theUnescaped) {
179                return theUnescaped
180                        .stream()
181                        .map(t -> PARAMETER_ESCAPER.escape(t))
182                        .collect(Collectors.toList());
183        }
184
185
186        public static boolean isAbsolute(String theValue) {
187                String value = theValue.toLowerCase();
188                return value.startsWith("http://") || value.startsWith("https://");
189        }
190
191        public static boolean isNeedsSanitization(CharSequence theString) {
192                if (theString != null) {
193                        for (int i = 0; i < theString.length(); i++) {
194                                char nextChar = theString.charAt(i);
195                                switch (nextChar) {
196                                        case '\'':
197                                        case '"':
198                                        case '<':
199                                        case '>':
200                                        case '\n':
201                                        case '\r':
202                                                return true;
203                                }
204                                if (nextChar < ' ') {
205                                        return true;
206                                }
207                        }
208                }
209                return false;
210        }
211
212        public static boolean isValid(String theUrl) {
213                if (theUrl == null || theUrl.length() < 8) {
214                        return false;
215                }
216
217                String url = theUrl.toLowerCase();
218                if (url.charAt(0) != 'h') {
219                        return false;
220                }
221                if (url.charAt(1) != 't') {
222                        return false;
223                }
224                if (url.charAt(2) != 't') {
225                        return false;
226                }
227                if (url.charAt(3) != 'p') {
228                        return false;
229                }
230                int slashOffset;
231                if (url.charAt(4) == ':') {
232                        slashOffset = 5;
233                } else if (url.charAt(4) == 's') {
234                        if (url.charAt(5) != ':') {
235                                return false;
236                        }
237                        slashOffset = 6;
238                } else {
239                        return false;
240                }
241
242                if (url.charAt(slashOffset) != '/') {
243                        return false;
244                }
245                if (url.charAt(slashOffset + 1) != '/') {
246                        return false;
247                }
248
249                return true;
250        }
251
252        public static RuntimeResourceDefinition parseUrlResourceType(FhirContext theCtx, String theUrl) throws DataFormatException {
253                String url = theUrl;
254                int paramIndex = url.indexOf('?');
255
256                // Change pattern of "Observation/?param=foo" into "Observation?param=foo"
257                if (paramIndex > 0 && url.charAt(paramIndex - 1) == '/') {
258                        url = url.substring(0, paramIndex - 1) + url.substring(paramIndex);
259                        paramIndex--;
260                }
261
262                String resourceName = url.substring(0, paramIndex);
263                if (resourceName.contains("/")) {
264                        resourceName = resourceName.substring(resourceName.lastIndexOf('/') + 1);
265                }
266                return theCtx.getResourceDefinition(resourceName);
267        }
268
269        public static Map<String, String[]> parseQueryString(String theQueryString) {
270                HashMap<String, List<String>> map = new HashMap<>();
271                parseQueryString(theQueryString, map);
272                return toQueryStringMap(map);
273        }
274
275        private static void parseQueryString(String theQueryString, HashMap<String, List<String>> map) {
276                String query = defaultString(theQueryString);
277                if (query.startsWith("?")) {
278                        query = query.substring(1);
279                }
280
281
282                StringTokenizer tok = new StringTokenizer(query, "&");
283                while (tok.hasMoreTokens()) {
284                        String nextToken = tok.nextToken();
285                        if (isBlank(nextToken)) {
286                                continue;
287                        }
288
289                        int equalsIndex = nextToken.indexOf('=');
290                        String nextValue;
291                        String nextKey;
292                        if (equalsIndex == -1) {
293                                nextKey = nextToken;
294                                nextValue = "";
295                        } else {
296                                nextKey = nextToken.substring(0, equalsIndex);
297                                nextValue = nextToken.substring(equalsIndex + 1);
298                        }
299
300                        nextKey = unescape(nextKey);
301                        nextValue = unescape(nextValue);
302
303                        List<String> list = map.computeIfAbsent(nextKey, k -> new ArrayList<>());
304                        list.add(nextValue);
305                }
306        }
307
308        public static Map<String, String[]> parseQueryStrings(String... theQueryString) {
309                HashMap<String, List<String>> map = new HashMap<>();
310                for (String next : theQueryString) {
311                        parseQueryString(next, map);
312                }
313                return toQueryStringMap(map);
314        }
315
316        /**
317         * Normalizes canonical URLs for comparison. Trailing "/" is stripped,
318         * and any version identifiers or fragment hash is removed
319         */
320        public static String normalizeCanonicalUrlForComparison(String theUrl) {
321                String retVal;
322                try {
323                        retVal = new URI(theUrl).normalize().toString();
324                } catch (URISyntaxException e) {
325                        retVal = theUrl;
326                }
327                while (endsWith(retVal, "/")) {
328                        retVal = retVal.substring(0, retVal.length() - 1);
329                }
330                int hashOrPipeIndex = StringUtils.indexOfAny(retVal, '#', '|');
331                if (hashOrPipeIndex != -1) {
332                        retVal = retVal.substring(0, hashOrPipeIndex);
333                }
334                return retVal;
335        }
336
337        /**
338         * Parse a URL in one of the following forms:
339         * <ul>
340         * <li>[Resource Type]?[Search Params]
341         * <li>[Resource Type]/[Resource ID]
342         * <li>[Resource Type]/[Resource ID]/_history/[Version ID]
343         * </ul>
344         */
345        public static UrlParts parseUrl(String theUrl) {
346                String url = theUrl;
347                UrlParts retVal = new UrlParts();
348                if (url.startsWith("http")) {
349                        if (url.startsWith("/")) {
350                                url = url.substring(1);
351                        }
352
353                        int qmIdx = url.indexOf('?');
354                        if (qmIdx != -1) {
355                                retVal.setParams(defaultIfBlank(url.substring(qmIdx + 1), null));
356                                url = url.substring(0, qmIdx);
357                        }
358
359                        IdDt id = new IdDt(url);
360                        retVal.setResourceType(id.getResourceType());
361                        retVal.setResourceId(id.getIdPart());
362                        retVal.setVersionId(id.getVersionIdPart());
363                        return retVal;
364                }
365
366                int parsingStart = 0;
367                if (url.length() > 2) {
368                        if (url.charAt(0) == '/') {
369                                if (Character.isLetter(url.charAt(1))) {
370                                        parsingStart = 1;
371                                }
372                        }
373                }
374
375                if (url.length() > 1 && url.charAt(0) == '/' && Character.isLetter(url.charAt(1)) && url.contains("?")) {
376                        url = url.substring(1);
377                }
378                int nextStart = 0;
379                boolean nextIsHistory = false;
380
381                for (int idx = parsingStart; idx < url.length(); idx++) {
382                        char nextChar = url.charAt(idx);
383                        boolean atEnd = (idx + 1) == url.length();
384                        if (nextChar == '?' || nextChar == '/' || atEnd) {
385                                int endIdx = (atEnd && nextChar != '?') ? idx + 1 : idx;
386                                String nextSubstring = url.substring(nextStart, endIdx);
387                                if (retVal.getResourceType() == null) {
388                                        retVal.setResourceType(nextSubstring);
389                                } else if (retVal.getResourceId() == null) {
390                                        retVal.setResourceId(nextSubstring);
391                                } else if (nextIsHistory) {
392                                        retVal.setVersionId(nextSubstring);
393                                } else {
394                                        if (nextSubstring.equals(Constants.URL_TOKEN_HISTORY)) {
395                                                nextIsHistory = true;
396                                        } else {
397                                                throw new InvalidRequestException("Invalid FHIR resource URL: " + url);
398                                        }
399                                }
400                                if (nextChar == '?') {
401                                        if (url.length() > idx + 1) {
402                                                retVal.setParams(url.substring(idx + 1));
403                                        }
404                                        break;
405                                }
406                                nextStart = idx + 1;
407                        }
408                }
409
410                return retVal;
411
412        }
413
414        /**
415         * This method specifically HTML-encodes the &quot; and
416         * &lt; characters in order to prevent injection attacks
417         */
418        public static String sanitizeUrlPart(IPrimitiveType<?> theString) {
419                String retVal = null;
420                if (theString != null) {
421                        retVal = sanitizeUrlPart(theString.getValueAsString());
422                }
423                return retVal;
424        }
425
426        /**
427         * This method specifically HTML-encodes the &quot; and
428         * &lt; characters in order to prevent injection attacks.
429         * <p>
430         * The following characters are escaped:
431         * <ul>
432         *    <li>&apos;</li>
433         *    <li>&quot;</li>
434         *    <li>&lt;</li>
435         *    <li>&gt;</li>
436         *    <li>\n (newline)</li>
437         * </ul>
438         */
439        public static String sanitizeUrlPart(CharSequence theString) {
440                if (theString == null) {
441                        return null;
442                }
443
444                boolean needsSanitization = isNeedsSanitization(theString);
445
446                if (needsSanitization) {
447                        // Ok, we're sanitizing
448                        StringBuilder buffer = new StringBuilder(theString.length() + 10);
449                        for (int j = 0; j < theString.length(); j++) {
450
451                                char nextChar = theString.charAt(j);
452                                switch (nextChar) {
453                                        /*
454                                         * NB: If you add a constant here, you also need to add it
455                                         * to isNeedsSanitization()!!
456                                         */
457                                        case '\'':
458                                                buffer.append("&apos;");
459                                                break;
460                                        case '"':
461                                                buffer.append("&quot;");
462                                                break;
463                                        case '<':
464                                                buffer.append("&lt;");
465                                                break;
466                                        case '>':
467                                                buffer.append("&gt;");
468                                                break;
469                                        case '\n':
470                                                buffer.append("&#10;");
471                                                break;
472                                        case '\r':
473                                                buffer.append("&#13;");
474                                                break;
475                                        default:
476                                                if (nextChar >= ' ') {
477                                                        buffer.append(nextChar);
478                                                }
479                                                break;
480                                }
481
482                        } // for build escaped string
483
484                        return buffer.toString();
485                }
486
487                return theString.toString();
488        }
489
490        /**
491         * Applies the same logic as {@link #sanitizeUrlPart(CharSequence)} but against an array, returning an array with the
492         * same strings as the input but with sanitization applied
493         */
494        public static String[] sanitizeUrlPart(String[] theParameterValues) {
495                String[] retVal = null;
496                if (theParameterValues != null) {
497                        retVal = new String[theParameterValues.length];
498                        for (int i = 0; i < theParameterValues.length; i++) {
499                                retVal[i] = sanitizeUrlPart(theParameterValues[i]);
500                        }
501                }
502                return retVal;
503        }
504
505        private static Map<String, String[]> toQueryStringMap(HashMap<String, List<String>> map) {
506                HashMap<String, String[]> retVal = new HashMap<>();
507                for (Entry<String, List<String>> nextEntry : map.entrySet()) {
508                        retVal.put(nextEntry.getKey(), nextEntry.getValue().toArray(new String[0]));
509                }
510                return retVal;
511        }
512
513        public static String unescape(String theString) {
514                if (theString == null) {
515                        return null;
516                }
517                for (int i = 0; i < theString.length(); i++) {
518                        char nextChar = theString.charAt(i);
519                        if (nextChar == '%' || nextChar == '+') {
520                                try {
521                                        // Yes it would be nice to not use a string "UTF-8" but the equivalent
522                                        // method that takes Charset is JDK10+ only... sigh....
523                                        return URLDecoder.decode(theString, "UTF-8");
524                                } catch (UnsupportedEncodingException e) {
525                                        throw new Error("UTF-8 not supported, this shouldn't happen", e);
526                                }
527                        }
528                }
529                return theString;
530        }
531
532        public static List<NameValuePair> translateMatchUrl(String theMatchUrl) {
533                List<NameValuePair> parameters;
534                String matchUrl = theMatchUrl;
535                int questionMarkIndex = matchUrl.indexOf('?');
536                if (questionMarkIndex != -1) {
537                        matchUrl = matchUrl.substring(questionMarkIndex + 1);
538                }
539
540                final String[] searchList = new String[]{
541                        "+",
542                        "|",
543                        "=>=",
544                        "=<=",
545                        "=>",
546                        "=<"
547                };
548                final String[] replacementList = new String[]{
549                        "%2B",
550                        "%7C",
551                        "=%3E%3D",
552                        "=%3C%3D",
553                        "=%3E",
554                        "=%3C"
555                };
556                matchUrl = StringUtils.replaceEach(matchUrl, searchList, replacementList);
557                if (matchUrl.contains(" ")) {
558                        throw new InvalidRequestException("Failed to parse match URL[" + theMatchUrl + "] - URL is invalid (must not contain spaces)");
559                }
560
561                parameters = URLEncodedUtils.parse((matchUrl), Constants.CHARSET_UTF8, '&');
562
563                // One issue that has happened before is people putting a "+" sign into an email address in a match URL
564                // and having that turn into a " ". Since spaces are never appropriate for email addresses, let's just
565                // assume they really meant "+".
566                for (int i = 0; i < parameters.size(); i++) {
567                        NameValuePair next = parameters.get(i);
568                        if (next.getName().equals("email") && next.getValue().contains(" ")) {
569                                BasicNameValuePair newPair = new BasicNameValuePair(next.getName(), next.getValue().replace(' ', '+'));
570                                parameters.set(i, newPair);
571                        }
572                }
573
574                return parameters;
575        }
576}