package ca.uhn.fhir.util;

/*-
 * #%L
 * HAPI FHIR - Core Library
 * %%
 * Copyright (C) 2014 - 2021 Smile CDR, Inc.
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import java.io.CharArrayWriter;
import java.nio.charset.StandardCharsets;
import java.text.Normalizer;
import java.util.Arrays;
import java.util.Locale;

/**
 * Static helpers for trimming, normalizing, decoding and truncating strings.
 */
public class StringUtil {

	/**
	 * If a string ends with a given character, remove that character from the end of the string
	 * (as many times as it occurs at the end).
	 *
	 * @param theInput
	 *           String to trim, may be <code>null</code>
	 * @param theCharacter
	 *           Character to strip from the end of the string
	 * @return
	 *           The input without any trailing occurrences of the character, or <code>null</code>
	 *           if the input was <code>null</code>
	 */
	public static String chompCharacter(String theInput, char theCharacter) {
		if (theInput == null) {
			return null;
		}

		// Find the cut point first so that only a single substring is created,
		// no matter how many trailing characters have to be dropped.
		int end = theInput.length();
		while (end > 0 && theInput.charAt(end - 1) == theCharacter) {
			end--;
		}
		return theInput.substring(0, end);
	}

	/**
	 * Produces the form of a string used for search indexing: combining diacritical
	 * marks are removed and the result is converted to upper case.
	 *
	 * @param theString
	 *           String to normalize, may be <code>null</code>
	 * @return
	 *           The normalized string, or <code>null</code> if the input was <code>null</code>
	 */
	public static String normalizeStringForSearchIndexing(String theString) {
		if (theString == null) {
			return null;
		}

		/*
		 * The following block of code is used to strip out diacritical marks from latin script
		 * and also convert to upper case. E.g. "james" written with an accented "a" becomes "JAMES".
		 *
		 * See http://www.unicode.org/charts/PDF/U0300.pdf for the logic
		 * behind stripping 0300-036F
		 *
		 * See #454 for an issue where we were completely stripping non latin characters
		 * See #832 for an issue where we normalize korean characters, which are decomposed
		 */
		String decomposed = Normalizer.normalize(theString, Normalizer.Form.NFD);
		CharArrayWriter outBuffer = new CharArrayWriter(decomposed.length());
		for (int i = 0, n = decomposed.length(); i < n; ++i) {
			char c = decomposed.charAt(i);
			if (c < '\u0300' || c > '\u036F') {
				outBuffer.append(c);
			}
		}

		// Upper-case with a fixed locale: with the JVM default locale the result would
		// vary between hosts (e.g. under a Turkish locale "i" maps to U+0130, not "I").
		return outBuffer.toString().toUpperCase(Locale.ROOT);
	}

	/**
	 * Decodes a byte array as UTF-8, dropping a leading UTF-8 byte order mark if one is present.
	 *
	 * @param theBytes
	 *           Bytes to decode, may be <code>null</code>
	 * @return
	 *           The decoded string, or <code>null</code> if the input was <code>null</code>
	 */
	public static String toUtf8String(byte[] theBytes) {
		if (theBytes == null) {
			return null;
		}

		byte[] bytes = theBytes;
		// EF BB BF is the UTF-8 encoding of the byte order mark (U+FEFF)
		if (theBytes.length >= 3
			&& theBytes[0] == (byte) 0xEF
			&& theBytes[1] == (byte) 0xBB
			&& theBytes[2] == (byte) 0xBF) {
			bytes = Arrays.copyOfRange(theBytes, 3, theBytes.length);
		}
		return new String(bytes, StandardCharsets.UTF_8);
	}

	/**
	 * Gets the string prefix of the specified length.
	 *
	 * @param theString
	 *           String to get the prefix from, may be <code>null</code>
	 * @param theCodePointCount
	 *           Length of the prefix in code points
	 * @return
	 *           Returns the string prefix of the specified number of codepoints. Returns
	 *           <code>null</code> for a <code>null</code> input, an empty string for a negative
	 *           count, and the input itself if it has no more chars than the requested count.
	 */
	public static String left(String theString, int theCodePointCount) {
		if (theString == null) {
			return null;
		}

		if (theCodePointCount < 0) {
			return "";
		}

		// char count can only be bigger than the code point count, so a string
		// with this few chars cannot contain more code points than requested
		if (theString.length() <= theCodePointCount) {
			return theString;
		}

		int endIndex = theString.offsetByCodePoints(0, theCodePointCount);
		return theString.substring(0, endIndex);
	}

}