001package ca.uhn.fhir.context.phonetic; 002 003/*- 004 * #%L 005 * HAPI FHIR - Core Library 006 * %% 007 * Copyright (C) 2014 - 2022 Smile CDR, Inc. 008 * %% 009 * Licensed under the Apache License, Version 2.0 (the "License"); 010 * you may not use this file except in compliance with the License. 011 * You may obtain a copy of the License at 012 * 013 * http://www.apache.org/licenses/LICENSE-2.0 014 * 015 * Unless required by applicable law or agreed to in writing, software 016 * distributed under the License is distributed on an "AS IS" BASIS, 017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 018 * See the License for the specific language governing permissions and 019 * limitations under the License. 020 * #L% 021 */ 022 023import org.apache.commons.codec.EncoderException; 024import org.apache.commons.codec.StringEncoder; 025import org.apache.commons.lang3.StringUtils; 026import org.slf4j.Logger; 027import org.slf4j.LoggerFactory; 028 029import java.util.StringJoiner; 030 031public class ApacheEncoder implements IPhoneticEncoder { 032 private static final Logger ourLog = LoggerFactory.getLogger(ApacheEncoder.class); 033 034 private final String myName; 035 private final StringEncoder myStringEncoder; 036 037 public ApacheEncoder(String theName, StringEncoder theStringEncoder) { 038 myName = theName; 039 myStringEncoder = theStringEncoder; 040 } 041 042 @Override 043 public String name() { 044 return myName; 045 } 046 047 @Override 048 public String encode(String theString) { 049 try { 050 // If the string contains a space, encode alpha parts separately so, for example, numbers are preserved in address lines. 051 if (theString.contains(" ")) { 052 return encodeStringWithSpaces(theString); 053 } 054 return myStringEncoder.encode(theString); 055 } catch (EncoderException e) { 056 ourLog.error("Failed to encode string " + theString, e); 057 return theString; 058 } 059 } 060 061 private String encodeStringWithSpaces(String theString) throws EncoderException { 062 StringJoiner joiner = new StringJoiner(" "); 063 064 // This sub-stack holds the alpha parts 065 StringJoiner alphaJoiner = new StringJoiner(" "); 066 067 for (String part : theString.split("[\\s\\W]+")) { 068 if (StringUtils.isAlpha(part)) { 069 alphaJoiner.add(part); 070 } else { 071 // Once we hit a non-alpha part, encode all the alpha parts together as a single string 072 // This is to allow encoders like METAPHONE to match Hans Peter to Hanspeter 073 alphaJoiner = encodeAlphaParts(joiner, alphaJoiner); 074 joiner.add(part); 075 } 076 } 077 encodeAlphaParts(joiner, alphaJoiner); 078 079 return joiner.toString(); 080 } 081 082 private StringJoiner encodeAlphaParts(StringJoiner theJoiner, StringJoiner theAlphaJoiner) throws EncoderException { 083 // Encode the alpha parts as a single string and then flush the alpha encoder 084 if (theAlphaJoiner.length() > 0) { 085 theJoiner.add(myStringEncoder.encode(theAlphaJoiner.toString())); 086 theAlphaJoiner = new StringJoiner(" "); 087 } 088 return theAlphaJoiner; 089 } 090}