001package ca.uhn.fhir.context.phonetic;
002
003/*-
004 * #%L
005 * HAPI FHIR - Core Library
006 * %%
007 * Copyright (C) 2014 - 2022 Smile CDR, Inc.
008 * %%
009 * Licensed under the Apache License, Version 2.0 (the "License");
010 * you may not use this file except in compliance with the License.
011 * You may obtain a copy of the License at
012 *
013 *      http://www.apache.org/licenses/LICENSE-2.0
014 *
015 * Unless required by applicable law or agreed to in writing, software
016 * distributed under the License is distributed on an "AS IS" BASIS,
017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
018 * See the License for the specific language governing permissions and
019 * limitations under the License.
020 * #L%
021 */
022
import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.StringEncoder;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.StringJoiner;
import java.util.regex.Pattern;
030
031public class ApacheEncoder implements IPhoneticEncoder {
032        private static final Logger ourLog = LoggerFactory.getLogger(ApacheEncoder.class);
033
034        private final String myName;
035        private final StringEncoder myStringEncoder;
036
037        public ApacheEncoder(String theName, StringEncoder theStringEncoder) {
038                myName = theName;
039                myStringEncoder = theStringEncoder;
040        }
041
042        @Override
043        public String name() {
044                return myName;
045        }
046
047        @Override
048        public String encode(String theString) {
049                try {
050                        // If the string contains a space, encode alpha parts separately so, for example, numbers are preserved in address lines.
051                        if (theString.contains(" ")) {
052                                return encodeStringWithSpaces(theString);
053                        }
054                        return myStringEncoder.encode(theString);
055                } catch (EncoderException e) {
056                        ourLog.error("Failed to encode string " + theString, e);
057                        return theString;
058                }
059        }
060
061        private String encodeStringWithSpaces(String theString) throws EncoderException {
062                StringJoiner joiner = new StringJoiner(" ");
063
064                // This sub-stack holds the alpha parts
065                StringJoiner alphaJoiner = new StringJoiner(" ");
066
067                for (String part : theString.split("[\\s\\W]+")) {
068                        if (StringUtils.isAlpha(part)) {
069                                alphaJoiner.add(part);
070                        } else {
071                                // Once we hit a non-alpha part, encode all the alpha parts together as a single string
072                                // This is to allow encoders like METAPHONE to match Hans Peter to Hanspeter
073                                alphaJoiner = encodeAlphaParts(joiner, alphaJoiner);
074                                joiner.add(part);
075                        }
076                }
077                encodeAlphaParts(joiner, alphaJoiner);
078
079                return joiner.toString();
080        }
081
082        private StringJoiner encodeAlphaParts(StringJoiner theJoiner, StringJoiner theAlphaJoiner) throws EncoderException {
083                // Encode the alpha parts as a single string and then flush the alpha encoder
084                if (theAlphaJoiner.length() > 0) {
085                        theJoiner.add(myStringEncoder.encode(theAlphaJoiner.toString()));
086                        theAlphaJoiner = new StringJoiner(" ");
087                }
088                return theAlphaJoiner;
089        }
090}