001package org.hl7.fhir.utilities.xml;
002
003/*
004  Copyright (c) 2011+, HL7, Inc.
005  All rights reserved.
006  
007  Redistribution and use in source and binary forms, with or without modification, 
008  are permitted provided that the following conditions are met:
009    
010   * Redistributions of source code must retain the above copyright notice, this 
011     list of conditions and the following disclaimer.
012   * Redistributions in binary form must reproduce the above copyright notice, 
013     this list of conditions and the following disclaimer in the documentation 
014     and/or other materials provided with the distribution.
015   * Neither the name of HL7 nor the names of its contributors may be used to 
016     endorse or promote products derived from this software without specific 
017     prior written permission.
018  
019  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
020  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
021  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
022  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
023  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
024  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
025  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
026  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
027  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
028  POSSIBILITY OF SUCH DAMAGE.
029  
030 */
031
032
033
034import java.io.ByteArrayInputStream;
035import java.io.File;
036import java.io.FileInputStream;
037import java.io.IOException;
038import java.io.InputStream;
039import java.io.InputStreamReader;
040import java.io.OutputStream;
041import java.util.ArrayList;
042import java.util.List;
043import java.util.Set;
044
045import javax.xml.parsers.DocumentBuilder;
046import javax.xml.parsers.DocumentBuilderFactory;
047import javax.xml.parsers.ParserConfigurationException;
048import javax.xml.transform.Result;
049import javax.xml.transform.Source;
050import javax.xml.transform.Transformer;
051import javax.xml.transform.TransformerException;
052import javax.xml.transform.TransformerFactory;
053import javax.xml.transform.dom.DOMSource;
054import javax.xml.transform.stream.StreamResult;
055
056import org.hl7.fhir.exceptions.FHIRException;
057import org.hl7.fhir.utilities.Utilities;
058import org.w3c.dom.Attr;
059import org.w3c.dom.Document;
060import org.w3c.dom.Element;
061import org.w3c.dom.Node;
062import org.w3c.dom.ls.DOMImplementationLS;
063import org.w3c.dom.ls.LSSerializer;
064import org.xml.sax.SAXException;
065
066public class XMLUtil {
067
        /** A one-character string holding the no-break space, code point U+00A0. */
        public static final String SPACE_CHAR = "\u00A0";
069
070  public static boolean isNMToken(String name) {
071                if (name == null)
072                        return false;
073                for (int i = 0; i < name.length(); i++) 
074                        if (!isNMTokenChar(name.charAt(i)))
075                                return false;   
076                return name.length() > 0;
077        }
078
079        public static boolean isNMTokenChar(char c) {
080                return isLetter(c) || isDigit(c) || c == '.' || c == '-' || c == '_' || c == ':' || isCombiningChar(c) || isExtender(c);
081        }
082
083        private static boolean isDigit(char c) {
084                return (c >= '\u0030' && c <= '\u0039') || (c >= '\u0660' && c <= '\u0669') || (c >= '\u06F0' && c <= '\u06F9') || 
085                        (c >= '\u0966' && c <= '\u096F') || (c >= '\u09E6' && c <= '\u09EF') || (c >= '\u0A66' && c <= '\u0A6F') || 
086                        (c >= '\u0AE6' && c <= '\u0AEF') || (c >= '\u0B66' && c <= '\u0B6F') || (c >= '\u0BE7' && c <= '\u0BEF') || 
087                        (c >= '\u0C66' && c <= '\u0C6F') || (c >= '\u0CE6' && c <= '\u0CEF') || (c >= '\u0D66' && c <= '\u0D6F') || 
088                        (c >= '\u0E50' && c <= '\u0E59') || (c >= '\u0ED0' && c <= '\u0ED9') || (c >= '\u0F20' && c <= '\u0F29');
089        }
090
        /**
         * Checks whether a character lies in one of the hard-coded ranges or single
         * code points listed in the body.
         *
         * NOTE(review): the name and the way isNMTokenChar uses this method suggest the
         * table mirrors the CombiningChar character class of XML 1.0 - confirm against
         * the specification before editing any range.
         *
         * @param c the character to classify
         * @return true when c matches any listed range or single value
         */
        private static boolean isCombiningChar(char c) {
                return (c >= '\u0300' && c <= '\u0345') || (c >= '\u0360' && c <= '\u0361') || (c >= '\u0483' && c <= '\u0486') || 
                        (c >= '\u0591' && c <= '\u05A1') || (c >= '\u05A3' && c <= '\u05B9') || (c >= '\u05BB' && c <= '\u05BD') || 
                        c == '\u05BF' || (c >= '\u05C1' && c <= '\u05C2') || c == '\u05C4' || (c >= '\u064B' && c <= '\u0652') || 
                        c == '\u0670' || (c >= '\u06D6' && c <= '\u06DC') || (c >= '\u06DD' && c <= '\u06DF') || (c >= '\u06E0' && c <= '\u06E4') || 
                        (c >= '\u06E7' && c <= '\u06E8') || (c >= '\u06EA' && c <= '\u06ED') || (c >= '\u0901' && c <= '\u0903') || c == '\u093C' || 
                        (c >= '\u093E' && c <= '\u094C') || c == '\u094D' || (c >= '\u0951' && c <= '\u0954') || (c >= '\u0962' && c <= '\u0963') || 
                        (c >= '\u0981' && c <= '\u0983') || c == '\u09BC' || c == '\u09BE' || c == '\u09BF' || (c >= '\u09C0' && c <= '\u09C4') || 
                        (c >= '\u09C7' && c <= '\u09C8') || (c >= '\u09CB' && c <= '\u09CD') || c == '\u09D7' || (c >= '\u09E2' && c <= '\u09E3') || 
                        c == '\u0A02' || c == '\u0A3C' || c == '\u0A3E' || c == '\u0A3F' || (c >= '\u0A40' && c <= '\u0A42') || 
                        (c >= '\u0A47' && c <= '\u0A48') || (c >= '\u0A4B' && c <= '\u0A4D') || (c >= '\u0A70' && c <= '\u0A71') || 
                        (c >= '\u0A81' && c <= '\u0A83') || c == '\u0ABC' || (c >= '\u0ABE' && c <= '\u0AC5') || (c >= '\u0AC7' && c <= '\u0AC9') || 
                        (c >= '\u0ACB' && c <= '\u0ACD') || (c >= '\u0B01' && c <= '\u0B03') || c == '\u0B3C' || (c >= '\u0B3E' && c <= '\u0B43') || 
                        (c >= '\u0B47' && c <= '\u0B48') || (c >= '\u0B4B' && c <= '\u0B4D') || (c >= '\u0B56' && c <= '\u0B57') || 
                        (c >= '\u0B82' && c <= '\u0B83') || (c >= '\u0BBE' && c <= '\u0BC2') || (c >= '\u0BC6' && c <= '\u0BC8') || 
                        (c >= '\u0BCA' && c <= '\u0BCD') || c == '\u0BD7' || (c >= '\u0C01' && c <= '\u0C03') || (c >= '\u0C3E' && c <= '\u0C44') || 
                        (c >= '\u0C46' && c <= '\u0C48') || (c >= '\u0C4A' && c <= '\u0C4D') || (c >= '\u0C55' && c <= '\u0C56') || 
                        (c >= '\u0C82' && c <= '\u0C83') || (c >= '\u0CBE' && c <= '\u0CC4') || (c >= '\u0CC6' && c <= '\u0CC8') || 
                        (c >= '\u0CCA' && c <= '\u0CCD') || (c >= '\u0CD5' && c <= '\u0CD6') || (c >= '\u0D02' && c <= '\u0D03') || 
                        (c >= '\u0D3E' && c <= '\u0D43') || (c >= '\u0D46' && c <= '\u0D48') || (c >= '\u0D4A' && c <= '\u0D4D') || c == '\u0D57' || 
                        c == '\u0E31' || (c >= '\u0E34' && c <= '\u0E3A') || (c >= '\u0E47' && c <= '\u0E4E') || c == '\u0EB1' || 
                        (c >= '\u0EB4' && c <= '\u0EB9') || (c >= '\u0EBB' && c <= '\u0EBC') || (c >= '\u0EC8' && c <= '\u0ECD') || 
                        (c >= '\u0F18' && c <= '\u0F19') || c == '\u0F35' || c == '\u0F37' || c == '\u0F39' || c == '\u0F3E' || c == '\u0F3F' || 
                        (c >= '\u0F71' && c <= '\u0F84') || (c >= '\u0F86' && c <= '\u0F8B') || (c >= '\u0F90' && c <= '\u0F95') || c == '\u0F97' || 
                        (c >= '\u0F99' && c <= '\u0FAD') || (c >= '\u0FB1' && c <= '\u0FB7') || c == '\u0FB9' || (c >= '\u20D0' && c <= '\u20DC') ||
                        c == '\u20E1' || (c >= '\u302A' && c <= '\u302F') || c == '\u3099' || c == '\u309A';
        }
118
119        private static boolean isExtender(char c) {
120                return c == '\u00B7' || c == '\u02D0' || c == '\u02D1' || c == '\u0387' || c == '\u0640' || c == '\u0E46' || 
121                        c == '\u0EC6' || c == '\u3005' || (c >= '\u3031' && c <= '\u3035') || (c >= '\u309D' && c <= '\u309E') || 
122                        (c >= '\u30FC' && c <= '\u30FE');
123        }
124
125        private static boolean isLetter(char c) {
126                return isBaseChar(c) || isIdeographic(c);
127        }
128
        /**
         * Checks whether a character lies in one of the hard-coded ranges or single
         * code points listed in the body.
         *
         * NOTE(review): the name and its use from isLetter suggest the table mirrors the
         * BaseChar character class of XML 1.0 - confirm against the specification before
         * editing any range.
         *
         * @param c the character to classify
         * @return true when c matches any listed range or single value
         */
        private static boolean isBaseChar(char c) {
                return (c >= '\u0041' && c <= '\u005A') || (c >= '\u0061' && c <= '\u007A') || (c >= '\u00C0' && c <= '\u00D6') || 
                        (c >= '\u00D8' && c <= '\u00F6') || (c >= '\u00F8' && c <= '\u00FF') || (c >= '\u0100' && c <= '\u0131') || 
                        (c >= '\u0134' && c <= '\u013E') || (c >= '\u0141' && c <= '\u0148') || (c >= '\u014A' && c <= '\u017E') || 
                        (c >= '\u0180' && c <= '\u01C3') || (c >= '\u01CD' && c <= '\u01F0') || (c >= '\u01F4' && c <= '\u01F5') || 
                        (c >= '\u01FA' && c <= '\u0217') || (c >= '\u0250' && c <= '\u02A8') || (c >= '\u02BB' && c <= '\u02C1') || 
                        c == '\u0386' || (c >= '\u0388' && c <= '\u038A') || c == '\u038C' || (c >= '\u038E' && c <= '\u03A1') || 
                        (c >= '\u03A3' && c <= '\u03CE') || (c >= '\u03D0' && c <= '\u03D6') || c == '\u03DA' || c == '\u03DC' || c == '\u03DE' || 
                        c == '\u03E0' || (c >= '\u03E2' && c <= '\u03F3') || (c >= '\u0401' && c <= '\u040C') || (c >= '\u040E' && c <= '\u044F') || 
                        (c >= '\u0451' && c <= '\u045C') || (c >= '\u045E' && c <= '\u0481') || (c >= '\u0490' && c <= '\u04C4') || 
                        (c >= '\u04C7' && c <= '\u04C8') || (c >= '\u04CB' && c <= '\u04CC') || (c >= '\u04D0' && c <= '\u04EB') || 
                        (c >= '\u04EE' && c <= '\u04F5') || (c >= '\u04F8' && c <= '\u04F9') || (c >= '\u0531' && c <= '\u0556') || 
                        c == '\u0559' || (c >= '\u0561' && c <= '\u0586') || (c >= '\u05D0' && c <= '\u05EA') || (c >= '\u05F0' && c <= '\u05F2') || 
                        (c >= '\u0621' && c <= '\u063A') || (c >= '\u0641' && c <= '\u064A') || (c >= '\u0671' && c <= '\u06B7') || 
                        (c >= '\u06BA' && c <= '\u06BE') || (c >= '\u06C0' && c <= '\u06CE') || (c >= '\u06D0' && c <= '\u06D3') || 
                        c == '\u06D5' || (c >= '\u06E5' && c <= '\u06E6') || (c >= '\u0905' && c <= '\u0939') || c == '\u093D' || 
                        (c >= '\u0958' && c <= '\u0961') || (c >= '\u0985' && c <= '\u098C') || (c >= '\u098F' && c <= '\u0990') || 
                        (c >= '\u0993' && c <= '\u09A8') || (c >= '\u09AA' && c <= '\u09B0') || c == '\u09B2' || 
                        (c >= '\u09B6' && c <= '\u09B9') || (c >= '\u09DC' && c <= '\u09DD') || (c >= '\u09DF' && c <= '\u09E1') || 
                        (c >= '\u09F0' && c <= '\u09F1') || (c >= '\u0A05' && c <= '\u0A0A') || (c >= '\u0A0F' && c <= '\u0A10') || 
                        (c >= '\u0A13' && c <= '\u0A28') || (c >= '\u0A2A' && c <= '\u0A30') || (c >= '\u0A32' && c <= '\u0A33') || 
                        (c >= '\u0A35' && c <= '\u0A36') || (c >= '\u0A38' && c <= '\u0A39') || (c >= '\u0A59' && c <= '\u0A5C') || 
                        c == '\u0A5E' || (c >= '\u0A72' && c <= '\u0A74') || (c >= '\u0A85' && c <= '\u0A8B') || c == '\u0A8D' || 
                        (c >= '\u0A8F' && c <= '\u0A91') || (c >= '\u0A93' && c <= '\u0AA8') || (c >= '\u0AAA' && c <= '\u0AB0') || 
                        (c >= '\u0AB2' && c <= '\u0AB3') || (c >= '\u0AB5' && c <= '\u0AB9') || c == '\u0ABD' || c == '\u0AE0' || 
                        (c >= '\u0B05' && c <= '\u0B0C') || (c >= '\u0B0F' && c <= '\u0B10') || (c >= '\u0B13' && c <= '\u0B28') || 
                        (c >= '\u0B2A' && c <= '\u0B30') || (c >= '\u0B32' && c <= '\u0B33') || (c >= '\u0B36' && c <= '\u0B39') || 
                        c == '\u0B3D' || (c >= '\u0B5C' && c <= '\u0B5D') || (c >= '\u0B5F' && c <= '\u0B61') || 
                        (c >= '\u0B85' && c <= '\u0B8A') || (c >= '\u0B8E' && c <= '\u0B90') || (c >= '\u0B92' && c <= '\u0B95') || 
                        (c >= '\u0B99' && c <= '\u0B9A') || c == '\u0B9C' || (c >= '\u0B9E' && c <= '\u0B9F') || 
                        (c >= '\u0BA3' && c <= '\u0BA4') || (c >= '\u0BA8' && c <= '\u0BAA') || (c >= '\u0BAE' && c <= '\u0BB5') || 
                        (c >= '\u0BB7' && c <= '\u0BB9') || (c >= '\u0C05' && c <= '\u0C0C') || (c >= '\u0C0E' && c <= '\u0C10') || 
                        (c >= '\u0C12' && c <= '\u0C28') || (c >= '\u0C2A' && c <= '\u0C33') || (c >= '\u0C35' && c <= '\u0C39') || 
                        (c >= '\u0C60' && c <= '\u0C61') || (c >= '\u0C85' && c <= '\u0C8C') || (c >= '\u0C8E' && c <= '\u0C90') || 
                        (c >= '\u0C92' && c <= '\u0CA8') || (c >= '\u0CAA' && c <= '\u0CB3') || (c >= '\u0CB5' && c <= '\u0CB9') || 
                        c == '\u0CDE' || (c >= '\u0CE0' && c <= '\u0CE1') || (c >= '\u0D05' && c <= '\u0D0C') || 
                        (c >= '\u0D0E' && c <= '\u0D10') || (c >= '\u0D12' && c <= '\u0D28') || (c >= '\u0D2A' && c <= '\u0D39') || 
                        (c >= '\u0D60' && c <= '\u0D61') || (c >= '\u0E01' && c <= '\u0E2E') || c == '\u0E30' || 
                        (c >= '\u0E32' && c <= '\u0E33') || (c >= '\u0E40' && c <= '\u0E45') || (c >= '\u0E81' && c <= '\u0E82') || 
                        c == '\u0E84' || (c >= '\u0E87' && c <= '\u0E88') || c == '\u0E8A' || c == '\u0E8D' || (c >= '\u0E94' && c <= '\u0E97') || 
                        (c >= '\u0E99' && c <= '\u0E9F') || (c >= '\u0EA1' && c <= '\u0EA3') || c == '\u0EA5' || c == '\u0EA7' || 
                        (c >= '\u0EAA' && c <= '\u0EAB') || (c >= '\u0EAD' && c <= '\u0EAE') || c == '\u0EB0' || 
                        (c >= '\u0EB2' && c <= '\u0EB3') || c == '\u0EBD' || (c >= '\u0EC0' && c <= '\u0EC4') || 
                        (c >= '\u0F40' && c <= '\u0F47') || (c >= '\u0F49' && c <= '\u0F69') || (c >= '\u10A0' && c <= '\u10C5') || 
                        (c >= '\u10D0' && c <= '\u10F6') || c == '\u1100' || (c >= '\u1102' && c <= '\u1103') || 
                        (c >= '\u1105' && c <= '\u1107') || c == '\u1109' || (c >= '\u110B' && c <= '\u110C') || 
                        (c >= '\u110E' && c <= '\u1112') || c == '\u113C' || c == '\u113E' || c == '\u1140' || c == '\u114C' || 
                        c == '\u114E' || c == '\u1150' || (c >= '\u1154' && c <= '\u1155') || c == '\u1159' || 
                        (c >= '\u115F' && c <= '\u1161') || c == '\u1163' || c == '\u1165' || c == '\u1167' || c == '\u1169' || 
                        (c >= '\u116D' && c <= '\u116E') || (c >= '\u1172' && c <= '\u1173') || c == '\u1175' || 
                        c == '\u119E' || c == '\u11A8' || c == '\u11AB' || (c >= '\u11AE' && c <= '\u11AF') || 
                        (c >= '\u11B7' && c <= '\u11B8') || c == '\u11BA' || (c >= '\u11BC' && c <= '\u11C2') || 
                        c == '\u11EB' || c == '\u11F0' || c == '\u11F9' || (c >= '\u1E00' && c <= '\u1E9B') || (c >= '\u1EA0' && c <= '\u1EF9') || 
                        (c >= '\u1F00' && c <= '\u1F15') || (c >= '\u1F18' && c <= '\u1F1D') || (c >= '\u1F20' && c <= '\u1F45') || 
                        (c >= '\u1F48' && c <= '\u1F4D') || (c >= '\u1F50' && c <= '\u1F57') || c == '\u1F59' || c == '\u1F5B' || c == '\u1F5D' || 
                        (c >= '\u1F5F' && c <= '\u1F7D') || (c >= '\u1F80' && c <= '\u1FB4') || (c >= '\u1FB6' && c <= '\u1FBC') || 
                        c == '\u1FBE' || (c >= '\u1FC2' && c <= '\u1FC4') || (c >= '\u1FC6' && c <= '\u1FCC') || 
                        (c >= '\u1FD0' && c <= '\u1FD3') || (c >= '\u1FD6' && c <= '\u1FDB') || (c >= '\u1FE0' && c <= '\u1FEC') || 
                        (c >= '\u1FF2' && c <= '\u1FF4') || (c >= '\u1FF6' && c <= '\u1FFC') || c == '\u2126' || 
                        (c >= '\u212A' && c <= '\u212B') || c == '\u212E' || (c >= '\u2180' && c <= '\u2182') || 
                        (c >= '\u3041' && c <= '\u3094') || (c >= '\u30A1' && c <= '\u30FA') || (c >= '\u3105' && c <= '\u312C') || 
                        (c >= '\uAC00' && c <= '\uD7A3');
        }
192
193        private static boolean isIdeographic(char c) {
194                return (c >= '\u4E00' && c <= '\u9FA5') || c == '\u3007' || (c >= '\u3021' && c <= '\u3029');
195        }
196
197        public static String determineEncoding(InputStream stream) throws IOException {
198                stream.mark(20000);
199                try {
200                        int b0 = stream.read();
201                        int b1 = stream.read();
202                        int b2 = stream.read();
203                        int b3 = stream.read();
204
205                        if (b0 == 0xFE && b1 == 0xFF)
206                                return "UTF-16BE";
207                        else if (b0 == 0xFF && b1 == 0xFE)
208                                return "UTF-16LE";
209                        else if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF )
210                                return "UTF-8";
211                        else if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F)
212                                return "UTF-16BE";
213                        else if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00)
214                                return "UTF-16LE";
215                        else if (b0 == 0x3C && b1 == 0x3F && b2 == 0x78 && b3 == 0x6D) {
216//                              UTF-8, ISO 646, ASCII, some part of ISO 8859, Shift-JIS, EUC, or any other 7-bit, 8-bit, or mixed-width encoding 
217//                              which ensures that the characters of ASCII have their normal positions, width, and values; the actual encoding 
218//                              declaration must be read to detect which of these applies, but since all of these encodings use the same bit patterns 
219//                              for the relevant ASCII characters, the encoding declaration itself may be read reliably
220                                InputStreamReader rdr = new InputStreamReader(stream, "US-ASCII");
221                                String hdr = readFirstLine(rdr);
222                                return extractEncoding(hdr); 
223                        } else
224                                return null;
225                } finally {
226                        stream.reset();
227                }
228        }
229
230        private static String extractEncoding(String hdr) {
231                int i = hdr.indexOf("encoding=");
232                if (i == -1)
233                        return null;
234                hdr = hdr.substring(i+9);
235                char sep = hdr.charAt(0);
236                hdr = hdr.substring(1);
237                i = hdr.indexOf(sep);
238                if (i == -1)
239                        return null;
240                return hdr.substring(0, i);
241        }
242
243        private static String readFirstLine(InputStreamReader rdr) throws IOException {
244                char[] buf = new char[1];
245                StringBuffer bldr = new StringBuffer();
246                rdr.read(buf);
247                while (buf[0] != '>') {
248                        bldr.append(buf[0]);
249                        rdr.read(buf);
250                }
251                return bldr.toString();
252        }
253
254        
255    public static boolean charSetImpliesAscii(String charset) {
256                return charset.equals("ISO-8859-1") || charset.equals("US-ASCII");
257        }
258
259        
260        /**
261         * Converts the raw characters to XML escape characters.
262         * 
263         * @param rawContent
264         * @param charset Null when charset is not known, so we assume it's unicode
265         * @param isNoLines
266         * @return escape string
267         */
268        public static String escapeXML(String rawContent, String charset, boolean isNoLines) {
269                if (rawContent == null)
270                        return "";
271                else {
272                        StringBuffer sb = new StringBuffer();
273
274                        for (int i = 0; i < rawContent.length(); i++) {
275                                char ch = rawContent.charAt(i);
276                                // We don't escape ' because our code always spits out attributes surrounded by "", which means
277                                // it's not necessary to escape ' - and it's *much* less ugly and more bandwidth-efficient when we don't.
278                                if (ch == '&')
279                                        sb.append("&amp;");
280                                else if (ch == '"')
281                                        sb.append("&quot;");
282                                else if (ch == '<')
283                                        sb.append("&lt;");
284                                else if (ch == '>')
285                                        sb.append("&gt;");
286                                else if (ch > '~' && charset != null && charSetImpliesAscii(charset)) 
287                                        // TODO - why is hashcode the only way to get the unicode number for the character
288                                        // in jre 5.0?
289                                        sb.append("&#x"+Integer.toHexString(ch).toUpperCase()+";");
290                                else if (isNoLines) {
291                                        if (ch == '\r')
292                                                sb.append("&#xA;");
293                                        else if (ch != '\n')
294                                                sb.append(ch);
295                                }
296                                else
297                                        sb.append(ch);
298                        }
299                        return sb.toString();
300                }
301        }
302
303  public static Element getFirstChild(Element e) {
304    if (e == null)
305      return null;
306    Node n = e.getFirstChild();
307    while (n != null && n.getNodeType() != Node.ELEMENT_NODE)
308      n = n.getNextSibling();
309    return (Element) n;
310  }
311
312  public static Element getNamedChild(Element e, String name) {
313    Element c = getFirstChild(e);
314    while (c != null && !name.equals(c.getLocalName()) && !name.equals(c.getNodeName()))
315      c = getNextSibling(c);
316    return c;
317  }
318
319  public static Element getNamedChildByAttribute(Element e, String name, String nname, String nvalue) {
320    Element c = getFirstChild(e);
321    while (c != null && !((name.equals(c.getLocalName()) || name.equals(c.getNodeName())) && nvalue.equals(c.getAttribute(nname))))
322      c = getNextSibling(c);
323    return c;
324  }
325
326  public static Element getNextSibling(Element e) {
327    Node n = e.getNextSibling();
328    while (n != null && n.getNodeType() != Node.ELEMENT_NODE)
329      n = n.getNextSibling();
330    return (Element) n;
331  }
332
333  public static void getNamedChildren(Element e, String name, List<Element> set) {
334    Element c = getFirstChild(e);
335    while (c != null) {
336      if (name.equals(c.getLocalName()) || name.equals(c.getNodeName()) )
337        set.add(c);
338      c = getNextSibling(c);
339    }
340  }
341
342  public static List<Element> getNamedChildren(Element e, String name) {
343    List<Element> res = new ArrayList<Element>();
344    Element c = getFirstChild(e);
345    while (c != null) {
346      if (name.equals(c.getLocalName()) || name.equals(c.getNodeName()) )
347        res.add(c);
348      c = getNextSibling(c);
349    }
350    return res;
351  }
352
353  public static String htmlToXmlEscapedPlainText(Element r) {
354    StringBuilder s = new StringBuilder();
355    Node n = r.getFirstChild();
356    boolean ws = false;
357    while (n != null) {
358      if (n.getNodeType() == Node.TEXT_NODE) {
359        String t = n.getTextContent().trim();
360        if (Utilities.noString(t))
361          ws = true;
362        else {
363          if (ws)
364            s.append(" ");
365          ws = false;
366          s.append(t);
367        }
368      }
369      if (n.getNodeType() == Node.ELEMENT_NODE) {
370        if (ws)
371          s.append(" ");
372        ws = false;
373        s.append(htmlToXmlEscapedPlainText((Element) n));
374        if (r.getNodeName().equals("br") || r.getNodeName().equals("p"))
375          s.append("\r\n");
376      }
377      n = n.getNextSibling();      
378    }
379    return s.toString();
380  }
381
382  public static String htmlToXmlEscapedPlainText(String definition) throws ParserConfigurationException, SAXException, IOException  {
383    return htmlToXmlEscapedPlainText(parseToDom("<div>"+definition+"</div>").getDocumentElement());
384  }
385
386  public static String elementToString(Element el) {
387    if (el == null)
388      return "";
389    Document document = el.getOwnerDocument();
390    DOMImplementationLS domImplLS = (DOMImplementationLS) document
391        .getImplementation();
392    LSSerializer serializer = domImplLS.createLSSerializer();
393    return serializer.writeToString(el);
394  }
395
396  public static String getNamedChildValue(Element element, String name) {
397    Element e = getNamedChild(element, name);
398    return e == null ? null : e.getAttribute("value");
399  }
400
401  public static void setNamedChildValue(Element element, String name, String value) throws FHIRException  {
402    Element e = getNamedChild(element, name);
403    if (e == null)
404      throw new FHIRException("unable to find element "+name);
405    e.setAttribute("value", value);
406  }
407
408
409        public static void getNamedChildrenWithWildcard(Element focus, String name, List<Element> children) {
410    Element c = getFirstChild(focus);
411    while (c != null) {
412        String n = c.getLocalName() != null ? c.getLocalName() : c.getNodeName(); 
413      if (name.equals(n) || (name.endsWith("[x]") && n.startsWith(name.substring(0, name.length()-3))))
414        children.add(c);
415      c = getNextSibling(c);
416    }
417  }
418
419        public static void getNamedChildrenWithTails(Element focus, String name, List<Element> children, Set<String> typeTails) {
420    Element c = getFirstChild(focus);
421    while (c != null) {
422      String n = c.getLocalName() != null ? c.getLocalName() : c.getNodeName(); 
423      if (n.equals(name) || (!n.equals("responseCode") && (n.startsWith(name) && typeTails.contains(n.substring(name.length())))))
424        children.add(c);
425      c = getNextSibling(c);
426    }
427  }
428        
429  public static boolean hasNamedChild(Element e, String name) {
430    Element c = getFirstChild(e);
431    while (c != null && !name.equals(c.getLocalName()) && !name.equals(c.getNodeName()))
432      c = getNextSibling(c);
433    return c != null;
434  }
435
436  public static Document parseToDom(String content) throws ParserConfigurationException, SAXException, IOException  {
437    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
438    factory.setNamespaceAware(false);
439    DocumentBuilder builder = factory.newDocumentBuilder();
440    return builder.parse(new ByteArrayInputStream(content.getBytes()));
441  }
442
443  public static Document parseToDom(byte[] content) throws ParserConfigurationException, SAXException, IOException  {
444    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
445    factory.setNamespaceAware(false);
446    DocumentBuilder builder = factory.newDocumentBuilder();
447    return builder.parse(new ByteArrayInputStream(content));
448  }
449
450  public static Document parseToDom(byte[] content, boolean ns) throws ParserConfigurationException, SAXException, IOException  {
451    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
452    factory.setNamespaceAware(ns);
453    DocumentBuilder builder = factory.newDocumentBuilder();
454    return builder.parse(new ByteArrayInputStream(content));
455  }
456
457  public static Document parseFileToDom(String filename) throws ParserConfigurationException, SAXException, IOException  {
458    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
459    factory.setNamespaceAware(false);
460    DocumentBuilder builder = factory.newDocumentBuilder();
461    return builder.parse(new FileInputStream(filename));
462  }
463
464  public static Document parseFileToDom(String filename, boolean ns) throws ParserConfigurationException, SAXException, IOException  {
465    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
466    factory.setNamespaceAware(ns);
467    DocumentBuilder builder = factory.newDocumentBuilder();
468    return builder.parse(new FileInputStream(filename));
469  }
470
471  public static Element getLastChild(Element e) {
472    if (e == null)
473      return null;
474    Node n = e.getLastChild();
475    while (n != null && n.getNodeType() != Node.ELEMENT_NODE)
476      n = n.getPreviousSibling();
477    return (Element) n;
478  }
479
480  public static Element getPrevSibling(Element e) {
481    Node n = e.getPreviousSibling();
482    while (n != null && n.getNodeType() != Node.ELEMENT_NODE)
483      n = n.getPreviousSibling();
484    return (Element) n;
485  }
486
487  public static String getNamedChildAttribute(Element element, String name, String aname) {
488    Element e = getNamedChild(element, name);
489    return e == null ? null : e.getAttribute(aname);
490  }
491
492  public static void writeDomToFile(Document doc, String filename) throws TransformerException {
493    TransformerFactory transformerFactory = TransformerFactory.newInstance();
494    Transformer transformer = transformerFactory.newTransformer();
495    DOMSource source = new DOMSource(doc);
496    StreamResult streamResult =  new StreamResult(new File(filename));
497    transformer.transform(source, streamResult);    
498  }
499
500  public static String getXsiType(org.w3c.dom.Element element) {
501    Attr a = element.getAttributeNodeNS("http://www.w3.org/2001/XMLSchema-instance", "type");
502    return (a == null ? null : a.getTextContent());
503    
504  }
505
506        public static String getDirectText(org.w3c.dom.Element node) {
507    Node n = node.getFirstChild();
508    StringBuilder b = new StringBuilder();
509    while (n != null) {
510        if (n.getNodeType() == Node.TEXT_NODE) 
511                b.append(n.getTextContent());
512        n = n.getNextSibling();
513    }
514          return b.toString().trim();
515        }
516
517  public static void deleteByName(Element e, String name) {
518    List<Element> matches = getNamedChildren(e, name);
519    for (Element m : matches)
520      e.removeChild(m);    
521  }
522
523  public static void deleteAttr(Element e, String namespaceURI, String localName) {
524    if (e.hasAttributeNS(namespaceURI, localName))
525      e.removeAttributeNS(namespaceURI, localName);
526    
527  }
528
529  public static Node[] children(Element ed) {
530    Node[] res = new Node[ed.getChildNodes().getLength()];
531    for (int i = 0; i < ed.getChildNodes().getLength(); i++)
532      res[i] = ed.getChildNodes().item(i);
533    return res;
534  }
535
536  public static Element insertChild(Document doc, Element element, String name, String namespace, int indent) {
537    Node node = doc.createTextNode("\n"+Utilities.padLeft("", ' ', indent));
538    Element child = doc.createElementNS(namespace, name);
539    element.insertBefore(child, element.getFirstChild());
540    element.insertBefore(node, element.getFirstChild());
541    return child;
542  }
543
544  public static Element insertChild(Document doc, Element element, String name, String namespace, Node before, int indent) {
545    if (before == null) {
546      Node node = doc.createTextNode("\n"+Utilities.padLeft("", ' ', indent));
547      element.insertBefore(node, before);
548    }
549    Element child = doc.createElementNS(namespace, name);
550    element.insertBefore(child, before);
551    if (before != null) {
552      Node node = doc.createTextNode("\n"+Utilities.padLeft("", ' ', indent));
553      element.insertBefore(node, before);
554    }
555    return child;
556  }
557
558  public static void addTextTag(Document doc, Element element, String name, String namespace, String text, int indent) {
559    Node node = doc.createTextNode("\n"+Utilities.padLeft("", ' ', indent));
560    element.appendChild(node);
561    Element child = doc.createElementNS(namespace, name);
562    element.appendChild(child);
563    child.setAttribute("value", text);    
564  }
565
566  public static void saveToFile(Element root, OutputStream stream) throws TransformerException {
567    Transformer transformer = TransformerFactory.newInstance().newTransformer();
568    Result output = new StreamResult(stream);
569    Source input = new DOMSource(root);
570
571    transformer.transform(input, output);
572  }
573
574  public static void spacer(Document doc, Element element, int indent) {
575    Node node = doc.createTextNode("\n"+Utilities.padLeft("", ' ', indent));
576    element.appendChild(node);
577   
578  }
579
580  public static String getNamedChildText(Element element, String name) {
581    Element e = getNamedChild(element, name);
582    return e == null ? null : e.getTextContent();
583  }
584
585        
586}