001package org.hl7.fhir.utilities.xml; 002 003/* 004 Copyright (c) 2011+, HL7, Inc. 005 All rights reserved. 006 007 Redistribution and use in source and binary forms, with or without modification, 008 are permitted provided that the following conditions are met: 009 010 * Redistributions of source code must retain the above copyright notice, this 011 list of conditions and the following disclaimer. 012 * Redistributions in binary form must reproduce the above copyright notice, 013 this list of conditions and the following disclaimer in the documentation 014 and/or other materials provided with the distribution. 015 * Neither the name of HL7 nor the names of its contributors may be used to 016 endorse or promote products derived from this software without specific 017 prior written permission. 018 019 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 022 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 023 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 024 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 025 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 026 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 027 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 028 POSSIBILITY OF SUCH DAMAGE. 029 030 */ 031 032 033 034import java.io.ByteArrayInputStream; 035import java.io.File; 036import java.io.FileInputStream; 037import java.io.IOException; 038import java.io.InputStream; 039import java.io.InputStreamReader; 040import java.io.OutputStream; 041import java.util.ArrayList; 042import java.util.List; 043import java.util.Set; 044 045import javax.xml.parsers.DocumentBuilder; 046import javax.xml.parsers.DocumentBuilderFactory; 047import javax.xml.parsers.ParserConfigurationException; 048import javax.xml.transform.Result; 049import javax.xml.transform.Source; 050import javax.xml.transform.Transformer; 051import javax.xml.transform.TransformerException; 052import javax.xml.transform.TransformerFactory; 053import javax.xml.transform.dom.DOMSource; 054import javax.xml.transform.stream.StreamResult; 055 056import org.hl7.fhir.exceptions.FHIRException; 057import org.hl7.fhir.utilities.Utilities; 058import org.w3c.dom.Attr; 059import org.w3c.dom.Document; 060import org.w3c.dom.Element; 061import org.w3c.dom.Node; 062import org.w3c.dom.ls.DOMImplementationLS; 063import org.w3c.dom.ls.LSSerializer; 064import org.xml.sax.SAXException; 065 066public class XMLUtil { 067 068 public static final String SPACE_CHAR = "\u00A0"; 069 070 public static boolean isNMToken(String name) { 071 if (name == null) 072 return false; 073 for (int i = 0; i < name.length(); i++) 074 if (!isNMTokenChar(name.charAt(i))) 075 return false; 076 return name.length() > 0; 077 } 078 079 public static boolean isNMTokenChar(char c) { 080 return isLetter(c) || isDigit(c) || c == '.' || c == '-' || c == '_' || c == ':' || isCombiningChar(c) || isExtender(c); 081 } 082 083 private static boolean isDigit(char c) { 084 return (c >= '\u0030' && c <= '\u0039') || (c >= '\u0660' && c <= '\u0669') || (c >= '\u06F0' && c <= '\u06F9') || 085 (c >= '\u0966' && c <= '\u096F') || (c >= '\u09E6' && c <= '\u09EF') || (c >= '\u0A66' && c <= '\u0A6F') || 086 (c >= '\u0AE6' && c <= '\u0AEF') || (c >= '\u0B66' && c <= '\u0B6F') || (c >= '\u0BE7' && c <= '\u0BEF') || 087 (c >= '\u0C66' && c <= '\u0C6F') || (c >= '\u0CE6' && c <= '\u0CEF') || (c >= '\u0D66' && c <= '\u0D6F') || 088 (c >= '\u0E50' && c <= '\u0E59') || (c >= '\u0ED0' && c <= '\u0ED9') || (c >= '\u0F20' && c <= '\u0F29'); 089 } 090 091 private static boolean isCombiningChar(char c) { 092 return (c >= '\u0300' && c <= '\u0345') || (c >= '\u0360' && c <= '\u0361') || (c >= '\u0483' && c <= '\u0486') || 093 (c >= '\u0591' && c <= '\u05A1') || (c >= '\u05A3' && c <= '\u05B9') || (c >= '\u05BB' && c <= '\u05BD') || 094 c == '\u05BF' || (c >= '\u05C1' && c <= '\u05C2') || c == '\u05C4' || (c >= '\u064B' && c <= '\u0652') || 095 c == '\u0670' || (c >= '\u06D6' && c <= '\u06DC') || (c >= '\u06DD' && c <= '\u06DF') || (c >= '\u06E0' && c <= '\u06E4') || 096 (c >= '\u06E7' && c <= '\u06E8') || (c >= '\u06EA' && c <= '\u06ED') || (c >= '\u0901' && c <= '\u0903') || c == '\u093C' || 097 (c >= '\u093E' && c <= '\u094C') || c == '\u094D' || (c >= '\u0951' && c <= '\u0954') || (c >= '\u0962' && c <= '\u0963') || 098 (c >= '\u0981' && c <= '\u0983') || c == '\u09BC' || c == '\u09BE' || c == '\u09BF' || (c >= '\u09C0' && c <= '\u09C4') || 099 (c >= '\u09C7' && c <= '\u09C8') || (c >= '\u09CB' && c <= '\u09CD') || c == '\u09D7' || (c >= '\u09E2' && c <= '\u09E3') || 100 c == '\u0A02' || c == '\u0A3C' || c == '\u0A3E' || c == '\u0A3F' || (c >= '\u0A40' && c <= '\u0A42') || 101 (c >= '\u0A47' && c <= '\u0A48') || (c >= '\u0A4B' && c <= '\u0A4D') || (c >= '\u0A70' && c <= '\u0A71') || 102 (c >= '\u0A81' && c <= '\u0A83') || c == '\u0ABC' || (c >= '\u0ABE' && c <= '\u0AC5') || (c >= '\u0AC7' && c <= '\u0AC9') || 103 (c >= '\u0ACB' && c <= '\u0ACD') || (c >= '\u0B01' && c <= '\u0B03') || c == '\u0B3C' || (c >= '\u0B3E' && c <= '\u0B43') || 104 (c >= '\u0B47' && c <= '\u0B48') || (c >= '\u0B4B' && c <= '\u0B4D') || (c >= '\u0B56' && c <= '\u0B57') || 105 (c >= '\u0B82' && c <= '\u0B83') || (c >= '\u0BBE' && c <= '\u0BC2') || (c >= '\u0BC6' && c <= '\u0BC8') || 106 (c >= '\u0BCA' && c <= '\u0BCD') || c == '\u0BD7' || (c >= '\u0C01' && c <= '\u0C03') || (c >= '\u0C3E' && c <= '\u0C44') || 107 (c >= '\u0C46' && c <= '\u0C48') || (c >= '\u0C4A' && c <= '\u0C4D') || (c >= '\u0C55' && c <= '\u0C56') || 108 (c >= '\u0C82' && c <= '\u0C83') || (c >= '\u0CBE' && c <= '\u0CC4') || (c >= '\u0CC6' && c <= '\u0CC8') || 109 (c >= '\u0CCA' && c <= '\u0CCD') || (c >= '\u0CD5' && c <= '\u0CD6') || (c >= '\u0D02' && c <= '\u0D03') || 110 (c >= '\u0D3E' && c <= '\u0D43') || (c >= '\u0D46' && c <= '\u0D48') || (c >= '\u0D4A' && c <= '\u0D4D') || c == '\u0D57' || 111 c == '\u0E31' || (c >= '\u0E34' && c <= '\u0E3A') || (c >= '\u0E47' && c <= '\u0E4E') || c == '\u0EB1' || 112 (c >= '\u0EB4' && c <= '\u0EB9') || (c >= '\u0EBB' && c <= '\u0EBC') || (c >= '\u0EC8' && c <= '\u0ECD') || 113 (c >= '\u0F18' && c <= '\u0F19') || c == '\u0F35' || c == '\u0F37' || c == '\u0F39' || c == '\u0F3E' || c == '\u0F3F' || 114 (c >= '\u0F71' && c <= '\u0F84') || (c >= '\u0F86' && c <= '\u0F8B') || (c >= '\u0F90' && c <= '\u0F95') || c == '\u0F97' || 115 (c >= '\u0F99' && c <= '\u0FAD') || (c >= '\u0FB1' && c <= '\u0FB7') || c == '\u0FB9' || (c >= '\u20D0' && c <= '\u20DC') || 116 c == '\u20E1' || (c >= '\u302A' && c <= '\u302F') || c == '\u3099' || c == '\u309A'; 117 } 118 119 private static boolean isExtender(char c) { 120 return c == '\u00B7' || c == '\u02D0' || c == '\u02D1' || c == '\u0387' || c == '\u0640' || c == '\u0E46' || 121 c == '\u0EC6' || c == '\u3005' || (c >= '\u3031' && c <= '\u3035') || (c >= '\u309D' && c <= '\u309E') || 122 (c >= '\u30FC' && c <= '\u30FE'); 123 } 124 125 private static boolean isLetter(char c) { 126 return isBaseChar(c) || isIdeographic(c); 127 } 128 129 private static boolean isBaseChar(char c) { 130 return (c >= '\u0041' && c <= '\u005A') || (c >= '\u0061' && c <= '\u007A') || (c >= '\u00C0' && c <= '\u00D6') || 131 (c >= '\u00D8' && c <= '\u00F6') || (c >= '\u00F8' && c <= '\u00FF') || (c >= '\u0100' && c <= '\u0131') || 132 (c >= '\u0134' && c <= '\u013E') || (c >= '\u0141' && c <= '\u0148') || (c >= '\u014A' && c <= '\u017E') || 133 (c >= '\u0180' && c <= '\u01C3') || (c >= '\u01CD' && c <= '\u01F0') || (c >= '\u01F4' && c <= '\u01F5') || 134 (c >= '\u01FA' && c <= '\u0217') || (c >= '\u0250' && c <= '\u02A8') || (c >= '\u02BB' && c <= '\u02C1') || 135 c == '\u0386' || (c >= '\u0388' && c <= '\u038A') || c == '\u038C' || (c >= '\u038E' && c <= '\u03A1') || 136 (c >= '\u03A3' && c <= '\u03CE') || (c >= '\u03D0' && c <= '\u03D6') || c == '\u03DA' || c == '\u03DC' || c == '\u03DE' || 137 c == '\u03E0' || (c >= '\u03E2' && c <= '\u03F3') || (c >= '\u0401' && c <= '\u040C') || (c >= '\u040E' && c <= '\u044F') || 138 (c >= '\u0451' && c <= '\u045C') || (c >= '\u045E' && c <= '\u0481') || (c >= '\u0490' && c <= '\u04C4') || 139 (c >= '\u04C7' && c <= '\u04C8') || (c >= '\u04CB' && c <= '\u04CC') || (c >= '\u04D0' && c <= '\u04EB') || 140 (c >= '\u04EE' && c <= '\u04F5') || (c >= '\u04F8' && c <= '\u04F9') || (c >= '\u0531' && c <= '\u0556') || 141 c == '\u0559' || (c >= '\u0561' && c <= '\u0586') || (c >= '\u05D0' && c <= '\u05EA') || (c >= '\u05F0' && c <= '\u05F2') || 142 (c >= '\u0621' && c <= '\u063A') || (c >= '\u0641' && c <= '\u064A') || (c >= '\u0671' && c <= '\u06B7') || 143 (c >= '\u06BA' && c <= '\u06BE') || (c >= '\u06C0' && c <= '\u06CE') || (c >= '\u06D0' && c <= '\u06D3') || 144 c == '\u06D5' || (c >= '\u06E5' && c <= '\u06E6') || (c >= '\u0905' && c <= '\u0939') || c == '\u093D' || 145 (c >= '\u0958' && c <= '\u0961') || (c >= '\u0985' && c <= '\u098C') || (c >= '\u098F' && c <= '\u0990') || 146 (c >= '\u0993' && c <= '\u09A8') || (c >= '\u09AA' && c <= '\u09B0') || c == '\u09B2' || 147 (c >= '\u09B6' && c <= '\u09B9') || (c >= '\u09DC' && c <= '\u09DD') || (c >= '\u09DF' && c <= '\u09E1') || 148 (c >= '\u09F0' && c <= '\u09F1') || (c >= '\u0A05' && c <= '\u0A0A') || (c >= '\u0A0F' && c <= '\u0A10') || 149 (c >= '\u0A13' && c <= '\u0A28') || (c >= '\u0A2A' && c <= '\u0A30') || (c >= '\u0A32' && c <= '\u0A33') || 150 (c >= '\u0A35' && c <= '\u0A36') || (c >= '\u0A38' && c <= '\u0A39') || (c >= '\u0A59' && c <= '\u0A5C') || 151 c == '\u0A5E' || (c >= '\u0A72' && c <= '\u0A74') || (c >= '\u0A85' && c <= '\u0A8B') || c == '\u0A8D' || 152 (c >= '\u0A8F' && c <= '\u0A91') || (c >= '\u0A93' && c <= '\u0AA8') || (c >= '\u0AAA' && c <= '\u0AB0') || 153 (c >= '\u0AB2' && c <= '\u0AB3') || (c >= '\u0AB5' && c <= '\u0AB9') || c == '\u0ABD' || c == '\u0AE0' || 154 (c >= '\u0B05' && c <= '\u0B0C') || (c >= '\u0B0F' && c <= '\u0B10') || (c >= '\u0B13' && c <= '\u0B28') || 155 (c >= '\u0B2A' && c <= '\u0B30') || (c >= '\u0B32' && c <= '\u0B33') || (c >= '\u0B36' && c <= '\u0B39') || 156 c == '\u0B3D' || (c >= '\u0B5C' && c <= '\u0B5D') || (c >= '\u0B5F' && c <= '\u0B61') || 157 (c >= '\u0B85' && c <= '\u0B8A') || (c >= '\u0B8E' && c <= '\u0B90') || (c >= '\u0B92' && c <= '\u0B95') || 158 (c >= '\u0B99' && c <= '\u0B9A') || c == '\u0B9C' || (c >= '\u0B9E' && c <= '\u0B9F') || 159 (c >= '\u0BA3' && c <= '\u0BA4') || (c >= '\u0BA8' && c <= '\u0BAA') || (c >= '\u0BAE' && c <= '\u0BB5') || 160 (c >= '\u0BB7' && c <= '\u0BB9') || (c >= '\u0C05' && c <= '\u0C0C') || (c >= '\u0C0E' && c <= '\u0C10') || 161 (c >= '\u0C12' && c <= '\u0C28') || (c >= '\u0C2A' && c <= '\u0C33') || (c >= '\u0C35' && c <= '\u0C39') || 162 (c >= '\u0C60' && c <= '\u0C61') || (c >= '\u0C85' && c <= '\u0C8C') || (c >= '\u0C8E' && c <= '\u0C90') || 163 (c >= '\u0C92' && c <= '\u0CA8') || (c >= '\u0CAA' && c <= '\u0CB3') || (c >= '\u0CB5' && c <= '\u0CB9') || 164 c == '\u0CDE' || (c >= '\u0CE0' && c <= '\u0CE1') || (c >= '\u0D05' && c <= '\u0D0C') || 165 (c >= '\u0D0E' && c <= '\u0D10') || (c >= '\u0D12' && c <= '\u0D28') || (c >= '\u0D2A' && c <= '\u0D39') || 166 (c >= '\u0D60' && c <= '\u0D61') || (c >= '\u0E01' && c <= '\u0E2E') || c == '\u0E30' || 167 (c >= '\u0E32' && c <= '\u0E33') || (c >= '\u0E40' && c <= '\u0E45') || (c >= '\u0E81' && c <= '\u0E82') || 168 c == '\u0E84' || (c >= '\u0E87' && c <= '\u0E88') || c == '\u0E8A' || c == '\u0E8D' || (c >= '\u0E94' && c <= '\u0E97') || 169 (c >= '\u0E99' && c <= '\u0E9F') || (c >= '\u0EA1' && c <= '\u0EA3') || c == '\u0EA5' || c == '\u0EA7' || 170 (c >= '\u0EAA' && c <= '\u0EAB') || (c >= '\u0EAD' && c <= '\u0EAE') || c == '\u0EB0' || 171 (c >= '\u0EB2' && c <= '\u0EB3') || c == '\u0EBD' || (c >= '\u0EC0' && c <= '\u0EC4') || 172 (c >= '\u0F40' && c <= '\u0F47') || (c >= '\u0F49' && c <= '\u0F69') || (c >= '\u10A0' && c <= '\u10C5') || 173 (c >= '\u10D0' && c <= '\u10F6') || c == '\u1100' || (c >= '\u1102' && c <= '\u1103') || 174 (c >= '\u1105' && c <= '\u1107') || c == '\u1109' || (c >= '\u110B' && c <= '\u110C') || 175 (c >= '\u110E' && c <= '\u1112') || c == '\u113C' || c == '\u113E' || c == '\u1140' || c == '\u114C' || 176 c == '\u114E' || c == '\u1150' || (c >= '\u1154' && c <= '\u1155') || c == '\u1159' || 177 (c >= '\u115F' && c <= '\u1161') || c == '\u1163' || c == '\u1165' || c == '\u1167' || c == '\u1169' || 178 (c >= '\u116D' && c <= '\u116E') || (c >= '\u1172' && c <= '\u1173') || c == '\u1175' || 179 c == '\u119E' || c == '\u11A8' || c == '\u11AB' || (c >= '\u11AE' && c <= '\u11AF') || 180 (c >= '\u11B7' && c <= '\u11B8') || c == '\u11BA' || (c >= '\u11BC' && c <= '\u11C2') || 181 c == '\u11EB' || c == '\u11F0' || c == '\u11F9' || (c >= '\u1E00' && c <= '\u1E9B') || (c >= '\u1EA0' && c <= '\u1EF9') || 182 (c >= '\u1F00' && c <= '\u1F15') || (c >= '\u1F18' && c <= '\u1F1D') || (c >= '\u1F20' && c <= '\u1F45') || 183 (c >= '\u1F48' && c <= '\u1F4D') || (c >= '\u1F50' && c <= '\u1F57') || c == '\u1F59' || c == '\u1F5B' || c == '\u1F5D' || 184 (c >= '\u1F5F' && c <= '\u1F7D') || (c >= '\u1F80' && c <= '\u1FB4') || (c >= '\u1FB6' && c <= '\u1FBC') || 185 c == '\u1FBE' || (c >= '\u1FC2' && c <= '\u1FC4') || (c >= '\u1FC6' && c <= '\u1FCC') || 186 (c >= '\u1FD0' && c <= '\u1FD3') || (c >= '\u1FD6' && c <= '\u1FDB') || (c >= '\u1FE0' && c <= '\u1FEC') || 187 (c >= '\u1FF2' && c <= '\u1FF4') || (c >= '\u1FF6' && c <= '\u1FFC') || c == '\u2126' || 188 (c >= '\u212A' && c <= '\u212B') || c == '\u212E' || (c >= '\u2180' && c <= '\u2182') || 189 (c >= '\u3041' && c <= '\u3094') || (c >= '\u30A1' && c <= '\u30FA') || (c >= '\u3105' && c <= '\u312C') || 190 (c >= '\uAC00' && c <= '\uD7A3'); 191 } 192 193 private static boolean isIdeographic(char c) { 194 return (c >= '\u4E00' && c <= '\u9FA5') || c == '\u3007' || (c >= '\u3021' && c <= '\u3029'); 195 } 196 197 public static String determineEncoding(InputStream stream) throws IOException { 198 stream.mark(20000); 199 try { 200 int b0 = stream.read(); 201 int b1 = stream.read(); 202 int b2 = stream.read(); 203 int b3 = stream.read(); 204 205 if (b0 == 0xFE && b1 == 0xFF) 206 return "UTF-16BE"; 207 else if (b0 == 0xFF && b1 == 0xFE) 208 return "UTF-16LE"; 209 else if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF ) 210 return "UTF-8"; 211 else if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) 212 return "UTF-16BE"; 213 else if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) 214 return "UTF-16LE"; 215 else if (b0 == 0x3C && b1 == 0x3F && b2 == 0x78 && b3 == 0x6D) { 216// UTF-8, ISO 646, ASCII, some part of ISO 8859, Shift-JIS, EUC, or any other 7-bit, 8-bit, or mixed-width encoding 217// which ensures that the characters of ASCII have their normal positions, width, and values; the actual encoding 218// declaration must be read to detect which of these applies, but since all of these encodings use the same bit patterns 219// for the relevant ASCII characters, the encoding declaration itself may be read reliably 220 InputStreamReader rdr = new InputStreamReader(stream, "US-ASCII"); 221 String hdr = readFirstLine(rdr); 222 return extractEncoding(hdr); 223 } else 224 return null; 225 } finally { 226 stream.reset(); 227 } 228 } 229 230 private static String extractEncoding(String hdr) { 231 int i = hdr.indexOf("encoding="); 232 if (i == -1) 233 return null; 234 hdr = hdr.substring(i+9); 235 char sep = hdr.charAt(0); 236 hdr = hdr.substring(1); 237 i = hdr.indexOf(sep); 238 if (i == -1) 239 return null; 240 return hdr.substring(0, i); 241 } 242 243 private static String readFirstLine(InputStreamReader rdr) throws IOException { 244 char[] buf = new char[1]; 245 StringBuffer bldr = new StringBuffer(); 246 rdr.read(buf); 247 while (buf[0] != '>') { 248 bldr.append(buf[0]); 249 rdr.read(buf); 250 } 251 return bldr.toString(); 252 } 253 254 255 public static boolean charSetImpliesAscii(String charset) { 256 return charset.equals("ISO-8859-1") || charset.equals("US-ASCII"); 257 } 258 259 260 /** 261 * Converts the raw characters to XML escape characters. 262 * 263 * @param rawContent 264 * @param charset Null when charset is not known, so we assume it's unicode 265 * @param isNoLines 266 * @return escape string 267 */ 268 public static String escapeXML(String rawContent, String charset, boolean isNoLines) { 269 if (rawContent == null) 270 return ""; 271 else { 272 StringBuffer sb = new StringBuffer(); 273 274 for (int i = 0; i < rawContent.length(); i++) { 275 char ch = rawContent.charAt(i); 276 // We don't escape ' because our code always spits out attributes surrounded by "", which means 277 // it's not necessary to escape ' - and it's *much* less ugly and more bandwidth-efficient when we don't. 278 if (ch == '&') 279 sb.append("&"); 280 else if (ch == '"') 281 sb.append("""); 282 else if (ch == '<') 283 sb.append("<"); 284 else if (ch == '>') 285 sb.append(">"); 286 else if (ch > '~' && charset != null && charSetImpliesAscii(charset)) 287 // TODO - why is hashcode the only way to get the unicode number for the character 288 // in jre 5.0? 289 sb.append("&#x"+Integer.toHexString(ch).toUpperCase()+";"); 290 else if (isNoLines) { 291 if (ch == '\r') 292 sb.append("
"); 293 else if (ch != '\n') 294 sb.append(ch); 295 } 296 else 297 sb.append(ch); 298 } 299 return sb.toString(); 300 } 301 } 302 303 public static Element getFirstChild(Element e) { 304 if (e == null) 305 return null; 306 Node n = e.getFirstChild(); 307 while (n != null && n.getNodeType() != Node.ELEMENT_NODE) 308 n = n.getNextSibling(); 309 return (Element) n; 310 } 311 312 public static Element getNamedChild(Element e, String name) { 313 Element c = getFirstChild(e); 314 while (c != null && !name.equals(c.getLocalName()) && !name.equals(c.getNodeName())) 315 c = getNextSibling(c); 316 return c; 317 } 318 319 public static Element getNamedChildByAttribute(Element e, String name, String nname, String nvalue) { 320 Element c = getFirstChild(e); 321 while (c != null && !((name.equals(c.getLocalName()) || name.equals(c.getNodeName())) && nvalue.equals(c.getAttribute(nname)))) 322 c = getNextSibling(c); 323 return c; 324 } 325 326 public static Element getNextSibling(Element e) { 327 Node n = e.getNextSibling(); 328 while (n != null && n.getNodeType() != Node.ELEMENT_NODE) 329 n = n.getNextSibling(); 330 return (Element) n; 331 } 332 333 public static void getNamedChildren(Element e, String name, List<Element> set) { 334 Element c = getFirstChild(e); 335 while (c != null) { 336 if (name.equals(c.getLocalName()) || name.equals(c.getNodeName()) ) 337 set.add(c); 338 c = getNextSibling(c); 339 } 340 } 341 342 public static List<Element> getNamedChildren(Element e, String name) { 343 List<Element> res = new ArrayList<Element>(); 344 Element c = getFirstChild(e); 345 while (c != null) { 346 if (name.equals(c.getLocalName()) || name.equals(c.getNodeName()) ) 347 res.add(c); 348 c = getNextSibling(c); 349 } 350 return res; 351 } 352 353 public static String htmlToXmlEscapedPlainText(Element r) { 354 StringBuilder s = new StringBuilder(); 355 Node n = r.getFirstChild(); 356 boolean ws = false; 357 while (n != null) { 358 if (n.getNodeType() == Node.TEXT_NODE) { 359 String t = n.getTextContent().trim(); 360 if (Utilities.noString(t)) 361 ws = true; 362 else { 363 if (ws) 364 s.append(" "); 365 ws = false; 366 s.append(t); 367 } 368 } 369 if (n.getNodeType() == Node.ELEMENT_NODE) { 370 if (ws) 371 s.append(" "); 372 ws = false; 373 s.append(htmlToXmlEscapedPlainText((Element) n)); 374 if (r.getNodeName().equals("br") || r.getNodeName().equals("p")) 375 s.append("\r\n"); 376 } 377 n = n.getNextSibling(); 378 } 379 return s.toString(); 380 } 381 382 public static String htmlToXmlEscapedPlainText(String definition) throws ParserConfigurationException, SAXException, IOException { 383 return htmlToXmlEscapedPlainText(parseToDom("<div>"+definition+"</div>").getDocumentElement()); 384 } 385 386 public static String elementToString(Element el) { 387 if (el == null) 388 return ""; 389 Document document = el.getOwnerDocument(); 390 DOMImplementationLS domImplLS = (DOMImplementationLS) document 391 .getImplementation(); 392 LSSerializer serializer = domImplLS.createLSSerializer(); 393 return serializer.writeToString(el); 394 } 395 396 public static String getNamedChildValue(Element element, String name) { 397 Element e = getNamedChild(element, name); 398 return e == null ? null : e.getAttribute("value"); 399 } 400 401 public static void setNamedChildValue(Element element, String name, String value) throws FHIRException { 402 Element e = getNamedChild(element, name); 403 if (e == null) 404 throw new FHIRException("unable to find element "+name); 405 e.setAttribute("value", value); 406 } 407 408 409 public static void getNamedChildrenWithWildcard(Element focus, String name, List<Element> children) { 410 Element c = getFirstChild(focus); 411 while (c != null) { 412 String n = c.getLocalName() != null ? c.getLocalName() : c.getNodeName(); 413 if (name.equals(n) || (name.endsWith("[x]") && n.startsWith(name.substring(0, name.length()-3)))) 414 children.add(c); 415 c = getNextSibling(c); 416 } 417 } 418 419 public static void getNamedChildrenWithTails(Element focus, String name, List<Element> children, Set<String> typeTails) { 420 Element c = getFirstChild(focus); 421 while (c != null) { 422 String n = c.getLocalName() != null ? c.getLocalName() : c.getNodeName(); 423 if (n.equals(name) || (!n.equals("responseCode") && (n.startsWith(name) && typeTails.contains(n.substring(name.length()))))) 424 children.add(c); 425 c = getNextSibling(c); 426 } 427 } 428 429 public static boolean hasNamedChild(Element e, String name) { 430 Element c = getFirstChild(e); 431 while (c != null && !name.equals(c.getLocalName()) && !name.equals(c.getNodeName())) 432 c = getNextSibling(c); 433 return c != null; 434 } 435 436 public static Document parseToDom(String content) throws ParserConfigurationException, SAXException, IOException { 437 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); 438 factory.setNamespaceAware(false); 439 DocumentBuilder builder = factory.newDocumentBuilder(); 440 return builder.parse(new ByteArrayInputStream(content.getBytes())); 441 } 442 443 public static Document parseToDom(byte[] content) throws ParserConfigurationException, SAXException, IOException { 444 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); 445 factory.setNamespaceAware(false); 446 DocumentBuilder builder = factory.newDocumentBuilder(); 447 return builder.parse(new ByteArrayInputStream(content)); 448 } 449 450 public static Document parseToDom(byte[] content, boolean ns) throws ParserConfigurationException, SAXException, IOException { 451 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); 452 factory.setNamespaceAware(ns); 453 DocumentBuilder builder = factory.newDocumentBuilder(); 454 return builder.parse(new ByteArrayInputStream(content)); 455 } 456 457 public static Document parseFileToDom(String filename) throws ParserConfigurationException, SAXException, IOException { 458 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); 459 factory.setNamespaceAware(false); 460 DocumentBuilder builder = factory.newDocumentBuilder(); 461 return builder.parse(new FileInputStream(filename)); 462 } 463 464 public static Document parseFileToDom(String filename, boolean ns) throws ParserConfigurationException, SAXException, IOException { 465 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); 466 factory.setNamespaceAware(ns); 467 DocumentBuilder builder = factory.newDocumentBuilder(); 468 return builder.parse(new FileInputStream(filename)); 469 } 470 471 public static Element getLastChild(Element e) { 472 if (e == null) 473 return null; 474 Node n = e.getLastChild(); 475 while (n != null && n.getNodeType() != Node.ELEMENT_NODE) 476 n = n.getPreviousSibling(); 477 return (Element) n; 478 } 479 480 public static Element getPrevSibling(Element e) { 481 Node n = e.getPreviousSibling(); 482 while (n != null && n.getNodeType() != Node.ELEMENT_NODE) 483 n = n.getPreviousSibling(); 484 return (Element) n; 485 } 486 487 public static String getNamedChildAttribute(Element element, String name, String aname) { 488 Element e = getNamedChild(element, name); 489 return e == null ? null : e.getAttribute(aname); 490 } 491 492 public static void writeDomToFile(Document doc, String filename) throws TransformerException { 493 TransformerFactory transformerFactory = TransformerFactory.newInstance(); 494 Transformer transformer = transformerFactory.newTransformer(); 495 DOMSource source = new DOMSource(doc); 496 StreamResult streamResult = new StreamResult(new File(filename)); 497 transformer.transform(source, streamResult); 498 } 499 500 public static String getXsiType(org.w3c.dom.Element element) { 501 Attr a = element.getAttributeNodeNS("http://www.w3.org/2001/XMLSchema-instance", "type"); 502 return (a == null ? null : a.getTextContent()); 503 504 } 505 506 public static String getDirectText(org.w3c.dom.Element node) { 507 Node n = node.getFirstChild(); 508 StringBuilder b = new StringBuilder(); 509 while (n != null) { 510 if (n.getNodeType() == Node.TEXT_NODE) 511 b.append(n.getTextContent()); 512 n = n.getNextSibling(); 513 } 514 return b.toString().trim(); 515 } 516 517 public static void deleteByName(Element e, String name) { 518 List<Element> matches = getNamedChildren(e, name); 519 for (Element m : matches) 520 e.removeChild(m); 521 } 522 523 public static void deleteAttr(Element e, String namespaceURI, String localName) { 524 if (e.hasAttributeNS(namespaceURI, localName)) 525 e.removeAttributeNS(namespaceURI, localName); 526 527 } 528 529 public static Node[] children(Element ed) { 530 Node[] res = new Node[ed.getChildNodes().getLength()]; 531 for (int i = 0; i < ed.getChildNodes().getLength(); i++) 532 res[i] = ed.getChildNodes().item(i); 533 return res; 534 } 535 536 public static Element insertChild(Document doc, Element element, String name, String namespace, int indent) { 537 Node node = doc.createTextNode("\n"+Utilities.padLeft("", ' ', indent)); 538 Element child = doc.createElementNS(namespace, name); 539 element.insertBefore(child, element.getFirstChild()); 540 element.insertBefore(node, element.getFirstChild()); 541 return child; 542 } 543 544 public static Element insertChild(Document doc, Element element, String name, String namespace, Node before, int indent) { 545 if (before == null) { 546 Node node = doc.createTextNode("\n"+Utilities.padLeft("", ' ', indent)); 547 element.insertBefore(node, before); 548 } 549 Element child = doc.createElementNS(namespace, name); 550 element.insertBefore(child, before); 551 if (before != null) { 552 Node node = doc.createTextNode("\n"+Utilities.padLeft("", ' ', indent)); 553 element.insertBefore(node, before); 554 } 555 return child; 556 } 557 558 public static void addTextTag(Document doc, Element element, String name, String namespace, String text, int indent) { 559 Node node = doc.createTextNode("\n"+Utilities.padLeft("", ' ', indent)); 560 element.appendChild(node); 561 Element child = doc.createElementNS(namespace, name); 562 element.appendChild(child); 563 child.setAttribute("value", text); 564 } 565 566 public static void saveToFile(Element root, OutputStream stream) throws TransformerException { 567 Transformer transformer = TransformerFactory.newInstance().newTransformer(); 568 Result output = new StreamResult(stream); 569 Source input = new DOMSource(root); 570 571 transformer.transform(input, output); 572 } 573 574 public static void spacer(Document doc, Element element, int indent) { 575 Node node = doc.createTextNode("\n"+Utilities.padLeft("", ' ', indent)); 576 element.appendChild(node); 577 578 } 579 580 public static String getNamedChildText(Element element, String name) { 581 Element e = getNamedChild(element, name); 582 return e == null ? null : e.getTextContent(); 583 } 584 585 586}