package org.hl7.fhir.utilities.xls;

/*
  Copyright (c) 2011+, HL7, Inc.
  All rights reserved.
  
  Redistribution and use in source and binary forms, with or without modification, 
  are permitted provided that the following conditions are met:
    
   * Redistributions of source code must retain the above copyright notice, this 
     list of conditions and the following disclaimer.
   * Redistributions in binary form must reproduce the above copyright notice, 
     this list of conditions and the following disclaimer in the documentation 
     and/or other materials provided with the distribution.
   * Neither the name of HL7 nor the names of its contributors may be used to 
     endorse or promote products derived from this software without specific 
     prior written permission.
  
  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
  POSSIBILITY OF SUCH DAMAGE.
  
 */



import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.hl7.fhir.exceptions.FHIRException;
import org.hl7.fhir.utilities.Utilities;
import org.hl7.fhir.utilities.xml.XMLUtil;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * Loads an XML spreadsheet whose root element is {@code Workbook} in the
 * {@code urn:schemas-microsoft-com:office:spreadsheet} namespace and exposes each
 * {@code Worksheet} as a {@link Sheet} of string cells.
 *
 * <p>The whole document is parsed in the constructor; afterwards the sheets are available
 * through {@link #getSheets()}, keyed by the worksheet's {@code Name} attribute. The first
 * row of every worksheet is treated as the column headers, the remaining rows as data.
 */
public class XLSXmlParser {

  private static final String XLS_NS = "urn:schemas-microsoft-com:office:spreadsheet";

  /** One spreadsheet row: the cell values in column order. An entry may be {@code null} (error cell). */
  public class Row extends ArrayList<String> {
    private static final long serialVersionUID = 1L;
  }

  /** A single worksheet: its title, its header row and its data rows. */
  public class Sheet {
    /** Value of the worksheet's {@code Name} attribute. */
    public String title;
    /** Header row (first row of the worksheet). Empty when the worksheet contains no rows at all. */
    public Row columns = new Row();
    /** Data rows (everything after the header row), with trailing empty rows removed. */
    public List<Row> rows = new ArrayList<Row>();

    /**
     * Tells whether the header row contains the given column name.
     *
     * @param column the column name, compared case-insensitively
     * @return {@code true} if some header matches
     */
    public boolean hasColumn(String column) {
      return lastIndexOfColumn(column) != -1;
    }

    /**
     * Tells whether the given data row has a non-empty value in the named column.
     *
     * @param row zero-based index into {@link #rows}
     * @param column the column name, compared case-insensitively
     * @return {@code true} if {@link #getColumn(int, String)} yields a non-empty string
     */
    public boolean hasColumn(int row, String column) {
      String s = getColumn(row, column);
      return s != null && !s.equals("");
    }

    /**
     * Returns the cleaned-up value of the named column in the given data row.
     *
     * <p>The value is trimmed, then tabs and non-breaking spaces are replaced by ordinary
     * spaces. If several headers match, the right-most one is used.
     *
     * @param row zero-based index into {@link #rows}
     * @param column the column name, compared case-insensitively
     * @return the value, or {@code ""} when the column does not exist, the row is too short
     *         to reach it, or the cell holds {@code null}
     */
    public String getColumn(int row, String column) {
      int c = lastIndexOfColumn(column);
      if (c == -1) {
        // An unknown column is deliberately reported as empty rather than as an error.
        return "";
      }
      Row r = rows.get(row);
      if (r.size() <= c) {
        return "";
      }
      String s = r.get(c);
      return s == null ? "" : s.trim().replace("\t", " ").replace("\u00A0", " ");
    }

    /**
     * Lists the header names that end with the given suffix (case-sensitive), in column order.
     *
     * @param suffix the suffix to look for
     * @return the matching header names; empty if there are none
     */
    public List<String> getColumnNamesBySuffix(String suffix) {
      List<String> names = new ArrayList<String>();
      for (String header : columns) {
        if (header != null && header.endsWith(suffix)) {
          names.add(header);
        }
      }
      return names;
    }

    /**
     * Returns the trimmed value of the column whose header starts with the given prefix
     * (case-sensitive). If several headers match, the right-most one is used.
     *
     * @param row zero-based index into {@link #rows}
     * @param column the header prefix to look for
     * @return the trimmed value, or {@code ""} when no header matches, the row is too short
     *         to reach the column, or the cell holds {@code null}
     */
    public String getByColumnPrefix(int row, String column) {
      int c = -1;
      for (int i = 0; i < columns.size(); i++) {
        String header = columns.get(i);
        if (header != null && header.startsWith(column)) {
          c = i; // no break: the last matching header wins
        }
      }
      if (c == -1) {
        // An unknown column is deliberately reported as empty rather than as an error.
        return "";
      }
      Row r = rows.get(row);
      if (r.size() <= c) {
        return "";
      }
      // Error cells are stored as null; report them as empty, like getColumn does.
      String s = r.get(c);
      return s == null ? "" : s.trim();
    }

    /** @return the data rows of this sheet (the live list, not a copy) */
    public List<Row> getRows() {
      return rows;
    }

    /**
     * Returns the named column of the given data row as an integer.
     *
     * @param row zero-based index into {@link #rows}
     * @param column the column name, compared case-insensitively
     * @return the parsed value, or {@code 0} when the cell is empty
     * @throws NumberFormatException if the cell is not empty and is not a valid integer
     */
    public int getIntColumn(int row, String column) {
      String value = getColumn(row, column);
      if (Utilities.noString(value)) {
        return 0;
      }
      return Integer.parseInt(value);
    }

    /**
     * Returns the named column of the given data row, insisting that it has a value.
     *
     * @param row zero-based index into {@link #rows}
     * @param column the column name, compared case-insensitively
     * @return the non-empty value
     * @throws FHIRException if the value is empty
     */
    public String getNonEmptyColumn(int row, String column) throws FHIRException {
      String value = getColumn(row, column);
      if (Utilities.noString(value)) {
        throw new FHIRException("The colummn "+column+" cannot be empty");
      }
      return value;
    }

    /**
     * Tells whether any data row has content in the given column.
     *
     * <p>Unlike {@link #hasColumn(String)}, the header is located with
     * {@link java.util.List#indexOf(Object)}, i.e. by exact, case-sensitive match, and the
     * first matching header is used.
     *
     * @param col the exact header name
     * @return {@code true} if at least one row reaches the column and its value passes
     *         {@code !Utilities.noString(...)}
     */
    public boolean hasColumnContent(String col) {
      int i = columns.indexOf(col);
      if (i == -1) {
        return false;
      }
      for (Row r : rows) {
        if (r.size() > i && !Utilities.noString(r.get(i))) {
          return true;
        }
      }
      return false;
    }

    /**
     * Finds the right-most header equal to {@code column}, ignoring case.
     *
     * @return the zero-based column index, or {@code -1} if there is no such header
     */
    private int lastIndexOfColumn(String column) {
      for (int i = columns.size() - 1; i >= 0; i--) {
        String header = columns.get(i);
        if (header != null && header.equalsIgnoreCase(column)) {
          return i;
        }
      }
      return -1;
    }

  }

  private Map<String, Sheet> sheets;
  private Document xml;
  private String name;

  /**
   * Parses the spreadsheet read from {@code in}.
   *
   * @param in the XML document; this class reads it but does not close it
   * @param name a label for the source, used only in error messages
   * @throws FHIRException if the document cannot be parsed or is not a valid spreadsheet;
   *         the underlying exception is attached as the cause
   */
  public XLSXmlParser(InputStream in, String name) throws FHIRException {
    this.name = name;
    try {
      xml = parseXml(in);
      sheets = new HashMap<String, Sheet>();
      readXml();
    } catch (Exception e) {
      throw new FHIRException("unable to load "+name+": "+e.getMessage(), e);
    }
  }

  /**
   * Parses the stream into a namespace-aware DOM document.
   *
   * <p>External entity resolution and external DTD loading are switched off so that a
   * crafted document cannot make the parser read local files or contact other hosts (XXE).
   *
   * @param in the XML document
   * @return the parsed document
   * @throws FHIRException wrapping any configuration, I/O or parse failure
   */
  private Document parseXml(InputStream in) throws FHIRException {
    try {
      DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
      factory.setNamespaceAware(true);
      factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
      factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
      factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
      factory.setXIncludeAware(false);
      factory.setExpandEntityReferences(false);
      DocumentBuilder builder = factory.newDocumentBuilder();
      return builder.parse(in);
    } catch (Exception e) {
      throw new FHIRException("Error processing "+name+": "+e.getMessage(), e);
    }
  }

  /**
   * Validates the root element and processes every {@code Worksheet} child of it.
   *
   * @throws FHIRException if the root element is not a spreadsheet {@code Workbook} or a
   *         worksheet is malformed
   */
  private void readXml() throws FHIRException {
    Element root = xml.getDocumentElement();
    // Constant-first comparison: a root without any namespace reports null here.
    check(XLS_NS.equals(root.getNamespaceURI()), "Spreadsheet namespace incorrect");
    check(root.getNodeName().equals("Workbook"), "Spreadsheet element name incorrect");
    for (Node node = root.getFirstChild(); node != null; node = node.getNextSibling()) {
      if (node.getNodeType() == Node.ELEMENT_NODE && node.getNodeName().equals("Worksheet")) {
        processWorksheet((Element) node);
      }
    }
  }

  /** One-based number of the row being read, for error messages; {@code null} until a row has been read. */
  private Integer rowIndex;

  /**
   * Converts one {@code Worksheet} element into a {@link Sheet} and registers it under the
   * worksheet's {@code Name} attribute.
   *
   * <p>The first row becomes the header row, all following rows become data rows, and
   * empty rows at the end of the sheet are dropped.
   *
   * @param node the {@code Worksheet} element
   * @throws FHIRException if the worksheet does not contain exactly one {@code Table}
   *         element or a cell cannot be read
   */
  private void processWorksheet(Element node) throws FHIRException {
    String title = node.getAttributeNS(XLS_NS, "Name");
    Sheet sheet = new Sheet();
    sheet.title = title;
    sheets.put(title, sheet);

    NodeList table = node.getElementsByTagNameNS(XLS_NS, "Table");
    check(table.getLength() == 1, "multiple table elements");
    NodeList rows = ((Element) table.item(0)).getElementsByTagNameNS(XLS_NS, "Row");
    if (rows.getLength() == 0) {
      return;
    }

    rowIndex = 1;
    sheet.columns = readRow((Element) rows.item(0));
    for (int i = 1; i < rows.getLength(); i++) {
      rowIndex++;
      sheet.rows.add(readRow((Element) rows.item(i)));
    }

    // Remove empty rows at the end of the sheet
    while (!sheet.rows.isEmpty() && isEmptyRow(sheet.rows.get(sheet.rows.size() - 1))) {
      sheet.rows.remove(sheet.rows.size() - 1);
    }
  }

  /**
   * Tells whether every cell of the row is {@code null} or blank after trimming.
   *
   * @param w the row to inspect
   * @return {@code true} if the row carries no content (an empty row counts as empty)
   */
  private boolean isEmptyRow(Row w) {
    for (String cell : w) {
      // readData yields null for error cells; those carry no content either.
      if (cell != null && !cell.trim().isEmpty()) {
        return false;
      }
    }
    return true;
  }

  /**
   * Reads the {@code Cell} elements of a {@code Row} element into a {@link Row}.
   *
   * <p>A cell may carry a one-based {@code Index} attribute that skips ahead; the skipped
   * positions are filled with empty strings so that values stay aligned with their columns.
   *
   * @param row the {@code Row} element
   * @return the cell values in column order
   * @throws FHIRException if a cell cannot be read
   * @throws NumberFormatException if an {@code Index} attribute is not an integer
   */
  private Row readRow(Element row) throws DOMException, FHIRException  {
    Row res = new Row();
    int ndx = 1; // one-based position of the next cell to be appended
    NodeList cells = row.getElementsByTagNameNS(XLS_NS, "Cell");
    for (int i = 0; i < cells.getLength(); i++) {
      Element cell = (Element) cells.item(i);
      if (cell.hasAttributeNS(XLS_NS, "Index")) {
        int index = Integer.parseInt(cell.getAttributeNS(XLS_NS, "Index"));
        for (; ndx < index; ndx++) {
          res.add("");
        }
      }
      // The first value of the row is passed along purely to label error messages.
      res.add(readData(cell, ndx, res.size() > 0 ? res.get(0) : "?"));
      ndx++;
    }
    return res;
  }

  /**
   * Extracts the value of a cell from its {@code Data} child.
   *
   * @param cell the {@code Cell} element
   * @param col one-based column position, used only in error messages
   * @param s first value of the current row, used only in error messages
   * @return {@code ""} when the cell has no {@code Data} child; {@code "True"} or
   *         {@code "False"} for a {@code Boolean} cell; the text content for
   *         {@code String}, {@code Number} and {@code DateTime} cells; {@code null} for an
   *         {@code Error} cell
   * @throws FHIRException if the cell has more than one {@code Data} child or an unknown
   *         {@code Type}
   */
  private String readData(Element cell, int col, String s) throws DOMException, FHIRException  {
    List<Element> data = new ArrayList<Element>();
    XMLUtil.getNamedChildren(cell, "Data", data);
    if (data.size() == 0) {
      return "";
    }
    if (data.size() != 1) {
      // Built only on failure: assembling it needs the full text content of the cell.
      throw new FHIRException("Multiple Data encountered ("+Integer.toString(data.size())+" @ col "+Integer.toString(col)+" - "+cell.getTextContent()+" ("+s+"))"+" in "+getLocation());
    }

    Element d = data.get(0);
    String type = d.getAttributeNS(XLS_NS, "Type");
    if ("Boolean".equals(type)) {
      return d.getTextContent().equals("1") ? "True" : "False";
    }
    if ("String".equals(type) || "Number".equals(type) || "DateTime".equals(type)) {
      return d.getTextContent();
    }
    if ("Error".equals(type)) {
      return null;
    }
    throw new FHIRException("Cell Type is not known ("+d.getAttributeNodeNS(XLS_NS, "Type")+") in "+getLocation());
  }

  /**
   * Throws when a structural expectation about the document does not hold.
   *
   * @param test the condition that must be {@code true}
   * @param message what is wrong; the current location is appended
   * @throws FHIRException if {@code test} is {@code false}
   */
  private void check(boolean test, String message) throws FHIRException  {
    if (!test) {
      throw new FHIRException(message+" in "+getLocation());
    }
  }

  /**
   * Describes the current read position for error messages.
   *
   * @return the source name, followed by the row number once a row has been read
   */
  private String getLocation() {
    // Structural checks run before the first row is read, when there is no row number yet.
    if (rowIndex == null) {
      return name;
    }
    return name+", row "+rowIndex.toString();
  }

  /** @return the parsed sheets, keyed by worksheet name (the live map, not a copy) */
  public Map<String, Sheet> getSheets() {
    return sheets;
  }


}