001package org.hl7.fhir.utilities.xls;
002
003/*
004  Copyright (c) 2011+, HL7, Inc.
005  All rights reserved.
006  
007  Redistribution and use in source and binary forms, with or without modification, 
008  are permitted provided that the following conditions are met:
009    
010   * Redistributions of source code must retain the above copyright notice, this 
011     list of conditions and the following disclaimer.
012   * Redistributions in binary form must reproduce the above copyright notice, 
013     this list of conditions and the following disclaimer in the documentation 
014     and/or other materials provided with the distribution.
015   * Neither the name of HL7 nor the names of its contributors may be used to 
016     endorse or promote products derived from this software without specific 
017     prior written permission.
018  
019  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
020  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
021  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
022  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
023  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
024  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
025  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
026  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
027  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
028  POSSIBILITY OF SUCH DAMAGE.
029  
030 */
031
032
033
034import java.io.InputStream;
035import java.util.ArrayList;
036import java.util.HashMap;
037import java.util.List;
038import java.util.Map;
039
040import javax.xml.parsers.DocumentBuilder;
041import javax.xml.parsers.DocumentBuilderFactory;
042
043import org.hl7.fhir.exceptions.FHIRException;
044import org.hl7.fhir.utilities.Utilities;
045import org.hl7.fhir.utilities.xml.XMLUtil;
046import org.w3c.dom.DOMException;
047import org.w3c.dom.Document;
048import org.w3c.dom.Element;
049import org.w3c.dom.Node;
050import org.w3c.dom.NodeList;
051
052public class XLSXmlParser {
053
054  private static final String XLS_NS = "urn:schemas-microsoft-com:office:spreadsheet";
055
056  public class Row extends ArrayList<String> {  private static final long serialVersionUID = 1L; }
057  
058  public class Sheet {
059    public String title;
060    public Row columns;
061    public List<Row> rows = new ArrayList<Row>();
062
063    public boolean hasColumn(String column)  {
064      for (int i = 0; i < columns.size(); i++) {
065        if (columns.get(i).equalsIgnoreCase(column))
066          return true;
067      }
068      return false;
069    }
070    
071    public boolean hasColumn(int row, String column)  {
072      String s = getColumn(row, column);
073      return s != null && !s.equals("");     
074    }
075    
076    public String getColumn(int row, String column)  {
077      int c = -1;
078      String s = "";
079      for (int i = 0; i < columns.size(); i++) {
080        s = s + ","+columns.get(i);
081        if (columns.get(i).equalsIgnoreCase(column))
082          c = i;
083      }
084      if (c == -1)
085        return ""; // throw new FHIRException("unable to find column "+column+" in "+s.substring(1));
086      else if (rows.get(row).size() <= c)
087        return "";
088      else {
089        s = rows.get(row).get(c); 
090        return s == null ? "" : s.trim().replace("\t",  "  ").replace("\u00A0", " ");
091      }
092    }
093
094    public List<String> getColumnNamesBySuffix(String suffix)  {
095      List<String> names = new ArrayList<String>();
096      for (int i = 0; i < columns.size(); i++) {
097        if (columns.get(i).endsWith(suffix))
098          names.add(columns.get(i));
099      }
100      return names;
101    }
102
103    public String getByColumnPrefix(int row, String column)  {
104      int c = -1;
105      String s = "";
106      for (int i = 0; i < columns.size(); i++) {
107        s = s + ","+columns.get(i);
108        if (columns.get(i).startsWith(column))
109          c = i;
110      }
111      if (c == -1)
112        return ""; // throw new FHIRException("unable to find column "+column+" in "+s.substring(1));
113      else if (rows.get(row).size() <= c)
114        return "";
115      else
116        return rows.get(row).get(c).trim();
117    }
118
119    public List<Row> getRows() {
120      return rows;
121    }
122
123    public int getIntColumn(int row, String column)  {
124      String value = getColumn(row, column);
125      if (Utilities.noString(value))
126        return 0;
127      else
128        return Integer.parseInt(value);
129    }
130
131    public String getNonEmptyColumn(int row, String column) throws FHIRException  {
132     String value = getColumn(row, column);
133     if (Utilities.noString(value))
134       throw new FHIRException("The colummn "+column+" cannot be empty");
135     return value;
136    }
137
138    public boolean hasColumnContent(String col) {
139      int i = columns.indexOf(col);
140      if (i == -1)
141        return false;
142      for (Row r : rows) {
143        if (r.size() > i && !Utilities.noString(r.get(i)))
144          return true;
145      }
146      return false;
147    }
148    
149    
150  }
151  
152  private Map<String, Sheet> sheets;
153  private Document xml;
154  private String name;
155  
156  public XLSXmlParser(InputStream in, String name) throws FHIRException  {
157    this.name = name;
158    try {
159      xml = parseXml(in);
160      sheets = new HashMap<String, Sheet>();
161      readXml();
162    } catch (Exception e) {
163      throw new FHIRException("unable to load "+name+": "+e.getMessage(), e);
164    }
165  }
166
167  private Document parseXml(InputStream in) throws FHIRException  {
168    try {
169      DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
170      factory.setNamespaceAware(true);
171      DocumentBuilder builder = factory.newDocumentBuilder();
172      return builder.parse(in);
173    } catch (Exception e) {
174      throw new FHIRException("Error processing "+name+": "+e.getMessage(), e);
175    }
176  }
177
178  private void readXml() throws FHIRException  {
179    Element root = xml.getDocumentElement();
180    check(root.getNamespaceURI().equals(XLS_NS), "Spreadsheet namespace incorrect");
181    check(root.getNodeName().equals("Workbook"), "Spreadsheet element name incorrect");
182    Node node = root.getFirstChild();
183    while (node != null) {
184      if (node.getNodeName().equals("Worksheet"))
185        processWorksheet((Element)node);
186      node = node.getNextSibling();
187    }
188  }
189  
190  private Integer rowIndex;
191  private void processWorksheet(Element node) throws FHIRException  {
192    Sheet sheet = new Sheet();
193    sheet.title = node.getAttributeNS(XLS_NS, "Name");
194    sheets.put(node.getAttributeNS(XLS_NS, "Name"), sheet);
195    NodeList table = node.getElementsByTagNameNS(XLS_NS, "Table");
196    check(table.getLength() == 1, "multiple table elements");
197    NodeList rows = ((Element)table.item(0)).getElementsByTagNameNS(XLS_NS, "Row");
198    if (rows.getLength() == 0) 
199      return;
200    rowIndex = 1;
201    sheet.columns = readRow((Element) rows.item(0));
202    for (int i = 1; i < rows.getLength(); i++) {
203      rowIndex++;
204      sheet.rows.add(readRow((Element) rows.item(i)));
205    }
206       
207    //Remove empty rows at the end of the sheet
208    while( sheet.rows.size() != 0 && isEmptyRow(sheet.rows.get(sheet.rows.size()-1) ) )
209        sheet.rows.remove(sheet.rows.size()-1);
210  }
211
212  
213  private boolean isEmptyRow(Row w)
214  { 
215          for( int col=0; col<w.size(); col++ )
216                  if( !w.get(col).trim().isEmpty() ) return false;
217          
218          return true;
219  }
220  
221  private Row readRow(Element row) throws DOMException, FHIRException  {
222    Row res = new Row();
223    int ndx = 1;    
224    NodeList cells = row.getElementsByTagNameNS(XLS_NS, "Cell");
225    for (int i = 0; i < cells.getLength(); i++) {
226      Element cell = (Element) cells.item(i);
227      if (cell.hasAttributeNS(XLS_NS, "Index")) {
228        int index = Integer.parseInt(cell.getAttributeNS(XLS_NS, "Index"));
229        while (ndx < index) {
230          res.add("");
231          ndx++;
232        }
233      }
234      res.add(readData(cell, ndx, res.size() > 0 ? res.get(0) : "?"));
235      ndx++;      
236    }
237    return res;
238  }
239
240  private String readData(Element cell, int col, String s) throws DOMException, FHIRException  {
241    List<Element> data = new ArrayList<Element>(); 
242    XMLUtil.getNamedChildren(cell, "Data", data); // cell.getElementsByTagNameNS(XLS_NS, "Data");
243    if (data.size() == 0)
244      return "";
245    check(data.size() == 1, "Multiple Data encountered ("+Integer.toString(data.size())+" @ col "+Integer.toString(col)+" - "+cell.getTextContent()+" ("+s+"))");
246    Element d = data.get(0);
247    String type = d.getAttributeNS(XLS_NS, "Type");
248    if ("Boolean".equals(type)) {
249      if (d.getTextContent().equals("1"))
250        return "True";
251      else
252        return "False";
253    } else if ("String".equals(type)) {
254      return d.getTextContent();
255    } else if ("Number".equals(type)) {
256      return d.getTextContent();
257    } else if ("DateTime".equals(type)) {
258      return d.getTextContent();
259    } else if ("Error".equals(type)) {
260      return null;
261    } else 
262      throw new FHIRException("Cell Type is not known ("+d.getAttributeNodeNS(XLS_NS, "Type")+") in "+getLocation());
263  }
264
265  private void check(boolean test, String message) throws FHIRException  {
266    if (!test)
267      throw new FHIRException(message+" in "+getLocation());
268  }
269  
270  private String getLocation() {
271    return name+", row "+rowIndex.toString();
272  }
273
274  public Map<String, Sheet> getSheets() {
275    return sheets;
276  }
277
278  
279}