PageRenderTime 74ms CodeModel.GetById 32ms RepoModel.GetById 1ms app.codeStats 0ms

/nuclos-server/src/main/java/org/nuclos/server/common/ooxml/ExcelReader.java

https://bitbucket.org/nuclos/nuclos
Java | 208 lines | 120 code | 20 blank | 68 comment | 23 complexity | 4351976d412e5714296edce28a4bb01f MD5 | raw file
Possible License(s): Apache-2.0
  1. //Copyright (C) 2010 Novabit Informationssysteme GmbH
  2. //
  3. //This file is part of Nuclos.
  4. //
  5. //Nuclos is free software: you can redistribute it and/or modify
  6. //it under the terms of the GNU Affero General Public License as published by
  7. //the Free Software Foundation, either version 3 of the License, or
  8. //(at your option) any later version.
  9. //
  10. //Nuclos is distributed in the hope that it will be useful,
  11. //but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. //MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. //GNU Affero General Public License for more details.
  14. //
  15. //You should have received a copy of the GNU Affero General Public License
  16. //along with Nuclos. If not, see <http://www.gnu.org/licenses/>.
  17. package org.nuclos.server.common.ooxml;
  18. import java.io.BufferedInputStream;
  19. import java.io.IOException;
  20. import java.io.InputStream;
  21. import java.util.ArrayList;
  22. import java.util.List;
  23. import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
  24. import org.apache.poi.ss.SpreadsheetVersion;
  25. import org.apache.poi.ss.usermodel.Cell;
  26. import org.apache.poi.ss.usermodel.CellType;
  27. import org.apache.poi.ss.usermodel.DateUtil;
  28. import org.apache.poi.ss.usermodel.FormulaError;
  29. import org.apache.poi.ss.usermodel.Row;
  30. import org.apache.poi.ss.usermodel.Sheet;
  31. import org.apache.poi.ss.usermodel.Workbook;
  32. import org.apache.poi.ss.usermodel.WorkbookFactory;
  33. import org.apache.poi.ss.util.AreaReference;
  34. import org.apache.poi.ss.util.CellReference;
  35. /**
  36. * Helper class providing simple read-only access for examing Excel documents (both OOXML and OLE2).
  37. *
  38. * <p>Cell locations are interpreted as cell or area references using the Excel cell reference format, e.g.
  39. * {@code A1}, {@code A1:B10}, or {@code Sheet1!A1}. If the sheet name is omitted, the active sheet is used.
  40. *
  41. * <p>Cell references will yield a Java object which represents the cell value at the given location. The
  42. * mapping from Excel values to Java objects is described below.
  43. *
  44. * <p>Area references will yield a 2-dimensional Object array ({@code Object[][]}). The first component represents
  45. * the row, the second the column (0-based from within the area). For example, given the area reference {@code A1:C2},
  46. * {@code arr[0][2]} represents the cell value at {@code C1}.
  47. *
  48. * <p>Mapping from cell values to Java objects:
  49. * <table border="1">
  50. * <caption>Cell type to java representation</caption>
  51. * <tr><th>Cell type</th><th>Java representation</th></tr>
  52. * <tr><td>Text</td><td>the text value as {@link java.lang.String String}</td></tr>
  53. * <tr><td>Number</td><td>the numeric value as {@link java.lang.Double Double} (see note below)</td></tr>
  54. * <tr><td>Number (formatted as date)</td><td>the date value as {@link java.util.Date} (see note below)</td></tr>
  55. * <tr><td>Logical</td><td>the logical value as {@link java.lang.Boolean Boolean}</td></tr>
  56. * <tr><td>Formula</td><td>the cached value of the calculation (according to the type of the cached value)</td></tr>
  57. * <tr><td>Error</td><td>the error text as {@link java.lang.String String}, e.g. {@literal "#DIV/0!"}</td></tr>
  58. * <tr><td>Blank</td><td>{@literal null}</td></tr>
  59. * </table>
  60. *
  61. * <p>Note: Excel stores date values internally as numbers (i.e. number of days from a given base date,
  62. * usually 1900-01-01).
  63. * This method uses the following heuristic to distinguish number and date values: If a (numeric) cell is
  64. * formatted as date, its value is treated as date and converted into a {@link java.util.Date} object.
  65. * All other numeric values are mapped to {@link Double}. Since Excel uses internally IEEE 754,
  66. * {@link Double} is the most appropriate representation for the retrieved numeric values.
  67. */
  68. public class ExcelReader {
  69. private final Workbook workbook;
  70. public ExcelReader(InputStream is) throws IOException, InvalidFormatException {
  71. /*
  72. PushbackInputStream pbis = new PushbackInputStream(is, 16);
  73. if (POIXMLDocument.hasOOXMLHeader(pbis)) {
  74. workbook = new XSSFWorkbook(pbis);
  75. } else {
  76. workbook = new HSSFWorkbook(pbis);
  77. }
  78. */
  79. if (!is.markSupported()) {
  80. is = new BufferedInputStream(is);
  81. }
  82. workbook = WorkbookFactory.create(is);
  83. }
  84. public ExcelReader(Workbook workbook) {
  85. this.workbook = workbook;
  86. }
  87. public Object getCellValue(String cellName) {
  88. // First, test if the given cell name is a (single) cell reference
  89. // (this is needed in order to distinguish "A1" from "A1:A1").
  90. CellReference cr;
  91. try {
  92. cr = new CellReference(cellName);
  93. } catch (Exception ex) {
  94. // No -- we will later try to parse it as area reference
  95. // (a superset, so we don't care about the exception here)
  96. cr = null;
  97. }
  98. Object value;
  99. if (cr != null) {
  100. value = getSingleCellValue(cr);
  101. } else {
  102. AreaReference ar = new AreaReference(cellName, SpreadsheetVersion.EXCEL2007);
  103. value = getAreaValueArray(ar);
  104. }
  105. return value;
  106. }
  107. /**
  108. * Similar to {@link #getCellValue} but takes a (var-arg) array of cell names
  109. * and returns the corresponding values as list.
  110. */
  111. public List<Object> getCellValues(String...cellNames) {
  112. List<Object> cellValues = new ArrayList<Object>();
  113. for (String cellName : cellNames) {
  114. cellValues.add(getCellValue(cellName));
  115. }
  116. return cellValues;
  117. }
  118. /**
  119. * Returns the cell value for the given cell reference.
  120. */
  121. private Object getSingleCellValue(CellReference cr) {
  122. Object value = null;
  123. Sheet sheet;
  124. String sheetName = cr.getSheetName();
  125. if (sheetName != null) {
  126. sheet = workbook.getSheet(sheetName);
  127. if (sheet == null) {
  128. throw new IllegalArgumentException("Sheet '" + sheetName + "' does not exist");
  129. }
  130. } else {
  131. sheet = workbook.getSheetAt(workbook.getActiveSheetIndex());
  132. }
  133. Row row = sheet.getRow(cr.getRow());
  134. if (row != null) {
  135. Cell cell = row.getCell(cr.getCol());
  136. if (cell != null) {
  137. CellType cellType = cell.getCellType();
  138. if (cellType == CellType.FORMULA) {
  139. cellType = cell.getCachedFormulaResultType();
  140. }
  141. value = getCellValue(cell, cellType);
  142. }
  143. }
  144. return value;
  145. }
  146. private Object[][] getAreaValueArray(AreaReference ar) {
  147. int cols = Math.abs(ar.getFirstCell().getCol() - ar.getLastCell().getCol()) + 1;
  148. int rows = Math.abs(ar.getFirstCell().getRow() - ar.getLastCell().getRow()) + 1;
  149. CellReference[] crs = ar.getAllReferencedCells();
  150. if (crs.length != rows * cols) {
  151. throw new IllegalArgumentException("Invalid area reference " + ar);
  152. }
  153. Object[][] rect = new Object[rows][cols];
  154. for (int r = 0; r < rows; r++) {
  155. for (int c = 0; c < cols; c++) {
  156. rect[r][c] = getSingleCellValue(crs[r * cols + c]);
  157. }
  158. }
  159. return rect;
  160. }
  161. private static Object getCellValue(Cell cell, CellType cellType) {
  162. switch (cellType) {
  163. case BLANK:
  164. return null;
  165. case BOOLEAN:
  166. return cell.getBooleanCellValue();
  167. case NUMERIC:
  168. if (DateUtil.isCellDateFormatted(cell)) {
  169. return cell.getDateCellValue();
  170. } else {
  171. return cell.getNumericCellValue();
  172. }
  173. case STRING:
  174. return cell.getStringCellValue();
  175. case ERROR:
  176. return FormulaError.forInt(cell.getErrorCellValue()).getString();
  177. case FORMULA:
  178. return cell.getCellFormula();
  179. default:
  180. throw new IllegalArgumentException("Unknown POI cell type " + cellType);
  181. }
  182. }
  183. public static class InvalidCellReferenceException extends IllegalArgumentException {
  184. public InvalidCellReferenceException(String cell) {
  185. super(cell);
  186. }
  187. }
  188. }