PageRenderTime 4825ms CodeModel.GetById 22ms RepoModel.GetById 4ms app.codeStats 1ms

/src/main/java/org/sysmodb/xml/XMLGeneration.java

https://github.com/myGrid/simple-spreadsheet-extractor
Java | 354 lines | 288 code | 52 blank | 14 comment | 49 complexity | a10633d5ea2ac43cca0684f975199643 MD5 | raw file
Possible License(s): BSD-3-Clause
  1. package org.sysmodb.xml;
  2. import java.io.IOException;
  3. import java.io.Writer;
  4. import java.util.ArrayList;
  5. import java.util.List;
  6. import javax.xml.stream.XMLOutputFactory;
  7. import javax.xml.stream.XMLStreamException;
  8. import javax.xml.stream.XMLStreamWriter;
  9. import org.apache.log4j.Logger;
  10. import org.apache.poi.hssf.usermodel.HSSFDataValidation;
  11. import org.apache.poi.hssf.usermodel.HSSFSheet;
  12. import org.apache.poi.hssf.usermodel.HSSFWorkbook;
  13. import org.apache.poi.ss.usermodel.Cell;
  14. import org.apache.poi.ss.usermodel.CellStyle;
  15. import org.apache.poi.ss.usermodel.Name;
  16. import org.apache.poi.ss.usermodel.Row;
  17. import org.apache.poi.ss.usermodel.Sheet;
  18. import org.apache.poi.ss.usermodel.Workbook;
  19. import org.apache.poi.ss.util.AreaReference;
  20. import org.apache.poi.ss.util.CellRangeAddress;
  21. import org.apache.poi.ss.util.CellReference;
  22. import org.apache.poi.xssf.usermodel.XSSFDataValidation;
  23. import org.apache.poi.xssf.usermodel.XSSFSheet;
  24. import org.apache.poi.xssf.usermodel.XSSFWorkbook;
  25. import org.apache.xerces.util.XMLChar;
  26. import org.sysmodb.CellInfo;
  27. public class XMLGeneration {
  28. private final static Logger logger = Logger.getLogger(XMLGeneration.class);
  29. private final Workbook poiWorkbook;
  30. private XMLStyleHelper styleHelper = null;
  31. private List<CellStyle> styles = new ArrayList<CellStyle>();
  32. public XMLGeneration(Workbook poiWorkbook) {
  33. this.poiWorkbook = poiWorkbook;
  34. if (poiWorkbook instanceof XSSFWorkbook) {
  35. styleHelper = new XSSFXMLStyleHelper();
  36. } else {
  37. styleHelper = new HSSFXMLStyleHelper((HSSFWorkbook) poiWorkbook);
  38. }
  39. }
  40. public void outputToWriter(Writer outputWriter) throws IOException, XMLStreamException {
  41. XMLOutputFactory factory = XMLOutputFactory.newInstance();
  42. XMLStreamWriter xmlwriter = factory.createXMLStreamWriter(outputWriter);
  43. xmlwriter.writeStartDocument("1.0");
  44. streamXML(xmlwriter);
  45. xmlwriter.writeEndDocument();
  46. xmlwriter.flush();
  47. xmlwriter.close();
  48. }
  49. private void streamXML(XMLStreamWriter xmlWriter) throws XMLStreamException {
  50. xmlWriter.writeStartElement("workbook");
  51. xmlWriter.writeDefaultNamespace("http://www.sysmo-db.org/2010/xml/spreadsheet");
  52. writeNamedRanged(xmlWriter);
  53. xmlWriter.flush();
  54. writeStyles(xmlWriter);
  55. xmlWriter.flush();
  56. writeSheets(xmlWriter);
  57. xmlWriter.writeEndElement();
  58. }
  59. private void writeSheets(XMLStreamWriter xmlWriter) throws XMLStreamException {
  60. for (short i = 0; i < poiWorkbook.getNumberOfSheets(); i++) {
  61. Sheet sheet = poiWorkbook.getSheetAt(i);
  62. writeSheet(xmlWriter, i, sheet);
  63. xmlWriter.flush();
  64. }
  65. }
  66. private void writeSheet(XMLStreamWriter xmlWriter, short sheetIndex, Sheet sheet) throws XMLStreamException {
  67. xmlWriter.writeStartElement("sheet");
  68. xmlWriter.writeAttribute("name", sheet.getSheetName());
  69. xmlWriter.writeAttribute("index", String.valueOf(sheetIndex + 1));
  70. xmlWriter.writeAttribute("hidden", String.valueOf(poiWorkbook.isSheetHidden(sheetIndex)));
  71. xmlWriter.writeAttribute("very_hidden", String.valueOf(poiWorkbook.isSheetVeryHidden(sheetIndex)));
  72. writeDataValidations(xmlWriter, sheet);
  73. writeColumns(xmlWriter, sheet);
  74. writeRows(xmlWriter, sheet);
  75. xmlWriter.writeEndElement();
  76. }
  77. private void writeDataValidations(XMLStreamWriter xmlWriter, Sheet sheet) throws XMLStreamException {
  78. xmlWriter.writeStartElement("data_validations");
  79. if (sheet instanceof HSSFSheet) {
  80. writeHSSFDataValidations(xmlWriter, (HSSFSheet) sheet);
  81. } else {
  82. writeXSSFDataValidations(xmlWriter, (XSSFSheet) sheet);
  83. }
  84. xmlWriter.writeEndElement();
  85. }
  86. private void writeHSSFDataValidations(XMLStreamWriter xmlWriter, HSSFSheet sheet) throws XMLStreamException {
  87. try {
  88. List<HSSFDataValidation> validationData = sheet.getDataValidations();
  89. for (HSSFDataValidation validation : validationData) {
  90. for (CellRangeAddress address : validation.getRegions().getCellRangeAddresses()) {
  91. String formula = validation.getValidationConstraint().getFormula1();
  92. if (formula != null) {
  93. writeDataValidation(xmlWriter, address, formula);
  94. }
  95. }
  96. }
  97. } catch (IllegalStateException e) {
  98. logger.warn("Problem reading data validation table " + e.getMessage());
  99. }
  100. }
  101. private void writeDataValidation(XMLStreamWriter xmlWriter, CellRangeAddress address, String formula)
  102. throws XMLStreamException {
  103. xmlWriter.writeStartElement("data_validation");
  104. xmlWriter.writeAttribute("first_column", String.valueOf(address.getFirstColumn() + 1));
  105. xmlWriter.writeAttribute("last_column", String.valueOf(address.getLastColumn() + 1));
  106. xmlWriter.writeAttribute("first_row", String.valueOf(address.getFirstRow() + 1));
  107. xmlWriter.writeAttribute("last_row", String.valueOf(address.getLastRow() + 1));
  108. xmlWriter.writeStartElement("constraint");
  109. xmlWriter.writeCharacters(formula);
  110. xmlWriter.writeEndElement();
  111. xmlWriter.writeEndElement();
  112. }
  113. private void writeXSSFDataValidations(XMLStreamWriter xmlWriter, XSSFSheet sheet) throws XMLStreamException {
  114. List<XSSFDataValidation> validationData = sheet.getDataValidations();
  115. for (XSSFDataValidation validation : validationData) {
  116. for (CellRangeAddress address : validation.getRegions().getCellRangeAddresses()) {
  117. String formula = validation.getValidationConstraint().getFormula1();
  118. if (formula != null) {
  119. writeDataValidation(xmlWriter, address, formula);
  120. }
  121. }
  122. }
  123. }
  124. private void writeColumns(XMLStreamWriter xmlWriter, Sheet sheet) throws XMLStreamException {
  125. int firstCol = 1;
  126. int lastCol = 1;
  127. // determine first and last column
  128. for (int y = sheet.getFirstRowNum(); y <= sheet.getLastRowNum(); y++) {
  129. Row row = sheet.getRow(y);
  130. if (row == null) {
  131. continue;
  132. }
  133. int firstCell = row.getFirstCellNum();
  134. if (firstCell == -1)
  135. continue;
  136. if (firstCell < firstCol)
  137. firstCol = firstCell + 1;// Number of columns
  138. int lastCell = row.getLastCellNum();
  139. if (lastCell > lastCol)
  140. lastCol = lastCell;// Number of columns
  141. }
  142. xmlWriter.writeStartElement("columns");
  143. xmlWriter.writeAttribute("first_column", String.valueOf(firstCol));
  144. xmlWriter.writeAttribute("last_column", String.valueOf(lastCol));
  145. for (int x = firstCol - 1; x < lastCol; x++) {
  146. xmlWriter.writeStartElement("column");
  147. xmlWriter.writeAttribute("index", String.valueOf(x + 1));
  148. xmlWriter.writeAttribute("column_alpha", String.valueOf(column_alpha(x)));
  149. xmlWriter.writeAttribute("width", String.valueOf(sheet.getColumnWidth(x)));
  150. xmlWriter.writeEndElement();
  151. }
  152. xmlWriter.writeEndElement();
  153. }
  154. private void writeRows(XMLStreamWriter xmlWriter, Sheet sheet) throws XMLStreamException {
  155. int firstRow = sheet.getFirstRowNum();
  156. int lastRow = sheet.getLastRowNum();
  157. xmlWriter.writeStartElement("rows");
  158. xmlWriter.writeAttribute("first_row", String.valueOf(firstRow + 1));
  159. xmlWriter.writeAttribute("last_row", String.valueOf(lastRow + 1));
  160. for (int y = firstRow; y <= lastRow; y++) {
  161. Row row = sheet.getRow(y);
  162. if (row != null) {
  163. writeRow(xmlWriter, y, row, sheet);
  164. }
  165. }
  166. xmlWriter.writeEndElement();
  167. }
  168. private void writeRow(XMLStreamWriter xmlWriter, int index, Row row, Sheet sheet) throws XMLStreamException {
  169. xmlWriter.writeStartElement("row");
  170. xmlWriter.writeAttribute("index", String.valueOf(index + 1));
  171. if (sheet.getDefaultRowHeightInPoints() != row.getHeightInPoints()) {
  172. xmlWriter.writeAttribute("height", "" + row.getHeightInPoints() + "pt");
  173. }
  174. if (row.getFirstCellNum() != -1) {
  175. writeCells(xmlWriter, row);
  176. }
  177. xmlWriter.writeEndElement();
  178. }
  179. private void writeCells(XMLStreamWriter xmlWriter, Row row) throws XMLStreamException {
  180. for (int x = row.getFirstCellNum(); x <= row.getLastCellNum(); x++) {
  181. Cell cell = row.getCell(x);
  182. if (cell != null) {
  183. CellInfo info = new CellInfo(cell, poiWorkbook);
  184. if (info.value != null) {
  185. xmlWriter.writeStartElement("cell");
  186. xmlWriter.writeAttribute("column", String.valueOf(x + 1));
  187. xmlWriter.writeAttribute("column_alpha", column_alpha(x));
  188. xmlWriter.writeAttribute("row", String.valueOf(row.getRowNum() + 1));
  189. xmlWriter.writeAttribute("type", info.type);
  190. int styleIndex = cell.getCellStyle().getIndex();
  191. if (styles.get(styleIndex) != null) {
  192. xmlWriter.writeAttribute("style", "style" + cell.getCellStyle().getIndex());
  193. }
  194. if (info.formula != null) {
  195. xmlWriter.writeAttribute("formula", stripControlCharacters(info.formula));
  196. }
  197. xmlWriter.writeCharacters(xml10Characters(info.value));
  198. xmlWriter.writeEndElement();
  199. }
  200. }
  201. }
  202. }
  203. /**
  204. *
  205. * @param original
  206. * @return the same String but with XML 1.0 invalid characters (like form
  207. * feed) removed
  208. */
  209. private String xml10Characters(String original) {
  210. // TODO: this would be better incorporating into an custom version of
  211. // XMLStreamWriter.writeCharacters
  212. String result = "";
  213. for (int i = 0; i < original.length(); i++) {
  214. char c = original.charAt(i);
  215. if (XMLChar.isValid(c)) {
  216. result += c;
  217. }
  218. }
  219. return result;
  220. }
  221. private void writeNamedRanged(XMLStreamWriter xmlWriter) throws XMLStreamException {
  222. xmlWriter.writeStartElement("named_ranges");
  223. for (int i = 0; i < poiWorkbook.getNumberOfNames(); i++) {
  224. Name name = poiWorkbook.getNameAt(i);
  225. try {
  226. if (!name.isDeleted() && !name.isFunctionName()) {
  227. String formula = name.getRefersToFormula();
  228. AreaReference areaReference = new AreaReference(formula, getWorkbook().getSpreadsheetVersion());
  229. CellReference firstCellReference = areaReference.getFirstCell();
  230. CellReference lastCellReference = areaReference.getLastCell();
  231. formula = formula.replaceAll("\\p{C}", "?");
  232. xmlWriter.writeStartElement("named_range");
  233. xmlWriter.writeAttribute("first_column", String.valueOf(firstCellReference.getCol() + 1));
  234. xmlWriter.writeAttribute("first_row", String.valueOf(firstCellReference.getRow() + 1));
  235. xmlWriter.writeAttribute("last_column", String.valueOf(lastCellReference.getCol() + 1));
  236. xmlWriter.writeAttribute("last_row", String.valueOf(lastCellReference.getRow() + 1));
  237. xmlWriter.writeStartElement("name");
  238. xmlWriter.writeCharacters(name.getNameName());
  239. xmlWriter.writeEndElement();
  240. xmlWriter.writeStartElement("sheet_name");
  241. xmlWriter.writeCharacters(name.getSheetName());
  242. xmlWriter.writeEndElement();
  243. xmlWriter.writeStartElement("refers_to_formula");
  244. xmlWriter.writeCharacters(stripControlCharacters(formula));
  245. xmlWriter.writeEndElement();
  246. xmlWriter.writeEndElement();
  247. }
  248. } catch (RuntimeException e) {
  249. // caused by an not implemented error in POI related to macros,
  250. // and some invalid formala's that dont' relate to contiguous
  251. // ranges.
  252. }
  253. }
  254. xmlWriter.writeEndElement();
  255. }
  256. private void writeStyles(XMLStreamWriter xmlWriter) throws XMLStreamException {
  257. xmlWriter.writeStartElement("styles");
  258. gatherStyles();
  259. for (CellStyle style : styles) {
  260. if (style != null) {
  261. XMLStyleGenerator.writeStyle(xmlWriter, style, styleHelper);
  262. }
  263. }
  264. xmlWriter.writeEndElement();
  265. }
  266. private void gatherStyles() {
  267. for (short i = 0; i < getWorkbook().getNumCellStyles(); i++) {
  268. try {
  269. CellStyle style = getWorkbook().getCellStyleAt(i);
  270. if (isStyleEmpty(style)) {
  271. styles.add(i, null);
  272. } else {
  273. styles.add(i, style);
  274. }
  275. }
  276. // Sometimes XSLX messes up and reports wrong number of
  277. // styles...
  278. catch (IndexOutOfBoundsException e) {
  279. styles.add(i, null);
  280. break;
  281. }
  282. }
  283. }
  284. private boolean isStyleEmpty(CellStyle style) {
  285. return XMLStyleGenerator.isStyleEmpty(style, styleHelper);
  286. }
  287. private String column_alpha(int col) {
  288. String result = "";
  289. while (col > -1) {
  290. int letter = (col % 26);
  291. result = Character.toString((char) (letter + 65)) + result;
  292. col = (col / 26) - 1;
  293. }
  294. return result;
  295. }
  296. private Workbook getWorkbook() {
  297. return poiWorkbook;
  298. }
  299. private String stripControlCharacters(String original) {
  300. return original.replaceAll("\\p{Cntrl}", "");
  301. }
  302. }