PageRenderTime 5052ms CodeModel.GetById 31ms RepoModel.GetById 7ms app.codeStats 0ms

/ETL/src/com/util/HxlsAbstract.java

https://github.com/xiezhaohui/DataViewer
Java | 303 lines | 231 code | 41 blank | 31 comment | 35 complexity | 8bd64dddea629578a6d8e6f976c7b348 MD5 | raw file
  1. package com.util;
  2. import java.io.FileInputStream;
  3. import java.io.FileNotFoundException;
  4. import java.io.IOException;
  5. import java.io.PrintStream;
  6. import java.sql.SQLException;
  7. import java.util.ArrayList;
  8. import java.util.List;
  9. import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
  10. import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
  11. import org.apache.poi.hssf.eventusermodel.HSSFListener;
  12. import org.apache.poi.hssf.eventusermodel.HSSFRequest;
  13. import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
  14. import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener;
  15. import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
  16. import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
  17. import org.apache.poi.hssf.model.HSSFFormulaParser;
  18. import org.apache.poi.hssf.record.BOFRecord;
  19. import org.apache.poi.hssf.record.BlankRecord;
  20. import org.apache.poi.hssf.record.BoolErrRecord;
  21. import org.apache.poi.hssf.record.BoundSheetRecord;
  22. import org.apache.poi.hssf.record.FormulaRecord;
  23. import org.apache.poi.hssf.record.LabelRecord;
  24. import org.apache.poi.hssf.record.LabelSSTRecord;
  25. import org.apache.poi.hssf.record.NoteRecord;
  26. import org.apache.poi.hssf.record.NumberRecord;
  27. import org.apache.poi.hssf.record.RKRecord;
  28. import org.apache.poi.hssf.record.Record;
  29. import org.apache.poi.hssf.record.SSTRecord;
  30. import org.apache.poi.hssf.record.StringRecord;
  31. import org.apache.poi.hssf.usermodel.HSSFWorkbook;
  32. import org.apache.poi.poifs.filesystem.POIFSFileSystem;
  33. public abstract class HxlsAbstract implements HSSFListener {
  34. private int minColumns;
  35. private POIFSFileSystem fs;
  36. private PrintStream output;
  37. private int lastRowNumber;
  38. private int lastColumnNumber;
  39. /** Should we output the formula, or the value it has? */
  40. private boolean outputFormulaValues = true;
  41. /** For parsing Formulas */
  42. private SheetRecordCollectingListener workbookBuildingListener;
  43. private HSSFWorkbook stubWorkbook;
  44. // Records we pick up as we process
  45. private SSTRecord sstRecord;
  46. private FormatTrackingHSSFListener formatListener;
  47. /** So we known which sheet we're on */
  48. private int sheetIndex = -1;
  49. private BoundSheetRecord[] orderedBSRs;
  50. @SuppressWarnings("unchecked")
  51. private ArrayList boundSheetRecords = new ArrayList();
  52. // For handling formulas with string results
  53. private int nextRow;
  54. private int nextColumn;
  55. private boolean outputNextStringRecord;
  56. private int curRow;
  57. private List<String> rowlist;
  58. @SuppressWarnings( "unused")
  59. private String sheetName;
  60. public HxlsAbstract(POIFSFileSystem fs)
  61. throws SQLException {
  62. this.fs = fs;
  63. this.output = System.out;
  64. this.minColumns = -1;
  65. this.curRow = 0;
  66. this.rowlist = new ArrayList<String>();
  67. for(int i = 0; i < 30; i++)
  68. rowlist.add("");
  69. }
  70. public HxlsAbstract(String filename) throws IOException,
  71. FileNotFoundException, SQLException {
  72. this(new POIFSFileSystem(new FileInputStream(filename)));
  73. }
  74. //excel记录行操作方法,以行索引和行元素列表为参数,对一行元素进行操作,元素为String类型
  75. // public abstract void optRows(int curRow, List<String> rowlist) throws SQLException ;
  76. //excel记录行操作方法,以sheet索引,行索引和行元素列表为参数,对sheet的一行元素进行操作,元素为String类型
  77. public abstract void optRows(int sheetIndex,int curRow, List<String> rowlist) throws SQLException;
  78. /**
  79. * 遍历 excel 文件
  80. */
  81. public void process() throws IOException {
  82. MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(
  83. this);
  84. formatListener = new FormatTrackingHSSFListener(listener);
  85. HSSFEventFactory factory = new HSSFEventFactory();
  86. HSSFRequest request = new HSSFRequest();
  87. if (outputFormulaValues) {
  88. request.addListenerForAllRecords(formatListener);
  89. } else {
  90. workbookBuildingListener = new SheetRecordCollectingListener(
  91. formatListener);
  92. request.addListenerForAllRecords(workbookBuildingListener);
  93. }
  94. factory.processWorkbookEvents(request, fs);
  95. }
  96. /**
  97. * HSSFListener 监听方法,处理 Record
  98. */
  99. @SuppressWarnings("unchecked")
  100. public void processRecord(Record record) {
  101. int thisRow = -1;
  102. int thisColumn = -1;
  103. String thisStr = null;
  104. String value = null;
  105. switch (record.getSid()) {
  106. case BoundSheetRecord.sid:
  107. boundSheetRecords.add(record);
  108. break;
  109. case BOFRecord.sid:
  110. BOFRecord br = (BOFRecord) record;
  111. if (br.getType() == BOFRecord.TYPE_WORKSHEET) {
  112. // Create sub workbook if required
  113. if (workbookBuildingListener != null && stubWorkbook == null) {
  114. stubWorkbook = workbookBuildingListener
  115. .getStubHSSFWorkbook();
  116. }
  117. // Works by ordering the BSRs by the location of
  118. // their BOFRecords, and then knowing that we
  119. // process BOFRecords in byte offset order
  120. sheetIndex++;
  121. if (orderedBSRs == null) {
  122. orderedBSRs = BoundSheetRecord
  123. .orderByBofPosition(boundSheetRecords);
  124. }
  125. sheetName = orderedBSRs[sheetIndex].getSheetname();
  126. }
  127. break;
  128. case SSTRecord.sid:
  129. sstRecord = (SSTRecord) record;
  130. break;
  131. case BlankRecord.sid:
  132. BlankRecord brec = (BlankRecord) record;
  133. thisRow = brec.getRow();
  134. thisColumn = brec.getColumn();
  135. thisStr = "";
  136. break;
  137. case BoolErrRecord.sid:
  138. BoolErrRecord berec = (BoolErrRecord) record;
  139. thisRow = berec.getRow();
  140. thisColumn = berec.getColumn();
  141. thisStr = "";
  142. break;
  143. case FormulaRecord.sid:
  144. FormulaRecord frec = (FormulaRecord) record;
  145. thisRow = frec.getRow();
  146. thisColumn = frec.getColumn();
  147. if (outputFormulaValues) {
  148. if (Double.isNaN(frec.getValue())) {
  149. // Formula result is a string
  150. // This is stored in the next record
  151. outputNextStringRecord = true;
  152. nextRow = frec.getRow();
  153. nextColumn = frec.getColumn();
  154. } else {
  155. thisStr = formatListener.formatNumberDateCell(frec);
  156. }
  157. } else {
  158. thisStr = '"' + HSSFFormulaParser.toFormulaString(stubWorkbook,
  159. frec.getParsedExpression()) + '"';
  160. }
  161. break;
  162. case StringRecord.sid:
  163. if (outputNextStringRecord) {
  164. // String for formula
  165. StringRecord srec = (StringRecord) record;
  166. thisStr = srec.getString();
  167. thisRow = nextRow;
  168. thisColumn = nextColumn;
  169. outputNextStringRecord = false;
  170. }
  171. break;
  172. case LabelRecord.sid:
  173. LabelRecord lrec = (LabelRecord) record;
  174. curRow = thisRow = lrec.getRow();
  175. thisColumn = lrec.getColumn();
  176. value = lrec.getValue().trim();
  177. value = value.equals("")?" ":value;
  178. this.rowlist.add(thisColumn, value);
  179. break;
  180. case LabelSSTRecord.sid:
  181. LabelSSTRecord lsrec = (LabelSSTRecord) record;
  182. curRow = thisRow = lsrec.getRow();
  183. thisColumn = lsrec.getColumn();
  184. if (sstRecord == null) {
  185. rowlist.add(thisColumn, " ");
  186. } else {
  187. value = sstRecord
  188. .getString(lsrec.getSSTIndex()).toString().trim();
  189. value = value.equals("")?" ":value;
  190. rowlist.add(thisColumn,value);
  191. }
  192. break;
  193. case NoteRecord.sid:
  194. NoteRecord nrec = (NoteRecord) record;
  195. thisRow = nrec.getRow();
  196. thisColumn = nrec.getColumn();
  197. // TODO: Find object to match nrec.getShapeId()
  198. thisStr = '"' + "(TODO)" + '"';
  199. break;
  200. case NumberRecord.sid:
  201. NumberRecord numrec = (NumberRecord) record;
  202. curRow = thisRow = numrec.getRow();
  203. thisColumn = numrec.getColumn();
  204. value = formatListener.formatNumberDateCell(numrec).trim();
  205. value = value.equals("")?" ":value;
  206. // System.out.print(" "+rowlist.size() +thisColumn+"----> "+value.length());
  207. // Format
  208. rowlist.add(thisColumn, value);
  209. break;
  210. case RKRecord.sid:
  211. RKRecord rkrec = (RKRecord) record;
  212. thisRow = rkrec.getRow();
  213. thisColumn = rkrec.getColumn();
  214. thisStr = '"' + "(TODO)" + '"';
  215. break;
  216. default:
  217. break;
  218. }
  219. // 遇到新行的操作
  220. if (thisRow != -1 && thisRow != lastRowNumber) {
  221. lastColumnNumber = -1;
  222. }
  223. // 空值的操作
  224. if (record instanceof MissingCellDummyRecord) {
  225. MissingCellDummyRecord mc = (MissingCellDummyRecord) record;
  226. curRow = thisRow = mc.getRow();
  227. thisColumn = mc.getColumn();
  228. rowlist.add(thisColumn," ");
  229. }
  230. // 如果遇到能打印的东西,在这里打印
  231. if (thisStr != null) {
  232. if (thisColumn > 0) {
  233. output.print(',');
  234. }
  235. output.print(thisStr);
  236. }
  237. // 更新行和列的值
  238. if (thisRow > -1)
  239. lastRowNumber = thisRow;
  240. if (thisColumn > -1)
  241. lastColumnNumber = thisColumn;
  242. // 行结束时的操作
  243. if (record instanceof LastCellOfRowDummyRecord) {
  244. if (minColumns > 0) {
  245. // 列值重新置空
  246. if (lastColumnNumber == -1) {
  247. lastColumnNumber = 0;
  248. }
  249. }
  250. // 行结束时, 调用 optRows() 方法
  251. lastColumnNumber = -1;
  252. try {
  253. optRows(sheetIndex,curRow, rowlist);
  254. } catch (SQLException e) {
  255. e.printStackTrace();
  256. }
  257. rowlist.clear();
  258. for(int i = 0; i < 30; i++)
  259. rowlist.add("");
  260. }
  261. }
  262. }