PageRenderTime 43ms CodeModel.GetById 16ms RepoModel.GetById 1ms app.codeStats 0ms

/ETL/src/com/util/XxlsAbstract.java

https://github.com/xiezhaohui/DataViewer
Java | 145 lines | 104 code | 20 blank | 21 comment | 9 complexity | 8bcf7185681037ed76939e88fb0242bf MD5 | raw file
  1. package com.util;
  2. import java.io.InputStream;
  3. import java.sql.SQLException;
  4. import java.util.ArrayList;
  5. import java.util.Iterator;
  6. import java.util.List;
  7. import org.apache.poi.xssf.eventusermodel.XSSFReader;
  8. import org.apache.poi.xssf.model.SharedStringsTable;
  9. import org.apache.poi.xssf.usermodel.XSSFRichTextString;
  10. import org.apache.poi.openxml4j.opc.OPCPackage;
  11. import org.xml.sax.Attributes;
  12. import org.xml.sax.InputSource;
  13. import org.xml.sax.SAXException;
  14. import org.xml.sax.XMLReader;
  15. import org.xml.sax.helpers.DefaultHandler;
  16. import org.xml.sax.helpers.XMLReaderFactory;
  17. /**
  18. * XSSF and SAX (Event API)
  19. */
  20. public abstract class XxlsAbstract extends DefaultHandler {
  21. private SharedStringsTable sst;
  22. private String lastContents;
  23. private boolean nextIsString;
  24. private int sheetIndex = -1;
  25. private List<String> rowlist = new ArrayList<String>();
  26. private int curRow = 0;
  27. private int curCol = 0;
  28. //excel记录行操作方法,以行索引和行元素列表为参数,对一行元素进行操作,元素为String类型
  29. // public abstract void optRows(int curRow, List<String> rowlist) throws SQLException ;
  30. //excel记录行操作方法,以sheet索引,行索引和行元素列表为参数,对sheet的一行元素进行操作,元素为String类型
  31. public abstract void optRows(int sheetIndex,int curRow, List<String> rowlist) throws SQLException;
  32. //只遍历一个sheet,其中sheetId为要遍历的sheet索引,从1开始,1-3
  33. public void processOneSheet(String filename,int sheetId) throws Exception {
  34. OPCPackage pkg = OPCPackage.open(filename);
  35. XSSFReader r = new XSSFReader(pkg);
  36. SharedStringsTable sst = r.getSharedStringsTable();
  37. XMLReader parser = fetchSheetParser(sst);
  38. // rId2 found by processing the Workbook
  39. // 根据 rId# 或 rSheet# 查找sheet
  40. InputStream sheet2 = r.getSheet("rId"+sheetId);
  41. sheetIndex++;
  42. InputSource sheetSource = new InputSource(sheet2);
  43. parser.parse(sheetSource);
  44. sheet2.close();
  45. }
  46. /**
  47. * 遍历 excel 文件
  48. */
  49. public void process(String filename) throws Exception {
  50. OPCPackage pkg = OPCPackage.open(filename);
  51. XSSFReader r = new XSSFReader(pkg);
  52. SharedStringsTable sst = r.getSharedStringsTable();
  53. XMLReader parser = fetchSheetParser(sst);
  54. Iterator<InputStream> sheets = r.getSheetsData();
  55. while (sheets.hasNext()) {
  56. curRow = 0;
  57. sheetIndex++;
  58. InputStream sheet = sheets.next();
  59. InputSource sheetSource = new InputSource(sheet);
  60. parser.parse(sheetSource);
  61. sheet.close();
  62. }
  63. }
  64. public XMLReader fetchSheetParser(SharedStringsTable sst)
  65. throws SAXException {
  66. XMLReader parser = XMLReaderFactory
  67. .createXMLReader("org.apache.xerces.parsers.SAXParser");
  68. this.sst = sst;
  69. parser.setContentHandler(this);
  70. return parser;
  71. }
  72. public void startElement(String uri, String localName, String name,
  73. Attributes attributes) throws SAXException {
  74. // c => 单元格
  75. if (name.equals("c")) {
  76. // 如果下一个元素是 SST 的索引,则将nextIsString标记为true
  77. String cellType = attributes.getValue("t");
  78. if (cellType != null && cellType.equals("s")) {
  79. nextIsString = true;
  80. } else {
  81. nextIsString = false;
  82. }
  83. }
  84. // 置空
  85. lastContents = "";
  86. }
  87. public void endElement(String uri, String localName, String name)
  88. throws SAXException {
  89. // 根据SST的索引值的到单元格的真正要存储的字符串
  90. // 这时characters()方法可能会被调用多次
  91. if (nextIsString) {
  92. try {
  93. int idx = Integer.parseInt(lastContents);
  94. lastContents = new XSSFRichTextString(sst.getEntryAt(idx))
  95. .toString();
  96. } catch (Exception e) {
  97. }
  98. }
  99. // v => 单元格的值,如果单元格是字符串则v标签的值为该字符串在SST中的索引
  100. // 将单元格内容加入rowlist中,在这之前先去掉字符串前后的空白符
  101. if (name.equals("v")) {
  102. String value = lastContents.trim();
  103. value = value.equals("")?" ":value;
  104. rowlist.add(curCol, value);
  105. curCol++;
  106. }else {
  107. //如果标签名称为 row ,这说明已到行尾,调用 optRows() 方法
  108. if (name.equals("row")) {
  109. try {
  110. optRows(sheetIndex,curRow,rowlist);
  111. } catch (SQLException e) {
  112. e.printStackTrace();
  113. }
  114. rowlist.clear();
  115. curRow++;
  116. curCol = 0;
  117. }
  118. }
  119. }
  120. public void characters(char[] ch, int start, int length)
  121. throws SAXException {
  122. //得到单元格内容的值
  123. lastContents += new String(ch, start, length);
  124. }
  125. }