PageRenderTime 53ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/engine/src/org/pentaho/di/trans/steps/excelinput/staxpoi/StaxPoiSheet.java

https://github.com/teruok78/pentaho-kettle
Java | 175 lines | 141 code | 17 blank | 17 comment | 46 complexity | cbfe0ad18a37e13a4234d2b5bc06851f MD5 | raw file
Possible License(s): Apache-2.0
  1. /**
  2. * Author = Shailesh Ahuja
  3. */
  4. package org.pentaho.di.trans.steps.excelinput.staxpoi;
  5. import java.io.IOException;
  6. import java.io.InputStream;
  7. import java.util.ArrayList;
  8. import java.util.List;
  9. import javax.xml.stream.XMLInputFactory;
  10. import javax.xml.stream.XMLStreamConstants;
  11. import javax.xml.stream.XMLStreamException;
  12. import javax.xml.stream.XMLStreamReader;
  13. import org.apache.poi.xssf.eventusermodel.XSSFReader;
  14. import org.apache.poi.xssf.model.SharedStringsTable;
  15. import org.apache.poi.xssf.usermodel.XSSFRichTextString;
  16. import org.pentaho.di.core.spreadsheet.KCell;
  17. import org.pentaho.di.core.spreadsheet.KSheet;
  18. public class StaxPoiSheet implements KSheet {
  19. private String sheetName;
  20. private InputStream sheetStream;
  21. private XMLStreamReader sheetReader;
  22. // hold the pointer to the current row so that access to the next row in the stream is quick and easy
  23. private int currentRow;
  24. private List<String> headerRow;
  25. private int numRows;
  26. private int numCols;
  27. // variable to hold the shared strings table
  28. private SharedStringsTable sst;
  29. public StaxPoiSheet( XSSFReader reader, String sheetName, String sheetID ) {
  30. this.sheetName = sheetName;
  31. try {
  32. sst = reader.getSharedStringsTable();
  33. sheetStream = reader.getSheet( sheetID );
  34. XMLInputFactory factory = XMLInputFactory.newInstance();
  35. sheetReader = factory.createXMLStreamReader( sheetStream );
  36. headerRow = new ArrayList<String>();
  37. while ( sheetReader.hasNext() ) {
  38. int event = sheetReader.next();
  39. if ( event == XMLStreamConstants.START_ELEMENT && sheetReader.getLocalName().equals( "dimension" ) ) {
  40. String dim = sheetReader.getAttributeValue( null, "ref" ).split( ":" )[1];
  41. numRows = StaxUtil.extractRowNumber( dim );
  42. numCols = StaxUtil.extractColumnNumber( dim );
  43. }
  44. if ( event == XMLStreamConstants.START_ELEMENT && sheetReader.getLocalName().equals( "row" ) ) {
  45. currentRow = Integer.parseInt( sheetReader.getAttributeValue( null, "r" ) );
  46. // calculate the number of columns in the header row
  47. while ( sheetReader.hasNext() ) {
  48. event = sheetReader.next();
  49. if ( event == XMLStreamConstants.END_ELEMENT && sheetReader.getLocalName().equals( "row" ) ) {
  50. // if the row has ended, break the inner while loop
  51. break;
  52. }
  53. if ( event == XMLStreamConstants.START_ELEMENT && sheetReader.getLocalName().equals( "c" ) ) {
  54. String attributeValue = sheetReader.getAttributeValue( null, "t" );
  55. if ( attributeValue != null && attributeValue.equals( "s" ) ) {
  56. // only if the type of the cell is string, we continue
  57. while ( sheetReader.hasNext() ) {
  58. event = sheetReader.next();
  59. if ( event == XMLStreamConstants.START_ELEMENT && sheetReader.getLocalName().equals( "v" ) ) {
  60. int idx = Integer.parseInt( sheetReader.getElementText() );
  61. String content = new XSSFRichTextString( sst.getEntryAt( idx ) ).toString();
  62. headerRow.add( content );
  63. break;
  64. }
  65. }
  66. } else {
  67. break;
  68. }
  69. }
  70. }
  71. // we have parsed the header row
  72. break;
  73. }
  74. }
  75. // numCols = headerRow.size();
  76. } catch ( Exception e ) {
  77. e.printStackTrace();
  78. throw new RuntimeException( e.getMessage() );
  79. }
  80. }
  81. @Override
  82. public KCell[] getRow( int rownr ) {
  83. // convert 0 based index to 1 based
  84. rownr += 1;
  85. try {
  86. while ( sheetReader.hasNext() ) {
  87. int event = sheetReader.next();
  88. if ( event == XMLStreamConstants.START_ELEMENT && sheetReader.getLocalName().equals( "row" ) ) {
  89. String rowIndicator = sheetReader.getAttributeValue( null, "r" );
  90. currentRow = Integer.parseInt( rowIndicator );
  91. if ( currentRow < rownr ) {
  92. continue;
  93. }
  94. KCell[] cells = new StaxPoiCell[numCols];
  95. for ( int i = 0; i < numCols; i++ ) {
  96. // go to the "c" <cell> tag
  97. while ( sheetReader.hasNext() ) {
  98. if ( event == XMLStreamConstants.START_ELEMENT && sheetReader.getLocalName().equals( "c" ) ) {
  99. break;
  100. }
  101. event = sheetReader.next();
  102. }
  103. String cellLocation = sheetReader.getAttributeValue( null, "r" );
  104. int columnIndex = StaxUtil.extractColumnNumber( cellLocation ) - 1;
  105. String cellType = sheetReader.getAttributeValue( null, "t" );
  106. // go to the "v" <value> tag
  107. while ( sheetReader.hasNext() ) {
  108. event = sheetReader.next();
  109. if ( event == XMLStreamConstants.START_ELEMENT && sheetReader.getLocalName().equals( "v" ) ) {
  110. break;
  111. }
  112. if ( event == XMLStreamConstants.END_ELEMENT && sheetReader.getLocalName().equals( "c" ) ) {
  113. // we have encountered an empty/incomplete row, so we set the max rows to current row number
  114. // TODO: accept empty row is option is check and go till the end of the xml (need to detect the end)
  115. numRows = currentRow;
  116. return new KCell[] {};
  117. }
  118. }
  119. String content = null;
  120. if ( cellType != null && cellType.equals( "s" ) ) {
  121. int idx = Integer.parseInt( sheetReader.getElementText() );
  122. content = new XSSFRichTextString( sst.getEntryAt( idx ) ).toString();
  123. } else {
  124. content = sheetReader.getElementText();
  125. }
  126. cells[columnIndex] = new StaxPoiCell( content, currentRow );
  127. }
  128. return cells;
  129. }
  130. }
  131. } catch ( Exception e ) {
  132. throw new RuntimeException( e );
  133. }
  134. numRows = currentRow;
  135. return new KCell[] {};
  136. }
  137. @Override
  138. public String getName() {
  139. return sheetName;
  140. }
  141. @Override
  142. public int getRows() {
  143. return numRows;
  144. }
  145. @Override
  146. public KCell getCell( int colnr, int rownr ) {
  147. if ( rownr == 0 && colnr < numCols ) {
  148. // only possible to return header
  149. return new StaxPoiCell( headerRow.get( colnr ), rownr );
  150. }
  151. return null;
  152. // throw new RuntimeException("getCell(col, row) is not supported yet");
  153. }
  154. public void close() throws IOException, XMLStreamException {
  155. sheetStream.close();
  156. sheetReader.close();
  157. }
  158. }