PageRenderTime 43ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/filesearch/SearchLocalFile/src/com/searchlocal/filereader/ExcelToDicReader.java

http://filesearch.googlecode.com/
Java | 240 lines | 220 code | 20 blank | 0 comment | 39 complexity | be6ab22e16fe5ee369375c82e0f7f8c2 MD5 | raw file
  1. package com.searchlocal.filereader;
  2. import java.io.File;
  3. import java.io.FileInputStream;
  4. import java.io.FileNotFoundException;
  5. import java.io.FileOutputStream;
  6. import java.io.IOException;
  7. import java.io.InputStream;
  8. import java.util.ArrayList;
  9. import java.util.Date;
  10. import java.util.Iterator;
  11. import java.util.List;
  12. import org.apache.poi.hssf.usermodel.HSSFCell;
  13. import org.apache.poi.hssf.usermodel.HSSFDateUtil;
  14. import org.apache.poi.hssf.usermodel.HSSFRow;
  15. import org.apache.poi.hssf.usermodel.HSSFSheet;
  16. import org.apache.poi.hssf.usermodel.HSSFWorkbook;
  17. import org.apache.poi.poifs.filesystem.POIFSFileSystem;
  18. import org.apache.poi.xssf.usermodel.XSSFCell;
  19. import org.apache.poi.xssf.usermodel.XSSFRow;
  20. import org.apache.poi.xssf.usermodel.XSSFSheet;
  21. import org.apache.poi.xssf.usermodel.XSSFWorkbook;
  22. import com.searchlocal.constants.Constant;
  23. import com.searchlocal.util.CLogger;
  24. import com.searchlocal.util.StringUtils;
  25. public class ExcelToDicReader {
  26. private static CLogger logger = new CLogger(ExcelReader.class);
  27. public static boolean getExcelFile(File excelfile, File dicFile) {
  28. InputStream inputStream = null;
  29. POIFSFileSystem fileSystem;
  30. HSSFWorkbook workbook = null;
  31. List wordList = null;
  32. try {
  33. inputStream = new FileInputStream(excelfile);
  34. String filepath = excelfile.getAbsolutePath();
  35. if (StringUtils.is2007Doc(filepath)) {
  36. XSSFWorkbook book = new XSSFWorkbook(inputStream);
  37. wordList = getWordList(book);
  38. } else {
  39. fileSystem = new POIFSFileSystem(inputStream);
  40. workbook = new HSSFWorkbook(fileSystem);
  41. wordList = getWordList(workbook);
  42. }
  43. } catch (FileNotFoundException e) {
  44. logger.error("LG_E001", excelfile.getAbsolutePath(), e);
  45. } catch (IOException e) {
  46. logger.error("LG_E003", excelfile.getAbsolutePath(), e);
  47. } finally {
  48. if (inputStream != null) {
  49. try {
  50. inputStream.close();
  51. } catch (IOException e) {
  52. e.printStackTrace();
  53. }
  54. }
  55. }
  56. copyFile(dicFile, wordList);
  57. return true;
  58. }
  59. public static List getWordList(HSSFWorkbook workbook) {
  60. HSSFSheet sheet;
  61. HSSFRow row;
  62. HSSFCell cell;
  63. String value;
  64. List<String> words = new ArrayList<String>();
  65. for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
  66. sheet = workbook.getSheetAt(i);
  67. for (int j = 0; j <= sheet.getLastRowNum(); j++) {
  68. row = sheet.getRow(j);
  69. if (row != null) {
  70. for (Iterator iter = row.cellIterator(); iter.hasNext();) {
  71. cell = (HSSFCell) iter.next();
  72. if (cell != null) {
  73. int cellType = cell.getCellType();
  74. value = getCellValue(cell, cellType);
  75. if (value != null && !"".equals(value)) {
  76. words.add(value);
  77. }
  78. }
  79. }
  80. }
  81. }
  82. }
  83. return words;
  84. }
  85. public static List getWordList(XSSFWorkbook workbook) {
  86. XSSFSheet sheet;
  87. XSSFRow row;
  88. XSSFCell cell;
  89. String value;
  90. List<String> words = new ArrayList<String>();
  91. for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
  92. sheet = workbook.getSheetAt(i);
  93. for (int j = 0; j <= sheet.getLastRowNum(); j++) {
  94. row = sheet.getRow(j);
  95. if (row != null) {
  96. for (Iterator iter = row.cellIterator(); iter.hasNext();) {
  97. cell = (XSSFCell) iter.next();
  98. if (cell != null) {
  99. int cellType = cell.getCellType();
  100. value = get2007CellValue(cell, cellType);
  101. if (value != null && !"".equals(value)) {
  102. words.add(value);
  103. }
  104. }
  105. }
  106. }
  107. }
  108. }
  109. return words;
  110. }
  111. public static String getExcelContent(File excelfile) {
  112. InputStream inputStream;
  113. POIFSFileSystem fileSystem;
  114. HSSFWorkbook workbook = null;
  115. try {
  116. inputStream = new FileInputStream(excelfile);
  117. fileSystem = new POIFSFileSystem(inputStream);
  118. workbook = new HSSFWorkbook(fileSystem);
  119. } catch (FileNotFoundException e) {
  120. logger.error("LG_E001", excelfile.getAbsolutePath(), e);
  121. } catch (IOException e) {
  122. logger.error("LG_E003", excelfile.getAbsolutePath(), e);
  123. }
  124. HSSFSheet sheet;
  125. HSSFRow row;
  126. HSSFCell cell;
  127. String value;
  128. StringBuffer bf = new StringBuffer();
  129. bf.append("\r\n");
  130. for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
  131. sheet = workbook.getSheetAt(i);
  132. for (int j = 0; j <= sheet.getLastRowNum(); j++) {
  133. row = sheet.getRow(j);
  134. if (row != null) {
  135. for (Iterator iter = row.cellIterator(); iter.hasNext();) {
  136. cell = (HSSFCell) iter.next();
  137. if (cell != null) {
  138. int cellType = cell.getCellType();
  139. value = getCellValue(cell, cellType);
  140. if (value != null && !"".equals(value)) {
  141. bf.append(value);
  142. bf.append("\r\n");
  143. }
  144. }
  145. }
  146. }
  147. }
  148. }
  149. return bf.toString();
  150. }
  151. private static String get2007CellValue(XSSFCell cell, int cellType) {
  152. String returnvalue = "";
  153. switch (cellType) {
  154. case XSSFCell.CELL_TYPE_NUMERIC:
  155. if (HSSFDateUtil.isCellDateFormatted(cell)) {
  156. Date date = cell.getDateCellValue();
  157. returnvalue = date.toString();
  158. break;
  159. }
  160. returnvalue = String.valueOf(cell.getNumericCellValue());
  161. break;
  162. case XSSFCell.CELL_TYPE_STRING:
  163. returnvalue = cell.toString();
  164. break;
  165. case HSSFCell.CELL_TYPE_BOOLEAN:
  166. returnvalue = cell.getBooleanCellValue() ? "true" : "false";
  167. break;
  168. default:
  169. returnvalue = cell.toString();
  170. break;
  171. }
  172. return returnvalue;
  173. }
  174. private static String getCellValue(HSSFCell cell, int cellType) {
  175. String returnvalue = "";
  176. switch (cellType) {
  177. case HSSFCell.CELL_TYPE_NUMERIC:
  178. if (HSSFDateUtil.isCellDateFormatted(cell)) {
  179. Date date = cell.getDateCellValue();
  180. returnvalue = date.toString();
  181. break;
  182. }
  183. returnvalue = String.valueOf(cell.getNumericCellValue());
  184. break;
  185. case HSSFCell.CELL_TYPE_STRING:
  186. returnvalue = cell.toString();
  187. break;
  188. case HSSFCell.CELL_TYPE_BOOLEAN:
  189. returnvalue = cell.getBooleanCellValue() ? "true" : "false";
  190. break;
  191. default:
  192. returnvalue = cell.toString();
  193. break;
  194. }
  195. return returnvalue;
  196. }
  197. private static void copyFile(File dicFile, List<String> values) {
  198. try {
  199. FileOutputStream fos = new FileOutputStream(dicFile);
  200. for (String b : values) {
  201. fos.write(b.getBytes());
  202. fos.write("\r\n".getBytes());
  203. }
  204. fos.close();
  205. } catch (Exception e) {
  206. e.printStackTrace();
  207. }
  208. }
  209. public static void main(String[] args) {
  210. try {
  211. ExcelToDicReader reader = new ExcelToDicReader();
  212. String excelFile = "E:\\test2007doc\\tasklist.xlsx";
  213. String dicfilepath = Constant.dicpath + "custom.dic";
  214. boolean create = reader.getExcelFile(new File(excelFile), new File(dicfilepath));
  215. System.out.println("ExcelText2007=======" + create);
  216. } catch (Exception e) {
  217. e.printStackTrace();
  218. }
  219. }
  220. }