PageRenderTime 48ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/src/com/Frank/ExcelHandler.java

https://github.com/frank19900731/NewsExtractor
Java | 202 lines | 147 code | 21 blank | 34 comment | 38 complexity | b5379cf7d138e0cf4e5b2366e8ad2254 MD5 | raw file
  1. /**
  2. * @(#)ExcelHandler.Java 1.00 2012/05/22
  3. *
  4. * Copyright (c) 2012 清华大学自动化系 Bigeye 实验室版权所有
  5. * Department of Automation, Tsinghua University. All rights reserved.
  6. *
  7. * @author 宋成儒
  8. *
  9. * This software aims to extract title, time, source and text content
  10. * from news webpages. We grab news webpages from Baidu Rss and stored
  11. * them in Mysql database. To support the web demo of this project, we
  12. * also provide with interfaces for web communication .
  13. */
  14. package com.Frank;
  15. import java.io.FileInputStream;
  16. import java.io.FileOutputStream;
  17. import java.io.InputStream;
  18. import java.sql.ResultSet;
  19. import java.text.SimpleDateFormat;
  20. import java.util.Date;
  21. import org.apache.poi.hssf.usermodel.HSSFCell;
  22. import org.apache.poi.hssf.usermodel.HSSFDateUtil;
  23. import org.apache.poi.hssf.usermodel.HSSFRow;
  24. import org.apache.poi.hssf.usermodel.HSSFSheet;
  25. import org.apache.poi.hssf.usermodel.HSSFWorkbook;
  26. /** ExcelHandler类支持将数据库中的数据导出到excel中,以便人工标注,
  27. * 也支持将标注后的结果导回数据库中 */
  28. public class ExcelHandler {
  29. private String tablename;
  30. private DatabaseUtils daUtils;
  31. /** 生成函数
  32. * @param table 导入导出涉及的数据表名 */
  33. public ExcelHandler(String table) {
  34. tablename = table;
  35. daUtils = new DatabaseUtils();
  36. try {
  37. daUtils.ps = daUtils.conn.prepareStatement("Update " + tablename + " SET TextTitle=?, Source=?, Time=?, Text=?, Video=? where Url=?");
  38. } catch (Exception e) {
  39. e.printStackTrace();
  40. }
  41. }
  42. /** 导出数据库数据到excel文件中
  43. * @param file excel文件的存储位置
  44. * @param res 包含要导出数据的sql结果集
  45. * @return 导出是否成功 */
  46. public boolean writeExcel(String file, ResultSet res)
  47. {
  48. HSSFWorkbook workbook = new HSSFWorkbook();
  49. HSSFSheet sheet1 = workbook.createSheet("sheet1");
  50. int count = 0;
  51. try {
  52. while(res.next())
  53. {
  54. HSSFRow row = sheet1.createRow(count);
  55. row.createCell(0).setCellValue(res.getString("Url"));
  56. count++;
  57. }
  58. FileOutputStream fileOut = new FileOutputStream(file);
  59. workbook.write(fileOut);
  60. fileOut.close();
  61. } catch (Exception e) {
  62. e.printStackTrace();
  63. System.out.println("Output Failed!");
  64. return false;
  65. }
  66. System.out.println("Output Ended!");
  67. return true;
  68. }
  69. /** 导入人工标注的数据到数据库中
  70. * @param is excel文件数据流
  71. * @return 导入是否成功 */
  72. public boolean readExcel(InputStream is) {
  73. int selectCol[]={0,9,7,8,10,6};
  74. String fields[]={"","","","","","",""};
  75. try {
  76. HSSFWorkbook workbook = new HSSFWorkbook(is);
  77. for (int numSheets = 0; numSheets < workbook.getNumberOfSheets(); numSheets++) {
  78. if (null != workbook.getSheetAt(numSheets)) {
  79. HSSFSheet aSheet = workbook.getSheetAt(numSheets);
  80. for (int rowNumOfSheet = 1; rowNumOfSheet <= aSheet.getLastRowNum(); rowNumOfSheet++) {
  81. if (null != aSheet.getRow(rowNumOfSheet)) {
  82. HSSFRow aRow = aSheet.getRow(rowNumOfSheet);
  83. fields[6] = "" + (rowNumOfSheet+1);
  84. for (int index = 0; index < 6; index++) {
  85. if (null != aRow.getCell(selectCol[index])) {
  86. HSSFCell aCell = aRow.getCell(selectCol[index]);
  87. if (aCell.getCellType() == HSSFCell.CELL_TYPE_NUMERIC) {
  88. if (HSSFDateUtil.isCellDateFormatted(aCell)) {
  89. double d = aCell.getNumericCellValue();
  90. Date date = HSSFDateUtil.getJavaDate(d);
  91. SimpleDateFormat sFormat = new SimpleDateFormat("yyyy年MM月dd");
  92. fields[index]=sFormat.format(date);
  93. } else {
  94. fields[index]="" + (long)aCell.getNumericCellValue();
  95. }
  96. }
  97. else {
  98. fields[index]=aCell.getStringCellValue();
  99. }
  100. fields[index]=fields[index].trim();
  101. if(fields[index].length() == 0) {
  102. if(index==5)
  103. fields[index]="0";
  104. else
  105. fields[index]=" ";
  106. }
  107. }
  108. else {
  109. if(index==5)
  110. fields[index]="0";
  111. else
  112. fields[index]=" ";
  113. }
  114. }
  115. daUtils.ps.setString(1, fields[1]);
  116. daUtils.ps.setString(2, fields[2]);
  117. daUtils.ps.setString(3, fields[3]);
  118. daUtils.ps.setString(4, fields[4]);
  119. daUtils.ps.setString(5, fields[5]);
  120. daUtils.ps.setString(6, fields[0]);
  121. daUtils.ps.executeUpdate();
  122. }
  123. }
  124. }
  125. }
  126. } catch (Exception e) {
  127. e.printStackTrace();
  128. }
  129. return true;
  130. }
  131. /** 根据TFIDF值对关键词进行过滤
  132. * @param is excel文件数据流 */
  133. public void tfidfFilter(InputStream is) {
  134. String currstr = "";
  135. double val[] = new double[11];
  136. int total = 0;
  137. try {
  138. HSSFWorkbook workbook = new HSSFWorkbook(is);
  139. for (int numSheets = 0; numSheets < workbook.getNumberOfSheets(); numSheets++) {
  140. if (null != workbook.getSheetAt(numSheets)) {
  141. HSSFSheet aSheet = workbook.getSheetAt(numSheets);
  142. for (int rowNumOfSheet = 0; rowNumOfSheet <= aSheet.getLastRowNum(); rowNumOfSheet++) {
  143. if (null != aSheet.getRow(rowNumOfSheet)) {
  144. HSSFRow aRow = aSheet.getRow(rowNumOfSheet);
  145. for (int index = 0; index <= aRow.getLastCellNum(); index++) {
  146. if (null != aRow.getCell(index)) {
  147. HSSFCell aCell = aRow.getCell(index);
  148. if(aCell.getCellType() == HSSFCell.CELL_TYPE_STRING) {
  149. currstr = aCell.getStringCellValue();
  150. } else if (aCell.getCellType() == HSSFCell.CELL_TYPE_NUMERIC) {
  151. val[index-1] = aCell.getNumericCellValue();
  152. }
  153. }
  154. }
  155. int count = 0;
  156. for(double value : val)
  157. if(value >= 0.2)
  158. count++;
  159. if(count >= 7) {
  160. total++;
  161. System.out.println(currstr);
  162. }
  163. }
  164. }
  165. }
  166. }
  167. System.out.println();
  168. System.out.println(total);
  169. } catch (Exception e) {
  170. e.printStackTrace();
  171. }
  172. }
  173. // public static void main(String args[]) {
  174. // ExcelHandler exHandler = new ExcelHandler("");
  175. // try {
  176. // exHandler.tfidfFilter(new FileInputStream("F:/Android/OnlineDemo/output/Extract-Tfidf.xls"));
  177. // } catch (Exception e) {
  178. // e.printStackTrace();
  179. // }
  180. // }
  181. }