PageRenderTime 7551ms CodeModel.GetById 44ms RepoModel.GetById 6ms app.codeStats 0ms

/DocDoku-ear/DocDoku-ejb/src/main/java/com/docdoku/server/IndexerBean.java

http://docdoku.googlecode.com/
Java | 230 lines | 185 code | 10 blank | 35 comment | 43 complexity | f55fa907a1cca9f68137597a49c91ee7 MD5 | raw file
Possible License(s): GPL-3.0
  1. /*
  2. * DocDoku, Professional Open Source
  3. * Copyright 2006, 2007, 2008, 2009, 2010, 2011, 2012 DocDoku SARL
  4. *
  5. * This file is part of DocDoku.
  6. *
  7. * DocDoku is free software: you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation, either version 3 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * DocDoku is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with DocDoku. If not, see <http://www.gnu.org/licenses/>.
  19. */
  20. package com.docdoku.server;
  21. import java.io.BufferedInputStream;
  22. import java.io.BufferedReader;
  23. import java.io.File;
  24. import java.io.FileInputStream;
  25. import java.io.FileNotFoundException;
  26. import java.io.FileReader;
  27. import java.io.IOException;
  28. import java.io.InputStream;
  29. import java.io.Reader;
  30. import java.io.StringReader;
  31. import java.util.zip.ZipEntry;
  32. import java.util.zip.ZipInputStream;
  33. import javax.annotation.Resource;
  34. import javax.ejb.Asynchronous;
  35. import javax.ejb.ConcurrencyManagement;
  36. import javax.ejb.ConcurrencyManagementType;
  37. import javax.ejb.EJBException;
  38. import javax.ejb.Lock;
  39. import javax.ejb.LockType;
  40. import javax.ejb.Singleton;
  41. import javax.xml.parsers.ParserConfigurationException;
  42. import javax.xml.parsers.SAXParser;
  43. import javax.xml.parsers.SAXParserFactory;
  44. import org.apache.lucene.analysis.standard.StandardAnalyzer;
  45. import org.apache.lucene.document.Document;
  46. import org.apache.lucene.document.Field;
  47. import org.apache.lucene.index.CorruptIndexException;
  48. import org.apache.lucene.index.IndexReader;
  49. import org.apache.lucene.index.IndexWriter;
  50. import org.apache.lucene.index.Term;
  51. import org.apache.lucene.store.Directory;
  52. import org.apache.lucene.store.FSDirectory;
  53. import org.apache.lucene.store.LockObtainFailedException;
  54. import org.apache.lucene.util.Version;
  55. import org.apache.poi.hslf.extractor.PowerPointExtractor;
  56. import org.apache.poi.hwpf.extractor.WordExtractor;
  57. import org.xml.sax.SAXException;
  58. import org.xml.sax.helpers.DefaultHandler;
  59. /**
  60. * Singleton class IndexerBean
  61. *
  62. * @author Florent.Garin
  63. */
  64. @Singleton(name="IndexerBean")
  65. @ConcurrencyManagement(ConcurrencyManagementType.CONTAINER)
  66. public class IndexerBean {
  67. @Resource(name = "indexPath")
  68. private String indexPath;
  69. @Asynchronous
  70. @Lock(LockType.WRITE)
  71. public void removeFromIndex(String fullName) {
  72. IndexWriter indexWriter = null;
  73. Directory indexDir = null;
  74. try {
  75. indexDir = FSDirectory.open(new File(indexPath));
  76. indexWriter = new IndexWriter(indexDir, new StandardAnalyzer(Version.LUCENE_30),IndexWriter.MaxFieldLength.LIMITED);
  77. indexWriter.deleteDocuments(new Term("fullName", fullName));
  78. } catch (LockObtainFailedException ex) {
  79. try {
  80. if (IndexWriter.isLocked(indexDir)) {
  81. IndexWriter.unlock(indexDir);
  82. }
  83. } catch (IOException pIOEx) {
  84. throw new EJBException(pIOEx);
  85. }
  86. throw new EJBException(ex);
  87. } catch (CorruptIndexException ex) {
  88. throw new EJBException(ex);
  89. } catch (IOException ex) {
  90. throw new EJBException(ex);
  91. } finally {
  92. try {
  93. if (indexWriter != null) {
  94. indexWriter.close();
  95. }
  96. } catch (IOException ex) {
  97. throw new EJBException(ex);
  98. }
  99. }
  100. }
  101. @Asynchronous
  102. @Lock(LockType.WRITE)
  103. public void addToIndex(String fullName, String pathName) {
  104. IndexWriter indexWriter = null;
  105. Directory indexDir = null;
  106. try {
  107. indexDir = FSDirectory.open(new File(indexPath));
  108. indexWriter = new IndexWriter(indexDir, new StandardAnalyzer(Version.LUCENE_30), IndexWriter.MaxFieldLength.LIMITED);
  109. int ext = pathName.lastIndexOf('.');
  110. String extension = "";
  111. if (ext != -1) {
  112. extension = pathName.substring(ext);
  113. }
  114. if (extension.equals(".odt")
  115. || extension.equals(".ods")
  116. || extension.equals(".odp")
  117. || extension.equals(".odg")
  118. || extension.equals(".odc")
  119. || extension.equals(".odf")
  120. || extension.equals(".odb")
  121. || extension.equals(".odi")
  122. || extension.equals(".odm")) {
  123. final StringBuilder text = new StringBuilder();
  124. ZipInputStream zipOpenDoc = new ZipInputStream(new BufferedInputStream(new FileInputStream(pathName)));
  125. ZipEntry zipEntry;
  126. while ((zipEntry = zipOpenDoc.getNextEntry()) != null) {
  127. if (zipEntry.getName().equals("content.xml")) {
  128. SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
  129. SAXParser parser = saxParserFactory.newSAXParser();
  130. parser.parse(zipOpenDoc, new DefaultHandler() {
  131. @Override
  132. public void characters(char[] ch,
  133. int start,
  134. int length)
  135. throws SAXException {
  136. for (int i = start; i < start + length; i++) {
  137. text.append(ch[i]);
  138. }
  139. text.append("\r\n");
  140. }
  141. });
  142. break;
  143. }
  144. }
  145. zipOpenDoc.close();
  146. Reader contentReader = new StringReader(text.toString());
  147. addDoc(indexWriter, contentReader, fullName);
  148. contentReader.close();
  149. } else if (extension.equals(".doc")) {
  150. //MSWord Document
  151. InputStream wordStream = new BufferedInputStream(new FileInputStream(pathName));
  152. WordExtractor wordExtractor = new WordExtractor(wordStream);
  153. Reader contentReader = new StringReader(wordExtractor.getText());
  154. wordStream.close();
  155. addDoc(indexWriter, contentReader, fullName);
  156. contentReader.close();
  157. } else if (extension.equals(".ppt") || extension.equals(".pps")) {
  158. //MSPowerPoint Document
  159. InputStream pptStream = new BufferedInputStream(new FileInputStream(pathName));
  160. PowerPointExtractor pptExtractor = new PowerPointExtractor(pptStream);
  161. Reader contentReader = new StringReader(pptExtractor.getText(true, true));
  162. pptStream.close();
  163. addDoc(indexWriter, contentReader, fullName);
  164. pptExtractor.close();
  165. contentReader.close();
  166. } else if (extension.equals(".txt")) {
  167. //Text Document
  168. Reader contentReader = new BufferedReader(new FileReader(pathName));
  169. addDoc(indexWriter, contentReader, fullName);
  170. contentReader.close();
  171. } else if (extension.equals(".xls")) {
  172. //MSExcelExtractor Document
  173. //InputStream excelStream=new BufferedInputStream(new FileInputStream(pathName));
  174. //ExcelExtractor excelExtractor= new ExcelExtractor(excelStream);
  175. //Reader contentReader=new StringReader(excelExtractor.getText());
  176. //excelStream.close();
  177. //addDoc(indexWriter,contentReader,fullName);
  178. //excelExtractor.close();
  179. //contentReader.close();
  180. } else if (extension.equals(".html") || extension.equals(".htm")) {
  181. } else if (extension.equals(".csv")) {
  182. } else if (extension.equals(".xml")) {
  183. } else if (extension.equals(".rtf")) {
  184. } else if (extension.equals(".pdf")) {
  185. } else if (extension.equals(".msg")) {
  186. }
  187. } catch (CorruptIndexException ex) {
  188. throw new EJBException(ex);
  189. } catch (LockObtainFailedException ex) {
  190. try {
  191. if (IndexWriter.isLocked(indexDir)) {
  192. IndexWriter.unlock(indexDir);
  193. }
  194. } catch (IOException pIOEx) {
  195. throw new EJBException(pIOEx);
  196. }
  197. throw new EJBException(ex);
  198. } catch (ParserConfigurationException ex) {
  199. throw new EJBException(ex);
  200. } catch (SAXException ex) {
  201. throw new EJBException(ex);
  202. } catch (IOException ex) {
  203. throw new EJBException(ex);
  204. } finally {
  205. try {
  206. if (indexWriter != null) {
  207. indexWriter.close();
  208. }
  209. } catch (IOException ex) {
  210. throw new EJBException(ex);
  211. }
  212. }
  213. }
  214. private void addDoc(IndexWriter pIndexWriter, Reader pContentReader, String pFullName) throws FileNotFoundException, CorruptIndexException, IOException {
  215. Document doc = new Document();
  216. doc.add(new Field("fullName", pFullName, Field.Store.YES, Field.Index.NOT_ANALYZED));
  217. doc.add(new Field("content", pContentReader));
  218. pIndexWriter.addDocument(doc);
  219. }
  220. }