PageRenderTime 4611ms CodeModel.GetById 2ms RepoModel.GetById 2ms app.codeStats 0ms

/components/registry/org.wso2.carbon.registry.indexing/src/main/java/org/wso2/carbon/registry/indexing/indexer/MSWordIndexer.java

https://github.com/GayanM/carbon-registry
Java | 34 lines | 27 code | 7 blank | 0 comment | 0 complexity | 332e838ed86180213afd01ca220ec2ed MD5 | raw file
  1. package org.wso2.carbon.registry.indexing.indexer;
  2. import java.io.ByteArrayInputStream;
  3. import java.io.IOException;
  4. import org.apache.commons.logging.Log;
  5. import org.apache.commons.logging.LogFactory;
  6. import org.apache.poi.hwpf.extractor.WordExtractor;
  7. import org.apache.poi.poifs.filesystem.POIFSFileSystem;
  8. import org.apache.solr.common.SolrException;
  9. import org.apache.solr.common.SolrException.ErrorCode;
  10. import org.wso2.carbon.registry.indexing.AsyncIndexer.File2Index;
  11. import org.wso2.carbon.registry.indexing.solr.IndexDocument;
  12. public class MSWordIndexer implements Indexer {
  13. public static final Log log = LogFactory.getLog(MSWordIndexer.class);
  14. public IndexDocument getIndexedDocument(File2Index fileData)
  15. throws SolrException {
  16. try {
  17. POIFSFileSystem fs = new POIFSFileSystem(new ByteArrayInputStream(fileData.data));
  18. WordExtractor extractor = new WordExtractor(fs);
  19. String wordText = extractor.getText();
  20. return new IndexDocument(fileData.path, wordText, null);
  21. } catch (IOException e) {
  22. String msg = "Failed to write to the index";
  23. log.error(msg, e);
  24. throw new SolrException(ErrorCode.SERVER_ERROR, msg);
  25. }
  26. }
  27. }