PageRenderTime 45ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/search/search-impl/impl/src/java/org/sakaiproject/search/component/adapter/contenthosting/PoiContentDigester.java

https://bitbucket.org/fudan/sakai
Java | 107 lines | 61 code | 13 blank | 33 comment | 7 complexity | 7520c09a61381e4c705efedc76a6043c MD5 | raw file
Possible License(s): LGPL-2.1, GPL-2.0, AGPL-3.0, MPL-2.0-no-copyleft-exception, CC0-1.0, CC-BY-SA-3.0, IPL-1.0, Apache-2.0, MIT, BSD-3-Clause, LGPL-2.0
  1. /**********************************************************************************
  2. * $URL: https://source.sakaiproject.org/svn/search/tags/search-1.4.3/search-impl/impl/src/java/org/sakaiproject/search/component/adapter/contenthosting/PoiContentDigester.java $
  3. * $Id: PoiContentDigester.java 73517 2010-02-15 13:35:25Z david.horwitz@uct.ac.za $
  4. ***********************************************************************************
  5. *
  6. * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009 The Sakai Foundation
  7. *
  8. * Licensed under the Educational Community License, Version 2.0 (the "License");
  9. * you may not use this file except in compliance with the License.
  10. * You may obtain a copy of the License at
  11. *
  12. * http://www.osedu.org/licenses/ECL-2.0
  13. *
  14. * Unless required by applicable law or agreed to in writing, software
  15. * distributed under the License is distributed on an "AS IS" BASIS,
  16. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17. * See the License for the specific language governing permissions and
  18. * limitations under the License.
  19. *
  20. **********************************************************************************/
  21. package org.sakaiproject.search.component.adapter.contenthosting;
  22. import java.io.IOException;
  23. import java.io.InputStream;
  24. import java.io.Reader;
  25. import java.io.StringReader;
  26. import org.apache.commons.logging.Log;
  27. import org.apache.commons.logging.LogFactory;
  28. import org.apache.poi.POITextExtractor;
  29. import org.apache.poi.extractor.ExtractorFactory;
  30. import org.sakaiproject.content.api.ContentResource;
  31. /**
  32. * @author ieb
  33. */
  34. public class PoiContentDigester extends BaseContentDigester
  35. {
  36. private static Log log = LogFactory.getLog(PoiContentDigester.class);
  37. static
  38. {
  39. System.setProperty("org.apache.poi.util.POILogger",
  40. "org.apache.poi.util.NullLogger");
  41. }
  42. /*
  43. * (non-Javadoc)
  44. *
  45. * @see org.sakaiproject.search.component.adapter.contenthosting.BaseContentDigester#getContent(org.sakaiproject.content.api.ContentResource)
  46. */
  47. public String getContent(ContentResource contentResource)
  48. {
  49. log.debug("Digesting with PoiContentDigester");
  50. if (contentResource == null) {
  51. throw new RuntimeException("Attempt to digest null document!");
  52. }
  53. if (contentResource != null && contentResource.getContentLength() > maxDigestSize)
  54. {
  55. throw new RuntimeException("Attempt to get too much content as a string on "
  56. + contentResource.getReference());
  57. }
  58. InputStream contentStream = null;
  59. try
  60. {
  61. contentStream = contentResource.streamContent();
  62. POITextExtractor DocExt = ExtractorFactory.createExtractor(contentStream);
  63. return DocExt.getText();
  64. }
  65. catch (Exception e)
  66. {
  67. log.warn("Poi can't digest: " + contentResource.getId() + " POI returned: " + e);
  68. throw new RuntimeException("Failed to read content for indexing ", e);
  69. }
  70. finally
  71. {
  72. if (contentStream != null)
  73. {
  74. try
  75. {
  76. contentStream.close();
  77. }
  78. catch (IOException e)
  79. {
  80. log.debug(e);
  81. }
  82. }
  83. }
  84. }
  85. /*
  86. * (non-Javadoc)
  87. *
  88. * @see org.sakaiproject.search.component.adapter.contenthosting.BaseContentDigester#getContentReader(org.sakaiproject.content.api.ContentResource)
  89. */
  90. public Reader getContentReader(ContentResource contentResource)
  91. {
  92. return new StringReader(getContent(contentResource));
  93. }
  94. }