PageRenderTime 7382ms CodeModel.GetById 18ms RepoModel.GetById 8ms app.codeStats 0ms

/converge-server/modules/converge-ejb/src/main/java/dk/i2m/converge/ejb/facades/SearchEngineBean.java

https://bitbucket.org/interactivemediamanagement/converge-1.x
Java | 1076 lines | 840 code | 138 blank | 98 comment | 132 complexity | d9af8e2661b4e99851cb162b3e56d220 MD5 | raw file
  1. /*
  2. * Copyright (C) 2010 - 2012 Interactive Media Management
  3. *
  4. * This program is free software: you can redistribute it and/or modify
  5. * it under the terms of the GNU General Public License as published by
  6. * the Free Software Foundation, either version 3 of the License, or
  7. * (at your option) any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  16. */
  17. package dk.i2m.converge.ejb.facades;
  18. import dk.i2m.converge.core.DataNotFoundException;
  19. import dk.i2m.converge.core.ConfigurationKey;
  20. import dk.i2m.converge.core.content.NewsItem;
  21. import dk.i2m.converge.core.content.NewsItemActor;
  22. import dk.i2m.converge.core.content.NewsItemPlacement;
  23. import dk.i2m.converge.core.content.catalogue.MediaItem;
  24. import dk.i2m.converge.core.content.catalogue.MediaItemRendition;
  25. import dk.i2m.converge.core.metadata.*;
  26. import dk.i2m.converge.core.search.IndexQueueEntry;
  27. import dk.i2m.converge.core.search.QueueEntryOperation;
  28. import dk.i2m.converge.core.search.QueueEntryType;
  29. import dk.i2m.converge.core.search.SearchEngineIndexingException;
  30. import dk.i2m.converge.core.security.UserAccount;
  31. import dk.i2m.converge.core.utils.BeanComparator;
  32. import dk.i2m.converge.domain.search.IndexField;
  33. import dk.i2m.converge.domain.search.SearchFacet;
  34. import dk.i2m.converge.domain.search.SearchResult;
  35. import dk.i2m.converge.domain.search.SearchResults;
  36. import dk.i2m.converge.ejb.services.*;
  37. import java.io.ByteArrayOutputStream;
  38. import java.io.IOException;
  39. import java.net.MalformedURLException;
  40. import java.net.URL;
  41. import java.text.DateFormat;
  42. import java.text.MessageFormat;
  43. import java.text.ParseException;
  44. import java.text.SimpleDateFormat;
  45. import java.util.*;
  46. import java.util.logging.Level;
  47. import java.util.logging.Logger;
  48. import javax.annotation.Resource;
  49. import javax.ejb.*;
  50. import org.apache.commons.lang.StringUtils;
  51. import org.apache.poi.hssf.usermodel.HSSFHeader;
  52. import org.apache.poi.hssf.usermodel.HSSFSheet;
  53. import org.apache.poi.hssf.usermodel.HSSFWorkbook;
  54. import org.apache.poi.hssf.usermodel.HeaderFooter;
  55. import org.apache.poi.ss.usermodel.*;
  56. import org.apache.poi.ss.util.WorkbookUtil;
  57. import org.apache.solr.client.solrj.SolrQuery;
  58. import org.apache.solr.client.solrj.SolrRequest.METHOD;
  59. import org.apache.solr.client.solrj.SolrServer;
  60. import org.apache.solr.client.solrj.SolrServerException;
  61. import org.apache.solr.client.solrj.impl.BinaryRequestWriter;
  62. import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
  63. import org.apache.solr.client.solrj.response.FacetField;
  64. import org.apache.solr.client.solrj.response.QueryResponse;
  65. import org.apache.solr.common.SolrDocument;
  66. import org.apache.solr.common.SolrDocumentList;
  67. import org.apache.solr.common.SolrInputDocument;
  68. import org.apache.tika.Tika;
  69. /**
  70. * Stateless session bean implementing a search engine service.
  71. *
  72. * @author Allan Lykke Christensen
  73. */
  74. @Stateless
  75. public class SearchEngineBean implements SearchEngineLocal {
  76. private static final Logger LOG = Logger.getLogger(SearchEngineBean.class.
  77. getName());
  78. /** Internationalisation of messages. */
  79. //private ResourceBundle i18n = ResourceBundle.getBundle("dk.i2m.converge.i18n.ServiceMessages");
  80. @EJB private ConfigurationServiceLocal cfgService;
  81. @EJB private UserFacadeLocal userFacade;
  82. @EJB private DaoServiceLocal daoService;
  83. @EJB private NewsItemFacadeLocal newsItemFacade;
  84. @EJB private CatalogueFacadeLocal catalogueFacade;
  85. @EJB private MetaDataServiceLocal metaDataService;
  86. @Resource private SessionContext ctx;
  87. private DateFormat solrDateFormat = new SimpleDateFormat(
  88. "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'");
  89. @Override
  90. public IndexQueueEntry addToIndexQueue(QueueEntryType type, Long id,
  91. QueueEntryOperation operation) {
  92. IndexQueueEntry entry = new IndexQueueEntry(type, id, operation);
  93. Map<String, Object> params = QueryBuilder.with("entryId", entry.getId()).
  94. and("type", entry.getType()).and("operation",
  95. entry.getOperation()).parameters();
  96. List<IndexQueueEntry> entries =
  97. daoService.findWithNamedQuery(
  98. IndexQueueEntry.FIND_BY_TYPE_ID_AND_OPERATION, params);
  99. if (entries.isEmpty()) {
  100. return daoService.create(entry);
  101. } else {
  102. return entries.iterator().next();
  103. }
  104. }
  105. @Override
  106. @TransactionAttribute(TransactionAttributeType.NEVER)
  107. public List<IndexQueueEntry> getIndexQueue() {
  108. List<IndexQueueEntry> queue = daoService.findAll(IndexQueueEntry.class);
  109. Collections.sort(queue, new BeanComparator("added", false));
  110. return queue;
  111. }
  112. @Override
  113. @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
  114. public void removeFromQueue(Long id) {
  115. daoService.delete(IndexQueueEntry.class, id);
  116. }
  117. /**
  118. * Remove an item from the search engine.
  119. * <p/>
  120. * @param id Unique identifier of the item to remove
  121. */
  122. @Override
  123. public void removeItem(Long id) {
  124. try {
  125. getSolrServer().deleteById(String.valueOf(id));
  126. } catch (SolrServerException ex) {
  127. LOG.log(Level.SEVERE, null, ex);
  128. } catch (IOException ex) {
  129. LOG.log(Level.SEVERE, null, ex);
  130. }
  131. }
  132. @Override
  133. public void processIndexingQueue() {
  134. SolrServer solrServer = getSolrServer();
  135. List<IndexQueueEntry> items = getIndexQueue();
  136. for (IndexQueueEntry entry : items) {
  137. if (entry.getOperation().equals(QueueEntryOperation.REMOVE)) {
  138. try {
  139. solrServer.deleteById(String.valueOf(entry.getEntryId()));
  140. removeFromQueue(entry.getId());
  141. } catch (Exception ex) {
  142. LOG.log(Level.WARNING, entry.getType().name()
  143. + " #{0} could not be removed from index", entry.
  144. getEntryId());
  145. LOG.log(Level.WARNING, ex.getMessage(), ex);
  146. }
  147. } else {
  148. switch (entry.getType()) {
  149. case NEWS_ITEM:
  150. try {
  151. NewsItem newsItem =
  152. newsItemFacade.findNewsItemById(entry.
  153. getEntryId());
  154. index(newsItem, solrServer);
  155. removeFromQueue(entry.getId());
  156. } catch (DataNotFoundException ex) {
  157. LOG.log(Level.WARNING,
  158. "NewsItem #{0} does not exist in the database. Skipping indexing.",
  159. entry.getEntryId());
  160. removeFromQueue(entry.getId());
  161. } catch (SearchEngineIndexingException ex) {
  162. LOG.log(Level.WARNING,
  163. "NewsItem #{0} could not be indexed", entry.
  164. getEntryId());
  165. LOG.log(Level.WARNING, ex.getMessage(), ex);
  166. }
  167. break;
  168. case MEDIA_ITEM:
  169. try {
  170. MediaItem mediaItem = catalogueFacade.
  171. findMediaItemById(entry.getEntryId());
  172. index(mediaItem, solrServer);
  173. removeFromQueue(entry.getId());
  174. } catch (DataNotFoundException ex) {
  175. LOG.log(Level.WARNING,
  176. "MediaItem #{0} does not exist in the database. Skipping indexing.",
  177. entry.getEntryId());
  178. removeFromQueue(entry.getId());
  179. } catch (SearchEngineIndexingException ex) {
  180. LOG.log(Level.WARNING,
  181. "MediaItem #{0} could not be indexed",
  182. entry.getEntryId());
  183. LOG.log(Level.WARNING, ex.getMessage(), ex);
  184. }
  185. break;
  186. }
  187. }
  188. }
  189. }
  190. /** {@inheritDoc} */
  191. @Override
  192. public SearchResults search(String query, int start, int rows,
  193. String... filterQueries) {
  194. return search(query, start, rows, "score", false, filterQueries);
  195. }
  196. @Override
  197. public SearchResults search(String query, int start, int rows,
  198. String sortField, boolean sortOrder, String... filterQueries) {
  199. return search(query, start, rows, "score", false, null, null,
  200. filterQueries);
  201. }
  202. /**
  203. * Queries the search engine.
  204. *
  205. * @param query Query string
  206. * @param start First record to retrieve
  207. * @param rows Number of rows to retrieve
  208. * @param sortField Field to sort by
  209. * @param sortOrder Ascending ({@code true}) or descending ({@code false})
  210. * @param dateFrom Search results must not be older than this date
  211. * @param dateTo Search results must not be newer than this date
  212. * @param filterQueries Filter queries to include in the search
  213. * @return {@link SearchResults} matching the {@code query}
  214. */
  215. @Override
  216. public SearchResults search(String query, int start, int rows,
  217. String sortField, boolean sortOrder, Date dateFrom, Date dateTo,
  218. String... filterQueries) {
  219. long startTime = System.currentTimeMillis();
  220. SearchResults searchResults = new SearchResults();
  221. try {
  222. final DateFormat ORIGINAL_FORMAT = new SimpleDateFormat(
  223. "yyyy-MM-dd'T'HH:mm:ss'Z'");
  224. final DateFormat NEW_FORMAT = new SimpleDateFormat("MMMM yyyy");
  225. SolrQuery solrQuery = new SolrQuery();
  226. solrQuery.setStart(start);
  227. solrQuery.setRows(rows);
  228. StringBuilder queryString = new StringBuilder(query);
  229. // Check if the query has date restrictions
  230. if (dateFrom != null || dateTo != null) {
  231. // Construct date query
  232. if (!query.isEmpty()) {
  233. queryString.append(" AND date:");
  234. }
  235. if (dateFrom == null) {
  236. queryString.append("[* TO ");
  237. } else {
  238. queryString.append("[");
  239. queryString.append(solrDateFormat.format(dateFrom));
  240. queryString.append(" TO ");
  241. }
  242. if (dateTo == null) {
  243. queryString.append("*]");
  244. } else {
  245. queryString.append(solrDateFormat.format(dateTo));
  246. queryString.append("]");
  247. }
  248. }
  249. solrQuery.setQuery(queryString.toString());
  250. solrQuery.setFacet(true);
  251. if (sortOrder) {
  252. solrQuery.setSortField(sortField, SolrQuery.ORDER.asc);
  253. } else {
  254. solrQuery.setSortField(sortField, SolrQuery.ORDER.desc);
  255. }
  256. solrQuery.addFacetField(IndexField.TYPE.getName());
  257. solrQuery.addFacetField(IndexField.OUTLET.getName());
  258. solrQuery.addFacetField(IndexField.REPOSITORY.getName());
  259. solrQuery.addFacetField(IndexField.SECTION.getName());
  260. solrQuery.addFacetField(IndexField.SUBJECT.getName());
  261. solrQuery.addFacetField(IndexField.ORGANISATION.getName());
  262. solrQuery.addFacetField(IndexField.PERSON.getName());
  263. solrQuery.addFacetField(IndexField.LOCATION.getName());
  264. solrQuery.addFacetField(IndexField.POINT_OF_INTEREST.getName());
  265. // for (UserRole userRole : userFacade.getUserRoles()) {
  266. // solrQuery.addFacetField(userRole.getName());
  267. // }
  268. solrQuery.addFilterQuery(filterQueries);
  269. solrQuery.setFacetMinCount(1);
  270. solrQuery.setIncludeScore(true);
  271. solrQuery.setHighlight(true).setHighlightSnippets(1); //set other params as needed
  272. solrQuery.setParam("hl.fl", "title,story,caption");
  273. solrQuery.setParam("hl.fragsize", "500");
  274. solrQuery.setParam("hl.simple.pre",
  275. "<span class=\"searchHighlight\">");
  276. solrQuery.setParam("hl.simple.post", "</span>");
  277. solrQuery.setParam("facet.date", "date");
  278. solrQuery.setParam("facet.date.start", "NOW/YEAR-10YEAR");
  279. solrQuery.setParam("facet.date.end", "NOW");
  280. solrQuery.setParam("facet.date.gap", "+1MONTH");
  281. SolrServer srv = getSolrServer();
  282. // POST is used to support UTF-8
  283. QueryResponse qr = srv.query(solrQuery, METHOD.POST);
  284. SolrDocumentList sdl = qr.getResults();
  285. searchResults.setNumberOfResults(sdl.getNumFound());
  286. for (SolrDocument d : sdl) {
  287. // Copy all fields to map for easy access
  288. HashMap<String, Object> values = new HashMap<String, Object>();
  289. for (Iterator<Map.Entry<String, Object>> i = d.iterator(); i.
  290. hasNext();) {
  291. Map.Entry<String, Object> e2 = i.next();
  292. values.put(e2.getKey(), e2.getValue());
  293. }
  294. String type = (String) values.get("type");
  295. SearchResult hit = null;
  296. if ("Story".equalsIgnoreCase(type)) {
  297. hit = generateStoryHit(qr, values);
  298. } else if ("Media".equalsIgnoreCase(type)) {
  299. hit = generateMediaHit(qr, values);
  300. }
  301. generateTags(hit, qr, values);
  302. if (hit != null) {
  303. hit.setScore((Float) d.getFieldValue("score"));
  304. searchResults.getHits().add(hit);
  305. }
  306. }
  307. List<FacetField> facets = qr.getFacetFields();
  308. for (FacetField facet : facets) {
  309. List<FacetField.Count> facetEntries = facet.getValues();
  310. if (facetEntries != null) {
  311. for (FacetField.Count fcount : facetEntries) {
  312. if (!searchResults.getFacets().containsKey(
  313. facet.getName())) {
  314. searchResults.getFacets().put(facet.getName(),
  315. new ArrayList<SearchFacet>());
  316. }
  317. SearchFacet sf = new SearchFacet(fcount.getName(),
  318. fcount.getAsFilterQuery(), fcount.getCount());
  319. // Check if the filter query is already active
  320. for (String fq : filterQueries) {
  321. if (fq.equals(fcount.getAsFilterQuery())) {
  322. sf.setSelected(true);
  323. }
  324. }
  325. // Ensure that the facet is not already there
  326. if (!searchResults.getFacets().get(facet.getName()).
  327. contains(sf)) {
  328. searchResults.getFacets().get(facet.getName()).add(
  329. sf);
  330. }
  331. }
  332. }
  333. }
  334. for (FacetField facet : qr.getFacetDates()) {
  335. List<FacetField.Count> facetEntries = facet.getValues();
  336. if (facetEntries != null) {
  337. for (FacetField.Count fcount : facetEntries) {
  338. if (fcount.getCount() != 0) {
  339. if (!searchResults.getFacets().containsKey(facet.
  340. getName())) {
  341. searchResults.getFacets().put(facet.getName(),
  342. new ArrayList<SearchFacet>());
  343. }
  344. String facetLabel = "";
  345. try {
  346. Date facetDate = ORIGINAL_FORMAT.parse(fcount.
  347. getName());
  348. facetLabel = NEW_FORMAT.format(facetDate);
  349. } catch (ParseException ex) {
  350. LOG.log(Level.SEVERE, null, ex);
  351. facetLabel = fcount.getName();
  352. }
  353. String realFilterQuery = "date:[" + fcount.getName()
  354. + " TO " + fcount.getName() + "+1MONTH]";
  355. SearchFacet sf = new SearchFacet(facetLabel,
  356. realFilterQuery, fcount.getCount());
  357. // Check if the filter query is already active
  358. for (String fq : filterQueries) {
  359. if (fq.equals(realFilterQuery)) {
  360. sf.setSelected(true);
  361. }
  362. }
  363. // Ensure that the facet is not already there
  364. if (!searchResults.getFacets().get(facet.getName()).
  365. contains(sf)) {
  366. searchResults.getFacets().get(facet.getName()).
  367. add(sf);
  368. }
  369. }
  370. }
  371. }
  372. }
  373. } catch (SolrServerException ex) {
  374. LOG.log(Level.SEVERE, null, ex);
  375. }
  376. long endTime = System.currentTimeMillis();
  377. searchResults.setSearchTime(endTime - startTime);
  378. searchResults.setStart(start);
  379. searchResults.setResultsPerPage(rows);
  380. return searchResults;
  381. }
  382. /**
  383. * Generates an overview reports of a set of {@link SearchResults}. The
  384. * search results will be extracted (fetched) so that it is not just the
  385. * partial set of {@link SearchResults} that will be included in the report.
  386. * <p/>
  387. * @param results {@link SearchResults} for which to generate the report
  388. * @return Binary data representing the report
  389. */
  390. @Override
  391. public byte[] generateReport(SearchResults results) {
  392. ResourceBundle i18n;
  393. try {
  394. String uid = ctx.getCallerPrincipal().getName();
  395. UserAccount user = userFacade.findById(uid);
  396. Locale userLocale = user.getPreferredLocale();
  397. i18n = ResourceBundle.getBundle(
  398. "dk.i2m.converge.i18n.ServiceMessages", userLocale);
  399. } catch (DataNotFoundException ex) {
  400. i18n = ResourceBundle.getBundle(
  401. "dk.i2m.converge.i18n.ServiceMessages");
  402. }
  403. String lblSheetName = i18n.getString(
  404. "SearchEngineBean_generateReport_SHEET_NAME");
  405. String lblHeaderLeft = i18n.getString(
  406. "SearchEngineBean_generateReport_HEADER_LEFT");
  407. String lblHeaderRight = i18n.getString(
  408. "SearchEngineBean_generateReport_HEADER_RIGHT");
  409. String lblFooterLeft = i18n.getString(
  410. "SearchEngineBean_generateReport_FOOTER_LEFT");
  411. String lblFooterRight = i18n.getString(
  412. "SearchEngineBean_generateReport_FOOTER_RIGHT");
  413. String lblDateFormat = i18n.getString(
  414. "SearchEngineBean_generateReport_DATE_FORMAT");
  415. String lblRowHeaderId = i18n.getString(
  416. "SearchEngineBean_generateReport_ROW_HEADER_ID");
  417. String lblRowHeaderDate = i18n.getString(
  418. "SearchEngineBean_generateReport_ROW_HEADER_DATE");
  419. String lblRowHeaderTitle = i18n.getString(
  420. "SearchEngineBean_generateReport_ROW_HEADER_TITLE");
  421. String lblRowHeaderOutlet = i18n.getString(
  422. "SearchEngineBean_generateReport_ROW_HEADER_OUTLET");
  423. String lblRowHeaderSection = i18n.getString(
  424. "SearchEngineBean_generateReport_ROW_HEADER_SECTION");
  425. HSSFWorkbook wb = new HSSFWorkbook();
  426. String sheetName = WorkbookUtil.createSafeSheetName(lblSheetName);
  427. int overviewSheetRow = 0;
  428. Font storyFont = wb.createFont();
  429. storyFont.setFontHeightInPoints((short) 12);
  430. storyFont.setBoldweight(Font.BOLDWEIGHT_NORMAL);
  431. // Create style with borders
  432. CellStyle style = wb.createCellStyle();
  433. style.setBorderBottom(CellStyle.BORDER_THIN);
  434. style.setBottomBorderColor(IndexedColors.BLACK.getIndex());
  435. style.setBorderLeft(CellStyle.BORDER_THIN);
  436. style.setLeftBorderColor(IndexedColors.BLACK.getIndex());
  437. style.setBorderRight(CellStyle.BORDER_THIN);
  438. style.setRightBorderColor(IndexedColors.BLACK.getIndex());
  439. style.setBorderTop(CellStyle.BORDER_THIN);
  440. style.setTopBorderColor(IndexedColors.BLACK.getIndex());
  441. // Create style for date cells
  442. CreationHelper createHelper = wb.getCreationHelper();
  443. CellStyle dateStyle = wb.createCellStyle();
  444. dateStyle.setDataFormat(createHelper.createDataFormat().getFormat(
  445. lblDateFormat));
  446. dateStyle.setBorderBottom(CellStyle.BORDER_THIN);
  447. dateStyle.setBottomBorderColor(IndexedColors.BLACK.getIndex());
  448. dateStyle.setBorderLeft(CellStyle.BORDER_THIN);
  449. dateStyle.setLeftBorderColor(IndexedColors.BLACK.getIndex());
  450. dateStyle.setBorderRight(CellStyle.BORDER_THIN);
  451. dateStyle.setRightBorderColor(IndexedColors.BLACK.getIndex());
  452. dateStyle.setBorderTop(CellStyle.BORDER_THIN);
  453. dateStyle.setTopBorderColor(IndexedColors.BLACK.getIndex());
  454. HSSFSheet overviewSheet = wb.createSheet(sheetName);
  455. // Create sheet header
  456. HSSFHeader sheetHeader = overviewSheet.getHeader();
  457. sheetHeader.setLeft(lblHeaderLeft);
  458. sheetHeader.setRight(lblHeaderRight);
  459. // Create sheet footer
  460. Footer footer = overviewSheet.getFooter();
  461. String footerLeft = MessageFormat.format(lblFooterLeft,
  462. new Object[]{HeaderFooter.page(), HeaderFooter.numPages()});
  463. String footerRight = MessageFormat.format(lblFooterRight,
  464. new Object[]{HeaderFooter.date(), HeaderFooter.time()});
  465. footer.setLeft(footerLeft);
  466. footer.setRight(footerRight);
  467. // Freeze the header row
  468. overviewSheet.createFreezePane(0, 1, 0, 1);
  469. Row row = overviewSheet.createRow(0);
  470. row.createCell(0).setCellValue(lblRowHeaderId);
  471. row.getCell(0).setCellStyle(style);
  472. row.createCell(1).setCellValue(lblRowHeaderDate);
  473. row.getCell(1).setCellStyle(style);
  474. row.createCell(2).setCellValue(lblRowHeaderTitle);
  475. row.getCell(2).setCellStyle(style);
  476. row.createCell(3).setCellValue(lblRowHeaderOutlet);
  477. row.getCell(3).setCellStyle(style);
  478. row.createCell(4).setCellValue(lblRowHeaderSection);
  479. row.getCell(4).setCellStyle(style);
  480. overviewSheetRow++;
  481. for (SearchResult result : results.getHits()) {
  482. try {
  483. NewsItem newsItem =
  484. newsItemFacade.findNewsItemFromArchive(result.getId());
  485. if (newsItem.getPlacements().isEmpty()) {
  486. row = overviewSheet.createRow(overviewSheetRow);
  487. row.createCell(0).setCellValue(result.getId());
  488. row.getCell(0).setCellStyle(style);
  489. row.createCell(1).setCellValue(newsItem.getUpdated());
  490. row.getCell(1).setCellStyle(dateStyle);
  491. row.createCell(2).setCellValue(newsItem.getTitle());
  492. row.getCell(2).setCellStyle(style);
  493. row.createCell(3).setCellValue(
  494. newsItem.getOutlet().getTitle());
  495. row.getCell(3).setCellStyle(style);
  496. row.createCell(4).setCellValue("");
  497. row.getCell(4).setCellStyle(style);
  498. } else {
  499. for (NewsItemPlacement nip : newsItem.getPlacements()) {
  500. try {
  501. row = overviewSheet.createRow(overviewSheetRow);
  502. row.createCell(0).setCellValue(result.getId());
  503. row.getCell(0).setCellStyle(style);
  504. row.createCell(1).setCellValue(nip.getEdition().
  505. getPublicationDate());
  506. row.getCell(1).setCellStyle(dateStyle);
  507. row.createCell(2).setCellValue(newsItem.getTitle());
  508. row.getCell(2).setCellStyle(style);
  509. row.createCell(3).setCellValue(nip.getOutlet().
  510. getTitle());
  511. row.getCell(3).setCellStyle(style);
  512. row.createCell(4).setCellValue(nip.getSection().
  513. getFullName());
  514. row.getCell(4).setCellStyle(style);
  515. } catch (Exception ex) {
  516. LOG.log(Level.INFO,
  517. "Failed to output line in report. {0}", ex.
  518. getMessage());
  519. }
  520. }
  521. }
  522. overviewSheetRow++;
  523. } catch (DataNotFoundException ex) {
  524. }
  525. }
  526. // Auto-size
  527. for (int i = 0; i <= 2; i++) {
  528. overviewSheet.autoSizeColumn(i);
  529. }
  530. wb.setRepeatingRowsAndColumns(0, 0, 0, 0, 0);
  531. overviewSheet.setFitToPage(true);
  532. overviewSheet.setAutobreaks(true);
  533. ByteArrayOutputStream baos = new ByteArrayOutputStream();
  534. try {
  535. wb.write(baos);
  536. } catch (IOException ex) {
  537. LOG.log(Level.SEVERE, null, ex);
  538. }
  539. return baos.toByteArray();
  540. }
  541. /**
  542. * Communicate to the Solr server that the search engine index should be
  543. * updated.
  544. * <p/>
  545. * @throws SearchEngineIndexingException If an unexpected response was received from the Solr server
  546. */
  547. @Override
  548. public void optimizeIndex() throws SearchEngineIndexingException {
  549. try {
  550. getSolrServer().optimize();
  551. } catch (SolrServerException ex) {
  552. throw new SearchEngineIndexingException(ex);
  553. } catch (IOException ex) {
  554. throw new SearchEngineIndexingException(ex);
  555. }
  556. }
  557. /**
  558. * Generates a {link SearchResult} for a media item.
  559. *
  560. * @param qr QueryResponse from Solr
  561. * @param values Fields available
  562. * @return {@link SearchResult}
  563. */
  564. private SearchResult generateMediaHit(QueryResponse qr,
  565. HashMap<String, Object> values) {
  566. String id = (String) values.get(IndexField.ID.getName());
  567. StringBuilder caption = new StringBuilder("");
  568. StringBuilder title = new StringBuilder("");
  569. StringBuilder note = new StringBuilder("");
  570. Map<String, List<String>> highlighting = qr.getHighlighting().get(id);
  571. boolean highlightingExist = highlighting != null;
  572. if (highlightingExist && highlighting.get(IndexField.STORY.getName())
  573. != null) {
  574. for (String hl : highlighting.get(IndexField.STORY.getName())) {
  575. caption.append(hl);
  576. }
  577. } else if (highlighting.get(IndexField.STORY.getName()) != null) {
  578. caption.append(StringUtils.abbreviate((String) values.get(IndexField.STORY.
  579. getName()), 500));
  580. } else {
  581. caption.append(StringUtils.abbreviate((String) values.get(IndexField.CAPTION.
  582. getName()), 500));
  583. }
  584. if (highlightingExist && highlighting.get(IndexField.TITLE.getName())
  585. != null) {
  586. for (String hl : qr.getHighlighting().get(id).get(IndexField.TITLE.
  587. getName())) {
  588. title.append(hl);
  589. }
  590. } else {
  591. title.append((String) values.get(IndexField.TITLE.getName()));
  592. }
  593. String format = (String) values.get(IndexField.MEDIA_FORMAT.getName());
  594. note.append((String) values.get(IndexField.TYPE.getName()));
  595. note.append(" - ");
  596. note.append(format);
  597. note.append(" - ");
  598. note.append((String) values.get(IndexField.REPOSITORY.getName()));
  599. SearchResult hit = new SearchResult();
  600. hit.setId(Long.valueOf(id));
  601. hit.setTitle(title.toString());
  602. hit.setDescription(caption.toString());
  603. hit.setNote(note.toString());
  604. hit.setLink("{0}/MediaItemArchive.xhtml?id=" + values.get(IndexField.ID.
  605. getName()));
  606. hit.setType((String) values.get(IndexField.TYPE.getName()));
  607. hit.setFormat(format);
  608. if (values.containsKey(IndexField.THUMB_URL.getName())) {
  609. hit.setPreview(true);
  610. hit.setPreviewLink((String) values.get(
  611. IndexField.THUMB_URL.getName()));
  612. hit.setDirectLink((String) values.get(
  613. IndexField.DIRECT_URL.getName()));
  614. try {
  615. Tika tika = new Tika();
  616. String contentType = tika.detect(new URL(hit.getPreviewLink()));
  617. hit.setPreviewContentType(contentType);
  618. } catch (IOException ex) {
  619. LOG.log(Level.WARNING, "Could not set the content type "
  620. + "of the preview link. {0}",
  621. new Object[]{ex.getMessage()});
  622. }
  623. } else {
  624. hit.setPreview(false);
  625. }
  626. if (values.containsKey(IndexField.DATE.getName())) {
  627. if (values.get(IndexField.DATE.getName()) instanceof List) {
  628. hit.setDates((List<Date>) values.get(IndexField.DATE.getName()));
  629. } else {
  630. hit.addDate((Date) values.get(IndexField.DATE.getName()));
  631. }
  632. }
  633. return hit;
  634. }
  635. /**
  636. * Generates a {link SearchResult} for a story.
  637. *
  638. * @param qr QueryResponse from Solr
  639. * @param values Fields available
  640. * @return {@link SearchResult}
  641. */
  642. private SearchResult generateStoryHit(QueryResponse qr,
  643. HashMap<String, Object> values) {
  644. String id = (String) values.get(IndexField.ID.getName());
  645. StringBuilder story = new StringBuilder();
  646. StringBuilder title = new StringBuilder();
  647. StringBuilder note = new StringBuilder();
  648. Map<String, List<String>> highlighting = qr.getHighlighting().get(id);
  649. boolean highlightingExist = highlighting != null;
  650. if (highlightingExist && highlighting.get(IndexField.STORY.getName())
  651. != null) {
  652. for (String hl : highlighting.get(IndexField.STORY.getName())) {
  653. story.append(hl);
  654. }
  655. } else {
  656. story.append(StringUtils.abbreviate((String) values.get(IndexField.STORY.
  657. getName()), 500));
  658. }
  659. if (highlightingExist && highlighting.get(IndexField.TITLE.getName())
  660. != null) {
  661. for (String hl : qr.getHighlighting().get(id).get(IndexField.TITLE.
  662. getName())) {
  663. title.append(hl);
  664. }
  665. } else {
  666. title.append((String) values.get(IndexField.TITLE.getName()));
  667. }
  668. note.append((String) values.get(IndexField.TYPE.getName()));
  669. note.append(" - Words: ");
  670. if (values.containsKey(IndexField.WORD_COUNT.getName())) {
  671. note.append(String.valueOf(values.get(
  672. IndexField.WORD_COUNT.getName())));
  673. } else {
  674. note.append("Unknown");
  675. }
  676. note.append("<br/>");
  677. if (values.containsKey(IndexField.PLACEMENT.getName())) {
  678. if (values.get(IndexField.PLACEMENT.getName()) instanceof String) {
  679. note.append(values.get(IndexField.PLACEMENT.getName()));
  680. } else if (values.get(IndexField.PLACEMENT.getName()) instanceof List) {
  681. List<String> placements =
  682. (List<String>) values.get(IndexField.PLACEMENT.getName());
  683. for (String placement : placements) {
  684. note.append(placement);
  685. note.append("<br/>");
  686. }
  687. } else {
  688. LOG.warning("Unexpected value returned from search engine");
  689. }
  690. }
  691. SearchResult hit = new SearchResult();
  692. hit.setId(Long.valueOf(id));
  693. hit.setTitle(title.toString());
  694. hit.setDescription(story.toString());
  695. hit.setNote(note.toString());
  696. hit.setLink("{0}/NewsItemArchive.xhtml?id=" + id);
  697. hit.setType((String) values.get(IndexField.TYPE.getName()));
  698. return hit;
  699. }
  700. /**
  701. * Gets the instance of the Apache Solr server used for indexing.
  702. *
  703. * @return Instance of the Apache Solr server
  704. * @throws IllegalStateException If the search engine is not properly configured
  705. */
  706. private SolrServer getSolrServer() {
  707. try {
  708. String url =
  709. cfgService.getString(ConfigurationKey.SEARCH_ENGINE_URL);
  710. Integer socketTimeout = cfgService.getInteger(
  711. ConfigurationKey.SEARCH_ENGINE_SOCKET_TIMEOUT);
  712. Integer connectionTimeout = cfgService.getInteger(
  713. ConfigurationKey.SEARCH_ENGINE_CONNECTION_TIMEOUT);
  714. Integer maxTotalConnectionsPerHost =
  715. cfgService.getInteger(
  716. ConfigurationKey.SEARCH_ENGINE_MAX_TOTAL_CONNECTIONS_PER_HOST);
  717. Integer maxTotalConnections =
  718. cfgService.getInteger(
  719. ConfigurationKey.SEARCH_ENGINE_MAX_TOTAL_CONNECTIONS);
  720. Integer maxRetries = cfgService.getInteger(
  721. ConfigurationKey.SEARCH_ENGINE_MAX_RETRIES);
  722. Boolean followRedirects = cfgService.getBoolean(
  723. ConfigurationKey.SEARCH_ENGINE_FOLLOW_REDIRECTS);
  724. Boolean allowCompression = cfgService.getBoolean(
  725. ConfigurationKey.SEARCH_ENGINE_ALLOW_COMPRESSION);
  726. CommonsHttpSolrServer solrServer = new CommonsHttpSolrServer(url);
  727. solrServer.setRequestWriter(new BinaryRequestWriter());
  728. solrServer.setSoTimeout(socketTimeout);
  729. solrServer.setConnectionTimeout(connectionTimeout);
  730. solrServer.setDefaultMaxConnectionsPerHost(
  731. maxTotalConnectionsPerHost);
  732. solrServer.setMaxTotalConnections(maxTotalConnections);
  733. solrServer.setFollowRedirects(followRedirects);
  734. solrServer.setAllowCompression(allowCompression);
  735. solrServer.setMaxRetries(maxRetries);
  736. return solrServer;
  737. } catch (MalformedURLException ex) {
  738. LOG.log(Level.SEVERE, "Invalid search engine configuration. {0}",
  739. ex.getMessage());
  740. LOG.log(Level.FINE, "", ex);
  741. throw new IllegalStateException(
  742. "Invalid search engine configuration", ex);
  743. }
  744. }
  745. private void generateTags(SearchResult hit, QueryResponse qr,
  746. HashMap<String, Object> values) {
  747. if (values.containsKey(IndexField.DATE.getName())) {
  748. if (values.get(IndexField.DATE.getName()) instanceof Date) {
  749. hit.addDate((Date) values.get(IndexField.DATE.getName()));
  750. } else if (values.get(IndexField.DATE.getName()) instanceof List) {
  751. hit.setDates((List<Date>) values.get(IndexField.DATE.getName()));
  752. } else {
  753. LOG.warning("Unexpected value returned from search engine");
  754. }
  755. }
  756. List<String> tags = new ArrayList<String>();
  757. if (values.containsKey(IndexField.CONCEPT.getName())) {
  758. if (values.get(IndexField.CONCEPT.getName()) instanceof String) {
  759. Object tag = values.get(IndexField.CONCEPT.getName());
  760. tags.add((String) tag);
  761. } else if (values.get(IndexField.CONCEPT.getName()) instanceof List) {
  762. tags =
  763. (List<String>) values.get(IndexField.CONCEPT.getName());
  764. } else {
  765. LOG.warning("Unexpected value returned from search engine");
  766. }
  767. }
  768. hit.setTags(tags.toArray(new String[tags.size()]));
  769. }
  770. private void index(NewsItem ni, SolrServer solrServer) throws
  771. SearchEngineIndexingException {
  772. SolrInputDocument solrDoc = new SolrInputDocument();
  773. solrDoc.addField(IndexField.ID.getName(), ni.getId(), 1.0f);
  774. solrDoc.addField(IndexField.TITLE.getName(), ni.getTitle(), 1.0f);
  775. solrDoc.addField(IndexField.TYPE.getName(), "Story");
  776. solrDoc.addField(IndexField.BYLINE.getName(), ni.getByLine());
  777. solrDoc.addField(IndexField.BRIEF.getName(), ni.getBrief());
  778. solrDoc.addField(IndexField.STORY.getName(),
  779. dk.i2m.converge.core.utils.StringUtils.stripHtml(ni.getStory()));
  780. try {
  781. solrDoc.addField(IndexField.LANG.getName(),
  782. ni.getLanguage().getCode());
  783. } catch (NullPointerException ex) {
  784. }
  785. solrDoc.addField(IndexField.LANGUAGE.getName(),
  786. ni.getLanguage().getName());
  787. solrDoc.addField(IndexField.WORD_COUNT.getName(), ni.getWordCount());
  788. for (NewsItemPlacement placement : ni.getPlacements()) {
  789. if (placement.getEdition() != null) {
  790. if (placement.getEdition().getPublicationDate() != null) {
  791. solrDoc.addField(IndexField.DATE.getName(), placement.
  792. getEdition().getPublicationDate().getTime());
  793. }
  794. solrDoc.addField(IndexField.EDITION_NUMBER.getName(), placement.
  795. getEdition().getNumber());
  796. solrDoc.addField(IndexField.EDITION_VOLUME.getName(), placement.
  797. getEdition().getVolume());
  798. }
  799. if (placement.getSection() != null) {
  800. solrDoc.addField(IndexField.SECTION.getName(), placement.
  801. getSection().getFullName());
  802. }
  803. if (placement.getOutlet() != null) {
  804. solrDoc.addField(IndexField.OUTLET.getName(), placement.
  805. getOutlet().getTitle());
  806. }
  807. solrDoc.addField(IndexField.PLACEMENT.getName(),
  808. placement.toString());
  809. }
  810. // for (WorkflowStateTransition wst : ni.getHistory()) {
  811. // doc.add(new Field(IndexField.ACTOR_UID.getName(), wst.getUser().getUsername(), Field.Store.YES, Field.Index.ANALYZED));
  812. // doc.add(new Field(IndexField.ACTOR_NAME.getName(), wst.getUser().getFullName(), Field.Store.YES, Field.Index.ANALYZED));
  813. // }
  814. for (NewsItemActor actor : ni.getActors()) {
  815. solrDoc.addField(IndexField.ACTOR.getName(), actor.getUser().
  816. getFullName());
  817. // Dynamic fields for the actors role
  818. solrDoc.addField(actor.getRole().getName(), actor.getUser().
  819. getFullName());
  820. }
  821. for (Concept concept : ni.getConcepts()) {
  822. if (concept instanceof Subject) {
  823. solrDoc.addField(IndexField.SUBJECT.getName(), concept.
  824. getFullTitle());
  825. }
  826. if (concept instanceof Person) {
  827. solrDoc.addField(IndexField.PERSON.getName(), concept.
  828. getFullTitle());
  829. }
  830. if (concept instanceof Organisation) {
  831. solrDoc.addField(IndexField.ORGANISATION.getName(), concept.
  832. getFullTitle());
  833. }
  834. if (concept instanceof GeoArea) {
  835. solrDoc.addField(IndexField.LOCATION.getName(), concept.
  836. getFullTitle());
  837. }
  838. if (concept instanceof PointOfInterest) {
  839. solrDoc.addField(IndexField.POINT_OF_INTEREST.getName(),
  840. concept.getFullTitle());
  841. }
  842. solrDoc.addField(IndexField.CONCEPT.getName(),
  843. concept.getFullTitle());
  844. }
  845. try {
  846. solrServer.add(solrDoc);
  847. } catch (SolrServerException ex) {
  848. throw new SearchEngineIndexingException(ex);
  849. } catch (IOException ex) {
  850. throw new SearchEngineIndexingException(ex);
  851. }
  852. }
  853. public void index(MediaItem mi, SolrServer solrServer) throws
  854. SearchEngineIndexingException {
  855. if (mi.isOriginalAvailable()) {
  856. MediaItemRendition mir = mi.getOriginal();
  857. SolrInputDocument solrDoc = new SolrInputDocument();
  858. solrDoc.addField(IndexField.ID.getName(), mi.getId(), 1.0f);
  859. solrDoc.addField(IndexField.TYPE.getName(), "Media");
  860. String mediaFormat;
  861. String contentType = mi.getOriginal().getContentType();
  862. String story = "";
  863. if (mir.isAudio()) {
  864. mediaFormat = "Audio";
  865. } else if (mir.isVideo()) {
  866. mediaFormat = "Video";
  867. } else if (mir.isImage()) {
  868. mediaFormat = "Image";
  869. } else if (mir.isDocument()) {
  870. mediaFormat = "Document";
  871. story = metaDataService.extractContent(mir);
  872. } else {
  873. mediaFormat = "Unknown";
  874. }
  875. solrDoc.addField(IndexField.MEDIA_FORMAT.getName(), mediaFormat);
  876. solrDoc.addField(IndexField.TITLE.getName(), mi.getTitle(), 1.0f);
  877. solrDoc.addField(IndexField.BYLINE.getName(), mi.getByLine());
  878. solrDoc.addField(IndexField.STORY.getName(),
  879. dk.i2m.converge.core.utils.StringUtils.stripHtml(mi.
  880. getDescription()) + " " + story);
  881. solrDoc.addField(IndexField.CAPTION.getName(),
  882. dk.i2m.converge.core.utils.StringUtils.stripHtml(mi.
  883. getDescription()));
  884. solrDoc.addField(IndexField.CONTENT_TYPE.getName(), mi.getOriginal().
  885. getContentType());
  886. solrDoc.addField(IndexField.REPOSITORY.getName(), mi.getCatalogue().
  887. getName());
  888. if (mi.getMediaDate() != null) {
  889. solrDoc.addField(IndexField.DATE.getName(), mi.getMediaDate().
  890. getTime());
  891. }
  892. if (mi.isPreviewAvailable()) {
  893. solrDoc.addField(IndexField.THUMB_URL.getName(), mi.getPreview().
  894. getAbsoluteFilename());
  895. solrDoc.addField(IndexField.DIRECT_URL.getName(),
  896. mi.getPreview().getFileLocation());
  897. }
  898. solrDoc.addField(IndexField.ACTOR.getName(), mi.getOwner().
  899. getFullName());
  900. for (Concept concept : mi.getConcepts()) {
  901. if (concept instanceof Subject) {
  902. solrDoc.addField(IndexField.SUBJECT.getName(), concept.
  903. getFullTitle());
  904. }
  905. if (concept instanceof Person) {
  906. solrDoc.addField(IndexField.PERSON.getName(), concept.
  907. getFullTitle());
  908. }
  909. if (concept instanceof Organisation) {
  910. solrDoc.addField(IndexField.ORGANISATION.getName(), concept.
  911. getFullTitle());
  912. }
  913. if (concept instanceof GeoArea) {
  914. solrDoc.addField(IndexField.LOCATION.getName(), concept.
  915. getFullTitle());
  916. }
  917. if (concept instanceof PointOfInterest) {
  918. solrDoc.addField(IndexField.POINT_OF_INTEREST.getName(),
  919. concept.getFullTitle());
  920. }
  921. solrDoc.addField(IndexField.CONCEPT.getName(), concept.
  922. getFullTitle());
  923. }
  924. try {
  925. solrServer.add(solrDoc);
  926. } catch (SolrServerException ex) {
  927. throw new SearchEngineIndexingException(ex);
  928. } catch (IOException ex) {
  929. throw new SearchEngineIndexingException(ex);
  930. }
  931. } else {
  932. LOG.log(Level.FINE,
  933. "Ignoring MediaItem #{0}. Missing original {1} rendition",
  934. new Object[]{mi.getId(), mi.getCatalogue().
  935. getOriginalRendition().getName()});
  936. }
  937. }
  938. }