PageRenderTime 103ms CodeModel.GetById 29ms RepoModel.GetById 1ms app.codeStats 0ms

/src/java/org/infoglue/cms/controllers/kernel/impl/simple/LuceneController.java

http://github.com/bogeblad/infoglue
Java | 2561 lines | 1961 code | 360 blank | 240 comment | 301 complexity | c06b35b1e91eb53d9083d79c8d6754eb MD5 | raw file
  1. /* ===============================================================================
  2. *
  3. * Part of the InfoGlue Content Management Platform (www.infoglue.org)
  4. *
  5. * ===============================================================================
  6. *
  7. * Copyright (C)
  8. *
  9. * This program is free software; you can redistribute it and/or modify it under
  10. * the terms of the GNU General Public License version 2, as published by the
  11. * Free Software Foundation. See the file LICENSE.html for more information.
  12. *
  13. * This program is distributed in the hope that it will be useful, but WITHOUT
  14. * ANY WARRANTY, including the implied warranty of MERCHANTABILITY or FITNESS
  15. * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License along with
  18. * this program; if not, write to the Free Software Foundation, Inc. / 59 Temple
  19. * Place, Suite 330 / Boston, MA 02111-1307 / USA.
  20. *
  21. * ===============================================================================
  22. */
  23. package org.infoglue.cms.controllers.kernel.impl.simple;
  24. import java.io.ByteArrayOutputStream;
  25. import java.io.File;
  26. import java.io.FileInputStream;
  27. import java.io.IOException;
  28. import java.io.InputStream;
  29. import java.io.OutputStreamWriter;
  30. import java.io.StringReader;
  31. import java.io.Writer;
  32. import java.nio.channels.OverlappingFileLockException;
  33. import java.util.ArrayList;
  34. import java.util.Calendar;
  35. import java.util.Collection;
  36. import java.util.Date;
  37. import java.util.HashMap;
  38. import java.util.HashSet;
  39. import java.util.Iterator;
  40. import java.util.List;
  41. import java.util.Map;
  42. import java.util.Set;
  43. import java.util.concurrent.atomic.AtomicBoolean;
  44. import org.apache.log4j.Level;
  45. import org.apache.log4j.Logger;
  46. import org.apache.lucene.analysis.Analyzer;
  47. import org.apache.lucene.analysis.standard.StandardAnalyzer;
  48. import org.apache.lucene.document.DateTools;
  49. import org.apache.lucene.document.Document;
  50. import org.apache.lucene.document.Field;
  51. import org.apache.lucene.document.NumericField;
  52. import org.apache.lucene.index.IndexReader;
  53. import org.apache.lucene.index.IndexWriter;
  54. import org.apache.lucene.index.IndexWriterConfig;
  55. import org.apache.lucene.index.Term;
  56. import org.apache.lucene.queryParser.MultiFieldQueryParser;
  57. import org.apache.lucene.queryParser.QueryParser;
  58. import org.apache.lucene.search.BooleanClause;
  59. import org.apache.lucene.search.IndexSearcher;
  60. import org.apache.lucene.search.Query;
  61. import org.apache.lucene.search.ScoreDoc;
  62. import org.apache.lucene.search.Sort;
  63. import org.apache.lucene.search.TermQuery;
  64. import org.apache.lucene.search.TopDocs;
  65. import org.apache.lucene.store.Directory;
  66. import org.apache.lucene.store.NIOFSDirectory;
  67. import org.apache.lucene.store.SingleInstanceLockFactory;
  68. import org.apache.lucene.util.Version;
  69. import org.apache.poi.hwpf.HWPFDocument;
  70. import org.apache.poi.hwpf.extractor.WordExtractor;
  71. import org.apache.poi.poifs.filesystem.POIFSFileSystem;
  72. import org.exolab.castor.jdo.Database;
  73. import org.exolab.castor.jdo.OQLQuery;
  74. import org.exolab.castor.jdo.QueryResults;
  75. import org.infoglue.cms.applications.databeans.ProcessBean;
  76. import org.infoglue.cms.entities.content.Content;
  77. import org.infoglue.cms.entities.content.ContentCategory;
  78. import org.infoglue.cms.entities.content.ContentVO;
  79. import org.infoglue.cms.entities.content.ContentVersion;
  80. import org.infoglue.cms.entities.content.ContentVersionVO;
  81. import org.infoglue.cms.entities.content.DigitalAsset;
  82. import org.infoglue.cms.entities.content.DigitalAssetVO;
  83. import org.infoglue.cms.entities.content.SmallestContentVersionVO;
  84. import org.infoglue.cms.entities.content.impl.simple.ContentImpl;
  85. import org.infoglue.cms.entities.content.impl.simple.ContentVersionImpl;
  86. import org.infoglue.cms.entities.content.impl.simple.DigitalAssetImpl;
  87. import org.infoglue.cms.entities.content.impl.simple.MediumDigitalAssetImpl;
  88. import org.infoglue.cms.entities.content.impl.simple.SmallestContentVersionImpl;
  89. import org.infoglue.cms.entities.kernel.BaseEntityVO;
  90. import org.infoglue.cms.entities.management.CategoryAttribute;
  91. import org.infoglue.cms.entities.management.ContentTypeDefinitionVO;
  92. import org.infoglue.cms.entities.management.LanguageVO;
  93. import org.infoglue.cms.entities.structure.SiteNode;
  94. import org.infoglue.cms.entities.structure.SiteNodeVO;
  95. import org.infoglue.cms.entities.structure.SiteNodeVersion;
  96. import org.infoglue.cms.entities.structure.SiteNodeVersionVO;
  97. import org.infoglue.cms.entities.structure.impl.simple.PureSiteNodeImpl;
  98. import org.infoglue.cms.entities.structure.impl.simple.SiteNodeImpl;
  99. import org.infoglue.cms.entities.structure.impl.simple.SiteNodeVersionImpl;
  100. import org.infoglue.cms.entities.structure.impl.simple.SmallSiteNodeImpl;
  101. import org.infoglue.cms.exception.SystemException;
  102. import org.infoglue.cms.util.CmsPropertyHandler;
  103. import org.infoglue.cms.util.NotificationListener;
  104. import org.infoglue.cms.util.NotificationMessage;
  105. import org.infoglue.deliver.util.CacheController;
  106. import org.infoglue.deliver.util.RequestAnalyser;
  107. import org.infoglue.deliver.util.Timer;
  108. import org.pdfbox.pdmodel.PDDocument;
  109. import org.pdfbox.util.PDFTextStripper;
  110. public class LuceneController extends BaseController implements NotificationListener
  111. {
  112. private static Directory directory = null;
  113. private static IndexWriter writer = null;
  114. private static IndexReader indexReader = null;
  115. private static int reopened = 0;
  116. private final static Logger logger = Logger.getLogger(LuceneController.class.getName());
  117. private static int indexedDocumentsSinceLastOptimize = 0;
  118. private Integer lastCommitedContentVersionId = -1;
  119. private static Integer numberOfVersionToIndexInBatch = 1000;
  120. private static AtomicBoolean indexingInitialized = new AtomicBoolean(false);
  121. private static AtomicBoolean stopIndexing = new AtomicBoolean(false);
  122. private static AtomicBoolean deleteIndexOnStop = new AtomicBoolean(false);
  123. public static void setNumberOfVersionToIndexInBatch(Integer numberOfVersionToIndexInBatch)
  124. {
  125. numberOfVersionToIndexInBatch = numberOfVersionToIndexInBatch;
  126. }
  127. public static void stopIndexing()
  128. {
  129. stopIndexing.set(true);
  130. }
  131. /**
  132. * Default Constructor
  133. */
  134. public static LuceneController getController()
  135. {
  136. return new LuceneController();
  137. }
  // Notification messages kept for later processing; getIndexWriter() logs that messages
  // stay in this list while the index directory is locked.
  // NOTE(review): the code that fills and drains this list is not visible in this part of the file.
  138. private static List<NotificationMessage> qeuedMessages = new ArrayList<NotificationMessage>();
  // NOTE(review): presumably the queued messages that are ready to be indexed - confirm against the queue-handling code.
  139. private static List<NotificationMessage> maturedQeuedMessages = new ArrayList<NotificationMessage>();
  140. private StandardAnalyzer getStandardAnalyzer() throws Exception
  141. {
  142. return new StandardAnalyzer(Version.LUCENE_34);
  143. }
  144. private Directory getDirectory() throws Exception
  145. {
  146. if(LuceneController.directory != null)
  147. return directory;
  148. String index = CmsPropertyHandler.getContextDiskPath() + File.separator + "lucene" + File.separator + "index";
  149. index = index.replaceAll("//", "/");
  150. //System.out.println("index:" + index);
  151. File INDEX_DIR = new File(index);
  152. directory = new NIOFSDirectory(INDEX_DIR);
  153. directory.setLockFactory(new SingleInstanceLockFactory());
  154. boolean indexExists = IndexReader.indexExists(directory);
  155. if(!indexExists)
  156. {
  157. createIndex(directory);
  158. }
  159. return directory;
  160. }
  161. private void createIndex(Directory directory) throws Exception
  162. {
  163. IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_34, getStandardAnalyzer());
  164. IndexWriter indexWriter = new IndexWriter(directory, config);
  165. indexWriter.deleteDocuments(new Term("initializer", "true"));
  166. indexWriter.close(true);
  167. }
  168. private IndexWriter getIndexWriter() throws Exception
  169. {
  170. //Singleton returns
  171. if(writer != null)
  172. return writer;
  173. Timer t = new Timer();
  174. Directory directory = getDirectory();
  175. StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_34);
  176. IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_34, analyzer);
  177. if(getIsIndexedLocked(true))
  178. {
  179. logger.warn("Directory is locked - leaving the messages in the qeuedMessages list...");
  180. throw new Exception("Lock not granted");
  181. }
  182. else
  183. {
  184. writer = new IndexWriter(directory, config);
  185. return writer;
  186. }
  187. }
  188. private IndexReader getIndexReader() throws Exception
  189. {
  190. if(indexReader == null)
  191. {
  192. indexReader = IndexReader.open(getDirectory(), true);
  193. }
  194. synchronized (indexReader)
  195. {
  196. if(!indexReader.isCurrent())
  197. {
  198. reopened++;
  199. indexReader.close();
  200. indexReader = IndexReader.open(getDirectory(), true);
  201. //indexReader = IndexReader.openIfChanged(indexReader, true);
  202. logger.info("reopened:" + reopened);
  203. }
  204. }
  205. return indexReader;
  206. }
  207. private IndexSearcher getIndexSearcher() throws Exception
  208. {
  209. return new IndexSearcher(getIndexReader());
  210. }
  211. private Boolean getIsIndexedLocked() throws Exception
  212. {
  213. return getIsIndexedLocked(false);
  214. }
  215. private Boolean getIsIndexedLocked(boolean returnIfFileLockException) throws Exception
  216. {
  217. Directory directory = getDirectory();
  218. try
  219. {
  220. return IndexWriter.isLocked(directory);
  221. }
  222. catch (OverlappingFileLockException e)
  223. {
  224. return returnIfFileLockException;
  225. }
  226. }
  227. private void unlockIndex() throws Exception
  228. {
  229. Directory directory = getDirectory();
  230. IndexWriter.unlock(directory);
  231. }
  232. public Map<String,Object> getIndexInformation() throws Exception
  233. {
  234. Map<String,Object> info = new HashMap<String,Object>();
  235. try
  236. {
  237. Directory directory = getDirectory();
  238. IndexReader reader = getIndexReader();
  239. int maxDoc = reader.maxDoc();
  240. int numDoc = reader.numDocs();
  241. long lastModified = getIndexReader().lastModified(directory);
  242. info.put("maxDoc", new Integer(maxDoc));
  243. info.put("numDoc", new Integer(numDoc));
  244. info.put("lastModified", new Date(lastModified));
  245. info.put("lastCommitedContentVersionId", getLastCommitedContentVersionId());
  246. List<LanguageVO> languageVOList = LanguageController.getController().getLanguageVOList();
  247. Iterator<LanguageVO> languageVOListIterator = languageVOList.iterator();
  248. outer:while(languageVOListIterator.hasNext())
  249. {
  250. LanguageVO languageVO = (LanguageVO)languageVOListIterator.next();
  251. info.put("indexAllLastCommittedContentVersionId_" + languageVO.getId(), getIndexAllLastCommittedContentVersionId(languageVO.getId()));
  252. info.put("indexAllLastCommittedMetaContentVersionId_" + languageVO.getId(), getIndexAllLastCommittedMetaContentVersionId(languageVO.getId()));
  253. }
  254. //reader.close();
  255. //directory.close();
  256. }
  257. catch (Exception e)
  258. {
  259. logger.error("Error creating index:" + e.getMessage(), e);
  260. throw e;
  261. }
  262. return info;
  263. }
  264. public Integer getIndexAllLastCommittedContentVersionId(Integer languageId) throws Exception
  265. {
  266. Integer indexAllLastCommittedContentVersionId = null;
  267. try
  268. {
  269. Document indexAllDocumentMetaData = getIndexAllStatusDocument();
  270. if(indexAllDocumentMetaData != null && indexAllDocumentMetaData.get("lastCommitedContentVersionId_" + languageId) != null && !indexAllDocumentMetaData.get("lastCommitedContentVersionId_" + languageId).equals("null"))
  271. indexAllLastCommittedContentVersionId = new Integer(indexAllDocumentMetaData.get("lastCommitedContentVersionId_" + languageId));
  272. }
  273. catch (Exception e)
  274. {
  275. logger.error("Error creating index:" + e.getMessage(), e);
  276. throw e;
  277. }
  278. return indexAllLastCommittedContentVersionId;
  279. }
  280. public Integer getIndexAllLastCommittedMetaContentVersionId(Integer languageId) throws Exception
  281. {
  282. Integer indexAllLastCommittedSiteNodeVersionId = null;
  283. try
  284. {
  285. Document indexAllDocumentMetaData = getIndexAllStatusDocument();
  286. if(indexAllDocumentMetaData != null && indexAllDocumentMetaData.get("lastCommitedMetaContentVersionId_" + languageId) != null && !indexAllDocumentMetaData.get("lastCommitedMetaContentVersionId_" + languageId).equals("null"))
  287. indexAllLastCommittedSiteNodeVersionId = new Integer(indexAllDocumentMetaData.get("lastCommitedMetaContentVersionId_" + languageId));
  288. }
  289. catch (Exception e)
  290. {
  291. logger.error("Error creating index:" + e.getMessage(), e);
  292. throw e;
  293. }
  294. return indexAllLastCommittedSiteNodeVersionId;
  295. }
  296. public Document createStatusDocument(Integer lastCommitedContentVersionId) throws Exception
  297. {
  298. Document doc = new Document();
  299. doc.add(new Field("lastCommitedContentVersionId", "" + lastCommitedContentVersionId, Field.Store.YES, Field.Index.NOT_ANALYZED));
  300. doc.add(new Field("lastCommitedModifiedDate", "" + new Date().getTime(), Field.Store.YES, Field.Index.NOT_ANALYZED));
  301. doc.add(new Field("meta", new StringReader("lastCommitedContentVersionId")));
  302. return doc;
  303. }
  304. public Document getStatusDocument() throws Exception
  305. {
  306. List<Document> docs = queryDocuments("meta", "lastCommitedContentVersionId", 5);
  307. logger.info(docs.size() + " total matching documents for 'lastCommitedContentVersionId'");
  308. return (docs != null && docs.size() > 0 ? docs.get(0) : null);
  309. }
  310. public Document getIndexAllStatusDocument() throws Exception
  311. {
  312. List<Document> docs = queryDocuments(new Term("meta", "indexAllRunning"), 5);
  313. logger.info(docs.size() + " total matching documents for 'indexAllRunning'");
  314. return (docs != null && docs.size() > 0 ? docs.get(0) : null);
  315. }
  316. public Integer getLastCommitedContentVersionId() throws Exception
  317. {
  318. Integer lastCommitedContentVersionId = -1;
  319. Document doc = getStatusDocument();
  320. logger.info("STATUS doc:" + doc);
  321. if(doc != null)
  322. {
  323. String lastCommitedContentVersionIdString = doc.get("lastCommitedContentVersionId");
  324. logger.info("doc:" + doc);
  325. logger.info("lastCommitedContentVersionId:" + lastCommitedContentVersionIdString);
  326. lastCommitedContentVersionId = Integer.parseInt(lastCommitedContentVersionIdString);
  327. }
  328. return lastCommitedContentVersionId;
  329. }
  330. private void setLastCommitedContentVersionId(IndexWriter writer, Integer lastCommitedContentVersionId) throws Exception
  331. {
  332. Integer prevLastCommitedContentVersionId = getLastCommitedContentVersionId();
  333. logger.info("prevLastCommitedContentVersionId:" + prevLastCommitedContentVersionId);
  334. logger.info("lastCommitedContentVersionId:" + lastCommitedContentVersionId);
  335. if(lastCommitedContentVersionId == -1 || prevLastCommitedContentVersionId > lastCommitedContentVersionId)
  336. return;
  337. logger.info("setLastCommitedContentVersionId:" + lastCommitedContentVersionId);
  338. Query query = new QueryParser(Version.LUCENE_34, "meta", getStandardAnalyzer()).parse("lastCommitedContentVersionId");
  339. writer.deleteDocuments(query);
  340. writer.addDocument(createStatusDocument(lastCommitedContentVersionId));
  341. }
  342. public Date getLastCommitedModifiedDate() throws Exception
  343. {
  344. Date lastCommitedModifiedDate = new Date(10000);
  345. Document doc = getStatusDocument();
  346. if(doc != null)
  347. {
  348. String lastCommitedModifiedDateString = doc.get("lastCommitedModifiedDate");
  349. logger.info("doc:" + doc);
  350. logger.info("lastCommitedModifiedDate:" + lastCommitedModifiedDateString);
  351. Date d = new Date();
  352. d.setTime(Long.parseLong(lastCommitedModifiedDateString));
  353. lastCommitedModifiedDate = d;
  354. }
  355. return lastCommitedModifiedDate;
  356. }
  357. private void registerIndexAllProcessOngoing(Integer lastCommitedContentVersionId, Integer lastCommitedSiteNodeVersionId, Integer languageId) throws Exception
  358. {
  359. //Document doc = new Document();
  360. IndexWriter writer = getIndexWriter();
  361. IndexSearcher searcher = getIndexSearcher();
  362. Term term = new Term("meta", "indexAllRunning");
  363. TermQuery query = new TermQuery(term);
  364. //Query query = new QueryParser(Version.LUCENE_34, "meta", getStandardAnalyzer()).parse("indexAllRunning");
  365. TopDocs hits = searcher.search(query, 50);
  366. //System.out.println("hits:" + hits);
  367. //System.out.println("hits.scoreDocs.length:" + hits.scoreDocs.length);
  368. if(hits.scoreDocs.length > 1)
  369. System.out.println("Must be wrong - should only be one of these docs:" + hits.scoreDocs.length);
  370. if(hits.scoreDocs.length > 0)
  371. {
  372. for(ScoreDoc scoreDoc : hits.scoreDocs)
  373. {
  374. org.apache.lucene.document.Document docExisting = searcher.doc(scoreDoc.doc);
  375. //System.out.println("Updating doc...:" + docExisting);
  376. //System.out.println("lastCommitedContentVersionId:" + lastCommitedContentVersionId);
  377. //System.out.println("lastCommitedSiteNodeVersionId:" + lastCommitedSiteNodeVersionId);
  378. //System.out.println("languageId:" + languageId);
  379. if(lastCommitedContentVersionId != null && lastCommitedContentVersionId != -1)
  380. {
  381. docExisting.removeFields("lastCommitedContentVersionId_" + languageId);
  382. docExisting.add(new Field("lastCommitedContentVersionId_" + languageId, "" + lastCommitedContentVersionId, Field.Store.YES, Field.Index.NOT_ANALYZED));
  383. }
  384. if(lastCommitedSiteNodeVersionId != null && lastCommitedSiteNodeVersionId != -1)
  385. {
  386. docExisting.removeFields("lastCommitedMetaContentVersionId_" + languageId);
  387. docExisting.add(new Field("lastCommitedMetaContentVersionId_" + languageId, "" + lastCommitedSiteNodeVersionId, Field.Store.YES, Field.Index.NOT_ANALYZED));
  388. }
  389. docExisting.removeFields("lastCommitedModifiedDate");
  390. docExisting.add(new Field("lastCommitedModifiedDate", "" + new Date().getTime(), Field.Store.YES, Field.Index.NOT_ANALYZED));
  391. //docExisting.add(new Field("meta", new StringReader("indexAllRunning")));
  392. //docExisting.add(new Field("meta", "indexAllRunning", Field.Store.YES, Field.Index.NOT_ANALYZED));
  393. writer.updateDocument(term, docExisting);
  394. //System.out.println("Updating doc...:" + docExisting);
  395. //Term t = new Term("meta", "indexAllRunning");
  396. break;
  397. }
  398. }
  399. else
  400. {
  401. Document docExisting = new Document();
  402. //System.out.println("lastCommitedContentVersionId:" + lastCommitedContentVersionId);
  403. //System.out.println("lastCommitedSiteNodeVersionId:" + lastCommitedSiteNodeVersionId);
  404. //System.out.println("languageId:" + languageId);
  405. if(lastCommitedContentVersionId != null)
  406. docExisting.add(new Field("lastCommitedContentVersionId_" + languageId, "" + lastCommitedContentVersionId, Field.Store.YES, Field.Index.NOT_ANALYZED));
  407. if(lastCommitedSiteNodeVersionId != null)
  408. docExisting.add(new Field("lastCommitedMetaContentVersionId_" + languageId, "" + lastCommitedSiteNodeVersionId, Field.Store.YES, Field.Index.NOT_ANALYZED));
  409. docExisting.add(new Field("lastCommitedModifiedDate", "" + new Date().getTime(), Field.Store.YES, Field.Index.NOT_ANALYZED));
  410. //docExisting.add(new Field("meta", new StringReader("indexAllRunning")));
  411. docExisting.add(new Field("meta", "indexAllRunning", Field.Store.YES, Field.Index.NOT_ANALYZED));
  412. writer.addDocument(docExisting);
  413. }
  414. searcher.close();
  415. //Query query = new QueryParser(Version.LUCENE_34, "meta", getStandardAnalyzer()).parse("indexAllRunning");
  416. //writer.deleteDocuments(query);
  417. //writer.updateDocument(term, doc);
  418. //writer.addDocument(doc);
  419. //writer.close(true);
  420. writer.commit();
  421. }
  422. private void registerIndexAllProcessDone() throws Exception
  423. {
  424. IndexWriter writer = getIndexWriter();
  425. //Query query = new QueryParser(Version.LUCENE_34, "meta", getStandardAnalyzer()).parse("indexAllRunning");
  426. Term term = new Term("meta", "indexAllRunning");
  427. TermQuery query = new TermQuery(term);
  428. writer.deleteDocuments(query);
  429. writer.commit();
  430. }
  431. public void clearIndex() throws Exception
  432. {
  433. if (indexingInitialized.compareAndSet(false, true))
  434. {
  435. logger.warn("Clearing index..");
  436. try
  437. {
  438. logger.info("NumDocs:" + getIndexReader().numDocs());
  439. IndexWriter writer = getIndexWriter();
  440. writer.deleteAll();
  441. //writer.close(true);
  442. writer.commit();
  443. logger.info("NumDocs after delete:" + getIndexReader().numDocs());
  444. }
  445. catch (Exception e)
  446. {
  447. stopIndexing.set(true);
  448. deleteIndexOnStop.set(true);
  449. logger.error("Error clearing index:" + e.getMessage(), e);
  450. }
  451. finally
  452. {
  453. logger.info("Releasing indexing flag");
  454. this.indexingInitialized.set(false);
  455. stopIndexing.set(false);
  456. }
  457. }
  458. else
  459. {
  460. stopIndexing.set(true);
  461. deleteIndexOnStop.set(true);
  462. logger.error("Could not delete index while indexing. Queueing it....");
  463. }
  464. }
  465. public TopDocs query(String text, Integer numberOfHits) throws Exception
  466. {
  467. return query("contents", text, numberOfHits);
  468. }
  469. public TopDocs query(String field, String text, Integer numberOfHits) throws Exception
  470. {
  471. IndexSearcher searcher = getIndexSearcher();
  472. Query query = new QueryParser(Version.LUCENE_34, "contents", getStandardAnalyzer()).parse(text);
  473. TopDocs hits = searcher.search(query, numberOfHits);
  474. logger.info(hits.totalHits + " total matching documents for '" + text + "'");
  475. return hits;
  476. }
  477. public List<Document> queryDocuments(Term term, Integer numberOfHits) throws Exception
  478. {
  479. IndexSearcher searcher = getIndexSearcher();
  480. Query query = new TermQuery(term);
  481. TopDocs hits = searcher.search(query, numberOfHits);
  482. logger.info(hits.totalHits + " total matching documents for '" + term.field() + ":" + term.text() + "'");
  483. List<Document> docs = new ArrayList<Document>();
  484. for(ScoreDoc scoreDoc : hits.scoreDocs)
  485. {
  486. org.apache.lucene.document.Document doc = searcher.doc(scoreDoc.doc);
  487. docs.add(doc);
  488. }
  489. searcher.close();
  490. return docs;
  491. }
  492. public List<Document> queryDocuments(String field, String text, Integer numberOfHits) throws Exception
  493. {
  494. IndexSearcher searcher = getIndexSearcher();
  495. Query query = new QueryParser(Version.LUCENE_34, field, getStandardAnalyzer()).parse(text);
  496. logger.info("query:" + query);
  497. TopDocs hits = searcher.search(query, numberOfHits);
  498. logger.info(hits.totalHits + " total matching documents for '" + field + ":" + text + "'");
  499. List<Document> docs = new ArrayList<Document>();
  500. for(ScoreDoc scoreDoc : hits.scoreDocs)
  501. {
  502. org.apache.lucene.document.Document doc = searcher.doc(scoreDoc.doc);
  503. docs.add(doc);
  504. }
  505. searcher.close();
  506. return docs;
  507. }
  508. public TopDocs query(String[] fields, BooleanClause.Occur[] flags, String[] queries, Sort sort, Integer numberOfHits) throws Exception
  509. {
  510. IndexSearcher searcher = getIndexSearcher();
  511. Query query = MultiFieldQueryParser.parse(Version.LUCENE_34, queries, fields, flags, getStandardAnalyzer());
  512. //Query query = new QueryParser(Version.LUCENE_34, "contents", getStandardAnalyzer()).parse(text);
  513. TopDocs hits = searcher.search(query, numberOfHits);
  514. logger.info(hits.totalHits + " total matching documents for '" + queries + "'");
  515. return hits;
  516. }
  517. public List<Document> queryDocuments(String[] fields, BooleanClause.Occur[] flags, String[] queries, Sort sort, Integer numberOfHits, Map searchMetaData) throws Exception
  518. {
  519. IndexSearcher searcher = getIndexSearcher();
  520. Query query = MultiFieldQueryParser.parse(Version.LUCENE_34, queries, fields, flags, getStandardAnalyzer());
  521. logger.info("query:" + query);
  522. //Query query = new QueryParser(Version.LUCENE_34, "contents", getStandardAnalyzer()).parse(text);
  523. TopDocs hits = searcher.search(query, numberOfHits);
  524. searchMetaData.put("totalHits", hits.totalHits);
  525. logger.info(hits.totalHits + " total matching documents for '" + query + "'");
  526. //System.out.println(hits.totalHits + " total matching documents for '" + queries + "'");
  527. List<Document> docs = new ArrayList<Document>();
  528. for(ScoreDoc scoreDoc : hits.scoreDocs)
  529. {
  530. org.apache.lucene.document.Document doc = searcher.doc(scoreDoc.doc);
  531. docs.add(doc);
  532. }
  533. searcher.close();
  534. return docs;
  535. }
  536. private void query(IndexSearcher searcher, Analyzer analyzer, String text) throws Exception
  537. {
  538. Query query = new QueryParser(Version.LUCENE_34, "contents", analyzer).parse(text);
  539. TopDocs hits = searcher.search(query, 50);
  540. logger.info(hits.totalHits + " total matching documents for '" + text + "'");
  541. for(ScoreDoc scoreDoc : hits.scoreDocs)
  542. {
  543. org.apache.lucene.document.Document doc = searcher.doc(scoreDoc.doc);
  544. String cvId = doc.get("contentVersionId");
  545. logger.info("cvId: " + cvId);
  546. }
  547. }
  548. public boolean indexAll() throws Exception
  549. {
  550. if(!CmsPropertyHandler.getInternalSearchEngine().equalsIgnoreCase("lucene"))
  551. return false;
  552. logger.warn("INDEXING ALL - correct: " + indexingInitialized + "/" + deleteIndexOnStop + "/" + stopIndexing + "?");
  553. Thread.currentThread().setPriority(Thread.MIN_PRIORITY);
  554. if(deleteIndexOnStop.get())
  555. {
  556. clearIndex();
  557. deleteIndexOnStop.set(false);
  558. stopIndexing.set(false);
  559. }
  560. else
  561. {
  562. stopIndexing.set(false);
  563. }
  564. logger.warn("Resetting stopIndexing to false....");
  565. logger.warn("------------------------------Got indexAll directive....");
  566. if (indexingInitialized.compareAndSet(false, true))
  567. {
  568. //createTestIndex();
  569. //indexingInitialized.set(false);
  570. //if(true)
  571. // return true;
  572. try
  573. {
  574. Timer t = new Timer();
  575. Timer t2 = new Timer();
  576. //Indexing all normal contents now
  577. logger.info("Indexing all normal contents: " + CmsPropertyHandler.getContextDiskPath());
  578. List<LanguageVO> languageVOList = LanguageController.getController().getLanguageVOList();
  579. Iterator<LanguageVO> languageVOListIterator = languageVOList.iterator();
  580. outer:while(languageVOListIterator.hasNext())
  581. {
  582. LanguageVO languageVO = (LanguageVO)languageVOListIterator.next();
  583. logger.info("Getting notification messages for " + languageVO.getName());
  584. Integer previousIndexAllLastContentVersionId = getIndexAllLastCommittedContentVersionId(languageVO.getId());
  585. int startID = 0;
  586. if(previousIndexAllLastContentVersionId != null)
  587. startID = previousIndexAllLastContentVersionId;
  588. logger.info("Starting from " + startID);
  589. int newLastContentVersionId = getContentNotificationMessages(languageVO, startID);
  590. logger.info("newLastContentVersionId: " + newLastContentVersionId + " on " + languageVO.getName());
  591. registerIndexAllProcessOngoing(newLastContentVersionId, null, languageVO.getId());
  592. //previousIndexAllLastContentVersionId = newLastContentVersionId;
  593. RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getNotificationMessages", t.getElapsedTime());
  594. logger.info("newLastContentVersionId " + newLastContentVersionId);
  595. while(newLastContentVersionId != -1)
  596. {
  597. logger.info("stopIndexing.get():" + stopIndexing.get());
  598. if(stopIndexing.get())
  599. break outer;
  600. Thread.sleep(5000);
  601. newLastContentVersionId = getContentNotificationMessages(languageVO, newLastContentVersionId);
  602. logger.info("newLastContentVersionId: " + newLastContentVersionId + " on " + languageVO.getName());
  603. registerIndexAllProcessOngoing(newLastContentVersionId, null, languageVO.getId());
  604. //previousIndexAllLastContentVersionId = newLastContentVersionId;
  605. RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getNotificationMessages 2", t.getElapsedTime());
  606. logger.info("newLastContentVersionId " + newLastContentVersionId);
  607. }
  608. }
  609. languageVOList = LanguageController.getController().getLanguageVOList();
  610. languageVOListIterator = languageVOList.iterator();
  611. outer:while(languageVOListIterator.hasNext())
  612. {
  613. LanguageVO languageVO = (LanguageVO)languageVOListIterator.next();
  614. logger.info("languageVO from " + languageVO);
  615. List<NotificationMessage> notificationMessages = new ArrayList<NotificationMessage>();
  616. Integer previousIndexAllLastMetaContentVersionId = getIndexAllLastCommittedMetaContentVersionId(languageVO.getId());
  617. logger.info("previousIndexAllLastMetaContentVersionId: " + previousIndexAllLastMetaContentVersionId);
  618. int startID = 0;
  619. if(previousIndexAllLastMetaContentVersionId != null)
  620. startID = previousIndexAllLastMetaContentVersionId;
  621. logger.info("Starting from " + startID);
  622. int newLastMetaContentVersionId = getPageNotificationMessages(notificationMessages, languageVO, startID);
  623. logger.info("newLastSiteNodeVersionId " + newLastMetaContentVersionId + " on " + languageVO.getName());
  624. logger.info("notificationMessages: " + notificationMessages.size());
  625. registerIndexAllProcessOngoing(null, newLastMetaContentVersionId, languageVO.getId());
  626. //previousIndexAllLastMetaContentVersionId = newLastMetaContentVersionId;
  627. RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getNotificationMessagesForStructure", t.getElapsedTime());
  628. logger.info("newLastMetaContentVersionId " + newLastMetaContentVersionId);
  629. while(newLastMetaContentVersionId != -1)
  630. {
  631. logger.info("stopIndexing.get():" + stopIndexing.get());
  632. if(stopIndexing.get())
  633. break outer;
  634. Thread.sleep(5000);
  635. newLastMetaContentVersionId = getPageNotificationMessages(notificationMessages, languageVO, newLastMetaContentVersionId);
  636. logger.info("newLastMetaContentVersionId " + newLastMetaContentVersionId + " on " + languageVO.getName());
  637. logger.info("notificationMessages: " + notificationMessages.size());
  638. registerIndexAllProcessOngoing(null, newLastMetaContentVersionId, languageVO.getId());
  639. //previousIndexAllLastMetaContentVersionId = newLastMetaContentVersionId;
  640. RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getNotificationMessages 2", t.getElapsedTime());
  641. logger.info("newLastMetaContentVersionId " + newLastMetaContentVersionId);
  642. }
  643. }
  644. registerIndexAllProcessDone();
  645. t2.printElapsedTime("All indexing took");
  646. }
  647. catch (Exception e)
  648. {
  649. logger.error("Error indexing notifications:" + e.getMessage(), e);
  650. }
  651. finally
  652. {
  653. logger.error("Releasing indexing flag");
  654. this.indexingInitialized.set(false);
  655. }
  656. }
  657. else
  658. {
  659. logger.warn("-------------------: Allready running index all...");
  660. return false;
  661. }
  662. return true;
  663. }
  664. private void createTestIndex()
  665. {
  666. System.out.println("STARTING TEST");
  667. try
  668. {
  669. clearIndex();
  670. IndexWriter writer = getIndexWriter();
  671. for(int i=0; i<10000; i++)
  672. {
  673. // make a new, empty document
  674. Document doc = new Document();
  675. doc.add(new NumericField("publishDateTime", Field.Store.YES, true).setLongValue(23423423423L));
  676. doc.add(new NumericField("modificationDateTime", Field.Store.YES, true).setLongValue(23423423423L));
  677. doc.add(new Field("modified", DateTools.timeToString(23423423423L, DateTools.Resolution.MINUTE), Field.Store.YES, Field.Index.NOT_ANALYZED));
  678. doc.add(new Field("contentVersionId", "324234234", Field.Store.YES, Field.Index.NOT_ANALYZED));
  679. doc.add(new Field("contentId", "324234234", Field.Store.YES, Field.Index.NOT_ANALYZED));
  680. doc.add(new Field("contentTypeDefinitionId", "344", Field.Store.YES, Field.Index.NOT_ANALYZED));
  681. doc.add(new Field("languageId", "33", Field.Store.YES, Field.Index.NOT_ANALYZED));
  682. doc.add(new Field("repositoryId", "22", Field.Store.YES, Field.Index.NOT_ANALYZED));
  683. doc.add(new Field("lastModifier", "Mattias Bogeblad", Field.Store.YES, Field.Index.NOT_ANALYZED));
  684. doc.add(new Field("stateId", "3", Field.Store.YES, Field.Index.NOT_ANALYZED));
  685. doc.add(new Field("isAsset", "false", Field.Store.YES, Field.Index.NOT_ANALYZED));
  686. doc.add(new Field("contents", new StringReader(i + " fwe foweif oiwejfoijweoifiweuhfi uehwiufh weiuhfiuwehfiew iufiuwehfi ewiufh iuwehfiuehwiufiweuhfiu ehwifhw eifew efiwehfiuwe" +
  687. "ff wehfiuehwiufiuwehfiuehw iufhwei uhfiehwiufweiuhf iwefihw eifiuwe ifhwe ifihew iufi weuhfiuwe" +
  688. "dfbsdjfsjdjfjksdf s f jdsjkfs dkjfh ksdfk sdkfhkds fksd " +
  689. "fjsd fsdhf uiweo p fiieowhf iehwiufiewhfiewfhw efn ewfowe ifioewf owehfowe")));
  690. doc.add(new Field("uid", "" + i, Field.Store.NO, Field.Index.NOT_ANALYZED));
  691. writer.addDocument(doc);
  692. if(i == 1000 || i == 2000 ||i == 3000 ||i == 4000 ||i == 5000 ||i == 6000 ||i == 7000 ||i == 8000 ||i == 9000)
  693. {
  694. //writer.optimize();
  695. //writer.optimize(true);
  696. logger.info("Sleeping...:" + getIndexInformation().get("numDoc"));
  697. Thread.sleep(5000);
  698. }
  699. }
  700. //writer.close(true);
  701. writer.commit();
  702. }
  703. catch (Exception e)
  704. {
  705. e.printStackTrace();
  706. }
  707. }
  708. /**
  709. * This method gets called when a new notification has come.
  710. * It then iterates through the listeners and notifies them.
  711. */
  712. public void addNotificationMessage(NotificationMessage notificationMessage)
  713. {
  714. if(notificationMessage.getClassName().equals(ContentImpl.class.getName()) ||
  715. notificationMessage.getClassName().equals(ContentVersionImpl.class.getName()) ||
  716. notificationMessage.getClassName().equals(SiteNodeImpl.class.getName()) ||
  717. notificationMessage.getClassName().equals(PureSiteNodeImpl.class.getName()) ||
  718. notificationMessage.getClassName().equals(SmallSiteNodeImpl.class.getName()) ||
  719. notificationMessage.getClassName().equals(SiteNodeVersionImpl.class.getName()) ||
  720. notificationMessage.getClassName().equals(DigitalAssetImpl.class.getName()) ||
  721. notificationMessage.getClassName().equals(MediumDigitalAssetImpl.class.getName()))
  722. {
  723. if(qeuedMessages.size() == 1000)
  724. {
  725. logger.warn("qeuedMessages went over 1000 - seems wrong");
  726. //Thread.dumpStack();
  727. }
  728. synchronized (qeuedMessages)
  729. {
  730. qeuedMessages.add(notificationMessage);
  731. }
  732. }
  733. else
  734. {
  735. logger.info("Skipping indexing:" + notificationMessage.getClassName());
  736. }
  737. }
  738. /**
  739. * This method gets called when a new NotificationMessage is available.
  740. * The writer just calls the transactionHistoryController which stores it.
  741. */
  742. public void notify(NotificationMessage notificationMessage)
  743. {
  744. try
  745. {
  746. if(logger.isInfoEnabled())
  747. logger.info("Indexing:" + notificationMessage.getName() + ":" + notificationMessage.getType() + ":" + notificationMessage.getObjectId() + ":" + notificationMessage.getObjectName());
  748. addNotificationMessage(notificationMessage);
  749. }
  750. catch(Exception e)
  751. {
  752. logger.error("Error notifying: " + e.getMessage());
  753. }
  754. }
  755. public void process() throws Exception
  756. {
  757. logger.info("Process inside LuceneController");
  758. notifyListeners(false, true);
  759. }
  760. public void notifyListeners(boolean forceVersionIndexing, boolean checkForIndexingJobs) throws IOException, Exception
  761. {
  762. if(!CmsPropertyHandler.getInternalSearchEngine().equalsIgnoreCase("lucene") || CmsPropertyHandler.getContextDiskPath().contains("@deploy.dir"))
  763. return;
  764. boolean initDoneLocally = false;
  765. boolean finishDoneLocally = false;
  766. logger.info("------------------------------->notifyListeners before check in " + CmsPropertyHandler.getContextRootPath() + "/" + deleteIndexOnStop.get() + "/" + stopIndexing.get());
  767. if(deleteIndexOnStop.get())
  768. {
  769. clearIndex();
  770. deleteIndexOnStop.set(false);
  771. stopIndexing.set(false);
  772. }
  773. else
  774. {
  775. stopIndexing.set(false);
  776. }
  777. if (!checkForIndexingJobs || indexingInitialized.compareAndSet(false, true))
  778. {
  779. if(checkForIndexingJobs)
  780. initDoneLocally = true;
  781. List<NotificationMessage> internalMessageList = new ArrayList<NotificationMessage>();
  782. List<NotificationMessage> revisitedInternalMessageList = new ArrayList<NotificationMessage>();
  783. synchronized (qeuedMessages)
  784. {
  785. //logger.error("internalMessageList: " + internalMessageList.size() + "/" + qeuedMessages.size());
  786. internalMessageList.addAll(qeuedMessages);
  787. //logger.error("internalMessageList: " + internalMessageList.size() + "/" + qeuedMessages.size());
  788. qeuedMessages.clear();
  789. //logger.error("internalMessageList: " + internalMessageList.size() + "/" + qeuedMessages.size());
  790. }
  791. synchronized (maturedQeuedMessages)
  792. {
  793. logger.info("maturedQeuedMessages:" + maturedQeuedMessages.size());
  794. if(maturedQeuedMessages.size() > 0)
  795. {
  796. logger.info("Was a matured message - let's take it also");
  797. internalMessageList.addAll(maturedQeuedMessages);
  798. revisitedInternalMessageList.addAll(maturedQeuedMessages);
  799. //logger.error("internalMessageList: " + internalMessageList.size() + "/" + qeuedMessages.size());
  800. maturedQeuedMessages.clear();
  801. }
  802. }
  803. //Should implement equals on NotificationMessage later
  804. List<NotificationMessage> baseEntitiesToIndexMessageList = new ArrayList<NotificationMessage>();
  805. List<String> existingSignatures = new ArrayList<String>();
  806. logger.info("Before AAAAA:" + internalMessageList.size() + ":" + existingSignatures.size());
  807. Iterator<NotificationMessage> cleanupInternalMessageListIterator = internalMessageList.iterator();
  808. while(cleanupInternalMessageListIterator.hasNext())
  809. {
  810. NotificationMessage notificationMessage = cleanupInternalMessageListIterator.next();
  811. logger.info("Indexing........:" + notificationMessage.getClassName());
  812. if(notificationMessage.getClassName().equals(ContentImpl.class.getName()) || notificationMessage.getClassName().equals(Content.class.getName()))
  813. {
  814. ContentVO contentVO = ContentController.getContentController().getLocklessContentVOWithId((Integer)notificationMessage.getObjectId());
  815. //ContentVO contentVO = ContentController.getContentController().getContentVOWithId((Integer)notificationMessage.getObjectId());
  816. if(contentVO != null)
  817. {
  818. ContentTypeDefinitionVO ctdVO = null;
  819. try
  820. {
  821. ctdVO = ContentTypeDefinitionController.getController().getContentTypeDefinitionVOWithId(contentVO.getContentTypeDefinitionId());
  822. }
  823. catch (SystemException sex)
  824. {
  825. logger.warn("Failed to get the content type definition for content with Id: " + contentVO.getContentId() + ". The content will not be indexed. Message: " + sex.getMessage());
  826. logger.info("Failed to get the content type definition for content with Id: " + contentVO.getContentId(), sex);
  827. }
  828. if(ctdVO != null && ctdVO.getName().equals("Meta info"))
  829. {
  830. SiteNodeVO siteNodeVO = SiteNodeController.getController().getSiteNodeVOWithMetaInfoContentId(contentVO.getContentId());
  831. if(siteNodeVO != null && notificationMessage != null)
  832. {
  833. NotificationMessage newNotificationMessage = new NotificationMessage("" + siteNodeVO.getName(), SiteNodeImpl.class.getName(), "SYSTEM", notificationMessage.getType(), siteNodeVO.getId(), "" + siteNodeVO.getName());
  834. String key = "" + newNotificationMessage.getClassName() + "_" + newNotificationMessage.getObjectId() + "_" + "_" + newNotificationMessage.getType();
  835. if(!existingSignatures.contains(key))
  836. {
  837. logger.info("++++++++++++++Got an META PAGE notification - just adding it AS A PAGE instead: " + newNotificationMessage.getObjectId());
  838. baseEntitiesToIndexMessageList.add(newNotificationMessage);
  839. existingSignatures.add(key);
  840. }
  841. else
  842. {
  843. logger.info("++++++++++++++Skipping Content notification - duplicate existed: " + notificationMessage.getObjectId());
  844. }
  845. }
  846. }
  847. else
  848. {
  849. String key = "" + notificationMessage.getClassName() + "_" + notificationMessage.getObjectId() + "_" + "_" + notificationMessage.getType();
  850. if(!existingSignatures.contains(key))
  851. {
  852. logger.info("++++++++++++++Got an Content notification - just adding it: " + notificationMessage.getObjectId());
  853. baseEntitiesToIndexMessageList.add(notificationMessage);
  854. existingSignatures.add(key);
  855. }
  856. else
  857. {
  858. logger.info("++++++++++++++Skipping Content notification - duplicate existed: " + notificationMessage.getObjectId());
  859. }
  860. }
  861. }
  862. else
  863. {
  864. logger.info("The content seems to be missing from the database. A guess is that it's new or deleted. Let's try later.");
  865. if(!revisitedInternalMessageList.contains(notificationMessage))
  866. maturedQeuedMessages.add(notificationMessage);
  867. else
  868. logger.info("No - allready tried it again.. skipping.");
  869. }
  870. }
  871. else if(notificationMessage.getClassName().equals(ContentVersionImpl.class.getName()) || notificationMessage.getClassName().equals(ContentVersion.class.getName()))
  872. {
  873. logger.info("++++++++++++++Got an ContentVersion notification - focus on content: " + notificationMessage.getObjectId());
  874. //ContentVersionVO contentVersionVO = ContentVersionController.getContentVersionController().getContentVersionVOWithId((Integer)notificationMessage.getObjectId());
  875. ContentVersionVO contentVersionVO = ContentVersionController.getContentVersionController().getLocklessContentVersionVOWithId((Integer)notificationMessage.getObjectId());
  876. if(contentVersionVO != null)
  877. {
  878. ContentVO contentVO = ContentController.getContentController().getLocklessContentVOWithId(contentVersionVO.getContentId());
  879. if(contentVO.getContentTypeDefinitionId() != null)
  880. {
  881. ContentTypeDefinitionVO ctdVO = null;
  882. try
  883. {
  884. ctdVO = ContentTypeDefinitionController.getController().getContentTypeDefinitionVOWithId(contentVO.getContentTypeDefinitionId());
  885. }
  886. catch (SystemException sex)
  887. {
  888. logger.warn("Failed to get the content type definition for content with Id: " + contentVO.getContentId() + ". The content version will not be indexed. Message: " + sex.getMessage());
  889. logger.info("Failed to get the content type definition for content with Id: " + contentVO.getContentId(), sex);
  890. }
  891. if(ctdVO != null && ctdVO.getName().equals("Meta info"))
  892. {
  893. SiteNodeVO siteNodeVO = SiteNodeController.getController().getSiteNodeVOWithMetaInfoContentId(contentVO.getContentId());
  894. if (siteNodeVO == null)
  895. {
  896. logger.warn("Got meta info notification but could not find a page for the Content-id. Content.id: " + contentVO.getContentId());
  897. }
  898. else
  899. {
  900. NotificationMessage newNotificationMessage = new NotificationMessage("" + siteNodeVO.getName(), SiteNodeImpl.class.getName(), "SYSTEM", notificationMessage.getType(), siteNodeVO.getId(), "" + siteNodeVO.getName());
  901. String key = "" + newNotificationMessage.getClassName() + "_" + newNotificationMessage.getObjectId() + "_" + newNotificationMessage.getType();
  902. if(!existingSignatures.contains(key))
  903. {
  904. logger.info("++++++++++++++Got an META PAGE notification - just adding it AS A PAGE instead: " + newNotificationMessage.getObjectId());
  905. baseEntitiesToIndexMessageList.add(newNotificationMessage);
  906. existingSignatures.add(key);
  907. }
  908. else
  909. {
  910. logger.info("++++++++++++++Skipping Content notification - duplicate existed: " + notificationMessage.getObjectId());
  911. }
  912. }
  913. }
  914. else
  915. {
  916. NotificationMessage newNotificationMessage = new NotificationMessage("" + contentVersionVO.getContentName(), ContentImpl.class.getName(), "SYSTEM", notificationMessage.getType(), contentVersionVO.getContentId(), "" + contentVersionVO.getContentName());
  917. String key = "" + newNotificationMessage.getClassName() + "_" + newNotificationMessage.getObjectId() + "_" + newNotificationMessage.getType();
  918. if(!existingSignatures.contains(key))
  919. {
  920. logger.info("++++++++++++++Got an Content notification - just adding it: " + newNotificationMessage.getObjectId());
  921. baseEntitiesToIndexMessageList.add(newNotificationMessage);
  922. existingSignatures.add(key);
  923. }
  924. else
  925. {
  926. logger.info("++++++++++++++Skipping Content notification - duplicate existed: " + notificationMessage.getObjectId());
  927. }
  928. }
  929. }
  930. }
  931. else
  932. {
  933. logger.info("The content version seems to be missing from the database. A guess is that it's new or deleted. Let's try later.");
  934. if(!revisitedInternalMessageList.contains(notificationMessage))
  935. maturedQeuedMessages.add(notificationMessage);
  936. else
  937. logger.info("No - allready tried it again.. skipping.");
  938. }
  939. }
  940. else if(notificationMessage.getClassName().equals(DigitalAssetImpl.class.getName()) ||
  941. notificationMessage.getClassName().equals(MediumDigitalAssetImpl.class.getName()) ||
  942. notificationMessage.getClassName().equals(DigitalAsset.class.getName()) ||
  943. notificationMessage.getClassName().equals(SiteNodeImpl.class.getName()) ||
  944. notificationMessage.getClassName().equals(SmallSiteNodeImpl.class.getName()) ||
  945. notificationMessage.getClassName().equals(PureSiteNodeImpl.class.getName()) ||
  946. notificationMessage.getClassName().equals(SiteNode.class.getName()) ||
  947. notificationMessage.getClassName().equals(SiteNodeVersionImpl.class.getName()) ||
  948. notificationMessage.getClassName().equals(SiteNodeVersion.class.getName()))
  949. {
  950. logger.info("notificationMessage.getClassName():" + notificationMessage.getClassName());
  951. String key = "" + notificationMessage.getClassName() + "_" + notificationMessage.getObjectId() + "_" + "_" + notificationMessage.getType();
  952. if(notificationMessage.getClassName().equals(SiteNodeVersionImpl.class.getName()) || notificationMessage.getClassName().equals(SiteNodeVersion.class.getName()))
  953. {
  954. logger.info("PPPPPPPPPPPPPPPPPPPPPPPPPP:" + notificationMessage.getObjectId());
  955. try
  956. {
  957. SiteNodeVersionVO siteNodeVersionVO = SiteNodeVersionController.getController().getSiteNodeVersionVOWithId((Integer)notificationMessage.getObjectId());
  958. SiteNodeVO siteNodeVO = SiteNodeController.getController().getSiteNodeVOWithId(siteNodeVersionVO.getSiteNodeId());
  959. NotificationMessage newNotificationMessage = new NotificationMessage("" + siteNodeVO.getName(), SiteNodeImpl.class.getName(), "SYSTEM", notificationMessage.getType(), siteNodeVO.getId(), "" + siteNodeVO.getName());
  960. key = "" + newNotificationMessage.getClassName() + "_" + newNotificationMessage.getObjectId() + "_" + newNotificationMessage.getType();
  961. if(!existingSignatures.contains(key))
  962. {
  963. logger.info("++++++++++++++Got an SiteNodeVersionImpl notification - just adding it as SiteNodeImpl: " + newNotificationMessage.getClassName() + ":" + newNotificationMessage.getObjectId());
  964. baseEntitiesToIndexMessageList.add(newNotificationMessage);
  965. existingSignatures.add(key);
  966. }
  967. else
  968. {
  969. logger.info("++++++++++++++Skipping notification - duplicate existed: " + notificationMessage.getClassName() + ":" + notificationMessage.getObjectId());
  970. }
  971. }
  972. catch(Exception e)
  973. {
  974. logger.warn("Got an error handling SiteNodeVersion with ID: " + notificationMessage.getObjectId() + ":" + e.getMessage());
  975. }
  976. }
  977. else if(notificationMessage.getClassName().equals(SiteNodeImpl.class.getName()) || notificationMessage.getClassName().equals(SiteNode.class.getName()) || notificationMessage.getClassName().equals(SmallSiteNodeImpl.class.getName()) || notificationMessage.getClassName().equals(PureSiteNodeImpl.class.getName()))
  978. {
  979. if(!existingSignatures.contains(key))
  980. {
  981. logger.info("++++++++++++++Got an Page notification - just adding it: " + notificationMessage.getClassName() + ":" + notificationMessage.getObjectId());
  982. baseEntitiesToIndexMessageList.add(notificationMessage);
  983. existingSignatures.add(key);
  984. }
  985. else
  986. {
  987. logger.info("++++++++++++++Skipping notification - duplicate existed: " + notificationMessage.getClassName() + ":" + notificationMessage.getObjectId());
  988. }
  989. }
  990. else
  991. {
  992. NotificationMessage newNotificationMessage = new NotificationMessage("" + notificationMessage.getName(), DigitalAssetImpl.class.getName(), "SYSTEM", notificationMessage.getType(), notificationMessage.getObjectId(), "" + notificationMessage.getName());
  993. key = "" + newNotificationMessage.getClassName() + "_" + newNotificationMessage.getObjectId() + "_" + "_" + newNotificationMessage.getType();
  994. if(!existingSignatures.contains(key))
  995. {
  996. logger.info("++++++++++++++Got an Content notification - just adding it: " + notificationMessage.getClassName() + ":" + notificationMessage.getObjectId());
  997. baseEntitiesToIndexMessageList.add(newNotificationMessage);
  998. existingSignatures.add(key);
  999. }
  1000. else
  1001. {
  1002. logger.info("++++++++++++++Skipping notification - duplicate existed: " + notificationMessage.getClassName() + ":" + notificationMessage.getObjectId());
  1003. }
  1004. }
  1005. }
  1006. }
  1007. internalMessageList = baseEntitiesToIndexMessageList;
  1008. logger.info("After in [" + CmsPropertyHandler.getContextRootPath() + "]:" + internalMessageList.size() + ":" + existingSignatures.size());
  1009. try
  1010. {
  1011. logger.info("notifyListeners actually running");
  1012. if(getIsIndexedLocked())
  1013. {
  1014. logger.warn("The index should not be locked as no indexing is registered to be carried out. Lets unlock it as it may be the result of a crash.");
  1015. unlockIndex();
  1016. }
  1017. //logger.error("Starting indexin of " + qeuedMessages.size());
  1018. Timer t = new Timer();
  1019. IndexWriter writer = getIndexWriter();
  1020. //t.printElapsedTime("Creating writer took");
  1021. Database db = CastorDatabaseService.getDatabase();
  1022. beginTransaction(db);
  1023. try
  1024. {
  1025. int numberOfMessages = internalMessageList.size();
  1026. Iterator internalMessageListIterator = internalMessageList.iterator();
  1027. while(internalMessageListIterator.hasNext())
  1028. {
  1029. NotificationMessage notificationMessage = (NotificationMessage)internalMessageListIterator.next();
  1030. try
  1031. {
  1032. if(logger.isInfoEnabled())
  1033. logger.info("Starting indexin of " + notificationMessage);
  1034. indexInformation(notificationMessage, writer, internalMessageList, forceVersionIndexing, db);
  1035. internalMessageListIterator.remove();
  1036. }
  1037. catch (Exception e)
  1038. {
  1039. e.printStackTrace();
  1040. }
  1041. }
  1042. //t.printElapsedTime("Indexing " + numberOfMessages + " documents took");
  1043. //Map<String,String> commitUserData = new HashMap<String,String>();
  1044. //internalMessageList.clear();
  1045. //writer.commit(commitUserData);
  1046. logger.info("##############lastCommitedContentVersionId before close:" + lastCommitedContentVersionId);
  1047. if(lastCommitedContentVersionId > -1)
  1048. {
  1049. Integer previousLastCommittedContentVersionId = getLastCommitedContentVersionId();
  1050. logger.info("##############previousLastCommittedContentVersionId before close:" + previousLastCommittedContentVersionId);
  1051. if(previousLastCommittedContentVersionId < lastCommitedContentVersionId)
  1052. {
  1053. try
  1054. {
  1055. logger.info("*************ADDING status doc " + lastCommitedContentVersionId + "**************");
  1056. setLastCommitedContentVersionId(writer, lastCommitedContentVersionId);
  1057. }
  1058. catch (Exception e)
  1059. {
  1060. logger.error("*************ERROR: ADDING status doc**************", e);
  1061. }
  1062. }
  1063. else
  1064. {
  1065. logger.warn("The content version was not a higher number than what was allready indexed - lets not add status....");
  1066. }
  1067. }
  1068. commitTransaction(db);
  1069. }
  1070. catch(Exception e)
  1071. {
  1072. logger.error("An error occurred so we should not complete the transaction:" + e.getMessage(), e);
  1073. rollbackTransaction(db);
  1074. }
  1075. finally
  1076. {
  1077. writer.commit();
  1078. //writer.close(true);
  1079. }
  1080. logger.info("OOOOOOOOOOOOOO:" + getLastCommitedContentVersionId());
  1081. }
  1082. catch (Exception e)
  1083. {
  1084. logger.error("Error indexing notifications:" + e.getMessage());
  1085. logger.warn("Error indexing notifications:" + e.getMessage(), e);
  1086. }
  1087. finally
  1088. {
  1089. logger.info("Releasing indexing flag");
  1090. try
  1091. {
  1092. if(internalMessageList.size() > 0)
  1093. {
  1094. synchronized (qeuedMessages)
  1095. {
  1096. logger.info("Returning internalMessageList:" + internalMessageList.size() + " to qeuedMessages as some failed.");
  1097. qeuedMessages.addAll(internalMessageList);
  1098. internalMessageList.clear();
  1099. }
  1100. }
  1101. }
  1102. catch (Exception e)
  1103. {
  1104. e.printStackTrace();
  1105. }
  1106. if(checkForIndexingJobs)
  1107. {
  1108. this.indexingInitialized.set(false);
  1109. finishDoneLocally = true;
  1110. }
  1111. }
  1112. if(initDoneLocally && !finishDoneLocally)
  1113. logger.error("EEEEEEEEEEEEEEERRRRRRRRRRRRRRROOOOOOOOOOOORRRRRRRR aaaaaaa");
  1114. logger.info("internalMessageList 1:" + internalMessageList.size() + " / " + qeuedMessages.size());
  1115. }
  1116. else
  1117. {
  1118. logger.info("------------------------------->Indexing job allready running... skipping in " + CmsPropertyHandler.getContextRootPath());
  1119. }
  1120. logger.info("queued messages 1:" + qeuedMessages.size());
  1121. }
  1122. public void index() throws Exception
  1123. {
  1124. if(!CmsPropertyHandler.getInternalSearchEngine().equalsIgnoreCase("lucene"))
  1125. return;
  1126. logger.info("Start index: " + CmsPropertyHandler.getContextRootPath() + "/" + deleteIndexOnStop.get() + "/" + stopIndexing.get());
  1127. if(deleteIndexOnStop.get())
  1128. {
  1129. clearIndex();
  1130. deleteIndexOnStop.set(false);
  1131. stopIndexing.set(false);
  1132. }
  1133. else
  1134. {
  1135. stopIndexing.set(false);
  1136. }
  1137. logger.info("################# starting index");
  1138. //if (indexStarted.compareAndSet(false, true))
  1139. //{
  1140. IndexReader indexReader = null;
  1141. try
  1142. {
  1143. Integer lastCommitedContentVersionId = getLastCommitedContentVersionId();
  1144. Document indexAllDocumentMetaData = getIndexAllStatusDocument();
  1145. //Integer previousIndexAllLastContentVersionId = getIndexAllLastCommittedContentVersionId();
  1146. logger.info("lastCommitedContentVersionId:" + lastCommitedContentVersionId);
  1147. Date lastCommitedModifiedDate = getLastCommitedModifiedDate();
  1148. Calendar yesterday = Calendar.getInstance();
  1149. yesterday.add(Calendar.HOUR_OF_DAY, -1);
  1150. logger.info("lastCommitedContentVersionId: " + lastCommitedContentVersionId);
  1151. logger.info("lastCommitedModifiedDate: " + lastCommitedModifiedDate);
  1152. indexReader = getIndexReader();
  1153. boolean didIndex = false;
  1154. if(lastCommitedContentVersionId == -1 || indexAllDocumentMetaData != null || indexReader.numDocs() < 100)
  1155. {
  1156. logger.info("indexAll as it seemed to be not ready.....");
  1157. logger.info("###########################IndexAll");
  1158. didIndex = indexAll();
  1159. }
  1160. else //Skipping indexing for now..
  1161. {
  1162. logger.info("###########################indexIncremental");
  1163. didIndex = indexIncremental(lastCommitedContentVersionId, yesterday.getTime());
  1164. }
  1165. if(didIndex)
  1166. {
  1167. CacheController.clearCache("pageCache");
  1168. CacheController.clearCache("pageCacheExtra");
  1169. }
  1170. }
  1171. catch (Exception e)
  1172. {
  1173. logger.error("Error indexing notifications:" + e.getMessage());
  1174. logger.warn("Error indexing notifications:" + e.getMessage(), e);
  1175. }
  1176. /*
  1177. }
  1178. else
  1179. {
  1180. logger.error("################# skipping index, was allready started");
  1181. }
  1182. */
  1183. }
  1184. public boolean indexIncremental(Integer lastCommitedContentVersionId, Date lastCommitedDateTime) throws Exception
  1185. {
  1186. if(!CmsPropertyHandler.getInternalSearchEngine().equalsIgnoreCase("lucene"))
  1187. return false;
  1188. Timer t = new Timer();
  1189. Timer t2 = new Timer();
  1190. logger.info("Indexing incremental:" + lastCommitedContentVersionId + "/" + lastCommitedDateTime);
  1191. //Map<String,String> lastCommitData = reader.getCommitUserData();
  1192. List<LanguageVO> languageVOList = LanguageController.getController().getLanguageVOList();
  1193. Iterator<LanguageVO> languageVOListIterator = languageVOList.iterator();
  1194. outer:while(languageVOListIterator.hasNext())
  1195. {
  1196. LanguageVO languageVO = (LanguageVO)languageVOListIterator.next();
  1197. List<NotificationMessage> notificationMessages = new ArrayList<NotificationMessage>();
  1198. //logger.error("Getting notification messages for " + languageVO.getName());
  1199. int newLastContentVersionId = getNotificationMessages(notificationMessages, languageVO, lastCommitedContentVersionId, lastCommitedDateTime, 1000);
  1200. while(newLastContentVersionId != -1)
  1201. {
  1202. Thread.sleep(5000);
  1203. if(stopIndexing.get())
  1204. break outer;
  1205. logger.info("Queueing " + notificationMessages.size() + " notificationMessages for indexing");
  1206. for(NotificationMessage notificationMessage : notificationMessages)
  1207. {
  1208. notify(notificationMessage);
  1209. }
  1210. notifyListeners(true, false);
  1211. notificationMessages.clear();
  1212. //t.printElapsedTime("Indexing size():" + notificationMessages.size() + " took");
  1213. Integer newLastContentVersionIdCandidate = getNotificationMessages(notificationMessages, languageVO, newLastContentVersionId, lastCommitedDateTime, 1000);
  1214. logger.info("newLastContentVersionIdCandidate:" + newLastContentVersionIdCandidate + "=" + newLastContentVersionId);
  1215. if(newLastContentVersionIdCandidate > newLastContentVersionId)
  1216. newLastContentVersionId = newLastContentVersionIdCandidate;
  1217. else
  1218. break;
  1219. //t.printElapsedTime("newLastContentVersionId:" + newLastContentVersionId + " took");
  1220. }
  1221. }
  1222. if(logger.isInfoEnabled())
  1223. t2.printElapsedTime("All indexing took");
  1224. return true;
  1225. }
  1226. private int getNotificationMessagesForStructure(List<NotificationMessage> notificationMessages, LanguageVO languageVO, int lastSiteNodeVersionId) throws Exception
  1227. {
  1228. Timer t = new Timer();
  1229. logger.info("getNotificationMessages:" + lastSiteNodeVersionId);
  1230. int newLastSiteNodeVersionId = -1;
  1231. Database db = CastorDatabaseService.getDatabase();
  1232. try
  1233. {
  1234. beginTransaction(db);
  1235. ContentTypeDefinitionVO contentTypeDefinitionVO = ContentTypeDefinitionController.getController().getContentTypeDefinitionVOWithName("Meta info", db);
  1236. ContentVersionVO lastContentVersionVO = ContentVersionController.getContentVersionController().getLatestContentVersionVO(languageVO.getId(), db);
  1237. Integer maxContentVersionId = (lastContentVersionVO == null ? 1000 : lastContentVersionVO.getId());
  1238. logger.info("maxContentVersionId:" + maxContentVersionId + " for " + languageVO.getName());
  1239. List<ContentVersionVO> versions = new ArrayList<ContentVersionVO>();
  1240. if(CmsPropertyHandler.getApplicationName().equalsIgnoreCase("cms"))
  1241. {
  1242. versions = ContentVersionController.getContentVersionController().getContentVersionVOList(contentTypeDefinitionVO.getId(), null, languageVO.getId(), false, 0, newLastSiteNodeVersionId, numberOfVersionToIndexInBatch, numberOfVersionToIndexInBatch*10, true, db, true, maxContentVersionId);
  1243. }
  1244. else
  1245. {
  1246. versions = ContentVersionController.getContentVersionController().getContentVersionVOList(contentTypeDefinitionVO.getId(), null, languageVO.getId(), false, Integer.parseInt(CmsPropertyHandler.getOperatingMode()), newLastSiteNodeVersionId, numberOfVersionToIndexInBatch, numberOfVersionToIndexInBatch*10, true, db, true, maxContentVersionId);
  1247. }
  1248. RequestAnalyser.getRequestAnalyser().registerComponentStatistics("Index all : getContentVersionVOList", t.getElapsedTime());
  1249. logger.info("versions in getNotificationMessagesForStructure:" + versions.size());
  1250. logger.info("Looping versions:" + versions.size());
  1251. for(ContentVersionVO version : versions)
  1252. {
  1253. NotificationMessage notificationMessage = new NotificationMessage("LuceneController", ContentVersionImpl.class.getName(), "SYSTEM", NotificationMessage.TRANS_UPDATE, version.getId(), "dummy");
  1254. notificationMessages.add(notificationMessage);
  1255. newLastSiteNodeVersionId = version.getId().intValue();
  1256. }
  1257. logger.info("Finished round 1:" + notificationMessages.size() + ":" + newLastSiteNodeVersionId);
  1258. }
  1259. catch ( Exception e )
  1260. {
  1261. rollbackTransaction(db);
  1262. throw new SystemException("An error occurred when we tried to fetch a list of users in this role. Reason:" + e.getMessage(), e);
  1263. }
  1264. commitTransaction(db);
  1265. return newLastSiteNodeVersionId;
  1266. }
  1267. private int getContentNotificationMessages(LanguageVO languageVO, int lastContentVersionId) throws Exception
  1268. {
  1269. Timer t = new Timer();
  1270. logger.info("getNotificationMessages:" + languageVO.getName() + " : " + lastContentVersionId);
  1271. logger.info("notifyListeners actually running");
  1272. if(getIsIndexedLocked())
  1273. {
  1274. logger.info("The index should not be locked as no indexing is registered to be carried out. Lets unlock it as it may be the result of a crash.");
  1275. unlockIndex();
  1276. }
  1277. IndexWriter writer = getIndexWriter();
  1278. //t.printElapsedTime("Creating writer took");
  1279. int newLastContentVersionId = -1;
  1280. Database db = CastorDatabaseService.getDatabase();
  1281. try
  1282. {
  1283. beginTransaction(db);
  1284. logger.info("lastContentVersionId:" + lastContentVersionId);
  1285. if(lastContentVersionId < 1)
  1286. {
  1287. SmallestContentVersionVO firstContentVersionVO = ContentVersionController.getContentVersionController().getFirstContentVersionId(languageVO.getId(), db);
  1288. if(firstContentVersionVO != null)
  1289. lastContentVersionId = firstContentVersionVO.getId();
  1290. }
  1291. logger.info("lastContentVersionId 2:" + lastContentVersionId);
  1292. ContentTypeDefinitionVO contentTypeDefinitionVO = ContentTypeDefinitionController.getController().getContentTypeDefinitionVOWithName("Meta info", db);
  1293. ContentVersionVO lastContentVersionVO = ContentVersionController.getContentVersionController().getLatestContentVersionVO(languageVO.getId(), db);
  1294. Integer maxContentVersionId = (lastContentVersionVO == null ? 1000 : lastContentVersionVO.getId());
  1295. logger.info("maxContentVersionId 1:" + maxContentVersionId + " for " + languageVO.getName());
  1296. List<ContentVersionVO> versions = new ArrayList<ContentVersionVO>();
  1297. if(CmsPropertyHandler.getApplicationName().equalsIgnoreCase("cms"))
  1298. {
  1299. versions = ContentVersionController.getContentVersionController().getContentVersionVOList(null, contentTypeDefinitionVO.getId(), languageVO.getId(), false, 0, lastContentVersionId, numberOfVersionToIndexInBatch, numberOfVersionToIndexInBatch*10, true, db, false, maxContentVersionId);
  1300. }
  1301. else
  1302. {
  1303. versions = ContentVersionController.getContentVersionController().getContentVersionVOList(null, contentTypeDefinitionVO.getId(), languageVO.getId(), false, Integer.parseInt(CmsPropertyHandler.getOperatingMode()), lastContentVersionId, numberOfVersionToIndexInBatch, numberOfVersionToIndexInBatch*10, true, db, false, maxContentVersionId);
  1304. }
  1305. RequestAnalyser.getRequestAnalyser().registerComponentStatistics("Index all : getContentVersionVOList", t.getElapsedTime());
  1306. logger.info("versions in getContentNotificationMessages:" + versions.size());
  1307. logger.info("Looping versions:" + versions.size());
  1308. for(ContentVersionVO version : versions)
  1309. {
  1310. if(stopIndexing.get())
  1311. return newLastContentVersionId;
  1312. Document document = getDocumentFromContentVersion(version, db);
  1313. String uid = document.get("uid");
  1314. logger.info("document: " + document);
  1315. writer.deleteDocuments(new Term("uid", "" + uid));
  1316. if(logger.isDebugEnabled())
  1317. logger.debug("Adding document with uid:" + uid + " - " + document);
  1318. if(document != null)
  1319. writer.addDocument(document);
  1320. logger.info("version assetCount:" + version.getAssetCount());
  1321. if(version.getAssetCount() == null || version.getAssetCount() > 0)
  1322. {
  1323. List digitalAssetVOList = DigitalAssetController.getDigitalAssetVOList(version.getId(), db);
  1324. RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getDigitalAssetVOList", (t.getElapsedTimeNanos() / 1000));
  1325. if(digitalAssetVOList.size() > 0)
  1326. {
  1327. logger.info("digitalAssetVOList:" + digitalAssetVOList.size());
  1328. Iterator digitalAssetVOListIterator = digitalAssetVOList.iterator();
  1329. while(digitalAssetVOListIterator.hasNext())
  1330. {
  1331. DigitalAssetVO assetVO = (DigitalAssetVO)digitalAssetVOListIterator.next();
  1332. Document assetDocument = getDocumentFromDigitalAsset(assetVO, version, db);
  1333. String assetUid = assetDocument.get("uid");
  1334. writer.deleteDocuments(new Term("uid", "" + assetUid));
  1335. if(logger.isDebugEnabled())
  1336. logger.debug("Adding document with assetUid:" + assetUid + " - " + assetDocument);
  1337. if(assetDocument != null)
  1338. writer.addDocument(assetDocument);
  1339. }
  1340. }
  1341. }
  1342. newLastContentVersionId = version.getId().intValue();
  1343. }
  1344. //logger.info("Finished round 2:" + notificationMessages.size() + ":" + newLastContentVersionId);
  1345. }
  1346. catch ( Exception e )
  1347. {
  1348. logger.error("Error in lucene indexing: " + e.getMessage(), e);
  1349. rollbackTransaction(db);
  1350. throw new SystemException("An error occurred when we tried to getContentNotificationMessages. Reason:" + e.getMessage(), e);
  1351. }
  1352. finally
  1353. {
  1354. try{setLastCommitedContentVersionId(writer, newLastContentVersionId); writer.commit(); /*writer.close(true);*/}catch (Exception e) {e.printStackTrace();}
  1355. }
  1356. commitTransaction(db);
  1357. return newLastContentVersionId;
  1358. }
  1359. private int getPageNotificationMessages(List notificationMessages, LanguageVO languageVO, int lastContentVersionId) throws Exception
  1360. {
  1361. Timer t = new Timer();
  1362. logger.info("getNotificationMessages:" + languageVO.getName() + " : " + lastContentVersionId);
  1363. logger.info("notifyListeners actually running");
  1364. if(getIsIndexedLocked())
  1365. {
  1366. logger.info("The index should not be locked as no indexing is registered to be carried out. Lets unlock it as it may be the result of a crash.");
  1367. unlockIndex();
  1368. }
  1369. IndexWriter writer = getIndexWriter();
  1370. //t.printElapsedTime("Creating writer took");
  1371. int newLastContentVersionId = -1;
  1372. Database db = CastorDatabaseService.getDatabase();
  1373. try
  1374. {
  1375. beginTransaction(db);
  1376. ContentTypeDefinitionVO contentTypeDefinitionVO = ContentTypeDefinitionController.getController().getContentTypeDefinitionVOWithName("Meta info", db);
  1377. ContentVersionVO lastContentVersionVO = ContentVersionController.getContentVersionController().getLatestContentVersionVO(languageVO.getId(), db);
  1378. Integer maxContentVersionId = (lastContentVersionVO == null ? 1000 : lastContentVersionVO.getId());
  1379. logger.info("maxContentVersionId:" + maxContentVersionId + " for " + languageVO.getName());
  1380. List<ContentVersionVO> versions = new ArrayList<ContentVersionVO>();
  1381. if(CmsPropertyHandler.getApplicationName().equalsIgnoreCase("cms"))
  1382. {
  1383. versions = ContentVersionController.getContentVersionController().getContentVersionVOList(contentTypeDefinitionVO.getId(), null, languageVO.getId(), false, 0, lastContentVersionId, numberOfVersionToIndexInBatch, numberOfVersionToIndexInBatch*10, true, db, true, maxContentVersionId);
  1384. }
  1385. else
  1386. {
  1387. versions = ContentVersionController.getContentVersionController().getContentVersionVOList(contentTypeDefinitionVO.getId(), null, languageVO.getId(), false, Integer.parseInt(CmsPropertyHandler.getOperatingMode()), lastContentVersionId, numberOfVersionToIndexInBatch, numberOfVersionToIndexInBatch*10, true, db, true, maxContentVersionId);
  1388. }
  1389. logger.info("versions:" + versions.size());
  1390. RequestAnalyser.getRequestAnalyser().registerComponentStatistics("Index all : getContentVersionVOList", t.getElapsedTime());
  1391. logger.info("versions in getContentNotificationMessages:" + versions.size());
  1392. logger.info("Looping versions:" + versions.size());
  1393. for(ContentVersionVO version : versions)
  1394. {
  1395. if(stopIndexing.get())
  1396. return newLastContentVersionId;
  1397. Document documents = getSiteNodeDocument(version, writer, db);
  1398. if (documents != null)
  1399. {
  1400. String uid = documents.get("uid");
  1401. logger.debug("Regging doc: " + documents);
  1402. writer.deleteDocuments(new Term("uid", "" + uid));
  1403. if(logger.isDebugEnabled())
  1404. logger.debug("Adding document with uid:" + uid + " - " + documents);
  1405. writer.addDocument(documents);
  1406. }
  1407. else if(logger.isInfoEnabled())
  1408. {
  1409. logger.info("Failed to get document for SiteNode. Meta info content.id: " + version.getContentVersionId());
  1410. }
  1411. /*
  1412. logger.info("version assetCount:" + version.getAssetCount());
  1413. if(version.getAssetCount() == null || version.getAssetCount() > 0)
  1414. {
  1415. List digitalAssetVOList = DigitalAssetController.getDigitalAssetVOList(version.getId(), db);
  1416. RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getDigitalAssetVOList", (t.getElapsedTimeNanos() / 1000));
  1417. if(digitalAssetVOList.size() > 0)
  1418. {
  1419. logger.info("digitalAssetVOList:" + digitalAssetVOList.size());
  1420. Iterator digitalAssetVOListIterator = digitalAssetVOList.iterator();
  1421. while(digitalAssetVOListIterator.hasNext())
  1422. {
  1423. DigitalAssetVO assetVO = (DigitalAssetVO)digitalAssetVOListIterator.next();
  1424. NotificationMessage assetNotificationMessage = new NotificationMessage("LuceneController", DigitalAssetImpl.class.getName(), "SYSTEM", NotificationMessage.TRANS_UPDATE, assetVO.getId(), "dummy");
  1425. notificationMessages.add(assetNotificationMessage);
  1426. }
  1427. }
  1428. }
  1429. NotificationMessage notificationMessage = new NotificationMessage("LuceneController", ContentVersionImpl.class.getName(), "SYSTEM", NotificationMessage.TRANS_UPDATE, version.getId(), "dummy");
  1430. notificationMessages.add(notificationMessage);
  1431. */
  1432. newLastContentVersionId = version.getId().intValue();
  1433. }
  1434. logger.info("Finished round 3:" + notificationMessages.size() + ":" + newLastContentVersionId);
  1435. }
  1436. catch ( Exception e )
  1437. {
  1438. rollbackTransaction(db);
  1439. throw new SystemException("An error occurred when we tried to fetch a list of users in this role. Reason:" + e.getMessage(), e);
  1440. }
  1441. finally
  1442. {
  1443. try{setLastCommitedContentVersionId(writer, newLastContentVersionId); writer.commit(); /*writer.close(true);*/}catch (Exception e) {e.printStackTrace();}
  1444. }
  1445. commitTransaction(db);
  1446. return newLastContentVersionId;
  1447. }
  1448. public void testSQL()
  1449. {
  1450. try {
  1451. getNotificationMessages(new ArrayList(), LanguageController.getController().getLanguageVOWithCode("sv"), 100000, new Date(), 1000);
  1452. } catch (Exception e) {
  1453. // TODO Auto-generated catch block
  1454. e.printStackTrace();
  1455. logger.error("Errro:" + e.getMessage(), e);
  1456. }
  1457. }
  1458. private int getNotificationMessages(List notificationMessages, LanguageVO languageVO, int lastContentVersionId, Date lastCheckDateTime, int batchSize) throws Exception
  1459. {
  1460. Timer t = new Timer();
  1461. logger.info("getNotificationMessages:" + languageVO.getName() + " : " + lastContentVersionId + ":" + lastCheckDateTime);
  1462. int newLastContentVersionId = -1;
  1463. Database db = CastorDatabaseService.getDatabase();
  1464. try
  1465. {
  1466. beginTransaction(db);
  1467. logger.info("**************Getting contents start:" + t.getElapsedTime() + ":" + lastCheckDateTime);
  1468. Calendar date = Calendar.getInstance();
  1469. date.setTime(lastCheckDateTime);
  1470. date.add(Calendar.DAY_OF_YEAR, -1);
  1471. //String SQL = "select cv.contentVersionId, cv.stateId, cv.modifiedDateTime, cv.versionComment, cv.isCheckedOut, cv.isActive, cv.contentId, cv.languageId, cv.versionModifier FROM cmContentVersion cv where cv.languageId = $1 AND cv.isActive = $2 AND ((cv.contentVersionId > $3 AND cv.contentVersionId < $4) OR cv.modifiedDateTime > $5) ORDER BY cv.contentVersionId";
  1472. //if(CmsPropertyHandler.getUseShortTableNames() != null && CmsPropertyHandler.getUseShortTableNames().equalsIgnoreCase("true"))
  1473. // SQL = "select cv.contVerId, cv.stateId, cv.modifiedDateTime, cv.verComment, cv.isCheckedOut, cv.isActive, cv.contId, cv.languageId, cv.versionModifier FROM cmContVer cv where cv.languageId = $1 AND cv.isActive = $2 AND ((cv.contVerId > $3 AND cv.contVerId < $4) OR cv.modifiedDateTime > TO_DATE('2013-03-20','YYYY-MM-DD')) ORDER BY cv.contVerId";
  1474. //System.out.println("SQL:" + SQL);
  1475. //OQLQuery oql = db.getOQLQuery("CALL SQL " + SQL + " AS org.infoglue.cms.entities.content.impl.simple.SmallestContentVersionImpl");
  1476. //if(CmsPropertyHandler.getUseShortTableNames() != null && CmsPropertyHandler.getUseShortTableNames().equalsIgnoreCase("true"))
  1477. // oql = db.getOQLQuery("CALL SQL " + SQL + " AS org.infoglue.cms.entities.content.impl.simple.SmallestContentVersionImpl");
  1478. //oracle.sql.DATE oracleDate = new oracle.sql.DATE(new java.sql.Date(date.getTime().getTime()));
  1479. OQLQuery oql = db.getOQLQuery( "SELECT cv FROM " + SmallestContentVersionImpl.class.getName() + " cv WHERE cv.languageId = $1 AND cv.isActive = $2 AND ((cv.contentVersionId > $3 AND cv.contentVersionId < $4) OR cv.modifiedDateTime > $5) ORDER BY cv.contentVersionId limit $6");
  1480. //OQLQuery oql = db.getOQLQuery( "SELECT cv FROM " + SmallestContentVersionImpl.class.getName() + " cv WHERE cv.languageId = $1 AND cv.isActive = $2 AND ((cv.contentVersionId > $3 AND cv.contentVersionId < $4)) ORDER BY cv.contentVersionId limit $5");
  1481. oql.bind(languageVO.getId());
  1482. oql.bind(true);
  1483. oql.bind(lastContentVersionId);
  1484. oql.bind(lastContentVersionId+(batchSize*10));
  1485. //oql.bind(date.getTime());
  1486. oql.bind(date.getTime());
  1487. oql.bind(batchSize);
  1488. QueryResults results = oql.execute(Database.READONLY);
  1489. if(logger.isInfoEnabled())
  1490. logger.info("Getting contents took: " + t.getElapsedTime());
  1491. int processedItems = 0;
  1492. Integer previousContentId = null;
  1493. while (results.hasMore())
  1494. {
  1495. SmallestContentVersionImpl smallestContentVersionImpl = (SmallestContentVersionImpl)results.next();
  1496. if(previousContentId == null || !previousContentId.equals(smallestContentVersionImpl.getContentId()))
  1497. {
  1498. List digitalAssetVOList = DigitalAssetController.getDigitalAssetVOList(smallestContentVersionImpl.getId(), db);
  1499. if(digitalAssetVOList.size() > 0)
  1500. {
  1501. logger.info("digitalAssetVOList:" + digitalAssetVOList.size());
  1502. Iterator digitalAssetVOListIterator = digitalAssetVOList.iterator();
  1503. while(digitalAssetVOListIterator.hasNext())
  1504. {
  1505. DigitalAssetVO assetVO = (DigitalAssetVO)digitalAssetVOListIterator.next();
  1506. if(assetVO.getAssetFileSize() < 10000000) //Do not index large files
  1507. {
  1508. NotificationMessage assetNotificationMessage = new NotificationMessage("LuceneController", DigitalAssetImpl.class.getName(), "SYSTEM", NotificationMessage.TRANS_UPDATE, assetVO.getId(), "dummy");
  1509. notificationMessages.add(assetNotificationMessage);
  1510. }
  1511. }
  1512. }
  1513. NotificationMessage notificationMessage = new NotificationMessage("LuceneController", ContentVersionImpl.class.getName(), "SYSTEM", NotificationMessage.TRANS_UPDATE, smallestContentVersionImpl.getId(), "dummy");
  1514. notificationMessages.add(notificationMessage);
  1515. previousContentId = smallestContentVersionImpl.getContentId();
  1516. }
  1517. newLastContentVersionId = smallestContentVersionImpl.getId().intValue();
  1518. lastCommitedContentVersionId = newLastContentVersionId;
  1519. processedItems++;
  1520. logger.info("previousContentId:" + previousContentId + "/" + processedItems);
  1521. if(processedItems > batchSize)
  1522. {
  1523. System.out.println("Batch full...");
  1524. break;
  1525. }
  1526. }
  1527. results.close();
  1528. logger.info("Finished round 4:" + processedItems + ":" + newLastContentVersionId);
  1529. }
  1530. catch ( Exception e )
  1531. {
  1532. rollbackTransaction(db);
  1533. throw new SystemException("An error occurred when we tried to fetch a list of users in this role. Reason:" + e.getMessage(), e);
  1534. }
  1535. commitTransaction(db);
  1536. return newLastContentVersionId;
  1537. }
  1538. private void indexInformation(NotificationMessage notificationMessage, IndexWriter writer, List<NotificationMessage> internalMessageList, Boolean forceVersionIndexing, Database db)
  1539. {
  1540. Timer t = new Timer();
  1541. try
  1542. {
  1543. try
  1544. {
  1545. //writer.setMaxMergeDocs(500000);
  1546. if(logger.isInfoEnabled())
  1547. logger.info("Indexing to directory '" + writer.getDirectory().toString() + "'...");
  1548. List<Document> documents = getDocumentsForIncremental(notificationMessage, writer, forceVersionIndexing, db);
  1549. Iterator<Document> documentsIterator = documents.iterator();
  1550. while(documentsIterator.hasNext())
  1551. {
  1552. Document indexingDocument = documentsIterator.next();
  1553. String uid = indexingDocument.get("uid");
  1554. if(logger.isDebugEnabled())
  1555. logger.debug("Adding document with uid:" + uid + " - " + indexingDocument);
  1556. //logger.error("Adding document with uid:" + uid + " - " + indexingDocument);
  1557. if(indexingDocument != null)
  1558. writer.addDocument(indexingDocument);
  1559. }
  1560. }
  1561. catch (Exception e)
  1562. {
  1563. logger.error("Error indexing:" + e.getMessage(), e);
  1564. }
  1565. finally
  1566. {
  1567. indexedDocumentsSinceLastOptimize++;
  1568. if(indexedDocumentsSinceLastOptimize > 1000)
  1569. {
  1570. indexedDocumentsSinceLastOptimize = 0;
  1571. }
  1572. }
  1573. }
  1574. catch (Exception e)
  1575. {
  1576. logger.error("Error indexing:" + e.getMessage(), e);
  1577. }
  1578. }
  1579. private List<Document> getDocumentsForIncremental(NotificationMessage notificationMessage, IndexWriter writer, Boolean forceVersionIndexing, Database db) throws Exception
  1580. {
  1581. Timer t = new Timer();
  1582. List<Document> returnDocuments = new ArrayList<Document>();
  1583. logger.info("2222222222 notificationMessage.getClassName():" + notificationMessage.getClassName() + " in " + CmsPropertyHandler.getApplicationName());
  1584. Set<Integer> contentIdsToIndex = new HashSet<Integer>();
  1585. Set<Integer> siteNodeIdsToIndex = new HashSet<Integer>();
  1586. if(notificationMessage.getClassName().equals(ContentImpl.class.getName()) || notificationMessage.getClassName().equals(Content.class.getName()))
  1587. {
  1588. logger.info("++++++++++++++Got an Content notification: " + notificationMessage.getObjectId());
  1589. ContentVO contentVO = ContentController.getContentController().getContentVOWithId((Integer)notificationMessage.getObjectId(), db);
  1590. //ContentVO contentVO = ContentController.getContentController().getContentVOWithId((Integer)notificationMessage.getObjectId());
  1591. RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getContentVOWithId", (t.getElapsedTimeNanos() / 1000));
  1592. contentIdsToIndex.add(contentVO.getId());
  1593. }
  1594. else if(notificationMessage.getClassName().equals(ContentVersionImpl.class.getName()) || notificationMessage.getClassName().equals(ContentVersion.class.getName()))
  1595. {
  1596. logger.info("++++++++++++++Got an ContentVersion notification: " + notificationMessage.getObjectId());
  1597. ContentVersionVO contentVersionVO = ContentVersionController.getContentVersionController().getContentVersionVOWithId((Integer)notificationMessage.getObjectId(), db);
  1598. RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getContentVersionVOWithId", t.getElapsedTime());
  1599. contentIdsToIndex.add(contentVersionVO.getContentId());
  1600. }
  1601. else if(notificationMessage.getClassName().equals(DigitalAssetImpl.class.getName()) || notificationMessage.getClassName().equals(DigitalAsset.class.getName()))
  1602. {
  1603. logger.info("++++++++++++++Got an DigitalAssetImpl notification: " + notificationMessage.getObjectId());
  1604. Database db2 = CastorDatabaseService.getDatabase();
  1605. beginTransaction(db2);
  1606. try
  1607. {
  1608. DigitalAssetVO asset = DigitalAssetController.getController().getLocklessSmallDigitalAssetVOWithId((Integer)notificationMessage.getObjectId(), db2);
  1609. //DigitalAssetVO asset = DigitalAssetController.getSmallDigitalAssetVOWithId((Integer)notificationMessage.getObjectId(), db2);
  1610. if(asset != null)
  1611. {
  1612. List<SmallestContentVersionVO> contentVersionVOList = DigitalAssetController.getContentVersionVOListConnectedToAssetWithId((Integer)notificationMessage.getObjectId());
  1613. if(logger.isInfoEnabled())
  1614. logger.info("contentVersionVOList:" + contentVersionVOList.size());
  1615. Iterator<SmallestContentVersionVO> contentVersionsIterator = contentVersionVOList.iterator();
  1616. while(contentVersionsIterator.hasNext())
  1617. {
  1618. SmallestContentVersionVO version = contentVersionsIterator.next();
  1619. RequestAnalyser.getRequestAnalyser().registerComponentStatistics("contentVersionsIterator", t.getElapsedTime());
  1620. ContentVersionVO cvVO = ContentVersionController.getContentVersionController().getContentVersionVOWithId(version.getId(), db2);
  1621. Document document = getDocumentFromDigitalAsset(asset, cvVO, db);
  1622. RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getDocumentFromDigitalAsset", t.getElapsedTime());
  1623. logger.info("00000000000000000: Adding asset document:" + document);
  1624. if(document != null)
  1625. returnDocuments.add(document);
  1626. }
  1627. }
  1628. else
  1629. {
  1630. logger.info("Asset was probably deleted - ignoring it.");
  1631. }
  1632. commitTransaction(db2);
  1633. }
  1634. catch(Exception e)
  1635. {
  1636. logger.error("An error occurred so we should not complete the transaction:" + e, e);
  1637. rollbackTransaction(db2);
  1638. throw new SystemException(e.getMessage());
  1639. }
  1640. }
  1641. else if(notificationMessage.getClassName().equals(SiteNodeImpl.class.getName()) || notificationMessage.getClassName().equals(SiteNode.class.getName()) || notificationMessage.getClassName().equals(SmallSiteNodeImpl.class.getName()) || notificationMessage.getClassName().equals(PureSiteNodeImpl.class.getName()))
  1642. {
  1643. SiteNodeVO siteNodeVO = SiteNodeController.getController().getSiteNodeVOWithId((Integer)notificationMessage.getObjectId(), db);
  1644. if (siteNodeVO == null)
  1645. {
  1646. logger.warn("Could not find SiteNode with id: " + notificationMessage.getObjectId());
  1647. }
  1648. else
  1649. {
  1650. siteNodeIdsToIndex.add(siteNodeVO.getId());
  1651. }
  1652. }
  1653. logger.info("Indexing:" + siteNodeIdsToIndex.size());
  1654. for(Integer siteNodeId : siteNodeIdsToIndex)
  1655. {
  1656. //Deleting all info based on content
  1657. Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_34);
  1658. logger.info("Deleting all info on:" + siteNodeId);
  1659. Query query = new QueryParser(Version.LUCENE_34, "siteNodeId", analyzer).parse("" + siteNodeId);
  1660. writer.deleteDocuments(query);
  1661. //End
  1662. logger.info("QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ:" + notificationMessage.getObjectId());
  1663. SiteNodeVO siteNodeVO = SiteNodeController.getController().getSiteNodeVOWithId((Integer)notificationMessage.getObjectId(), db);
  1664. logger.info("$$$$$$$$$$Getting doc for " + siteNodeVO.getName());
  1665. Document document = getDocumentFromSiteNode(siteNodeVO, writer, db);
  1666. logger.info("document " + document);
  1667. RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getDocumentFromSiteNode", t.getElapsedTime());
  1668. if(document != null)
  1669. returnDocuments.add(document);
  1670. }
  1671. logger.info("Indexing contentIdsToIndex:" + contentIdsToIndex.size());
  1672. for(Integer contentId : contentIdsToIndex)
  1673. {
  1674. //Deleting all info based on content
  1675. Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_34);
  1676. logger.info("Deleting all info on:" + contentId);
  1677. String[] fields = new String[]{"isAsset","contentId"};
  1678. String[] queries = new String[]{"true","" + contentId};
  1679. BooleanClause.Occur[] flags = new BooleanClause.Occur[]{BooleanClause.Occur.MUST_NOT,BooleanClause.Occur.MUST};
  1680. Query query = MultiFieldQueryParser.parse(Version.LUCENE_34, queries, fields, flags, analyzer);
  1681. //Query query = new QueryParser(Version.LUCENE_34, "contentId", analyzer).parse("" + contentId);
  1682. writer.deleteDocuments(query);
  1683. //End
  1684. ContentVO contentVO = ContentController.getContentController().getContentVOWithId(contentId, db);
  1685. Document document = getDocumentFromContent(contentVO, notificationMessage, writer, forceVersionIndexing, db);
  1686. RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getDocumentFromContent", (t.getElapsedTimeNanos() / 1000));
  1687. if(document != null)
  1688. {
  1689. returnDocuments.add(document);
  1690. logger.info("++++++++++++++Forcing cv indexing");
  1691. List<ContentVersionVO> versions = new ArrayList<ContentVersionVO>();
  1692. if(CmsPropertyHandler.getApplicationName().equalsIgnoreCase("cms"))
  1693. {
  1694. //List<LanguageVO> languages = LanguageController.getController().getLanguageVOList(contentVO.getRepositoryId());
  1695. List<LanguageVO> languages = LanguageController.getController().getLanguageVOList(contentVO.getRepositoryId(), db);
  1696. RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getLanguageVOList", (t.getElapsedTimeNanos() / 1000));
  1697. for(LanguageVO language : languages)
  1698. {
  1699. ContentVersionVO latestVersion = ContentVersionController.getContentVersionController().getLatestActiveContentVersionVO(contentVO.getId(), language.getId(), Integer.parseInt(CmsPropertyHandler.getOperatingMode()), db);
  1700. RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getLatestActiveContentVersionVO", (t.getElapsedTimeNanos() / 1000));
  1701. if(latestVersion != null)
  1702. versions.add(latestVersion);
  1703. ContentVersionVO latestVersionPublishedVersion = ContentVersionController.getContentVersionController().getLatestActiveContentVersionVO(contentVO.getId(), language.getId(), ContentVersionVO.PUBLISHED_STATE, db);
  1704. RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getLatestActiveContentVersionVO", (t.getElapsedTimeNanos() / 1000));
  1705. if(latestVersionPublishedVersion != null && latestVersionPublishedVersion.getId().intValue() != latestVersion.getId().intValue())
  1706. versions.add(latestVersionPublishedVersion);
  1707. }
  1708. }
  1709. else
  1710. {
  1711. List<LanguageVO> languages = LanguageController.getController().getLanguageVOList(contentVO.getRepositoryId(), db);
  1712. RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getLanguageVOList", (t.getElapsedTimeNanos() / 1000));
  1713. for(LanguageVO language : languages)
  1714. {
  1715. ContentVersionVO version = ContentVersionController.getContentVersionController().getLatestActiveContentVersionVO(contentVO.getId(), language.getId(), Integer.parseInt(CmsPropertyHandler.getOperatingMode()), db);
  1716. RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getLatestActiveContentVersionVO", (t.getElapsedTimeNanos() / 1000));
  1717. if(version != null)
  1718. versions.add(version);
  1719. }
  1720. }
  1721. logger.info("versions:" + versions.size());
  1722. for(ContentVersionVO version : versions)
  1723. {
  1724. logger.info("version:" + version.getId());
  1725. Document versionDocument = getDocumentFromContentVersion(version, db);
  1726. RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getDocumentFromContentVersion", t.getElapsedTime());
  1727. if(versionDocument != null)
  1728. returnDocuments.add(versionDocument);
  1729. if(version.getId() > this.lastCommitedContentVersionId)
  1730. lastCommitedContentVersionId = version.getId();
  1731. }
  1732. }
  1733. }
  1734. return returnDocuments;
  1735. }
  1736. private List<Document> getDocumentsForContentVersion(ContentVersionVO contentVersionVO, Database db) throws Exception
  1737. {
  1738. Timer t = new Timer();
  1739. List<Document> returnDocuments = new ArrayList<Document>();
  1740. //ContentVO contentVO = ContentController.getContentController().getContentVOWithId(contentVersionVO.getContentId(), db);
  1741. //RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getContentVOWithId", (t.getElapsedTimeNanos() / 1000));
  1742. Document document = getDocumentFromContentVersion(contentVersionVO, db);
  1743. RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getDocumentFromContentVersion", t.getElapsedTime());
  1744. if(document != null)
  1745. returnDocuments.add(document);
  1746. return returnDocuments;
  1747. }
  1748. public Document getDocumentFromSiteNode(SiteNodeVO siteNodeVO, IndexWriter writer, Database db) throws Exception, InterruptedException
  1749. {
  1750. logger.info("getDocumentFromSiteNode:" + siteNodeVO.getName() + ":" + siteNodeVO.getIsDeleted());
  1751. if(siteNodeVO == null || siteNodeVO.getIsDeleted())
  1752. {
  1753. logger.info("Adding a delete directive to the indexer");
  1754. String uid = "siteNodeId_" + siteNodeVO.getId();
  1755. logger.info("Deleting documents:" + "uid=" + uid);
  1756. logger.info("Before delete:" + writer.numDocs());
  1757. //writer.deleteDocuments(new Term("uid", "" + uid));
  1758. Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_34);
  1759. Query query = new QueryParser(Version.LUCENE_34, "siteNodeId", analyzer).parse("" + siteNodeVO.getId());
  1760. writer.deleteDocuments(query);
  1761. logger.info("Before delete:" + writer.numDocs());
  1762. return null;
  1763. }
  1764. // make a new, empty document
  1765. Document doc = new Document();
  1766. // Add the last modified date of the file a field named "modified".
  1767. // Use a field that is indexed (i.e. searchable), but don't tokenize
  1768. // the field into words.
  1769. doc.add(new NumericField("publishDateTime", Field.Store.YES, true).setLongValue(siteNodeVO.getPublishDateTime().getTime()));
  1770. doc.add(new Field("modified", DateTools.timeToString(new Date().getTime(), DateTools.Resolution.MINUTE), Field.Store.YES, Field.Index.NOT_ANALYZED));
  1771. doc.add(new Field("siteNodeId", "" + siteNodeVO.getId(), Field.Store.YES, Field.Index.NOT_ANALYZED));
  1772. doc.add(new Field("repositoryId", "" + siteNodeVO.getRepositoryId(), Field.Store.YES, Field.Index.NOT_ANALYZED));
  1773. doc.add(new Field("lastModifier", "" + siteNodeVO.getCreatorName(), Field.Store.YES, Field.Index.NOT_ANALYZED));
  1774. doc.add(new Field("isAsset", "false", Field.Store.YES, Field.Index.NOT_ANALYZED));
  1775. doc.add(new Field("isSiteNode", "true", Field.Store.YES, Field.Index.NOT_ANALYZED));
  1776. SiteNodeVersionVO siteNodeVersionVO = SiteNodeVersionController.getController().getLatestActiveSiteNodeVersionVO(db, siteNodeVO.getId());
  1777. if(siteNodeVersionVO != null)
  1778. {
  1779. doc.add(new NumericField("modificationDateTime", Field.Store.YES, true).setLongValue(siteNodeVersionVO.getModifiedDateTime().getTime()));
  1780. doc.add(new Field("siteNodeVersionId", "" + siteNodeVersionVO.getId(), Field.Store.YES, Field.Index.NOT_ANALYZED));
  1781. doc.add(new Field("stateId", "" + siteNodeVersionVO.getStateId(), Field.Store.YES, Field.Index.NOT_ANALYZED));
  1782. doc.add(new Field("path", "" + getSiteNodePath(siteNodeVO.getId(), db), Field.Store.YES, Field.Index.NOT_ANALYZED));
  1783. }
  1784. // Add the uid as a field, so that index can be incrementally
  1785. // maintained.
  1786. // This field is not stored with document, it is indexed, but it is not
  1787. // tokenized prior to indexing.
  1788. doc.add(new Field("uid", "siteNodeId_" + siteNodeVO.getId(), Field.Store.NO, Field.Index.NOT_ANALYZED));
  1789. // Add the tag-stripped contents as a Reader-valued Text field so it
  1790. // will
  1791. // get tokenized and indexed.
  1792. doc.add(new Field("contents", new StringReader(siteNodeVO.getName())));
  1793. if(siteNodeVO.getMetaInfoContentId() != null && siteNodeVO.getMetaInfoContentId() > -1)
  1794. {
  1795. List<LanguageVO> languages = LanguageController.getController().getLanguageVOList(siteNodeVO.getRepositoryId(), db);
  1796. for(LanguageVO language : languages)
  1797. {
  1798. ContentVersionVO cvVO = ContentVersionController.getContentVersionController().getLatestActiveContentVersionVO(siteNodeVO.getMetaInfoContentId(), language.getId(), Integer.parseInt(CmsPropertyHandler.getOperatingMode()), db);
  1799. if(cvVO != null)
  1800. doc.add(new Field("contents", new StringReader(cvVO.getVersionValue())));
  1801. }
  1802. }
  1803. // return the document
  1804. return doc;
  1805. }
  1806. public Document getSiteNodeDocument(ContentVersionVO contentVersionVO, IndexWriter writer, Database db) throws Exception, InterruptedException
  1807. {
  1808. Timer t = new Timer();
  1809. ContentVO contentVO = ContentController.getContentController().getContentVOWithId(contentVersionVO.getContentId(), db);
  1810. RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getContentVOWithId", (t.getElapsedTimeNanos() / 1000));
  1811. if(contentVO.getIsDeleted())
  1812. return null;
  1813. if (contentVersionVO.getSiteNodeId() == null || contentVersionVO.getSiteNodeName() == null)
  1814. {
  1815. logger.warn("Content version does not have a SiteNode connected. Will not index content version. ContentVersion.id: " + contentVersionVO.getContentVersionId());
  1816. return null;
  1817. }
  1818. // make a new, empty document
  1819. Document doc = new Document();
  1820. // Add the last modified date of the file a field named "modified".
  1821. // Use a field that is indexed (i.e. searchable), but don't tokenize
  1822. // the field into words.
  1823. doc.add(new NumericField("publishDateTime", Field.Store.YES, true).setLongValue(contentVersionVO.getModifiedDateTime().getTime()));
  1824. doc.add(new Field("modified", DateTools.timeToString(new Date().getTime(), DateTools.Resolution.MINUTE), Field.Store.YES, Field.Index.NOT_ANALYZED));
  1825. doc.add(new Field("siteNodeId", "" + contentVersionVO.getSiteNodeId(), Field.Store.YES, Field.Index.NOT_ANALYZED));
  1826. doc.add(new Field("repositoryId", "" + contentVO.getRepositoryId(), Field.Store.YES, Field.Index.NOT_ANALYZED));
  1827. doc.add(new Field("lastModifier", "" + contentVersionVO.getVersionModifier(), Field.Store.YES, Field.Index.NOT_ANALYZED));
  1828. doc.add(new Field("isAsset", "false", Field.Store.YES, Field.Index.NOT_ANALYZED));
  1829. doc.add(new Field("isSiteNode", "true", Field.Store.YES, Field.Index.NOT_ANALYZED));
  1830. //doc.add(new Field("contentTypeDefinitionId", "" + ContentTypeDefinitionController.getController().getContentTypeDefinitionVOWithName("Meta info", db).getId(), Field.Store.YES, Field.Index.NOT_ANALYZED));
  1831. try
  1832. {
  1833. SiteNodeVersionVO siteNodeVersionVO = SiteNodeVersionController.getController().getLatestActiveSiteNodeVersionVO(db, contentVersionVO.getSiteNodeId());
  1834. if(siteNodeVersionVO != null)
  1835. doc.add(new Field("siteNodeVersionId", "" + siteNodeVersionVO.getId(), Field.Store.YES, Field.Index.NOT_ANALYZED));
  1836. else
  1837. logger.warn("No site node version found on siteNode: " + contentVersionVO.getSiteNodeId());
  1838. }
  1839. catch (Exception e)
  1840. {
  1841. e.printStackTrace();
  1842. }
  1843. doc.add(new NumericField("modificationDateTime", Field.Store.YES, true).setLongValue(contentVersionVO.getModifiedDateTime().getTime()));
  1844. doc.add(new Field("stateId", "" + contentVersionVO.getStateId(), Field.Store.YES, Field.Index.NOT_ANALYZED));
  1845. doc.add(new Field("path", "" + getSiteNodePath(contentVersionVO.getSiteNodeId(), db), Field.Store.YES, Field.Index.NOT_ANALYZED));
  1846. // Add the uid as a field, so that index can be incrementally
  1847. // maintained.
  1848. // This field is not stored with document, it is indexed, but it is not
  1849. // tokenized prior to indexing.
  1850. doc.add(new Field("uid", "siteNodeId_" + contentVersionVO.getSiteNodeId(), Field.Store.NO, Field.Index.NOT_ANALYZED));
  1851. // Add the tag-stripped contents as a Reader-valued Text field so it
  1852. // will
  1853. // get tokenized and indexed.
  1854. String pageName = contentVersionVO.getSiteNodeName();
  1855. if(pageName == null)
  1856. {
  1857. logger.info("Have to read again...");
  1858. SiteNodeVO siteNodeVO = SiteNodeController.getController().getSiteNodeVOWithId(contentVersionVO.getSiteNodeId(), db);
  1859. pageName = siteNodeVO.getName();
  1860. }
  1861. String versionValue = contentVersionVO.getVersionValue();
  1862. if(versionValue == null)
  1863. {
  1864. logger.info("Have to read version again...");
  1865. ContentVersionVO cvVO = ContentVersionController.getContentVersionController().getContentVersionVOWithId(contentVersionVO.getContentVersionId(), db);
  1866. versionValue = cvVO.getVersionValue();
  1867. }
  1868. doc.add(new Field("contents", new StringReader(versionValue)));
  1869. doc.add(new Field("contents", new StringReader(pageName)));
  1870. // return the document
  1871. return doc;
  1872. }
  1873. public Document getDocumentFromContent(ContentVO contentVO, NotificationMessage message, IndexWriter writer, boolean indexVersions, Database db) throws Exception, InterruptedException
  1874. {
  1875. logger.info("getDocumentFromContent:" + contentVO.getName() + ":" + contentVO.getIsDeleted());
  1876. if(contentVO == null || contentVO.getIsDeleted())
  1877. {
  1878. //NotificationMessage notificationMessage = new NotificationMessage(message.getName(), message.getClassName(), message.getSystemUserName(), NotificationMessage.TRANS_DELETE, message.getObjectId(), message.getObjectName());
  1879. logger.info("Adding a delete directive to the indexer");
  1880. //internalMessageList.add(notificationMessage);
  1881. String uid = "contentId_" + contentVO.getId();
  1882. logger.info("Deleting documents:" + "uid=" + uid);
  1883. logger.info("Before delete:" + writer.numDocs());
  1884. //writer.deleteDocuments(new Term("uid", "" + uid));
  1885. Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_34);
  1886. Query query = new QueryParser(Version.LUCENE_34, "contentId", analyzer).parse("" + contentVO.getId());
  1887. writer.deleteDocuments(query);
  1888. logger.info("Before delete:" + writer.numDocs());
  1889. return null;
  1890. }
  1891. // make a new, empty document
  1892. Document doc = new Document();
  1893. // Add the last modified date of the file a field named "modified".
  1894. // Use a field that is indexed (i.e. searchable), but don't tokenize
  1895. // the field into words.
  1896. doc.add(new NumericField("publishDateTime", Field.Store.YES, true).setLongValue(contentVO.getPublishDateTime().getTime()));
  1897. doc.add(new Field("modified", DateTools.timeToString(new Date().getTime(), DateTools.Resolution.MINUTE), Field.Store.YES, Field.Index.NOT_ANALYZED));
  1898. doc.add(new Field("contentId", "" + contentVO.getContentId(), Field.Store.YES, Field.Index.NOT_ANALYZED));
  1899. doc.add(new Field("contentTypeDefinitionId", "" + contentVO.getContentTypeDefinitionId(), Field.Store.YES, Field.Index.NOT_ANALYZED));
  1900. doc.add(new Field("repositoryId", "" + contentVO.getRepositoryId(), Field.Store.YES, Field.Index.NOT_ANALYZED));
  1901. doc.add(new Field("lastModifier", "" + contentVO.getCreatorName(), Field.Store.YES, Field.Index.NOT_ANALYZED));
  1902. doc.add(new Field("isAsset", "false", Field.Store.YES, Field.Index.NOT_ANALYZED));
  1903. doc.add(new Field("path", "" + getContentPath(contentVO.getId(), db), Field.Store.YES, Field.Index.NOT_ANALYZED));
  1904. // Add the uid as a field, so that index can be incrementally
  1905. // maintained.
  1906. // This field is not stored with document, it is indexed, but it is not
  1907. // tokenized prior to indexing.
  1908. doc.add(new Field("uid", "contentId_" + contentVO.getId(), Field.Store.NO, Field.Index.NOT_ANALYZED));
  1909. // Add the tag-stripped contents as a Reader-valued Text field so it
  1910. // will
  1911. // get tokenized and indexed.
  1912. doc.add(new Field("contents", new StringReader(contentVO.getName())));
  1913. // return the document
  1914. return doc;
  1915. }
  1916. public Document getDocumentFromContentVersion(ContentVersionVO contentVersionVO, Database db) throws Exception, InterruptedException
  1917. {
  1918. logger.info("getting document from content version:" + contentVersionVO.getContentName());
  1919. Timer t = new Timer();
  1920. //ContentVO contentVO = ContentController.getContentController().getContentVOWithId(contentVersionVO.getContentId());
  1921. ContentVO contentVO = ContentController.getContentController().getContentVOWithId(contentVersionVO.getContentId(), db);
  1922. RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getContentVOWithId", (t.getElapsedTimeNanos() / 1000));
  1923. if(contentVO.getIsDeleted())
  1924. return null;
  1925. // make a new, empty document
  1926. Document doc = new Document();
  1927. // Add the last modified date of the file a field named "modified".
  1928. // Use a field that is indexed (i.e. searchable), but don't tokenize
  1929. // the field into words.
  1930. logger.info("contentVersionVO:" + contentVersionVO.getContentName());
  1931. doc.add(new NumericField("publishDateTime", Field.Store.YES, true).setLongValue(contentVO.getPublishDateTime().getTime()));
  1932. doc.add(new NumericField("modificationDateTime", Field.Store.YES, true).setLongValue(contentVersionVO.getModifiedDateTime().getTime()));
  1933. doc.add(new Field("modified", DateTools.timeToString(contentVersionVO.getModifiedDateTime().getTime(), DateTools.Resolution.MINUTE), Field.Store.YES, Field.Index.NOT_ANALYZED));
  1934. doc.add(new Field("contentVersionId", "" + contentVersionVO.getId(), Field.Store.YES, Field.Index.NOT_ANALYZED));
  1935. doc.add(new Field("contentId", "" + contentVersionVO.getContentId(), Field.Store.YES, Field.Index.NOT_ANALYZED));
  1936. doc.add(new Field("contentTypeDefinitionId", "" + contentVO.getContentTypeDefinitionId(), Field.Store.YES, Field.Index.NOT_ANALYZED));
  1937. doc.add(new Field("languageId", "" + contentVersionVO.getLanguageId(), Field.Store.YES, Field.Index.NOT_ANALYZED));
  1938. doc.add(new Field("repositoryId", "" + contentVO.getRepositoryId(), Field.Store.YES, Field.Index.NOT_ANALYZED));
  1939. doc.add(new Field("lastModifier", "" + contentVersionVO.getVersionModifier(), Field.Store.YES, Field.Index.NOT_ANALYZED));
  1940. doc.add(new Field("stateId", "" + contentVersionVO.getStateId(), Field.Store.YES, Field.Index.NOT_ANALYZED));
  1941. doc.add(new Field("isAsset", "false", Field.Store.YES, Field.Index.NOT_ANALYZED));
  1942. doc.add(new Field("path", "" + getContentPath(contentVO.getId(), db), Field.Store.YES, Field.Index.NOT_ANALYZED));
  1943. RequestAnalyser.getRequestAnalyser().registerComponentStatistics("Indexing normalFields", (t.getElapsedTimeNanos() / 1000));
  1944. //Testing adding the categories for this version
  1945. try
  1946. {
  1947. if(contentVO.getContentTypeDefinitionId() != null)
  1948. {
  1949. ContentTypeDefinitionVO ctdVO = null;
  1950. try
  1951. {
  1952. ctdVO = ContentTypeDefinitionController.getController().getContentTypeDefinitionVOWithId(contentVO.getContentTypeDefinitionId(), db);
  1953. }
  1954. catch (SystemException sex)
  1955. {
  1956. logger.warn("Failed to get the content type definition for content with Id: " + contentVO.getContentId() + ". The categories for the content will not be indexed. Message: " + sex.getMessage());
  1957. logger.info("Failed to get the content type definition for content with Id: " + contentVO.getContentId(), sex);
  1958. }
  1959. if (ctdVO != null)
  1960. {
  1961. RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getContentTypeDefinitionVOWithId", (t.getElapsedTimeNanos() / 1000));
  1962. List<CategoryAttribute> categoryKeys = ContentTypeDefinitionController.getController().getDefinedCategoryKeys(ctdVO, true);
  1963. RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getDefinedCategoryKeys", (t.getElapsedTimeNanos() / 1000));
  1964. for(CategoryAttribute categoryKey : categoryKeys)
  1965. {
  1966. logger.info("categoryKey:" + categoryKey.getValue() + " for content:" + contentVO.getName());
  1967. //List<ContentCategoryVO> contentCategoryVOList = ContentCategoryController.getController().findByContentVersionAttribute(categoryKey.getValue(), contentVersionVO.getId());
  1968. List<ContentCategory> contentCategoryVOList = ContentCategoryController.getController().findByContentVersionAttribute(categoryKey.getValue(), contentVersionVO.getId(), db, true);
  1969. RequestAnalyser.getRequestAnalyser().registerComponentStatistics("Indexing categories", (t.getElapsedTimeNanos() / 1000));
  1970. logger.info("contentCategoryVOList:" + contentCategoryVOList.size());
  1971. for(ContentCategory contentCategory : contentCategoryVOList)
  1972. {
  1973. doc.add(new Field("categories", "" + contentCategory.getAttributeName().replaceAll(" ", "_").toLowerCase() + "eq" + contentCategory.getCategory().getId(), Field.Store.YES, Field.Index.NOT_ANALYZED));
  1974. doc.add(new Field("categories", "" + contentCategory.getAttributeName() + "=" + contentCategory.getCategory().getId(), Field.Store.YES, Field.Index.NOT_ANALYZED));
  1975. doc.add(new Field("" + contentCategory.getAttributeName() + "_categoryId", "" + contentCategory.getCategory().getId(), Field.Store.YES, Field.Index.NOT_ANALYZED));
  1976. }
  1977. }
  1978. }
  1979. }
  1980. }
  1981. catch (Exception e)
  1982. {
  1983. logger.error("Problem indexing categories for contentVO: " + contentVO.getName() + "(" + contentVO.getId() + "): " + e.getMessage(), e);
  1984. }
  1985. RequestAnalyser.getRequestAnalyser().registerComponentStatistics("Indexing categories", (t.getElapsedTimeNanos() / 1000));
  1986. //End test
  1987. // Add the uid as a field, so that index can be incrementally
  1988. // maintained.
  1989. // This field is not stored with document, it is indexed, but it is not
  1990. // tokenized prior to indexing.
  1991. doc.add(new Field("uid", "contentVersionId_" + contentVersionVO.getId(), Field.Store.NO, Field.Index.NOT_ANALYZED));
  1992. // Add the tag-stripped contents as a Reader-valued Text field so it
  1993. // will
  1994. // get tokenized and indexed.
  1995. doc.add(new Field("contents", new StringReader(contentVersionVO.getVersionValue())));
  1996. doc.add(new Field("contents", new StringReader(contentVersionVO.getContentName())));
  1997. RequestAnalyser.getRequestAnalyser().registerComponentStatistics("Indexing end fields", (t.getElapsedTimeNanos() / 1000));
  1998. // return the document
  1999. return doc;
  2000. }
  2001. public Document getDocumentFromDigitalAsset(DigitalAssetVO digitalAssetVO, ContentVersionVO contentVersionVO, Database db) throws Exception, InterruptedException
  2002. {
  2003. //ContentVO contentVO = ContentController.getContentController().getContentVOWithId(contentVersionVO.getContentId(), db);
  2004. ContentVO contentVO = ContentController.getContentController().getLocklessContentVOWithId(contentVersionVO.getContentId(), db);
  2005. if(contentVO == null || contentVO.getIsDeleted())
  2006. return null;
  2007. // make a new, empty document
  2008. Document doc = new Document();
  2009. // Add the last modified date of the file a field named "modified".
  2010. // Use a field that is indexed (i.e. searchable), but don't tokenize
  2011. // the field into words.
  2012. //doc.add(new Field("modified", DateTools.timeToString(contentVersionVO.getModifiedDateTime().getTime(), DateTools.Resolution.MINUTE), Field.Store.YES, Field.Index.NOT_ANALYZED));
  2013. doc.add(new NumericField("modificationDateTime", Field.Store.YES, true).setLongValue(contentVersionVO.getModifiedDateTime().getTime()));
  2014. doc.add(new Field("digitalAssetId", "" + digitalAssetVO.getId(), Field.Store.YES, Field.Index.NOT_ANALYZED));
  2015. doc.add(new Field("contentVersionId", "" + contentVersionVO.getId(), Field.Store.YES, Field.Index.NOT_ANALYZED));
  2016. doc.add(new Field("contentId", "" + contentVersionVO.getContentId(), Field.Store.YES, Field.Index.NOT_ANALYZED));
  2017. doc.add(new Field("contentTypeDefinitionId", "" + contentVO.getContentTypeDefinitionId(), Field.Store.YES, Field.Index.NOT_ANALYZED));
  2018. doc.add(new Field("languageId", "" + contentVersionVO.getLanguageId(), Field.Store.YES, Field.Index.NOT_ANALYZED));
  2019. doc.add(new Field("repositoryId", "" + contentVO.getRepositoryId(), Field.Store.YES, Field.Index.NOT_ANALYZED));
  2020. doc.add(new Field("lastModifier", "" + contentVersionVO.getVersionModifier(), Field.Store.YES, Field.Index.NOT_ANALYZED));
  2021. doc.add(new Field("stateId", "" + contentVersionVO.getStateId(), Field.Store.YES, Field.Index.NOT_ANALYZED));
  2022. doc.add(new Field("isAsset", "true", Field.Store.YES, Field.Index.NOT_ANALYZED));
  2023. doc.add(new Field("path", "" + getContentPath(contentVO.getId(), db), Field.Store.YES, Field.Index.NOT_ANALYZED));
  2024. // Add the uid as a field, so that index can be incrementally
  2025. // maintained.
  2026. // This field is not stored with document, it is indexed, but it is not
  2027. // tokenized prior to indexing.
  2028. doc.add(new Field("uid", "digitalAssetId_" + digitalAssetVO.getId(), Field.Store.NO, Field.Index.NOT_ANALYZED));
  2029. //doc.add(new Field("uid", "" + contentVersionVO.getId(), Field.Store.NO, Field.Index.NOT_ANALYZED));
  2030. // Add the tag-stripped contents as a Reader-valued Text field so it
  2031. // will
  2032. // get tokenized and indexed.
  2033. doc.add(new Field("contents", new StringReader(digitalAssetVO.getAssetKey() + " " + digitalAssetVO.getAssetFileName() + " " + digitalAssetVO.getAssetContentType())));
  2034. if (CmsPropertyHandler.getIndexDigitalAssetContent())
  2035. {
  2036. //String url = DigitalAssetController.getController().getDigitalAssetUrl(digitalAssetVO, db);
  2037. //if(logger.isInfoEnabled())
  2038. // logger.info("url if we should index file:" + url);
  2039. try
  2040. {
  2041. String filePath = DigitalAssetController.getController().getDigitalAssetFilePath(digitalAssetVO, db);
  2042. if(logger.isInfoEnabled())
  2043. logger.info("filePath if we should index file:" + filePath);
  2044. File file = new File(filePath);
  2045. String text = extractTextToIndex(digitalAssetVO, file);
  2046. doc.add(new Field("contents", new StringReader(text)));
  2047. }
  2048. catch(Exception e)
  2049. {
  2050. logger.warn("Problem getting asset:" + digitalAssetVO.getId() + ": " + e.getMessage());
  2051. }
  2052. }
  2053. return doc;
  2054. }
  2055. private String extractTextToIndex(DigitalAssetVO digitalAssetVO, File file)
  2056. {
  2057. String text = "";
  2058. if(logger.isInfoEnabled())
  2059. logger.info("Asset content type:" + digitalAssetVO.getAssetContentType());
  2060. if(digitalAssetVO.getAssetContentType().equalsIgnoreCase("application/pdf"))
  2061. {
  2062. try
  2063. {
  2064. Writer output = null;
  2065. PDDocument document = null;
  2066. try
  2067. {
  2068. document = PDDocument.load(file);
  2069. ByteArrayOutputStream baos = new ByteArrayOutputStream();
  2070. if(!document.isEncrypted())
  2071. {
  2072. output = new OutputStreamWriter(baos, "UTF-8");
  2073. PDFTextStripper stripper = new PDFTextStripper();
  2074. //stripper.setSortByPosition( sort );
  2075. //stripper.setStartPage( startPage );
  2076. //stripper.setEndPage( endPage );
  2077. stripper.writeText( document, output );
  2078. text = baos.toString("UTF-8");
  2079. if(logger.isInfoEnabled())
  2080. logger.info("PDF Document has " + text.length() + " chars\n\n" + text);
  2081. }
  2082. }
  2083. catch (Exception e)
  2084. {
  2085. logger.warn("Error indexing file: " + file + "\nMessage: " + e.getMessage());
  2086. }
  2087. finally
  2088. {
  2089. if( output != null )
  2090. {
  2091. output.close();
  2092. }
  2093. if( document != null )
  2094. {
  2095. document.close();
  2096. }
  2097. }
  2098. }
  2099. catch (Exception e)
  2100. {
  2101. logger.warn("Error indexing:" + e.getMessage());
  2102. }
  2103. }
  2104. else if(digitalAssetVO.getAssetContentType().equalsIgnoreCase("application/msword"))
  2105. {
  2106. try
  2107. {
  2108. InputStream is = new FileInputStream(file);
  2109. POIFSFileSystem fs = new POIFSFileSystem(new FileInputStream(file));
  2110. is.close();
  2111. // Create a document for this file
  2112. HWPFDocument doc = new HWPFDocument(fs);
  2113. // Create a WordExtractor to read the text of the word document
  2114. WordExtractor we = new WordExtractor(doc);
  2115. // Extract all paragraphs in the document as strings
  2116. text = we.getText();
  2117. // Output the document
  2118. if(logger.isInfoEnabled())
  2119. logger.info("Word Document has " + text.length() + " chars\n\n" + text);
  2120. }
  2121. catch (Exception e)
  2122. {
  2123. logger.warn("Error indexing file: " + file + "\nMessage: " + e.getMessage());
  2124. }
  2125. }
  2126. return text;
  2127. }
  2128. public void deleteVersionFromIndex(String contentVersionId)
  2129. {
  2130. try
  2131. {
  2132. IndexWriter writer = getIndexWriter();
  2133. logger.info("Deleting contentVersionId:" + contentVersionId);
  2134. writer.deleteDocuments(new Term("contentVersionId", "" + contentVersionId));
  2135. writer.commit();
  2136. }
  2137. catch (Exception e)
  2138. {
  2139. logger.error("Error deleteVersionFromIndex:" + e.getMessage(), e);
  2140. }
  2141. }
  2142. public String getContentPath(Integer contentId, Database db) throws Exception
  2143. {
  2144. StringBuffer sb = new StringBuffer();
  2145. ContentVO contentVO = ContentController.getContentController().getContentVOWithId(contentId, db);
  2146. if (contentVO.getName() == null || contentVO.getName().equals(""))
  2147. {
  2148. sb.insert(0, "]");
  2149. sb.insert(0, contentVO.getId());
  2150. sb.insert(0, "[");
  2151. }
  2152. else
  2153. {
  2154. sb.insert(0, contentVO.getName());
  2155. }
  2156. while(contentVO.getParentContentId() != null)
  2157. {
  2158. contentVO = ContentController.getContentController().getContentVOWithId(contentVO.getParentContentId(), db);
  2159. sb.insert(0, "/");
  2160. if (contentVO.getName() == null || contentVO.getName().equals(""))
  2161. {
  2162. sb.insert(0, "]");
  2163. sb.insert(0, contentVO.getId());
  2164. sb.insert(0, "[");
  2165. }
  2166. else
  2167. {
  2168. sb.insert(0, contentVO.getName());
  2169. }
  2170. }
  2171. sb.insert(0, "/");
  2172. return sb.toString();
  2173. }
  2174. public String getSiteNodePath(Integer siteNodeId, Database db) throws Exception
  2175. {
  2176. StringBuffer sb = new StringBuffer();
  2177. SiteNodeVO siteNodeVO = SiteNodeController.getController().getSiteNodeVOWithId(siteNodeId, db);
  2178. while(siteNodeVO != null)
  2179. {
  2180. sb.insert(0, "/" + siteNodeVO.getName());
  2181. if(siteNodeVO.getParentSiteNodeId() != null)
  2182. siteNodeVO = SiteNodeController.getController().getSiteNodeVOWithId(siteNodeVO.getParentSiteNodeId(), db);
  2183. else
  2184. siteNodeVO = null;
  2185. }
  2186. return sb.toString();
  2187. }
  2188. /**
  2189. * This is a method that never should be called.
  2190. */
  2191. public BaseEntityVO getNewVO()
  2192. {
  2193. return null;
  2194. }
  2195. public void setContextParameters(Map map)
  2196. {
  2197. // TODO Auto-generated method stub
  2198. }
  2199. }