PageRenderTime 925ms CodeModel.GetById 37ms RepoModel.GetById 0ms app.codeStats 0ms

/checklistbank-index/src/main/java/org/gbif/checklistbank/index/NameUsageIndexer.java

http://gbif-ecat.googlecode.com/
Java | 190 lines | 107 code | 25 blank | 58 comment | 7 complexity | 473b95ff78a76ac193c5be1894861a8d MD5 | raw file
  1. package org.gbif.checklistbank.index;
  2. import org.gbif.checklistbank.api.model.NameUsage;
  3. import org.gbif.checklistbank.api.service.NameUsageService;
  4. import java.util.ArrayList;
  5. import java.util.List;
  6. import java.util.concurrent.Callable;
  7. import com.google.inject.Guice;
  8. import com.google.inject.Inject;
  9. import com.google.inject.Injector;
  10. import com.google.inject.name.Named;
  11. import org.apache.commons.lang.time.StopWatch;
  12. import org.apache.solr.client.solrj.SolrServer;
  13. import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
  14. import org.apache.solr.client.solrj.response.SolrPingResponse;
  15. import org.apache.solr.common.SolrInputDocument;
  16. import org.apache.solr.core.CoreContainer;
  17. /**
  18. * Checklist Bank multithread indexer.
  19. * This class creates a pool of threads
  20. */
  21. public class NameUsageIndexer extends ThreadPoolRunner<Integer> {
  22. @Inject
  23. private NameUsageService nameUsageService;
  24. /**
  25. * {@link NameUsage}/{@link SolrInputDocument} converter.
  26. */
  27. @Inject
  28. private SolrDocumentConverter<NameUsage> solrDocumentConverter;
  29. /**
  30. * Determines how often the status has to be written into the log.
  31. */
  32. protected Integer logBatchSize;
  33. /**
  34. * Total # of records to be processed by the Indexer.
  35. * This parameter should be known in advance.
  36. */
  37. protected Integer recordCount;
  38. /**
  39. * Defines if the Solr index should be optimized at the end of the process.
  40. */
  41. protected boolean optimize;
  42. /**
  43. * If true, every logBatchSize a commit command is sent to Solr.
  44. */
  45. protected boolean useBatchCommit;
  46. /**
  47. * Default page size for iterating over the {@link NameUsage} objects.
  48. */
  49. private Integer pageSize = 50;
  50. /**
  51. * SolrServer instance.
  52. */
  53. protected SolrServer indexWriter;
  54. /**
  55. * Solr home environment variable.
  56. */
  57. private static final String SOLR_HOME = "solr.solr.home";
  58. /**
  59. * Entry point for execution.
  60. */
  61. public static void main(String[] args) throws Exception {
  62. if (args.length > 2) {
  63. // Sets the SOLR_HOME environment variable is is specified in the command line.
  64. System.setProperty(SOLR_HOME, args[1]);
  65. }
  66. // Creates the injector
  67. Injector injector = Guice.createInjector(new IndexingModule(args[0]));
  68. // Gets the indexer instance
  69. NameUsageIndexer nameUsageIndexer = injector.getInstance(NameUsageIndexer.class);
  70. nameUsageIndexer.run();
  71. }
  72. /**
  73. * Default constructor.
  74. *
  75. * @param confFile configuration file name.
  76. */
  77. @Inject
  78. public NameUsageIndexer(@Named("indexingConfFile") String confFile) {
  79. // Superclass constructor that initializes the Properties config field.
  80. super(confFile);
  81. // Gets the required parameters using the Properties config field.
  82. this.logBatchSize = Integer.parseInt(this.config.getProperty("logBatchSize"));
  83. this.pageSize = Integer.parseInt(this.config.getProperty("pageSize"));
  84. this.recordCount = Integer.parseInt(this.config.getProperty("recordCount"));
  85. this.optimize = Boolean.parseBoolean(this.config.getProperty("optimize"));
  86. this.useBatchCommit = Boolean.parseBoolean(this.config.getProperty("useBatchCommit"));
  87. // Invokes the initialization of the Solr server and required dependencies.
  88. this.init();
  89. }
  90. @Override
  91. public List<? extends Callable<Integer>> createJobList() {
  92. try {
  93. StopWatch stopWatch = new StopWatch();
  94. stopWatch.start();
  95. List<NameUsageIndexingJob> jobs = this.createJobs();
  96. stopWatch.stop();
  97. log.info(String.format("Job creation done in: %s", stopWatch.getTime()));
  98. return jobs;
  99. } catch (Exception e) {
  100. log.error("Error splitting file", e);
  101. }
  102. return null;
  103. }
  104. /**
  105. * Creates a list of NameUsageIndexingJob.
  106. * Each element contains the necessary information for running the indexing task assigned.
  107. *
  108. * @return a {@link List} of {@link NameUsageIndexingJob}.
  109. */
  110. private List<NameUsageIndexingJob> createJobs() {
  111. ArrayList<NameUsageIndexingJob> jobs = new ArrayList<NameUsageIndexingJob>();
  112. Integer startPos = 0, endPos = 0;
  113. // records to be processed for each job
  114. Integer recordsPerJob = this.recordCount / this.poolSize;
  115. Integer remainingTasks = this.recordCount % this.poolSize;
  116. for (int i = 0; i < this.poolSize; i++) {
  117. endPos += (recordsPerJob);
  118. // Job creation
  119. if ((i + 1) == this.poolSize) {
  120. endPos += remainingTasks;
  121. }
  122. jobs.add(new NameUsageIndexingJob(indexWriter, nameUsageService, logBatchSize, useBatchCommit, pageSize,
  123. startPos, endPos, solrDocumentConverter));
  124. startPos = endPos + 1;
  125. }
  126. return jobs;
  127. }
  128. /**
  129. * Initialize the indexer: Solr settings and any other required dependencies.
  130. */
  131. private void init() {
  132. this.initSolr();
  133. }
  134. /**
  135. * Initialize an {@link EmbeddedSolrServer} instance using the configuration settings.
  136. */
  137. private void initSolr() {
  138. try {
  139. String solrHome = this.config.getProperty(SOLR_HOME);
  140. if (solrHome != null && !solrHome.isEmpty()) {
  141. System.setProperty(SOLR_HOME, solrHome);
  142. }
  143. CoreContainer.Initializer initializer = new CoreContainer.Initializer();
  144. initializer.setAbortOnConfigurationError(true);
  145. CoreContainer coreContainer = initializer.initialize();
  146. indexWriter = new EmbeddedSolrServer(coreContainer, "");
  147. SolrPingResponse solrPingResponse = indexWriter.ping();
  148. log.info("Solr server started, ping response in: " + solrPingResponse.getQTime());
  149. } catch (Exception e) {
  150. log.error("Error starting up the server", e);
  151. throw new RuntimeException(e);
  152. }
  153. }
  154. @Override
  155. protected void shutdownService(int tasksCount) {
  156. try {
  157. super.shutdownService(tasksCount);
  158. // This statement is used because the Guice container is not stopped inside the threadpool.
  159. System.exit(0);
  160. log.info("Shuttingdown completed!");
  161. } catch (Exception e) {
  162. log.error("Error shutingdown the index", e);
  163. }
  164. }
  165. }