/checklistbank-index/src/main/java/org/gbif/checklistbank/index/NameUsageIndexer.java
Java | 190 lines | 107 code | 25 blank | 58 comment | 7 complexity | 473b95ff78a76ac193c5be1894861a8d MD5 | raw file
- package org.gbif.checklistbank.index;
- import org.gbif.checklistbank.api.model.NameUsage;
- import org.gbif.checklistbank.api.service.NameUsageService;
- import java.util.ArrayList;
- import java.util.List;
- import java.util.concurrent.Callable;
- import com.google.inject.Guice;
- import com.google.inject.Inject;
- import com.google.inject.Injector;
- import com.google.inject.name.Named;
- import org.apache.commons.lang.time.StopWatch;
- import org.apache.solr.client.solrj.SolrServer;
- import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
- import org.apache.solr.client.solrj.response.SolrPingResponse;
- import org.apache.solr.common.SolrInputDocument;
- import org.apache.solr.core.CoreContainer;
- /**
- * Checklist Bank multithread indexer.
- * This class creates a pool of threads
- */
- public class NameUsageIndexer extends ThreadPoolRunner<Integer> {
- @Inject
- private NameUsageService nameUsageService;
- /**
- * {@link NameUsage}/{@link SolrInputDocument} converter.
- */
- @Inject
- private SolrDocumentConverter<NameUsage> solrDocumentConverter;
- /**
- * Determines how often the status has to be written into the log.
- */
- protected Integer logBatchSize;
- /**
- * Total # of records to be processed by the Indexer.
- * This parameter should be known in advance.
- */
- protected Integer recordCount;
- /**
- * Defines if the Solr index should be optimized at the end of the process.
- */
- protected boolean optimize;
- /**
- * If true, every logBatchSize a commit command is sent to Solr.
- */
- protected boolean useBatchCommit;
- /**
- * Default page size for iterating over the {@link NameUsage} objects.
- */
- private Integer pageSize = 50;
- /**
- * SolrServer instance.
- */
- protected SolrServer indexWriter;
- /**
- * Solr home environment variable.
- */
- private static final String SOLR_HOME = "solr.solr.home";
- /**
- * Entry point for execution.
- */
- public static void main(String[] args) throws Exception {
- if (args.length > 2) {
- // Sets the SOLR_HOME environment variable is is specified in the command line.
- System.setProperty(SOLR_HOME, args[1]);
- }
- // Creates the injector
- Injector injector = Guice.createInjector(new IndexingModule(args[0]));
- // Gets the indexer instance
- NameUsageIndexer nameUsageIndexer = injector.getInstance(NameUsageIndexer.class);
- nameUsageIndexer.run();
- }
- /**
- * Default constructor.
- *
- * @param confFile configuration file name.
- */
- @Inject
- public NameUsageIndexer(@Named("indexingConfFile") String confFile) {
- // Superclass constructor that initializes the Properties config field.
- super(confFile);
- // Gets the required parameters using the Properties config field.
- this.logBatchSize = Integer.parseInt(this.config.getProperty("logBatchSize"));
- this.pageSize = Integer.parseInt(this.config.getProperty("pageSize"));
- this.recordCount = Integer.parseInt(this.config.getProperty("recordCount"));
- this.optimize = Boolean.parseBoolean(this.config.getProperty("optimize"));
- this.useBatchCommit = Boolean.parseBoolean(this.config.getProperty("useBatchCommit"));
- // Invokes the initialization of the Solr server and required dependencies.
- this.init();
- }
- @Override
- public List<? extends Callable<Integer>> createJobList() {
- try {
- StopWatch stopWatch = new StopWatch();
- stopWatch.start();
- List<NameUsageIndexingJob> jobs = this.createJobs();
- stopWatch.stop();
- log.info(String.format("Job creation done in: %s", stopWatch.getTime()));
- return jobs;
- } catch (Exception e) {
- log.error("Error splitting file", e);
- }
- return null;
- }
- /**
- * Creates a list of NameUsageIndexingJob.
- * Each element contains the necessary information for running the indexing task assigned.
- *
- * @return a {@link List} of {@link NameUsageIndexingJob}.
- */
- private List<NameUsageIndexingJob> createJobs() {
- ArrayList<NameUsageIndexingJob> jobs = new ArrayList<NameUsageIndexingJob>();
- Integer startPos = 0, endPos = 0;
- // records to be processed for each job
- Integer recordsPerJob = this.recordCount / this.poolSize;
- Integer remainingTasks = this.recordCount % this.poolSize;
- for (int i = 0; i < this.poolSize; i++) {
- endPos += (recordsPerJob);
- // Job creation
- if ((i + 1) == this.poolSize) {
- endPos += remainingTasks;
- }
- jobs.add(new NameUsageIndexingJob(indexWriter, nameUsageService, logBatchSize, useBatchCommit, pageSize,
- startPos, endPos, solrDocumentConverter));
- startPos = endPos + 1;
- }
- return jobs;
- }
- /**
- * Initialize the indexer: Solr settings and any other required dependencies.
- */
- private void init() {
- this.initSolr();
- }
- /**
- * Initialize an {@link EmbeddedSolrServer} instance using the configuration settings.
- */
- private void initSolr() {
- try {
- String solrHome = this.config.getProperty(SOLR_HOME);
- if (solrHome != null && !solrHome.isEmpty()) {
- System.setProperty(SOLR_HOME, solrHome);
- }
- CoreContainer.Initializer initializer = new CoreContainer.Initializer();
- initializer.setAbortOnConfigurationError(true);
- CoreContainer coreContainer = initializer.initialize();
- indexWriter = new EmbeddedSolrServer(coreContainer, "");
- SolrPingResponse solrPingResponse = indexWriter.ping();
- log.info("Solr server started, ping response in: " + solrPingResponse.getQTime());
- } catch (Exception e) {
- log.error("Error starting up the server", e);
- throw new RuntimeException(e);
- }
- }
- @Override
- protected void shutdownService(int tasksCount) {
- try {
- super.shutdownService(tasksCount);
- // This statement is used because the Guice container is not stopped inside the threadpool.
- System.exit(0);
- log.info("Shuttingdown completed!");
- } catch (Exception e) {
- log.error("Error shutingdown the index", e);
- }
- }
- }