PageRenderTime 42ms CodeModel.GetById 13ms RepoModel.GetById 0ms app.codeStats 0ms

/utility/infinit.e.mongo-indexer/src/com/ikanow/infinit/e/utility/MongoAssociationFeatureTxfer.java

https://github.com/IKANOW/Infinit.e
Java | 262 lines | 171 code | 40 blank | 51 comment | 33 complexity | 6991fb55fb3494a0d81a5c8a0337787f MD5 | raw file
Possible License(s): BSD-3-Clause
  1. /*******************************************************************************
  2. * Copyright 2012, The Infinit.e Open Source Project
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. ******************************************************************************/
  16. package com.ikanow.infinit.e.utility;
  17. import java.io.IOException;
  18. import java.util.ArrayList;
  19. import java.util.HashMap;
  20. import java.util.LinkedList;
  21. import java.util.List;
  22. import java.util.Map;
  23. import org.bson.BSONObject;
  24. import org.bson.types.ObjectId;
  25. import org.elasticsearch.common.settings.ImmutableSettings;
  26. import org.elasticsearch.common.settings.ImmutableSettings.Builder;
  27. import com.google.gson.Gson;
  28. import com.ikanow.infinit.e.data_model.index.ElasticSearchManager;
  29. import com.ikanow.infinit.e.data_model.index.IndexManager;
  30. import com.ikanow.infinit.e.data_model.index.feature.event.AssociationFeaturePojoIndexMap;
  31. import com.ikanow.infinit.e.data_model.store.DbManager;
  32. import com.ikanow.infinit.e.data_model.store.MongoDbManager;
  33. import com.ikanow.infinit.e.data_model.store.config.source.SourcePojo;
  34. import com.ikanow.infinit.e.data_model.store.document.AssociationPojo;
  35. import com.ikanow.infinit.e.data_model.store.feature.association.AssociationFeaturePojo;
  36. import com.ikanow.infinit.e.processing.generic.GenericProcessingController;
  37. import com.ikanow.infinit.e.processing.generic.aggregation.AssociationAggregationUtils;
  38. import com.mongodb.BasicDBObject;
  39. import com.mongodb.DBCollection;
  40. import com.mongodb.DBCursor;
  41. import com.mongodb.MongoException;
  42. public class MongoAssociationFeatureTxfer
  43. {
  44. //___________________________________________________________________________________________________
  45. // MAIN
  46. /**
  47. * @param args: 0,1 is the location of the MongoDB host/port, 2/3 is the location of the ES index host/port
  48. * @throws MongoException
  49. * @throws NumberFormatException
  50. * @throws IOException
  51. */
  52. public static void main(String sConfigPath, String sQuery, boolean bDelete, boolean bRebuildIndex, int nSkip, int nLimit, String chunksDescription) throws NumberFormatException, MongoException, IOException {
  53. MongoAssociationFeatureTxfer txferManager = new MongoAssociationFeatureTxfer();
  54. // Command line processing
  55. com.ikanow.infinit.e.data_model.Globals.setIdentity(com.ikanow.infinit.e.data_model.Globals.Identity.IDENTITY_SERVICE);
  56. if (null != sConfigPath) {
  57. com.ikanow.infinit.e.data_model.Globals.overrideConfigLocation(sConfigPath);
  58. }
  59. if (bRebuildIndex) {
  60. new GenericProcessingController().InitializeIndex(false, false, true);
  61. }
  62. BasicDBObject query = null;
  63. if (null == sQuery) {
  64. query = new BasicDBObject();
  65. }
  66. else {
  67. query = (BasicDBObject) com.mongodb.util.JSON.parse(sQuery);
  68. }
  69. if (bDelete) {
  70. txferManager.doDelete(query, nLimit);
  71. }
  72. else {
  73. if (null == chunksDescription) {
  74. txferManager.doTransfer(query, nSkip, nLimit, null);
  75. }
  76. else {
  77. txferManager.doChunkedTransfer(query, nSkip, nLimit, chunksDescription);
  78. }
  79. }
  80. }
  81. //___________________________________________________________________________________________________
  82. // Wrapper for doing transfer in chunks:
  83. private void doChunkedTransfer(BasicDBObject query, int nSkip, int nLimit, String chunksDescription) throws IOException
  84. {
  85. List<BasicDBObject> chunkList = MongoIndexerUtils.getChunks("feature.association", chunksDescription);
  86. System.out.println("CHUNKS: Found " + chunkList.size() + " chunks");
  87. //DEBUG
  88. //System.out.println("Chunklist= " + chunkList);
  89. for (BasicDBObject chunk: chunkList) {
  90. BasicDBObject cleanQuery = new BasicDBObject();
  91. cleanQuery.putAll((BSONObject)query);
  92. String id = null;
  93. try {
  94. id = (String) chunk.remove("$id");
  95. System.out.println("CHUNK: " + id);
  96. doTransfer(cleanQuery, 0, 0, chunk);
  97. }
  98. catch (Exception e) {
  99. System.out.println("FAILED CHUNK: " + id + " ... " + e.getMessage());
  100. }
  101. }
  102. }//TESTED
  103. //___________________________________________________________________________________________________
  104. // PROCESSING LOOP (new interface)
  105. Map<String, SourcePojo> _sourceCache = new HashMap<String, SourcePojo>();
  106. private void doTransfer(BasicDBObject query, int nSkip, int nLimit, BasicDBObject chunk)
  107. {
  108. ElasticSearchManager elasticManager = null;
  109. // Initialize the DB:
  110. DBCollection eventFeatureDB = DbManager.getFeature().getAssociation();
  111. // Initialize the ES (create the index if it doesn't already):
  112. // 1. Set-up the entity feature index
  113. ElasticSearchManager.setDefaultClusterName("infinite-aws");
  114. // (delete the index)
  115. //elasticManager = ElasticSearchManager.getIndex("association_index");
  116. //elasticManager.deleteMe();
  117. // Create the index if necessary
  118. String sMapping = new Gson().toJson(new AssociationFeaturePojoIndexMap.Mapping(), AssociationFeaturePojoIndexMap.Mapping.class);
  119. Builder localSettings = ImmutableSettings.settingsBuilder();
  120. localSettings.put("number_of_shards", 1).put("number_of_replicas", 0);
  121. localSettings.put("index.analysis.analyzer.suggestAnalyzer.tokenizer", "standard");
  122. localSettings.putArray("index.analysis.analyzer.suggestAnalyzer.filter", "standard", "lowercase");
  123. elasticManager = ElasticSearchManager.createIndex("association_index", null, false, null, sMapping, localSettings);
  124. // Get the index (necessary if already created)
  125. if (null == elasticManager)
  126. {
  127. elasticManager = ElasticSearchManager.getIndex("association_index");
  128. }
  129. // Now query the DB:
  130. DBCursor dbc = null;
  131. dbc = eventFeatureDB.find(query);
  132. if (null != chunk) {
  133. if (chunk.containsField(DbManager.min_)) {
  134. dbc = dbc.addSpecial(DbManager.min_, chunk.get(DbManager.min_));
  135. }
  136. if (chunk.containsField(DbManager.max_)) {
  137. dbc = dbc.addSpecial(DbManager.max_, chunk.get(DbManager.max_));
  138. }
  139. }
  140. dbc = dbc.skip(nSkip).limit(nLimit).batchSize(1000);
  141. if (null == chunk) {
  142. int nCount = dbc.count() - nSkip;
  143. if (nCount < 0) nCount = 0;
  144. System.out.println("Found " + nCount + " records to sync, process first " + (0==nLimit?nCount:nLimit));
  145. if (0 == nCount) { // Nothing to do...
  146. return;
  147. }
  148. }
  149. List<AssociationFeaturePojo> events = new LinkedList<AssociationFeaturePojo>();
  150. int nSynced = 0;
  151. // Loop over array and invoke the cleansing function for each one
  152. while ( dbc.hasNext() )
  153. {
  154. BasicDBObject dbo = (BasicDBObject) dbc.next();
  155. AssociationFeaturePojo evt = AssociationFeaturePojo.fromDb(dbo,AssociationFeaturePojo.class);
  156. // If this table has just been rebuilt from the document then the indexes are all wrong ...
  157. // recalculate and save
  158. if ('#' == evt.getIndex().charAt(0)) {
  159. AssociationPojo singleEvt = new AssociationPojo();
  160. singleEvt.setEntity1_index(evt.getEntity1_index());
  161. singleEvt.setEntity2_index(evt.getEntity2_index());
  162. singleEvt.setVerb_category(evt.getVerb_category());
  163. singleEvt.setGeo_index(evt.getGeo_index());
  164. evt.setIndex(AssociationAggregationUtils.getEventFeatureIndex(singleEvt));
  165. eventFeatureDB.update(new BasicDBObject("_id", dbo.get("_id")),
  166. new BasicDBObject(MongoDbManager.set_,
  167. new BasicDBObject(AssociationFeaturePojo.index_, evt.getIndex())), false, true);
  168. // (has to be a multi-update even though it's unique because it's sharded on index)
  169. }
  170. // Handle groups (system group is: "4c927585d591d31d7b37097a")
  171. if (null == evt.getCommunityId())
  172. {
  173. evt.setCommunityId(new ObjectId("4c927585d591d31d7b37097a"));
  174. }
  175. // Bulk add prep
  176. events.add(evt);
  177. nSynced++;
  178. if ( events.size() > 1000 )
  179. {
  180. elasticManager.bulkAddDocuments(IndexManager.mapListToIndex(events, AssociationFeaturePojo.listType(), new AssociationFeaturePojoIndexMap()), "_id", null,true);
  181. events.clear();
  182. }
  183. }
  184. // End loop over entities
  185. //write whatevers left
  186. elasticManager.bulkAddDocuments(IndexManager.mapListToIndex(events, AssociationFeaturePojo.listType(), new AssociationFeaturePojoIndexMap()), "_id", null,true);
  187. if (null != chunk) {
  188. System.out.println("Found " + nSynced + " records to sync in chunk");
  189. }
  190. }
  191. //___________________________________________________________________________________________________
  192. // DELETE DOCUMENTS FROM A QUERY
  193. private void doDelete(BasicDBObject query, int nLimit)
  194. {
  195. try
  196. {
  197. // Initialize the DB:
  198. DBCollection eventFeatureDB = DbManager.getFeature().getAssociation();
  199. DBCursor cur = eventFeatureDB.find(query).limit(nLimit);
  200. // (this internally works in batches of 1000; just get _id)
  201. System.out.println("Found " + cur.count() + " records to delete");
  202. if (nLimit > 0) {
  203. System.out.println("(limited to " + nLimit + " records)");
  204. }
  205. ArrayList<AssociationFeaturePojo> events = new ArrayList<AssociationFeaturePojo>();
  206. LinkedList<String> eventIds = new LinkedList<String>();
  207. while (cur.hasNext())
  208. {
  209. AssociationFeaturePojo event = AssociationFeaturePojo.fromDb(cur.next(),AssociationFeaturePojo.class);
  210. events.add(event);
  211. eventIds.add(new StringBuffer(event.getIndex()).append(":").append(event.getCommunityId()).toString());
  212. eventFeatureDB.remove(new BasicDBObject("index", event.getIndex()));
  213. }
  214. ElasticSearchManager elasticManager = ElasticSearchManager.getIndex("association_index");
  215. elasticManager.bulkDeleteDocuments(eventIds);
  216. } catch (NumberFormatException e) {
  217. e.printStackTrace();
  218. } catch (MongoException e) {
  219. e.printStackTrace();
  220. }
  221. }
  222. }