PageRenderTime 143ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/core/infinit.e.harvest.library/src/com/ikanow/infinit/e/harvest/enrichment/custom/GeoReference.java

https://github.com/IKANOW/Infinit.e
Java | 311 lines | 182 code | 40 blank | 89 comment | 90 complexity | c02811bb47adc5c480d49430d0b8c058 MD5 | raw file
Possible License(s): BSD-3-Clause
  1. /*******************************************************************************
  2. * Copyright 2012, The Infinit.e Open Source Project.
  3. *
  4. * This program is free software: you can redistribute it and/or modify
  5. * it under the terms of the GNU Affero General Public License, version 3,
  6. * as published by the Free Software Foundation.
  7. *
  8. * This program is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. * GNU Affero General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU Affero General Public License
  14. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  15. ******************************************************************************/
  16. package com.ikanow.infinit.e.harvest.enrichment.custom;
  17. import java.util.ArrayList;
  18. import java.util.HashMap;
  19. import java.util.List;
  20. import org.apache.commons.lang.WordUtils;
  21. import com.google.gson.reflect.TypeToken;
  22. import com.ikanow.infinit.e.data_model.store.DbManager;
  23. import com.ikanow.infinit.e.data_model.store.feature.geo.GeoFeaturePojo;
  24. import com.mongodb.BasicDBObject;
  25. import com.mongodb.DBCollection;
  26. import com.mongodb.DBCursor;
  27. /**
  28. * GeoReference
  29. * @author cvitter
  30. */
  31. public class GeoReference
  32. {
  33. // Private class variables
  34. private static HashMap<GeoFeaturePojo, List<GeoFeaturePojo>> _cache = new HashMap<GeoFeaturePojo, List<GeoFeaturePojo>>();
  35. private static GeoFeaturePojo geoInfo = null;
  36. /**
  37. * enrichGeoInfo
  38. * @param geoInfo
  39. * @param exactMatchOnly
  40. * @param hasGeoindex
  41. * @return
  42. */
  43. public synchronized static List<GeoFeaturePojo> enrichGeoInfo(GeoFeaturePojo geoInfo, Boolean exactMatchOnly, Boolean hasGeoindex)
  44. {
  45. return enrichGeoInfo(geoInfo, exactMatchOnly, hasGeoindex, -1);
  46. }
  47. /**
  48. * enrichGeoInfo
  49. * @param geoInfo
  50. * @param exactMatchOnly
  51. * @param hasGeoindex
  52. * @param nMaxReturns
  53. * @return
  54. */
  55. public synchronized static List<GeoFeaturePojo> enrichGeoInfo(GeoFeaturePojo g, Boolean exactMatchOnly, Boolean hasGeoindex, int nMaxReturns)
  56. {
  57. geoInfo = g;
  58. BasicDBObject query = null;
  59. DBCursor result = null;
  60. try
  61. {
  62. // Get cached values and return if the geoInfo object passed in matches a cached value
  63. List<GeoFeaturePojo> cachedVal = _cache.get(geoInfo);
  64. if (null != cachedVal) {
  65. return cachedVal;
  66. }
  67. // Establish the collection manager object use to connect to MongoDB
  68. // The connection will persist for lifetime of import
  69. DBCollection geoDb = DbManager.getFeature().getGeo();
  70. // If only search_field has been provided set exactMatchOnly = true
  71. if ((geoInfo.getSearch_field() != null) && (geoInfo.getCity() == null) && (geoInfo.getRegion() == null) && (geoInfo.getCountry() == null))
  72. {
  73. exactMatchOnly = true;
  74. }
  75. // Exact match
  76. if (exactMatchOnly)
  77. {
  78. query = getQuery(hasGeoindex, 1);
  79. //DEBUG
  80. //System.out.println(query);
  81. result = getGeoReference(geoDb, query, nMaxReturns);
  82. }
  83. // Loose match, broaden/modify search on each of up to 4 attempts
  84. else
  85. {
  86. for (int i = 1; i <= 4; i++)
  87. {
  88. query = getQuery(hasGeoindex, i);
  89. if (null != query) {
  90. result = getGeoReference(geoDb, query, nMaxReturns);
  91. if (result.count() > 0) { break; }
  92. }
  93. }
  94. }
  95. if (result.count() > 0)
  96. {
  97. List<GeoFeaturePojo> gpl = GeoFeaturePojo.listFromDb(result, GeoFeaturePojo.listType());
  98. _cache.put(geoInfo, gpl);
  99. return gpl;
  100. }
  101. //No value returned, cache a null value so we don't waste time searching for this value again
  102. else
  103. {
  104. _cache.put(geoInfo, null);
  105. return null;
  106. }
  107. }
  108. catch (Exception e)
  109. {
  110. return null;
  111. }
  112. }
  113. /**
  114. * getQuery
  115. * @param g
  116. * @param hasGeoindex
  117. * @param attempt
  118. * @return
  119. */
  120. //TODO (INF-1864): running this in non-strict mode can cripple the DB since search field might not
  121. //be set ... at least need to cache such queries (almost always the US every time!)....
  122. private static BasicDBObject getQuery(Boolean hasGeoindex, int attempt)
  123. {
  124. BasicDBObject query = new BasicDBObject();
  125. // SearchField
  126. String searchField = (geoInfo.getSearch_field() != null) ? geoInfo.getSearch_field().toLowerCase() : null;
  127. // Cities are all lower case in the georeference collection, set toLowerCase here
  128. String city = (geoInfo.getCity() != null) ? geoInfo.getCity().toLowerCase() : null;
  129. // Use WordUtils.capitalize to set first char of region and country words to Upper Case
  130. String region = (geoInfo.getRegion() != null) ? WordUtils.capitalize(geoInfo.getRegion()) : null;
  131. String country = (geoInfo.getCountry() != null) ? WordUtils.capitalize(geoInfo.getCountry()) : null;
  132. String countryCode = geoInfo.getCountry_code();
  133. // If the only field sent was the search_field
  134. if ((searchField != null) && (city == null) && (region == null) && (country == null) && (countryCode == null))
  135. {
  136. query.put("search_field", searchField);
  137. }
  138. // Otherwise...
  139. else
  140. {
  141. switch (attempt)
  142. {
  143. case 1:
  144. // Set the searchField if it is null
  145. if (searchField == null && city != null) searchField = city.toLowerCase();
  146. if (searchField == null && region != null) searchField = region.toLowerCase();
  147. if (searchField == null && country != null) searchField = country.toLowerCase();
  148. //
  149. if (searchField != null) query.put("search_field", searchField);
  150. if (city != null) query.put("city", city);
  151. if (region != null) query.put("region", region);
  152. if (country != null) query.put("country", country);
  153. if (null == searchField) { // only country code specified...
  154. query.put("city", new BasicDBObject(DbManager.exists_, false));
  155. query.put("region", new BasicDBObject(DbManager.exists_, false));
  156. }
  157. if (countryCode != null) query.put("country_code", countryCode);
  158. break;
  159. case 2:
  160. if (city != null)
  161. {
  162. query.put("search_field", city.toLowerCase());
  163. query.put("city", city);
  164. }
  165. else if (region != null)
  166. {
  167. query.put("search_field", region.toLowerCase());
  168. query.put("region", region);
  169. }
  170. else
  171. {
  172. query.put("search_field", country.toLowerCase());
  173. }
  174. if (country != null) query.put("country", country);
  175. if (countryCode != null) query.put("country_code", countryCode);
  176. break;
  177. case 3:
  178. if (searchField == null && region != null) searchField = region.toLowerCase();
  179. if (searchField == null && country != null) searchField = country.toLowerCase();
  180. if (searchField != null) query.put("search_field", searchField);
  181. if (region != null) query.put("region", region);
  182. if (country != null) query.put("country", country);
  183. if (countryCode != null) query.put("country_code", countryCode);
  184. break;
  185. default:
  186. if (country != null) query.put("search_field", country.toLowerCase());
  187. if (country != null) query.put("country", country);
  188. if (countryCode != null) query.put("country_code", countryCode);
  189. break;
  190. }
  191. }
  192. if (query.isEmpty()) {
  193. return null;
  194. }
  195. // Only return records with GeoIndex objects
  196. if (hasGeoindex)
  197. {
  198. BasicDBObject ne = new BasicDBObject();
  199. ne.append(DbManager.exists_, true);
  200. query.put("geoindex", ne);
  201. }
  202. return query;
  203. }
  204. /**
  205. * getGeoReference
  206. * @param cm
  207. * @param query
  208. * @param nMaxReturns
  209. * @return
  210. */
  211. private static DBCursor getGeoReference(DBCollection geoDb, BasicDBObject query, int nMaxReturns)
  212. {
  213. if (nMaxReturns == -1)
  214. {
  215. return geoDb.find(query);
  216. }
  217. else
  218. {
  219. return geoDb.find(query).limit(nMaxReturns);
  220. }
  221. }
  222. public synchronized static void resetCache() {
  223. _cache.clear();
  224. }
  225. public synchronized static void reset() {
  226. resetCache();
  227. }
  228. /**
  229. * getNearestCity
  230. * Get the city closest to the lat/lon pair passed in
  231. * @param lat
  232. * @param lon
  233. * @return List<GeoReferencePojo>
  234. */
  235. public static List<GeoFeaturePojo> getNearestCity(DBCollection geoDb, String lat, String lon)
  236. {
  237. return getNearestCities(geoDb, lat, lon, 1);
  238. }
  239. /**
  240. * getNearestCities
  241. * Get n-cities near a lat/lon pair, results returned ordered by distance from
  242. * the lat/lon pair
  243. * @param lat
  244. * @param lon
  245. * @param nMaxReturns
  246. * @return List<GeoReferencePojo>
  247. */
  248. public static List<GeoFeaturePojo> getNearestCities(DBCollection geoDb, String lat, String lon, int nMaxReturns)
  249. {
  250. try
  251. {
  252. // Create Double[] from lat, lon
  253. Double[] d = new Double[] { Double.parseDouble(lat) , Double.parseDouble(lon)};
  254. // Build query object to return the shell equivalent of:
  255. // db.georeference.find({geoindex : {$near : [lat.lon]}})
  256. BasicDBObject query = new BasicDBObject();
  257. BasicDBObject near = new BasicDBObject();
  258. near.append("$near", d);
  259. query.put("geoindex", near);
  260. // Perform query
  261. DBCursor result = geoDb.find(query).limit(nMaxReturns);
  262. // Convert results to List<GeoReferencePojo>
  263. List<GeoFeaturePojo> gpl = GeoFeaturePojo.listFromDb(result, new TypeToken<ArrayList<GeoFeaturePojo>>(){});
  264. return gpl;
  265. }
  266. catch (Exception e)
  267. {
  268. return null;
  269. }
  270. }
  271. }