PageRenderTime 82ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 1ms

/core/infinit.e.api.server/WEB-INF/src/com/ikanow/infinit/e/api/knowledge/QueryHandler.java

https://github.com/IKANOW/Infinit.e
Java | 3048 lines | 2059 code | 327 blank | 662 comment | 689 complexity | b78d32d20dbf27f2fa21cc4895940870 MD5 | raw file
Possible License(s): BSD-3-Clause
  1. /*******************************************************************************
  2. * Copyright 2012, The Infinit.e Open Source Project.
  3. *
  4. * This program is free software: you can redistribute it and/or modify
  5. * it under the terms of the GNU Affero General Public License, version 3,
  6. * as published by the Free Software Foundation.
  7. *
  8. * This program is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. * GNU Affero General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU Affero General Public License
  14. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  15. ******************************************************************************/
  16. package com.ikanow.infinit.e.api.knowledge;
  17. import java.io.IOException;
  18. import java.net.UnknownHostException;
  19. import java.util.ArrayList;
  20. import java.util.Arrays;
  21. import java.util.Calendar;
  22. import java.util.Collection;
  23. import java.util.Date;
  24. import java.util.HashMap;
  25. import java.util.LinkedList;
  26. import java.util.List;
  27. import java.util.Map;
  28. import java.util.Set;
  29. import java.util.TreeSet;
  30. import java.util.concurrent.Semaphore;
  31. import java.util.concurrent.TimeUnit;
  32. import java.util.regex.Pattern;
  33. import org.apache.commons.lang.time.DateUtils;
  34. import org.apache.commons.lang.time.DateFormatUtils;
  35. import org.apache.log4j.Logger;
  36. import org.bson.types.ObjectId;
  37. import org.elasticsearch.action.search.SearchResponse;
  38. import org.elasticsearch.client.action.search.SearchRequestBuilder;
  39. import org.elasticsearch.common.joda.time.Interval;
  40. import org.elasticsearch.common.unit.DistanceUnit;
  41. import org.elasticsearch.index.query.BaseQueryBuilder;
  42. import org.elasticsearch.index.query.BoolFilterBuilder;
  43. import org.elasticsearch.index.query.BoolQueryBuilder;
  44. import org.elasticsearch.index.query.CrossVersionQueryBuilders;
  45. import org.elasticsearch.index.query.CustomFiltersScoreQueryBuilder;
  46. import org.elasticsearch.index.query.FilterBuilders;
  47. import org.elasticsearch.index.query.GeoDistanceFilterBuilder;
  48. import org.elasticsearch.index.query.QueryBuilders;
  49. import org.elasticsearch.index.query.RangeQueryBuilder;
  50. import org.elasticsearch.search.sort.SortOrder;
  51. import com.google.common.collect.ArrayListMultimap;
  52. import com.google.gson.Gson;
  53. import com.google.gson.GsonBuilder;
  54. import com.ikanow.infinit.e.api.knowledge.aliases.AliasLookupTable;
  55. import com.ikanow.infinit.e.api.knowledge.aliases.AliasManager;
  56. import com.ikanow.infinit.e.api.knowledge.processing.AggregationUtils;
  57. import com.ikanow.infinit.e.api.knowledge.processing.QueryDecayFactory;
  58. import com.ikanow.infinit.e.api.knowledge.processing.ScoringUtils;
  59. import com.ikanow.infinit.e.api.social.sharing.ShareHandler;
  60. import com.ikanow.infinit.e.api.utils.PropertiesManager;
  61. import com.ikanow.infinit.e.api.utils.SimpleBooleanParser;
  62. import com.ikanow.infinit.e.api.utils.SocialUtils;
  63. import com.ikanow.infinit.e.data_model.Globals;
  64. import com.ikanow.infinit.e.data_model.api.BasePojoApiMap;
  65. import com.ikanow.infinit.e.data_model.api.ResponsePojo;
  66. import com.ikanow.infinit.e.data_model.api.ResponsePojo.ResponseObject;
  67. import com.ikanow.infinit.e.data_model.api.knowledge.AdvancedQueryPojo;
  68. import com.ikanow.infinit.e.data_model.api.knowledge.StatisticsPojo;
  69. import com.ikanow.infinit.e.data_model.control.DocumentQueueControlPojo;
  70. import com.ikanow.infinit.e.data_model.index.ElasticSearchManager;
  71. import com.ikanow.infinit.e.data_model.index.document.DocumentPojoIndexMap;
  72. import com.ikanow.infinit.e.data_model.interfaces.query.IQueryExtension;
  73. import com.ikanow.infinit.e.data_model.store.DbManager;
  74. import com.ikanow.infinit.e.data_model.store.custom.mapreduce.CustomMapReduceJobPojo;
  75. import com.ikanow.infinit.e.data_model.store.document.AssociationPojo;
  76. import com.ikanow.infinit.e.data_model.store.document.DocumentPojo;
  77. import com.ikanow.infinit.e.data_model.store.document.EntityPojo;
  78. import com.ikanow.infinit.e.data_model.store.feature.entity.EntityFeaturePojo;
  79. import com.ikanow.infinit.e.data_model.store.social.sharing.SharePojo;
  80. import com.ikanow.infinit.e.data_model.utils.GeoOntologyMapping;
  81. import com.mongodb.BasicDBObject;
  82. import com.mongodb.DBCollection;
  83. import com.mongodb.DBCursor;
  84. import com.mongodb.MongoException;
  85. import com.mongodb.ReadPreference;
  86. //
  87. // This code contains all the processing logic for the (beta)
  88. // Advanced Queries
  89. //
  90. //(remove this during active development - want to just suppress a deprecation warning but no way of doing this for both 0.19 and 1.0)
  91. //@SuppressWarnings("deprecation")
  92. @SuppressWarnings("all")
  93. public class QueryHandler {
  // Scratch buffer used by doQuery to assemble the per-request log line (cleared before each use)
  private final StringBuffer _logMsg = new StringBuffer();
  private static final Logger _logger = Logger.getLogger(QueryHandler.class);

  public QueryHandler() {}

  // Limits how many requests may be inside the document scoring section at once (2 permits).
  // Created eagerly: the previous lazy, unsynchronized creation allowed two racing threads to each
  // create a semaphore, so a permit could be acquired on one instance and released on another.
  private static final Semaphore _concurrentAccessLock = new Semaphore(2);

  /**
   * Tries to obtain one of the scoring permits, waiting up to 10 minutes.
   *
   * @return true if a permit was obtained (the caller must then call
   *         {@link #releaseConcurrentAccessLock()} exactly once), false if the wait timed out
   * @throws InterruptedException if the calling thread is interrupted while waiting
   */
  private boolean acquireConcurrentAccessLock() throws InterruptedException {
      return _concurrentAccessLock.tryAcquire(10, TimeUnit.MINUTES);
  }

  /**
   * Returns a permit previously obtained via {@link #acquireConcurrentAccessLock()}.
   * Must only be called by a thread whose acquire call returned true, otherwise the
   * number of available permits grows beyond the intended limit.
   */
  private void releaseConcurrentAccessLock() {
      _concurrentAccessLock.release();
  }

  // Query cache (re-created per request, but there's some static things in here for performance):
  private AliasLookupTable _aliasLookup = null;
  private LinkedList<AdvancedQueryPojo.QueryTermPojo> _extraFullTextTerms = null;
      // (used to allow entity terms to add top level (full text) terms)

  // Static configuration, populated by the first call to doQuery:
  private static PropertiesManager _properties = null;
  private static com.ikanow.infinit.e.data_model.utils.PropertiesManager _dataModelProps = null;
  private static String _aggregationAccuracy = "full";
  private static ArrayList<Class<IQueryExtension>> _queryExtensions = null;

  private AdvancedQueryPojo.QueryScorePojo _scoringParams;
      // (need this here so we can set the adjust param for complex queries)

  private static int _replicaSetDistributionRatio = -1;
  118. ////////////////////////////////////////////////////////////////////////
  119. // 0] Top level processing
  120. public ResponsePojo doQuery(String userIdStr, AdvancedQueryPojo query, String communityIdStrList, StringBuffer errorString) throws UnknownHostException, MongoException, IOException, InstantiationException, IllegalAccessException {
  121. if (null == _properties) {
  122. _properties = new PropertiesManager();
  123. _aggregationAccuracy = _properties.getAggregationAccuracy();
  124. _dataModelProps = new com.ikanow.infinit.e.data_model.utils.PropertiesManager();
  125. _replicaSetDistributionRatio = 1 + _dataModelProps.getDocDbReadDistributionRatio();
  126. String[] queryExtensions = _properties.getQueryExtensions();
  127. if (null != queryExtensions) {
  128. _queryExtensions = new ArrayList<Class<IQueryExtension>>(queryExtensions.length);
  129. for (String s: queryExtensions) {
  130. try {
  131. Class<IQueryExtension> queryExtensionClass = (Class<IQueryExtension>) Class.forName(s);
  132. _queryExtensions.add(queryExtensionClass);
  133. }
  134. catch (Exception e) {
  135. _logger.error("Failed to load query extension: " + s, e);
  136. }
  137. catch (Error e) {
  138. _logger.error("Failed to load query extension: " + s, e);
  139. }
  140. }//(end list over query extensions)
  141. if (_queryExtensions.isEmpty()) {
  142. _queryExtensions = null;
  143. }
  144. }//TESTED (see test.QueryExtensionsTestCode)
  145. }
  146. ObjectId queryId = null;
  147. _scoringParams = query.score;
  148. // (NOTE CAN'T ACCESS "query" UNTIL AFTER 0.1 BECAUSE THAT CAN CHANGE IT)
  149. long nSysTime = (_nNow = System.currentTimeMillis());
  150. ResponsePojo rp = new ResponsePojo();
  151. // communityIdList is CSV
  152. String[] communityIdStrs = SocialUtils.getCommunityIds(userIdStr, communityIdStrList);
  153. //(timing)
  154. long nQuerySetupTime = System.currentTimeMillis();
  155. ElasticSearchManager indexMgr = getIndexManager(communityIdStrs);
  156. SearchRequestBuilder searchSettings = indexMgr.getSearchOptions();
  157. StringBuffer querySummary = new StringBuffer();
  158. BaseQueryBuilder queryObj = null;
  159. InternalTempFilterInfo tempFilterInfo = null;
  160. try {
  161. queryObj = getBaseQuery(query, communityIdStrs, communityIdStrList, userIdStr, querySummary);
  162. if (null == queryObj) { // only occurs if has 1 element with ftext starting $cache:
  163. return getSavedQueryInstead(query.qt.get(0).ftext.substring(7), communityIdStrs, query); // (step over cache preamble)
  164. }
  165. tempFilterInfo = getBaseFilter(query, communityIdStrs);
  166. }
  167. catch (Exception e) {
  168. Globals.populateStackTrace(errorString, e);
  169. if (null != e.getCause()) {
  170. errorString.append("[CAUSE=").append(e.getCause().getMessage()).append("]");
  171. Globals.populateStackTrace(errorString, e.getCause());
  172. }
  173. errorString.append(": " + e.getMessage());
  174. return null;
  175. }
  176. //DEBUG
  177. //querySummary.append(new Gson().toJson(query, AdvancedQueryPojo.class));
  178. // 0.4] Pre-Lucene Scoring
  179. // 0.4.1] General
  180. // Different options:
  181. // a] Get the most recent N documents matching the query, score post-query
  182. // b] Get the N highest (Lucene) scoring documents, incorporate significance post-query if desired
  183. // In both cases, N depends on whether significance calculation is taking place (and on the "skip" param)
  184. int nRecordsToOutput = query.output.docs.numReturn;
  185. int nRecordsToSkip = query.output.docs.skip;
  186. int nRecordsToGet = query.score.numAnalyze;
  187. final int nMAXRECORDSTOOUTPUT = 10000;
  188. final int nMAXRECORDSTOGET = 20000;
  189. // Some sanity checking on doc numbers:
  190. if (nRecordsToOutput > nMAXRECORDSTOOUTPUT) { // Upper limit...
  191. errorString.append(": Max # docs to return is 10000.");
  192. return null;
  193. }
  194. if (nRecordsToGet < nRecordsToOutput) {
  195. nRecordsToGet = nRecordsToOutput;
  196. }
  197. else if (nRecordsToGet > nMAXRECORDSTOGET) { // Upper limit...
  198. nRecordsToGet = nMAXRECORDSTOGET; // (we can do something sensible with this so carry on regardless)
  199. }
  200. boolean bUseSignificance = (query.score.sigWeight > 0.0);
  201. boolean bNeedExtraResultsForEnts =
  202. ((query.output.aggregation != null) && (query.output.aggregation.entsNumReturn != null) && (query.output.aggregation.entsNumReturn > 0))
  203. ||
  204. (query.output.docs.enable && (query.output.docs.numReturn > 0) && (query.output.docs.ents) && (query.score.scoreEnts));
  205. if (bUseSignificance || bNeedExtraResultsForEnts) {
  206. // Some logic taken from the original "knowledge/search"
  207. while ( (nRecordsToSkip + nRecordsToOutput > nRecordsToGet) && (nRecordsToGet <= nMAXRECORDSTOGET) )
  208. {
  209. nRecordsToGet += nRecordsToGet;
  210. }
  211. if (nRecordsToGet > nMAXRECORDSTOGET) {
  212. errorString.append(": Can only skip through to 20000 documents.");
  213. return null;
  214. }
  215. searchSettings.setSize(nRecordsToGet);
  216. //TESTED
  217. }
  218. else if (query.output.docs.enable) { // In this case we just need the minimum number of records
  219. // (whether searching by date or by relevance)
  220. searchSettings.setFrom(nRecordsToSkip);
  221. nRecordsToSkip = 0; // (so it isn't double counted in the processing module)
  222. nRecordsToGet = nRecordsToOutput;
  223. searchSettings.setSize(nRecordsToGet);
  224. //TESTED
  225. }
  226. else { // In thise case we're just outputting aggregations, and not even ones that come from the docs
  227. nRecordsToGet = 0; // (use this variable everywhere where we care about bring docs back either to output or for suitable aggregation)
  228. searchSettings.setSize(0);
  229. }
  230. // Sort on score if relevance is being used
  231. if (nRecordsToGet > 0) {
  232. if (query.score.relWeight > 0.0) { // (b) above
  233. // Using score is default, nothing to do
  234. }
  235. else { // (a) above
  236. // Debug code, if rel weight negative then use date to check Lucene score is better...
  237. if (query.score.relWeight < 0.0) {
  238. query.score.relWeight = -query.score.relWeight;
  239. }
  240. // Set Lucene to order:
  241. searchSettings.addSort(DocumentPojo.publishedDate_, SortOrder.DESC);
  242. }//TOTEST
  243. }//(if docs aren't enabled, don't need to worry about sorting)
  244. // 0.4.2] Prox scoring (needs to happen after [0.3]
  245. // Add proximity scoring:
  246. boolean bLowAccuracyDecay = false;
  247. if ((nRecordsToGet > 0) || (null == _scoringParams.adjustAggregateSig) || _scoringParams.adjustAggregateSig) {
  248. // (ie if we're getting docs or applying scores to entities)
  249. if (!_aggregationAccuracy.equals("full")) {
  250. bLowAccuracyDecay = true;
  251. }
  252. queryObj = addProximityBasedScoring(queryObj, searchSettings, query.score, tempFilterInfo.parentFilterObj, bLowAccuracyDecay);
  253. if (null == _scoringParams.adjustAggregateSig) { // auto-decide .. if ftext is set and is non-trivial
  254. if ((null != query.score.timeProx) || (null != query.score.geoProx)) {
  255. // (These are set to null above if badly formed)
  256. _scoringParams.adjustAggregateSig = true;
  257. }
  258. }
  259. }// (else not worth the effort)
  260. // 0.4.3] Source weightings (if any)
  261. queryObj = applyManualWeights(queryObj, query.score);
  262. // 0.5] Pre-lucene output options
  263. // only return the id field and score
  264. // (Both _id and score come back as default options, SearchHit:: getId and getScore, don't need anything else)
  265. // Facets
  266. // (These are needed for the case where we need to perform aggregations manually)
  267. Integer manualEntsNumReturn = null;
  268. Integer manualEventsNumReturn = null;
  269. Integer manualFactsNumReturn = null;
  270. Integer manualGeoNumReturn = null;
  271. //DEBUG
  272. //System.out.println(new Gson().toJson(query.output.aggregation));
  273. if ((null != query.output.aggregation) && (null != query.output.aggregation.raw)) { // Like query, specify raw aggregation (Facets)
  274. // Gross raw handling for facets
  275. if ((null != query.raw) && (null != query.raw.query)) {
  276. // Don't currently support raw query and raw facets because I can't work out how to apply
  277. // the override on group/source!
  278. errorString.append(": Not currently allowed raw query and raw facets");
  279. return null;
  280. }
  281. else { // Normal code
  282. searchSettings.setFacets(query.output.aggregation.raw.getBytes());
  283. }
  284. }
  285. else { // Apply various aggregation (=="facet") outputs to searchSettings
  286. boolean bSpecialCase = (null != query.raw) && (null != query.raw.query);
  287. if (!_aggregationAccuracy.equals("full")) {
  288. if (null != query.output.aggregation) {
  289. if (_aggregationAccuracy.equals("low")) {
  290. manualEntsNumReturn = query.output.aggregation.entsNumReturn;
  291. manualEventsNumReturn = query.output.aggregation.eventsNumReturn;
  292. manualFactsNumReturn = query.output.aggregation.factsNumReturn;
  293. manualGeoNumReturn = query.output.aggregation.geoNumReturn;
  294. }
  295. query.output.aggregation.entsNumReturn = null;
  296. query.output.aggregation.eventsNumReturn = null;
  297. query.output.aggregation.factsNumReturn = null;
  298. query.output.aggregation.geoNumReturn = null;
  299. // (allow time aggregation)
  300. // (allow source aggregation)
  301. }
  302. }
  303. AggregationUtils.parseOutputAggregation(query.output.aggregation, _aliasLookup,
  304. tempFilterInfo.entityTypeFilterStrings, tempFilterInfo.assocVerbFilterStrings,
  305. searchSettings, bSpecialCase?tempFilterInfo.parentFilterObj:null);
  306. // In partial accuracy case, restore aggregation
  307. if (null != manualEntsNumReturn) {
  308. query.output.aggregation.entsNumReturn = manualEntsNumReturn;
  309. }
  310. if (null != manualEventsNumReturn) {
  311. query.output.aggregation.eventsNumReturn = manualEventsNumReturn;
  312. }
  313. if (null != manualFactsNumReturn) {
  314. query.output.aggregation.factsNumReturn = manualFactsNumReturn;
  315. }
  316. if (null != manualGeoNumReturn) {
  317. query.output.aggregation.geoNumReturn = manualGeoNumReturn;
  318. }
  319. //TESTED
  320. }
  321. //TESTED x2
  322. //(timing)
  323. nQuerySetupTime = System.currentTimeMillis() - nQuerySetupTime;
  324. // 0.6] Perform Lucene query
  325. // 0.6.1: query extensions: pre-query hook
  326. ArrayList<IQueryExtension> queryExtensions = null;
  327. if (null != _queryExtensions) {
  328. queryId = new ObjectId();
  329. queryExtensions = new ArrayList<IQueryExtension>(_queryExtensions.size());
  330. for (Class<IQueryExtension> queryExtensionClass: _queryExtensions) {
  331. // Don't catch any exceptions thrown here - let it bubble upwards
  332. IQueryExtension queryExtension = queryExtensionClass.newInstance();
  333. queryExtension.preQueryActivities(queryId, query, communityIdStrs);
  334. queryExtensions.add(queryExtension);
  335. }
  336. }//TESTED (see test.QueryExtensionsTestCode)
  337. // 0.6.2: the main query
  338. if ((null != query.explain) && query.explain) { // (for diagnostic - will return lucene explanation)
  339. searchSettings.setExplain(true);
  340. }
  341. SearchResponse queryResults = null;
  342. if ((null != query.raw) && (null != query.raw.query))
  343. {
  344. // (Can bypass all other settings)
  345. searchSettings.setQuery(query.raw.query);
  346. queryResults = indexMgr.doQuery(null, tempFilterInfo.parentFilterObj, searchSettings);
  347. }//TESTED '{ "raw": { "match_all": {} } }'
  348. else
  349. {
  350. // Where I can, use the source filter as part of the query so that
  351. // facets will apply to query+filter, not just filter
  352. queryObj = QueryBuilders.boolQuery().must(queryObj).must(QueryBuilders.constantScoreQuery(tempFilterInfo.parentFilterObj).boost(0.0F));
  353. queryResults = indexMgr.doQuery(queryObj, null, searchSettings);
  354. }//TESTED '{}' etc
  355. long nLuceneTime = queryResults.getTookInMillis();
  356. // 0.7] Lucene scores
  357. long nProcTime = 0;
  358. long nProcTime_tmp = System.currentTimeMillis();
  359. StatisticsPojo stats = new StatisticsPojo();
  360. stats.found = queryResults.getHits().getTotalHits();
  361. stats.start = (long)nRecordsToSkip;
  362. if (nRecordsToGet > 0) {
  363. stats.setScore(queryResults.getHits(), (null != query.score.geoProx)||(null != query.score.timeProx), (null != query.explain) && query.explain);
  364. }
  365. //DEBUG
  366. //System.out.println(new Gson().toJson(queryResults));
  367. nProcTime += (System.currentTimeMillis() - nProcTime_tmp);
  368. // 0.8] Get data from Mongo + handle scoring
  369. //(timing)
  370. long nMongoTime = System.currentTimeMillis();
  371. List<BasicDBObject> docs = null;
  372. //(aggregation)
  373. LinkedList<BasicDBObject> lowAccuracyAggregatedEntities = null; // (always low accuracy)
  374. LinkedList<BasicDBObject> standaloneEvents = null;
  375. LinkedList<BasicDBObject> lowAccuracyAggregatedEvents = null;
  376. LinkedList<BasicDBObject> lowAccuracyAggregatedFacts = null;
  377. AggregationUtils.GeoContainer lowAccuracyAggregatedGeo = null;
  378. AggregationUtils.GeoContainer extraAliasAggregatedGeo = null;
  379. ScoringUtils scoreStats = null;
  380. if (null != stats.getIds()) {
  381. DBCursor docs0 = this.getDocIds(DbManager.getDocument().getMetadata(), stats.getIds(), nRecordsToGet, query.output, query.score);
  382. nMongoTime = System.currentTimeMillis() - nMongoTime;
  383. nProcTime_tmp = System.currentTimeMillis();
  384. // Entity aggregation (CURRENTLY ALWAYS LOW AGGREGATION):
  385. if ((null != query.output.aggregation) && (null != query.output.aggregation.entsNumReturn) && (query.output.aggregation.entsNumReturn > 0)) {
  386. lowAccuracyAggregatedEntities = new LinkedList<BasicDBObject>();
  387. }
  388. // Standalone events:
  389. if ((query.output.docs != null) && (query.output.docs.eventsTimeline != null) && query.output.docs.eventsTimeline) {
  390. standaloneEvents = new LinkedList<BasicDBObject>();
  391. }
  392. // Low accuracy aggregations:
  393. if ((null != manualEventsNumReturn) && (manualEventsNumReturn > 0)) {
  394. lowAccuracyAggregatedEvents = new LinkedList<BasicDBObject>();
  395. }
  396. if ((null != manualFactsNumReturn) && (manualFactsNumReturn > 0)) {
  397. lowAccuracyAggregatedFacts = new LinkedList<BasicDBObject>();
  398. }
  399. if ((null != manualGeoNumReturn) && (manualGeoNumReturn > 0)) {
  400. lowAccuracyAggregatedGeo = new AggregationUtils.GeoContainer();
  401. }
  402. else if ((null != query.output.aggregation) && (null != query.output.aggregation.geoNumReturn) && (query.output.aggregation.geoNumReturn > 0))
  403. {
  404. // (only if not using low accuracy aggregation ... otherwise it all gets dumped in lowAccuracyAggregatedGeo)
  405. extraAliasAggregatedGeo = new AggregationUtils.GeoContainer();
  406. }
  407. scoreStats = new ScoringUtils();
  408. try {
  409. boolean lockAcquired = true;
  410. try {
  411. lockAcquired = this.acquireConcurrentAccessLock();
  412. } catch (InterruptedException e) {
  413. //(that's fine just carry on)
  414. lockAcquired = false;
  415. }
  416. if (!lockAcquired) {
  417. rp.setResponse(new ResponseObject("Query", false, "Query engine busy, please try again later."));
  418. return rp;
  419. }
  420. scoreStats.setAliasLookupTable(_aliasLookup);
  421. docs = scoreStats.calcTFIDFAndFilter(DbManager.getDocument().getMetadata(),
  422. docs0, query.score, query.output, stats, bLowAccuracyDecay,
  423. nRecordsToSkip, nRecordsToOutput,
  424. communityIdStrs,
  425. tempFilterInfo.entityTypeFilterStrings, tempFilterInfo.assocVerbFilterStrings,
  426. standaloneEvents,
  427. lowAccuracyAggregatedEntities,
  428. lowAccuracyAggregatedGeo, extraAliasAggregatedGeo,
  429. lowAccuracyAggregatedEvents, lowAccuracyAggregatedFacts);
  430. }
  431. finally {
  432. scoreStats.clearAsMuchMemoryAsPossible();
  433. this.releaseConcurrentAccessLock();
  434. }
  435. nProcTime += (System.currentTimeMillis() - nProcTime_tmp);
  436. }
  437. else {
  438. nMongoTime = 0;
  439. }
  440. //TESTED (all queries)
  441. // 0.9] Output:
  442. rp.setResponse(new ResponseObject("Query", true, querySummary.toString()));
  443. // 0.9.1] Stats:
  444. stats.resetArrays();
  445. rp.setStats(stats); // (only actually uses the response pojo, but get rid of big fields anyway...)
  446. // 0.9.2] Facets:
  447. if (null != lowAccuracyAggregatedEntities) { // Entity aggregation
  448. rp.setEntities(lowAccuracyAggregatedEntities);
  449. }
  450. if (null != standaloneEvents) {
  451. rp.setEventsTimeline(standaloneEvents);
  452. }
  453. if (null != lowAccuracyAggregatedGeo) {
  454. rp.setGeo(lowAccuracyAggregatedGeo.geotags, (int)lowAccuracyAggregatedGeo.maxCount, (int)lowAccuracyAggregatedGeo.minCount);
  455. }
  456. if (null != lowAccuracyAggregatedEvents) {
  457. rp.setEvents(lowAccuracyAggregatedEvents);
  458. }
  459. if (null != lowAccuracyAggregatedFacts) {
  460. rp.setFacts(lowAccuracyAggregatedFacts);
  461. }
  462. if ((null != query.output.aggregation) && (null != query.output.aggregation.raw)) {
  463. rp.setFacets(queryResults.getFacets().facetsAsMap());
  464. }
  465. else if ((null != queryResults.getFacets()) && (null != queryResults.getFacets().getFacets())) { // "Logical" aggregation
  466. if (0.0 == query.score.sigWeight) {
  467. scoreStats = null; // (don't calculate event/fact aggregated significance if it's not wanted)
  468. }
  469. AggregationUtils.loadAggregationResults(rp, queryResults.getFacets().getFacets(), query.output.aggregation, scoreStats, _aliasLookup, tempFilterInfo.entityTypeFilterStrings, tempFilterInfo.assocVerbFilterStrings, extraAliasAggregatedGeo);
  470. } // (end facets not overwritten)
  471. scoreStats = null; // (now definitely never need scoreStats)
  472. // 0.9.3] Documents
  473. if (query.output.docs.enable) {
  474. if ((null != docs) && (docs.size() > 0)) {
  475. rp.setData(docs, (BasePojoApiMap<BasicDBObject>)null);
  476. }
  477. else { // (ensure there's always an empty list)
  478. docs = new ArrayList<BasicDBObject>(0);
  479. rp.setData(docs, (BasePojoApiMap<BasicDBObject>)null);
  480. }
  481. }
  482. else { // (ensure there's always an empty list)
  483. docs = new ArrayList<BasicDBObject>(0);
  484. rp.setData(docs, (BasePojoApiMap<BasicDBObject>)null);
  485. }
  486. // 0.9.4] query extensions: post-query hook
  487. if (null != queryExtensions) {
  488. for (IQueryExtension queryExtension: queryExtensions) {
  489. // Don't catch any exceptions thrown here - let it bubble upwards
  490. queryExtension.postQueryActivities(queryId, docs, rp);
  491. }
  492. }//TESTED (see test.QueryExtensionsTestCode)
  493. // 0.9.5] Timing/logging
  494. long nTotalTime = System.currentTimeMillis() - nSysTime;
  495. rp.getResponse().setTime(nTotalTime);
  496. _logMsg.setLength(0);
  497. _logMsg.append("knowledge/query querylen=").append(querySummary.length());
  498. _logMsg.append(" query=").append(querySummary.toString());
  499. _logMsg.append(" userid=").append(userIdStr);
  500. _logMsg.append(" groups=").append(communityIdStrList);
  501. _logMsg.append(" found=").append(stats.found);
  502. _logMsg.append(" luceneTime=").append(nLuceneTime).append(" ms");
  503. _logMsg.append(" setupTime=").append(nQuerySetupTime).append(" ms");
  504. _logMsg.append(" procTime=").append(nProcTime).append(" ms");
  505. _logMsg.append(" mongoTime=").append(nMongoTime).append(" ms");
  506. _logMsg.append(" time=").append(nTotalTime).append(" ms");
  507. _logger.info(_logMsg.toString());
  508. //DEBUG
  509. //System.out.println(_logMsg.toString());
  510. // Exceptions percolate up to the resource and are handled there...
  511. return rp;
  512. }
  513. ////////////////////////////////////////////////////////////////////////
  514. // Utility version of the above query call - just converts the advanced query pojo into an elasticsearch object that can
  515. // be queried
  516. public static class QueryInfo {
  517. public ElasticSearchManager indexMgr;
  518. public BaseQueryBuilder queryObj;
  519. public String querySummary;
  520. }
  521. public QueryInfo convertInfiniteQuery(AdvancedQueryPojo query, String[] communityIdStrs, String userIdStr) {
  522. // Fill in the blanks (a decent attempt has been made to fill out the blanks inside these options)
  523. if (null == query.input) {
  524. query.input = new AdvancedQueryPojo.QueryInputPojo();
  525. }
  526. if (null == query.score) {
  527. query.score = new AdvancedQueryPojo.QueryScorePojo();
  528. }
  529. if (null == query.output) {
  530. query.output = new AdvancedQueryPojo.QueryOutputPojo();
  531. }
  532. if (null == query.output.docs) { // (Docs are sufficiently important we'll make sure they're always present)
  533. query.output.docs = new AdvancedQueryPojo.QueryOutputPojo.DocumentOutputPojo();
  534. }
  535. // Other intialization
  536. _nNow = System.currentTimeMillis();
  537. // Now onto the logic:
  538. QueryInfo queryInfo = new QueryInfo();
  539. StringBuffer sb = new StringBuffer(userIdStr);
  540. for (String sCommunityId: communityIdStrs) {
  541. sb.append(',').append(sCommunityId);
  542. }
  543. queryInfo.indexMgr = getIndexManager(communityIdStrs);
  544. StringBuffer info = new StringBuffer();
  545. queryInfo.queryObj = getBaseQuery(query, communityIdStrs, sb.toString(), userIdStr, info);
  546. queryInfo.querySummary = info.toString();
  547. InternalTempFilterInfo tempFilterInfo = getBaseFilter(query, communityIdStrs);
  548. queryInfo.queryObj = QueryBuilders.boolQuery().must(queryInfo.queryObj).
  549. must(QueryBuilders.constantScoreQuery(tempFilterInfo.parentFilterObj).boost(0.0F));
  550. return queryInfo;
  551. }//TOTEST
  552. ////////////////////////////////////////////////////////////////////////
  553. //0b] QUERY BREAKDOWN
  554. ////////////////////////////////////////////////////////////////////////
  555. // 0.b.1) indexes
  556. private ElasticSearchManager getIndexManager(String[] communityIdStrs)
  557. {
  558. // Create a multi-index to check against all relevant shards:
  559. StringBuffer sb = new StringBuffer(DocumentPojoIndexMap.globalDocumentIndexCollection_);
  560. sb.append(',').append(DocumentPojoIndexMap.manyGeoDocumentIndexCollection_);
  561. for (String sCommunityId: communityIdStrs) {
  562. sb.append(',').append("docs_").append(sCommunityId);
  563. }
  564. sb.append('/').append(DocumentPojoIndexMap.documentType_);
  565. ElasticSearchManager indexMgr = ElasticSearchManager.getIndex(sb.toString());
  566. return indexMgr;
  567. }//TESTED (cut and paste from original code)
  568. ////////////////////////////////////////////////////////////////////////
  569. // 0.b.1) filter
  570. private static class InternalTempFilterInfo {
  571. BoolFilterBuilder parentFilterObj;
  572. String[] entityTypeFilterStrings;
  573. String[] assocVerbFilterStrings;
  574. }
  575. private InternalTempFilterInfo getBaseFilter(AdvancedQueryPojo query, String communityIdStrs[])
  576. {
  577. BoolFilterBuilder parentFilterObj =
  578. FilterBuilders.boolFilter().must(FilterBuilders.termsFilter(DocumentPojo.communityId_, communityIdStrs));
  579. BoolFilterBuilder sourceFilter = this.parseSourceManagement(query.input);
  580. if (null != sourceFilter) {
  581. parentFilterObj = parentFilterObj.must(sourceFilter);
  582. }//TESTED
  583. // 0.2] Output filtering
  584. // Output filters: parse (also used by aggregation, scoring)
  585. String[] entityTypeFilterStrings = null;
  586. String[] assocVerbFilterStrings = null;
  587. if ((null != query.output) && (null != query.output.filter)) {
  588. if (null != query.output.filter.entityTypes) {
  589. entityTypeFilterStrings = query.output.filter.entityTypes;
  590. if (0 == entityTypeFilterStrings.length) {
  591. entityTypeFilterStrings = null;
  592. }
  593. else if ((1 == entityTypeFilterStrings.length) && (entityTypeFilterStrings[0].isEmpty())) {
  594. entityTypeFilterStrings = null;
  595. }
  596. // (note this is important because it means we can always check entityTypeFilterStrings[0].getCharAt(0) for -ve filtering)
  597. }
  598. if (null != query.output.filter.assocVerbs) {
  599. assocVerbFilterStrings = query.output.filter.assocVerbs;
  600. if (0 == assocVerbFilterStrings.length) {
  601. assocVerbFilterStrings = null;
  602. }
  603. else if ((1 == assocVerbFilterStrings.length) && (assocVerbFilterStrings[0].isEmpty())) {
  604. assocVerbFilterStrings = null;
  605. }
  606. // (note this is important because it means we can always check assocVerbFilterStrings[0].getCharAt(0) for -ve filtering)
  607. }
  608. }
  609. // Now apply output filters to query
  610. BoolFilterBuilder outputFilter = this.parseOutputFiltering(entityTypeFilterStrings, assocVerbFilterStrings);
  611. if (null != outputFilter) {
  612. parentFilterObj = parentFilterObj.must(outputFilter);
  613. }
  614. //TESTED
  615. InternalTempFilterInfo out = new InternalTempFilterInfo();
  616. out.parentFilterObj = parentFilterObj;
  617. out.entityTypeFilterStrings = entityTypeFilterStrings;
  618. out.assocVerbFilterStrings = assocVerbFilterStrings;
  619. return out;
  620. }//TESTED (cut/paste from original code)
  621. ////////////////////////////////////////////////////////////////////////
  622. // 0.b.2] Query
  623. // (if it returns null then call getSavedQueryInstead instead)
  624. private BaseQueryBuilder getBaseQuery(AdvancedQueryPojo query, String communityIdStrs[], String communityIdStrList, String userIdStr, StringBuffer querySummary)
  625. {
  626. // Intialize alias if so required:
  627. if ((null == query.expandAlias) || query.expandAlias) {
  628. AliasManager aliasManager = AliasManager.getAliasManager();
  629. if (null != aliasManager) {
  630. _aliasLookup = aliasManager.getAliasLookupTable(null, communityIdStrs, null, userIdStr);
  631. }
  632. }
  633. // (end initialize index)
  634. BaseQueryBuilder queryObj = null;
  635. // 0.1] Input data (/filtering)
  636. if (null != query.input.name) { // This is actually a share id visible to this user
  637. query = getStoredQueryArtefact(query.input.name, query, userIdStr);
  638. }
  639. // 0.3] Query terms
  640. int nQueryElements = 0;
  641. if (null != query.qt) {
  642. nQueryElements = query.qt.size();
  643. if ((1 == nQueryElements) && (null != query.qt.get(0).ftext) && (query.qt.get(0).ftext.startsWith("$cache:"))) {
  644. return null;
  645. }
  646. if (nQueryElements > 0) { // NORMAL CASE
  647. this.handleEntityExpansion(DbManager.getFeature().getEntity(), query.qt, userIdStr, communityIdStrList);
  648. BaseQueryBuilder queryElements[] = new BaseQueryBuilder[nQueryElements];
  649. StringBuffer sQueryElements[] = new StringBuffer[nQueryElements];
  650. for (int i = 0; i < nQueryElements; ++i) {
  651. _extraFullTextTerms = null;
  652. queryElements[i] = this.parseQueryTerm(query.qt.get(i), (sQueryElements[i] = new StringBuffer()));
  653. // Extra full text terms generated by aliasing:
  654. if (null != _extraFullTextTerms) {
  655. BoolQueryBuilder extraTerms = QueryBuilders.boolQuery().should(queryElements[i]);
  656. StringBuffer discard = new StringBuffer(); // (we already have added the info the query elements)
  657. for (AdvancedQueryPojo.QueryTermPojo qtExtra: _extraFullTextTerms) {
  658. extraTerms = extraTerms.should(this.parseQueryTerm(qtExtra, discard));
  659. }
  660. queryElements[i] = extraTerms;
  661. _extraFullTextTerms = null; // (reset ready for next term...)
  662. }//TESTED
  663. }//end loop over query terms
  664. queryObj = this.parseLogic(query.logic, queryElements, sQueryElements, querySummary);
  665. if (null == queryObj) { //error parsing logic
  666. throw new RuntimeException("Error parsing logic");
  667. }
  668. }
  669. else { //(QT exists but doesn't have any elements)
  670. queryObj = QueryBuilders.matchAllQuery();
  671. querySummary.append('*');
  672. }
  673. }//TESTED
  674. else {
  675. queryObj = QueryBuilders.matchAllQuery();
  676. querySummary.append('*');
  677. } //(QT not specified)
  678. return queryObj;
  679. }//TESTED (cut/paste from original code)
  680. ////////////////////////////////////////////////////////////////////////
  681. //1] QUERY UTILITIES
  682. ////////////////////////////////////////////////////////////////////////
  683. // 1.0] Stored queries/datasets
  684. // Saved queries (ie the entire dataset)
  685. private ResponsePojo getSavedQueryInstead(String storedQueryNameOrId, String[] communityIdStrs, AdvancedQueryPojo query) {
  686. ResponsePojo rp = null;
  687. ObjectId oid = null;
  688. BasicDBObject jobQuery = null;
  689. try {
  690. oid = new ObjectId(storedQueryNameOrId);
  691. jobQuery = new BasicDBObject(CustomMapReduceJobPojo._id_, oid);
  692. }
  693. catch (Exception e) {
  694. jobQuery = new BasicDBObject(CustomMapReduceJobPojo.jobtitle_, storedQueryNameOrId);
  695. }
  696. CustomMapReduceJobPojo savedJob = CustomMapReduceJobPojo.fromDb(DbManager.getCustom().getLookup().findOne(jobQuery), CustomMapReduceJobPojo.class);
  697. if (null != savedJob) { // Is this even a saved job?
  698. if (null != savedJob.jarURL) {
  699. savedJob = null;
  700. }
  701. }
  702. if (null != savedJob) { // Authorization
  703. boolean auth = false;
  704. String communityIdStrList = Arrays.toString(communityIdStrs);
  705. for (ObjectId commId: savedJob.communityIds) {
  706. if (communityIdStrList.contains(commId.toString())) {
  707. auth = true;
  708. break;
  709. }
  710. }
  711. if (!auth) {
  712. savedJob = null;
  713. }
  714. if (null == savedJob) {
  715. throw new RuntimeException("Can't find saved query, or is a custom job not a query, or authorization error");
  716. }
  717. // OK go get the results of the job
  718. DBCollection coll = DbManager.getCollection(savedJob.getOutputDatabase(), savedJob.outputCollection);
  719. BasicDBObject result = (BasicDBObject) coll.findOne(); // (at some point support multiple saved queries)
  720. if (null == result) {
  721. throw new RuntimeException("Saved query is empty");
  722. }
  723. BasicDBObject apiResultToConvert = (BasicDBObject) result.get("value");
  724. if (null == apiResultToConvert) {
  725. throw new RuntimeException("Saved query has invalid format");
  726. }
  727. rp = ResponsePojo.fromDb(apiResultToConvert);
  728. }
  729. else if (null != oid) { // Support new user/doc queues
  730. SharePojo share = SharePojo.fromDb(DbManager.getSocial().getShare().findOne(jobQuery), SharePojo.class);
  731. if ((null == share) || (null == share.getShare()) ||
  732. (!share.getType().equals(DocumentQueueControlPojo.UserQueue) && !share.getType().equals(DocumentQueueControlPojo.SavedQueryQueue))
  733. )
  734. {
  735. throw new RuntimeException("Can't find saved query, or is a custom job not a query, or authorization error");
  736. }
  737. else { // share.share is a DocumentQueueControlPojo
  738. DocumentQueueControlPojo queue = DocumentQueueControlPojo.fromApi(share.getShare(), DocumentQueueControlPojo.class);
  739. BasicDBObject docQuery1 = new BasicDBObject(DocumentPojo._id_, new BasicDBObject(DbManager.in_, queue.getQueueList()));
  740. BasicDBObject docQuery2 = new BasicDBObject(DocumentPojo.updateId_, new BasicDBObject(DbManager.in_, queue.getQueueList()));
  741. BasicDBObject docQuery = new BasicDBObject(DbManager.or_, Arrays.asList(docQuery1, docQuery2));
  742. DBCursor dbc = DbManager.getDocument().getMetadata().find(docQuery).limit(query.score.numAnalyze);
  743. ScoringUtils scoreStats = new ScoringUtils();
  744. List<BasicDBObject> docs = null;
  745. StatisticsPojo stats = new StatisticsPojo();
  746. stats.setSavedScores(query.output.docs.skip, dbc.count());
  747. try {
  748. boolean lockAcquired = true;
  749. try {
  750. lockAcquired = this.acquireConcurrentAccessLock();
  751. } catch (InterruptedException e) {
  752. //(that's fine just carry on)
  753. lockAcquired = false;
  754. }
  755. if (!lockAcquired) {
  756. rp.setResponse(new ResponseObject("Query", false, "Query engine busy, please try again later."));
  757. return rp;
  758. }
  759. scoreStats.setAliasLookupTable(_aliasLookup);
  760. docs = scoreStats.calcTFIDFAndFilter(DbManager.getDocument().getMetadata(),
  761. dbc, query.score, query.output, stats, false,
  762. query.output.docs.skip, query.output.docs.numReturn,
  763. communityIdStrs,
  764. null, null,
  765. null,
  766. null,
  767. null, null,
  768. null, null);
  769. }
  770. finally {
  771. scoreStats.clearAsMuchMemoryAsPossible();
  772. this.releaseConcurrentAccessLock();
  773. }
  774. rp = new ResponsePojo();
  775. rp.setResponse(new ResponseObject("Query", true, "Saved Query: " + share.getTitle()));
  776. rp.setStats(stats);
  777. if ((null != docs) && (docs.size() > 0)) {
  778. rp.setData(docs, (BasePojoApiMap<BasicDBObject>)null);
  779. }
  780. else { // (ensure there's always an empty list)
  781. docs = new ArrayList<BasicDBObject>(0);
  782. rp.setData(docs, (BasePojoApiMap<BasicDBObject>)null);
  783. }
  784. }//end if user or saved query queue
  785. }
  786. return rp;
  787. }//TESTED
  788. // Stored queries (ie just the query JSON)
  789. static AdvancedQueryPojo getStoredQueryArtefact(String shareIdStr, AdvancedQueryPojo query, String userIdStr) {
  790. ResponsePojo rp2 = new ShareHandler().getShare(userIdStr, shareIdStr, true);
  791. if ((null != rp2.getData() || !rp2.getResponse().isSuccess())) {
  792. SharePojo share = (SharePojo) rp2.getData();
  793. if (null != share) {
  794. if (share.getType().equalsIgnoreCase("dataset")) {
  795. query.input = new com.google.gson.Gson().fromJson(share.getShare(), AdvancedQueryPojo.QueryInputPojo.class);
  796. }
  797. else if (share.getType().equalsIgnoreCase("query")) {
  798. query = new com.google.gson.Gson().fromJson(share.getShare(), AdvancedQueryPojo.class);
  799. }
  800. else { // Unrecognized share
  801. throw new RuntimeException("Unexpected share type: " + share.getType());
  802. }
  803. }
  804. else {
  805. throw new RuntimeException("Invalid return from share: " + rp2.getData().toString());
  806. }
  807. }
  808. else {
  809. throw new RuntimeException(rp2.getResponse().getMessage());
  810. }
  811. return query;
  812. }
  813. ////////////////////////////////////////////////////////////////////////
  814. // 1.1] Source management utility
  815. BoolFilterBuilder parseSourceManagement(AdvancedQueryPojo.QueryInputPojo input) {
  816. BoolFilterBuilder sourceFilter = null;
  817. if ((null != input.tags) || (null != input.typeAndTags)
  818. || (null != input.sources))
  819. {
  820. sourceFilter = FilterBuilders.boolFilter();
  821. }//TESTED
  822. if (null != input.tags) {
  823. sourceFilter = sourceFilter.should(FilterBuilders.termsFilter(DocumentPojo.tags_, input.tags.toArray()));
  824. }//TESTED '{ "input": { "tags": [ "healthcare", "cyber" ] } }'
  825. if (null != input.typeAndTags) {
  826. BoolFilterBuilder typeAndTagFilter = FilterBuilders.boolFilter();
  827. for (AdvancedQueryPojo.QueryInputPojo.TypeAndTagTermPojo tt: input.typeAndTags) {
  828. if (null != tt.tags) {
  829. typeAndTagFilter = typeAndTagFilter.should(
  830. FilterBuilders.boolFilter().must(FilterBuilders.termFilter(DocumentPojo.mediaType_, tt.type)).
  831. must(FilterBuilders.termsFilter(DocumentPojo.tags_, tt.tags.toArray())));
  832. }
  833. else {
  834. typeAndTagFilter = typeAndTagFilter.should(FilterBuilders.termFilter(DocumentPojo.mediaType_, tt.type));
  835. }
  836. }
  837. sourceFilter = sourceFilter.should(typeAndTagFilter);
  838. }//TESTED '{ "input": { "typeAndTags": [ { "type": "Social" }, { "type": "Video", "tags": [ "education", "MIT" ] } ] } }'
  839. if (null != input.sources) {
  840. if ((null == input.srcInclude) || input.srcInclude) {
  841. sourceFilter = sourceFilter.should(FilterBuilders.termsFilter(DocumentPojo.sourceKey_, input.sources.toArray()));
  842. }
  843. else {
  844. sourceFilter = sourceFilter.mustNot(FilterBuilders.termsFilter(DocumentPojo.sourceKey_, input.sources.toArray()));
  845. }
  846. }//TESTED '{ "input": { "srcInclude": false, "sources": [ "http.twitter.com.statuses.public_timeline.atom", "http.gdata.youtube.com.feeds.base.users.mit.uploads.alt=rss.v=2.orderby=published.client=ytapi-youtube-profile" ] } }'
  847. //(also "srcInclude" not set - checked got the complement of the result)
  848. return sourceFilter;
  849. }
  850. ////////////////////////////////////////////////////////////////////////
  851. // 1.X2] Output filter parsing
  852. private BoolFilterBuilder addNegativeSelectorToFilter(EntityFeaturePojo docDiscardAlias, BoolFilterBuilder outputFilter, int recursionLevel) {
  853. if ((null != docDiscardAlias.getAlias()) && !docDiscardAlias.getAlias().isEmpty()) {
  854. if (null == outputFilter) {
  855. outputFilter = FilterBuilders.boolFilter();
  856. }
  857. outputFilter = outputFilter.mustNot(FilterBuilders.nestedFilter(DocumentPojo.entities_,
  858. FilterBuilders.termsFilter(EntityPojo.docQuery_index_, docDiscardAlias.getAlias().toArray())));
  859. if (recursionLevel <= 1) { // (only go two deep for now)
  860. for (String aliasIndex: docDiscardAlias.getAlias()) {
  861. EntityFeaturePojo docDiscardSubAlias = _aliasLookup.getAliases(aliasIndex);
  862. if (null != docDiscardSubAlias) {
  863. outputFilter = addNegativeSelectorToFilter(docDiscardSubAlias, outputFilter, 1 + recursionLevel);
  864. }
  865. }//TESTED
  866. }
  867. }//TESTED (by hand)
  868. if (null != docDiscardAlias.getSemanticLinks()) { // (recall: we've abused this field for text queries)
  869. for (String textQuery: docDiscardAlias.getSemanticLinks()) {
  870. //(probably not a very efficient construct, but nothing about this is efficient, just functional, so we'll leave it for now)
  871. outputFilter = outputFilter.mustNot(FilterBuilders.queryFilter(
  872. CrossVersionQueryBuilders.matchPhraseQuery(DocumentPojo.fullText_, textQuery)));
  873. outputFilter = outputFilter.mustNot(FilterBuilders.queryFilter(
  874. CrossVersionQueryBuilders.matchPhraseQuery("_all", textQuery)));
  875. }
  876. }//TESTED (by hand)
  877. return outputFilter;
  878. }//TESTED
  879. BoolFilterBuilder parseOutputFiltering(String[] entityTypeFilterStrings, String[] assocVerbFilterStrings)
  880. {
  881. BoolFilterBuilder outputFilter = null;
  882. // First off: document discard aliases:
  883. if (null != _aliasLookup) { // Check out the document discard table...
  884. EntityFeaturePojo docDiscardAlias = _aliasLookup.getAliases("DOCUMENT_DISCARD");
  885. if (null != docDiscardAlias) {
  886. outputFilter = addNegativeSelectorToFilter(docDiscardAlias, outputFilter, 0);
  887. }
  888. }//TESTED (by hand, nothing repeatable)
  889. // Other simple filter types:
  890. if (null != entityTypeFilterStrings) {
  891. if ('-' != entityTypeFilterStrings[0].charAt(0)) { // (negative entity type, don't add to filter)
  892. outputFilter = FilterBuilders.boolFilter();
  893. outputFilter.must(FilterBuilders.nestedFilter(DocumentPojo.entities_,
  894. FilterBuilders.termsFilter(EntityPojo.docQuery_type_, entityTypeFilterStrings)));
  895. }
  896. }
  897. if (null != assocVerbFilterStrings) {
  898. if ('-' != assocVerbFilterStrings[0].charAt(0)) { // (negative verb, don't add to filter)
  899. if (null == outputFilter) {
  900. outputFilter = FilterBuilders.boolFilter();
  901. }
  902. BoolFilterBuilder verbFilter = FilterBuilders.boolFilter();
  903. StringBuffer sb = new StringBuffer();
  904. for (String assocVerb: assocVerbFilterStrings) {
  905. sb.setLength(0);
  906. sb.append('"').append(assocVerb).append('"');
  907. verbFilter.should(FilterBuilders.nestedFilter(DocumentPojo.associations_,
  908. QueryBuilders.queryString(sb.toString()).field(AssociationPojo.docQuery_verb_category_)));
  909. //(closest to exact that we can manage, obv verb_cat should actually be not_analyzed)
  910. }
  911. outputFilter.must(verbFilter);
  912. }
  913. }
  914. return outputFilter;
  915. }//TESTED
  916. ////////////////////////////////////////////////////////////////////////
  917. // 1.2] Query term parsing
  918. // (Not needed any more, but kept here for illustrative purposes)
  919. //private static Pattern _luceneExactPattern = Pattern.compile("([\"+~*?:|&(){}\\[\\]\\^\\!\\-\\\\ ])");
  920. private BaseQueryBuilder parseQueryTerm(AdvancedQueryPojo.QueryTermPojo qt, StringBuffer sQueryTerm) {
  921. BaseQueryBuilder term = null;
  922. BoolQueryBuilder boolTerm = null;
  923. sQueryTerm.setLength(0);
  924. sQueryTerm.append('(');
  925. // 1.1] Free text (Lucene)
  926. boolean nonEmpty = false;
  927. if (null != qt.ftext) { // NOTE term building code below depends on this being 1st clause
  928. nonEmpty = true;
  929. if (qt.ftext.startsWith("$cache")) { // currently not supported
  930. throw new RuntimeException("Don't currently support nested cached queries - coming soon.");
  931. }
  932. sQueryTerm.append('(');
  933. if (null != qt.metadataField) {
  934. sQueryTerm.append(qt.metadataField).append(':');
  935. }
  936. sQueryTerm.append(qt.ftext);
  937. sQueryTerm.append(')');
  938. if (null != qt.metadataField) { // Metadata only
  939. term = QueryBuilders.queryString(qt.ftext).field(qt.metadataField);
  940. }
  941. else {
  942. term = QueryBuilders.queryString(qt.ftext).field("_all").field(DocumentPojo.fullText_);
  943. }
  944. if (null == _scoringParams.adjustAggregateSig) { // auto-decide .. if ftext is set and is non-trivial
  945. if (qt.ftext.contains(" ")) {
  946. _scoringParams.adjustAggregateSig = true;
  947. }
  948. }
  949. }//TESTED (logic0)
  950. // 1.2] Exact text
  951. if (null != qt.etext) { // NOTE term building code below depends on this being 2nd clause
  952. nonEmpty = true;
  953. BaseQueryBuilder termQ = null;
  954. if (sQueryTerm.length() > 1) {
  955. sQueryTerm.append(" AND ");
  956. }
  957. if (qt.etext.equals("*")) { // Special case
  958. termQ = QueryBuilders.matchAllQuery();
  959. }
  960. else { // Normal query
  961. if (null != qt.metadataField) { // Metadata only
  962. termQ = CrossVersionQueryBuilders.matchPhraseQuery(qt.metadataField, qt.etext);
  963. }
  964. else { // Normal query
  965. termQ = QueryBuilders.boolQuery().
  966. should(CrossVersionQueryBuilders.matchPhraseQuery("_all", qt.etext)).
  967. should(CrossVersionQueryBuilders.matchPhraseQuery(DocumentPojo.fullText_, qt.etext));
  968. }
  969. }
  970. sQueryTerm.append('(');
  971. if (null != qt.metadataField) {
  972. sQueryTerm.append(qt.metadataField).append(':');
  973. }
  974. sQueryTerm.append('"');
  975. sQueryTerm.append(qt.etext);
  976. sQueryTerm.append("\")");
  977. if (null == term) {
  978. term = termQ;
  979. }
  980. else {
  981. term = (boolTerm = QueryBuilders.boolQuery().must(term).must(termQ));
  982. }
  983. }//TESTED (logic1)
  984. // Here's where it starts getting interesting:
  985. // 1.3] Entity
  986. if ((null != qt.entity) || (null != qt.entityValue) || ((null == qt.assoc) && (null != qt.sentiment))) { // (if no association specified then sentiment applies to entities)
  987. nonEmpty = true;
  988. if (sQueryTerm.length() > 1) {
  989. sQueryTerm.append(" AND ");
  990. }
  991. sQueryTerm.append('(');
  992. BaseQueryBuilder termQ = QueryBuilders.nestedQuery(DocumentPojo.entities_, this.parseEntityTerm(qt, sQueryTerm, EntityPojo.docQuery_index_)).scoreMode("max").boost((float)1.0);
  993. if (null == term) {
  994. term = termQ;
  995. }
  996. else if (null == boolTerm) {
  997. term = (boolTerm = QueryBuilders.boolQuery().must(term).must(termQ));
  998. }
  999. else {
  1000. term = (boolTerm = boolTerm.must(termQ));
  1001. }
  1002. sQueryTerm.append(')');
  1003. }//TESTED: logic2* TOTEST: alias expansion code (logic3)
  1004. // 1.4] Dates
  1005. if (null != qt.time) {
  1006. nonEmpty = true;
  1007. if (sQueryTerm.length() > 1) {
  1008. sQueryTerm.append(" AND ");
  1009. }
  1010. sQueryTerm.append('(');
  1011. BaseQueryBuilder termQ = this.parseDateTerm(qt.time, sQueryTerm);
  1012. if (null == term) {
  1013. term = termQ;
  1014. }
  1015. else if (null == boolTerm) {
  1016. term = (boolTerm = QueryBuilders.boolQuery().must(term).must(termQ));
  1017. }
  1018. else {
  1019. term = (boolTerm = boolTerm.must(termQ));
  1020. }
  1021. sQueryTerm.append(')');
  1022. }//TESTED (logic5-10)
  1023. // 1.5] Geo
  1024. if (null != qt.geo)
  1025. {
  1026. nonEmpty = true;
  1027. if (sQueryTerm.length() > 1)
  1028. {
  1029. sQueryTerm.append(" AND ");
  1030. }
  1031. sQueryTerm.append('(');
  1032. BaseQueryBuilder termQ = this.parseGeoTerm(qt.geo, sQueryTerm, GeoParseField.ALL);
  1033. if (null != termQ)
  1034. {
  1035. if (null == term)
  1036. {
  1037. term = termQ;
  1038. }
  1039. else if (null == boolTerm)
  1040. {
  1041. term = (boolTerm = QueryBuilders.boolQuery().must(term).must(termQ));
  1042. }
  1043. else
  1044. {
  1045. term = (boolTerm = boolTerm.must(termQ));
  1046. }
  1047. }
  1048. sQueryTerm.append(')');
  1049. } // (end geo)
  1050. if (null == qt.assoc) qt.assoc = qt.event;
  1051. //(continue to support the old "event" name for another release)
  1052. if (null != qt.assoc) {
  1053. nonEmpty = true;
  1054. if (sQueryTerm.length() > 1) {
  1055. sQueryTerm.append(" AND ");
  1056. }
  1057. sQueryTerm.append('(');
  1058. BaseQueryBuilder termQ = QueryBuilders.nestedQuery(DocumentPojo.associations_, this.parseAssociationTerm(qt.assoc, qt.sentiment, sQueryTerm));
  1059. if (null != termQ) {
  1060. if (null == term) {
  1061. term = termQ;
  1062. }
  1063. else if (null == boolTerm) {
  1064. term = (boolTerm = QueryBuilders.boolQuery().must(term).must(termQ));
  1065. }
  1066. else {
  1067. term = (boolTerm = boolTerm.must(termQ));
  1068. }
  1069. }
  1070. sQueryTerm.append(')');
  1071. } // (end event)
  1072. if (!nonEmpty) {
  1073. throw new RuntimeException("One+ of your query terms is empty!");
  1074. }//TOTEST
  1075. sQueryTerm.append(')');
  1076. return term;
  1077. }//TESTED (logic*) TOTEST event logic
  1078. //TESTED: each of the above cases with the following GUI commands:
  1079. // infiniteService.send('{"raw": { "match_all": {} } }');
  1080. // infiniteService.send('{ "input": { "tags": [ "healthcare", "cyber" ] } }');
  1081. // infiniteService.send('{ "input": { "typeAndTags": [ { "type": "Social" }, { "type": "Video", "tags": [ "education", "MIT" ] } ] } }');
  1082. // infiniteService.send('{ "input": { "typeAndTags": [ { "type": "Social" }, { "type": "Video", "tags": [ "education", "MIT" ] } ] } }');
  1083. // infiniteService.send('{ "input": { "sources": [ "http.twitter.com.statuses.public_timeline.atom", "http.gdata.youtube.com.feeds.base.users.mit.uploads.alt=rss.v=2.orderby=published.client=ytapi-youtube-profile" ] } }');
  1084. // infiniteService.send('{ "input": { "srcInclude": false, "sources": [ "http.twitter.com.statuses.public_timeline.atom", "http.gdata.youtube.com.feeds.base.users.mit.uploads.alt=rss.v=2.orderby=published.client=ytapi-youtube-profile" ] } }');
  1085. // infiniteService.send('{ "qt": [ { "etext":"barack obama" } ] }'); // (148 results)
  1086. // infiniteService.send('{ "qt": [ { "ftext":"barack obama" } ] }'); // (790 results)
  1087. // infiniteService.send('{ "qt": [ { "ftext":"+barack +obama" } ] }'); // (151 results)
  1088. // infiniteService.send('{ "qt": [ { "entity":"barack obama/person" } ] }'); // (132 results)
  1089. // infiniteService.send('{ "qt": [ { "time": { "min": "20100301", "max": "20100310" } } ] }'); // (worked - by inspection of timeline)
  1090. // infiniteService.send('{ "qt": [ { "geo": { "centerll": "(29.9569263889,15.7460923611)", "dist": "100mi" } } ] }'); //(259 results)
  1091. // infiniteService.send('{ "qt": [ { "geo": { "minll": "(28.9569263889,14.7460923611)", "maxll": "(30.9569263889,16.7460923611)" } } ] }'); //(259 results)
  1092. ////////////////////////////////////////////////////////////////////////
  1093. // 1.2.1] Entity term parsing
  1094. BaseQueryBuilder parseEntityTerm(AdvancedQueryPojo.QueryTermPojo qt, StringBuffer sQueryTerm, String sFieldName)
  1095. {
  1096. BaseQueryBuilder termQ = null;
  1097. // 1.3a] Entity decomposed
  1098. if (null != qt.entityValue)
  1099. {
  1100. qt.entityValue = qt.entityValue.toLowerCase();
  1101. if (null == qt.entityType) { // Allow arbitrary types
  1102. termQ = QueryBuilders.prefixQuery(sFieldName, qt.entityValue + "/");
  1103. sQueryTerm.append(sFieldName).append(":\"").append(qt.entityValue).append("/*\"");
  1104. }
  1105. else { //Equivalent to above
  1106. qt.entityType = qt.entityType.toLowerCase();
  1107. qt.entity = qt.entityValue + "/" + qt.entityType;
  1108. }
  1109. }//TESTED (use logic3f, logic3e)
  1110. // 1.3b] Entity index
  1111. else if (null != qt.entity)
  1112. {
  1113. qt.entity = qt.entity.toLowerCase();
  1114. int nIndex1 = qt.entity.lastIndexOf(':');
  1115. int nIndex2 = qt.entity.lastIndexOf('/');
  1116. if (nIndex1 > nIndex2) {
  1117. qt.entity = qt.entity.substring(0, nIndex1) + "/" + qt.entity.substring(nIndex1 + 1);
  1118. }//TESTED logic2
  1119. }//TESTED: logic2
  1120. // 1.3c] Logic
  1121. if ((null == termQ) && (null != qt.entity)) { // entities.index or fully-specified value,type
  1122. sQueryTerm.append(sFieldName).append(":\"").append(qt.entity).append('"');
  1123. // Just leave this fixed for entity expansion since we don't really support events anyway
  1124. // we'll have to sort this out later
  1125. BoolQueryBuilder termBoolQ = null;
  1126. if ((null != qt.entityOpt) && qt.entityOpt.expandAlias) {
  1127. // Alias expansion code
  1128. // Easy bit:
  1129. termBoolQ = QueryBuilders.boolQuery().should(QueryBuilders.termQuery(sFieldName, qt.entity));
  1130. // Interesting bit:
  1131. if (null != _tmpAliasMap) {
  1132. String[] terms = _tmpAliasMap.get(qt.entity).toArray(new String[0]);
  1133. if ((null != terms) && (terms.length > 0)) {
  1134. termQ = termBoolQ.should(QueryBuilders.termsQuery(EntityPojo.docQuery_actual_name_, terms));
  1135. sQueryTerm.append(" OR actual_name:$aliases");
  1136. }
  1137. }
  1138. }//TESTED logic3a,b,f - actual_name expansion
  1139. boolean isKeyword = false;
  1140. if (null != qt.entity) {
  1141. isKeyword = qt.entity.endsWith("/keyword");
  1142. }
  1143. else if (null != qt.entityType) {
  1144. isKeyword = qt.entityType.equalsIgnoreCase("Keyword");
  1145. }
  1146. if (((null != qt.entityOpt) && qt.entityOpt.rawText) || isKeyword)
  1147. {
  1148. if (null == this._extraFullTextTerms) {
  1149. _extraFullTextTerms = new LinkedList<AdvancedQueryPojo.QueryTermPojo>();
  1150. }
  1151. String dName = qt.entityValue;
  1152. if (null == dName) { // Derive disambiguated name if not directly specified
  1153. int nIndex2 = qt.entity.lastIndexOf('/');
  1154. dName = qt.entity.substring(0, nIndex2);
  1155. }//TESTED
  1156. if (EntityPojo.docQuery_index_ == sFieldName) { // (note: can use pointers here)
  1157. AdvancedQueryPojo.QueryTermPojo qtExtra = new AdvancedQueryPojo.QueryTermPojo();
  1158. qtExtra.etext = dName;
  1159. _extraFullTextTerms.add(qtExtra);
  1160. sQueryTerm.append(" OR ((\"").append(dName).append('"').append("))");
  1161. }
  1162. else if (AssociationPojo.docQuery_geo_index_ != sFieldName) { // (geo has no non-indexed form)
  1163. String nonIndexField = (AssociationPojo.docQuery_entity1_index_ == sFieldName) ? AssociationPojo.docQuery_entity1_ : AssociationPojo.docQuery_entity2_;
  1164. if (null == termBoolQ) {
  1165. termBoolQ = QueryBuilders.boolQuery();
  1166. }
  1167. termQ = termBoolQ = termBoolQ.should(CrossVersionQueryBuilders.matchPhraseQuery(nonIndexField, dName));
  1168. sQueryTerm.append(" OR ").append(nonIndexField).append(":\"").append(dName).append('"');
  1169. }//TESTED
  1170. }
  1171. //TESTED (entity+association) - entity options, add dname as exact text query
  1172. if (null != _aliasLookup) {
  1173. EntityFeaturePojo masterAlias = _aliasLookup.getAliases(qt.entity);
  1174. // (need to do it this way round to get the semantic links)
  1175. if (null != masterAlias) {
  1176. if (null == termBoolQ) {
  1177. termBoolQ = QueryBuilders.boolQuery();
  1178. }
  1179. sQueryTerm.append(" OR ").append(sFieldName).append(":$manual_aliases");
  1180. termBoolQ = termBoolQ.should(QueryBuilders.termQuery(sFieldName, qt.entity));
  1181. termQ = termBoolQ = termBoolQ.should(QueryBuilders.termsQuery(sFieldName, masterAlias.getAlias().toArray()));
  1182. // If want to add manual aliases as full text also...
  1183. if ((null != qt.entityOpt) && qt.entityOpt.rawText) {
  1184. if (null == this._extraFullTextTerms) {
  1185. _extraFullTextTerms = new LinkedList<AdvancedQueryPojo.QueryTermPojo>();
  1186. }
  1187. String nonIndexField = null;
  1188. if (AssociationPojo.docQuery_geo_index_ != sFieldName) { // (geo has no non-indexed form)
  1189. nonIndexField = (AssociationPojo.docQuery_entity1_index_ == sFieldName) ? AssociationPojo.docQuery_entity1_ : AssociationPojo.docQuery_entity2_;
  1190. }
  1191. // (slightly annoying because I have to derive the dnames for all of them)
  1192. for (String alias: masterAlias.getAlias()) {
  1193. int nIndex2 = alias.lastIndexOf('/');
  1194. String dName = alias.substring(0, nIndex2);
  1195. if (EntityPojo.docQuery_index_ == sFieldName) { // (note: can use pointers here)
  1196. AdvancedQueryPojo.QueryTermPojo qtExtra = new AdvancedQueryPojo.QueryTermPojo();
  1197. qtExtra.etext = dName;
  1198. _extraFullTextTerms.add(qtExtra);
  1199. }
  1200. else if (null != nonIndexField) {
  1201. termQ = termBoolQ = termBoolQ.should(CrossVersionQueryBuilders.matchPhraseQuery(nonIndexField, dName));
  1202. }
  1203. }
  1204. if (EntityPojo.docQuery_index_ == sFieldName) { // (note: can use pointers here)
  1205. sQueryTerm.append(" OR (($manual_aliases").append("))");
  1206. }
  1207. else if (null != nonIndexField) {
  1208. sQueryTerm.append(" OR ").append(nonIndexField).append(":$manual_aliases");
  1209. }
  1210. }//TESTED (entity + association) - aliases #1
  1211. // Recall: we're abusing linkdata to contain aliases:
  1212. if ((null != masterAlias.getSemanticLinks()) && !masterAlias.getSemanticLinks().isEmpty()) {
  1213. String nonIndexField = null;
  1214. if (AssociationPojo.docQuery_geo_index_ != sFieldName) { // (geo has no non-indexed form)
  1215. nonIndexField = (AssociationPojo.docQuery_entity1_index_ == sFieldName) ? AssociationPojo.docQuery_entity1_ : AssociationPojo.docQuery_entity2_;
  1216. }
  1217. if (null == this._extraFullTextTerms) {
  1218. _extraFullTextTerms = new LinkedList<AdvancedQueryPojo.QueryTermPojo>();
  1219. }
  1220. for (String textAlias: masterAlias.getSemanticLinks()) {
  1221. if (EntityPojo.docQuery_index_ == sFieldName) { // (note: can use pointers here)
  1222. AdvancedQueryPojo.QueryTermPojo qtExtra = new AdvancedQueryPojo.QueryTermPojo();
  1223. qtExtra.etext = textAlias;
  1224. _extraFullTextTerms.add(qtExtra);
  1225. }
  1226. else if (null != nonIndexField) {
  1227. termQ = termBoolQ = termBoolQ.should(CrossVersionQueryBuilders.matchPhraseQuery(nonIndexField, textAlias));
  1228. }
  1229. }
  1230. if (EntityPojo.docQuery_index_ == sFieldName) { // (note: can use pointers here)
  1231. sQueryTerm.append(" OR (($manual_text_aliases").append("))");
  1232. }
  1233. else if (null != nonIndexField) {
  1234. sQueryTerm.append(" OR ").append(nonIndexField).append(":$manual_text_aliases");
  1235. }
  1236. }//TESTED (entity + association) - aliases #2
  1237. }
  1238. }//TESTED (by hand) - alias lookups
  1239. if (null == termQ) {
  1240. termQ = QueryBuilders.termQuery(sFieldName, qt.entity);
  1241. }
  1242. } //TESTED logic3* - end if entity options (apart from sentiment, which is handled below)
  1243. // Sentiment... only apply for entities (association sentiment is handled in parseAssociationTerm)
  1244. if ((null != qt.sentiment) && (EntityPojo.docQuery_index_ == sFieldName)) { // (note: can use pointers here)
  1245. RangeQueryBuilder sentimentQ = QueryBuilders.rangeQuery(EntityPojo.docQuery_sentiment_);
  1246. if (null != qt.sentiment.min) {
  1247. sentimentQ.from(qt.sentiment.min);
  1248. }
  1249. if (null != qt.sentiment.max) {
  1250. sentimentQ.to(qt.sentiment.max);
  1251. }
  1252. if (null == termQ) {
  1253. termQ = sentimentQ;
  1254. sQueryTerm.append("sentiment:[").append(qt.sentiment.min).append(',').append(qt.sentiment.max).append(']');
  1255. }
  1256. else {
  1257. termQ = QueryBuilders.boolQuery().must(termQ).must(sentimentQ);
  1258. sQueryTerm.append(" AND ").append("sentiment:[").append(qt.sentiment.min).append(',').append(qt.sentiment.max).append(']');
  1259. // (don't mind the nulls in the range)
  1260. }
  1261. }//TESTED (combined sentiment and standalone sentiment)
  1262. return termQ;
  1263. }
  1264. /////////////////////////////////////
  1265. private Set<String> _tmpEntityExpansionList = null;
  1266. private Map<String, Set<String>> _tmpAliasMap = null;
  1267. // 1.2.1.2] Utility function for alias expansion
  1268. void handleEntityExpansion(DBCollection entityFeatureDb, List<AdvancedQueryPojo.QueryTermPojo> qtList, String userIdStr, String communityIdList) {
  1269. for (AdvancedQueryPojo.QueryTermPojo qt: qtList) {
  1270. if ((null != qt.entityOpt) && qt.entityOpt.expandAlias) {
  1271. String s = null;
  1272. if (null != qt.entity) {
  1273. int nIndex1 = qt.entity.lastIndexOf(':');
  1274. int nIndex2 = qt.entity.lastIndexOf('/');
  1275. if (nIndex1 > nIndex2) {
  1276. s = qt.entity.substring(0, nIndex1) + "/" + qt.entity.substring(nIndex1 + 1);
  1277. }//TESTED logic2 (cut and paste)
  1278. else {
  1279. s = qt.entity;
  1280. }
  1281. }
  1282. else if ((null != qt.entityValue) && (null != qt.entityType)) {
  1283. s = qt.entityValue + "/" + qt.entityType;
  1284. }
  1285. if (null != s) {
  1286. if (null == _tmpEntityExpansionList) {
  1287. _tmpEntityExpansionList = new TreeSet<String>();
  1288. }
  1289. _tmpEntityExpansionList.add(s);
  1290. }
  1291. }//(end if alias specified)
  1292. } // (end loop over query terms)
  1293. if (null != _tmpEntityExpansionList) {
  1294. try {
  1295. _tmpAliasMap = SearchHandler.findAliases(entityFeatureDb, EntityPojo.index_, _tmpEntityExpansionList, userIdStr, communityIdList);
  1296. //(NOTE: this is intentionally _not_ EntityPojo.docQuery_index, that's just for referencing nested queries/filters in elasticsearch)
  1297. }
  1298. catch (Exception e) {
  1299. // Just carry on without expansion
  1300. }
  1301. }
  1302. } //TESTED (logic3 - cases: {entity, entityValue+entityType, entityValue, none of above})
  1303. ////////////////////////////////////////////////////////////////////////
  1304. // 1.2.2] Date term parsing
  1305. private long _nNow = 0;
  1306. BaseQueryBuilder parseDateTerm(AdvancedQueryPojo.QueryTermPojo.TimeTermPojo time, StringBuffer sQueryTerm)
  1307. {
  1308. return parseDateTerm(time, sQueryTerm, DocumentPojo.publishedDate_);
  1309. }
  1310. private BaseQueryBuilder parseDateTerm(AdvancedQueryPojo.QueryTermPojo.TimeTermPojo time, StringBuffer sQueryTerm, String sFieldName)
  1311. {
  1312. BaseQueryBuilder termQ = null;
  1313. long nMinTime = 0L;
  1314. long nMaxTime = _nNow;
  1315. Interval interval = parseMinMaxDates(time, nMinTime, nMaxTime);
  1316. nMinTime = interval.getStartMillis();
  1317. nMaxTime = interval.getEndMillis();
  1318. termQ = QueryBuilders.constantScoreQuery(
  1319. FilterBuilders.numericRangeFilter(sFieldName).from(nMinTime).to(nMaxTime)).boost((float)1.0);
  1320. sQueryTerm.append(sFieldName).append(":[").
  1321. append(0==nMinTime?"0":new Date(nMinTime)).append(" TO ").append(new Date(nMaxTime)).append(']');
  1322. return termQ;
  1323. }
  1324. // Don't currently have a use for this guy - would be part of a combined time query?
  1325. // private BaseQueryBuilder parseMonthTerm(AdvancedQueryPojo.QueryTermPojo.TimeTermPojo time, StringBuffer sQueryTerm, String sFieldName)
  1326. // {
  1327. // BaseQueryBuilder termQ = null;
  1328. // long nMinTime = 0L;
  1329. // long nMaxTime = _nNow;
  1330. // Interval interval = parseMinMaxDates(time, nMinTime, nMaxTime);
  1331. // nMinTime = interval.getStartMillis();
  1332. // nMaxTime = interval.getEndMillis();
  1333. //
  1334. // // Convert min/max dates to YYYYMM
  1335. // Calendar c = Calendar.getInstance();
  1336. // c.setTime(new Date(nMinTime));
  1337. // nMinTime = (c.get(Calendar.YEAR)*100 + c.get(Calendar.MONTH)+1L);
  1338. // c.setTime(new Date(nMaxTime));
  1339. // nMaxTime = (c.get(Calendar.YEAR)*100 + c.get(Calendar.MONTH)+1L);
  1340. //
  1341. // termQ = QueryBuilders.constantScoreQuery(
  1342. // FilterBuilders.numericRangeFilter(sFieldName).from(nMinTime).to(nMaxTime)).boost((float)1.0);
  1343. //
  1344. // sQueryTerm.append("association.").append(sFieldName).append(":[").
  1345. // append(nMinTime).append(" TO ").append(nMaxTime).append(']');
  1346. //
  1347. // return termQ;
  1348. // }
  1349. // 1.2.2.1] Even lower level date parsing
  1350. public static Interval parseMinMaxDates(AdvancedQueryPojo.QueryTermPojo.TimeTermPojo time, long nMinTime, long nMaxTime) {
  1351. if ((null != time.min) && (time.min.length() > 0)) {
  1352. if (time.min.equals("now")) {
  1353. nMinTime = nMaxTime;
  1354. }
  1355. else if (time.min.startsWith("now")) { // now+N[hdmy]
  1356. // now+Xi or now-Xi
  1357. long sgn = 1L;
  1358. if ('-' == time.min.charAt(3)) { //now+ or now-
  1359. sgn = -1L;
  1360. }
  1361. long offset = sgn*getInterval(time.min.substring(4), 'd'); // (default interval is day)
  1362. nMinTime = nMaxTime + offset; // (maxtime is now)
  1363. }
  1364. else if (time.min.equals("midnight")) {
  1365. nMinTime = DateUtils.truncate(new Date(nMaxTime), Calendar.DAY_OF_MONTH).getTime();
  1366. }
  1367. else if (time.min.startsWith("midnight")) { // midnight+N[hdmy]
  1368. // midnight+Xi or midnight-Xi
  1369. long sgn = 1L;
  1370. if ('-' == time.min.charAt(8)) { //now+ or now-
  1371. sgn = -1L;
  1372. }
  1373. long offset = sgn*getInterval(time.min.substring(9), 'd'); // (default interval is day)
  1374. nMinTime = DateUtils.truncate(new Date(nMaxTime), Calendar.DAY_OF_MONTH).getTime() + offset; // (maxtime is now)
  1375. }
  1376. else {
  1377. try {
  1378. nMinTime = Long.parseLong(time.min); // (unix time format)
  1379. if (nMinTime <= 99999999) { // More likely to be YYYYMMDD
  1380. // OK try a bunch of common date parsing formats
  1381. nMinTime = parseDate(time.min);
  1382. } // TESTED for nMaxTime
  1383. }
  1384. catch (NumberFormatException e) {
  1385. // OK try a bunch of common date parsing formats
  1386. nMinTime = parseDate(time.min);
  1387. }
  1388. }
  1389. }
  1390. if ((null != time.max) && (time.max.length() > 0)) {
  1391. if (time.max.equals("midnight")) {
  1392. nMaxTime = DateUtils.truncate(new Date(nMaxTime), Calendar.DAY_OF_MONTH).getTime();
  1393. }
  1394. else if (time.max.startsWith("midnight")) { // midnight+N[hdmy]
  1395. // midnight+Xi or midnight-Xi
  1396. long sgn = 1L;
  1397. if ('-' == time.max.charAt(8)) { //now+ or now-
  1398. sgn = -1L;
  1399. }
  1400. long offset = sgn*getInterval(time.min.substring(9), 'd'); // (default interval is day)
  1401. nMaxTime = DateUtils.truncate(new Date(nMaxTime), Calendar.DAY_OF_MONTH).getTime() + offset; // (maxtime is now)
  1402. }
  1403. else if (!time.max.equals("now")) { // (What we have by default)
  1404. if (time.max.startsWith("now")) { // now+N[hdmy]
  1405. // now+Xi or now-Xi
  1406. long sgn = 1L;
  1407. if ('-' == time.max.charAt(3)) { //now+ or now-
  1408. sgn = -1L;
  1409. }
  1410. long offset = sgn*getInterval(time.max.substring(4), 'd'); // (default interval is day)
  1411. nMaxTime = nMaxTime + offset; // (maxtime is now)
  1412. }
  1413. else {
  1414. try {
  1415. nMaxTime = Long.parseLong(time.max); // (unix time format)
  1416. if (nMaxTime <= 99999999) { // More likely to be YYYYMMDD
  1417. // OK try a bunch of common date parsing formats
  1418. nMaxTime = parseDate(time.max);
  1419. // max time, so should be 24h-1s ahead ...
  1420. nMaxTime = nMaxTime - (nMaxTime % (24*3600*1000));
  1421. nMaxTime += 24*3600*1000 - 1;
  1422. } //TESTED (logic5, logic10 for maxtime)
  1423. }
  1424. catch (NumberFormatException e) {
  1425. // OK try a bunch of common date parsing formats
  1426. nMaxTime = parseDate(time.max);
  1427. // If day only is specified, should be the entire day...
  1428. if (!time.max.contains(":")) {
  1429. nMaxTime = nMaxTime - (nMaxTime % (24*3600*1000));
  1430. nMaxTime += 24*3600*1000 - 1;
  1431. }
  1432. }//TOTEST max time
  1433. }
  1434. }
  1435. } //TESTED (logic5)
  1436. return new Interval(nMinTime, nMaxTime);
  1437. }
  1438. ////////////////////////////////////////////////////////////////////////
  1439. // 1.2.2] Geo term parsing
  1440. //(the fieldName is always locs normally, geotag for child events, events.geotag for subevents)
  1441. /**
  1442. * Parses the GeoTermPojo arguments into a lucene query. Currently there are multiple geo options
  1443. *
  1444. * 1. Center lat/lng with radius (args centerll && dist)
  1445. * 2. Bouding box, top left, bottom right corners (args minll, maxll)
  1446. * NOT IMPLEMENTED 3. Geo name search (args name, OPTIONAL dist)
  1447. * NOT IMPLEMENTED 4. Polygon search (args poly OPTIONAL dist)
  1448. *
  1449. * OPTIONAL for all arg ontology_type (will apply a heuristic search only grabbing onts that level and below
  1450. *
  1451. */
  1452. BaseQueryBuilder parseGeoTerm(AdvancedQueryPojo.QueryTermPojo.GeoTermPojo geo, StringBuffer sQueryTerm, GeoParseField parseFields)
  1453. {
  1454. BoolQueryBuilder boolQ = QueryBuilders.boolQuery().minimumNumberShouldMatch(1);
  1455. List<String> ont_terms = null;
  1456. //Get ontology types
  1457. if ( null != geo.ontology_type )
  1458. {
  1459. //get all ontology terms we are looking for
  1460. ont_terms = GeoOntologyMapping.getOntologyList(geo.ontology_type);
  1461. }
  1462. else
  1463. {
  1464. ont_terms = GeoOntologyMapping.getOntologyList(null);
  1465. }
  1466. if ((null != geo.centerll) && (null != geo.dist))
  1467. {
  1468. double lat, lon;
  1469. if ('(' == geo.centerll.charAt(0)) {
  1470. geo.centerll = geo.centerll.substring(1, geo.centerll.length() - 1);
  1471. }
  1472. String[] latlon = geo.centerll.split("\\s*,\\s*");
  1473. if (2 == latlon.length)
  1474. {
  1475. lat = Double.parseDouble(latlon[0]);
  1476. lon = Double.parseDouble(latlon[1]);
  1477. char c = geo.dist.charAt(geo.dist.length() - 1);
  1478. if ((c < 0x30) || (c > 0x39)) // not a digit, difference calculation is different
  1479. {
  1480. //ENT
  1481. //Add in ontology_type if necessary
  1482. //in the end this results in query = CURR_GEO_QUERY AND (ONT_TYPE = [ont1 OR ont2 OR ont3])
  1483. if ( parseFields == GeoParseField.ALL || parseFields == GeoParseField.ENT )
  1484. {
  1485. //use a 2nd variable so we dont have to keep casting termQ to BoolQuery
  1486. BoolQueryBuilder subQ = QueryBuilders.boolQuery().must(QueryBuilders.constantScoreQuery(FilterBuilders.geoDistanceFilter(EntityPojo.docQuery_geotag_).distance(geo.dist).point(lat, lon)).boost(1.0F));
  1487. subQ.must(QueryBuilders.termQuery(EntityPojo.docQuery_ontology_type_, ont_terms.toArray()));
  1488. boolQ.should(QueryBuilders.nestedQuery(DocumentPojo.entities_, subQ).scoreMode("max").boost((float)1.0));
  1489. }
  1490. //ASSOC AND DOCGEO (only added if ont is point or null)
  1491. if ( parseFields == GeoParseField.ALL || parseFields == GeoParseField.ASSOC )
  1492. boolQ.should(QueryBuilders.nestedQuery(DocumentPojo.associations_, FilterBuilders.geoDistanceFilter(AssociationPojo.docQuery_geotag_).distance(geo.dist).point(lat, lon)).scoreMode("max").boost((float)1.0));
  1493. if ( parseFields == GeoParseField.ALL || parseFields == GeoParseField.DOC )
  1494. boolQ.should(QueryBuilders.constantScoreQuery(FilterBuilders.geoDistanceFilter(DocumentPojo.docGeo_).distance(geo.dist).point(lat, lon)));
  1495. }
  1496. else // (identical to the above except geo distance parsing is different)
  1497. {
  1498. //ENT
  1499. //Add in ontology_type if necessary
  1500. //in the end this results in query = CURR_GEO_QUERY AND (ONT_TYPE = [ont1 OR ont2 OR ont3])
  1501. if ( parseFields == GeoParseField.ALL || parseFields == GeoParseField.ENT )
  1502. {
  1503. //use a 2nd variable so we dont have to keep casting termQ to BoolQuery
  1504. BoolQueryBuilder subQ = QueryBuilders.boolQuery().must(QueryBuilders.constantScoreQuery(FilterBuilders.geoDistanceFilter(EntityPojo.docQuery_geotag_).distance(Double.parseDouble(geo.dist), DistanceUnit.KILOMETERS).point(lat, lon)).boost(1.0F));
  1505. subQ.must(QueryBuilders.termsQuery(EntityPojo.docQuery_ontology_type_, ont_terms.toArray()));
  1506. boolQ.should(QueryBuilders.nestedQuery(DocumentPojo.entities_, subQ).scoreMode("max").boost((float)1.0));
  1507. }
  1508. //ASSOC AND DOCGEO (only added if ont is point or null)
  1509. if ( parseFields == GeoParseField.ALL || parseFields == GeoParseField.ASSOC )
  1510. boolQ.should(QueryBuilders.nestedQuery(DocumentPojo.associations_, FilterBuilders.geoDistanceFilter(AssociationPojo.docQuery_geotag_).distance(Double.parseDouble(geo.dist), DistanceUnit.KILOMETERS).point(lat, lon)).scoreMode("max").boost((float)1.0));
  1511. if ( parseFields == GeoParseField.ALL || parseFields == GeoParseField.DOC )
  1512. boolQ.should(QueryBuilders.constantScoreQuery(FilterBuilders.geoDistanceFilter(DocumentPojo.docGeo_).distance(Double.parseDouble(geo.dist), DistanceUnit.KILOMETERS).point(lat, lon)));
  1513. }
  1514. sQueryTerm.append("dist(*.geotag, (").append(geo.centerll).append(")) < ").append(geo.dist);
  1515. }
  1516. }//TESTED logic11,logic12
  1517. else if ((null != geo.minll) && (null != geo.maxll))
  1518. {
  1519. double latmin, lonmin, latmax, lonmax;
  1520. if ('(' == geo.minll.charAt(0)) {
  1521. geo.minll = geo.minll.substring(1, geo.minll.length() - 1);
  1522. }
  1523. String[] latlon1 = geo.minll.split("\\s*,\\s*");
  1524. if ('(' == geo.maxll.charAt(0)) {
  1525. geo.maxll = geo.maxll.substring(1, geo.maxll.length() - 1);
  1526. }
  1527. String[] latlon2 = geo.maxll.split("\\s*,\\s*");
  1528. if ((2 == latlon1.length) && (2 == latlon2.length))
  1529. {
  1530. latmin = Double.parseDouble(latlon1[0]);
  1531. lonmin = Double.parseDouble(latlon1[1]);
  1532. latmax = Double.parseDouble(latlon2[0]);
  1533. lonmax = Double.parseDouble(latlon2[1]);
  1534. // top left = max,min
  1535. latmin = latmin < latmax ? latmin : latmax;
  1536. latmax = latmin >= latmax ? latmin : latmax;
  1537. lonmin = lonmin < lonmax ? lonmin : lonmax;
  1538. lonmax = lonmin >= lonmax ? lonmin : lonmax;
  1539. // If we've got this far, we've found all the different locations
  1540. if ( parseFields == GeoParseField.ALL || parseFields == GeoParseField.ENT )
  1541. {
  1542. //use a 2nd variable so we dont have to keep casting termQ to BoolQuery
  1543. BoolQueryBuilder subQ = QueryBuilders.boolQuery().must(QueryBuilders.constantScoreQuery(FilterBuilders.geoBoundingBoxFilter(EntityPojo.docQuery_geotag_).topLeft(latmax,lonmin).bottomRight(latmin, lonmax)).boost(1.0F));
  1544. subQ.must(QueryBuilders.termsQuery(EntityPojo.docQuery_ontology_type_, ont_terms.toArray()));
  1545. boolQ.should(QueryBuilders.nestedQuery(DocumentPojo.entities_, subQ).scoreMode("max").boost((float)1.0));
  1546. }
  1547. //ASSOC AND DOCGEO (only added if ont is point or null)
  1548. if ( parseFields == GeoParseField.ALL || parseFields == GeoParseField.ASSOC )
  1549. boolQ.should(QueryBuilders.nestedQuery(DocumentPojo.associations_, FilterBuilders.geoBoundingBoxFilter(AssociationPojo.docQuery_geotag_).topLeft(latmax,lonmin).bottomRight(latmin, lonmax)).scoreMode("max").boost((float)1.0));
  1550. if ( parseFields == GeoParseField.ALL || parseFields == GeoParseField.DOC )
  1551. boolQ.should(QueryBuilders.constantScoreQuery(FilterBuilders.geoBoundingBoxFilter(DocumentPojo.docGeo_).topLeft(latmax,lonmin).bottomRight(latmin, lonmax)));
  1552. sQueryTerm.append("*.geotag: [(").append(geo.minll).append("), (").append(geo.maxll).append(")]");
  1553. }
  1554. }//TESTED logic13,logic14
  1555. else if ( (null != geo.name))
  1556. {
  1557. //TODO (INF-1239): NOT IMPLEMENTED YET
  1558. }
  1559. else if ( (null != geo.polys) )
  1560. {
  1561. //TODO (INF-1118): NOT IMPLEMENTED YET
  1562. }
  1563. return boolQ;
  1564. }
  1565. ////////////////////////////////////////////////////////////////////////
  1566. // 1.2.2.1] Sub-Sub-Utility function to parse all the different date strings I can think of
  1567. private static String[] _allowedDatesArray = null;
  1568. // (odd, static initialization doesn't work; just initialize first time in utility fn)
  1569. private static long parseDate(String sDate) {
  1570. if (null == _allowedDatesArray) {
  1571. _allowedDatesArray = new String[]
  1572. {
  1573. "yyyy'-'DDD", "yyyy'-'MM'-'dd", "yyyyMMdd", "dd MMM yyyy", "dd MMM yy",
  1574. "MM/dd/yy", "MM/dd/yyyy", "MM.dd.yy", "MM.dd.yyyy",
  1575. "yyyy'-'MM'-'dd hh:mm:ss", "dd MMM yy hh:mm:ss", "dd MMM yyyy hh:mm:ss",
  1576. "MM/dd/yy hh:mm:ss", "MM/dd/yyyy hh:mm:ss", "MM.dd.yy hh:mm:ss", "MM.dd.yyyy hh:mm:ss",
  1577. DateFormatUtils.ISO_DATE_FORMAT.getPattern(),
  1578. DateFormatUtils.ISO_DATETIME_FORMAT.getPattern(),
  1579. DateFormatUtils.ISO_DATE_TIME_ZONE_FORMAT.getPattern(),
  1580. DateFormatUtils.ISO_DATETIME_TIME_ZONE_FORMAT.getPattern(),
  1581. DateFormatUtils.SMTP_DATETIME_FORMAT.getPattern()
  1582. };
  1583. }
  1584. try {
  1585. Date date = DateUtils.parseDate(sDate, _allowedDatesArray);
  1586. return date.getTime();
  1587. }
  1588. catch (Exception e) { // Error all the way out
  1589. throw new RuntimeException(e);
  1590. }
  1591. }//TESTED (logic5)
  1592. ////////////////////////////////////////////////////////////////////////
  1593. // 1.2.3] Event term parsing - this one is pretty complex
  1594. BaseQueryBuilder parseAssociationTerm(AdvancedQueryPojo.QueryTermPojo.AssociationTermPojo assoc, AdvancedQueryPojo.QueryTermPojo.SentimentModifierPojo sentiment, StringBuffer sQueryTerm )
  1595. {
  1596. boolean bFirstTerm = true;
  1597. BoolQueryBuilder query = QueryBuilders.boolQuery();
  1598. sQueryTerm.append("association:(");
  1599. @SuppressWarnings("unused")
  1600. int nTerms = 0; // (might be used later)
  1601. if (null != assoc.entity1) {
  1602. bFirstTerm = false;
  1603. sQueryTerm.append("(");
  1604. this.parseAssociationSubTerm(assoc.entity1, sQueryTerm, query, AssociationPojo.docQuery_entity1_, AssociationPojo.docQuery_entity1_index_);
  1605. sQueryTerm.append(')');
  1606. nTerms++;
  1607. }//TESTED
  1608. if (null != assoc.entity2) {
  1609. if (!bFirstTerm) {
  1610. sQueryTerm.append(" AND ");
  1611. }
  1612. bFirstTerm = false;
  1613. sQueryTerm.append("(");
  1614. this.parseAssociationSubTerm(assoc.entity2, sQueryTerm, query, AssociationPojo.docQuery_entity2_, AssociationPojo.docQuery_entity2_index_);
  1615. sQueryTerm.append(')');
  1616. nTerms++;
  1617. }//TESTED
  1618. if (null != assoc.verb) {
  1619. if (!bFirstTerm) {
  1620. sQueryTerm.append(" AND ");
  1621. }
  1622. bFirstTerm = false;
  1623. sQueryTerm.append("(verb,verb_category:").append(assoc.verb).append(")");
  1624. query.must(QueryBuilders.boolQuery().should(QueryBuilders.queryString(assoc.verb).field(AssociationPojo.docQuery_verb_)).
  1625. should(QueryBuilders.queryString(assoc.verb).field(AssociationPojo.docQuery_verb_category_)));
  1626. sQueryTerm.append(')');
  1627. nTerms++;
  1628. }//TESTED
  1629. if (null != assoc.geo)
  1630. {
  1631. if (!bFirstTerm) {
  1632. sQueryTerm.append(" AND ");
  1633. }
  1634. bFirstTerm = false;
  1635. sQueryTerm.append("(");
  1636. query.must(this.parseGeoTerm(assoc.geo, sQueryTerm, GeoParseField.ASSOC));
  1637. sQueryTerm.append(')');
  1638. nTerms++;
  1639. }//TOTEST
  1640. if (null != assoc.time)
  1641. {
  1642. if (!bFirstTerm) {
  1643. sQueryTerm.append(" AND ");
  1644. }
  1645. bFirstTerm = false;
  1646. sQueryTerm.append("(");
  1647. // Top level: one of start OR end has to start inside the range (this is the first bit)
  1648. // OR it must envelop the query
  1649. // OK this one is a bit tricky because an event has a start+end time ... I think both
  1650. // have to be inside the time range (fortunately because that's the easy case!)
  1651. // (Note time_start and time_end don't exist inside the document object)
  1652. StringBuffer sbDummy = new StringBuffer();
  1653. BoolQueryBuilder combo2 = QueryBuilders.boolQuery();
  1654. combo2.should(this.parseDateTerm(assoc.time, sQueryTerm, AssociationPojo.docQuery_time_start_));
  1655. sQueryTerm.append(") OR/CONTAINS (");
  1656. combo2.should(this.parseDateTerm(assoc.time, sQueryTerm, AssociationPojo.docQuery_time_end_));
  1657. // (complex bit, start must be < and end must be >)
  1658. BoolQueryBuilder combo3 = QueryBuilders.boolQuery();
  1659. AdvancedQueryPojo.QueryTermPojo.TimeTermPojo event1 = new AdvancedQueryPojo.QueryTermPojo.TimeTermPojo();
  1660. AdvancedQueryPojo.QueryTermPojo.TimeTermPojo event2 = new AdvancedQueryPojo.QueryTermPojo.TimeTermPojo();
  1661. sQueryTerm.append("))");
  1662. event1.min = "10010101";
  1663. event1.max = assoc.time.min;
  1664. event2.min = assoc.time.max;
  1665. event2.max = "99990101"; // (ie the end of time, sort of!)
  1666. combo3.must(this.parseDateTerm(event1, sbDummy, AssociationPojo.docQuery_time_start_)); // ie start time earlier than query
  1667. combo3.must(this.parseDateTerm(event2, sbDummy, AssociationPojo.docQuery_time_end_)); // AND end time later than query
  1668. query.must(combo2.should(combo3));
  1669. query.must(combo2); // (this one is very simple - either one of the times must be inside the range)
  1670. nTerms++;
  1671. }//TESTED
  1672. if (null != assoc.type) {
  1673. if (!bFirstTerm) {
  1674. sQueryTerm.append(" AND ");
  1675. }
  1676. bFirstTerm = false;
  1677. sQueryTerm.append("(event_type:").append(assoc.type).append(")");
  1678. query.must(QueryBuilders.termQuery(AssociationPojo.docQuery_assoc_type_, assoc.type));
  1679. sQueryTerm.append(')');
  1680. nTerms++;
  1681. }//TOTEST
  1682. if (null != sentiment) {
  1683. if (!bFirstTerm) {
  1684. sQueryTerm.append(" AND ");
  1685. }
  1686. bFirstTerm = false;
  1687. RangeQueryBuilder sentimentQ = QueryBuilders.rangeQuery(AssociationPojo.docQuery_sentiment_);
  1688. if (null != sentiment.min) {
  1689. sentimentQ.from(sentiment.min);
  1690. }
  1691. if (null != sentiment.max) {
  1692. sentimentQ.to(sentiment.max);
  1693. }
  1694. query.must(sentimentQ);
  1695. sQueryTerm.append("sentiment:[").append(sentiment.min).append(',').append(sentiment.max).append(']');
  1696. }//TOTEST (combined sentiment only)
  1697. sQueryTerm.append(')');
  1698. return query;
  1699. } //TESTED/TOTEST (see above)
  1700. // 1.2.3.2] Event term parsing utility
  1701. void parseAssociationSubTerm(AdvancedQueryPojo.QueryTermPojo entity, StringBuffer sQueryTerm, BoolQueryBuilder combo,
  1702. String sFieldName, String sIndexName)
  1703. {
  1704. boolean bFirstTerm = true;
  1705. // 3 cases: etext, ftext, or entity (in 2 subcases)...
  1706. if ((null != entity.entity) && (!entity.entity.isEmpty())) { //1a
  1707. combo.must(this.parseEntityTerm(entity, sQueryTerm, sIndexName));
  1708. }
  1709. else if ((null != entity.entityValue) && (!entity.entityValue.isEmpty())) { //1b
  1710. combo.must(this.parseEntityTerm(entity, sQueryTerm, sIndexName));
  1711. }
  1712. if ((null != entity.etext) && (!entity.etext.isEmpty())) { //2
  1713. if (!bFirstTerm) {
  1714. sQueryTerm.append(" AND ");
  1715. }
  1716. bFirstTerm = false;
  1717. sQueryTerm.append("(\"");
  1718. sQueryTerm.append(entity.etext);
  1719. sQueryTerm.append("\")");
  1720. combo.must(CrossVersionQueryBuilders.matchPhraseQuery(sFieldName, entity.etext));
  1721. }
  1722. if ((null != entity.ftext) && (!entity.ftext.isEmpty())) { //3
  1723. if (!bFirstTerm) {
  1724. sQueryTerm.append(" AND ");
  1725. }
  1726. bFirstTerm = false;
  1727. sQueryTerm.append('(');
  1728. sQueryTerm.append(entity.ftext);
  1729. sQueryTerm.append(')');
  1730. combo.must(QueryBuilders.queryString(entity.ftext).field(sFieldName));
  1731. }
  1732. } //TESTED
  1733. ////////////////////////////////////////////////////////////////////////
  1734. // 1.3] Utility to parse boolean logic
  1735. private static Pattern _logicTidyUp = Pattern.compile("qt\\[\\d+\\]", Pattern.CASE_INSENSITIVE);
  1736. private BoolQueryBuilder parseLogic(String logic, BaseQueryBuilder qt[], StringBuffer qtRead[], StringBuffer query)
  1737. {
  1738. BoolQueryBuilder bq = QueryBuilders.boolQuery();
  1739. int nQueryElements = qt.length;
  1740. if (null == logic) { // No logic specified, just and them all together
  1741. for (int i = 0; i < nQueryElements; ++i) {
  1742. if (null != qt[i]) {
  1743. bq = bq.must(qt[i]);
  1744. if (0 != i) {
  1745. query.append(" and ");
  1746. }
  1747. query.append(qtRead[i]);
  1748. }
  1749. }
  1750. return bq;
  1751. }//TESTED
  1752. // Non-degenerate case, parse logic string (first convert qt[X] to X):
  1753. SimpleBooleanParser.SimpleBooleanParserMTree tree =
  1754. SimpleBooleanParser.parseExpression(_logicTidyUp.matcher(logic).replaceAll("$1"));
  1755. if (null == tree) {
  1756. return null;
  1757. }
  1758. else {
  1759. parseLogicRecursive(tree, bq, qt, qtRead, query);
  1760. }
  1761. return bq;
  1762. } //TESTED
  1763. /////////////////////////////////////////
  1764. // 1.3.1] Recursive utility for creating a binary object from the tree
  1765. void parseLogicRecursive(SimpleBooleanParser.SimpleBooleanParserMTree node, BoolQueryBuilder levelUp, BaseQueryBuilder qt[],
  1766. StringBuffer qtRead[], StringBuffer query)
  1767. {
  1768. if (null == node.terms) {
  1769. if ((node.bNegated) || (node.nTerm < 0)) {
  1770. int nIndex = Math.abs(node.nTerm) - 1; // (turn into index)
  1771. levelUp.mustNot(qt[nIndex]);
  1772. query.append("not ").append(qtRead[nIndex].toString());
  1773. }
  1774. else {
  1775. int nIndex = node.nTerm - 1;
  1776. levelUp.must(qt[nIndex]);
  1777. query.append(qtRead[nIndex].toString());
  1778. }
  1779. return;
  1780. }
  1781. boolean bFirstPass = true;
  1782. for (SimpleBooleanParser.SimpleBooleanParserMTree child: node.terms) {
  1783. if (null == child.terms) {
  1784. if (child.nTerm < 0) { // Term negated
  1785. int nIndex = (-child.nTerm) - 1;
  1786. if ('&' == node.op) {
  1787. levelUp.mustNot(qt[nIndex]); // (turn into index)
  1788. if (bFirstPass) {
  1789. query.append("not ");
  1790. }
  1791. else {
  1792. query.append(" and not ");
  1793. }
  1794. query.append(qtRead[nIndex].toString());
  1795. }
  1796. else {
  1797. levelUp.should(QueryBuilders.boolQuery().mustNot(qt[nIndex]));
  1798. if (bFirstPass) {
  1799. query.append("not");
  1800. }
  1801. else {
  1802. query.append(" or not ");
  1803. }
  1804. query.append(qtRead[nIndex].toString());
  1805. }
  1806. }
  1807. else { // Term not negated
  1808. int nIndex = child.nTerm - 1;
  1809. if ('&' == node.op) {
  1810. levelUp.must(qt[nIndex]);
  1811. if (!bFirstPass) {
  1812. query.append(" and ");
  1813. }
  1814. query.append(qtRead[nIndex].toString());
  1815. }
  1816. else {
  1817. levelUp.should(qt[nIndex]);
  1818. if (!bFirstPass) {
  1819. query.append(" or ");
  1820. }
  1821. query.append(qtRead[nIndex].toString());
  1822. }
  1823. }
  1824. } // actual term, not a node
  1825. else { // (null != child.terms)
  1826. // The term is a node, recurse!
  1827. BoolQueryBuilder newLevel = QueryBuilders.boolQuery();
  1828. if ('&' == node.op) {
  1829. if (child.bNegated) {
  1830. levelUp.mustNot(newLevel);
  1831. if (!bFirstPass) {
  1832. query.append(" and ");
  1833. }
  1834. query.append("not (");
  1835. }
  1836. else {
  1837. levelUp.must(newLevel);
  1838. if (!bFirstPass) {
  1839. query.append(" and ");
  1840. }
  1841. query.append("(");
  1842. }
  1843. }
  1844. else {
  1845. if (child.bNegated) {
  1846. levelUp.should(QueryBuilders.boolQuery().mustNot(newLevel));
  1847. if (!bFirstPass) {
  1848. query.append(" or ");
  1849. }
  1850. query.append("not (");
  1851. }
  1852. else {
  1853. levelUp.should(newLevel);
  1854. if (!bFirstPass) {
  1855. query.append(" or ");
  1856. }
  1857. query.append("(");
  1858. }
  1859. }
  1860. parseLogicRecursive(child, newLevel, qt, qtRead, query);
  1861. query.append(")");
  1862. } // (end node is a term need to recurse)
  1863. bFirstPass = false;
  1864. } // end loop over child nodes
  1865. }//TESTED
  1866. ////////////////////////////////////////////////////////////////////////
  1867. // 2] Complex scoring
  1868. // 2.1] Proximity adjustments
  1869. private static BaseQueryBuilder addProximityBasedScoring(BaseQueryBuilder currQuery, SearchRequestBuilder searchSettings,
  1870. AdvancedQueryPojo.QueryScorePojo scoreParams,
  1871. BoolFilterBuilder parentFilterObj, boolean bLowAccuracyGeo)
  1872. {
  1873. Map<String, Object> params = new HashMap<String, Object>();
  1874. Object[] paramDoublesScript = new Object[6];
  1875. Object[] paramDoublesDecay = new Object[6];
  1876. double[] geoDecay = parseGeoDecay(scoreParams); // (encapsulate this away since it can also be called by ScoringUtils)
  1877. if ((null != geoDecay) && !bLowAccuracyGeo) {
  1878. double dlat = geoDecay[0];
  1879. double dlon = geoDecay[1];
  1880. double dInvDist = geoDecay[2];
  1881. paramDoublesScript[0] = dInvDist;
  1882. paramDoublesScript[1] = dlat;
  1883. paramDoublesScript[2] = dlon;
  1884. paramDoublesDecay[0] = dInvDist;
  1885. paramDoublesDecay[1] = dlat;
  1886. paramDoublesDecay[2] = dlon;
  1887. }
  1888. else // geo prox not specified/malformed, or low accuracy
  1889. {
  1890. if (!bLowAccuracyGeo) {
  1891. scoreParams.geoProx = null;
  1892. }
  1893. paramDoublesScript[0] = -1.0;
  1894. paramDoublesScript[1] = -1.0;
  1895. paramDoublesScript[2] = -1.0;
  1896. paramDoublesDecay[0] = -1.0;
  1897. paramDoublesDecay[1] = -1.0;
  1898. paramDoublesDecay[2] = -1.0;
  1899. }
  1900. if ((null != geoDecay) && (null != parentFilterObj)) { // Regardless of high/low accuracy, add 0.5% filter
  1901. GeoDistanceFilterBuilder geoFilter = FilterBuilders.geoDistanceFilter(EntityPojo.docQuery_geotag_).
  1902. point(geoDecay[0], geoDecay[1]).distance(200.0/geoDecay[2], DistanceUnit.KILOMETERS);
  1903. parentFilterObj.must(FilterBuilders.nestedFilter(DocumentPojo.entities_, geoFilter));
  1904. }//TESTED
  1905. //Time decay portion
  1906. if ((null != scoreParams.timeProx) && (null != scoreParams.timeProx.time) && (null != scoreParams.timeProx.decay) &&
  1907. !scoreParams.timeProx.time.isEmpty() && !scoreParams.timeProx.decay.isEmpty())
  1908. {
  1909. long nDecayCenter = System.currentTimeMillis();
  1910. if (!scoreParams.timeProx.time.equals("now"))
  1911. {
  1912. nDecayCenter = parseDate(scoreParams.timeProx.time);
  1913. }
  1914. //Parse decay time:
  1915. long nDecayTime = getInterval(scoreParams.timeProx.decay, 'w');
  1916. double dInvDecay = 1.0/(double)nDecayTime;
  1917. scoreParams.timeProx.nTime = nDecayCenter;
  1918. scoreParams.timeProx.dInvDecay = dInvDecay;
  1919. // Add 0.5% filter
  1920. if (null != parentFilterObj) {
  1921. long nMinTime = nDecayCenter - 200*nDecayTime;
  1922. long nMaxTime = nDecayCenter + 200*nDecayTime;
  1923. parentFilterObj.must(FilterBuilders.numericRangeFilter(DocumentPojo.publishedDate_).from(nMinTime).to(nMaxTime));
  1924. }//TESTED
  1925. paramDoublesScript[3] = dInvDecay;
  1926. paramDoublesScript[4] = nDecayCenter;
  1927. paramDoublesDecay[3] = dInvDecay;
  1928. paramDoublesDecay[4] = nDecayCenter;
  1929. }
  1930. else
  1931. {
  1932. scoreParams.timeProx = null;
  1933. paramDoublesScript[3] = -1.0;
  1934. paramDoublesScript[4] = -1.0;
  1935. paramDoublesDecay[3] = -1.0;
  1936. paramDoublesDecay[4] = -1.0;
  1937. }
  1938. if ( scoreParams.timeProx == null && scoreParams.geoProx == null )
  1939. {
  1940. //if there is no timeprox or geoprox, just run the query w/o script
  1941. return currQuery;
  1942. }
  1943. else
  1944. {
  1945. if (null != searchSettings) // just handles test cases where searchSettings==null
  1946. {
  1947. paramDoublesDecay[5] = true;
  1948. Map<String,Object> scriptParams = new HashMap<String, Object>();
  1949. scriptParams.put("param", paramDoublesDecay);
  1950. searchSettings.addScriptField("decay", QueryDecayFactory.getLanguage(), QueryDecayFactory.getScriptName(), scriptParams);
  1951. }
  1952. //if there is a decay, add the script to the query
  1953. paramDoublesScript[5] = false;
  1954. params.put("param", paramDoublesScript);
  1955. return QueryBuilders.customScoreQuery(currQuery).script(QueryDecayFactory.getScriptName()).params(params).lang(QueryDecayFactory.getLanguage());
  1956. }
  1957. }//TESTED
  1958. // Utility to parse out geo for "software emulated case"
  1959. // returns lat/lon/distance (or null if anything goes wrong)
  1960. // Also called from ScoringUtils
  1961. public static double[] parseGeoDecay(AdvancedQueryPojo.QueryScorePojo scoreParams) {
  1962. //Geo decay portion
  1963. if ((null != scoreParams.geoProx) && (null != scoreParams.geoProx.ll) && (null != scoreParams.geoProx.decay) &&
  1964. !scoreParams.geoProx.ll.equals(",") && !scoreParams.geoProx.ll.isEmpty() && !scoreParams.geoProx.decay.isEmpty())
  1965. {
  1966. if ('(' == scoreParams.geoProx.ll.charAt(0))
  1967. {
  1968. scoreParams.geoProx.ll = scoreParams.geoProx.ll.substring(1, scoreParams.geoProx.ll.length() - 1);
  1969. }
  1970. String[] latlon = scoreParams.geoProx.ll.split("\\s*,\\s*");
  1971. if (2 == latlon.length)
  1972. {
  1973. double[] lat_lon_invdist = new double[3];
  1974. lat_lon_invdist[0] = Double.parseDouble(latlon[0]);
  1975. lat_lon_invdist[1] = Double.parseDouble(latlon[1]);
  1976. double dDist = getDistance(scoreParams.geoProx.decay); // (Returns it in km)
  1977. if (0.0 == dDist) dDist = 0.00001; // (robustness, whatever)
  1978. lat_lon_invdist[2] = (1.0/dDist);
  1979. return lat_lon_invdist;
  1980. }
  1981. }
  1982. return null;
  1983. }
  1984. // Utility to get the ms count of an interval
  1985. public static long getInterval(String interval, char defaultInterval) {
  1986. if (interval.equals("month")) { // Special case
  1987. return 30L*24L*3600L*1000L;
  1988. }
  1989. int nLastIndex = interval.length() - 1;
  1990. long nDecayTime;
  1991. char c = interval.charAt(nLastIndex);
  1992. if (c >= 0x40) { // it's a digit, interpret:
  1993. nDecayTime = Long.parseLong(interval.substring(0, nLastIndex));
  1994. }
  1995. else { // No digit use default
  1996. c = defaultInterval;
  1997. nDecayTime = Long.parseLong(interval);
  1998. }
  1999. if ('h' == c) {
  2000. nDecayTime *= 3600L*1000L;
  2001. }
  2002. else if ('d' == c) {
  2003. nDecayTime *= 24L*3600L*1000L;
  2004. }
  2005. else if ('w' == c) {
  2006. nDecayTime *= 7L*24L*3600L*1000L;
  2007. }
  2008. else if ('m' == c) {
  2009. nDecayTime *= 30L*24L*3600L*1000L;
  2010. }
  2011. else if ('y' == c) {
  2012. nDecayTime *= 365L*24L*3600L*1000L;
  2013. }
  2014. return nDecayTime;
  2015. }//TESTED
  2016. private static double getDistance(String distance) { // [0-9]+{m,km,nm)
  2017. double dDist = 0.0;
  2018. int nCharIndex1 = distance.length() - 1;
  2019. char c = distance.charAt(nCharIndex1);
  2020. if (c == 'm') {
  2021. c = distance.charAt(nCharIndex1 - 1);
  2022. if (c == 'k') { // km
  2023. dDist = Double.parseDouble(distance.substring(0, nCharIndex1 - 1));
  2024. }
  2025. else if (c == 'n') { // nm
  2026. dDist = Double.parseDouble(distance.substring(0, nCharIndex1 - 1))*1.852;
  2027. }
  2028. else { // m==mi
  2029. dDist = Double.parseDouble(distance.substring(0, nCharIndex1))*1.150779;
  2030. }
  2031. }
  2032. else if (c == 'i') { // mi
  2033. dDist = Double.parseDouble(distance.substring(0, nCharIndex1 - 1))*1.150779;
  2034. }
  2035. else { // Default to km
  2036. dDist = Double.parseDouble(distance.substring(0, nCharIndex1 + 1));
  2037. }
  2038. return dDist;
  2039. }//TESTED
  2040. ////////////////////////////////////////////////////////////////////////
  2041. // 2.2] Manual weighting
  2042. private BaseQueryBuilder applyManualWeights(BaseQueryBuilder queryObj, AdvancedQueryPojo.QueryScorePojo score)
  2043. {
  2044. if ((null != score.tagWeights) || (null != score.typeWeights) || (null != score.sourceWeights)) {
  2045. CustomFiltersScoreQueryBuilder manualWeights = QueryBuilders.customFiltersScoreQuery(queryObj);
  2046. manualWeights.scoreMode("avg"); // Only tags can match multiple filters, in which case we average them
  2047. if (null != score.sourceWeights) {
  2048. // Find all weightings with the same score:
  2049. ArrayListMultimap<Float, String> invSourceWeights = ArrayListMultimap.create();
  2050. for (Map.Entry<String, Double> sourceKeyEl: score.sourceWeights.entrySet()) {
  2051. invSourceWeights.put((float)(double)sourceKeyEl.getValue(), sourceKeyEl.getKey());
  2052. }
  2053. for (Map.Entry<Float, Collection<String>> invSourceKeyEl: invSourceWeights.asMap().entrySet()) {
  2054. manualWeights.add(FilterBuilders.termsFilter(DocumentPojo.sourceKey_, invSourceKeyEl.getValue().toArray()), invSourceKeyEl.getKey());
  2055. }
  2056. }//TESTED
  2057. if (null != score.typeWeights) {
  2058. // Find all weightings with the same score:
  2059. ArrayListMultimap<Float, String> invTypeWeights = ArrayListMultimap.create();
  2060. for (Map.Entry<String, Double> typeEl: score.typeWeights.entrySet()) {
  2061. invTypeWeights.put((float)(double)typeEl.getValue(), typeEl.getKey());
  2062. }
  2063. for (Map.Entry<Float, Collection<String>> invTypeEl: invTypeWeights.asMap().entrySet()) {
  2064. if (null == score.sourceWeights) { // Easy case
  2065. manualWeights.add(FilterBuilders.termsFilter(DocumentPojo.mediaType_, invTypeEl.getValue().toArray()), invTypeEl.getKey());
  2066. }
  2067. else { // Need to filter out sources they are matched with higher prio
  2068. BoolFilterBuilder typesNotSources = FilterBuilders.boolFilter();
  2069. typesNotSources = typesNotSources.must(FilterBuilders.termsFilter(DocumentPojo.mediaType_, invTypeEl.getValue().toArray())).
  2070. mustNot(FilterBuilders.termsFilter(DocumentPojo.sourceKey_, score.sourceWeights.keySet().toArray()));
  2071. manualWeights.add(typesNotSources, invTypeEl.getKey());
  2072. }
  2073. }
  2074. }//TESTED
  2075. if (null != score.tagWeights) {
  2076. // Find all weightings with the same score:
  2077. ArrayListMultimap<Float, String> invTagWeights = ArrayListMultimap.create();
  2078. for (Map.Entry<String, Double> tagEl: score.tagWeights.entrySet()) {
  2079. invTagWeights.put((float)(double)tagEl.getValue(), tagEl.getKey());
  2080. }
  2081. for (Map.Entry<Float, Collection<String>> invTagEl: invTagWeights.asMap().entrySet()) {
  2082. if ((null == score.sourceWeights) && (null == score.typeWeights)) { // Easy case
  2083. manualWeights.add(FilterBuilders.termsFilter(DocumentPojo.tags_, invTagEl.getValue().toArray()), invTagEl.getKey());
  2084. }
  2085. else { // need to exclude types or sources
  2086. BoolFilterBuilder typesNotSources = FilterBuilders.boolFilter();
  2087. BoolFilterBuilder tagsAndNothingElse = typesNotSources.must(FilterBuilders.termsFilter(DocumentPojo.tags_, invTagEl.getValue().toArray()));
  2088. if (null != score.sourceWeights) {
  2089. tagsAndNothingElse = tagsAndNothingElse.mustNot(FilterBuilders.termsFilter(DocumentPojo.sourceKey_, score.sourceWeights.keySet().toArray()));
  2090. }
  2091. if (null != score.typeWeights) {
  2092. tagsAndNothingElse = tagsAndNothingElse.mustNot(FilterBuilders.termsFilter(DocumentPojo.mediaType_, score.typeWeights.keySet().toArray()));
  2093. }
  2094. manualWeights.add(tagsAndNothingElse, invTagEl.getKey());
  2095. }
  2096. }
  2097. }//TESTED
  2098. queryObj = manualWeights;
  2099. }
  2100. return queryObj;
  2101. }//TESTED
  2102. ////////////////////////////////////////////////////////////////////////
  2103. // 3] Output parsing
  2104. // (Aggregation output parsing delegated to processing.AggregationUtils)
  2105. ////////////////////////////////////////////////////////////////////////
  2106. // 4] Query management
  2107. private DBCursor getDocIds(DBCollection docDb, ObjectId[] ids, int nFromServerLimit, AdvancedQueryPojo.QueryOutputPojo output, AdvancedQueryPojo.QueryScorePojo score)
  2108. {
  2109. DBCursor docdCursor = null;
  2110. try {
  2111. BasicDBObject query = new BasicDBObject();
  2112. query.put("_id", new BasicDBObject("$in", ids));
  2113. BasicDBObject fields = new BasicDBObject(DocumentPojo.fullText_, 0); // (used to discard community ids -plus legacy versions-, now need it)
  2114. if (!output.docs.metadata) {
  2115. fields.put(DocumentPojo.metadata_, 0);
  2116. }
  2117. boolean bNotAggEnts = ((output.aggregation == null) || (output.aggregation.entsNumReturn == null) || (output.aggregation.entsNumReturn == 0));
  2118. if (bNotAggEnts && (null != score) && (null != score.sigWeight) && (score.sigWeight > 0.0)) {
  2119. bNotAggEnts = false; // (special case, use agg entities to score docs)
  2120. }
  2121. if (!output.docs.ents && bNotAggEnts) {
  2122. fields.put(DocumentPojo.entities_, 0);
  2123. }
  2124. boolean bNotAggEvents = ((output.aggregation == null) || (output.aggregation.eventsNumReturn == null) || (output.aggregation.eventsNumReturn == 0));
  2125. boolean bNotAggFacts = ((output.aggregation == null) || (output.aggregation.factsNumReturn == null) || (output.aggregation.factsNumReturn == 0));
  2126. boolean bNoStandaloneEvents = (null == output.docs.eventsTimeline) || (null == output.docs.numEventsTimelineReturn) || (output.docs.numEventsTimelineReturn == 0);
  2127. if (!output.docs.events && !output.docs.facts && !output.docs.summaries && bNoStandaloneEvents && bNotAggEvents && bNotAggFacts) {
  2128. fields.put(DocumentPojo.associations_, 0);
  2129. }
  2130. //TESTED
  2131. //cm = new CollectionManager();
  2132. boolean bPrimary = true;
  2133. if (_replicaSetDistributionRatio > 0) {
  2134. if (0 != (new Date().getTime() % _replicaSetDistributionRatio)) {
  2135. bPrimary = false;
  2136. }
  2137. }
  2138. if (bPrimary) { // Get from the primary
  2139. docdCursor = docDb.find(query, fields).batchSize(nFromServerLimit);
  2140. }
  2141. else { // Try and get from the secondary if possible
  2142. docdCursor = docDb.find(query, fields).batchSize(nFromServerLimit).setReadPreference(ReadPreference.secondaryPreferred());
  2143. }
  2144. } catch (Exception e) {
  2145. // If an exception occurs log the error
  2146. _logger.error("Address Exception Message: " + e.getMessage(), e);
  2147. }
  2148. return docdCursor;
  2149. }
  2150. //___________________________________________________________________________________
  2151. // Utility function: create a populated query object (by defaults if necessary)
  2152. public static AdvancedQueryPojo createQueryPojo(String queryJson) {
  2153. GsonBuilder gb = AdvancedQueryPojo.getDefaultBuilder();
  2154. gb.registerTypeAdapter(AdvancedQueryPojo.QueryRawPojo.class, new AdvancedQueryPojo.QueryRawPojo.Deserializer());
  2155. AdvancedQueryPojo query = gb.create().fromJson(queryJson, AdvancedQueryPojo.class);
  2156. // Fill in the blanks (a decent attempt has been made to fill out the blanks inside these options)
  2157. if (null == query.input) {
  2158. query.input = new AdvancedQueryPojo.QueryInputPojo();
  2159. }
  2160. if (null == query.score) {
  2161. query.score = new AdvancedQueryPojo.QueryScorePojo();
  2162. }
  2163. if (null == query.output) {
  2164. query.output = new AdvancedQueryPojo.QueryOutputPojo();
  2165. }
  2166. if (null == query.output.docs) { // (Docs are sufficiently important we'll make sure they're always present)
  2167. query.output.docs = new AdvancedQueryPojo.QueryOutputPojo.DocumentOutputPojo();
  2168. }
  2169. return query;
  2170. }//TESTED
  2171. ////////////////////////////////////////////////////////////////////////
  2172. // 5] Unit testing code
  2173. //static private final QueryHandler _test = new QueryHandler(true);
  /**
   * Test-only constructor: sets up default scoring parameters and immediately runs the
   * built-in parsing self-checks, which print any failures to standard output.
   *
   * @param bTest unused, only distinguishes this constructor's signature
   */
  @SuppressWarnings("unused")
  private QueryHandler(boolean bTest) {
      this._scoringParams = new AdvancedQueryPojo.QueryScorePojo();
      testParsingCode();
  }
  2179. private void testParsingCode() {
  2180. // (these are used for logic testing below)
  2181. List<BaseQueryBuilder> qtTerms = new LinkedList<BaseQueryBuilder>();
  2182. List<StringBuffer> qtReadTerms = new LinkedList<StringBuffer>();
  2183. // Various query terms
  2184. AdvancedQueryPojo.QueryTermPojo qt0 = new AdvancedQueryPojo.QueryTermPojo();
  2185. qt0.ftext = "ftext +test";
  2186. AdvancedQueryPojo.QueryTermPojo qt1 = new AdvancedQueryPojo.QueryTermPojo();
  2187. qt1.ftext = "ftext"; qt1.etext = "etext +test";
  2188. AdvancedQueryPojo.QueryTermPojo qt2 = new AdvancedQueryPojo.QueryTermPojo();
  2189. qt2.etext = "etext test"; qt2.entity = "entity:type";
  2190. StringBuffer result = new StringBuffer();
  2191. BaseQueryBuilder resJson = null;
  2192. // "logic0":
  2193. resJson = this.parseQueryTerm(qt0, result);
  2194. qtTerms.add(resJson); qtReadTerms.add(new StringBuffer(result.toString()));
  2195. String logic0a = new Gson().toJson(resJson);
  2196. String logic0b = result.toString();
  2197. String answer0a = "{\"queryString\":\"ftext +test\",\"fuzzyMinSim\":-1.0,\"boost\":-1.0,\"fuzzyPrefixLength\":-1,\"phraseSlop\":-1,\"tieBreaker\":-1.0}";
  2198. String answer0b = "((ftext +test))";
  2199. if (!logic0a.equals(answer0a) || !logic0b.equals(answer0b))
  2200. {
  2201. System.out.println("Fail 0"); System.out.println(logic0a); System.out.println(answer0a); System.out.println(logic0b); System.out.println(answer0b);
  2202. }
  2203. // "logic1":
  2204. resJson = this.parseQueryTerm(qt1, result);
  2205. qtTerms.add(resJson); qtReadTerms.add(new StringBuffer(result.toString()));
  2206. String logic1a = new Gson().toJson(resJson);
  2207. String logic1b = result.toString();
  2208. String answer1a = "{\"clauses\":[{\"queryBuilder\":{\"queryString\":\"ftext\",\"fuzzyMinSim\":-1.0,\"boost\":-1.0,\"fuzzyPrefixLength\":-1,\"phraseSlop\":-1,\"tieBreaker\":-1.0},\"occur\":\"MUST\"},{\"queryBuilder\":{\"queryString\":\"etext\\\\ \\\\+test\",\"fuzzyMinSim\":-1.0,\"boost\":-1.0,\"fuzzyPrefixLength\":-1,\"phraseSlop\":-1,\"tieBreaker\":-1.0},\"occur\":\"MUST\"}],\"boost\":-1.0,\"minimumNumberShouldMatch\":-1}";
  2209. String answer1b = "((ftext) AND (etext\\ \\+test))";
  2210. if (!logic1a.equals(answer1a) || !logic1b.equals(answer1b))
  2211. {
  2212. System.out.println("Fail 1"); System.out.println(logic1a); System.out.println(answer1a); System.out.println(logic1b); System.out.println(answer1b);
  2213. }
  2214. // "logic2":
  2215. resJson = this.parseQueryTerm(qt2, result);
  2216. qtTerms.add(resJson); qtReadTerms.add(new StringBuffer(result.toString()));
  2217. String logic2a = new Gson().toJson(resJson);
  2218. String logic2b = result.toString();
  2219. String answer2a = "{\"clauses\":[{\"queryBuilder\":{\"queryString\":\"etext\\\\ test\",\"fuzzyMinSim\":-1.0,\"boost\":-1.0,\"fuzzyPrefixLength\":-1,\"phraseSlop\":-1,\"tieBreaker\":-1.0},\"occur\":\"MUST\"},{\"queryBuilder\":{\"name\":\"entities.index\",\"value\":\"entity/type\",\"boost\":-1.0},\"occur\":\"MUST\"}],\"boost\":-1.0,\"minimumNumberShouldMatch\":-1}";
  2220. String answer2b = "((etext\\ test) AND (entities.index:\"entity/type\"))";
  2221. if (!logic2a.equals(answer2a) || !logic2b.equals(answer2b))
  2222. {
  2223. System.out.println("Fail 2"); System.out.println(logic2a); System.out.println(answer2a); System.out.println(logic2b); System.out.println(answer2b);
  2224. }
  2225. // (entityValue/entityType tested by logic3 below)
  2226. // Alias expansion (leave this commented out since results depend on current DB - ie check by eye)
  2227. AdvancedQueryPojo.QueryTermPojo qt3a = new AdvancedQueryPojo.QueryTermPojo();
  2228. qt3a.ftext = "ftext"; qt3a.etext = "etext"; qt3a.entity = "barack obama/person";
  2229. qt3a.entityOpt = new AdvancedQueryPojo.QueryTermPojo.EntityOptionPojo();
  2230. qt3a.entityOpt.expandAlias = true;
  2231. AdvancedQueryPojo.QueryTermPojo qt3b = new AdvancedQueryPojo.QueryTermPojo();
  2232. qt3b.entity = "new york city,new york,united states:city";
  2233. qt3b.entityOpt = new AdvancedQueryPojo.QueryTermPojo.EntityOptionPojo();
  2234. qt3b.entityOpt.expandAlias = true;
  2235. AdvancedQueryPojo.QueryTermPojo qt3c = new AdvancedQueryPojo.QueryTermPojo();
  2236. qt3c.entity = "entity3/type3";
  2237. qt3c.entityOpt = new AdvancedQueryPojo.QueryTermPojo.EntityOptionPojo();
  2238. qt3c.entityOpt.expandAlias = false;
  2239. AdvancedQueryPojo.QueryTermPojo qt3d = new AdvancedQueryPojo.QueryTermPojo();
  2240. qt3d.entity = "entity4/type4";
  2241. qt3d.entityOpt = null;
  2242. AdvancedQueryPojo.QueryTermPojo qt3e = new AdvancedQueryPojo.QueryTermPojo();
  2243. qt3e.etext = "etext"; qt3e.entityValue = "facebook inc"; // no entity type, ie shouldn't request anything
  2244. qt3e.entityOpt = new AdvancedQueryPojo.QueryTermPojo.EntityOptionPojo();
  2245. qt3e.entityOpt.expandAlias = true;
  2246. AdvancedQueryPojo.QueryTermPojo qt3f = new AdvancedQueryPojo.QueryTermPojo();
  2247. qt3f.entityValue = "facebook inc"; qt3f.entityType = "company";
  2248. qt3f.entityOpt = new AdvancedQueryPojo.QueryTermPojo.EntityOptionPojo();
  2249. qt3f.entityOpt.expandAlias = true;
  2250. AdvancedQueryPojo.QueryTermPojo qt3g = new AdvancedQueryPojo.QueryTermPojo();
  2251. qt3g.ftext = "No entity so should ignore the entityOpt parameters";
  2252. qt3g.entityOpt = new AdvancedQueryPojo.QueryTermPojo.EntityOptionPojo();
  2253. qt3g.entityOpt.expandAlias = true;
  2254. List<AdvancedQueryPojo.QueryTermPojo> qtList = Arrays.asList(qt3a, qt3b, qt3c, qt3d, qt3e, qt3f, qt3g);
  2255. this.handleEntityExpansion(null, qtList, null, "4c927585d591d31d7b37097a");
  2256. String sAnswer_3_1 = "[barack obama/person, facebook inc/company, new york city,new york,united states/city]";
  2257. String sResults_3_1 = Arrays.toString(_tmpEntityExpansionList.toArray());
  2258. if (!sAnswer_3_1.equals(sResults_3_1)) {
  2259. System.out.println("Fail 3.1"); System.out.println(sAnswer_3_1); System.out.println(sResults_3_1);
  2260. }
  2261. String [] sResults_3_2 = _tmpAliasMap.get("barack obama/person").toArray(new String[0]);
  2262. if (null != sResults_3_2) {
  2263. //DEBUG
  2264. //System.out.println(Arrays.toString(sResults_3_2));
  2265. resJson = this.parseQueryTerm(qt3a, result);
  2266. String logic3a_1 = new Gson().toJson(resJson);
  2267. String logic3a_2 = result.toString();
  2268. //DEBUG
  2269. //System.out.println(logic3a_1); System.out.println(logic3a_2);
  2270. if (!logic3a_2.contains("$aliases")) {
  2271. System.out.println("Fail 3.2a"); System.out.println(logic3a_1); System.out.println(logic3a_2);
  2272. }
  2273. }
  2274. else {
  2275. System.out.println("Fail 3.2a");
  2276. }
  2277. sResults_3_2 = _tmpAliasMap.get("facebook inc/company").toArray(new String[0]);
  2278. if (null != sResults_3_2) {
  2279. //DEBUG
  2280. //System.out.println(Arrays.toString(sResults_3_2));
  2281. resJson = this.parseQueryTerm(qt3b, result);
  2282. String logic3b_1 = new Gson().toJson(resJson);
  2283. String logic3b_2 = result.toString();
  2284. //DEBUG
  2285. //System.out.println(logic3b_1); System.out.println(logic3b_2);
  2286. if (!logic3b_2.contains("$aliases")) {
  2287. System.out.println("Fail 3.2b"); System.out.println(logic3b_1); System.out.println(logic3b_2);
  2288. }
  2289. }
  2290. else {
  2291. System.out.println("Fail 3.2b");
  2292. }
  2293. sResults_3_2 = _tmpAliasMap.get("new york city,new york,united states/city").toArray(new String[0]);
  2294. if (null != sResults_3_2) {
  2295. //DEBUG
  2296. //System.out.println(Arrays.toString(sResults_3_2));
  2297. resJson = this.parseQueryTerm(qt3f, result);
  2298. String logic3f_1 = new Gson().toJson(resJson);
  2299. String logic3f_2 = result.toString();
  2300. //DEBUG
  2301. //System.out.println(logic3f_1); System.out.println(logic3f_2);
  2302. if (!logic3f_2.contains("$aliases")) {
  2303. System.out.println("Fail 3.2f"); System.out.println(logic3f_1); System.out.println(logic3f_2);
  2304. }
  2305. }
  2306. else {
  2307. System.out.println("Fail 3.2f");
  2308. }
  2309. // Just check we don't normally get aliases:
  2310. resJson = this.parseQueryTerm(qt3e, result);
  2311. String logic3e_1 = new Gson().toJson(resJson);
  2312. String logic3e_2 = result.toString();
  2313. //DEBUG
  2314. //System.out.println(logic3e_1); System.out.println(logic3e_2);
  2315. if (logic3e_2.contains("$aliases")) {
  2316. System.out.println("Fail 3.ef"); System.out.println(logic3e_1); System.out.println(logic3e_2);
  2317. }
  2318. //Date debugging:
  2319. _nNow = 1284666757165L; //Thu, 16 Sep 2010 19:52:37 GMT
  2320. // Lots of nasty time cases, sigh
  2321. AdvancedQueryPojo.QueryTermPojo qt5 = new AdvancedQueryPojo.QueryTermPojo();
  2322. qt5.entity = "entity/type"; qt5.time = new AdvancedQueryPojo.QueryTermPojo.TimeTermPojo();
  2323. AdvancedQueryPojo.QueryTermPojo qt6 = new AdvancedQueryPojo.QueryTermPojo();
  2324. qt6.time = new AdvancedQueryPojo.QueryTermPojo.TimeTermPojo();
  2325. qt6.time.min = "1284666757164"; qt6.time.max = "now";
  2326. AdvancedQueryPojo.QueryTermPojo qt7 = new AdvancedQueryPojo.QueryTermPojo();
  2327. qt7.time = new AdvancedQueryPojo.QueryTermPojo.TimeTermPojo();
  2328. qt7.time.max = "1284666757164";
  2329. AdvancedQueryPojo.QueryTermPojo qt8 = new AdvancedQueryPojo.QueryTermPojo();
  2330. qt8.time = new AdvancedQueryPojo.QueryTermPojo.TimeTermPojo();
  2331. qt8.time.min = "02/10/2000"; qt8.time.max = "02.10.2000";
  2332. AdvancedQueryPojo.QueryTermPojo qt9 = new AdvancedQueryPojo.QueryTermPojo();
  2333. qt9.time = new AdvancedQueryPojo.QueryTermPojo.TimeTermPojo();
  2334. qt9.time.min = "10 Feb 2000"; qt9.time.max = "10 Feb 2000 00:00:00";
  2335. AdvancedQueryPojo.QueryTermPojo qt9b = new AdvancedQueryPojo.QueryTermPojo();
  2336. qt9b.time = new AdvancedQueryPojo.QueryTermPojo.TimeTermPojo();
  2337. qt9b.time.min = "10 Feb 2000"; qt9b.time.max = "10 Feb 2000";
  2338. AdvancedQueryPojo.QueryTermPojo qt10 = new AdvancedQueryPojo.QueryTermPojo();
  2339. qt10.time = new AdvancedQueryPojo.QueryTermPojo.TimeTermPojo();
  2340. qt10.time.max = "20000210";
  2341. // "logic5":
  2342. resJson = this.parseQueryTerm(qt5, result);
  2343. String logic5a = new Gson().toJson(resJson);
  2344. String logic5b = result.toString();
  2345. String answer5a = "{\"clauses\":[{\"queryBuilder\":{\"name\":\"entities.index\",\"value\":\"entity/type\",\"boost\":-1.0},\"occur\":\"MUST\"},{\"queryBuilder\":{\"filterBuilder\":{\"name\":\"publishedDate\",\"from\":0,\"to\":1284666757165,\"includeLower\":true,\"includeUpper\":true},\"boost\":1.0},\"occur\":\"MUST\"}],\"boost\":-1.0,\"minimumNumberShouldMatch\":-1}";
  2346. String answer5b = "((entities.index:\"entity/type\") AND (publishedDate:[0 TO Thu Sep 16 15:52:37 EDT 2010]))";
  2347. if (!logic5a.equals(answer5a) || !logic5b.equals(answer5b))
  2348. {
  2349. System.out.println("Fail 5"); System.out.println(logic5a); System.out.println(answer5a); System.out.println(logic5b); System.out.println(answer5b);
  2350. }
  2351. // "logic6":
  2352. resJson = this.parseQueryTerm(qt6, result);
  2353. String logic6a = new Gson().toJson(resJson);
  2354. String logic6b = result.toString();
  2355. String answer6a = "{\"filterBuilder\":{\"name\":\"publishedDate\",\"from\":1284666757164,\"to\":1284666757165,\"includeLower\":true,\"includeUpper\":true},\"boost\":1.0}";
  2356. String answer6b = "((publishedDate:[Thu Sep 16 15:52:37 EDT 2010 TO Thu Sep 16 15:52:37 EDT 2010]))";
  2357. if (!logic6a.equals(answer6a) || !logic6b.equals(answer6b))
  2358. {
  2359. System.out.println("Fail 6"); System.out.println(logic6a); System.out.println(answer6a); System.out.println(logic6b); System.out.println(answer6b);
  2360. }
  2361. // "logic7"
  2362. resJson = this.parseQueryTerm(qt7, result);
  2363. qtTerms.add(resJson); qtReadTerms.add(new StringBuffer(result.toString()));
  2364. String logic7a = new Gson().toJson(resJson);
  2365. String logic7b = result.toString();
  2366. String answer7a = "{\"filterBuilder\":{\"name\":\"publishedDate\",\"from\":0,\"to\":1284666757164,\"includeLower\":true,\"includeUpper\":true},\"boost\":1.0}";
  2367. String answer7b = "((publishedDate:[0 TO Thu Sep 16 15:52:37 EDT 2010]))";
  2368. if (!logic7a.equals(answer7a) || !logic7b.equals(answer7b))
  2369. {
  2370. System.out.println("Fail 7"); System.out.println(logic7a); System.out.println(answer7a); System.out.println(logic7b); System.out.println(answer7b);
  2371. }
  2372. // "logic8"
  2373. resJson = this.parseQueryTerm(qt8, result);
  2374. String logic8a = new Gson().toJson(resJson);
  2375. String logic8b = result.toString();
  2376. String answer8a = "{\"filterBuilder\":{\"name\":\"publishedDate\",\"from\":950158800000,\"to\":950227199999,\"includeLower\":true,\"includeUpper\":true},\"boost\":1.0}";
  2377. String answer8b = "((publishedDate:[Thu Feb 10 00:00:00 EST 2000 TO Thu Feb 10 18:59:59 EST 2000]))";
  2378. if (!logic8a.equals(answer8a) || !logic8b.equals(answer8b))
  2379. {
  2380. System.out.println("Fail 8"); System.out.println(logic8a); System.out.println(answer8a); System.out.println(logic8b); System.out.println(answer8b);
  2381. }
  2382. // "logic9" (different to 8 because hour specified)
  2383. resJson = this.parseQueryTerm(qt9, result);
  2384. String logic9a = new Gson().toJson(resJson);
  2385. String logic9b = result.toString();
  2386. String answer9a = "{\"filterBuilder\":{\"name\":\"publishedDate\",\"from\":950158800000,\"to\":950158800000,\"includeLower\":true,\"includeUpper\":true},\"boost\":1.0}";
  2387. String answer9b = "((publishedDate:[Thu Feb 10 00:00:00 EST 2000 TO Thu Feb 10 00:00:00 EST 2000]))";
  2388. if (!logic9a.equals(answer9a) || !logic9b.equals(answer9b))
  2389. {
  2390. System.out.println("Fail 9"); System.out.println(logic9a); System.out.println(answer9a); System.out.println(logic9b); System.out.println(answer9b);
  2391. }
  2392. // "logic9b" (answer identical to 8...)
  2393. resJson = this.parseQueryTerm(qt9b, result);
  2394. String logic9ba = new Gson().toJson(resJson);
  2395. String logic9bb = result.toString();
  2396. String answer9ba = "{\"filterBuilder\":{\"name\":\"publishedDate\",\"from\":950158800000,\"to\":950227199999,\"includeLower\":true,\"includeUpper\":true},\"boost\":1.0}";
  2397. String answer9bb = "((publishedDate:[Thu Feb 10 00:00:00 EST 2000 TO Thu Feb 10 18:59:59 EST 2000]))";
  2398. if (!logic9ba.equals(answer9ba) || !logic9bb.equals(answer9bb))
  2399. {
  2400. System.out.println("Fail 9b"); System.out.println(logic9ba); System.out.println(answer9ba); System.out.println(logic9bb); System.out.println(answer9bb);
  2401. }
  2402. // "logic10"
  2403. resJson = this.parseQueryTerm(qt10, result);
  2404. String logic10a = new Gson().toJson(resJson);
  2405. String logic10b = result.toString();
  2406. String answer10a = "{\"filterBuilder\":{\"name\":\"publishedDate\",\"from\":0,\"to\":950227199999,\"includeLower\":true,\"includeUpper\":true},\"boost\":1.0}";
  2407. String answer10b = "((publishedDate:[0 TO Thu Feb 10 18:59:59 EST 2000]))";
  2408. if (!logic10a.equals(answer10a) || !logic10b.equals(answer10b))
  2409. {
  2410. System.out.println("Fail 10"); System.out.println(logic10a); System.out.println(answer10a); System.out.println(logic10b); System.out.println(answer10b);
  2411. }
  2412. // GEO test cases:
  2413. AdvancedQueryPojo.QueryTermPojo qt11 = new AdvancedQueryPojo.QueryTermPojo();
  2414. qt11.geo = new AdvancedQueryPojo.QueryTermPojo.GeoTermPojo();
  2415. qt11.geo.centerll = "40.12,-71.34";
  2416. qt11.geo.dist = "100km";
  2417. AdvancedQueryPojo.QueryTermPojo qt12 = new AdvancedQueryPojo.QueryTermPojo();
  2418. qt12.geo = new AdvancedQueryPojo.QueryTermPojo.GeoTermPojo();
  2419. qt12.geo.centerll = "(4.1,-171.34)";
  2420. qt12.geo.dist = "100";
  2421. AdvancedQueryPojo.QueryTermPojo qt13 = new AdvancedQueryPojo.QueryTermPojo();
  2422. qt13.geo = new AdvancedQueryPojo.QueryTermPojo.GeoTermPojo();
  2423. qt13.geo.minll = "(4.1,-171.34)";
  2424. qt13.geo.maxll = "40.12,-71.34";
  2425. AdvancedQueryPojo.QueryTermPojo qt14 = new AdvancedQueryPojo.QueryTermPojo();
  2426. qt14.geo = new AdvancedQueryPojo.QueryTermPojo.GeoTermPojo();
  2427. qt14.geo.minll = "4.1,-171.34";
  2428. qt14.geo.maxll = "(40.12,-71.34)";
  2429. // "logic11"
  2430. resJson = this.parseQueryTerm(qt11, result);
  2431. qtTerms.add(resJson); qtReadTerms.add(new StringBuffer(result.toString()));
  2432. String logic11a = new Gson().toJson(resJson);
  2433. String logic11b = result.toString();
  2434. String answer11a = "{\"filterBuilder\":{\"name\":\"locs\",\"distance\":\"100km\",\"lat\":40.12,\"lon\":-71.34},\"boost\":1.0}";
  2435. String answer11b = "((dist(*.geotag, (40.12,-71.34)) < 100km))";
  2436. if (!logic11a.equals(answer11a) || !logic11b.equals(answer11b))
  2437. {
  2438. System.out.println("Fail 11"); System.out.println(logic11a); System.out.println(answer11a); System.out.println(logic11b); System.out.println(answer11b);
  2439. }
  2440. // "logic12"
  2441. resJson = this.parseQueryTerm(qt12, result);
  2442. String logic12a = new Gson().toJson(resJson);
  2443. String logic12b = result.toString();
  2444. String answer12a = "{\"filterBuilder\":{\"name\":\"locs\",\"distance\":\"100.0km\",\"lat\":4.1,\"lon\":-171.34},\"boost\":1.0}";
  2445. String answer12b = "((dist(*.geotag, (4.1,-171.34)) < 100))";
  2446. if (!logic12a.equals(answer12a) || !logic12b.equals(answer12b))
  2447. {
  2448. System.out.println("Fail 12"); System.out.println(logic12a); System.out.println(answer12a); System.out.println(logic12b); System.out.println(answer12b);
  2449. }
  2450. // "logic13"
  2451. resJson = this.parseQueryTerm(qt13, result);
  2452. String logic13a = new Gson().toJson(resJson);
  2453. String logic13b = result.toString();
  2454. String answer13a = "{\"filterBuilder\":{\"name\":\"locs\",\"topLeft\":{\"lat\":40.12,\"lon\":-171.34},\"bottomRight\":{\"lat\":4.1,\"lon\":-71.34}},\"boost\":1.0}";
  2455. String answer13b = "((*.geotag: [(4.1,-171.34), (40.12,-71.34)]))";
  2456. if (!logic13a.equals(answer13a) || !logic13b.equals(answer13b))
  2457. {
  2458. System.out.println("Fail 13"); System.out.println(logic13a); System.out.println(answer13a); System.out.println(logic13b); System.out.println(answer13b);
  2459. }
  2460. // "logic14"
  2461. resJson = this.parseQueryTerm(qt14, result);
  2462. String logic14a = new Gson().toJson(resJson);
  2463. String logic14b = result.toString();
  2464. String answer14a = "{\"filterBuilder\":{\"name\":\"locs\",\"topLeft\":{\"lat\":40.12,\"lon\":-171.34},\"bottomRight\":{\"lat\":4.1,\"lon\":-71.34}},\"boost\":1.0}";
  2465. String answer14b = "((*.geotag: [(4.1,-171.34), (40.12,-71.34)]))";
  2466. if (!logic14a.equals(answer14a) || !logic14b.equals(answer14b))
  2467. {
  2468. System.out.println("Fail 14"); System.out.println(logic14a); System.out.println(answer14a); System.out.println(logic14b); System.out.println(answer14b);
  2469. }
  2470. // Logic test code
  2471. // (saved 5 terms in the qtTerms and qtReadTerms: 0,1,2,7,11)
  2472. String parser1 = "1 and 2 AND 3";
  2473. SimpleBooleanParser.SimpleBooleanParserMTree tree = SimpleBooleanParser.parseExpression(parser1);
  2474. String parserres = SimpleBooleanParser.traverse(tree, false);
  2475. String parserans = "$0: & (3 2 1 ) ";
  2476. if (!parserans.equals(parserres)) {
  2477. System.out.println("Fail p1"); System.out.println(parser1); System.out.println(parserres);
  2478. }
  2479. BoolQueryBuilder bq = QueryBuilders.boolQuery(); result.setLength(0);
  2480. this.parseLogicRecursive(tree, bq, qtTerms.toArray(new BaseQueryBuilder[6]), qtReadTerms.toArray(new StringBuffer[6]), result);
  2481. String parseransQ = "((etext\\ test) AND (entities.index:\"entity/type\")) and ((ftext) AND (etext\\ \\+test)) and ((ftext +test))";
  2482. if (!parseransQ.equals(result.toString())) {
  2483. System.out.println("Fail p1"); System.out.println(parseransQ); System.out.println(result.toString());
  2484. }
  2485. String parser2 = "1 or 2 and 3 or 4";
  2486. tree = SimpleBooleanParser.parseExpression(parser2);
  2487. parserres = SimpleBooleanParser.traverse(tree, false);
  2488. parserans = "$0: | ($1 1 ) $1: | (4 $2 ) $2: & (3 2 ) ";
  2489. if (!parserans.equals(parserres)) {
  2490. System.out.println("Fail p2"); System.out.println(parser2); System.out.println(parserres);
  2491. }
  2492. bq = QueryBuilders.boolQuery(); result.setLength(0);
  2493. this.parseLogicRecursive(tree, bq, qtTerms.toArray(new BaseQueryBuilder[6]), qtReadTerms.toArray(new StringBuffer[6]), result);
  2494. parseransQ = "(((publishedDate:[0 TO Thu Sep 16 15:52:37 EDT 2010])) or (((etext\\ test) AND (entities.index:\"entity/type\")) and ((ftext) AND (etext\\ \\+test)))) or ((ftext +test))";
  2495. if (!parseransQ.equals(result.toString())) {
  2496. System.out.println("Fail p2"); System.out.println(parseransQ); System.out.println(result.toString());
  2497. }
  2498. String parser3 = "(1 or 2) and 3 or 4";
  2499. tree = SimpleBooleanParser.parseExpression(parser3);
  2500. parserres = SimpleBooleanParser.traverse(tree, false);
  2501. parserans = "$0: | (4 $1 ) $1: & (3 $2 ) $2: | (2 1 ) ";
  2502. if (!parserans.equals(parserres)) {
  2503. System.out.println("Fail p3"); System.out.println(parser3); System.out.println(parserres);
  2504. }
  2505. bq = QueryBuilders.boolQuery(); result.setLength(0);
  2506. this.parseLogicRecursive(tree, bq, qtTerms.toArray(new BaseQueryBuilder[6]), qtReadTerms.toArray(new StringBuffer[6]), result);
  2507. parseransQ = "((publishedDate:[0 TO Thu Sep 16 15:52:37 EDT 2010])) or (((etext\\ test) AND (entities.index:\"entity/type\")) and (((ftext) AND (etext\\ \\+test)) or ((ftext +test))))";
  2508. if (!parseransQ.equals(result.toString())) {
  2509. System.out.println("Fail p3"); System.out.println(parseransQ); System.out.println(result.toString());
  2510. }
  2511. String parser4 = "1 or 2 and (3 or 4)";
  2512. tree = SimpleBooleanParser.parseExpression(parser4);
  2513. parserres = SimpleBooleanParser.traverse(tree, false);
  2514. parserans = "$0: | ($1 1 ) $1: & ($2 2 ) $2: | (4 3 ) ";
  2515. if (!parserans.equals(parserres)) {
  2516. System.out.println("Fail p4"); System.out.println(parser4); System.out.println(parserres);
  2517. }
  2518. bq = QueryBuilders.boolQuery(); result.setLength(0);
  2519. this.parseLogicRecursive(tree, bq, qtTerms.toArray(new BaseQueryBuilder[6]), qtReadTerms.toArray(new StringBuffer[6]), result);
  2520. parseransQ = "((((publishedDate:[0 TO Thu Sep 16 15:52:37 EDT 2010])) or ((etext\\ test) AND (entities.index:\"entity/type\"))) and ((ftext) AND (etext\\ \\+test))) or ((ftext +test))";
  2521. if (!parseransQ.equals(result.toString())) {
  2522. System.out.println("Fail p4"); System.out.println(parseransQ); System.out.println(result.toString());
  2523. }
  2524. String parser5 = "1 or not 2 and not (3 or 4)";
  2525. tree = SimpleBooleanParser.parseExpression(parser5);
  2526. parserres = SimpleBooleanParser.traverse(tree, false);
  2527. parserans = "$0: | ($1 1 ) $1: & ($2 -2 ) $2: -| (4 3 ) ";
  2528. if (!parserans.equals(parserres)) {
  2529. System.out.println("Fail p5"); System.out.println(parser5); System.out.println(parserres);
  2530. }
  2531. bq = QueryBuilders.boolQuery(); result.setLength(0);
  2532. this.parseLogicRecursive(tree, bq, qtTerms.toArray(new BaseQueryBuilder[6]), qtReadTerms.toArray(new StringBuffer[6]), result);
  2533. parseransQ = "(not (((publishedDate:[0 TO Thu Sep 16 15:52:37 EDT 2010])) or ((etext\\ test) AND (entities.index:\"entity/type\"))) and not ((ftext) AND (etext\\ \\+test))) or ((ftext +test))";
  2534. if (!parseransQ.equals(result.toString())) {
  2535. System.out.println("Fail p5"); System.out.println(parseransQ); System.out.println(result.toString());
  2536. }
  2537. String parser6 = "not (1 or (2 and (3 or 4) and 5))";
  2538. tree = SimpleBooleanParser.parseExpression(parser6);
  2539. parserres = SimpleBooleanParser.traverse(tree, false);
  2540. parserans = "$0: & ($1 ) $1: -| ($2 1 ) $2: & (5 $3 2 ) $3: | (4 3 ) ";
  2541. if (!parserans.equals(parserres)) {
  2542. System.out.println("Fail p6"); System.out.println(parser6); System.out.println(parserres);
  2543. }
  2544. bq = QueryBuilders.boolQuery(); result.setLength(0);
  2545. this.parseLogicRecursive(tree, bq, qtTerms.toArray(new BaseQueryBuilder[6]), qtReadTerms.toArray(new StringBuffer[6]), result);
  2546. parseransQ = "not ((((dist(*.geotag, (40.12,-71.34)) < 100km)) and (((publishedDate:[0 TO Thu Sep 16 15:52:37 EDT 2010])) or ((etext\\ test) AND (entities.index:\"entity/type\"))) and ((ftext) AND (etext\\ \\+test))) or ((ftext +test)))";
  2547. if (!parseransQ.equals(result.toString())) {
  2548. System.out.println("Fail p6"); System.out.println(parseransQ); System.out.println(result.toString());
  2549. }
  2550. String parser7 = "not (1 or (2 and (3 or 4) or 5))";
  2551. tree = SimpleBooleanParser.parseExpression(parser7);
  2552. parserres = SimpleBooleanParser.traverse(tree, false);
  2553. parserans = "$0: & ($1 ) $1: -| ($2 1 ) $2: | (5 $3 ) $3: & ($4 2 ) $4: | (4 3 ) ";
  2554. if (!parserans.equals(parserres)) {
  2555. System.out.println("Fail p7"); System.out.println(parser7); System.out.println(parserres);
  2556. }
  2557. bq = QueryBuilders.boolQuery(); result.setLength(0);
  2558. this.parseLogicRecursive(tree, bq, qtTerms.toArray(new BaseQueryBuilder[6]), qtReadTerms.toArray(new StringBuffer[6]), result);
  2559. parseransQ = "not ((((dist(*.geotag, (40.12,-71.34)) < 100km)) or ((((publishedDate:[0 TO Thu Sep 16 15:52:37 EDT 2010])) or ((etext\\ test) AND (entities.index:\"entity/type\"))) and ((ftext) AND (etext\\ \\+test)))) or ((ftext +test)))";
  2560. if (!parseransQ.equals(result.toString())) {
  2561. System.out.println("Fail p7"); System.out.println(parseransQ); System.out.println(result.toString());
  2562. }
  2563. // Pure parsing tests:
  2564. String parser8 = "( 1 OR 2 ) OR ( 3 OR 4 ) AND 5";
  2565. tree = SimpleBooleanParser.parseExpression(parser8);
  2566. parserres = SimpleBooleanParser.traverse(tree, false);
  2567. parserans = "$0: | ($1 $2 ) $2: | (2 1 ) $1: & (5 $3 ) $3: | (4 3 ) ";
  2568. if (!parserans.equals(parserres)) {
  2569. System.out.println("Fail p8"); System.out.println(parser8); System.out.println(parserres);
  2570. }
  2571. String parser9 = "(( 1 OR 2 ) OR ( 3 OR 4 )) AND 5";
  2572. tree = SimpleBooleanParser.parseExpression(parser9);
  2573. parserres = SimpleBooleanParser.traverse(tree, false);
  2574. parserans = "$0: & (5 $1 ) $1: | ($2 $3 ) $3: | (2 1 ) $2: | (4 3 ) ";
  2575. if (!parserans.equals(parserres)) {
  2576. System.out.println("Fail p9"); System.out.println(parser9); System.out.println(parserres);
  2577. }
  2578. // Some proximity test code
  2579. // First off, check out the distance code
  2580. Double d1 = getDistance("1000");
  2581. if (d1 != 1000.0) {
  2582. System.out.println("1000 vs " + d1);
  2583. }
  2584. d1 = getDistance("10000m");
  2585. if (d1 != 10000*1.150779) {
  2586. System.out.println("1000m vs " + d1);
  2587. }
  2588. d1 = getDistance("1000mi");
  2589. if (d1 != 1000*1.150779) {
  2590. System.out.println("1000mi vs " + d1);
  2591. }
  2592. d1 = getDistance("1000km");
  2593. if (d1 != 1000.0) {
  2594. System.out.println("1000km vs " + d1);
  2595. }
  2596. d1 = getDistance("1000nm");
  2597. if (d1 != 1000.0*1.852) {
  2598. System.out.println("1000nm vs " + d1);
  2599. }
  2600. // Then interval test code
  2601. Long l1 = getInterval("month", 'x');
  2602. if (2592000000L != l1) {
  2603. System.out.println("month vs " + l1);
  2604. }
  2605. l1 = getInterval("1", 'd'); // (day)
  2606. if (86400000L != l1) {
  2607. System.out.println("1d vs " + l1);
  2608. }
  2609. l1 = getInterval("10", 'm'); // (month)
  2610. if (25920000000L != l1) {
  2611. System.out.println("10m vs " + l1);
  2612. }
  2613. l1 = getInterval("1", 'y'); // (year)
  2614. if (31536000000L != l1) {
  2615. System.out.println("1y vs " + l1);
  2616. }
  2617. // OK this is the difficult bit:
  2618. AdvancedQueryPojo.QueryScorePojo scoreParams = new AdvancedQueryPojo.QueryScorePojo();
  2619. // Can't unit test this properly, so just rely on the "TEST CODE"
  2620. //NO PROXIMITY SCORING
  2621. addProximityBasedScoring(QueryBuilders.matchAllQuery(), null, scoreParams, null, false);
  2622. // Geo only:
  2623. scoreParams.geoProx = new AdvancedQueryPojo.QueryScorePojo.GeoProxTermPojo();
  2624. scoreParams.geoProx.ll = "10.0,20.0";
  2625. scoreParams.geoProx.decay = "100km";
  2626. addProximityBasedScoring(QueryBuilders.matchAllQuery(), null, scoreParams, null, false);
  2627. // Geo+time:
  2628. scoreParams.geoProx.ll = "(10.0,20.0)"; // (double check this version works)
  2629. scoreParams.geoProx.decay = "1000nm";
  2630. scoreParams.timeProx = new AdvancedQueryPojo.QueryScorePojo.TimeProxTermPojo();
  2631. scoreParams.timeProx.decay = "month";
  2632. scoreParams.timeProx.time = "2000-01-01";
  2633. addProximityBasedScoring(QueryBuilders.matchAllQuery(), null, scoreParams, null, false);
  2634. // Time only:
  2635. scoreParams.geoProx = null;
  2636. scoreParams.timeProx.decay = "1m";
  2637. addProximityBasedScoring(QueryBuilders.matchAllQuery(), null, scoreParams, null, false);
  2638. }
  /**
   * Enumerates the four geo-parse field options used by this handler.
   *
   * NOTE(review): the meaning of each constant is inferred from its name only
   * (ALL = every geo field, ASSOC = associations, DOC = document-level,
   * ENT = entities) - confirm against the callers before relying on it.
   * Declaration order is kept as-is in case anything depends on ordinals.
   */
  public enum GeoParseField
  {
    ALL,
    ASSOC,
    DOC,
    ENT
  }
  2643. }