/core/infinit.e.api.server/WEB-INF/src/com/ikanow/infinit/e/api/knowledge/QueryHandler.java
Java | 3048 lines | 2059 code | 327 blank | 662 comment | 689 complexity | b78d32d20dbf27f2fa21cc4895940870 MD5 | raw file
Possible License(s): BSD-3-Clause
Large files are truncated, but you can click here to view the full file
- /*******************************************************************************
- * Copyright 2012, The Infinit.e Open Source Project.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- ******************************************************************************/
- package com.ikanow.infinit.e.api.knowledge;
- import java.io.IOException;
- import java.net.UnknownHostException;
- import java.util.ArrayList;
- import java.util.Arrays;
- import java.util.Calendar;
- import java.util.Collection;
- import java.util.Date;
- import java.util.HashMap;
- import java.util.LinkedList;
- import java.util.List;
- import java.util.Map;
- import java.util.Set;
- import java.util.TreeSet;
- import java.util.concurrent.Semaphore;
- import java.util.concurrent.TimeUnit;
- import java.util.regex.Pattern;
- import org.apache.commons.lang.time.DateUtils;
- import org.apache.commons.lang.time.DateFormatUtils;
- import org.apache.log4j.Logger;
- import org.bson.types.ObjectId;
- import org.elasticsearch.action.search.SearchResponse;
- import org.elasticsearch.client.action.search.SearchRequestBuilder;
- import org.elasticsearch.common.joda.time.Interval;
- import org.elasticsearch.common.unit.DistanceUnit;
- import org.elasticsearch.index.query.BaseQueryBuilder;
- import org.elasticsearch.index.query.BoolFilterBuilder;
- import org.elasticsearch.index.query.BoolQueryBuilder;
- import org.elasticsearch.index.query.CrossVersionQueryBuilders;
- import org.elasticsearch.index.query.CustomFiltersScoreQueryBuilder;
- import org.elasticsearch.index.query.FilterBuilders;
- import org.elasticsearch.index.query.GeoDistanceFilterBuilder;
- import org.elasticsearch.index.query.QueryBuilders;
- import org.elasticsearch.index.query.RangeQueryBuilder;
- import org.elasticsearch.search.sort.SortOrder;
- import com.google.common.collect.ArrayListMultimap;
- import com.google.gson.Gson;
- import com.google.gson.GsonBuilder;
- import com.ikanow.infinit.e.api.knowledge.aliases.AliasLookupTable;
- import com.ikanow.infinit.e.api.knowledge.aliases.AliasManager;
- import com.ikanow.infinit.e.api.knowledge.processing.AggregationUtils;
- import com.ikanow.infinit.e.api.knowledge.processing.QueryDecayFactory;
- import com.ikanow.infinit.e.api.knowledge.processing.ScoringUtils;
- import com.ikanow.infinit.e.api.social.sharing.ShareHandler;
- import com.ikanow.infinit.e.api.utils.PropertiesManager;
- import com.ikanow.infinit.e.api.utils.SimpleBooleanParser;
- import com.ikanow.infinit.e.api.utils.SocialUtils;
- import com.ikanow.infinit.e.data_model.Globals;
- import com.ikanow.infinit.e.data_model.api.BasePojoApiMap;
- import com.ikanow.infinit.e.data_model.api.ResponsePojo;
- import com.ikanow.infinit.e.data_model.api.ResponsePojo.ResponseObject;
- import com.ikanow.infinit.e.data_model.api.knowledge.AdvancedQueryPojo;
- import com.ikanow.infinit.e.data_model.api.knowledge.StatisticsPojo;
- import com.ikanow.infinit.e.data_model.control.DocumentQueueControlPojo;
- import com.ikanow.infinit.e.data_model.index.ElasticSearchManager;
- import com.ikanow.infinit.e.data_model.index.document.DocumentPojoIndexMap;
- import com.ikanow.infinit.e.data_model.interfaces.query.IQueryExtension;
- import com.ikanow.infinit.e.data_model.store.DbManager;
- import com.ikanow.infinit.e.data_model.store.custom.mapreduce.CustomMapReduceJobPojo;
- import com.ikanow.infinit.e.data_model.store.document.AssociationPojo;
- import com.ikanow.infinit.e.data_model.store.document.DocumentPojo;
- import com.ikanow.infinit.e.data_model.store.document.EntityPojo;
- import com.ikanow.infinit.e.data_model.store.feature.entity.EntityFeaturePojo;
- import com.ikanow.infinit.e.data_model.store.social.sharing.SharePojo;
- import com.ikanow.infinit.e.data_model.utils.GeoOntologyMapping;
- import com.mongodb.BasicDBObject;
- import com.mongodb.DBCollection;
- import com.mongodb.DBCursor;
- import com.mongodb.MongoException;
- import com.mongodb.ReadPreference;
- //
- // This code contains all the processing logic for the (beta)
- // Advanced Queries
- //
- //(remove this during active development - want to just suppress a deprecation warning but no way of doing this for both 0.19 and 1.0)
- //@SuppressWarnings("deprecation")
- @SuppressWarnings("all")
- public class QueryHandler {
- private final StringBuffer _logMsg = new StringBuffer();
- private static final Logger _logger = Logger.getLogger(QueryHandler.class);
-
- public QueryHandler() {}
-
- private static Semaphore _concurrentAccessLock = null;
- private boolean acquireConcurrentAccessLock() throws InterruptedException {
- if (null == _concurrentAccessLock) {
- _concurrentAccessLock = new Semaphore(2);
- }
- return _concurrentAccessLock.tryAcquire(10, TimeUnit.MINUTES);
- }
- private void releaseConcurrentAccessLock() {
- _concurrentAccessLock.release();
- }
-
- // Query cache (re-created per request, but there's some static things in here for performance):
- private AliasLookupTable _aliasLookup = null;
-
- private LinkedList<AdvancedQueryPojo.QueryTermPojo> _extraFullTextTerms = null;
- // (used to allow entity terms to add top level (full text) terms)
-
- private static PropertiesManager _properties = null;
- private static com.ikanow.infinit.e.data_model.utils.PropertiesManager _dataModelProps = null;
- private static String _aggregationAccuracy = "full";
- private static ArrayList<Class<IQueryExtension>> _queryExtensions = null;
-
- private AdvancedQueryPojo.QueryScorePojo _scoringParams;
- // (need this here so we can set the adjust param for complex queries)
-
- private static int _replicaSetDistributionRatio = -1;
-
- ////////////////////////////////////////////////////////////////////////
-
- // 0] Top level processing
-
- public ResponsePojo doQuery(String userIdStr, AdvancedQueryPojo query, String communityIdStrList, StringBuffer errorString) throws UnknownHostException, MongoException, IOException, InstantiationException, IllegalAccessException {
- if (null == _properties) {
- _properties = new PropertiesManager();
- _aggregationAccuracy = _properties.getAggregationAccuracy();
- _dataModelProps = new com.ikanow.infinit.e.data_model.utils.PropertiesManager();
- _replicaSetDistributionRatio = 1 + _dataModelProps.getDocDbReadDistributionRatio();
-
- String[] queryExtensions = _properties.getQueryExtensions();
- if (null != queryExtensions) {
- _queryExtensions = new ArrayList<Class<IQueryExtension>>(queryExtensions.length);
- for (String s: queryExtensions) {
- try {
- Class<IQueryExtension> queryExtensionClass = (Class<IQueryExtension>) Class.forName(s);
- _queryExtensions.add(queryExtensionClass);
- }
- catch (Exception e) {
- _logger.error("Failed to load query extension: " + s, e);
- }
- catch (Error e) {
- _logger.error("Failed to load query extension: " + s, e);
- }
- }//(end list over query extensions)
- if (_queryExtensions.isEmpty()) {
- _queryExtensions = null;
- }
- }//TESTED (see test.QueryExtensionsTestCode)
- }
- ObjectId queryId = null;
- _scoringParams = query.score;
-
- // (NOTE CAN'T ACCESS "query" UNTIL AFTER 0.1 BECAUSE THAT CAN CHANGE IT)
-
- long nSysTime = (_nNow = System.currentTimeMillis());
-
- ResponsePojo rp = new ResponsePojo();
-
- // communityIdList is CSV
- String[] communityIdStrs = SocialUtils.getCommunityIds(userIdStr, communityIdStrList);
-
- //(timing)
- long nQuerySetupTime = System.currentTimeMillis();
-
- ElasticSearchManager indexMgr = getIndexManager(communityIdStrs);
- SearchRequestBuilder searchSettings = indexMgr.getSearchOptions();
- StringBuffer querySummary = new StringBuffer();
- BaseQueryBuilder queryObj = null;
- InternalTempFilterInfo tempFilterInfo = null;
- try {
- queryObj = getBaseQuery(query, communityIdStrs, communityIdStrList, userIdStr, querySummary);
- if (null == queryObj) { // only occurs if has 1 element with ftext starting $cache:
- return getSavedQueryInstead(query.qt.get(0).ftext.substring(7), communityIdStrs, query); // (step over cache preamble)
- }
- tempFilterInfo = getBaseFilter(query, communityIdStrs);
- }
- catch (Exception e) {
- Globals.populateStackTrace(errorString, e);
- if (null != e.getCause()) {
- errorString.append("[CAUSE=").append(e.getCause().getMessage()).append("]");
- Globals.populateStackTrace(errorString, e.getCause());
- }
- errorString.append(": " + e.getMessage());
- return null;
- }
-
- //DEBUG
- //querySummary.append(new Gson().toJson(query, AdvancedQueryPojo.class));
-
- // 0.4] Pre-Lucene Scoring
-
- // 0.4.1] General
-
- // Different options:
- // a] Get the most recent N documents matching the query, score post-query
- // b] Get the N highest (Lucene) scoring documents, incorporate significance post-query if desired
- // In both cases, N depends on whether significance calculation is taking place (and on the "skip" param)
-
- int nRecordsToOutput = query.output.docs.numReturn;
- int nRecordsToSkip = query.output.docs.skip;
- int nRecordsToGet = query.score.numAnalyze;
- final int nMAXRECORDSTOOUTPUT = 10000;
- final int nMAXRECORDSTOGET = 20000;
-
- // Some sanity checking on doc numbers:
- if (nRecordsToOutput > nMAXRECORDSTOOUTPUT) { // Upper limit...
- errorString.append(": Max # docs to return is 10000.");
- return null;
- }
- if (nRecordsToGet < nRecordsToOutput) {
- nRecordsToGet = nRecordsToOutput;
- }
- else if (nRecordsToGet > nMAXRECORDSTOGET) { // Upper limit...
- nRecordsToGet = nMAXRECORDSTOGET; // (we can do something sensible with this so carry on regardless)
- }
-
- boolean bUseSignificance = (query.score.sigWeight > 0.0);
- boolean bNeedExtraResultsForEnts =
- ((query.output.aggregation != null) && (query.output.aggregation.entsNumReturn != null) && (query.output.aggregation.entsNumReturn > 0))
- ||
- (query.output.docs.enable && (query.output.docs.numReturn > 0) && (query.output.docs.ents) && (query.score.scoreEnts));
-
- if (bUseSignificance || bNeedExtraResultsForEnts) {
-
- // Some logic taken from the original "knowledge/search"
- while ( (nRecordsToSkip + nRecordsToOutput > nRecordsToGet) && (nRecordsToGet <= nMAXRECORDSTOGET) )
- {
- nRecordsToGet += nRecordsToGet;
- }
- if (nRecordsToGet > nMAXRECORDSTOGET) {
- errorString.append(": Can only skip through to 20000 documents.");
- return null;
- }
- searchSettings.setSize(nRecordsToGet);
-
- //TESTED
- }
- else if (query.output.docs.enable) { // In this case we just need the minimum number of records
- // (whether searching by date or by relevance)
- searchSettings.setFrom(nRecordsToSkip);
- nRecordsToSkip = 0; // (so it isn't double counted in the processing module)
- nRecordsToGet = nRecordsToOutput;
- searchSettings.setSize(nRecordsToGet);
- //TESTED
- }
- else { // In thise case we're just outputting aggregations, and not even ones that come from the docs
- nRecordsToGet = 0; // (use this variable everywhere where we care about bring docs back either to output or for suitable aggregation)
- searchSettings.setSize(0);
- }
-
- // Sort on score if relevance is being used
-
- if (nRecordsToGet > 0) {
- if (query.score.relWeight > 0.0) { // (b) above
- // Using score is default, nothing to do
- }
- else { // (a) above
- // Debug code, if rel weight negative then use date to check Lucene score is better...
- if (query.score.relWeight < 0.0) {
- query.score.relWeight = -query.score.relWeight;
- }
- // Set Lucene to order:
- searchSettings.addSort(DocumentPojo.publishedDate_, SortOrder.DESC);
- }//TOTEST
- }//(if docs aren't enabled, don't need to worry about sorting)
-
- // 0.4.2] Prox scoring (needs to happen after [0.3]
- // Add proximity scoring:
- boolean bLowAccuracyDecay = false;
- if ((nRecordsToGet > 0) || (null == _scoringParams.adjustAggregateSig) || _scoringParams.adjustAggregateSig) {
- // (ie if we're getting docs or applying scores to entities)
-
- if (!_aggregationAccuracy.equals("full")) {
- bLowAccuracyDecay = true;
- }
- queryObj = addProximityBasedScoring(queryObj, searchSettings, query.score, tempFilterInfo.parentFilterObj, bLowAccuracyDecay);
-
- if (null == _scoringParams.adjustAggregateSig) { // auto-decide .. if ftext is set and is non-trivial
- if ((null != query.score.timeProx) || (null != query.score.geoProx)) {
- // (These are set to null above if badly formed)
- _scoringParams.adjustAggregateSig = true;
- }
- }
-
- }// (else not worth the effort)
-
- // 0.4.3] Source weightings (if any)
-
- queryObj = applyManualWeights(queryObj, query.score);
-
- // 0.5] Pre-lucene output options
-
- // only return the id field and score
- // (Both _id and score come back as default options, SearchHit:: getId and getScore, don't need anything else)
- // Facets
-
- // (These are needed for the case where we need to perform aggregations manually)
- Integer manualEntsNumReturn = null;
- Integer manualEventsNumReturn = null;
- Integer manualFactsNumReturn = null;
- Integer manualGeoNumReturn = null;
-
- //DEBUG
- //System.out.println(new Gson().toJson(query.output.aggregation));
-
- if ((null != query.output.aggregation) && (null != query.output.aggregation.raw)) { // Like query, specify raw aggregation (Facets)
- // Gross raw handling for facets
- if ((null != query.raw) && (null != query.raw.query)) {
- // Don't currently support raw query and raw facets because I can't work out how to apply
- // the override on group/source!
- errorString.append(": Not currently allowed raw query and raw facets");
- return null;
- }
- else { // Normal code
- searchSettings.setFacets(query.output.aggregation.raw.getBytes());
- }
- }
- else { // Apply various aggregation (=="facet") outputs to searchSettings
- boolean bSpecialCase = (null != query.raw) && (null != query.raw.query);
-
- if (!_aggregationAccuracy.equals("full")) {
- if (null != query.output.aggregation) {
- if (_aggregationAccuracy.equals("low")) {
- manualEntsNumReturn = query.output.aggregation.entsNumReturn;
- manualEventsNumReturn = query.output.aggregation.eventsNumReturn;
- manualFactsNumReturn = query.output.aggregation.factsNumReturn;
- manualGeoNumReturn = query.output.aggregation.geoNumReturn;
- }
- query.output.aggregation.entsNumReturn = null;
- query.output.aggregation.eventsNumReturn = null;
- query.output.aggregation.factsNumReturn = null;
- query.output.aggregation.geoNumReturn = null;
- // (allow time aggregation)
- // (allow source aggregation)
- }
- }
- AggregationUtils.parseOutputAggregation(query.output.aggregation, _aliasLookup,
- tempFilterInfo.entityTypeFilterStrings, tempFilterInfo.assocVerbFilterStrings,
- searchSettings, bSpecialCase?tempFilterInfo.parentFilterObj:null);
- // In partial accuracy case, restore aggregation
- if (null != manualEntsNumReturn) {
- query.output.aggregation.entsNumReturn = manualEntsNumReturn;
- }
- if (null != manualEventsNumReturn) {
- query.output.aggregation.eventsNumReturn = manualEventsNumReturn;
- }
- if (null != manualFactsNumReturn) {
- query.output.aggregation.factsNumReturn = manualFactsNumReturn;
- }
- if (null != manualGeoNumReturn) {
- query.output.aggregation.geoNumReturn = manualGeoNumReturn;
- }
- //TESTED
- }
- //TESTED x2
-
- //(timing)
- nQuerySetupTime = System.currentTimeMillis() - nQuerySetupTime;
-
- // 0.6] Perform Lucene query
-
- // 0.6.1: query extensions: pre-query hook
- ArrayList<IQueryExtension> queryExtensions = null;
- if (null != _queryExtensions) {
- queryId = new ObjectId();
- queryExtensions = new ArrayList<IQueryExtension>(_queryExtensions.size());
- for (Class<IQueryExtension> queryExtensionClass: _queryExtensions) {
- // Don't catch any exceptions thrown here - let it bubble upwards
- IQueryExtension queryExtension = queryExtensionClass.newInstance();
- queryExtension.preQueryActivities(queryId, query, communityIdStrs);
- queryExtensions.add(queryExtension);
- }
- }//TESTED (see test.QueryExtensionsTestCode)
-
- // 0.6.2: the main query
- if ((null != query.explain) && query.explain) { // (for diagnostic - will return lucene explanation)
- searchSettings.setExplain(true);
- }
-
- SearchResponse queryResults = null;
- if ((null != query.raw) && (null != query.raw.query))
- {
- // (Can bypass all other settings)
- searchSettings.setQuery(query.raw.query);
- queryResults = indexMgr.doQuery(null, tempFilterInfo.parentFilterObj, searchSettings);
- }//TESTED '{ "raw": { "match_all": {} } }'
- else
- {
- // Where I can, use the source filter as part of the query so that
- // facets will apply to query+filter, not just filter
- queryObj = QueryBuilders.boolQuery().must(queryObj).must(QueryBuilders.constantScoreQuery(tempFilterInfo.parentFilterObj).boost(0.0F));
-
- queryResults = indexMgr.doQuery(queryObj, null, searchSettings);
- }//TESTED '{}' etc
-
- long nLuceneTime = queryResults.getTookInMillis();
- // 0.7] Lucene scores
-
- long nProcTime = 0;
- long nProcTime_tmp = System.currentTimeMillis();
-
- StatisticsPojo stats = new StatisticsPojo();
- stats.found = queryResults.getHits().getTotalHits();
- stats.start = (long)nRecordsToSkip;
-
- if (nRecordsToGet > 0) {
- stats.setScore(queryResults.getHits(), (null != query.score.geoProx)||(null != query.score.timeProx), (null != query.explain) && query.explain);
- }
- //DEBUG
- //System.out.println(new Gson().toJson(queryResults));
-
- nProcTime += (System.currentTimeMillis() - nProcTime_tmp);
-
- // 0.8] Get data from Mongo + handle scoring
- //(timing)
- long nMongoTime = System.currentTimeMillis();
- List<BasicDBObject> docs = null;
-
- //(aggregation)
- LinkedList<BasicDBObject> lowAccuracyAggregatedEntities = null; // (always low accuracy)
- LinkedList<BasicDBObject> standaloneEvents = null;
- LinkedList<BasicDBObject> lowAccuracyAggregatedEvents = null;
- LinkedList<BasicDBObject> lowAccuracyAggregatedFacts = null;
- AggregationUtils.GeoContainer lowAccuracyAggregatedGeo = null;
- AggregationUtils.GeoContainer extraAliasAggregatedGeo = null;
-
- ScoringUtils scoreStats = null;
- if (null != stats.getIds()) {
- DBCursor docs0 = this.getDocIds(DbManager.getDocument().getMetadata(), stats.getIds(), nRecordsToGet, query.output, query.score);
- nMongoTime = System.currentTimeMillis() - nMongoTime;
-
- nProcTime_tmp = System.currentTimeMillis();
-
- // Entity aggregation (CURRENTLY ALWAYS LOW AGGREGATION):
- if ((null != query.output.aggregation) && (null != query.output.aggregation.entsNumReturn) && (query.output.aggregation.entsNumReturn > 0)) {
- lowAccuracyAggregatedEntities = new LinkedList<BasicDBObject>();
- }
-
- // Standalone events:
- if ((query.output.docs != null) && (query.output.docs.eventsTimeline != null) && query.output.docs.eventsTimeline) {
- standaloneEvents = new LinkedList<BasicDBObject>();
- }
-
- // Low accuracy aggregations:
-
- if ((null != manualEventsNumReturn) && (manualEventsNumReturn > 0)) {
- lowAccuracyAggregatedEvents = new LinkedList<BasicDBObject>();
- }
- if ((null != manualFactsNumReturn) && (manualFactsNumReturn > 0)) {
- lowAccuracyAggregatedFacts = new LinkedList<BasicDBObject>();
- }
-
- if ((null != manualGeoNumReturn) && (manualGeoNumReturn > 0)) {
- lowAccuracyAggregatedGeo = new AggregationUtils.GeoContainer();
- }
- else if ((null != query.output.aggregation) && (null != query.output.aggregation.geoNumReturn) && (query.output.aggregation.geoNumReturn > 0))
- {
- // (only if not using low accuracy aggregation ... otherwise it all gets dumped in lowAccuracyAggregatedGeo)
- extraAliasAggregatedGeo = new AggregationUtils.GeoContainer();
- }
-
- scoreStats = new ScoringUtils();
- try {
- boolean lockAcquired = true;
- try {
- lockAcquired = this.acquireConcurrentAccessLock();
-
- } catch (InterruptedException e) {
- //(that's fine just carry on)
- lockAcquired = false;
- }
- if (!lockAcquired) {
- rp.setResponse(new ResponseObject("Query", false, "Query engine busy, please try again later."));
- return rp;
- }
-
- scoreStats.setAliasLookupTable(_aliasLookup);
- docs = scoreStats.calcTFIDFAndFilter(DbManager.getDocument().getMetadata(),
- docs0, query.score, query.output, stats, bLowAccuracyDecay,
- nRecordsToSkip, nRecordsToOutput,
- communityIdStrs,
- tempFilterInfo.entityTypeFilterStrings, tempFilterInfo.assocVerbFilterStrings,
- standaloneEvents,
- lowAccuracyAggregatedEntities,
- lowAccuracyAggregatedGeo, extraAliasAggregatedGeo,
- lowAccuracyAggregatedEvents, lowAccuracyAggregatedFacts);
- }
- finally {
- scoreStats.clearAsMuchMemoryAsPossible();
- this.releaseConcurrentAccessLock();
- }
- nProcTime += (System.currentTimeMillis() - nProcTime_tmp);
- }
- else {
- nMongoTime = 0;
- }
- //TESTED (all queries)
-
- // 0.9] Output:
- rp.setResponse(new ResponseObject("Query", true, querySummary.toString()));
-
- // 0.9.1] Stats:
- stats.resetArrays();
- rp.setStats(stats); // (only actually uses the response pojo, but get rid of big fields anyway...)
- // 0.9.2] Facets:
- if (null != lowAccuracyAggregatedEntities) { // Entity aggregation
- rp.setEntities(lowAccuracyAggregatedEntities);
- }
- if (null != standaloneEvents) {
- rp.setEventsTimeline(standaloneEvents);
- }
- if (null != lowAccuracyAggregatedGeo) {
- rp.setGeo(lowAccuracyAggregatedGeo.geotags, (int)lowAccuracyAggregatedGeo.maxCount, (int)lowAccuracyAggregatedGeo.minCount);
- }
- if (null != lowAccuracyAggregatedEvents) {
- rp.setEvents(lowAccuracyAggregatedEvents);
- }
- if (null != lowAccuracyAggregatedFacts) {
- rp.setFacts(lowAccuracyAggregatedFacts);
- }
-
- if ((null != query.output.aggregation) && (null != query.output.aggregation.raw)) {
- rp.setFacets(queryResults.getFacets().facetsAsMap());
- }
- else if ((null != queryResults.getFacets()) && (null != queryResults.getFacets().getFacets())) { // "Logical" aggregation
- if (0.0 == query.score.sigWeight) {
- scoreStats = null; // (don't calculate event/fact aggregated significance if it's not wanted)
- }
- AggregationUtils.loadAggregationResults(rp, queryResults.getFacets().getFacets(), query.output.aggregation, scoreStats, _aliasLookup, tempFilterInfo.entityTypeFilterStrings, tempFilterInfo.assocVerbFilterStrings, extraAliasAggregatedGeo);
-
- } // (end facets not overwritten)
-
- scoreStats = null; // (now definitely never need scoreStats)
-
- // 0.9.3] Documents
- if (query.output.docs.enable) {
- if ((null != docs) && (docs.size() > 0)) {
- rp.setData(docs, (BasePojoApiMap<BasicDBObject>)null);
- }
- else { // (ensure there's always an empty list)
- docs = new ArrayList<BasicDBObject>(0);
- rp.setData(docs, (BasePojoApiMap<BasicDBObject>)null);
- }
- }
- else { // (ensure there's always an empty list)
- docs = new ArrayList<BasicDBObject>(0);
- rp.setData(docs, (BasePojoApiMap<BasicDBObject>)null);
- }
-
- // 0.9.4] query extensions: post-query hook
- if (null != queryExtensions) {
- for (IQueryExtension queryExtension: queryExtensions) {
- // Don't catch any exceptions thrown here - let it bubble upwards
- queryExtension.postQueryActivities(queryId, docs, rp);
- }
- }//TESTED (see test.QueryExtensionsTestCode)
-
- // 0.9.5] Timing/logging
-
- long nTotalTime = System.currentTimeMillis() - nSysTime;
- rp.getResponse().setTime(nTotalTime);
-
- _logMsg.setLength(0);
- _logMsg.append("knowledge/query querylen=").append(querySummary.length());
- _logMsg.append(" query=").append(querySummary.toString());
- _logMsg.append(" userid=").append(userIdStr);
- _logMsg.append(" groups=").append(communityIdStrList);
- _logMsg.append(" found=").append(stats.found);
- _logMsg.append(" luceneTime=").append(nLuceneTime).append(" ms");
- _logMsg.append(" setupTime=").append(nQuerySetupTime).append(" ms");
- _logMsg.append(" procTime=").append(nProcTime).append(" ms");
- _logMsg.append(" mongoTime=").append(nMongoTime).append(" ms");
- _logMsg.append(" time=").append(nTotalTime).append(" ms");
- _logger.info(_logMsg.toString());
- //DEBUG
- //System.out.println(_logMsg.toString());
-
- // Exceptions percolate up to the resource and are handled there...
- return rp;
- }
-
- ////////////////////////////////////////////////////////////////////////
-
- // Utility version of the above query call - just converts the advanced query pojo into an elasticsearch object that can
- // be queried
- public static class QueryInfo {
- public ElasticSearchManager indexMgr;
- public BaseQueryBuilder queryObj;
- public String querySummary;
- }
-
- public QueryInfo convertInfiniteQuery(AdvancedQueryPojo query, String[] communityIdStrs, String userIdStr) {
- // Fill in the blanks (a decent attempt has been made to fill out the blanks inside these options)
- if (null == query.input) {
- query.input = new AdvancedQueryPojo.QueryInputPojo();
- }
- if (null == query.score) {
- query.score = new AdvancedQueryPojo.QueryScorePojo();
- }
- if (null == query.output) {
- query.output = new AdvancedQueryPojo.QueryOutputPojo();
- }
- if (null == query.output.docs) { // (Docs are sufficiently important we'll make sure they're always present)
- query.output.docs = new AdvancedQueryPojo.QueryOutputPojo.DocumentOutputPojo();
- }
- // Other intialization
- _nNow = System.currentTimeMillis();
-
- // Now onto the logic:
- QueryInfo queryInfo = new QueryInfo();
-
- StringBuffer sb = new StringBuffer(userIdStr);
- for (String sCommunityId: communityIdStrs) {
- sb.append(',').append(sCommunityId);
- }
-
- queryInfo.indexMgr = getIndexManager(communityIdStrs);
- StringBuffer info = new StringBuffer();
- queryInfo.queryObj = getBaseQuery(query, communityIdStrs, sb.toString(), userIdStr, info);
- queryInfo.querySummary = info.toString();
- InternalTempFilterInfo tempFilterInfo = getBaseFilter(query, communityIdStrs);
- queryInfo.queryObj = QueryBuilders.boolQuery().must(queryInfo.queryObj).
- must(QueryBuilders.constantScoreQuery(tempFilterInfo.parentFilterObj).boost(0.0F));
-
- return queryInfo;
- }//TOTEST
-
- ////////////////////////////////////////////////////////////////////////
-
- //0b] QUERY BREAKDOWN
- ////////////////////////////////////////////////////////////////////////
-
- // 0.b.1) indexes
-
- private ElasticSearchManager getIndexManager(String[] communityIdStrs)
- {
- // Create a multi-index to check against all relevant shards:
- StringBuffer sb = new StringBuffer(DocumentPojoIndexMap.globalDocumentIndexCollection_);
- sb.append(',').append(DocumentPojoIndexMap.manyGeoDocumentIndexCollection_);
- for (String sCommunityId: communityIdStrs) {
- sb.append(',').append("docs_").append(sCommunityId);
- }
- sb.append('/').append(DocumentPojoIndexMap.documentType_);
- ElasticSearchManager indexMgr = ElasticSearchManager.getIndex(sb.toString());
- return indexMgr;
- }//TESTED (cut and paste from original code)
-
- ////////////////////////////////////////////////////////////////////////
-
- // 0.b.1) filter
-
- private static class InternalTempFilterInfo {
- BoolFilterBuilder parentFilterObj;
- String[] entityTypeFilterStrings;
- String[] assocVerbFilterStrings;
- }
-
- private InternalTempFilterInfo getBaseFilter(AdvancedQueryPojo query, String communityIdStrs[])
- {
- BoolFilterBuilder parentFilterObj =
- FilterBuilders.boolFilter().must(FilterBuilders.termsFilter(DocumentPojo.communityId_, communityIdStrs));
-
- BoolFilterBuilder sourceFilter = this.parseSourceManagement(query.input);
-
- if (null != sourceFilter) {
- parentFilterObj = parentFilterObj.must(sourceFilter);
- }//TESTED
-
- // 0.2] Output filtering
-
- // Output filters: parse (also used by aggregation, scoring)
-
- String[] entityTypeFilterStrings = null;
- String[] assocVerbFilterStrings = null;
- if ((null != query.output) && (null != query.output.filter)) {
- if (null != query.output.filter.entityTypes) {
- entityTypeFilterStrings = query.output.filter.entityTypes;
- if (0 == entityTypeFilterStrings.length) {
- entityTypeFilterStrings = null;
- }
- else if ((1 == entityTypeFilterStrings.length) && (entityTypeFilterStrings[0].isEmpty())) {
- entityTypeFilterStrings = null;
- }
- // (note this is important because it means we can always check entityTypeFilterStrings[0].getCharAt(0) for -ve filtering)
- }
- if (null != query.output.filter.assocVerbs) {
- assocVerbFilterStrings = query.output.filter.assocVerbs;
- if (0 == assocVerbFilterStrings.length) {
- assocVerbFilterStrings = null;
- }
- else if ((1 == assocVerbFilterStrings.length) && (assocVerbFilterStrings[0].isEmpty())) {
- assocVerbFilterStrings = null;
- }
- // (note this is important because it means we can always check assocVerbFilterStrings[0].getCharAt(0) for -ve filtering)
- }
- }
-
- // Now apply output filters to query
-
- BoolFilterBuilder outputFilter = this.parseOutputFiltering(entityTypeFilterStrings, assocVerbFilterStrings);
- if (null != outputFilter) {
- parentFilterObj = parentFilterObj.must(outputFilter);
- }
- //TESTED
- InternalTempFilterInfo out = new InternalTempFilterInfo();
- out.parentFilterObj = parentFilterObj;
- out.entityTypeFilterStrings = entityTypeFilterStrings;
- out.assocVerbFilterStrings = assocVerbFilterStrings;
-
- return out;
- }//TESTED (cut/paste from original code)
-
- ////////////////////////////////////////////////////////////////////////
-
- // 0.b.2] Query
- // (if it returns null then call getSavedQueryInstead instead)
-
- private BaseQueryBuilder getBaseQuery(AdvancedQueryPojo query, String communityIdStrs[], String communityIdStrList, String userIdStr, StringBuffer querySummary)
- {
- // Intialize alias if so required:
- if ((null == query.expandAlias) || query.expandAlias) {
- AliasManager aliasManager = AliasManager.getAliasManager();
- if (null != aliasManager) {
- _aliasLookup = aliasManager.getAliasLookupTable(null, communityIdStrs, null, userIdStr);
- }
- }
- // (end initialize index)
-
- BaseQueryBuilder queryObj = null;
-
- // 0.1] Input data (/filtering)
- if (null != query.input.name) { // This is actually a share id visible to this user
- query = getStoredQueryArtefact(query.input.name, query, userIdStr);
- }
-
- // 0.3] Query terms
-
- int nQueryElements = 0;
-
- if (null != query.qt) {
- nQueryElements = query.qt.size();
-
- if ((1 == nQueryElements) && (null != query.qt.get(0).ftext) && (query.qt.get(0).ftext.startsWith("$cache:"))) {
- return null;
- }
- if (nQueryElements > 0) { // NORMAL CASE
-
- this.handleEntityExpansion(DbManager.getFeature().getEntity(), query.qt, userIdStr, communityIdStrList);
-
- BaseQueryBuilder queryElements[] = new BaseQueryBuilder[nQueryElements];
- StringBuffer sQueryElements[] = new StringBuffer[nQueryElements];
- for (int i = 0; i < nQueryElements; ++i) {
- _extraFullTextTerms = null;
- queryElements[i] = this.parseQueryTerm(query.qt.get(i), (sQueryElements[i] = new StringBuffer()));
-
- // Extra full text terms generated by aliasing:
- if (null != _extraFullTextTerms) {
- BoolQueryBuilder extraTerms = QueryBuilders.boolQuery().should(queryElements[i]);
- StringBuffer discard = new StringBuffer(); // (we already have added the info the query elements)
- for (AdvancedQueryPojo.QueryTermPojo qtExtra: _extraFullTextTerms) {
- extraTerms = extraTerms.should(this.parseQueryTerm(qtExtra, discard));
- }
- queryElements[i] = extraTerms;
- _extraFullTextTerms = null; // (reset ready for next term...)
- }//TESTED
-
- }//end loop over query terms
-
- queryObj = this.parseLogic(query.logic, queryElements, sQueryElements, querySummary);
-
- if (null == queryObj) { //error parsing logic
- throw new RuntimeException("Error parsing logic");
- }
- }
- else { //(QT exists but doesn't have any elements)
- queryObj = QueryBuilders.matchAllQuery();
- querySummary.append('*');
- }
- }//TESTED
- else {
- queryObj = QueryBuilders.matchAllQuery();
- querySummary.append('*');
- } //(QT not specified)
-
- return queryObj;
- }//TESTED (cut/paste from original code)
-
- ////////////////////////////////////////////////////////////////////////
-
- //1] QUERY UTILITIES
- ////////////////////////////////////////////////////////////////////////
-
- // 1.0] Stored queries/datasets
- // Saved queries (ie the entire dataset)
-
- private ResponsePojo getSavedQueryInstead(String storedQueryNameOrId, String[] communityIdStrs, AdvancedQueryPojo query) {
- ResponsePojo rp = null;
- ObjectId oid = null;
- BasicDBObject jobQuery = null;
- try {
- oid = new ObjectId(storedQueryNameOrId);
- jobQuery = new BasicDBObject(CustomMapReduceJobPojo._id_, oid);
- }
- catch (Exception e) {
- jobQuery = new BasicDBObject(CustomMapReduceJobPojo.jobtitle_, storedQueryNameOrId);
- }
- CustomMapReduceJobPojo savedJob = CustomMapReduceJobPojo.fromDb(DbManager.getCustom().getLookup().findOne(jobQuery), CustomMapReduceJobPojo.class);
-
- if (null != savedJob) { // Is this even a saved job?
- if (null != savedJob.jarURL) {
- savedJob = null;
- }
- }
- if (null != savedJob) { // Authorization
- boolean auth = false;
- String communityIdStrList = Arrays.toString(communityIdStrs);
- for (ObjectId commId: savedJob.communityIds) {
-
- if (communityIdStrList.contains(commId.toString())) {
- auth = true;
- break;
- }
- }
- if (!auth) {
- savedJob = null;
- }
- if (null == savedJob) {
- throw new RuntimeException("Can't find saved query, or is a custom job not a query, or authorization error");
- }
- // OK go get the results of the job
- DBCollection coll = DbManager.getCollection(savedJob.getOutputDatabase(), savedJob.outputCollection);
- BasicDBObject result = (BasicDBObject) coll.findOne(); // (at some point support multiple saved queries)
- if (null == result) {
- throw new RuntimeException("Saved query is empty");
- }
- BasicDBObject apiResultToConvert = (BasicDBObject) result.get("value");
- if (null == apiResultToConvert) {
- throw new RuntimeException("Saved query has invalid format");
- }
- rp = ResponsePojo.fromDb(apiResultToConvert);
- }
- else if (null != oid) { // Support new user/doc queues
- SharePojo share = SharePojo.fromDb(DbManager.getSocial().getShare().findOne(jobQuery), SharePojo.class);
- if ((null == share) || (null == share.getShare()) ||
- (!share.getType().equals(DocumentQueueControlPojo.UserQueue) && !share.getType().equals(DocumentQueueControlPojo.SavedQueryQueue))
- )
- {
- throw new RuntimeException("Can't find saved query, or is a custom job not a query, or authorization error");
- }
- else { // share.share is a DocumentQueueControlPojo
- DocumentQueueControlPojo queue = DocumentQueueControlPojo.fromApi(share.getShare(), DocumentQueueControlPojo.class);
- BasicDBObject docQuery1 = new BasicDBObject(DocumentPojo._id_, new BasicDBObject(DbManager.in_, queue.getQueueList()));
- BasicDBObject docQuery2 = new BasicDBObject(DocumentPojo.updateId_, new BasicDBObject(DbManager.in_, queue.getQueueList()));
- BasicDBObject docQuery = new BasicDBObject(DbManager.or_, Arrays.asList(docQuery1, docQuery2));
- DBCursor dbc = DbManager.getDocument().getMetadata().find(docQuery).limit(query.score.numAnalyze);
- ScoringUtils scoreStats = new ScoringUtils();
- List<BasicDBObject> docs = null;
- StatisticsPojo stats = new StatisticsPojo();
- stats.setSavedScores(query.output.docs.skip, dbc.count());
- try {
- boolean lockAcquired = true;
- try {
- lockAcquired = this.acquireConcurrentAccessLock();
-
- } catch (InterruptedException e) {
- //(that's fine just carry on)
- lockAcquired = false;
- }
- if (!lockAcquired) {
- rp.setResponse(new ResponseObject("Query", false, "Query engine busy, please try again later."));
- return rp;
- }
- scoreStats.setAliasLookupTable(_aliasLookup);
- docs = scoreStats.calcTFIDFAndFilter(DbManager.getDocument().getMetadata(),
- dbc, query.score, query.output, stats, false,
- query.output.docs.skip, query.output.docs.numReturn,
- communityIdStrs,
- null, null,
- null,
- null,
- null, null,
- null, null);
- }
- finally {
- scoreStats.clearAsMuchMemoryAsPossible();
- this.releaseConcurrentAccessLock();
- }
- rp = new ResponsePojo();
- rp.setResponse(new ResponseObject("Query", true, "Saved Query: " + share.getTitle()));
- rp.setStats(stats);
- if ((null != docs) && (docs.size() > 0)) {
- rp.setData(docs, (BasePojoApiMap<BasicDBObject>)null);
- }
- else { // (ensure there's always an empty list)
- docs = new ArrayList<BasicDBObject>(0);
- rp.setData(docs, (BasePojoApiMap<BasicDBObject>)null);
- }
- }//end if user or saved query queue
- }
- return rp;
-
- }//TESTED
-
- // Stored queries (ie just the query JSON)
-
- static AdvancedQueryPojo getStoredQueryArtefact(String shareIdStr, AdvancedQueryPojo query, String userIdStr) {
-
- ResponsePojo rp2 = new ShareHandler().getShare(userIdStr, shareIdStr, true);
- if ((null != rp2.getData() || !rp2.getResponse().isSuccess())) {
- SharePojo share = (SharePojo) rp2.getData();
- if (null != share) {
- if (share.getType().equalsIgnoreCase("dataset")) {
- query.input = new com.google.gson.Gson().fromJson(share.getShare(), AdvancedQueryPojo.QueryInputPojo.class);
- }
- else if (share.getType().equalsIgnoreCase("query")) {
- query = new com.google.gson.Gson().fromJson(share.getShare(), AdvancedQueryPojo.class);
- }
- else { // Unrecognized share
- throw new RuntimeException("Unexpected share type: " + share.getType());
- }
- }
- else {
- throw new RuntimeException("Invalid return from share: " + rp2.getData().toString());
- }
- }
- else {
- throw new RuntimeException(rp2.getResponse().getMessage());
- }
- return query;
- }
-
- ////////////////////////////////////////////////////////////////////////
- // 1.1] Source management utility
-
- BoolFilterBuilder parseSourceManagement(AdvancedQueryPojo.QueryInputPojo input) {
-
- BoolFilterBuilder sourceFilter = null;
-
- if ((null != input.tags) || (null != input.typeAndTags)
- || (null != input.sources))
- {
- sourceFilter = FilterBuilders.boolFilter();
- }//TESTED
-
- if (null != input.tags) {
- sourceFilter = sourceFilter.should(FilterBuilders.termsFilter(DocumentPojo.tags_, input.tags.toArray()));
- }//TESTED '{ "input": { "tags": [ "healthcare", "cyber" ] } }'
-
- if (null != input.typeAndTags) {
- BoolFilterBuilder typeAndTagFilter = FilterBuilders.boolFilter();
- for (AdvancedQueryPojo.QueryInputPojo.TypeAndTagTermPojo tt: input.typeAndTags) {
- if (null != tt.tags) {
- typeAndTagFilter = typeAndTagFilter.should(
- FilterBuilders.boolFilter().must(FilterBuilders.termFilter(DocumentPojo.mediaType_, tt.type)).
- must(FilterBuilders.termsFilter(DocumentPojo.tags_, tt.tags.toArray())));
- }
- else {
- typeAndTagFilter = typeAndTagFilter.should(FilterBuilders.termFilter(DocumentPojo.mediaType_, tt.type));
- }
- }
- sourceFilter = sourceFilter.should(typeAndTagFilter);
- }//TESTED '{ "input": { "typeAndTags": [ { "type": "Social" }, { "type": "Video", "tags": [ "education", "MIT" ] } ] } }'
-
- if (null != input.sources) {
- if ((null == input.srcInclude) || input.srcInclude) {
- sourceFilter = sourceFilter.should(FilterBuilders.termsFilter(DocumentPojo.sourceKey_, input.sources.toArray()));
- }
- else {
- sourceFilter = sourceFilter.mustNot(FilterBuilders.termsFilter(DocumentPojo.sourceKey_, input.sources.toArray()));
- }
- }//TESTED '{ "input": { "srcInclude": false, "sources": [ "http.twitter.com.statuses.public_timeline.atom", "http.gdata.youtube.com.feeds.base.users.mit.uploads.alt=rss.v=2.orderby=published.client=ytapi-youtube-profile" ] } }'
- //(also "srcInclude" not set - checked got the complement of the result)
-
- return sourceFilter;
- }
- ////////////////////////////////////////////////////////////////////////
-
- // 1.X2] Output filter parsing
- private BoolFilterBuilder addNegativeSelectorToFilter(EntityFeaturePojo docDiscardAlias, BoolFilterBuilder outputFilter, int recursionLevel) {
- if ((null != docDiscardAlias.getAlias()) && !docDiscardAlias.getAlias().isEmpty()) {
- if (null == outputFilter) {
- outputFilter = FilterBuilders.boolFilter();
- }
- outputFilter = outputFilter.mustNot(FilterBuilders.nestedFilter(DocumentPojo.entities_,
- FilterBuilders.termsFilter(EntityPojo.docQuery_index_, docDiscardAlias.getAlias().toArray())));
-
- if (recursionLevel <= 1) { // (only go two deep for now)
- for (String aliasIndex: docDiscardAlias.getAlias()) {
- EntityFeaturePojo docDiscardSubAlias = _aliasLookup.getAliases(aliasIndex);
- if (null != docDiscardSubAlias) {
- outputFilter = addNegativeSelectorToFilter(docDiscardSubAlias, outputFilter, 1 + recursionLevel);
- }
- }//TESTED
- }
-
- }//TESTED (by hand)
- if (null != docDiscardAlias.getSemanticLinks()) { // (recall: we've abused this field for text queries)
- for (String textQuery: docDiscardAlias.getSemanticLinks()) {
- //(probably not a very efficient construct, but nothing about this is efficient, just functional, so we'll leave it for now)
- outputFilter = outputFilter.mustNot(FilterBuilders.queryFilter(
- CrossVersionQueryBuilders.matchPhraseQuery(DocumentPojo.fullText_, textQuery)));
- outputFilter = outputFilter.mustNot(FilterBuilders.queryFilter(
- CrossVersionQueryBuilders.matchPhraseQuery("_all", textQuery)));
- }
- }//TESTED (by hand)
- return outputFilter;
- }//TESTED
-
- BoolFilterBuilder parseOutputFiltering(String[] entityTypeFilterStrings, String[] assocVerbFilterStrings)
- {
- BoolFilterBuilder outputFilter = null;
-
- // First off: document discard aliases:
-
- if (null != _aliasLookup) { // Check out the document discard table...
- EntityFeaturePojo docDiscardAlias = _aliasLookup.getAliases("DOCUMENT_DISCARD");
- if (null != docDiscardAlias) {
- outputFilter = addNegativeSelectorToFilter(docDiscardAlias, outputFilter, 0);
- }
- }//TESTED (by hand, nothing repeatable)
-
- // Other simple filter types:
-
- if (null != entityTypeFilterStrings) {
- if ('-' != entityTypeFilterStrings[0].charAt(0)) { // (negative entity type, don't add to filter)
- outputFilter = FilterBuilders.boolFilter();
-
- outputFilter.must(FilterBuilders.nestedFilter(DocumentPojo.entities_,
- FilterBuilders.termsFilter(EntityPojo.docQuery_type_, entityTypeFilterStrings)));
- }
- }
- if (null != assocVerbFilterStrings) {
- if ('-' != assocVerbFilterStrings[0].charAt(0)) { // (negative verb, don't add to filter)
- if (null == outputFilter) {
- outputFilter = FilterBuilders.boolFilter();
- }
- BoolFilterBuilder verbFilter = FilterBuilders.boolFilter();
- StringBuffer sb = new StringBuffer();
- for (String assocVerb: assocVerbFilterStrings) {
- sb.setLength(0);
- sb.append('"').append(assocVerb).append('"');
- verbFilter.should(FilterBuilders.nestedFilter(DocumentPojo.associations_,
- QueryBuilders.queryString(sb.toString()).field(AssociationPojo.docQuery_verb_category_)));
- //(closest to exact that we can manage, obv verb_cat should actually be not_analyzed)
- }
- outputFilter.must(verbFilter);
- }
- }
- return outputFilter;
- }//TESTED
-
- ////////////////////////////////////////////////////////////////////////
- // 1.2] Query term parsing
- // (Not needed any more, but kept here for illustrative purposes)
- //private static Pattern _luceneExactPattern = Pattern.compile("([\"+~*?:|&(){}\\[\\]\\^\\!\\-\\\\ ])");
- private BaseQueryBuilder parseQueryTerm(AdvancedQueryPojo.QueryTermPojo qt, StringBuffer sQueryTerm) {
-
- BaseQueryBuilder term = null;
- BoolQueryBuilder boolTerm = null;
-
- sQueryTerm.setLength(0);
- sQueryTerm.append('(');
- // 1.1] Free text (Lucene)
-
- boolean nonEmpty = false;
-
- if (null != qt.ftext) { // NOTE term building code below depends on this being 1st clause
- nonEmpty = true;
- if (qt.ftext.startsWith("$cache")) { // currently not supported
- throw new RuntimeException("Don't currently support nested cached queries - coming soon.");
- }
-
- sQueryTerm.append('(');
- if (null != qt.metadataField) {
- sQueryTerm.append(qt.metadataField).append(':');
- }
- sQueryTerm.append(qt.ftext);
- sQueryTerm.append(')');
- if (null != qt.metadataField) { // Metadata only
- term = QueryBuilders.queryString(qt.ftext).field(qt.metadataField);
- }
- else {
- term = QueryBuilders.queryString(qt.ftext).field("_all").field(DocumentPojo.fullText_);
- }
- if (null == _scoringParams.adjustAggregateSig) { // auto-decide .. if ftext is set and is non-trivial
- if (qt.ftext.contains(" ")) {
- _scoringParams.adjustAggregateSig = true;
- }
- }
- }//TESTED (logic0)
-
- // 1.2] Exact text
-
- if (null != qt.etext) { // NOTE term building code below depends on this being 2nd clause
- nonEmpty = true;
- BaseQueryBuilder termQ = null;
- if (sQueryTerm.length() > 1) {
- sQueryTerm.append(" AND ");
- }
- if (qt.etext.equals("*")) { // Special case
- termQ = QueryBuilders.matchAllQuery();
- }
- else { // Normal query
- if (null != qt.metadataField) { // Metadata only
- termQ = CrossVersionQueryBuilders.matchPhraseQuery(qt.metadataField, qt.etext);
- }
- else { // Normal query
- termQ = QueryBuilders.boolQuery().
- should(CrossVersionQueryBuilders.matchPhraseQuery("_all", qt.etext)).
- should(CrossVersionQueryBuilders.matchPhraseQuery(DocumentPojo.fullText_, qt.etext));
- }
- }
- sQueryTerm.append('(');
- if (null != qt.metadataField) {
- sQueryTerm.append(qt.metadataField).append(':');
- }
- sQueryTerm.append('"');
- sQueryTerm.append(qt.etext);
- sQueryTerm.append("\")");
- if (null == term) {
- term = termQ;
- }
- else {
- term = (boolTerm = QueryBuilders.boolQuery().must(term).must(termQ));
- }
- }//TESTED (logic1)
-
- // Here's where it starts getting interesting:
-
- // 1.3] Entity
-
- if ((null != qt.entity) || (null != qt.entityValue) || ((null == qt.assoc) && (null != qt.sentiment))) { // (if no association specified then sentiment applies to entities)
- nonEmpty = true;
- if (sQueryTerm.length() > 1) {
- sQueryTerm.append(" AND ");
- }
- sQueryTerm.append('(');
-
- BaseQueryBuilder termQ = QueryBuilders.nestedQuery(DocumentPojo.entities_, this.parseEntityTerm(qt, sQueryTerm, EntityPojo.docQuery_index_)).scoreMode("max").boost((float)1.0);
-
- if (null == term) {
- term = termQ;
- }
- else if (null == boolTerm) {
- term = (boolTerm = QueryBuilders.boolQuery().must(term).must(termQ));
- }
- else {
- term = (boolTerm = boolTerm.must(termQ));
- }
- sQueryTerm.append(')');
-
- }//TESTED: logic2* TOTEST: alias expansion code (logic3)
-
- // 1.4] Dates
-
- if (null != qt.time) {
- nonEmpty = true;
- if (sQueryTerm.length() > 1) {
- sQueryTerm.append(" AND ");
- }
- sQueryTerm.append('(');
-
- BaseQueryBuilder termQ = this.parseDateTerm(qt.time, sQueryTerm);
-
- if (null == term) {
- term = termQ;
- }
- else if (null == boolTerm) {
- term = (boolTerm = QueryBuilders.boolQuery().must(term).must(termQ));
- }
- else {
- term = (boolTerm = boolTerm.must(termQ));
- }
- sQueryTerm.append(')');
-
- }//TESTED (logic5-10)
- // 1.5] Geo
-
- if (null != qt.geo)
- {
- nonEmpty = true;
- if (sQueryTerm.length() > 1)
- {
- sQueryTerm.append(" AND ");
- }
- sQueryTerm.append('(');
-
- BaseQueryBuilder termQ = this.parseGeoTerm(qt.geo, sQueryTerm, GeoParseField.ALL);
- if (null != termQ)
- {
- if (null == term)
- {
- term = termQ;
- }
- else if (null == boolTerm)
- {
- term = (boolTerm = QueryBuilders.boolQuery().must(term).must(termQ));
- }
- else
- {
- term = (boolTerm = boolTerm.must(termQ));
- }
- }
-
- sQueryTerm.append(')');
- } // (end geo)
-
- if (null == qt.assoc) qt.assoc = qt.event;
- //(continue to support the old "event" name for another release)
- if (null != qt.assoc) {
- nonEmpty = true;
- if (sQueryTerm.length() > 1) {
- sQueryTerm.append(" AND ");
- }
- sQueryTerm.append('(');
-
- BaseQueryBuilder termQ = QueryBuilders.nestedQuery(DocumentPojo.associations_, this.parseAssociationTerm(qt.assoc, qt.sentiment, sQueryTerm));
- if (null != termQ) {
- if (null == term) {
- term = termQ;
- }
- else if (null == boolTerm) {
- term = (boolTerm = QueryBuilders.boolQuery().must(term).must(termQ));
- }
- else {
- term = (boolTerm = boolTerm.must(termQ));
- }
- }
-
- sQueryTerm.append(')');
- } // (end event)
- if (!nonEmpty) {
- throw new RuntimeException("One+ of your query terms is empty!");
- }//TOTEST
-
- sQueryTerm.append(')');
- return term;
-
- }//TESTED (logic*) TOTEST event logic
- //TESTED: each of the above cases with the following GUI commands:
- // infiniteService.send('{"raw": { "match_all": {} } }');
- // infiniteService.send('{ "input": { "tags": [ "healthcare", "cyber" ] } }');
- // infiniteService.send('{ "input": { "typeAndTags": [ { "type": "Social" }, { "type": "Video", "tags": [ "education", "MIT" ] } ] } }');
- // infiniteService.send('{ "input": { "typeAndTags": [ { "type": "Social" }, { "type": "Video", "tags": [ "education…
Large files files are truncated, but you can click here to view the full file