PageRenderTime 7247ms CodeModel.GetById 25ms RepoModel.GetById 4ms app.codeStats 0ms

/corelib-search/src/main/java/eu/europeana/corelib/search/impl/SearchServiceImpl.java

https://gitlab.com/Aaeinstein54/corelib
Java | 1063 lines | 857 code | 130 blank | 76 comment | 200 complexity | eca6ebb8fa68f76d1a405363be51a471 MD5 | raw file
  1. /*
  2. * Copyright 2007-2012 The Europeana Foundation
  3. *
  4. * Licenced under the EUPL, Version 1.1 (the "Licence") and subsequent versions as approved
  5. * by the European Commission;
  6. * You may not use this work except in compliance with the Licence.
  7. *
  8. * You may obtain a copy of the Licence at:
  9. * http://joinup.ec.europa.eu/software/page/eupl
  10. *
  11. * Unless required by applicable law or agreed to in writing, software distributed under
  12. * the Licence is distributed on an "AS IS" basis, without warranties or conditions of
  13. * any kind, either express or implied.
  14. * See the Licence for the specific language governing permissions and limitations under
  15. * the Licence.
  16. */
  17. package eu.europeana.corelib.search.impl;
  18. import com.google.common.base.Charsets;
  19. import com.google.common.hash.HashCode;
  20. import com.google.common.hash.HashFunction;
  21. import com.google.common.hash.Hashing;
  22. import com.google.gson.Gson;
  23. import com.google.gson.reflect.TypeToken;
  24. import com.mongodb.BasicDBObject;
  25. import com.mongodb.DB;
  26. import com.mongodb.DBCollection;
  27. import com.mongodb.DBCursor;
  28. import eu.europeana.corelib.definitions.edm.beans.BriefBean;
  29. import eu.europeana.corelib.definitions.edm.beans.FullBean;
  30. import eu.europeana.corelib.definitions.edm.beans.IdBean;
  31. import eu.europeana.corelib.definitions.edm.entity.Aggregation;
  32. import eu.europeana.corelib.definitions.edm.entity.Proxy;
  33. import eu.europeana.corelib.definitions.edm.entity.WebResource;
  34. import eu.europeana.corelib.definitions.exception.ProblemType;
  35. import eu.europeana.corelib.definitions.solr.model.Query;
  36. import eu.europeana.corelib.definitions.solr.model.Term;
  37. import eu.europeana.corelib.edm.exceptions.MongoDBException;
  38. import eu.europeana.corelib.edm.exceptions.SolrTypeException;
  39. import eu.europeana.corelib.edm.model.metainfo.WebResourceMetaInfoImpl;
  40. import eu.europeana.corelib.mongo.server.EdmMongoServer;
  41. import eu.europeana.corelib.neo4j.entity.CustomNode;
  42. import eu.europeana.corelib.neo4j.entity.Neo4jBean;
  43. import eu.europeana.corelib.neo4j.entity.Neo4jStructBean;
  44. import eu.europeana.corelib.neo4j.entity.Node2Neo4jBeanConverter;
  45. import eu.europeana.corelib.neo4j.server.Neo4jServer;
  46. import eu.europeana.corelib.search.SearchService;
  47. import eu.europeana.corelib.search.model.ResultSet;
  48. import eu.europeana.corelib.search.query.MoreLikeThis;
  49. import eu.europeana.corelib.search.utils.SearchUtils;
  50. import eu.europeana.corelib.solr.bean.impl.*;
  51. import eu.europeana.corelib.solr.entity.WebResourceImpl;
  52. import eu.europeana.corelib.tools.lookuptable.EuropeanaId;
  53. import eu.europeana.corelib.tools.lookuptable.EuropeanaIdMongoServer;
  54. import eu.europeana.corelib.utils.EuropeanaUriUtils;
  55. import org.apache.commons.lang.StringUtils;
  56. import org.apache.http.HttpException;
  57. import org.apache.http.HttpRequest;
  58. import org.apache.http.HttpRequestInterceptor;
  59. import org.apache.http.auth.UsernamePasswordCredentials;
  60. import org.apache.http.impl.auth.BasicScheme;
  61. import org.apache.http.impl.client.AbstractHttpClient;
  62. import org.apache.http.protocol.HttpContext;
  63. import org.apache.log4j.Logger;
  64. import org.apache.solr.client.solrj.SolrQuery;
  65. import org.apache.solr.client.solrj.SolrQuery.ORDER;
  66. import org.apache.solr.client.solrj.SolrServer;
  67. import org.apache.solr.client.solrj.SolrServerException;
  68. import org.apache.solr.client.solrj.impl.HttpSolrServer;
  69. import org.apache.solr.client.solrj.request.LukeRequest;
  70. import org.apache.solr.client.solrj.response.FacetField;
  71. import org.apache.solr.client.solrj.response.FacetField.Count;
  72. import org.apache.solr.client.solrj.response.QueryResponse;
  73. import org.apache.solr.client.solrj.response.SpellCheckResponse;
  74. import org.apache.solr.client.solrj.response.SpellCheckResponse.Collation;
  75. import org.apache.solr.client.solrj.response.SpellCheckResponse.Correction;
  76. import org.apache.solr.client.solrj.util.ClientUtils;
  77. import org.apache.solr.common.SolrDocument;
  78. import org.apache.solr.common.SolrException;
  79. import org.apache.solr.common.params.CursorMarkParams;
  80. import org.apache.solr.common.params.ModifiableSolrParams;
  81. import org.apache.solr.common.util.NamedList;
  82. import org.neo4j.graphdb.Node;
  83. import org.springframework.beans.factory.annotation.Value;
  84. import javax.annotation.Resource;
  85. import java.io.IOException;
  86. import java.lang.reflect.Type;
  87. import java.text.MessageFormat;
  88. import java.util.*;
  89. /**
  90. * @author Yorgos.Mamakis@ kb.nl
  91. * @see eu.europeana.corelib.search.SearchService
  92. */
  93. public class SearchServiceImpl implements SearchService {
  94. /**
  95. * Default number of documents retrieved by MoreLikeThis
  96. */
  97. private static final int DEFAULT_MLT_COUNT = 10;
  98. private static final String UNION_FACETS_FORMAT = "'{'!ex={0}'}'{0}";
  99. /**
  100. * Number of milliseconds before the query is aborted by SOLR
  101. */
  102. private static final int TIME_ALLOWED = 30000;
  103. /**
  104. * The list of possible field input for spelling suggestions
  105. */
  106. private static final List<String> SPELL_FIELDS = Arrays.asList("who",
  107. "what", "where", "when", "title");
  108. private final static String RESOLVE_PREFIX = "http://www.europeana.eu/resolve/record";
  109. private final static String PORTAL_PREFIX = "http://www.europeana.eu/portal/record";
  110. private static final HashFunction hf = Hashing.md5();
  111. protected static Logger log = Logger.getLogger(SearchServiceImpl.class);
  112. private static boolean STARTED = false;
  113. @Resource(name = "corelib_solr_mongoServer")
  114. protected EdmMongoServer mongoServer;
  115. @Resource(name = "corelib_solr_mongoServer_id")
  116. protected EuropeanaIdMongoServer idServer;
  117. @Resource(name = "corelib_solr_neo4jServer")
  118. protected Neo4jServer neo4jServer;
  119. // provided by setter
  120. private SolrServer solrServer;
  121. @Value("#{europeanaProperties['solr.facetLimit']}")
  122. private int facetLimit;
  123. @Value("#{europeanaProperties['solr.username']}")
  124. private String username;
  125. @Value("#{europeanaProperties['solr.password']}")
  126. private String password;
  127. @Value("#{europeanaProperties['solr.searchLimit']}")
  128. private int searchLimit;
  129. private String mltFields;
  130. @Resource(name = "corelib_solr_mongoServer_metainfo")
  131. protected EdmMongoServer metainfoMongoServer;
  132. @Override
  133. public FullBean findById(String collectionId, String recordId,
  134. boolean similarItems) throws MongoDBException {
  135. return findById(EuropeanaUriUtils.createEuropeanaId(collectionId, recordId),
  136. similarItems
  137. );
  138. }
  139. @SuppressWarnings("unchecked")
  140. private void injectWebMetaInfo(final FullBean fullBean) {
  141. if (fullBean == null) {
  142. // log.error("FullBean is null when injecting web meta info");
  143. return;
  144. }
  145. if (fullBean.getAggregations() == null || fullBean.getAggregations().isEmpty()) {
  146. // log.error("FullBean Aggregation is null or empty when trying to inject web meta info");
  147. return;
  148. }
  149. // Temp fix for missing web resources
  150. Aggregation aggregationFix = fullBean.getAggregations().get(0);
  151. if (aggregationFix.getEdmIsShownBy() != null) {
  152. String isShownBy = fullBean.getAggregations().get(0).getEdmIsShownBy();
  153. boolean containsWr = false;
  154. if (aggregationFix.getWebResources() != null) {
  155. for (WebResource wr : aggregationFix.getWebResources()) {
  156. if (StringUtils.equals(isShownBy, wr.getAbout())) {
  157. containsWr = true;
  158. }
  159. }
  160. }
  161. if (!containsWr) {
  162. List<WebResource> wResources = (List<WebResource>) aggregationFix.getWebResources();
  163. if (wResources == null) {
  164. wResources = new ArrayList<>();
  165. }
  166. WebResourceImpl wr = new WebResourceImpl();
  167. wr.setAbout(isShownBy);
  168. wResources.add(wr);
  169. aggregationFix.setWebResources(wResources);
  170. }
  171. }
  172. if (aggregationFix.getEdmObject() != null) {
  173. String isShownBy = fullBean.getAggregations().get(0).getEdmObject();
  174. boolean containsWr = false;
  175. if (aggregationFix.getWebResources() != null) {
  176. for (WebResource wr : aggregationFix.getWebResources()) {
  177. if (StringUtils.equals(isShownBy, wr.getAbout())) {
  178. containsWr = true;
  179. }
  180. }
  181. }
  182. if (!containsWr) {
  183. List<WebResource> wResources = (List<WebResource>) aggregationFix.getWebResources();
  184. if (wResources == null) {
  185. wResources = new ArrayList<>();
  186. }
  187. WebResourceImpl wr = new WebResourceImpl();
  188. wr.setAbout(isShownBy);
  189. wResources.add(wr);
  190. aggregationFix.setWebResources(wResources);
  191. }
  192. }
  193. if (aggregationFix.getHasView() != null) {
  194. for (String hasView : aggregationFix.getHasView()) {
  195. boolean containsWr = false;
  196. if (aggregationFix.getWebResources() != null) {
  197. for (WebResource wr : aggregationFix.getWebResources()) {
  198. if (StringUtils.equals(hasView, wr.getAbout())) {
  199. containsWr = true;
  200. }
  201. }
  202. }
  203. if (!containsWr) {
  204. List<WebResource> wResources = (List<WebResource>) aggregationFix.getWebResources();
  205. if (wResources == null) {
  206. wResources = new ArrayList<>();
  207. }
  208. WebResourceImpl wr = new WebResourceImpl();
  209. wr.setAbout(hasView);
  210. wResources.add(wr);
  211. aggregationFix.setWebResources(wResources);
  212. }
  213. }
  214. }
  215. ((List<Aggregation>) fullBean.getAggregations()).set(0, aggregationFix);
  216. for (final WebResource webResource : fullBean.getEuropeanaAggregation().getWebResources()) {
  217. WebResourceMetaInfoImpl webMetaInfo = null;
  218. // Locate the technical meta data from the web resource about
  219. if (webResource.getAbout() != null) {
  220. final HashCode hashCodeAbout = hf.newHasher()
  221. .putString(webResource.getAbout(), Charsets.UTF_8)
  222. .putString("-", Charsets.UTF_8)
  223. .putString(fullBean.getAbout(), Charsets.UTF_8)
  224. .hash();
  225. final String webMetaInfoId = hashCodeAbout.toString();
  226. webMetaInfo = getMetaInfo(webMetaInfoId);
  227. }
  228. // Locate the technical meta data from the aggregation is shown by
  229. if (webMetaInfo == null && fullBean.getEuropeanaAggregation().getEdmIsShownBy() != null) {
  230. final HashCode hashCodeIsShownBy = hf.newHasher()
  231. .putString(fullBean.getEuropeanaAggregation().getEdmIsShownBy(), Charsets.UTF_8)
  232. .putString("-", Charsets.UTF_8)
  233. .putString(fullBean.getAbout(), Charsets.UTF_8)
  234. .hash();
  235. final String webMetaInfoId = hashCodeIsShownBy.toString();
  236. webMetaInfo = getMetaInfo(webMetaInfoId);
  237. }
  238. if (webMetaInfo != null) {
  239. ((WebResourceImpl) webResource).setWebResourceMetaInfo(webMetaInfo);
  240. }
  241. }
  242. // Step 2 : Fill in the aggregation
  243. for (final Aggregation aggregation : fullBean.getAggregations()) {
  244. final Set<String> urls = new HashSet<>();
  245. if (StringUtils.isNotEmpty(aggregation.getEdmIsShownBy())) {
  246. urls.add(aggregation.getEdmIsShownBy());
  247. }
  248. if (null != aggregation.getHasView()) {
  249. urls.addAll(Arrays.asList(aggregation.getHasView()));
  250. }
  251. for (final WebResource webResource : aggregation.getWebResources()) {
  252. if (!urls.contains(webResource.getAbout().trim())) {
  253. continue;
  254. }
  255. WebResourceMetaInfoImpl webMetaInfo = null;
  256. if (webResource.getAbout() != null) {
  257. final HashCode hashCodeAbout = hf.newHasher()
  258. .putString(webResource.getAbout(), Charsets.UTF_8)
  259. .putString("-", Charsets.UTF_8)
  260. .putString(fullBean.getAbout(), Charsets.UTF_8)
  261. .hash();
  262. // Locate the technical meta data from the web resource about
  263. final String webMetaInfoId = hashCodeAbout.toString();
  264. webMetaInfo = getMetaInfo(webMetaInfoId);
  265. }
  266. // Locate the technical meta data from the aggregation is shown
  267. // by
  268. if (webMetaInfo == null && aggregation.getEdmIsShownBy() != null) {
  269. final HashCode hashCodeIsShownBy = hf.newHasher()
  270. .putString(aggregation.getEdmIsShownBy(), Charsets.UTF_8)
  271. .putString("-", Charsets.UTF_8)
  272. .putString(aggregation.getAbout(), Charsets.UTF_8)
  273. .hash();
  274. final String webMetaInfoId = hashCodeIsShownBy.toString();
  275. webMetaInfo = getMetaInfo(webMetaInfoId);
  276. }
  277. if (webMetaInfo != null) {
  278. ((WebResourceImpl) webResource).setWebResourceMetaInfo(webMetaInfo);
  279. }
  280. }
  281. }
  282. }
  283. @Override
  284. public FullBean findById(String europeanaObjectId, boolean similarItems)
  285. throws MongoDBException {
  286. FullBean fullBean = mongoServer.getFullBean(europeanaObjectId);
  287. injectWebMetaInfo(fullBean);
  288. if (fullBean != null && isHierarchy(fullBean.getAbout())) {
  289. for (Proxy prx : fullBean.getProxies()) {
  290. prx.setDctermsHasPart(null);
  291. }
  292. }
  293. if (fullBean != null && similarItems) {
  294. try {
  295. fullBean.setSimilarItems(findMoreLikeThis(europeanaObjectId));
  296. } catch (SolrServerException e) {
  297. log.error("SolrServerException: " + e.getMessage());
  298. }
  299. }
  300. if (fullBean != null && (fullBean.getAggregations() != null && !fullBean.getAggregations().isEmpty())){
  301. ((FullBeanImpl) fullBean).setAsParent();
  302. for (Aggregation agg : fullBean.getAggregations()){
  303. if (agg.getWebResources() != null && !agg.getWebResources().isEmpty()){
  304. for (WebResourceImpl wRes : (List<WebResourceImpl>)agg.getWebResources()){
  305. wRes.initAttributionSnippet();
  306. }
  307. }
  308. }
  309. }
  310. return fullBean;
  311. }
  312. @Override
  313. public FullBean resolve(String collectionId, String recordId,
  314. boolean similarItems) throws SolrTypeException {
  315. return resolve(EuropeanaUriUtils.createResolveEuropeanaId(collectionId, recordId), similarItems);
  316. }
  317. @Override
  318. public FullBean resolve(String europeanaObjectId, boolean similarItems)
  319. throws SolrTypeException {
  320. FullBean fullBean = resolveInternal(europeanaObjectId);
  321. FullBean fullBeanNew = fullBean;
  322. if (fullBean != null) {
  323. while (fullBeanNew != null) {
  324. fullBeanNew = resolveInternal(fullBeanNew.getAbout()
  325. );
  326. if (fullBeanNew != null) {
  327. fullBean = fullBeanNew;
  328. }
  329. }
  330. }
  331. return fullBean;
  332. }
  333. private FullBean resolveInternal(String europeanaObjectId) throws SolrTypeException {
  334. if (!STARTED) {
  335. idServer.createDatastore();
  336. STARTED = true;
  337. }
  338. mongoServer.setEuropeanaIdMongoServer(idServer);
  339. FullBean fullBean = mongoServer.resolve(europeanaObjectId);
  340. injectWebMetaInfo(fullBean);
  341. if (fullBean != null) {
  342. try {
  343. fullBean.setSimilarItems(findMoreLikeThis(fullBean.getAbout()));
  344. } catch (SolrServerException e) {
  345. log.error("SolrServerException: " + e.getMessage());
  346. }
  347. }
  348. return fullBean;
  349. }
  350. @Override
  351. public String resolveId(String europeanaObjectId) {
  352. String lastId = resolveIdInternal(europeanaObjectId);
  353. String newId = lastId;
  354. if (lastId != null) {
  355. while (newId != null) {
  356. newId = resolveIdInternal(newId);
  357. if (newId != null) {
  358. lastId = newId;
  359. }
  360. }
  361. }
  362. return lastId;
  363. }
  364. @Override
  365. public String resolveId(String collectionId, String recordId) {
  366. return resolveId(EuropeanaUriUtils.createResolveEuropeanaId(
  367. collectionId, recordId));
  368. }
  369. private String resolveIdInternal(String europeanaObjectId) {
  370. if (!STARTED) {
  371. idServer.createDatastore();
  372. STARTED = true;
  373. }
  374. EuropeanaId newId = idServer
  375. .retrieveEuropeanaIdFromOld(europeanaObjectId);
  376. if (newId != null) {
  377. idServer.updateTime(newId.getNewId(), europeanaObjectId);
  378. return newId.getNewId();
  379. }
  380. newId = idServer.retrieveEuropeanaIdFromOld(RESOLVE_PREFIX
  381. + europeanaObjectId);
  382. if (newId != null) {
  383. idServer.updateTime(newId.getNewId(), RESOLVE_PREFIX
  384. + europeanaObjectId);
  385. return newId.getNewId();
  386. }
  387. newId = idServer.retrieveEuropeanaIdFromOld(PORTAL_PREFIX
  388. + europeanaObjectId);
  389. if (newId != null) {
  390. idServer.updateTime(newId.getNewId(), PORTAL_PREFIX
  391. + europeanaObjectId);
  392. return newId.getNewId();
  393. }
  394. return null;
  395. }
  396. @Override
  397. public List<BriefBean> findMoreLikeThis(String europeanaObjectId)
  398. throws SolrServerException {
  399. return findMoreLikeThis(europeanaObjectId, DEFAULT_MLT_COUNT);
  400. }
  401. @Override
  402. public List<BriefBean> findMoreLikeThis(String europeanaObjectId, int count)
  403. throws SolrServerException {
  404. String query = "europeana_id:\"" + europeanaObjectId + "\"";
  405. SolrQuery solrQuery = new SolrQuery().setQuery(query);
  406. // solrQuery.setQueryType(QueryType.ADVANCED.toString());
  407. solrQuery.set("mlt", true);
  408. if (mltFields == null) {
  409. List<String> fields = new ArrayList<>();
  410. for (MoreLikeThis mltField : MoreLikeThis.values()) {
  411. fields.add(mltField.toString());
  412. }
  413. mltFields = ClientUtils.escapeQueryChars(StringUtils.join(fields, ","));
  414. }
  415. solrQuery.set("mlt.fl", mltFields);
  416. solrQuery.set("mlt.mintf", 1);
  417. solrQuery.set("mlt.match.include", "false");
  418. solrQuery.set("mlt.count", count);
  419. solrQuery.set("rows", 1);
  420. solrQuery.setTimeAllowed(TIME_ALLOWED);
  421. if (log.isDebugEnabled()) {
  422. log.debug(solrQuery.toString());
  423. }
  424. QueryResponse response = solrServer.query(solrQuery);
  425. @SuppressWarnings("unchecked")
  426. NamedList<Object> moreLikeThisList = (NamedList<Object>) response
  427. .getResponse().get("moreLikeThis");
  428. List<BriefBean> beans = new ArrayList<>();
  429. if (moreLikeThisList.size() > 0) {
  430. @SuppressWarnings("unchecked")
  431. List<SolrDocument> docs = (List<SolrDocument>) moreLikeThisList
  432. .getVal(0);
  433. for (SolrDocument doc : docs) {
  434. beans.add(solrServer.getBinder().getBean(BriefBeanImpl.class,
  435. doc));
  436. }
  437. }
  438. return beans;
  439. }
  440. @SuppressWarnings("unchecked")
  441. @Override
  442. public <T extends IdBean> ResultSet<T> search(Class<T> beanInterface,
  443. Query query) throws SolrTypeException {
  444. if (query.getStart() != null && (query.getStart() + query.getPageSize() > searchLimit)) {
  445. throw new SolrTypeException(ProblemType.SEARCH_LIMIT_REACHED);
  446. }
  447. ResultSet<T> resultSet = new ResultSet<>();
  448. Class<? extends IdBeanImpl> beanClazz = SearchUtils
  449. .getImplementationClass(beanInterface);
  450. if (isValidBeanClass(beanClazz)) {
  451. String[] refinements = query.getRefinements(true);
  452. if (SearchUtils.checkTypeFacet(refinements)) {
  453. SolrQuery solrQuery = new SolrQuery().setQuery(query
  454. .getQuery(true));
  455. if (refinements != null) {
  456. solrQuery.addFilterQuery(refinements);
  457. }
  458. solrQuery.setRows(query.getPageSize());
  459. solrQuery.setStart(query.getStart());
  460. // In case of a paginated query or a numbered field query:
  461. // => SORT = [OPTIONAL_EXPLICIT_SORT asc|desc, ] EUR_ID desc
  462. // and in case of a numbered non-field query:
  463. // => SORT = SCORE desc, EUR_ID desc
  464. // Note: timeallowed and cursormark are not allowed together in a query
  465. if (query.getCurrentCursorMark() != null) {
  466. solrQuery.set(CursorMarkParams.CURSOR_MARK_PARAM, query.getCurrentCursorMark());
  467. } else {
  468. if (!isFieldQuery(solrQuery.getQuery())) {
  469. solrQuery.setSort("score", ORDER.desc);
  470. }
  471. solrQuery.setTimeAllowed(TIME_ALLOWED);
  472. }
  473. // will replace sort on score if available
  474. if (!StringUtils.isBlank(query.getSort())) {
  475. solrQuery.setSort(query.getSort(),
  476. (query.getSortOrder() == Query.ORDER_ASC ? ORDER.asc : ORDER.desc));
  477. }
  478. solrQuery.addSort("europeana_id", ORDER.desc);
  479. resultSet.setSortField(solrQuery.getSortField());
  480. // add extra parameters if any
  481. if (query.getParameters() != null) {
  482. Map<String, String> parameters = query.getParameters();
  483. for (String key : parameters.keySet()) {
  484. solrQuery.setParam(key, parameters.get(key));
  485. }
  486. }
  487. // facets are optional
  488. if (query.isAllowFacets()) {
  489. solrQuery.setFacet(true);
  490. List<String> filteredFacets = query.getFilteredFacets();
  491. boolean hasFacetRefinements = (filteredFacets != null && filteredFacets
  492. .size() > 0);
  493. for (String facetToAdd : query.getFacets()) {
  494. if (query.isProduceFacetUnion()) {
  495. if (hasFacetRefinements
  496. && filteredFacets.contains(facetToAdd)) {
  497. facetToAdd = MessageFormat.format(
  498. UNION_FACETS_FORMAT, facetToAdd);
  499. }
  500. }
  501. solrQuery.addFacetField(facetToAdd);
  502. }
  503. solrQuery.setFacetLimit(facetLimit);
  504. }
  505. // spellcheck is optional
  506. if (query.isAllowSpellcheck()) {
  507. if (solrQuery.getStart() == null || solrQuery.getStart() <= 1) {
  508. solrQuery.setParam("spellcheck", "on");
  509. solrQuery.setParam("spellcheck.collate", "true");
  510. solrQuery.setParam("spellcheck.extendedResults", "true");
  511. solrQuery.setParam("spellcheck.onlyMorePopular", "true");
  512. solrQuery.setParam("spellcheck.q", query.getQuery());
  513. }
  514. }
  515. if (query.getFacetQueries() != null) {
  516. for (String facetQuery : query.getFacetQueries()) {
  517. solrQuery.addFacetQuery(facetQuery);
  518. }
  519. }
  520. try {
  521. if (log.isDebugEnabled()) {
  522. log.debug("Solr query is: " + solrQuery);
  523. }
  524. query.setExecutedQuery(solrQuery.toString());
  525. QueryResponse queryResponse = solrServer.query(solrQuery);
  526. resultSet.setResults((List<T>) queryResponse.getBeans(beanClazz));
  527. resultSet.setFacetFields(queryResponse.getFacetFields());
  528. resultSet.setResultSize(queryResponse.getResults().getNumFound());
  529. resultSet.setSearchTime(queryResponse.getElapsedTime());
  530. resultSet.setSpellcheck(queryResponse.getSpellCheckResponse());
  531. resultSet.setCurrentCursorMark(query.getCurrentCursorMark());
  532. resultSet.setNextCursorMark(queryResponse.getNextCursorMark());
  533. if (queryResponse.getFacetQuery() != null) {
  534. resultSet.setQueryFacets(queryResponse.getFacetQuery());
  535. }
  536. } catch (SolrServerException e) {
  537. log.error("SolrServerException: " + e.getMessage()
  538. + " The query was: " + solrQuery);
  539. throw new SolrTypeException(e, ProblemType.MALFORMED_QUERY);
  540. } catch (SolrException e) {
  541. log.error("SolrException: " + e.getMessage()
  542. + " The query was: " + solrQuery);
  543. throw new SolrTypeException(e, ProblemType.MALFORMED_QUERY);
  544. }
  545. } else {
  546. throw new SolrTypeException(ProblemType.INVALIDARGUMENTS);
  547. }
  548. } else {
  549. ProblemType type = ProblemType.INVALIDCLASS;
  550. type.appendMessage("Bean class: " + beanClazz);
  551. throw new SolrTypeException(type);
  552. }
  553. return resultSet;
  554. }
  555. private boolean isFieldQuery(String query) {
  556. //TODO fix
  557. String subquery = StringUtils.substringBefore(query, "filter_tags");
  558. String queryWithoutTags = StringUtils.substringBefore(subquery, "facet_tags");
  559. return !(StringUtils.contains(queryWithoutTags, "who:") || StringUtils.contains(queryWithoutTags, "what:")
  560. || StringUtils.contains(queryWithoutTags, "where:") || StringUtils.contains(queryWithoutTags, "when:")
  561. || StringUtils.contains(queryWithoutTags, "title:")) && StringUtils.contains(queryWithoutTags, ":") && !(StringUtils.contains(queryWithoutTags.trim(), " ") && StringUtils.contains(queryWithoutTags.trim(), "\""));
  562. }
  563. /**
  564. * Flag whether the bean class is one of the allowable ones.
  565. */
  566. private boolean isValidBeanClass(Class<? extends IdBeanImpl> beanClazz) {
  567. return beanClazz == BriefBeanImpl.class
  568. || beanClazz == ApiBeanImpl.class
  569. || beanClazz == RichBeanImpl.class;
  570. }
  571. @Override
  572. public List<Term> suggestions(String query, int pageSize)
  573. throws SolrTypeException {
  574. return suggestions(query, pageSize, null);
  575. }
  576. @Override
  577. public List<Count> createCollections(String facetFieldName,
  578. String queryString, String... refinements) throws SolrTypeException {
  579. Query query = new Query(queryString).setParameter("rows", "0")
  580. .setParameter("facet", "true").setRefinements(refinements)
  581. .setParameter("facet.mincount", "1")
  582. .setParameter("facet.limit", "750").setAllowSpellcheck(false);
  583. query.setFacet(facetFieldName);
  584. final ResultSet<BriefBean> response = search(BriefBean.class, query);
  585. for (FacetField facetField : response.getFacetFields()) {
  586. if (facetField.getName().equalsIgnoreCase(facetFieldName)) {
  587. return facetField.getValues();
  588. }
  589. }
  590. return new ArrayList<>();
  591. }
  592. @Override
  593. public Map<String, Integer> seeAlso(List<String> queries) {
  594. return queryFacetSearch("*:*", null, queries);
  595. }
  596. @Override
  597. public Map<String, Integer> queryFacetSearch(String query, String[] qf,
  598. List<String> queries) {
  599. SolrQuery solrQuery = new SolrQuery();
  600. solrQuery.setQuery(query);
  601. if (qf != null) {
  602. solrQuery.addFilterQuery(qf);
  603. }
  604. solrQuery.setRows(0);
  605. solrQuery.setFacet(true);
  606. solrQuery.setTimeAllowed(TIME_ALLOWED);
  607. for (String queryFacet : queries) {
  608. solrQuery.addFacetQuery(queryFacet);
  609. }
  610. QueryResponse response;
  611. Map<String, Integer> queryFacets = null;
  612. try {
  613. if (log.isDebugEnabled()) {
  614. log.debug("Solr query is: " + solrQuery.toString());
  615. }
  616. response = solrServer.query(solrQuery);
  617. queryFacets = response.getFacetQuery();
  618. } catch (SolrServerException e) {
  619. log.error("SolrServerException: " + e.getMessage() + " for query "
  620. + solrQuery.toString());
  621. e.printStackTrace();
  622. } catch (Exception e) {
  623. log.error("Exception: " + e.getClass().getCanonicalName() + " "
  624. + e.getMessage() + " for query " + solrQuery.toString());
  625. e.printStackTrace();
  626. }
  627. return queryFacets;
  628. }
  629. @SuppressWarnings("unchecked")
  630. @Override
  631. public <T extends IdBean> ResultSet<T> sitemap(Class<T> beanInterface,
  632. Query query) throws SolrTypeException {
  633. ResultSet<T> resultSet = new ResultSet<>();
  634. Class<? extends IdBeanImpl> beanClazz = SearchUtils
  635. .getImplementationClass(beanInterface);
  636. String[] refinements = query.getRefinements(true);
  637. if (SearchUtils.checkTypeFacet(refinements)) {
  638. SolrQuery solrQuery = new SolrQuery().setQuery(query.getQuery());
  639. if (refinements != null) {
  640. solrQuery.addFilterQuery(refinements);
  641. }
  642. solrQuery.setFacet(false);
  643. solrQuery.setRows(query.getPageSize());
  644. solrQuery.setStart(query.getStart());
  645. solrQuery.setSortField("COMPLETENESS", ORDER.desc);
  646. solrQuery.setSortField("score", ORDER.desc);
  647. solrQuery.setTimeAllowed(TIME_ALLOWED);
  648. // add extra parameters if any
  649. if (query.getParameters() != null) {
  650. Map<String, String> parameters = query.getParameters();
  651. for (String key : parameters.keySet()) {
  652. solrQuery.setParam(key, parameters.get(key));
  653. }
  654. }
  655. try {
  656. if (log.isDebugEnabled()) {
  657. log.debug("Solr query is: " + solrQuery);
  658. }
  659. QueryResponse queryResponse = solrServer.query(solrQuery);
  660. resultSet.setResults((List<T>) queryResponse
  661. .getBeans(beanClazz));
  662. resultSet.setResultSize(queryResponse.getResults()
  663. .getNumFound());
  664. resultSet.setSearchTime(queryResponse.getElapsedTime());
  665. if (solrQuery.getBool("facet", false)) {
  666. resultSet.setFacetFields(queryResponse.getFacetFields());
  667. }
  668. } catch (SolrServerException e) {
  669. log.error("SolrServerException: " + e.getMessage());
  670. throw new SolrTypeException(e, ProblemType.MALFORMED_QUERY);
  671. } catch (SolrException e) {
  672. log.error("SolrException: " + e.getMessage());
  673. throw new SolrTypeException(e, ProblemType.MALFORMED_QUERY);
  674. }
  675. }
  676. return resultSet;
  677. }
  678. /**
  679. * Get Suggestions from Solr Suggester
  680. *
  681. * @param query The query term
  682. * @param field The field to query on
  683. * @param rHandler The ReqestHandler to use
  684. * @return A list of Terms for the specific term from the SolrSuggester
  685. */
  686. private List<Term> getSuggestions(String query, String field,
  687. String rHandler) {
  688. List<Term> results = new ArrayList<>();
  689. try {
  690. ModifiableSolrParams params = new ModifiableSolrParams();
  691. params.set("qt", "/" + rHandler);
  692. params.set("q", field + ":" + query);
  693. params.set("rows", 0);
  694. params.set("timeAllowed", TIME_ALLOWED);
  695. // get the query response
  696. QueryResponse queryResponse = solrServer.query(params);
  697. SpellCheckResponse spellcheckResponse = queryResponse
  698. .getSpellCheckResponse();
  699. // if the suggestions are not empty and there are collated results
  700. if (spellcheckResponse != null
  701. && !spellcheckResponse.getSuggestions().isEmpty()
  702. && spellcheckResponse.getCollatedResults() != null) {
  703. for (Collation collation : spellcheckResponse
  704. .getCollatedResults()) {
  705. StringBuilder termResult = new StringBuilder();
  706. for (Correction cor : collation
  707. .getMisspellingsAndCorrections()) {
  708. // pickup the corrections, remove duplicates
  709. String[] terms = cor.getCorrection().trim()
  710. .replaceAll(" ", " ").split(" ");
  711. for (String term : terms) {
  712. if (StringUtils.isBlank(term)) {
  713. continue;
  714. }
  715. // termResult.
  716. if (!StringUtils.contains(termResult.toString(),
  717. term)) {
  718. termResult.append(term).append(" ");
  719. }
  720. }
  721. }
  722. // return the term, the number of hits for each collation
  723. // and the field that it should be mapped to
  724. Term term = new Term(termResult.toString().trim(),
  725. collation.getNumberOfHits(),
  726. SuggestionTitle.getMappedTitle(field),
  727. SearchUtils.escapeFacet(field,
  728. termResult.toString()));
  729. results.add(term);
  730. }
  731. }
  732. } catch (SolrServerException e) {
  733. log.error("Exception :" + e.getMessage());
  734. }
  735. return results;
  736. }
  737. /**
  738. * Get the suggestions
  739. */
  740. @Override
  741. public List<Term> suggestions(String query, int pageSize, String field) {
  742. if (log.isDebugEnabled()) {
  743. log.debug(String.format("%s, %d, %s", query, pageSize, field));
  744. }
  745. List<Term> results = new ArrayList<>();
  746. long start = new Date().getTime();
  747. // if the fiels is null check on all fields else on the requested field
  748. if (StringUtils.isBlank(field) || !SPELL_FIELDS.contains(field)) {
  749. results.addAll(getSuggestions(query, "title", "suggestTitle"));
  750. results.addAll(getSuggestions(query, "who", "suggestWho"));
  751. results.addAll(getSuggestions(query, "what", "suggestWhat"));
  752. results.addAll(getSuggestions(query, "where", "suggestWhere"));
  753. results.addAll(getSuggestions(query, "when", "suggestWhen"));
  754. } else if (StringUtils.equals(field, SuggestionTitle.TITLE.title)) {
  755. results.addAll(getSuggestions(query, field, "suggestTitle"));
  756. } else if (StringUtils.equals(field, SuggestionTitle.PERSON.title)) {
  757. results.addAll(getSuggestions(query, field, "suggestWho"));
  758. } else if (StringUtils.equals(field, SuggestionTitle.SUBJECT.title)) {
  759. results.addAll(getSuggestions(query, field, "suggestWhat"));
  760. } else if (StringUtils.equals(field, SuggestionTitle.PLACE.title)) {
  761. results.addAll(getSuggestions(query, field, "suggestWhere"));
  762. } else if (StringUtils.equals(field, SuggestionTitle.DATE.title)) {
  763. results.addAll(getSuggestions(query, field, "suggestWhen"));
  764. }
  765. // Sort the results by number of hits
  766. Collections.sort(results);
  767. logTime("suggestions", (new Date().getTime() - start));
  768. if (log.isDebugEnabled()) {
  769. log.debug(String.format("Returned %d results in %d ms",
  770. results.size() > pageSize ? pageSize : results.size(),
  771. new Date().getTime() - start));
  772. }
  773. return results.size() > pageSize ? results.subList(0, pageSize)
  774. : results;
  775. }
  776. public void setSolrServer(SolrServer solrServer) {
  777. this.solrServer = setServer(solrServer);
  778. }
  779. private SolrServer setServer(SolrServer solrServer) {
  780. if (solrServer instanceof HttpSolrServer) {
  781. HttpSolrServer server = new HttpSolrServer(
  782. ((HttpSolrServer) solrServer).getBaseURL());
  783. AbstractHttpClient client = (AbstractHttpClient) server
  784. .getHttpClient();
  785. client.addRequestInterceptor(new PreEmptiveBasicAuthenticator(
  786. username, password));
  787. return server;
  788. } else {
  789. return solrServer;
  790. }
  791. }
  792. @Override
  793. public List<Neo4jBean> getChildren(String rdfAbout, int offset, int limit) {
  794. List<Neo4jBean> beans = new ArrayList<>();
  795. long startIndex = offset;
  796. List<CustomNode> children = neo4jServer.getChildren(rdfAbout, offset, limit);
  797. for (CustomNode child : children) {
  798. startIndex += 1L;
  799. beans.add(Node2Neo4jBeanConverter.toNeo4jBean(child, startIndex));
  800. }
  801. return beans;
  802. }
  803. @Override
  804. public boolean isHierarchy(String rdfAbout) {
  805. return neo4jServer.isHierarchy(rdfAbout);
  806. }
  807. @Override
  808. public List<Neo4jBean> getChildren(String rdfAbout, int offset) {
  809. return getChildren(rdfAbout, offset, 10);
  810. }
  811. @Override
  812. public List<Neo4jBean> getChildren(String rdfAbout) {
  813. return getChildren(rdfAbout, 0, 10);
  814. }
  815. private Node getNode(String rdfAbout) {
  816. return neo4jServer.getNode(rdfAbout);
  817. }
  818. @Override
  819. public Neo4jBean getHierarchicalBean(String rdfAbout) {
  820. Node node = getNode(rdfAbout);
  821. if (node != null) {
  822. return Node2Neo4jBeanConverter.toNeo4jBean(node, neo4jServer.getNodeIndex(node));
  823. }
  824. return null;
  825. }
  826. private enum SuggestionTitle {
  827. TITLE("title", "Title"), DATE("when", "Time/Period"), PLACE("where",
  828. "Place"), PERSON("who", "Creator"), SUBJECT("what", "Subject");
  829. String title;
  830. String mappedTitle;
  831. SuggestionTitle(String title, String mappedTitle) {
  832. this.title = title;
  833. this.mappedTitle = mappedTitle;
  834. }
  835. public static String getMappedTitle(String title) {
  836. for (SuggestionTitle st : SuggestionTitle.values()) {
  837. if (StringUtils.equals(title, st.title)) {
  838. return st.mappedTitle;
  839. }
  840. }
  841. return null;
  842. }
  843. }
  844. @Override
  845. @SuppressWarnings("unchecked")
  846. public Date getLastSolrUpdate() throws SolrServerException, IOException {
  847. long t0 = new Date().getTime();
  848. NamedList<Object> namedList = solrServer.request(new LukeRequest());
  849. NamedList<Object> index = (NamedList<Object>) namedList.get("index");
  850. if (log.isInfoEnabled()) {
  851. log.info("spent: " + (new Date().getTime() - t0));
  852. }
  853. return (Date) index.get("lastModified");
  854. }
  855. public void logTime(String type, long time) {
  856. if (log.isDebugEnabled()) {
  857. log.debug(String.format("elapsed time (%s): %d", type, time));
  858. }
  859. }
  860. @Override
  861. public List<Neo4jBean> getPrecedingSiblings(String rdfAbout, int limit) {
  862. List<Neo4jBean> beans = new ArrayList<>();
  863. List<CustomNode> precedingSiblings = neo4jServer.getPrecedingSiblings(rdfAbout, limit);
  864. long startIndex = neo4jServer.getNodeIndexByRdfAbout(rdfAbout);
  865. for (CustomNode precedingSibling : precedingSiblings) {
  866. startIndex -= 1L;
  867. beans.add(Node2Neo4jBeanConverter.toNeo4jBean(precedingSibling, startIndex));
  868. }
  869. return beans;
  870. }
  871. @Override
  872. public List<Neo4jBean> getPrecedingSiblings(String rdfAbout) {
  873. return getPrecedingSiblings(rdfAbout, 10);
  874. }
  875. @Override
  876. public List<Neo4jBean> getFollowingSiblings(String rdfAbout, int limit) {
  877. List<Neo4jBean> beans = new ArrayList<>();
  878. List<CustomNode> followingSiblings = neo4jServer.getFollowingSiblings(rdfAbout, limit);
  879. long startIndex = neo4jServer.getNodeIndexByRdfAbout(rdfAbout);
  880. for (CustomNode followingSibling : followingSiblings) {
  881. startIndex += 1L;
  882. beans.add(Node2Neo4jBeanConverter.toNeo4jBean(followingSibling, startIndex));
  883. }
  884. return beans;
  885. }
  886. @Override
  887. public List<Neo4jBean> getFollowingSiblings(String rdfAbout) {
  888. return getFollowingSiblings(rdfAbout, 10);
  889. }
  890. @Override
  891. public long getChildrenCount(String rdfAbout) {
  892. return neo4jServer.getChildrenCount(getNode(rdfAbout));
  893. }
  894. // note that parents don't have their node indexes set in getInitialStruct
  895. // because they have to be fetched separately; therefore this is done afterwards
  896. @Override
  897. public Neo4jStructBean getInitialStruct(String nodeId) {
  898. return addParentNodeIndex(Node2Neo4jBeanConverter.toNeo4jStruct(neo4jServer.getInitialStruct(nodeId), neo4jServer.getNodeIndex(getNode(nodeId))));
  899. }
  900. private Neo4jStructBean addParentNodeIndex(Neo4jStructBean struct) {
  901. if (!struct.getParents().isEmpty()) {
  902. for (Neo4jBean parent : struct.getParents()) {
  903. parent.setIndex(neo4jServer.getNodeIndex(getNode(parent.getId())));
  904. }
  905. }
  906. return struct;
  907. }
  908. private WebResourceMetaInfoImpl getMetaInfo(final String webResourceMetaInfoId) {
  909. final DB db = metainfoMongoServer.getDatastore().getDB();
  910. final DBCollection webResourceMetaInfoColl = db.getCollection("WebResourceMetaInfo");
  911. final BasicDBObject query = new BasicDBObject("_id", webResourceMetaInfoId);
  912. final DBCursor cursor = webResourceMetaInfoColl.find(query);
  913. final Type type = new TypeToken<WebResourceMetaInfoImpl>() {
  914. }.getType();
  915. if (cursor.hasNext()) {
  916. return new Gson().fromJson(cursor.next().toString(), type);
  917. }
  918. return null;
  919. }
  920. // Filter tag generation
  921. }
  922. class PreEmptiveBasicAuthenticator implements HttpRequestInterceptor {
  923. private final UsernamePasswordCredentials credentials;
  924. public PreEmptiveBasicAuthenticator(String user, String pass) {
  925. credentials = new UsernamePasswordCredentials(user, pass);
  926. }
  927. @Override
  928. public void process(HttpRequest request, HttpContext context)
  929. throws HttpException, IOException {
  930. request.addHeader(BasicScheme.authenticate(credentials, "US-ASCII",
  931. false));
  932. }
  933. }