PageRenderTime 57ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/Macademia/src/java/org/macademia/MongoWrapper.java

http://macademia.googlecode.com/
Java | 1023 lines | 732 code | 97 blank | 194 comment | 160 complexity | ead38de64229ac6724a8fcc3c4ee4f3e MD5 | raw file
  1. package org.macademia;
  2. import com.mongodb.*;
  3. import org.apache.commons.logging.Log;
  4. import org.apache.commons.logging.LogFactory;
  5. import java.util.*;
  6. import java.util.logging.Logger;
  7. /**
  8. * Authors: Nathaniel Miller and Alex Schneeman
  9. */
  10. public class MongoWrapper {
  11. private final static Log LOG = LogFactory.getLog(MongoWrapper.class);
  12. Mongo mongo ;
  13. //name of the users collection
  14. public static final String USERS = "users";
  15. //name of the interests collection
  16. public static final String INTERESTS = "interests";
  17. //name of the collaboratorRequests collection
  18. public static final String COLLABORATOR_REQUESTS = "collaboratorRequests" ;
  19. //name of the articlesToIds collection
  20. public static final String ARTICLES_TO_IDS = "articlesToIds";
  21. //name of the articlesToInterests collection
  22. public static final String ARTICLES_TO_INTERESTS = "articlesToInterests";
  23. //name of the articleSimilarities collection
  24. public static final String ARTICLE_SIMILARITIES = "articleSimilarities";
  25. public static final String INSTITUTION_INTERESTS ="institutionInterests";
  26. private String dbName = null;
  27. private String wpDbName = "fromWikipedia";
  28. LRUCache<Long, Set<Long>> interestUserCache = new LRUCache<Long, Set<Long>>(2000);
  29. LRUCache<Long, Set<Long>> interestRequestCache = new LRUCache<Long, Set<Long>>(2000);
  30. LRUCache<Long, Set<Long>> userInstitutionCache = new LRUCache<Long, Set<Long>>(2000);
  /**
   * Creates a wrapper around an existing Mongo connection.
   *
   * @param mongo the connection used for all database access
   * @param dbName name of the main database (returned by {@code getDb()})
   * @param wpDbName name of the database returned by {@code getDb(true)}
   */
  public MongoWrapper(Mongo mongo, String dbName, String wpDbName) {
    this.wpDbName = wpDbName;
    this.dbName = dbName;
    this.mongo = mongo;
  }
  36. public void changeDB(String dbName) {
  37. this.dbName = dbName;
  38. }
  39. public DB getDb(){
  40. return mongo.getDB(dbName);
  41. }
  42. public DB getDb(boolean wpDb) {
  43. if (wpDb) {
  44. return mongo.getDB(wpDbName);
  45. } else {
  46. return mongo.getDB(dbName);
  47. }
  48. }
  49. public void copyDB(String fromDB, String dbName) {
  50. DB db = mongo.getDB( "admin" );
  51. //System.out.println("copying from '" + fromDB + "' to '" + dbName + "'");
  52. db.command("use " + fromDB);
  53. BasicDBObjectBuilder b = BasicDBObjectBuilder.start();
  54. b.append("copydb", 1);
  55. b.append("fromhost", "localhost");
  56. b.append("fromdb", fromDB);
  57. b.append("todb", dbName);
  58. CommandResult cmd = db.command(b.get());
  59. //CommandResult cmd2 = db.command(((String)("use "+dbName)));
  60. }
  61. public void dropDB(String dbName) {
  62. mongo.dropDatabase(dbName);
  63. }
  64. public void dropCurrentDB(){
  65. mongo.dropDatabase(dbName);
  66. }
  67. public void switchToCopyDB(String toCopy) {
  68. Random rand = new Random();
  69. String dbName = "tmp" + rand.nextInt(10000);
  70. copyDB(toCopy, dbName);
  71. changeDB(dbName);
  72. }
  73. public DBObject findById(String collection, Object id, boolean articleDb) throws IllegalArgumentException{
  74. return findByField(collection, "_id", id, articleDb);
  75. }
  76. public DBObject findByField(String collection, String field, Object value, boolean articleDb) throws IllegalArgumentException{
  77. DBObject searchById= new BasicDBObject(field, value);
  78. DBCollection coll = getDb(articleDb).getCollection(collection);
  79. //System.out.println("DBCollection: " + coll.toString());
  80. //for (DBObject o : coll.find()) {
  81. //System.out.println("DBObject in collection: " + o.toString());
  82. //}
  83. DBObject res = coll.findOne(searchById);
  84. if(res==null){
  85. throw new IllegalArgumentException("No record found in "+collection+" with " + field + " "+value.toString());
  86. }
  87. return res;
  88. }
  89. public DBObject safeFindByField(String collection, String field, Object value, boolean articleDb){
  90. try{
  91. return findByField(collection, field, value, articleDb);
  92. } catch(IllegalArgumentException e){
  93. //System.out.println(e.getMessage());
  94. return null;
  95. }
  96. }
  97. public DBObject safeFindById(String collection, Object id, boolean articleDb){
  98. return safeFindByField(collection, "_id", id, articleDb);
  99. }
  100. public void addUser(Long userId, List<Long> userInterests, List<Long> institutionIds) throws RuntimeException {
  101. if(userId == null){
  102. throw new RuntimeException("User needs an ID");
  103. }
  104. if (institutionIds.size() < 1) {
  105. throw new RuntimeException("user needs at least one institutionId");
  106. } else {
  107. for (Long institutionId: institutionIds) {
  108. if(institutionId == null) {
  109. throw new RuntimeException("users institution has no ID");
  110. }
  111. }
  112. }
  113. for (long i : userInterests) {
  114. addInterestToInstitutions(i, institutionIds);
  115. }
  116. DBObject newUser = new BasicDBObject("_id", userId);
  117. //log.info(interestIds+"addUser")
  118. newUser.put("interests", userInterests);
  119. newUser.put("institutions", institutionIds);
  120. DBObject searchById = safeFindById(USERS, userId, false);
  121. DBCollection users = getDb().getCollection(USERS);
  122. if(searchById != null){
  123. users.update(searchById, newUser);
  124. }
  125. else{
  126. users.insert(newUser);
  127. }
  128. //log.info("User ID: "+id)
  129. //log.info("Institution ID: "+institutionId)
  130. for (long i : userInterests) {
  131. updateInterestUsage(i);
  132. interestUserCache.remove(i);
  133. }
  134. userInstitutionCache.remove(userId);
  135. }
  136. /**
  137. * Removes the user, keeping the appropriate institution's list
  138. * of interests up to date, and removing any interests that are
  139. * owned by no other people.
  140. * @param userId The Long id of the user to remove
  141. * @return A double List of longs. The first list is the list of
  142. * interests needing to be to removed from the database, the second
  143. * is the collaborator requests owned by the person, now in need
  144. * of deletion.
  145. */
  146. public List<List<Long>> removeUser(Long userId) {
  147. Set<Long> interests = getUserInterests(userId);
  148. List<Long> deletedInterests = new LinkedList<Long>();
  149. // Two lists. 1: list of interests to be deleted. 2: list of collaborator
  150. // requests owned by the person, also in need of deletion.
  151. List<List<Long>> delInterestCollaborator = new LinkedList<List<Long>>();
  152. // maps interest ids to the ids of all users with that interest
  153. HashMap<Long, Set<Long>> interestUsers = new HashMap<Long, Set<Long>>();
  154. // maps interest ids to the ids of all collaborator requests with that interest
  155. HashMap<Long, Set<Long>> interestRequests = new HashMap<Long, Set<Long>>();
  156. DBObject user = safeFindById(USERS, userId, false);
  157. if (user == null) {
  158. return delInterestCollaborator;
  159. }
  160. for (Long i : interests) {
  161. Set<Long> iUsers = getInterestUsers(i);
  162. iUsers.remove(userId);
  163. interestUsers.put(i, iUsers);
  164. interestRequests.put(i, getInterestRequests(i));
  165. setInterestUsage(i, getInterestUsage(i)-1);
  166. }
  167. for (long instId : getUserInstitutions(userId)) {
  168. deletedInterests.addAll(handleDisconnects(interests, interestUsers, interestRequests, instId));
  169. }
  170. // finally, remove the user
  171. DBCollection users = getDb().getCollection(USERS);
  172. users.remove(user);
  173. delInterestCollaborator.add(deletedInterests);
  174. delInterestCollaborator.add(getUserRequests(userId));
  175. for (Long i : interests) {
  176. interestUserCache.remove(i);
  177. }
  178. userInstitutionCache.remove(userId);
  179. return delInterestCollaborator;
  180. }
  181. /**
  182. * Returns a set of longs giving all of the institution
  183. * ids that the user is a part of.
  184. * @param userId The long id of the user whose institutions
  185. * are to be returned.
  186. * @return A Set<Long> of ids for the user's institutions.
  187. */
  188. public Set<Long> getUserInstitutions(long userId){
  189. Set<Long> institutionIds = userInstitutionCache.get(userId);
  190. if (institutionIds == null) {
  191. institutionIds = new HashSet<Long>();
  192. DBObject user = safeFindById(USERS, userId, false);
  193. if (user == null) {
  194. LOG.fatal("no user associated with id '" + userId + "'");
  195. }
  196. if (user.get("institutions") == null) {
  197. LOG.fatal("no institutions associated with user '" + userId + "'");
  198. }
  199. for (Long id: (List<Long>)user.get("institutions")) {
  200. institutionIds.add(id);
  201. }
  202. userInstitutionCache.put(userId, institutionIds);
  203. }
  204. return institutionIds;
  205. }
  206. public Set<Long> getUserInterests(long id){
  207. DBObject user = safeFindById(USERS, id, false);
  208. Set<Long> interests = new HashSet<Long>();
  209. BasicDBList userInterests=(BasicDBList) user.get(INTERESTS);
  210. //interests.addAll((ArrayList<Long>)(ArrayList<Object>)userInterests);
  211. //interests.addAll(Arrays.asList((Long[]) userInterests.toArray()));
  212. for (Object l : userInterests) {
  213. interests.add((Long)l);
  214. }
  215. return interests;
  216. }
  217. /**
  218. * Constructs and returns a list of the ids of all
  219. * collaborator requests owned by the person with
  220. * the parameter id.
  221. * @param userId The long id of the user whose collaborator
  222. * requests are desired.
  223. * @return List of long id numbers of all collaborator
  224. * requests owned by the specified user.
  225. */
  226. public List<Long> getUserRequests(long userId) {
  227. List<Long> userRequests = new ArrayList<Long>();
  228. DBObject query = new BasicDBObject("creator",userId);
  229. DBCollection coll = getDb().getCollection(COLLABORATOR_REQUESTS);
  230. DBCursor res =coll.find(query);
  231. for(DBObject rfc: res){
  232. userRequests.add((Long)rfc.get("_id"));
  233. }
  234. return userRequests;
  235. }
  236. public Set<Long> getInterestUsers(long id) {
  237. Set<Long> res = interestUserCache.get(id);
  238. if (res != null) {
  239. return res;
  240. }
  241. res = new HashSet<Long>();
  242. DBObject query = new BasicDBObject("interests", id);
  243. DBObject keys = new BasicDBObject("_id", 1);
  244. DBCollection users = getDb(false).getCollection(USERS);
  245. DBCursor cursor = users.find(query, keys);
  246. for (DBObject user : cursor) {
  247. res.add((Long)user.get("_id"));
  248. }
  249. interestUserCache.put(id, res);
  250. return res;
  251. }
  252. /**
  253. * Updates the parameter interests usage to be the number of Users
  254. * with the interest plus the number of Requests with the interest.
  255. * @param id The long id of the interest whose usage is being set.
  256. */
  257. public void updateInterestUsage(long id) {
  258. int usage = getInterestUsers(id).size() + getInterestRequests(id).size();
  259. setInterestUsage(id, usage);
  260. }
  261. /**
  262. * Sets the number of Users and Requests who possess the
  263. * interest specified by the parameter id.
  264. * @param id The long id of the interest whose usage is being set.
  265. * @param usage The number of Users and Requests who possess
  266. * the interest.
  267. */
  268. private void setInterestUsage(long id, int usage) {
  269. DBCollection interests = getDb().getCollection(INTERESTS);
  270. DBObject interest = safeFindById(INTERESTS, id, false);
  271. if (interest == null) { // FIXME: what should we do if interest is null? Create one? It's an error!
  272. LOG.error("setInterestUsage: no interest with id " + id);
  273. } else {
  274. interest.put("usage", usage);
  275. interests.update(safeFindById(INTERESTS, id, false), interest);
  276. }
  277. }
  278. /**
  279. * Returns the number of Users and Requests who possess the
  280. * interest specified by the parameter id.
  281. * @param id The long id of the interest whose usage is to be returned.
  282. * @return The int number of Users and Requests who possess the
  283. * interest.
  284. */
  285. public int getInterestUsage(long id) {
  286. DBObject interest = safeFindById(INTERESTS, id, false);
  287. if (interest != null && interest.containsField("usage")) {
  288. return (Integer)interest.get("usage");
  289. } else {
  290. return 0;
  291. }
  292. }
  293. public Set<Long> getInterestRequests(long id) {
  294. Set<Long> res = interestRequestCache.get(id);
  295. if (res != null) {
  296. return res;
  297. }
  298. res = new HashSet<Long>();
  299. DBObject query = new BasicDBObject("keywords", id);
  300. DBCollection requests = getDb(false).getCollection(COLLABORATOR_REQUESTS);
  301. DBCursor cursor = requests.find(query);
  302. for (DBObject request : cursor) {
  303. res.add((Long)request.get("_id"));
  304. }
  305. interestRequestCache.put(id, res);
  306. return res;
  307. }
  308. /**
  309. * Returns a set long ids corresponding to all institutions
  310. * who own the interest with parameter interestId
  311. * @param interestId The long id whose institutional presence
  312. * is desired.
  313. * @return A Set<Long> containing the ids of all institutions
  314. * that own the parameter interest.
  315. */
  316. public Set<Long> getInterestInstitutions(long interestId) {
  317. Set<Long> institutions = new HashSet<Long>();
  318. DBCollection allInstitutions = getDb().getCollection(INSTITUTION_INTERESTS);
  319. for (DBObject dbObject : allInstitutions.find()) {
  320. Set<Long> interests = interestStringToSet(dbObject.get("interests")+"");
  321. if (interests.contains(interestId)) {
  322. institutions.add((Long)dbObject.get("_id"));
  323. }
  324. }
  325. return institutions;
  326. }
  327. public void addCollaboratorRequest(long rfcId, List<Long> interests, long creatorId, List<Long> institutionIds) {
  328. DBObject newRFC = new BasicDBObject("_id", rfcId);
  329. //log.info(interestIds+"addCollaboratorRequest")
  330. for (long i : interests) {
  331. addInterestToInstitutions(i, institutionIds);
  332. interestRequestCache.remove(i);
  333. }
  334. newRFC.put("keywords", interests);
  335. newRFC.put("creator", creatorId);
  336. newRFC.put("institutions", institutionIds);
  337. DBCollection collaboratorRequests = getDb().getCollection(COLLABORATOR_REQUESTS);
  338. DBObject searchById = safeFindById(COLLABORATOR_REQUESTS, rfcId, false);
  339. if (searchById != null) {
  340. collaboratorRequests.update(searchById, newRFC);
  341. } else {
  342. collaboratorRequests.insert(newRFC);
  343. }
  344. for (long i : interests) {
  345. updateInterestUsage(i);
  346. }
  347. }
  348. /**
  349. * Returns a set of longs giving all of the institution
  350. * ids that the request is a part of.
  351. * @param id The long id of the request whose institutions
  352. * are to be returned.
  353. * @return A Set<Long> of ids for the request's institutions.
  354. */
  355. public Set<Long> getCollaboratorRequestInstitutions(long id){
  356. DBObject rfc = safeFindById(COLLABORATOR_REQUESTS, id, false);
  357. Set<Long> institutionIds = new HashSet<Long>();
  358. if (rfc != null) {
  359. for (Long institutionId: (List<Long>) rfc.get("institutions")) {
  360. institutionIds.add(institutionId);
  361. }
  362. }
  363. return institutionIds;
  364. }
  365. public Long getCollaboratorRequestCreator(long id){
  366. DBObject rfc = safeFindById(COLLABORATOR_REQUESTS, id, false);
  367. System.out.println("RFC: " + rfc.toString());
  368. return (Long) rfc.get("creator");
  369. }
  370. public Set<Long> getRequestKeywords(long id){
  371. DBObject rfc = safeFindById(COLLABORATOR_REQUESTS, id, false);
  372. System.out.println("RFC: " + rfc.toString());
  373. Set<Long> keywords = new HashSet<Long>();
  374. for(Object l :(BasicDBList)rfc.get("keywords")){
  375. keywords.add((Long) l);
  376. }
  377. return keywords;
  378. }
  379. /**
  380. * Removes the collaborator request with the parameter id
  381. * number from the database.
  382. * @param id The long id, of the collaborator request to
  383. * be removed
  384. * @return List<Long> containing the ids of interests that
  385. * have been removed from mongo as a result of removing
  386. * this collaborator request.
  387. */
  388. public List<Long> removeCollaboratorRequest(long id) {
  389. Set<Long> interests = getRequestKeywords(id);
  390. List<Long> deletedInterests = new LinkedList<Long>();
  391. // maps interest ids to the ids of all users with that interest
  392. HashMap<Long, Set<Long>> interestUsers = new HashMap<Long, Set<Long>>();
  393. // maps interest ids to the ids of all collaborator requests with that interest
  394. HashMap<Long, Set<Long>> interestRequests = new HashMap<Long, Set<Long>>();
  395. DBObject request = safeFindById(COLLABORATOR_REQUESTS, id, false);
  396. if (request == null) {
  397. return deletedInterests;
  398. }
  399. for (Long i : interests) {
  400. interestUsers.put(i, getInterestUsers(i));
  401. Set<Long> iRequests = getInterestRequests(i);
  402. iRequests.remove(id);
  403. interestRequests.put(i, iRequests);
  404. interestRequestCache.remove(i);
  405. }
  406. for (long instId : getCollaboratorRequestInstitutions(id)) {
  407. deletedInterests.addAll(handleDisconnects(interests, interestUsers, interestRequests, instId));
  408. }
  409. // finally, remove the collaborator request
  410. DBCollection collaboratorRequests = getDb().getCollection(COLLABORATOR_REQUESTS);
  411. collaboratorRequests.remove(request);
  412. return deletedInterests;
  413. }
  414. /**
  415. * Add the interest with id secondId to the interest with id firstId's
  416. * list of similar interests, giving the relation a score of similarity.
  417. * @param firstId the interest to be added to
  418. * @param secondId the similar interest to be added
  419. * @param similarity the similarity between the interests
  420. */
  421. public void addToInterests(long firstId, long secondId, double similarity){
  422. //log.info("Similar Interest Id before added to DB: "+secondId)
  423. DBCollection interests = getDb().getCollection(INTERESTS);
  424. DBObject i = safeFindById(INTERESTS, firstId, false);
  425. if(i == null){
  426. i=new BasicDBObject("_id", firstId);
  427. interests.insert(i);
  428. }
  429. //log.info("Similar Interest String added to DB: "+interest)
  430. //log.info(similar+interest+" addToInterests put" )
  431. SimilarInterestList sim = new SimilarInterestList((String)i.get("similar"));
  432. sim.add(new SimilarInterest(secondId, similarity));
  433. i.put("similar",sim.toString());
  434. interests.update(safeFindById(INTERESTS, firstId, false),i);
  435. }
  436. /**
  437. * Removes the interest with the parameter interestId from
  438. * the database. This method assumes that the interest is
  439. * owned by no user, collaborator request, or institution.
  440. * @param interestId The Long id of the interest to remove
  441. */
  442. private void removeInterest(Long interestId) {
  443. DBObject interest = safeFindById(INTERESTS, interestId, false);
  444. if (interest == null) {
  445. return;
  446. }
  447. SimilarInterestList similarInterests = getSimilarInterests(interestId);
  448. for (SimilarInterest sim : similarInterests.list) {
  449. removeSimilarInterest(sim.interestId, interestId);
  450. }
  451. DBCollection interests = getDb().getCollection(INTERESTS);
  452. interests.remove(interest);
  453. }
  454. /**
  455. * Removes the specified interest from the user's interests
  456. * @param interestId The Long id of the interest to remove
  457. * @param userId The Long id of the user to remove the interest from
  458. * @return true if the interest was completely removed removed from
  459. * the database, false otherwise
  460. */
  461. public boolean removeInterestFromUser(Long interestId, Long userId) {
  462. DBObject user = safeFindById(USERS, userId, false);
  463. if (user == null) {
  464. return false;
  465. }
  466. List<Long> interests = (List<Long>)user.get("interests");
  467. if (!interests.contains(interestId)) {
  468. return false;
  469. }
  470. Set<Long> institutions = getInterestInstitutions(interestId);
  471. Set<Long> interestUsers = getInterestUsers(interestId);
  472. Set<Long> interestRequests = getInterestRequests(interestId);
  473. interestUsers.remove(userId);
  474. for (Long u : interestUsers) {
  475. institutions.removeAll(getUserInstitutions(u));
  476. }
  477. for (Long request : interestRequests) {
  478. institutions.removeAll(getCollaboratorRequestInstitutions(request));
  479. }
  480. for (Long institution : institutions) {
  481. removeInterestFromInstitution(interestId, institution);
  482. }
  483. interests.remove(interestId);
  484. user.put("interests", interests);
  485. DBCollection users = getDb().getCollection(USERS);
  486. users.update(safeFindById(USERS, userId, false), user);
  487. setInterestUsage(interestId, getInterestUsage(interestId)-1);
  488. if (interestIsDisconnected(interestId)) {
  489. removeInterest(interestId);
  490. return true;
  491. }
  492. return false;
  493. }
  494. /**
  495. * Removes the specified keyword from the request keywords
  496. * @param keywordId The Long id of the keyword to remove
  497. * @param requestId The Long id of the request to remove the interest from
  498. * @return true if the keyword/interest was completely removed from the
  499. * database, false otherwise
  500. */
  501. public boolean removeKeywordFromRequest(Long keywordId, Long requestId) {
  502. DBObject request = safeFindById(COLLABORATOR_REQUESTS, requestId, false);
  503. if (request == null) {
  504. return false;
  505. }
  506. List<Object> keywords = (List<Object>)request.get("keywords");
  507. if (!keywords.contains(keywordId)) {
  508. return false;
  509. }
  510. Set<Long> institutions = getInterestInstitutions(keywordId);
  511. Set<Long> interestUsers = getInterestUsers(keywordId);
  512. Set<Long> interestRequests = getInterestRequests(keywordId);
  513. interestRequests.remove(requestId);
  514. for (Long u : interestUsers) {
  515. institutions.removeAll(getUserInstitutions(u));
  516. }
  517. for (Long r : interestRequests) {
  518. institutions.removeAll(getCollaboratorRequestInstitutions(r));
  519. }
  520. for (Long institution : institutions) {
  521. removeInterestFromInstitution(keywordId, institution);
  522. }
  523. keywords.remove(keywordId);
  524. request.put("keywords", keywords);
  525. DBCollection requests = getDb().getCollection(COLLABORATOR_REQUESTS);
  526. requests.update(safeFindById(COLLABORATOR_REQUESTS, requestId, false), request);
  527. setInterestUsage(keywordId, getInterestUsage(keywordId)-1);
  528. if (interestIsDisconnected(keywordId)) {
  529. removeInterest(keywordId);
  530. return true;
  531. }
  532. return false;
  533. }
  534. /**
  535. * Remove the parameter secondInterest from the firstInterest's
  536. * list of similar intersts.
  537. * @param firstInterest the interest to be removed from
  538. * @param secondInterest the similar interest to be removed
  539. */
  540. public void removeSimilarInterest(Long firstInterest, Long secondInterest){
  541. DBCollection interests = getDb().getCollection(INTERESTS);
  542. DBObject i = safeFindById(INTERESTS, firstInterest, false);
  543. if (i != null) {
  544. SimilarInterestList similarInterests = new SimilarInterestList((String)i.get("similar"));
  545. similarInterests.remove(new SimilarInterest(secondInterest, (double)0));
  546. i.put("similar", similarInterests.toString());
  547. interests.update(safeFindById(INTERESTS, firstInterest, false),i);
  548. }
  549. }
  550. /**
  551. * Checks if the interest with the parameter id is
  552. * not owned by any user or collaborator request.
  553. * @param interestId The long id of the interest to check
  554. * @return true if the interest is disconncected, false
  555. * otherwise.
  556. */
  557. private boolean interestIsDisconnected(long interestId) {
  558. return ((getInterestUsers(interestId).size() == 0) && (getInterestRequests(interestId).size() == 0));
  559. }
  560. /**
  561. * Removes all orphaned interests from the database.
  562. * @return List<Long> ids of the reaped orphans
  563. */
  564. public List<Long> reapOrphans() {
  565. System.out.println("Witness the reaping");
  566. DBCollection interests = getDb().getCollection(INTERESTS);
  567. List<Long> theReaped = new ArrayList<Long>();
  568. for (DBObject dbObject : interests.find()) {
  569. long id = (Long)(dbObject.get("_id"));
  570. Set<Long> institutions = getInterestInstitutions(id);
  571. for (Long u : getInterestUsers(id)) {
  572. institutions.removeAll(getUserInstitutions(u));
  573. }
  574. for (Long r : getInterestRequests(id)) {
  575. institutions.removeAll(getCollaboratorRequestInstitutions(r));
  576. }
  577. for (Long institution : institutions) {
  578. System.out.println("Interest id: " + id + " text: " + dbObject.get("text") + " removed from institution " + institution);
  579. removeInterestFromInstitution(id, institution);
  580. }
  581. if (interestIsDisconnected(id)) {
  582. System.out.println("Interest id: " + id + " text: " + dbObject.get("text") + " removed from database");
  583. removeInterest(id);
  584. theReaped.add(id);
  585. }
  586. }
  587. return theReaped;
  588. }
  589. public long articleToId(String title){
  590. DBObject res = safeFindById(ARTICLES_TO_IDS, title, true);
  591. if(res == null){
  592. System.out.println("Invalid article title no ID found");
  593. return (long) -1;
  594. }
  595. Object wpId = res.get("wpId");
  596. if (wpId instanceof Integer) {
  597. return ((Integer)wpId).longValue();
  598. } else if (wpId instanceof String) {
  599. return Long.valueOf((String)wpId);
  600. } else if (wpId instanceof Long) {
  601. return (Long) wpId;
  602. } else {
  603. throw new IllegalStateException("invalid article id: '" + wpId + "'");
  604. }
  605. }
  606. public void buildInterestRelations (String text, long interest, long article, boolean relationsBuilt) {
  607. SimilarInterestList articles = getArticleSimilarities(article);
  608. SimilarInterestList list = new SimilarInterestList();
  609. int i = 0;
  610. Map<Long, Double> ids = new HashMap<Long, Double>();
  611. if (article != -1) {
  612. // don't give interests mapped to unknown articles similar interests
  613. while (list.size() < 200 && i < articles.size()) {
  614. SimilarInterest check = articles.get(i);
  615. DBObject articleToInterests = safeFindById(ARTICLES_TO_INTERESTS, check.interestId, false);
  616. if (articleToInterests != null) {
  617. Set<Long> similarInterests = interestStringToSet((String)articleToInterests.get("interests"));
  618. for (long id : similarInterests) {
  619. if (relationsBuilt && id!=interest) {
  620. ids.put(id, check.similarity);
  621. }
  622. if(id!=interest){
  623. list.add(new SimilarInterest(id, check.similarity));
  624. }
  625. }
  626. }
  627. i++;
  628. }
  629. }
  630. // Create a stub record with the text
  631. DBObject dbo = safeFindById(INTERESTS, interest, false);
  632. if(dbo == null) {
  633. dbo = new BasicDBObject("_id", interest);
  634. dbo.put("similar", "");
  635. dbo.put("usage", 0);
  636. }
  637. if (dbo.get("text") == null || dbo.get("text") != text) {
  638. dbo.put("text", text);
  639. getDb().getCollection(INTERESTS).save(dbo);
  640. }
  641. addInterestRelations(interest, list, false);
  642. if (relationsBuilt) {
  643. for (long id : ids.keySet()) {
  644. SimilarInterestList sims = new SimilarInterestList();
  645. sims.add(new SimilarInterest(interest, ids.get(id)));
  646. addInterestRelations(id, sims, true);
  647. }
  648. }
  649. }
  650. public void cleanupInterestRelations(Set<Long> validIds) {
  651. DBCollection interests = getDb().getCollection(INTERESTS);
  652. for (DBObject entry : interests.find()) {
  653. String simStr = (String)entry.get("similar");
  654. if (simStr != null) {
  655. Long id = (Long) entry.get("_id");
  656. DBObject q = new BasicDBObject("_id", id);
  657. if (validIds.contains(id)) {
  658. SimilarInterestList sims = new SimilarInterestList(simStr);
  659. sims.dedupe(validIds);
  660. entry.put("similar", sims.toString());
  661. interests.update(q, entry);
  662. } else {
  663. interests.remove(q);
  664. }
  665. }
  666. }
  667. }
  668. public void cleanupPeople(Set<Long> validIds){
  669. DBCollection people = getDb().getCollection(USERS);
  670. System.out.println(people.find());
  671. for (DBObject entry : people.find()){
  672. if (!validIds.contains(entry.get("_id"))){
  673. people.remove(entry);
  674. }
  675. }
  676. }
  677. public void cleanupCollaboratorRequests(Set<Long> validIds){
  678. DBCollection requests = getDb().getCollection(COLLABORATOR_REQUESTS);
  679. System.out.println(requests.find());
  680. for (DBObject entry : requests.find()){
  681. if (!validIds.contains(entry.get("_id"))){
  682. requests.remove(entry);
  683. }
  684. }
  685. }
  686. public void addInterestRelations(long interestId, SimilarInterestList sims, boolean merge){
  687. DBCollection interests = getDb().getCollection(INTERESTS);
  688. DBObject interest= safeFindById(INTERESTS, interestId, false);
  689. if(interest == null){
  690. interest = new BasicDBObject("_id", interestId);
  691. interest.put("similar", "");
  692. interests.insert(interest);
  693. }
  694. if (merge) {
  695. sims.add((String)interest.get("similar"));
  696. }
  697. interest.put("similar", sims.toString());
  698. interests.update(safeFindById(INTERESTS, interestId, false), interest);
  699. }
  700. public SimilarInterestList getSimilarInterests(long interest){
  701. //System.out.println(interest + " was the interest");
  702. DBObject i = safeFindById(INTERESTS, interest, false);
  703. if (i == null) {
  704. System.out.println("The interest " + interest + " is null");
  705. return new SimilarInterestList();
  706. }
  707. //System.out.println(i +" getSimilarInterests get");
  708. String res = (String)i.get("similar");
  709. if (res == null) {
  710. return new SimilarInterestList();
  711. }
  712. return new SimilarInterestList(res);
  713. }
  714. public SimilarInterestList getSimilarInterests(Long interest, InstitutionFilter institutionFilter) {
  715. DBObject i = safeFindById(INTERESTS, interest, false);
  716. if (i == null) {
  717. return new SimilarInterestList();
  718. }
  719. //log.info(similar +" getSimilarInterests get")
  720. Set<Long> institutionInterests = new HashSet<Long>();
  721. for (long id : institutionFilter.institutionIds) {
  722. institutionInterests.addAll(getInstitutionInterests(id));
  723. }
  724. if (institutionFilter.requiredInstitutionId != null) {
  725. institutionInterests.retainAll(getInstitutionInterests(institutionFilter.requiredInstitutionId));
  726. }
  727. return new SimilarInterestList((String)i.get("similar"), institutionInterests);
  728. }
  729. public void removeLowestSimilarity(Long interest) {
  730. DBCollection interests = getDb().getCollection(INTERESTS);
  731. DBObject i = safeFindById(INTERESTS, interest, false);
  732. SimilarInterestList similarInterests = new SimilarInterestList((String)i.get("similar"));
  733. similarInterests.removeLowest();
  734. i.put("similar", similarInterests.toString());
  735. interests.update(safeFindById(INTERESTS, interest, false),i);
  736. }
  737. /**
  738. *
  739. * @param interest the interest to replace lowest similarity in
  740. * @param newInterest the new similar interest
  741. * @param similarity the new similarity
  742. */
  743. public void replaceLowestSimilarity(Long interest, Long newInterest, Double similarity){
  744. DBCollection interests = getDb().getCollection(INTERESTS);
  745. DBObject i = safeFindById(INTERESTS, interest, false);
  746. SimilarInterestList similarInterests = new SimilarInterestList((String)i.get("similar"));
  747. similarInterests.add(new SimilarInterest(newInterest, similarity));
  748. similarInterests.removeLowest();
  749. i.put("similar", similarInterests.toString());
  750. interests.update(safeFindById(INTERESTS, interest, false),i);
  751. }
  752. private void addInterestToInstitutions(long interestId, List<Long> institutionIds) {
  753. for (long institutionId: institutionIds) {
  754. addInterestToInstitution(interestId, institutionId);
  755. }
  756. }
  757. private void addInterestToInstitution(long interestId ,long institutionId) {
  758. DBObject institution = safeFindById(INSTITUTION_INTERESTS, institutionId, false);
  759. DBCollection institutionInterests = getDb().getCollection(INSTITUTION_INTERESTS);
  760. if(institution==null) {
  761. institution = new BasicDBObject("_id", institutionId);
  762. institution.put("interests","");
  763. institutionInterests.insert(institution);
  764. }
  765. String res = interestSetToString(interestStringToSet(institution.get("interests")+
  766. Long.toString(interestId)+","));
  767. institution.put("interests",res);
  768. institutionInterests.update(safeFindById(INSTITUTION_INTERESTS, institutionId, false),institution);
  769. }
  770. /**
  771. * Removes an interest from the specified institution
  772. * @param interest The id of the interest to remove
  773. * @param institutionId The id of the institution to remove the
  774. * interest from
  775. */
  776. private void removeInterestFromInstitution(Long interest, Long institutionId) {
  777. DBObject institution = safeFindById(INSTITUTION_INTERESTS, institutionId, false);
  778. if (institution == null) {
  779. return;
  780. }
  781. DBCollection institutionInterests = getDb().getCollection(INSTITUTION_INTERESTS);
  782. Set<Long> updateInterests = interestStringToSet(institution.get("interests")+"");
  783. updateInterests.remove(interest);
  784. String res = interestSetToString(updateInterests);
  785. institution.put("interests",res);
  786. institutionInterests.update(safeFindById(INSTITUTION_INTERESTS, institutionId, false),institution);
  787. }
  788. public Set<Long> getInstitutionInterests(long id) {
  789. DBObject institution = safeFindById(INSTITUTION_INTERESTS, id, false);
  790. if(institution == null) {
  791. return new HashSet<Long>();
  792. }
  793. return interestStringToSet((String)institution.get("interests"));
  794. }
  795. private String interestSetToString(Set<Long> interests) {
  796. String res = "";
  797. for (Long i : interests) {
  798. res = res + i.toString() + ",";
  799. }
  800. return res;
  801. }
  802. private Set<Long> interestStringToSet(String interestString) {
  803. Set<Long> res = new HashSet<Long>();
  804. String[] interests = interestString.split(",");
  805. for (String i : interests) {
  806. if (i.length() > 0) {
  807. res.add(Long.parseLong(i));
  808. }
  809. }
  810. return res;
  811. }
  812. public SimilarInterestList getArticleSimilarities(long article) {
  813. DBObject similarities = safeFindById(ARTICLE_SIMILARITIES, "" + article, true);
  814. if (similarities == null) {
  815. System.out.println(article + " does not have an articleSimilarities entry");
  816. return new SimilarInterestList();
  817. }
  818. //System.out.println(article);
  819. //really long print ln statement below
  820. //System.out.println(similarities.toString());
  821. return new SimilarInterestList((String)similarities.get("similarities"));
  822. }
  823. public void addInterestToArticle(long interest, long article){
  824. DBObject articleInterests = safeFindById(ARTICLES_TO_INTERESTS , article, false);
  825. DBCollection articlesToInterests = getDb().getCollection(ARTICLES_TO_INTERESTS);
  826. if(articleInterests==null){
  827. articleInterests=new BasicDBObject("_id", article);
  828. articleInterests.put("interests","");
  829. articlesToInterests.insert(articleInterests);
  830. }
  831. Set<Long> interests = interestStringToSet((String)articleInterests.get("interests"));
  832. interests.add(interest);
  833. articleInterests.put("interests", interestSetToString(interests));
  834. articlesToInterests.update(safeFindById(ARTICLES_TO_INTERESTS, article, false), articleInterests);
  835. }
  836. /**
  837. * For use when removing a user or collaborator request. Handles any
  838. * disconnects. Should no other user or request from the specified
  839. * institution own an interest, removes the interest from that
  840. * institution's list of owned interests. Should no other user or
  841. * request own an interest, removes the interest from the database.
  842. *
  843. * @param interests Set of interest ids to check for disconnects.
  844. * @param interestUsers Maps interestIds (same ids as in the interests) to
  845. * the users with that interest. Should the deletion be caused by
  846. * the removal of a user, then the user being removed should not be
  847. * present in this Map.
  848. * @param interestRequests Map of all ids in the interests set to
  849. * the requests with that interest. Should the deletion be caused by
  850. * the removal of a request, then the request being removed should
  851. * not be present in this Map.
  852. * @param institutionId Set<Long> ids of the institutions to check and
  853. * update should the removal of an interest require the institution
  854. * to update its list of owned interests.
  855. * @return A List<Long> of the interests that were completely removed
  856. * from the database as a result of this remove.
  857. */
  858. private List<Long> handleDisconnects(Set<Long> interests, Map<Long,Set<Long>> interestUsers, Map<Long,Set<Long>> interestRequests, Long institutionId) {
  859. List<Long> deletedInterests = new LinkedList<Long>();
  860. for (Long interestId : interestUsers.keySet()) {
  861. boolean removed = false;
  862. if (interestUsers.get(interestId).size() == 0) {
  863. if (interestRequests.get(interestId).size() == 0) {
  864. // interest is owned by no one, delete it
  865. removeInterest(interestId);
  866. deletedInterests.add(interestId);
  867. }
  868. }
  869. // weed out interests owned by others in the institutions
  870. for (Long u : interestUsers.get(interestId)) {
  871. if (getUserInstitutions(u).contains(institutionId)) {
  872. interests.remove(interestId);
  873. removed = true;
  874. break;
  875. }
  876. }
  877. if (!removed) {
  878. for (Long c : interestRequests.get(interestId)) {
  879. if (getCollaboratorRequestInstitutions(c).contains(institutionId)) {
  880. interests.remove(interestId);
  881. break;
  882. }
  883. }
  884. }
  885. }
  886. // interests holds interests to remove from institution
  887. for (Long i : interests) {
  888. removeInterestFromInstitution(i, institutionId);
  889. }
  890. return deletedInterests;
  891. }
  892. public void extractSmallWpDb(String destinationDb) {
  893. mongo.dropDatabase(destinationDb);
  894. DB dbDest = mongo.getDB(destinationDb);
  895. DBCollection articlesToInterests = getDb().getCollection(ARTICLES_TO_INTERESTS);
  896. DBCollection articlesToIdsDest = dbDest.getCollection(ARTICLES_TO_IDS);
  897. DBCollection articleSimsDest = dbDest.getCollection(ARTICLE_SIMILARITIES);
  898. int total = 0;
  899. int found = 0;
  900. for (DBObject entry : articlesToInterests.find()) {
  901. total += 1;
  902. String article = "" + entry.get("_id");
  903. DBObject articleIds = safeFindByField(ARTICLES_TO_IDS, "wpId", article, true);
  904. if (articleIds == null) {
  905. continue;
  906. }
  907. articlesToIdsDest.insert(articleIds);
  908. DBObject sims = safeFindById(ARTICLE_SIMILARITIES, article, true);
  909. if (sims == null) {
  910. continue;
  911. }
  912. articleSimsDest.insert(sims);
  913. found++;
  914. }
  915. articlesToIdsDest.ensureIndex("wpId");
  916. System.err.println("copied " + found + " of " + total);
  917. }
  918. /**
  919. * Clears all caches
  920. */
  921. public void clearCache(){
  922. interestUserCache.clear();
  923. interestRequestCache.clear();
  924. userInstitutionCache.clear();
  925. }
  926. }