/src/edu/cmu/sv/webcrawler/models/Keywords.java

https://gitlab.com/QawsQAER/riskadvisor · Java · 134 lines · 116 code · 16 blank · 2 comment · 15 complexity · 7100d060cbf1b8901d6f2b21e036ba2a MD5 · raw file

  1. package edu.cmu.sv.webcrawler.models;
  2. import com.mongodb.*;
  3. import edu.cmu.sv.webcrawler.util.MongoHelper;
  4. import java.util.*;
  5. public class Keywords {
  6. private DBCollection collection;
  7. public Keywords() {
  8. MongoHelper helper = new MongoHelper();
  9. this.collection = helper.getDb().getCollection("keywords");
  10. }
  11. public void insert(String line) {
  12. BasicDBObject obj = new BasicDBObject();
  13. obj.put("value", line);
  14. collection.insert(obj);
  15. }
  16. public Set<String> getKeywords() {
  17. Set<String> set = new HashSet<String>();
  18. DBCursor cursor = collection.find();
  19. try {
  20. while (cursor.hasNext()) {
  21. DBObject obj = cursor.next();
  22. String tmp = (String) obj.get("value");
  23. set.add(tmp);
  24. }
  25. } catch (Exception e) {
  26. e.printStackTrace();
  27. }
  28. return set;
  29. }
  30. public void removeAll() {
  31. BasicDBObject doc = new BasicDBObject();
  32. collection.remove(doc);
  33. }
  34. public Map<String, Integer> getKeywords(String symbol,String year) {
  35. Map<String, Integer> result=new HashMap<String,Integer>();
  36. BasicDBObject doc = new BasicDBObject();
  37. doc.put("symbol", symbol);
  38. doc.put("year", year);
  39. DBCursor cursor = MongoHelper.getCollection().find(doc);
  40. Map<String, Integer> map = null;
  41. while (cursor.hasNext()) {
  42. DBObject obj = cursor.next();
  43. BasicDBList keywords = (BasicDBList) obj.get("keywords");
  44. map = getMap(keywords);
  45. merge(result, map);
  46. /*break;*/
  47. }
  48. return result;
  49. }
  50. public Map<String, Integer> getKeywords(String symbol, String year, String docType) {
  51. BasicDBObject doc = new BasicDBObject();
  52. Map<String, Integer> result=new HashMap<String,Integer>();
  53. doc.put("symbol", symbol);
  54. if (year != null)
  55. doc.put("year", year);
  56. if (docType != null)
  57. doc.put("document", docType);
  58. DBCursor cursor = MongoHelper.getCollection().find(doc);
  59. Map<String, Integer> map = null;
  60. while (cursor.hasNext()) {
  61. DBObject obj = cursor.next();
  62. BasicDBList keywords = (BasicDBList) obj.get("keywords");
  63. map = getMap(keywords);
  64. merge(result, map);
  65. /*break;*/
  66. }
  67. return result;
  68. }
  69. public Map<String, Integer> getKeywordsFrequency(String symbol, String year, String docType) {
  70. BasicDBObject doc = new BasicDBObject();
  71. Map<String, Integer> result=new HashMap<String,Integer>();
  72. doc.put("symbol", symbol);
  73. doc.put("year", year);
  74. doc.put("document", docType);
  75. DBCursor cursor = MongoHelper.getCollection().find(doc);
  76. Map<String, Integer> map = null;
  77. String wordCount;
  78. while (cursor.hasNext()) {
  79. DBObject obj = cursor.next();
  80. BasicDBList keywords = (BasicDBList) obj.get("keywords");
  81. wordCount = (String) obj.get("wordCount");
  82. map = getFrequencyMap(keywords, wordCount);
  83. merge(result, map);
  84. }
  85. return result;
  86. }
  87. private void merge( Map<String, Integer> a, Map<String, Integer> b){
  88. for(String key:b.keySet()){
  89. int v_b=b.get(key);
  90. if(a.containsKey(key)){
  91. int v_a=a.get(key);
  92. a.put(key,v_a+v_b);
  93. }
  94. else a.put(key,v_b);
  95. }
  96. }
  97. public static Map<String, Integer> getMap(BasicDBList keywords) {
  98. Map<String,Integer> map = new HashMap<String,Integer>();
  99. for (Iterator<Object> it = keywords.iterator(); it.hasNext();) {
  100. BasicDBObject dbo = (BasicDBObject) it.next();
  101. for (String s : dbo.keySet()) {
  102. map.put(s, dbo.getInt(s));
  103. }
  104. }
  105. return map;
  106. }
  107. public static Map<String, Integer> getFrequencyMap(BasicDBList keywords, String wordCount) {
  108. Map<String, Integer> map = new HashMap<String, Integer>();
  109. int wc = Integer.parseInt(wordCount);
  110. for (Iterator<Object> it = keywords.iterator(); it.hasNext();) {
  111. BasicDBObject dbo = (BasicDBObject) it.next();
  112. for (String s : dbo.keySet()) {
  113. map.put(s, (dbo.getInt(s)*100000 / wc));
  114. }
  115. }
  116. return map;
  117. }
  118. }