PageRenderTime 49ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/src/php/associated-words/get-associated-words.php

https://bitbucket.org/silverasm/wordseer
PHP | 211 lines | 152 code | 13 blank | 46 comment | 30 complexity | 57c5970d1180dc2cfa6d24cfc2f26a6e MD5 | raw file
Possible License(s): Apache-2.0, LGPL-3.0, BSD-3-Clause
  1. <?php
  2. /* Copyright 2012 Aditi Muralidharan. See the file "LICENSE" for the full license governing this code. */
  3. /*****************************************************************************
  4. get-associated-words.php
  5. Return the nouns, verbs, and adjectives that occur in the most sentences
  6. alongside this word (a plain co-occurrence count, not TF-IDF), plus its synonyms
  7. *****************************************************************************/
  include_once '../util.php';

  // The instance name is spliced into an include path below, so refuse any
  // value that could climb out of the instances/ directory (path separators,
  // NUL bytes, "." / "..") instead of including an attacker-chosen file.
  $wordseer_instance = getGetParam('instance');
  if (!is_string($wordseer_instance)
      || $wordseer_instance === ''
      || $wordseer_instance === '.'
      || $wordseer_instance === '..'
      || strpos($wordseer_instance, "\0") !== false
      || strpbrk($wordseer_instance, "/\\") !== false) {
      if (!headers_sent()) {
          header('HTTP/1.1 400 Bad Request');
      }
      die('Invalid instance name.');
  }
  $path = '../../../instances/'.$wordseer_instance.'/config.php';
  include_once $path;

  $N = 0;

  // Query parameters
  // Re-read after the include, as the original script did.
  $wordseer_instance = getGetParam('instance');
  $gov = '';
  $dep = '';
  $relation = '';
  $govtype = 'word';
  $deptype = 'word';
  $searches = decodeGetJson('search');
  // With at most one search, the grammatical-search fields are taken from
  // the individual GET parameters. A non-array decode result is treated as
  // "no searches" so count() is never called on a non-countable value.
  if (!is_array($searches) || count($searches) <= 1) {
      $gov = getGetParam('gov');
      $govtype = getGetParam('govtype');
      $dep = getGetParam('dep');
      $deptype = getGetParam('deptype');
      $relation = getGetParam('relation');
  }
  $collection = getGetParam('collection');
  $statistics = getGetParam('statistics');
  // NOTE(review): the name is spelled "phrasess"; kept as-is because code in
  // the included files may read this global by that name -- confirm.
  $phrasess = decodeGetJson('phrases');
  $metadata = decodeGetJson('metadata');
  $timing = getGetParam('timing');

  dispatch();
  /**
   * Resolve the requested word(s) to a list of word IDs and print the
   * associated words as JSON.
   *
   * Reads from $_GET:
   *   - 'id':    a single word ID, used directly; or
   *   - 'word' together with 'class':
   *       'word-set' -> IDs from getWordIDsFromWordSet() (a ", "-separated string)
   *       'word'     -> IDs from getWordIDs()
   *       'phrase'   -> 'word' is split on single spaces into IDs
   *
   * When neither parameter is present, or 'class' is not one of the values
   * above, the ID list is empty.
   */
  function dispatch(){
      // Start with an empty list so the join() below never sees an
      // undefined variable when no branch assigns any IDs.
      $ids = array();

      if (array_key_exists('id', $_GET)) {
          $ids = array(getGetParam('id'));
      } else if (array_key_exists('word', $_GET)) {
          $cls = getGetParam('class');
          if ($cls == 'word-set') {
              $ids = explode(", ", getWordIDsFromWordSet(getGetParam('word')));
          } else if ($cls == 'word') {
              $ids = getWordIDs(getGetParam('word'));
          } else if ($cls == "phrase") {
              $ids = explode(" ", getGetParam('word'));
          }
      }

      // getWordIDs() is defined elsewhere; guard against a non-array result
      // so join() cannot fail on it.
      if (!is_array($ids)) {
          $ids = array();
      }

      $id_string = join(",", $ids);
      $words = getAssociatedWords($id_string, $ids, getGetParam('word'));
      echo json_encode($words);
  }
  /**
   * Collect the words that share sentences with the given word IDs, plus the
   * synonyms of those IDs.
   *
   * Steps:
   *   1. Find every sentence containing one of the word IDs. When the global
   *      $query_id is set, only sentences present in the filtered-sentence
   *      table for that query are considered.
   *   2. Count, for every other noun / verb / adjective, the number of
   *      distinct sentences from step 1 it appears in (top 500 by count),
   *      dropping stop words.
   *   3. Look up synonyms of the word IDs in the synsets table.
   *
   * @param string $idString Comma-separated word IDs. Entries that are not
   *                         plain decimal integers are discarded before the
   *                         list is placed into SQL.
   * @param array  $ids      Not used by this function; kept for callers.
   * @param string $word     Not used by this function; kept for callers.
   * @return array Map with keys "Adverbs", "Adjectives", "Nouns", "Verbs",
   *               "" and "Synsets". Each value is a list of
   *               array("id" => ..., "word" => ..., "score" => ...) entries,
   *               with an extra "word_set" key when the word belongs to at
   *               least one word set. "Adverbs" stays empty because the
   *               co-occurrence query only selects N*, V* and J* tags.
   */
  function getAssociatedWords($idString, $ids, $word){
      global $STOPWORDS; // util.php
      global $query_id;
      global $timing;
      global $dont_cache_search_results;

      $words = array("Adverbs"=>array(), "Adjectives"=>array(), "Nouns"=>array(), "Verbs"=>array(), ""=>array());

      // The ID list is interpolated into SQL below, so keep only entries
      // that are plain decimal integers. Assumes word IDs are unsigned
      // integers, as their use in "word_id IN (...)" suggests.
      $clean_ids = array();
      foreach (explode(",", (string) $idString) as $candidate) {
          $candidate = trim($candidate);
          if (preg_match('/^[0-9]+\z/', $candidate)) {
              $clean_ids[] = $candidate;
          }
      }
      if (count($clean_ids) == 0) {
          // Nothing to look up; "IN ()" would be a SQL syntax error.
          $words["Synsets"] = array();
          return $words;
      }
      $idString = join(",", $clean_ids);

      // Optionally restrict the context to the sentences of a stored query.
      $table_identifier = "";
      $context_conditions = "";
      if ($query_id) {
          $table = 'filtered_sent_ids';
          $query_id_where = '';
          if (!$dont_cache_search_results) {
              $table = 'cached_filtered_sent_ids';
              // Cast so the value cannot carry SQL of its own.
              $query_id_where = " AND query_id = " . (int) $query_id . " ";
          }
          $table_identifier = ", $table ";
          $context_conditions = "AND $table.id = sentence_id
              $query_id_where";
      }

      // Step 1: sentences containing any of the requested words.
      $sql = "SELECT sentence_id, sentence_number, document_id
          FROM sentence_xref_word $table_identifier
          WHERE word_id IN (".$idString.")
          $context_conditions;";
      if ($timing) {
          echo $sql."<br>\n";
      }
      $result = mysql_query($sql) or die("get-associated-words.php L39 <b>A fatal MySQL error occured</b>.\n<br/> Query: " . $sql . "\n<br/> Error: (" . mysql_errno() . ") " . mysql_error());

      // Keyed by ID so a sentence matched through several words is listed once.
      $sentence_id_set = array();
      while ($row = mysql_fetch_array($result)) {
          $sentence_id_set[$row['sentence_id']] = true;
      }
      $sentence_ids = array_keys($sentence_id_set);

      $word_set_memberships = getWordSetMemberships();
      $stopwords = is_array($STOPWORDS) ? $STOPWORDS : array();

      // Step 2: co-occurring words. Skipped when no sentence matched, since
      // an empty IN () list is a SQL syntax error.
      if (count($sentence_ids) > 0) {
          $sentence_id_string = join(",", $sentence_ids);
          $sql = "SELECT count(distinct sentence_id) as score, word, word_id, pos
              FROM word, sentence_xref_word
              WHERE word.id = word_id
              AND sentence_id IN ($sentence_id_string)
              AND word_id NOT IN ($idString)
              AND ((pos like 'N%') OR (pos like 'V%') OR (pos like 'J%'))
              GROUP BY word
              ORDER BY score DESC
              LIMIT 500;";
          $result = mysql_query($sql) or die("get-associated-words.php L82 <b>A fatal MySQL error occured</b>.\n<br/> Query: " . $sql . "\n<br/> Error: (" . mysql_errno() . ") " . mysql_error());

          while ($row = mysql_fetch_array($result)) {
              if (!($row['score'] > 0) || in_array($row['word'], $stopwords)) {
                  continue;
              }
              // Bucket by the first letter of the part-of-speech tag.
              switch (substr($row['pos'], 0, 1)) {
                  case "V": $pos = "Verbs"; break;
                  case "J": $pos = "Adjectives"; break;
                  case "R": $pos = "Adverbs"; break;
                  case "N": $pos = "Nouns"; break;
                  default:  $pos = "";
              }
              $entry = array(
                  "id"=>$row['word_id'],
                  "word"=>$row['word'],
                  "score"=>$row['score']);
              if (array_key_exists($row['word_id'], $word_set_memberships)) {
                  $entry['word_set'] = join(" ",
                      $word_set_memberships[$row['word_id']]);
              }
              $words[$pos][] = $entry;
          }
      }

      // Step 3: synonyms of the requested words.
      $sql = "SELECT *, sum(score) as total from synsets
          WHERE word1_id IN ($idString)
          GROUP BY word2
          ORDER BY total DESC;";
      $result = mysql_query($sql) or die("get-associated-words.php l.157\n<b>A fatal MySQL error occured</b>.\n<br/> Query: " . $sql . "\n<br/> Error: (" . mysql_errno() . ") " . mysql_error());

      $words["Synsets"] = array();
      while ($row = mysql_fetch_assoc($result)) {
          $entry = array(
              "id"=>$row['word2_id'],
              "word"=>$row['word2'],
              "score"=>$row['total']
          );
          if (array_key_exists($row['word2_id'], $word_set_memberships)) {
              $entry['word_set'] = join(" ",
                  $word_set_memberships[$row['word2_id']]);
          }
          $words["Synsets"][] = $entry;
      }

      return $words;
  }
  ?>
  197. ?>