PageRenderTime 29ms CodeModel.GetById 0ms RepoModel.GetById 0ms app.codeStats 0ms

/src/php/synonym_groups.php

https://bitbucket.org/silverasm/wordseer
PHP | 268 lines | 234 code | 11 blank | 23 comment | 24 complexity | 1c3c0d194ab4d8bd5123c2f13ffbb4ee MD5 | raw file
Possible License(s): Apache-2.0, LGPL-3.0, BSD-3-Clause
  1. <?php
  2. /* Copyright 2012 Aditi Muralidharan. See the file "LICENSE" for the full license governing this code. */
  3. /**************************************************************
  4. synonym groups:
  5. given pairwise similarity scores, groups words into synonym sets
  6. ***********************************************************/
  7. include_once '../../config.php';
  8. include_once 'dbsetup.php';
  9. include_once 'util.php';
  10. include_once 'priorityqueue.php';
  /**
   * Request dispatch.
   *
   * Three request shapes are handled, checked in this order:
   *   1. ?id=<word id>               synonym set for that word id
   *   2. ?word=<word>&pos=<tag>      synonym set for that word and part of speech
   *   3. ?words=<list>&type=context  synonyms and associated words for each
   *      listed word; <list> is space separated and each item is either
   *      word_wordID_sentenceID or a bare word.
   *
   * A non-empty "json" parameter makes display() emit JSON instead of the
   * HTML test page. A request matching none of the shapes produces no output.
   */
  if (!empty($_GET['id'])) {
      // The id is used unquoted in SQL, so it is forced to an integer here;
      // string escaping alone does not protect a numeric context.
      display(synset(intval($_GET['id']), "", ""));
  } else if (!empty($_GET['word']) && !empty($_GET['pos'])) {
      // dbsetup.php and util.php are already included at the top of the file.
      display(synset(
          false,
          mysql_real_escape_string($_GET['word']),
          mysql_real_escape_string($_GET['pos'])
      ));
  } else if (!empty($_GET['words']) && isset($_GET['type']) && $_GET['type'] == "context") {
      include_once 'associated-words/get-associated-words.php';
      // mysql_escape_string() is deprecated; use the connection-aware variant.
      $words = decode(mysql_real_escape_string(trim($_GET['words'])));
      display(getContext($words));
  }
  /**
   * Looks up the stored synonym set for a word.
   *
   * The word is selected by id when $id is truthy, otherwise by surface form
   * and part of speech. If none of the matched word rows has any synsets
   * entries, the lookup is retried for every word row sharing the surface
   * form of the last matched row, regardless of part of speech.
   *
   * @param mixed  $id   Word id, or a falsy value to look up by $word/$pos.
   * @param string $word Surface form; must already be SQL-escaped by the caller.
   * @param string $pos  Part-of-speech tag; must already be SQL-escaped by the caller.
   * @return array List of arrays with keys "word", "id" and "similarity".
   */
  function synset($id, $word, $pos){
      if ($id) {
          // Numeric context: cast rather than rely on string escaping.
          $query = "SELECT * from word where id = ".intval($id).";";
      } else {
          $query = "SELECT * from word where word = '".$word."' and pos = '".$pos."';";
      }
      $result = mysql_query($query);
      if (!$result) {
          // Query failed; there is nothing to report.
          return array();
      }

      $answer = array();
      $matched = false;
      $surface = null;
      while ($row = mysql_fetch_array($result)) {
          $matched = true;
          $surface = $row['word'];
          $answer = array_merge($answer, getSynset($row['id']));
      }

      // If no matching words were found for the ID, then try for at least
      // the same surface form.
      if (count($answer) == 0 && $matched) {
          $query = "SELECT * from word where word = '".mysql_real_escape_string($surface)."';";
          $result = mysql_query($query);
          while ($result && ($row = mysql_fetch_array($result))) {
              $answer = array_merge($answer, getSynset($row['id']));
          }
      }

      // Output is the caller's job (see display()); echoing here as well
      // made every response carry the payload twice.
      return $answer;
  }
  /**
   * Get the group of words most similar to this word (older implementation).
   *
   * For every matched word row, each word returned by getMostSimilar() (a
   * "friend") is credited with its similarity, and each friend-of-friend is
   * credited with its similarity multiplied by the friend's similarity.
   * Candidates with the same surface form as the matched word are ignored.
   * Candidates are then popped from a PriorityQueue and kept when their score
   * is at least half of the largest score seen so far.
   *
   * Side effect: $_GET['word'] and $_GET['pos'] are overwritten with the
   * values of the matched word row.
   *
   * NOTE(review): the half-of-maximum filter depends on the order in which
   * PriorityQueue::pop() returns items; presumably highest priority first -
   * confirm against priorityqueue.php.
   *
   * @param mixed  $id   Word id, or a falsy value to look up by $word/$pos.
   * @param string $word Surface form; must already be SQL-escaped by the caller.
   * @param string $pos  Part-of-speech tag; must already be SQL-escaped by the caller.
   * @return array List of arrays with keys "word", "id", "similarity" and "pos".
   */
  function old_synset($id, $word, $pos){
      if ($id) {
          // Numeric context: cast rather than rely on string escaping.
          $query = "SELECT * from word where id = ".intval($id).";";
      } else {
          $query = "SELECT * from word where word = '".$word."' and pos = '".$pos."';";
      }
      $result = mysql_query($query);

      $similarities = array(); // candidate id => accumulated score
      $ids = array();          // candidate id => surface form
      $poss = array();         // candidate id => part of speech

      while ($result && ($row = mysql_fetch_array($result))) {
          $word = $row['word'];
          $pos = $row['pos'];
          $_GET['word'] = $word;
          $_GET['pos'] = $pos;

          foreach (getMostSimilar($row['id']) as $friend) {
              if ($friend['word'] == $word) {
                  continue;
              }
              if (!array_key_exists($friend['id'], $similarities)) {
                  $similarities[$friend['id']] = 0;
                  $ids[$friend['id']] = $friend['word'];
                  $poss[$friend['id']] = $friend['pos'];
              }
              $similarities[$friend['id']] += $friend['similarity'];

              foreach (getMostSimilar($friend['id']) as $ff) {
                  if ($ff['word'] == $word) {
                      continue;
                  }
                  if (!array_key_exists($ff['id'], $similarities)) {
                      $similarities[$ff['id']] = 0;
                      $ids[$ff['id']] = $ff['word'];
                      $poss[$ff['id']] = $ff['pos'];
                  }
                  // Second-hop candidates are discounted by the first hop.
                  $similarities[$ff['id']] += $ff['similarity'] * $friend['similarity'];
              }
          }
      }

      $best = new PriorityQueue;
      $best->clear();
      foreach ($similarities as $candidate => $score) {
          $best->push($candidate, $score);
      }

      $answer = array();
      $max = 0;
      while (!$best->IsEmpty()) {
          $next = $best->pop();
          $sim = $similarities[$next];
          if ($sim > $max) {
              $max = $sim;
          }
          if ($sim >= $max / 2) {
              $answer[] = array(
                  "word" => $ids[$next],
                  "id" => $next,
                  "similarity" => $sim,
                  "pos" => $poss[$next],
              );
          }
      }
      return $answer;
  }
  /**
   * Get the 10 most similar words to this word (older, single-query version).
   *
   * Joins the similarity table against the word table and returns the rows
   * with the highest lin_similarity whose word2_id is the given id.
   *
   * @param mixed $id Word id; coerced to an integer before use in SQL.
   * @return array List of arrays with keys "id", "similarity", "word" and
   *               "pos"; empty when the query fails or matches nothing.
   */
  function old_getMostSimilar($id){
      $query = "SELECT word1_id, word.word, word.pos, lin_similarity"
          ." from similarity, word"
          ." WHERE word.id = word1_id"
          ." AND word2_id = ".intval($id)
          ." ORDER BY lin_similarity desc"
          ." LIMIT 10;";
      $result = mysql_query($query);

      $friends = array();
      // A failed query yields false; skip the fetch loop in that case.
      while ($result && ($row = mysql_fetch_array($result))) {
          $friends[] = array(
              "id" => $row['word1_id'],
              "similarity" => $row['lin_similarity'],
              "word" => $row['word'],
              "pos" => $row['pos'],
          );
      }
      return $friends;
  }
  /**
   * Get up to 10 words with the highest lin_similarity to the given word.
   *
   * Reads the similarity table for rows whose word2_id is the given id, then
   * looks up each word1_id in the word table for its surface form and part
   * of speech.
   *
   * @param mixed $id Word id; coerced to an integer before use in SQL.
   * @return array List of arrays with keys "id", "similarity", "word" and
   *               "pos". "word" and "pos" are null when the word row cannot
   *               be found.
   */
  function getMostSimilar($id){
      // Only the first ten rows are ever used, so let the database stop
      // there instead of returning the whole ordered result set.
      $query = "SELECT word1_id, lin_similarity"
          ." from similarity"
          ." WHERE word2_id = ".intval($id)
          ." ORDER BY lin_similarity desc"
          ." LIMIT 10;";
      $result = mysql_query($query);

      $friends = array();
      while ($result && ($row = mysql_fetch_array($result))) {
          $lookup = mysql_query("SELECT * from word WHERE id = ".intval($row['word1_id']).";");
          $info = $lookup ? mysql_fetch_array($lookup) : false;
          $friends[] = array(
              "id" => $row['word1_id'],
              "similarity" => $row['lin_similarity'],
              "word" => $info ? $info['word'] : null,
              "pos" => $info ? $info['pos'] : null,
          );
      }
      return $friends;
  }
  /**
   * Print out the web page, or display JSON.
   *
   * When the "json" request parameter is non-empty, the synset is written as
   * JSON. Otherwise a small HTML test page is written: a word / part-of-speech
   * form prefilled from the request (defaults "mother" and "NN"), followed by
   * a list of the synset's words, each linking to ?id=<its id>.
   *
   * All request- and data-derived values are HTML-escaped before output.
   *
   * @param array $synset List of arrays; the HTML view reads the "id" and
   *                      "word" keys of each entry.
   * @return void
   */
  function display($synset){
      if (!empty($_GET['json'])) {
          echo json_encode($synset);
          return;
      }

      $word = !empty($_GET['word']) ? $_GET['word'] : "mother";
      $pos = !empty($_GET['pos']) ? $_GET['pos'] : "NN";

      echo "<!DOCTYPE html>\n";
      echo "<html>\n";
      echo "<head>\n";
      echo "<title> Lin Similarity tester </title>\n";
      echo "</head>\n";
      echo "<body>\n";
      echo "<h1> Enter a word and part of speech </h1>\n";
      echo "<form action=\"\">\n";
      // Request values are reflected into attributes, so they must be escaped.
      echo '<label>Word</label><input name="word" value="'
          .htmlspecialchars($word, ENT_QUOTES, 'UTF-8')."\"><br>\n";
      echo '<label>POS</label><input name="pos" value="'
          .htmlspecialchars($pos, ENT_QUOTES, 'UTF-8')."\">\n";
      echo "<input type=\"submit\" value=\"Go\">\n";
      echo "</form>\n";

      echo '<ul>';
      foreach ($synset as $s) {
          $href = '?id='.urlencode((string)$s['id']);
          echo '<li><a href="'.htmlspecialchars($href, ENT_QUOTES, 'UTF-8').'">';
          echo htmlspecialchars((string)$s['word'], ENT_QUOTES, 'UTF-8');
          echo '</a></li>';
      }
      echo '</ul>';
      echo '</body></html>';
  }
  /**
   * Unpacks the received words.
   *
   * The input is a space-separated list. An item containing "_" is read as
   * word_wordID_sentenceID. Any other item is treated as a bare word and
   * expanded to one entry per id returned by getWordID(), which is expected
   * to return the ids joined by ", ". Empty items (e.g. from repeated
   * spaces) are skipped.
   *
   * @param string $words Space-separated list of items.
   * @return array List of arrays with keys "word", "id" and "sentence".
   *               "sentence" is false for bare words; "id" and "sentence" are
   *               null when an underscore item omits them.
   */
  function decode($words){
      $data = array();
      // explode() replaces split(), which was removed in PHP 7; both
      // separators here are literal strings, so no regex is needed.
      foreach (explode(" ", $words) as $component) {
          if ($component === "") {
              continue;
          }
          if (strpos($component, "_") !== false) {
              $parts = explode("_", $component);
              $data[] = array(
                  "word" => $parts[0],
                  "id" => isset($parts[1]) ? $parts[1] : null,
                  "sentence" => isset($parts[2]) ? $parts[2] : null,
              );
          } else {
              foreach (explode(", ", getWordID($component)) as $id) {
                  $data[] = array("word" => $component, "id" => $id, "sentence" => false);
              }
          }
      }
      return $data;
  }
  /**
   * Builds one context record per input word.
   *
   * Each record holds the word's synonyms (getSynset), its associated words
   * (getAssociatedWords), an empty "context" list, and the word's surface
   * form.
   *
   * @param array $words List of arrays, each with "id" and "word" keys.
   * @return array List of arrays with keys "synonyms", "associated",
   *               "context" and "word", in input order.
   */
  function getContext($words){
      $contexts = array();
      foreach ($words as $entry) {
          $wordId = $entry['id'];
          $contexts[] = array(
              "synonyms" => getSynset($wordId),
              "associated" => getAssociatedWords($wordId),
              "context" => array(),
              "word" => $entry['word'],
          );
      }
      return $contexts;
  }
  /**
   * Fetches the stored synonym set for a word id.
   *
   * Reads every synsets row whose word1_id is the given id, ordered by
   * descending similarity.
   *
   * @param mixed $id Word id; coerced to an integer before use in SQL because
   *                  it can originate from request data and is used unquoted.
   * @return array List of arrays with keys "word", "id" and "similarity";
   *               empty when the query fails or matches nothing.
   */
  function getSynset($id){
      $query = "SELECT * from synsets where word1_id = ".intval($id)." ORDER BY similarity DESC;";
      $result = mysql_query($query);

      $answer = array();
      // A failed query yields false; skip the fetch loop in that case.
      while ($result && ($row = mysql_fetch_array($result))) {
          $answer[] = array(
              "word" => $row['word2'],
              "id" => $row['word2_id'],
              "similarity" => $row['similarity'],
          );
      }
      return $answer;
  }
  256. ?>