PageRenderTime 77ms CodeModel.GetById 29ms RepoModel.GetById 1ms app.codeStats 0ms

/src/php/similarsentences/similarsentences.php

https://bitbucket.org/silverasm/wordseer
PHP | 607 lines | 410 code | 34 blank | 163 comment | 37 complexity | d34c93edb60f8f91158e42159aa48950 MD5 | raw file
Possible License(s): Apache-2.0, LGPL-3.0, BSD-3-Clause
  1. <?php
  2. /* Copyright 2012 Aditi Muralidharan. See the file "LICENSE" for the full license governing this code. */
  3. /*****************************************************************************
  4. similarsentences.php
  5. Calculates a result set of sentences in response to a query using relevance
  6. feedback, specifically the Rocchio algorithm.
  7. The top-level dispatch procedure works with either a string query,
  8. or with a vector query and two sets of sentences: those judged by the user to
  9. be relevant, and non-relevant. In return it always sends back a set of
  10. sentences.
  11. The Rocchio algorithm uses the vector-space model of information retrieval
  12. to refine a query based on relevance feedback.
  13. *****************************************************************************/
  14. include_once '../../../config.php';
  15. include_once '../dbsetup.php';
  16. include_once '../util.php';
  17. include_once 'sparsevector.php';
  18. include_once '../priorityqueue.php';
  /** Algorithm parameters: weights used when the query vector is updated **/
  // How strongly sentences marked relevant pull the query towards them.
  $ALPHA_plus = 0.1;
  // How strongly words marked relevant pull the query towards them;
  // a marked word counts as much as a typed search term.
  $ALPHA_w_plus = 1;
  // How strongly sentences marked irrelevant push the query away
  // (a tenth of the relevant-sentence weight).
  $ALPHA_minus = $ALPHA_plus * 0.1;
  // How strongly words marked irrelevant push the query away
  // (a tenth of the relevant-word weight).
  $ALPHA_w_minus = $ALPHA_w_plus * 0.1;
  // Upper bound on the number of sentences sent back for one query.
  $LIMIT = 500;
  30. /** dispatch procedure
  31. Sends back sentences based on the $_POST data sent by the client.
  32. Arguments:
  33. -- 'string_query': "true", or "false". "true" indicates that the query is
  34. a string query, and the query will be interpreted as
  35. such
  36. -- 'vector_query': "true" or "false". "true" indicates that the query is
  37. a vector query, and the query will be interpreted as
  38. a vector.
  39. -- 'query': a String (if string_query is "true"), or (if 'vector_query'
  40. is "true") an object containing
  41. features:
  42. a map from string feature ID's to floating-point
  43. values
  44. relevant: a list of relevant sentences represented
  45. irrelevant: a list of irrelevant sentences represented
  46. -- 'relevant': a JSON _list_ of integer sentence ID's judged by the user
  47. to be relevant.
  48. -- 'irrelevant': a JSON _list_ of integer sentence ID's judged by the user
  49. to be NOT relevant.
  50. -- 'relevant_words': a JSON _list_ of integer word ID's judged by the user
  51. to be relevant.
  52. -- 'irrelevant_words': a JSON _list_ of integer word ID's judged by the user
  53. to be NOT relevant.
  54. Return values
  55. A JSON response is sent to the client, containing the following data:
  56. -- sentences : a list of sentences. Where each sentence has an ID, a
  57. narrativeID, title, date, and a string.
  58. -- query : a feature-vector representing the current query, to be stored
  59. by the client and sent back with further query refinements.
  60. {sentences:
  61. [{
  62. sentenceID: integer
  63. sentence: string
  64. }
  65. ...,
  66. ....]
  67. vector_query: {featureID: floating-point value, ..., ...}
  68. relevant: [ the list of sentence ID's incorporated as relevant],
  69. irrelevant: [ the list of sentence ID's incorporated as irrelevant],
  70. relevant_words: [ the list of word ID's incorporated as relevant],
  71. irrelevant_words: [ the list of word ID's incorporated as irrelevant]
  72. }
  73. **/
  dispatch();

  /**
   * Request entry point: picks the handler matching the posted query type and
   * echoes its result as JSON.
   *
   * Only $_POST is read. A field that is missing, or a list field that does
   * not decode to a JSON list, is treated as absent (empty query / empty list)
   * rather than being passed on as null. When neither 'string_query' nor
   * 'vector_query' equals "true", an empty JSON array is echoed.
   */
  function dispatch(){
      $result = array();
      if(isset($_POST['string_query']) && $_POST['string_query'] == "true"){
          // The previous query state is read from 'vector-query' (hyphenated)
          // in this branch; it may legitimately be absent on a first search.
          $current_query = isset($_POST['vector-query'])
              ? (array) json_decode($_POST['vector-query'])
              : array();
          $typed_query = isset($_POST['query']) ? $_POST['query'] : "";
          $result = process_string_query(mysql_escape_string(strtolower($typed_query)), $current_query);
      }else if(isset($_POST['vector_query']) && $_POST['vector_query'] == "true"){
          // unpack the marked sentences and words
          $relevant_sentences = dispatch_decode_posted_list('relevant');
          $irrelevant_sentences = dispatch_decode_posted_list('irrelevant');
          $relevant_words = dispatch_decode_posted_list('relevant_words');
          $irrelevant_words = dispatch_decode_posted_list('irrelevant_words');
          $query = isset($_POST['query']) ? (array) json_decode($_POST['query']) : array();
          if(array_key_exists('calculate_new_query_words', $_POST)){
              // Only report which words the updated query would contain,
              // without fetching any sentences.
              $new_query = calculate_new_vector_query($query, $relevant_sentences, $irrelevant_sentences, $relevant_words, $irrelevant_words);
              $result = convert_vector_query_to_relevance_words($new_query);
          }else{
              // Apply the relevance feedback and send back the new sentences.
              $result = process_relevance_feedback($query, $relevant_sentences, $irrelevant_sentences, $relevant_words, $irrelevant_words);
          }
      }
      echo json_encode($result);
  }

  /* Decodes the JSON list posted under $key; a missing field or any value
     that does not decode to a list yields an empty array. */
  function dispatch_decode_posted_list($key){
      if(!array_key_exists($key, $_POST)){
          return array();
      }
      $decoded = json_decode($_POST[$key]);
      return is_array($decoded) ? $decoded : array();
  }
  /**
   * Runs a typed search: turns the string into a vector, adds it to the
   * previous vector query and fetches the sentences matching the sum.
   *
   * Arguments:
   * -- query : the lower-cased string typed by the user, already escaped for
   *            MySQL by the caller.
   * -- old_query : the previous query state as an array with the keys
   *            'features', 'relevant', 'irrelevant', 'relevant_words' and
   *            'irrelevant_words'. Any key may be missing (e.g. on a first
   *            search); a missing list is treated as empty.
   * Return:
   * A php array() with the following key-value pairs:
   *   query: {features: {featureID: value, ...},
   *           relevant: [...], irrelevant: [...],
   *           irrelevant_words: [...], relevant_words: [...]}
   *   sentences: the matching sentences, best match first
   * 'relevant_words' is the previous list extended with the word IDs that
   * getWordID() reports for each whitespace-separated word of the query.
   */
  function process_string_query($query, $old_query){
      $old_features = isset($old_query['features']) ? (array) $old_query['features'] : array();
      $old_vector_query = new SparseVector($old_features);
      $vect_query = convert_query_to_sparse_vector($query);
      $new_query = $vect_query->vectorAdd($old_vector_query);
      $sentences = retrieve_sentences_from_vector_query($new_query);

      $result = array();
      $result['query'] = array();
      $result['sentences'] = $sentences;
      $result['query']['features'] = $new_query->features;
      $result['query']['relevant'] = string_query_previous_list($old_query, 'relevant');
      $result['query']['irrelevant'] = string_query_previous_list($old_query, 'irrelevant');
      $result['query']['irrelevant_words'] = string_query_previous_list($old_query, 'irrelevant_words');

      // Words typed into the search box count as words marked relevant.
      // Starting from a real array keeps array_merge() from failing when no
      // earlier list exists.
      $relevant_words = string_query_previous_list($old_query, 'relevant_words');
      foreach(explode(" ", $query) as $word){
          $wordIDs = explode(", ", getWordID($word));
          $relevant_words = array_merge($relevant_words, $wordIDs);
      }
      $result['query']['relevant_words'] = $relevant_words;
      return $result;
  }

  /* Returns the list stored under $key in the previous query state, or an
     empty array when the key is missing or does not hold a list. */
  function string_query_previous_list($old_query, $key){
      if(is_array($old_query) && isset($old_query[$key]) && is_array($old_query[$key])){
          return $old_query[$key];
      }
      return array();
  }
  /**
   * Converts a string query to a sparse vector: every whitespace-separated
   * word that is known to the database contributes one search-word feature of
   * weight 1 per (word id, part of speech) pair.
   *
   * Skipped words:
   * -- empty tokens (produced by consecutive spaces),
   * -- words found in $STOPS (substring test, as elsewhere in this file),
   * -- words changed by replaceWeirdCharacters(),
   * -- words with no match in the word table. getWordIDsAndPOS() signals
   *    that with -1; turning it into a feature would yield an empty word id
   *    and break the SQL built from the feature names later on.
   *
   * Arguments:
   * -- query : the string query.
   * Return:
   * A SparseVector holding the search-word features.
   */
  function convert_query_to_sparse_vector($query){
      global $STOPS;
      $vector = new SparseVector();
      foreach(explode(" ", $query) as $word){
          if($word === ""){
              continue;
          }
          if(strstr($STOPS, strtolower($word)) || $word != replaceWeirdCharacters($word)){
              continue;
          }
          // Look the word up only once it has passed the cheap filters.
          $matches = getWordIDsAndPOS($word);
          if($matches === -1){
              continue;
          }
          foreach(explode(", ", $matches) as $match){
              // Each match is "<id>-<pos>"; split on the first "-" only so a
              // part-of-speech tag that itself contains "-" stays intact.
              $parts = explode("-", $match, 2);
              $id = $parts[0];
              $pos = isset($parts[1]) ? $parts[1] : "";
              add_search_word_feature($id, $pos, $word, 1, $vector);
          }
      }
      return $vector;
  }
  /**
   * Looks up the IDs and parts of speech of a surface word.
   *
   * A "*" in the word acts as a wildcard: it is translated to "%" and the
   * lookup uses LIKE instead of an exact match.
   *
   * Arguments:
   * -- word : the surface word; it is trimmed and escaped here.
   * Return:
   * A string of "<id>-<pos>" pairs joined by ", ", or the integer -1 when no
   * row matches or the database query itself fails.
   */
  function getWordIDsAndPOS($word){
      if(strstr($word, "*")){
          $sql = "SELECT pos, id FROM word WHERE word like '".mysql_escape_string(trim(str_replace("*", "%", $word)))."';";
      }else{
          $sql = "SELECT pos, id FROM word WHERE word ='".mysql_escape_string(trim($word))."';";
      }
      $result = mysql_query($sql);
      // mysql_query() returns false on failure; treat that like "no match"
      // instead of handing false to mysql_num_rows().
      if($result === false || mysql_num_rows($result) == 0){
          return -1;
      }
      $ids = array();
      while($row = mysql_fetch_array($result)){
          $ids[] = $row['id']."-".$row['pos'];
      }
      return join(", ", $ids);
  }
  /* Sets, on the given sparse vector, the weight of the word feature named
     after the (word ID, part of speech, word) triple. */
  function add_word_feature($wordID, $pos, $word, $weight, $vector){
      $feature_name = make_word_feature_name($wordID, $pos, $word);
      $vector->setFeatureValue($feature_name, $weight);
  }
  /* Sets, on the given sparse vector, the weight of the search-word feature
     named after the (word ID, part of speech, word) triple. */
  function add_search_word_feature($wordID, $pos, $word, $weight, $vector){
      $feature_name = make_search_word_feature_name($wordID, $pos, $word);
      $vector->setFeatureValue($feature_name, $weight);
  }
  /** Top-level wrapper around the relevance feedback computation.
  The current query is first updated to reflect the user's feedback
  (see calculate_new_vector_query), then the sentences matching the updated
  query are fetched.
  Arguments:
  -- query : the query state sent by the client; its 'features' entry is the
  (sparse) vector {featureID:floating-point value, ....}
  -- relevant: a list of sentenceID's marked relevant
  -- irrelevant: a list of sentenceID's marked irrelevant
  -- relevant_words: a list of word ID's marked relevant
  -- irrelevant_words: a list of word ID's marked irrelevant
  Return:
  A php array() with the following key-value pairs.
  {sentences: the sentences matching the updated query
  query:{features:{featureID:floating-point value, ...},
  relevant:[...], irrelevant:[...],
  relevant_words:[...], irrelevant_words:[...]}
  }
  The four lists are echoed back exactly as they were passed in.
  */
  function process_relevance_feedback($query, $relevant, $irrelevant, $relevant_words, $irrelevant_words){
      $updated_query = calculate_new_vector_query($query, $relevant, $irrelevant, $relevant_words, $irrelevant_words);
      $matching_sentences = retrieve_sentences_from_vector_query($updated_query);
      return array(
          'sentences' => $matching_sentences,
          'query' => array(
              'features' => $updated_query->features,
              'relevant' => $relevant,
              'irrelevant' => $irrelevant,
              'relevant_words' => $relevant_words,
              'irrelevant_words' => $irrelevant_words,
          ),
      );
  }
  /* Computes the updated vector query: the normalized previous query plus the
     sentence-based and word-based adjustments, normalized again.
     Arguments:
     -- query : the previous query state; 'features' holds the vector
     -- relevant / irrelevant : sentence ID's marked by the user
     -- relevant_words / irrelevant_words : word ID's marked by the user
     Return:
     The updated query as a SparseVector. */
  function calculate_new_vector_query($query, $relevant, $irrelevant, $relevant_words, $irrelevant_words){
      $previous = new SparseVector((array) $query['features']);
      $previous->normalize();
      $from_sentences = calculate_sentence_adjustment($query, $previous, $relevant, $irrelevant);
      $from_words = calculate_word_adjustment($query, $previous, $relevant_words, $irrelevant_words);
      $updated = $previous->vectorAdd($from_sentences->vectorAdd($from_words));
      $updated->normalize();
      return $updated;
  }
  /**
   * Computes the change to apply to the query vector for the sentences whose
   * marking differs from what the previous query state already incorporates.
   *
   * Four groups are derived by comparing the lists stored in $query with the
   * lists now sent by the client:
   * -- newly relevant        : added with weight    $ALPHA_plus
   * -- no longer relevant    : added with weight   -$ALPHA_plus
   * -- newly irrelevant      : added with weight   -$ALPHA_minus
   * -- no longer irrelevant  : added with weight    $ALPHA_minus
   * Each group is converted to one vector and normalized before weighting.
   *
   * Arguments:
   * -- query : previous query state; 'relevant' and 'irrelevant' hold the
   *            sentence ID's already incorporated. Missing or non-list
   *            entries are treated as empty.
   * -- vector_query : unused here; kept for the existing call signature.
   * -- relevant / irrelevant : sentence ID's currently marked by the user;
   *            a non-list value is treated as empty.
   * Return:
   * The summed adjustment as a SparseVector.
   */
  function calculate_sentence_adjustment($query, $vector_query, $relevant, $irrelevant){
      global $ALPHA_plus; // relevant sentences weight
      global $ALPHA_minus; // irrelevant sentences weight

      // Default everything to a list so the set differences below never
      // receive null (e.g. when the client has no earlier feedback state).
      $already_relevant = (isset($query['relevant']) && is_array($query['relevant'])) ? $query['relevant'] : array();
      $already_irrelevant = (isset($query['irrelevant']) && is_array($query['irrelevant'])) ? $query['irrelevant'] : array();
      $relevant = is_array($relevant) ? $relevant : array();
      $irrelevant = is_array($irrelevant) ? $irrelevant : array();

      // array_subtract($a, $b) yields the items of $b that are not in $a.
      $new_relevant = array_subtract($already_relevant, $relevant);
      $no_longer_relevant = array_subtract($relevant, $already_relevant);
      $new_irrelevant = array_subtract($already_irrelevant, $irrelevant);
      $no_longer_irrelevant = array_subtract($irrelevant, $already_irrelevant);

      $relevant_vect = convert_sentence_IDs_to_sparse_vector($new_relevant);
      $relevant_vect->normalize();
      $no_longer_relevant_vect = convert_sentence_IDs_to_sparse_vector($no_longer_relevant);
      $no_longer_relevant_vect->normalize();
      $irrelevant_vect = convert_sentence_IDs_to_sparse_vector($new_irrelevant);
      $irrelevant_vect->normalize();
      $no_longer_irrelevant_vect = convert_sentence_IDs_to_sparse_vector($no_longer_irrelevant);
      $no_longer_irrelevant_vect->normalize();

      $positive_adjustment = $relevant_vect->scalarMultiply($ALPHA_plus);
      $no_longer_positive_adjustment = $no_longer_relevant_vect->scalarMultiply(-1*$ALPHA_plus);
      $negative_adjustment = $irrelevant_vect->scalarMultiply(-1*$ALPHA_minus);
      $no_longer_negative_adjustment = $no_longer_irrelevant_vect->scalarMultiply($ALPHA_minus);

      $adjustment = $positive_adjustment->vectorAdd($negative_adjustment);
      $adjustment = $adjustment->vectorAdd($no_longer_positive_adjustment);
      $adjustment = $adjustment->vectorAdd($no_longer_negative_adjustment);
      return $adjustment;
  }
  /* Computes the change to apply to the query vector for the marked words.
     The result is the normalized combination of the relevant words (weight
     $ALPHA_w_plus) and irrelevant words (weight -$ALPHA_w_minus), plus a
     vector that negates every search-word feature already present in
     $vector_query.
     Arguments:
     -- query : unused here; kept for the existing call signature
     -- vector_query : the current query as a SparseVector
     -- relevant / irrelevant : word ID's marked by the user
     Return:
     The adjustment as a SparseVector. */
  function calculate_word_adjustment($query,$vector_query, $relevant, $irrelevant){
      global $ALPHA_w_plus; // relevant words weight
      global $ALPHA_w_minus; // irrelevant words weight

      // Negated copy of the search-word features currently in the query.
      $negated_search_features = array();
      foreach($vector_query->features as $name => $value){
          if(is_search_word_feature($name)){
              $negated_search_features[$name] = -1*$value;
          }
      }
      $cancel_previous = new SparseVector($negated_search_features);

      // The word vectors are deliberately not normalized one by one; only
      // their weighted sum is.
      $marked_relevant = convert_word_IDs_to_sparse_vector($relevant);
      $marked_irrelevant = convert_word_IDs_to_sparse_vector($irrelevant);
      $towards = $marked_relevant->scalarMultiply($ALPHA_w_plus);
      $away = $marked_irrelevant->scalarMultiply(-1*$ALPHA_w_minus);

      $combined = $towards->vectorAdd($away);
      $combined->normalize();
      return $combined->vectorAdd($cancel_previous);
  }
  /* Returns, as a freshly indexed list, the items of $subtract_from that do
     not occur in $to_subtract. Membership uses PHP's loose comparison, so the
     string "2" matches the integer 2. Order and duplicates of $subtract_from
     are kept. */
  function array_subtract($to_subtract, $subtract_from){
      $remaining = array();
      foreach($subtract_from as $candidate){
          if(in_array($candidate, $to_subtract)){
              continue;
          }
          $remaining[] = $candidate;
      }
      return $remaining;
  }
  /* Returns a freshly indexed list holding every item of $array1 followed by
     every item of $array2; the original keys are discarded. */
  function array_add($array1, $array2){
      $combined = array();
      foreach(array($array1, $array2) as $source){
          foreach($source as $item){
              $combined[] = $item;
          }
      }
      return $combined;
  }
  /**
   * Builds one sparse vector for a set of sentences: each word occurring in
   * them becomes a word feature whose value is the sum of its tf_idf weights
   * over the given sentences.
   *
   * Words found in $STOPS, and words changed by replaceWeirdCharacters(),
   * are left out.
   *
   * Arguments:
   * -- sentence_ids : list of sentence ID's. The values originate from the
   *    client, so they are cast to integers before being placed in the SQL
   *    statement. A non-list or empty value gives an empty vector.
   * Return:
   * A SparseVector (empty when there is nothing to look up).
   * The script dies with an HTML error message when the query fails.
   */
  function convert_sentence_IDs_to_sparse_vector($sentence_ids){
      global $STOPS;
      $features = array();
      $ids = is_array($sentence_ids) ? array_map('intval', $sentence_ids) : array();
      if(count($ids) > 0){
          // get all the words in these sentences together with their weights
          $sql = "SELECT * from sentence_word_tf_idf, word\n"
              ."where sentence_id in (".join(", ", $ids).") and word_id = word.id;";
          $words_in_sentences = mysql_query($sql)
              or die("<b>A fatal MySQL error occured</b>.\n<br/> Query: " . $sql . "\n<br/> Error: (" . mysql_errno() . ") " . mysql_error());
          while($word_in_sentence = mysql_fetch_array($words_in_sentences)){
              // exclude stopwords and weird characters
              $word = $word_in_sentence['word'];
              if(!strstr($STOPS, strtolower($word)) && $word == replaceWeirdCharacters($word)){
                  $word_feature = make_word_feature_name($word_in_sentence['word_id'], $word_in_sentence['pos'], $word);
                  $weight = $word_in_sentence['tf_idf'];
                  if(array_key_exists($word_feature, $features)){
                      $features[$word_feature] += $weight;
                  }else{
                      $features[$word_feature] = $weight;
                  }
              }
          }
          // add dependency features? synonym features? Maybe, if needed.
          // TODO
      }
      return new SparseVector($features);
  }
  /**
   * Builds a sparse vector with one search-word feature of value 1 for every
   * given word ID found in the word table.
   *
   * Words found in $STOPS, and words changed by replaceWeirdCharacters(),
   * are left out. (The two tests are combined outside the strstr() call;
   * placing the "&&" inside its argument list would hand strstr() a boolean
   * needle and disable the filter.)
   *
   * Arguments:
   * -- word_ids : list of word ID's. The values originate from the client,
   *    so they are cast to integers before being placed in the SQL
   *    statement. A non-list or empty value gives an empty vector.
   * Return:
   * A SparseVector (empty when there is nothing to look up).
   * The script dies with an HTML error message when the query fails.
   */
  function convert_word_IDs_to_sparse_vector($word_ids){
      global $STOPS;
      $features = array();
      $ids = is_array($word_ids) ? array_map('intval', $word_ids) : array();
      if(count($ids) > 0){
          $sql = "SELECT * from word\n"
              ."where id in (".join(", ", $ids).");";
          $words = mysql_query($sql)
              or die("<b>A fatal MySQL error occured</b>.\n<br/> Query: " . $sql . "\n<br/> Error: (" . mysql_errno() . ") " . mysql_error());
          while($word = mysql_fetch_array($words)){
              // exclude stopwords and weird characters
              $surface = $word['word'];
              if(!strstr($STOPS, strtolower($surface)) && $surface == replaceWeirdCharacters($surface)){
                  $word_feature = make_search_word_feature_name($word['id'], $word['pos'], $surface);
                  $features[$word_feature] = 1;
              }
          }
      }
      return new SparseVector($features);
  }
  /**
   * Uses the vector space model of information retrieval to return the
   * sentences that match a given vector query.
   *
   * Every sentence containing at least one of the query's words is scored by
   * summing, over its matching words, the weight the query gives that word
   * (see convert_to_case_expression). Only sentences with a positive score
   * are kept.
   *
   * Arguments:
   * -- query: the query as a SparseVector; the keys of its features array
   *    are feature names, the values are weights.
   * Return:
   * A list of at most $LIMIT sentences, best match first, as produced by
   * fetch_top_n_sentences(). Empty when the query has no word features.
   * The script dies with an HTML error message when the query fails.
   */
  function retrieve_sentences_from_vector_query($query){
      global $LIMIT;
      $sentences = array();
      // Collect the word ids named by the word-based features. Feature names
      // come back from the client, so each id is cast to an integer before it
      // is placed in the SQL statement.
      $word_ids = array();
      foreach(array_keys($query->features) as $featureID){
          if(is_word_feature($featureID) || is_search_word_feature($featureID)){
              $word_ids[] = intval(get_id_from_feature_name($featureID));
          }
      }
      if(count($word_ids) > 0){
          $string_word_ids = join(", ", $word_ids);
          $score_case = convert_to_case_expression($query->features);
          // alternate score formula: "SUM(tf_idf*".$score_case.")/SUM(tf_idf) as score"
          $sql = "SELECT sentence_id,\n"
              ."SUM(".$score_case.") as score\n"
              ."from sentence_word_tf_idf\n"
              ."WHERE word_id in (".$string_word_ids.")\n"
              ."GROUP BY sentence_id ORDER BY score desc LIMIT ".intval($LIMIT).";";
          $words_in_sentences = mysql_query($sql)
              or die("<b>A fatal MySQL error occured</b>.\n<br/> Query: " . $sql . "\n<br/> Error: (" . mysql_errno() . ") " . mysql_error());
          while($scores = mysql_fetch_array($words_in_sentences)){
              if($scores['score'] > 0){
                  $sentences[] = $scores['sentence_id'];
              }
          }
      }
      return fetch_top_n_sentences($sentences);
  }
  /**
   * Turns a feature map into a SQL CASE expression that yields, for a row's
   * word_id, the total weight the query assigns to that word, and 0 for any
   * other word.
   *
   * Weights of features that name the same word id are added together.
   * Feature names and weights may come straight from the client, so the id
   * is emitted as an integer and the weight as a float literal; nothing from
   * the input is copied into the SQL text verbatim.
   *
   * Arguments:
   * -- features : array mapping feature names to weights.
   * Return:
   * The expression as a string: "(CASE WHEN word_id = <id> THEN <weight> ...
   * ELSE 0 END)".
   */
  function convert_to_case_expression($features){
      $totals = array();
      foreach($features as $feature => $score){
          $word_id = intval(get_id_from_feature_name($feature));
          if(!array_key_exists($word_id, $totals)){
              $totals[$word_id] = 0;
          }
          $totals[$word_id] += (float) $score;
      }
      $sql = "(CASE";
      foreach($totals as $id => $total){
          // var_export() writes floats with a "." decimal point regardless of
          // the current locale.
          $sql .= "\nWHEN word_id = ".$id."\nTHEN ".var_export((float) $total, true);
      }
      $sql .= " ELSE 0 END)";
      return $sql;
  }
  /**
   * Fetches the sentences for a list of sentence ID's, keeping the order of
   * the list.
   *
   * Arguments:
   * -- top_n: the sentence ID's to fetch, best match first. The values are
   *    cast to integers before being placed in the SQL statement.
   * Return:
   * A list of sentences in the order of $top_n:
   * [{id, words, narrative_id, title, date, author}, ...]
   * An ID is left out of the result when the joined query returns no row for
   * it or when getWordsInSentence() returns nothing for it, so the list never
   * contains empty placeholder entries.
   * The script dies with an HTML error message when the query fails.
   */
  function fetch_top_n_sentences($top_n){
      $ids = is_array($top_n) ? array_map('intval', $top_n) : array();
      $sentences = array();
      if(count($ids) > 0){
          $sql = "SELECT\n"
              ."sentence.id as id, sentence.narrative_id, sentence, title, date, full as author\n"
              ."from sentence, narrative,\n"
              ."author_xref_narrative as axn, author\n"
              ."WHERE sentence.id in (".join(", ", $ids).")\n"
              ."AND sentence.narrative_id = narrative.id\n"
              ."AND axn.narrative_id = narrative.id\n"
              ."AND axn.author_id = author.id;";
          $sentences_result = mysql_query($sql)
              or die("<b>A fatal MySQL error occured</b>.\n<br/> Query: " . $sql . "\n<br/> Error: (" . mysql_errno() . ") " . mysql_error());
          while($sentence_result = mysql_fetch_array($sentences_result)){
              $sentence = array();
              $sentence['id'] = $sentence_result['id'];
              $sentence['words'] = getWordsInSentence($sentence['id']);
              // ... and whatever other metadata here
              $sentence['narrative_id'] = $sentence_result['narrative_id'];
              $sentence['title'] = $sentence_result['title'];
              $sentence['date'] = $sentence_result['date'];
              $sentence['author'] = $sentence_result['author'];
              // keep only sentences that actually have words
              if(count($sentence['words']) > 0){
                  $sentences[$sentence['id']] = $sentence;
              }
          }
      }
      // The rows come back in database order; restore the ranking order and
      // skip ids for which nothing was stored.
      $ordered = array();
      foreach($ids as $id){
          if(isset($sentences[$id])){
              $ordered[] = $sentences[$id];
          }
      }
      return $ordered;
  }
  /* True when the feature name starts with "w", the prefix that
     make_word_feature_name() gives to word features. */
  function is_word_feature($featureID){
      $word_prefix = "w";
      return starts_with($featureID, $word_prefix);
  }
  /* True when the feature name starts with "s", the prefix that
     make_search_word_feature_name() gives to search-word features. */
  function is_search_word_feature($featureID){
      $search_prefix = "s";
      return starts_with($featureID, $search_prefix);
  }
  /* Returns the second "_"-separated component of a feature name, which is
     where the feature-name builders in this file put the word ID. */
  function get_id_from_feature_name($featureID){
      list(, $id) = explode("_", $featureID);
      return $id;
  }
  /* Returns the third "_"-separated component of a feature name, which is
     where the feature-name builders in this file put the part of speech. */
  function get_pos_from_feature_name($featureID){
      list(, , $pos) = explode("_", $featureID);
      return $pos;
  }
  /* Returns the word part of a feature name, i.e. everything after the third
     "_". The split is limited to four pieces so that a word which itself
     contains "_" is returned whole instead of being cut at its first
     underscore. Returns null when the name has fewer than four components. */
  function get_word_from_feature_name($featureID){
      $components = explode("_", $featureID, 4);
      return isset($components[3]) ? $components[3] : null;
  }
  /* True when the first strlen($needle) characters of $haystack are
     identical (===) to $needle. */
  function starts_with($haystack, $needle){
      $prefix = substr($haystack, 0, strlen($needle));
      return $prefix === $needle;
  }
  /* Builds the name of a word feature: "w", the word ID, the part of speech
     and the word, joined by "_". */
  function make_word_feature_name($wordID, $pos, $word){
      return implode('_', array("w", $wordID, $pos, $word));
  }
  /* Builds the name of a search-word feature: "s", the word ID, the part of
     speech and the word, joined by "_". */
  function make_search_word_feature_name($wordID, $pos, $word){
      return implode('_', array("s", $wordID, $pos, $word));
  }
  511. /**************************************************************
  512. Relevance words
  513. ***************************************************************/
  /**
   * Summarises a vector query as two lists of words: those the query leans
   * towards and those it leans away from.
   *
   * All word and search-word features that share the same surface word are
   * grouped; the group's total is the sum of their weights. The groups are
   * sorted with compare_words() and then split by the sign of the total.
   *
   * Arguments:
   * -- query : the query as a SparseVector. Its feature map is read from the
   *    public 'features' property, as every other function in this file
   *    does.
   * Return:
   * array("relevant" => [...], "irrelevant" => [...]) where each entry is
   *   {word: the surface word,
   *    total: summed weight (>= 0 in "relevant", < 0 in "irrelevant"),
   *    words: [{pos, id, weight}, ...] one per contributing feature}
   */
  function convert_vector_query_to_relevance_words($query){
      $words = array();
      $features = $query->features;
      foreach($features as $featureID => $weight){
          if(is_word_feature($featureID) || is_search_word_feature($featureID)){
              $id = get_id_from_feature_name($featureID);
              $pos = get_pos_from_feature_name($featureID);
              $word = get_word_from_feature_name($featureID);
              if(!array_key_exists($word, $words)){
                  $words[$word] = array('total' => 0, 'words' => array());
              }
              $words[$word]['words'][] = array("pos"=>$pos, "id"=>$id, "weight"=>$weight);
              $words[$word]['total'] += $weight;
          }
      }
      uasort($words, 'compare_words');
      $relevance = array("relevant"=>array(), "irrelevant"=>array());
      foreach($words as $word => $info){
          $entry = array("word"=>$word,
              "total"=>$info['total'],
              "words"=>$info['words']);
          // a total of exactly 0 counts as relevant
          $bucket = ($info['total'] >= 0) ? 'relevant' : 'irrelevant';
          $relevance[$bucket][] = $entry;
      }
      return $relevance;
  }
  /* Sort callback ordering word entries by their 'total', largest first.
     Returns -1 when $word1 has the larger total, 1 when $word2 has, and 0
     when both totals are equal, as the sort functions require. */
  function compare_words($word1, $word2){
      if($word1['total'] == $word2['total']){
          return 0;
      }
      return ($word1['total'] > $word2['total']) ? -1 : 1;
  }
  556. ?>