PageRenderTime 70ms CodeModel.GetById 36ms RepoModel.GetById 0ms app.codeStats 1ms

/src/php/util.php

https://bitbucket.org/silverasm/wordseer
PHP | 822 lines | 721 code | 30 blank | 71 comment | 117 complexity | 0fd36b875836c54a357f0dc87ee89932 MD5 | raw file
Possible License(s): Apache-2.0, LGPL-3.0, BSD-3-Clause
  1. <?php
  2. /* Copyright 2012 Aditi Muralidharan. See the file "LICENSE" for the full license governing this code. */
// Raise the PHP memory ceiling for every request that includes this file.
ini_set('memory_limit', '3221M');

// Publish the `query_id` GET parameter (escaped by getGetParam(); "" when the
// parameter is absent) as a global that the functions below read.
global $query_id;
$query_id = getGetParam('query_id');
  6. function table_exists($table) {
  7. $sql = "show tables like '".$table."'";
  8. $res = mysql_query($sql);
  9. return (mysql_num_rows($res) > 0);
  10. }
  11. function indexOf($needle, $haystack) {
  12. for ($i=0;$i<count($haystack);$i++) {
  13. if ($haystack[$i] == $needle) {
  14. return $i;
  15. }
  16. }
  17. return -1;
  18. }
  19. function encode_json($a=false)
  20. {
  21. if (is_null($a)) return 'null';
  22. if ($a === false) return 'false';
  23. if ($a === true) return 'true';
  24. if (is_scalar($a))
  25. {
  26. if (is_float($a))
  27. {
  28. // Always use "." for floats.
  29. return floatval(str_replace(",", ".", strval($a)));
  30. }
  31. if (is_string($a))
  32. {
  33. static $jsonReplaces = array(array("\\", "/", "\n", "\t", "\r", "\b", "\f", '"'), array('\\\\', '\\/', '\\n', '\\t', '\\r', '\\b', '\\f', '\"'));
  34. return '"' . str_replace($jsonReplaces[0], $jsonReplaces[1], $a) . '"';
  35. }
  36. else
  37. return $a;
  38. }
  39. $isList = true;
  40. for ($i = 0, reset($a); $i < count($a); $i++, next($a))
  41. {
  42. if (key($a) !== $i)
  43. {
  44. $isList = false;
  45. break;
  46. }
  47. }
  48. $result = array();
  49. if ($isList)
  50. {
  51. foreach ($a as $v) $result[] = json_encode($v);
  52. return '[' . join(',', $result) . ']';
  53. }
  54. else
  55. {
  56. foreach ($a as $k => $v) $result[] = json_encode($k).':'.json_encode($v);
  57. return '{' . join(',', $result) . '}';
  58. }
  59. }
  60. function replaceWeirdCharacters($input){
  61. $text = str_replace(
  62. array("\xe2\x80\x98", "\xe2\x80\x99", "\xe2\x80\x9c", "\xe2\x80\x9d", "\xe2\x80\x93", "\xe2\x80\x94", "\xe2\x80\xa6", "—"),
  63. array("'", "'", '"', '"', '-', '--', '...', "-"),
  64. $input);
  65. // Next, replace their Windows-1252 equivalents.
  66. $text = str_replace(
  67. array(chr(145), chr(146), chr(147), chr(148), chr(150), chr(151), chr(133)),
  68. array("'", "'", '"', '"', '-', '--', '...'),
  69. $text);
  70. return $text;
  71. }
/**
 * Returns the relation form value exactly as it was given.
 *
 * @param mixed $value Relation value as submitted.
 * @return mixed The same value, unchanged.
 */
function getRelationsFromFormValue($value){
    return $value;
}
  75. function getNameFromRelation($value){
  76. $return = $value;
  77. switch($value){
  78. case "none":
  79. $return = "search";
  80. break;
  81. case "":
  82. $return = "(any relation)";
  83. break;
  84. case "amod advmod":
  85. $return = "described as" ;
  86. break;
  87. case "agent subj nsubj csubj nsubjpass csubjpass":
  88. $return = "done by";
  89. break;
  90. case "obj dobj iobj pobj":
  91. $return = "done to";
  92. break;
  93. case "prep_because prep_because_of prep_on_account_of prep_owing_to prepc_because prepc_because_of prepc_on_account_of prepc_owing_to":
  94. $return = "because";
  95. break;
  96. case "conj_and":
  97. $return = "and";
  98. break;
  99. case "purpcl":
  100. $return = "in order to";
  101. break;
  102. case "prep_with prepc_with prep_by_means_of prepc_by_means_of":
  103. $return = "with";
  104. break;
  105. case "prep_to":
  106. $return = "to";
  107. break;
  108. case "prep_from":
  109. $return = "from";
  110. break;
  111. case "prep_of":
  112. $return = "of";
  113. break;
  114. case "prep_on":
  115. $return = "on";
  116. break;
  117. case "prep_by":
  118. $return = "by";
  119. break;
  120. case "prep_in":
  121. $return = "in";
  122. break;
  123. case "poss":
  124. $return = "possessed by";
  125. break;
  126. }
  127. return $return;
  128. }
  129. function getRelationDescription($val){
  130. $return = $val;
  131. $value = ' '.$val.' ';
  132. if(strpos(" none ", $value) > 0){
  133. $return = "";
  134. } else if(strpos(" ", $value) > 0){
  135. $return = "(any relation)";
  136. } else if(strpos(" amod advmod acomp ", $value) > 0){
  137. $return = "amod advmod acomp" ;
  138. } else if(strpos(" agent subj nsubj csubj nsubjpass csubjpass ", $value) > 0){
  139. $return = "agent subj nsubj xsubj csubj nsubjpass csubjpass";
  140. } else if(strpos(" obj dobj iobj pobj ", $value) > 0){
  141. $return = "dobj iobj pobj";
  142. } else if(strpos(" prep_because prep_because_of prep_on_account_of prep_owing_to prepc_because prepc_because_of prepc_on_account_of prepc_owing_to ", $value) > 0){
  143. $return = "prep_because prep_because_of prep_on_account_of prep_owing_to prepc_because prepc_because_of prepc_on_account_of prepc_owing_to";
  144. } else if(strpos(" prep_with prepc_with prep_by_means_of prepc_by_means_of ", $value) > 0){
  145. $return = "prep_with prepc_with prep_by_means_of prepc_by_means_of";
  146. }
  147. return $return;
  148. }
  149. function remove_spaces_before_punctuation($sentence){
  150. $no_space_before_punctuation = array(".", ",", "!","`", "\"", "?", "`", "'",";", ")", ":", "—");
  151. $no_space_after_punctuation = array("`", "'", "\"", "`", "`", "(", "—");
  152. $sent = replaceWeirdCharacters($sentence);
  153. foreach($no_space_before_punctuation as $mark){
  154. $sent = str_replace(" ".$mark, $mark, $sent);
  155. }
  156. foreach($no_space_after_punctuation as $mark){
  157. $sent = str_replace($mark." ", $mark, $sent);
  158. }
  159. return $sent;
  160. }
  161. function getWordsInSentence($sentenceID){
  162. $sql = "SELECT surface, word_id, space_after
  163. from sentence_xref_word
  164. WHERE sentence_id = ".$sentenceID."
  165. ORDER BY position ASC;";
  166. $result = mysql_query($sql);
  167. $word_set_memberships = getWordSetMemberships();
  168. $words = array();
  169. while($row = mysql_fetch_array($result)){
  170. $word = array(
  171. 'word'=>replaceWeirdCharacters($row['surface']),
  172. 'word_id'=>$row['word_id'],
  173. 'space_after'=>str_replace("\n", "<br>", $row['space_after']));
  174. if (array_key_exists($row['word_id'], $word_set_memberships)) {
  175. $word['word_set'] = join(" ",
  176. $word_set_memberships[$row['word_id']]);
  177. }
  178. array_push($words, $word);
  179. }
  180. return $words;
  181. }
  182. function getWordSetMemberships() {
  183. global $word_set_memberships;
  184. if (!$word_set_memberships) {
  185. $user = getGetParam('user');
  186. $sql = "SELECT * from working_set LEFT JOIN working_set_contents
  187. ON id = working_set_id
  188. WHERE type = 'word'
  189. AND username = '$user';";
  190. $results = mysql_query($sql) or die(
  191. "Error listing word sets.<br>$sql<br> at util.php l.181");
  192. $word_set_memberships = array();
  193. while ($row = mysql_fetch_assoc($results)) {
  194. $word_id = $row['item_id'];
  195. if (!array_key_exists($word_id, $word_set_memberships)) {
  196. $word_set_memberships[$word_id] = array();
  197. }
  198. array_push($word_set_memberships[$word_id], $row['working_set_id']);
  199. }
  200. }
  201. return $word_set_memberships;
  202. }
  203. function startsWith($haystack, $needle)
  204. {
  205. $length = strlen($needle);
  206. return (substr($haystack, 0, $length) === $needle);
  207. }
  208. function endsWith($haystack, $needle)
  209. {
  210. $length = strlen($needle);
  211. $start = $length * -1; //negative
  212. return (substr($haystack, $start) === $needle);
  213. }
  214. function spaceBetweenWords($word1, $word2){
  215. $prev = substr($word1, -1);
  216. $next = substr($word2, 0, 1);
  217. $alphabet = "abcdefghijklmnopqrstuvwxyz&1234567890ABCDEFGHIJKLMNOPQRSTUVWXYZ";
  218. $no_space_before=".!,`)?;:%\"'";
  219. $no_space_after='\'"`(';
  220. if(strpos($alphabet, $prev)){
  221. if(strstr($no_space_before, $next)){
  222. return '';
  223. }else{
  224. return ' ';
  225. }
  226. }else if(strstr($no_space_after, $prev)){
  227. return '';
  228. }else{
  229. return ' ';
  230. }
  231. }
  232. function getGetParam($param){
  233. if(array_key_exists($param, $_GET)){
  234. return trim(mysql_escape_string($_GET[$param]));
  235. }else{
  236. return "";
  237. }
  238. }
  239. function decodeGetJson($param) {
  240. if (array_key_exists($param, $_GET)) {
  241. $str = str_replace('\\"', '"', $_GET[$param]);
  242. return json_decode($str, true);
  243. } else {
  244. return array();
  245. }
  246. }
  247. function getPostParam($param){
  248. if(array_key_exists($param, $_POST)){
  249. return mysql_escape_string($_POST[$param]);
  250. }else{
  251. return "";
  252. }
  253. }
  254. function getRelationID($relation){
  255. $query = "SELECT id FROM relationship WHERE relationship ='".$relation."';";
  256. //echo $query.'
  257. //';
  258. $result = mysql_query($query);
  259. if(mysql_num_rows($result)>0){
  260. $row = mysql_fetch_array($result);
  261. return $row['id'];
  262. }else{
  263. return "";
  264. }
  265. }
  266. /** Returns a comma-separated string of ID's
  267. that correspond to a given surface word. A word can have multiple id's
  268. if it has different parts of speech.
  269. */
  270. function getWordID($word){
  271. $lemmatize = getGetParam('all_word_forms') == 'on';
  272. if (!$lemmatize) {
  273. $query = "";
  274. if(!(strstr($word, "*"))){
  275. $query = "SELECT id FROM word WHERE word ='".mysql_escape_string(trim($word))."';";
  276. }else{
  277. $query = "SELECT idlemma FROM word WHERE word like '".mysql_escape_string(trim(str_replace("*", "%", $word)))."';";
  278. }
  279. $result = mysql_query($query);
  280. if(mysql_num_rows($result)>0){
  281. $ids = array();
  282. while($row = mysql_fetch_array($result)){
  283. array_push($ids, $row['id']);
  284. }
  285. return join(", ", $ids);
  286. }else{
  287. return 'null';
  288. }
  289. } else {
  290. $ids = getLemmaVariantIDs($word);
  291. return join(", ", $ids);
  292. }
  293. }
  294. /** Returns an array of all the ID's that correspond to a
  295. given surface word. A word can have multiple id's
  296. if it has different parts of speech.
  297. */
  298. function getWordIDs($word){
  299. $lemmatize = getGetParam('all_word_forms') == 'on';
  300. if (!$lemmatize) {
  301. $query = "";
  302. if(!(strstr($word, "*"))){
  303. $query = "SELECT id
  304. FROM word
  305. WHERE word ='".mysql_escape_string(trim($word))."';";
  306. } else {
  307. $query = "SELECT id
  308. FROM word
  309. WHERE word like '".mysql_escape_string(trim(str_replace("*", "%", $word)))."';";
  310. }
  311. $result = mysql_query($query);
  312. if(mysql_num_rows($result)>0){
  313. $ids = array();
  314. while($row = mysql_fetch_assoc($result)){
  315. array_push($ids, $row['id']);
  316. }
  317. return $ids;
  318. }else{
  319. return array();
  320. }
  321. } else {
  322. return getLemmaVariantIDs($word);
  323. }
  324. }
  325. function getWord($id){
  326. $query = "SELECT word from word where id = $id;";
  327. $result = mysql_query($query);
  328. if(mysql_num_rows($result)>0){
  329. $row = mysql_fetch_assoc($result);
  330. return $row['word'];
  331. } else {
  332. return '??';
  333. }
  334. }
  335. function getDependencyID($relationID, $govID, $depID){
  336. $query = "SELECT id FROM dependency WHERE relation_id =".$relationID." AND gov_id =".$govID." and dep_id = ".$depID.";";
  337. $result = mysql_query($query);
  338. if(mysql_num_rows($result)>0){
  339. $row= mysql_fetch_array($query);
  340. return $row['id'];
  341. }else{
  342. return "";
  343. }
  344. }
  345. /** Get matching dependency ID's
  346. @param withinSentence True if you want to search within
  347. a set of sentences, or "table" if you're supplying
  348. the name of a temporary table with an id column if you
  349. want to use the id's stored in that table.
  350. @param withinDocument True if you want to search within a
  351. set of documents
  352. @param within a comma-separated string of sentence or document id's
  353. if either <withinSentence> or
  354. <withinDocument> is marked as True.
  355. Otherwise, false. If $withinSentence is "table",
  356. it's assumed to be the name of a temporary table with
  357. an id field
  358. */
  359. function getDependencyIDs($gov, $dep, $relation, $withinDocument,
  360. $withinSentence, $within, $start, $limit) {
  361. global $timing;
  362. global $num_search_conditions;
  363. $table_identifier = 'filtered_sent_ids';
  364. $insertion_fields = '(id, document_id, num_matched, num_searches_matched)';
  365. $field_identifier = "DISTINCT sentence_id, document_id, 0, 1";
  366. $query_id_where = '';
  367. global $cache_results;
  368. global $query_id;
  369. global $dont_cache_search_results;
  370. if (($cache_results || $query_id) && !$dont_cache_search_results) {
  371. $table_identifier = 'cached_filtered_sent_ids';
  372. $insertion_fields = '(id, document_id, query_id, num_matched, num_searches_matched)';
  373. $field_identifier = "DISTINCT sentence_id, document_id, $query_id, 0, 1";
  374. $query_id_where = " AND query_id = $query_id ";
  375. }
  376. $tablenames = "dependency_xref_sentence, sentence ";
  377. $where = "AND sentence.id = sentence_id";
  378. if($withinSentence == true && strlen($within)>0){
  379. $where = $where." AND sentence_id in (".$within.")";
  380. } else if ($withinSentence == "table") {
  381. $tablenames = "dependency_xref_sentence, sentence, $table_identifier ";
  382. $where = $where. " AND $table_identifier.id = sentence_id $query_id_where ";
  383. }else if ($withinDocument && strlen($within)>0){
  384. $where = $where." AND document_id in (".$within.")";
  385. }
  386. $r = strlen($relation)>0;
  387. $g = strlen($gov)>0;
  388. $d = strlen($dep)>0;
  389. $rel_w = "";
  390. $gov_w ="";
  391. $dep_w = "";
  392. if($r){
  393. $rel_w = "relation_id IN (".$relation.")";
  394. }
  395. if($g){
  396. $gov_w = "gov_id IN (".$gov.")";
  397. }
  398. if($d){
  399. $dep_w = "dep_id IN (".$dep.")";
  400. }
  401. if($r || $g || $d ){
  402. $query = "SELECT SQL_CALC_FOUND_ROWS * FROM ".$tablenames." WHERE ";
  403. $main = " ";
  404. if($r && $g && $d){
  405. $main = $main.$rel_w." AND ".$gov_w." AND ".$dep_w;
  406. }
  407. else if($r && $g){
  408. $main = $main.$rel_w." AND ".$gov_w;
  409. }
  410. else if($r && $d){
  411. $main = $main.$rel_w." AND ".$dep_w;
  412. }
  413. else if($g && $d){
  414. $main = $main."((".$gov_w." AND ".$dep_w.") OR ";
  415. $main = $main." (dep_id IN (".$gov.") AND gov_id IN (".$dep."))) ";
  416. }
  417. else if ($r){
  418. $main = $main.$rel_w;
  419. }
  420. else if ($g){
  421. $main = $main." (".$gov_w;
  422. $main = $main." OR dep_id IN (".$gov.")) ";
  423. }
  424. else{
  425. $main = $main." (".$dep_w;
  426. $main = $main." OR gov_id IN (".$dep.")) ";
  427. }
  428. $query = $query.$main.$where;
  429. if ($timing != 0) {
  430. echo $query;
  431. }
  432. if (!$query_id || $cache_results || $dont_cache_search_results) {
  433. // Update the list of filtered sentence ID's to reflect this.
  434. $num_search_conditions += 1;
  435. $sql = "INSERT INTO $table_identifier $insertion_fields
  436. SELECT $field_identifier
  437. FROM dependency_xref_sentence
  438. WHERE ".$main."
  439. ON DUPLICATE KEY
  440. UPDATE num_searches_matched = num_searches_matched + 1";
  441. mysql_query($sql) or die (mysql_error()." On: <br> $sql
  442. <br> util.php l 454.");
  443. if ($dont_cache_search_results && $query_id) {
  444. updateTemporarySentenceFilterTable();
  445. } else {
  446. updateSentenceFilterTable();
  447. }
  448. }
  449. }
  450. else if($withinSentence){
  451. $query = "SELECT SQL_CALC_FOUND_ROWS * FROM ".$tablenames.$where."
  452. LIMIT 500";
  453. }
  454. $paging = '';
  455. if(strlen($start)>0 && $limit && !getGetParam("onlyMetadata")){
  456. $paging = ' LIMIT '.$limit.' OFFSET '.$start;
  457. }
  458. if (!$cache_results) {
  459. $query = $query.$paging;
  460. $result = mysql_query($query." ;") or die (
  461. 'util.php 371 <br> mysql error <br>'.mysql_error().' '.$query);
  462. if ($timing) {
  463. echo "<br>Dependency search query:
  464. <br>$query
  465. <br>";
  466. }
  467. }
  468. return $result;
  469. }
  470. function relationshipIDList($words){
  471. if(strlen($words) > 0){
  472. $exploded = explode(' ', trim($words));
  473. $ids = array();
  474. foreach($exploded as $word){
  475. $r = getRelationID($word);
  476. if(strlen($r) > 0){
  477. array_push($ids, $r);
  478. }
  479. }
  480. return join(", ", $ids);
  481. }else{
  482. return "";
  483. }
  484. }
  485. /**
  486. Converts a list of words to a comma-separated string of word ID's
  487. */
  488. function wordIDList($raw_words) {
  489. $words = str_replace("+", "", $raw_words);
  490. if(strlen($words) > 0){
  491. $exploded = explode(' ', trim($words));
  492. if(strpos($words, ',')){
  493. $exploded = explode(',',trim($words));
  494. }
  495. $all_ids = array(-2);
  496. foreach($exploded as $word){
  497. $word_ids = getWordIDs($word);
  498. $all_ids = array_merge($all_ids, $word_ids);
  499. }
  500. return join(", ", $all_ids);
  501. } else {
  502. return "";
  503. }
  504. }
  505. /** Gets the index of a given word in a sentence **/
  506. function getWordIndexInSentence($word, $wordtype, $sentence_id) {
  507. if ($wordtype == 'word-set') {
  508. $word_ids = getWordIDsFromWordSet($word);
  509. } else {
  510. $ids = getWordIDs($word);
  511. $word_ids = join(", ", $ids);
  512. }
  513. $sql = "SELECT position from sentence_xref_word
  514. WHERE sentence_id = $sentence_id AND word_id in ($word_ids);";
  515. $result = mysql_query($sql) or die ("SQL error in util.php l. 430");
  516. $indexes = array();
  517. while ($row = mysql_fetch_assoc($result)) {
  518. array_push($indexes, $row['position']);
  519. }
  520. return $indexes;
  521. }
  522. /** Returns a string containing all the words in the given
  523. word set ID **/
  524. function getWordsFromWordSet($wordSetID){
  525. $lemmatize = getGetParam('all_word_forms') == "on";
  526. $sql = "SELECT DISTINCT word
  527. FROM working_set_contents, word
  528. WHERE working_set_id = $wordSetID
  529. AND item_id = word.id
  530. AND item_table_name = 'word';";
  531. $result = mysql_query($sql);
  532. $words = array();
  533. while($row = mysql_fetch_assoc($result)){
  534. if ($lemmatize) {
  535. $variants = getLemmaVariants($row['word']);
  536. $words = array_merge($words, $variants);
  537. } else {
  538. array_push($words, $row['word']);
  539. }
  540. }
  541. $word_string = join(" ", array_unique($words));
  542. return $word_string;
  543. }
  544. /** Returns a string containing all the words in the given
  545. word set ID **/
  546. function getWordIDsFromWordSet($wordSetID){
  547. $lemmatize = getGetParam('all_word_forms') == "on";
  548. $sql = "SELECT DISTINCT word.id, word
  549. FROM working_set_contents, word
  550. WHERE working_set_id = $wordSetID
  551. AND item_id = word.id
  552. AND item_table_name = 'word';";
  553. $result = mysql_query($sql);
  554. $ids = array();
  555. while($row = mysql_fetch_assoc($result)){
  556. if ($lemmatize) {
  557. $ids = array_merge($ids, getLemmaVariantIDs($row['word']));
  558. } else {
  559. array_push($ids, $row['id']);
  560. }
  561. }
  562. $id_string = join(", ", array_unique($ids));
  563. return $id_string;
  564. }
  565. /** Returns the name of a set with the given id
  566. */
  567. function getSetName($id){
  568. $sql = "SELECT name
  569. FROM working_set
  570. WHERE id = $id;";
  571. $result = mysql_query($sql);
  572. $row = mysql_fetch_assoc($result);
  573. return $row['name'];
  574. }
  575. /**********************
  576. Lemmas
  577. ***********************/
  578. /** Returns an array of word id's for all the words that have the same lemma as
  579. this one
  580. */
  581. function getLemmaVariantIDs($word) {
  582. $word = trim($word);
  583. $ids = array();
  584. $sql = "SELECT lemma from word where word = '$word';";
  585. $result = mysql_query($sql) or die(" Error getting lemmas of word
  586. <br>".mysql_error()."
  587. <br> $sql
  588. <br>");
  589. $lemmas = array();
  590. while ($row = mysql_fetch_assoc($result)) {
  591. array_push($lemmas, "'".$row['lemma']."'");
  592. }
  593. $lemma_string = join(", ", $lemmas);
  594. if (strlen($lemma_string) > 0) {
  595. $sql = "SELECT id from word where lemma in ($lemma_string);";
  596. $result = mysql_query($sql) or die ("Error getting ids of words with lemma.
  597. <br>".mysql_error()."
  598. <br> $sql
  599. <br>");
  600. while ($row = mysql_fetch_assoc($result)) {
  601. array_push($ids, $row['id']);
  602. }
  603. }
  604. return $ids;
  605. }
  606. /** Returns an array of strings (words) that all have the same lemma as this
  607. word.
  608. */
  609. function getLemmaVariants($word) {
  610. $sql = "SELECT lemma from word where word = '$word';";
  611. $result = mysql_query($sql) or die(" Error getting lemmas of word
  612. <br>".mysql_error()."
  613. <br> $sql
  614. <br>");
  615. $lemmas = array();
  616. while ($row = mysql_fetch_assoc($result)) {
  617. array_push($lemmas, "'".$row['lemma']."'");
  618. }
  619. $lemma_string = join(", ", $lemmas);
  620. $sql = "SELECT distinct word from word where lemma in ($lemma_string);";
  621. $result = mysql_query($sql) or die ("Error getting ids of words with lemma.
  622. <br>".mysql_error()."
  623. <br> $sql
  624. <br>");
  625. $words = array();
  626. while ($row = mysql_fetch_assoc($result)) {
  627. array_push($words, $row['word']);
  628. }
  629. return $words;
  630. }
  631. /********************
  632. Query string
  633. *********************/
  634. /** Returns a string representing the query in a a human-friendly way.
  635. */
  636. function makeQueryString($gov, $govtype, $dep, $deptype, $relation, $collection,
  637. $metadata, $phrases) {
  638. $query_string = $gov;
  639. // if just a regular word search or a phrase search
  640. if(strlen(trim($relation)) == 0){
  641. //default query
  642. $words = "";
  643. if ($gov) {
  644. $words = $gov;
  645. if($govtype == 'word-set'){
  646. $words = explode(" ", getWordsFromWordSet($gov));
  647. $word_id_string = getWordIDsFromWordSet($gov);
  648. $q = "(".join("|", $words).")";
  649. $query_string = getSetName($gov);
  650. } else {
  651. $query_string = $words;
  652. }
  653. }
  654. // If no search query has been specified, but there are phrases
  655. // acting as filters, then use the first filter phrase as the
  656. // search query for the center of the word tree.
  657. else if (count($phrases) > 0) {
  658. $phraseIDs = getPhraseIDs($phrases[0]);
  659. $sql = "SELECT sequence from sequence
  660. WHERE id
  661. IN (".join(",", $phraseIDs).");";
  662. $result = mysql_query($sql) or die ("Error getting phrase with ID
  663. ".$phrases[0].";");
  664. while ($row = mysql_fetch_assoc($result)) {
  665. $q = explode(" ", $row['sequence']);
  666. $query_string = $q[0];
  667. }
  668. }
  669. // If no search query has been specified get the most frequent
  670. // content word (not stop word) from the set of documents matching
  671. // the filters (if any) or the whole collection (if there are no
  672. // filters), and use that as the search query for the center of
  673. // the word tree.
  674. else {
  675. $phrase = mysql_real_escape_string(
  676. getMostFrequentContentPhrase($filtered));
  677. $query_string = $phrase;
  678. }
  679. $q = array("gov"=>$query_string);
  680. return $q;
  681. }
  682. // If it's a grammatical search, extract the gov, dep, and relation from
  683. // the GET parameters and issue a dependency relationship search.
  684. else{
  685. $query_string = array();
  686. $govIDs = wordIDList($gov);
  687. if($govtype == "word-set"){
  688. $govIDs = getWordIDsFromWordSet($gov);
  689. $query_string['gov'] = getSetName($gov);
  690. } else {
  691. $query_string['gov'] = $gov;
  692. }
  693. $depIDs = wordIDList($dep);
  694. if($deptype == 'word-set'){
  695. $depIDs = getWordIDsFromWordSet($dep);
  696. $query_string['dep'] = getSetName($dep);
  697. } else {
  698. $query_string['dep'] = $dep;
  699. }
  700. $relations = relationshipIDList($relation);
  701. $query_string['relation'] = $relation;
  702. if($filtersAreActive){
  703. // To stave off syntax errors if its empty, put a -1 in the list.
  704. array_push($filtered, "-1");
  705. $withinSentence = true;
  706. $within = join(", ", $filtered);
  707. $sentence_where_clause = " AND id IN ($within) ";
  708. }
  709. $dependency_id_results = null;
  710. $dependency_id_results = getDependencyIDs($govIDs,
  711. $depIDs,
  712. $relations,
  713. false,
  714. $withinSentence,
  715. $within,
  716. false,
  717. false);
  718. $sentence_ids = array();
  719. while($row = mysql_fetch_array($dependency_id_results)){
  720. array_push($sentence_ids, $row['sentence_id']);
  721. }
  722. if(count($sentence_ids) > 0){
  723. $id_string = join(", ", $sentence_ids);
  724. $sql = "SELECT
  725. sentence.id, sentence.document_id, number, sentence
  726. FROM
  727. sentence
  728. WHERE id IN (".$id_string.");";
  729. $results = mysql_query($sql) or die("<b>Fatal MySQL error</b>.
  730. <br/> Query: " . $sql . "
  731. <br/> Error: (" . mysql_errno() . ") " . mysql_error());
  732. // pattern
  733. $govs = $gov;
  734. if($govtype == "word-set"){
  735. $govs = getWordsFromWordSet($gov);
  736. }
  737. $deps = $dep;
  738. if($deptype == 'word-set'){
  739. $deps = getWordsFromWordSet($dep);
  740. }
  741. if(strlen($govs) > 0){
  742. $query = str_replace(" ", "|", $govs);
  743. }else if(strlen($deps) > 0){
  744. $query = str_replace(" ", "|", $deps);
  745. }
  746. $pattern = "/\b(".$query.")\b/i";
  747. global $PUNCTUATION;
  748. if (strstr($PUNCTUATION, $query)) {
  749. $pattern = "/$query/i";
  750. }
  751. }
  752. return $query_string;
  753. }
  754. }
  755. /***********************
  756. Document Sub Structures
  757. ************************/
  758. /** Return a list of structures:
  759. {unit_id, unit_name, parent_id, parent_name, document_id}
  760. within a document **/
  761. function getSubStructures($documentID){
  762. $sql = "SELECT * from document_structure WHERE document_id = $documentID;";
  763. $result = mysql_query($sql) or die(mysql_error()." on query:
  764. $sql
  765. made on
  766. on util.php line 471.");
  767. $structures = array();
  768. while($row = mysql_fetch_assoc($result)){
  769. array_push($structures, $row);
  770. }
  771. return $structures;
  772. }
// Tokens treated as stop words: verb forms, clitics, function words and
// punctuation marks. The list is produced by splitting on single spaces, so
// entries that appear twice in the text appear twice in the array.
$STOPWORDS = explode(" ", "'ve does o t went was is had be were did are have do has being am 's been go 'm the and so are for be but this what 's did had they doth a to is that was as are at an of with . , ; ? ' \" : `");
// Punctuation characters, kept as one string.
$PUNCTUATION = "!@#$%^&*()_+-=~`,./;;\"'{}[]|\\";
  775. ?>